PC Professionell 2004 December

home *** CD-ROM | disk | FTP | other *** search

/ PC Professionell 2004 December / PCpro_2004_12.ISO / files / webserver / xampp / xampp-perl-addon-1.4.9-installer.exe / UseMod.pm < prev next >

Wrap

Perl POD Document | 2003-09-22 | 13.8 KB | 439 lines

package CGI::Wiki::Formatter::UseMod; use strict; use vars qw( $VERSION @_links_found ); $VERSION = '0.09'; use URI::Escape; use Text::WikiFormat as => 'wikiformat'; use HTML::PullParser; use URI::Find::Delimited; =head1 NAME CGI::Wiki::Formatter::UseMod - UseModWiki-style formatting for CGI::Wiki =head1 DESCRIPTION A formatter backend for L<CGI::Wiki> that supports UseMod-style formatting. =head1 SYNOPSIS use CGI::Wiki::Formatter::UseMod; # Instantiate - see below for parameter details. my $formatter = CGI::Wiki::Formatter::UseMod->new( %config ); # Format some text. my $cooked = $formatter->format($raw); # Find out which other nodes that text would link to. my @links_to = $formatter->find_internal_links($raw); =head1 METHODS =over 4 =item B<new> my $formatter = CGI::Wiki::Formatter::UseMod->new( extended_links => 0, # $FreeLinks implicit_links => 1, # $WikiLinks force_ucfirst_nodes => 1, # $FreeUpper use_headings => 1, # $UseHeadings allowed_tags => [qw(b i)], # defaults to none macros => {}, node_prefix => 'wiki.pl?', node_suffix => '', edit_prefix => 'wiki.pl?action=edit&id=', edit_suffix => '', munge_urls => 0, ); Parameters will default to the values shown above (apart from C<allowed_tags>, which defaults to allowing no tags). =over 4 =item B<URL munging> If you set C<munge_urls> to true, then your URLs will be more user-friendly, for example http://example.com/wiki.cgi?Mailing_List_Managers rather than http://example.com/wiki.cgi?Mailing%20List%20Managers The former behaviour is the actual UseMod behaviour, but requires a little fiddling about in your code (see C<node_name_to_node_param>), so the default is to B<not> munge URLs. =item B<Macros> Be aware that macros are processed I<after> filtering out disallowed HTML tags. They are also not called in any particular order. The keys of macros should be either regexes or strings. The values can be strings, or, if the corresponding key is a regex, can be coderefs. The coderef will be called with the first nine substrings captured by the regex as arguments. I would like to call it with all captured substrings but apparently this is complicated. =back Macro examples: macros => { '@SEARCHBOX' => qq(<form action="wiki.pl" method="get"> <input type="hidden" name="action" value="search"> <input type="text" size="20" name="terms"> <input type="submit"></form>), qr/\@INDEX\s+\[Category\s+([^\]]+)]/ => sub { return "{an index of things in category $_[0]}" } } =cut sub new { my ($class, @args) = @_; my $self = {}; bless $self, $class; $self->_init(@args) or return undef; return $self; } sub _init { my ($self, %args) = @_; # Store the parameters or their defaults. my %defs = ( extended_links => 0, implicit_links => 1, force_ucfirst_nodes => 1, use_headings => 1, allowed_tags => [], macros => {}, node_prefix => 'wiki.pl?', node_suffix => '', edit_prefix => 'wiki.pl?action=edit&id=', edit_suffix => '', munge_urls => 0, ); my %collated = (%defs, %args); foreach my $k (keys %defs) { $self->{"_".$k} = $collated{$k}; } return $self; } =item B<format> my $html = $formatter->format($submitted_content, $wiki); Escapes any tags which weren't specified as allowed on creation, then interpolates any macros, then translates the raw Wiki language supplied into HTML. A L<CGI::Wiki> object can be supplied as an optional second parameter. This object will be used to determine whether a linked-to node exists or not, and alter the presentation of the link accordingly. This is only really in here for use when this method is being called from within L<CGI::Wiki>. =cut sub format { my ($self, $raw, $wiki) = @_; $raw =~ s/\r\n/\n/sg; # CGI newline is \r\n not \n my $safe = ""; my %allowed = map {lc($_) => 1, "/".lc($_) => 1} @{$self->{_allowed_tags}}; # Parse the HTML - even if we're not allowing any tags, because we're # using a custom escaping routine rather than CGI.pm my $parser = HTML::PullParser->new(doc => $raw, start => '"TAG", tag, text', end => '"TAG", tag, text', text => '"TEXT", tag, text'); while (my $token = $parser->get_token) { my ($flag, $tag, $text) = @$token; if ($flag eq "TAG" and !defined $allowed{lc($tag)}) { $safe .= $self->_escape_HTML($text); } else { $safe .= $text; } } # Now do any inline links. my $finder = URI::Find::Delimited->new( ignore_quoted => 1 ); $finder->find(\$safe); # Now process any macros. my %macros = %{$self->{_macros}}; foreach my $key (keys %macros) { my $value = $macros{$key}; if ( ref $value && ref $value eq 'CODE' ) { $safe =~ s/$key/$value->($1, $2, $3, $4, $5, $6, $7, $8, $9)/eg; } else { $safe =~ s/$key/$value/g; } } # Finally set up config and call Text::WikiFormat. my %format_opts = ( extended => $self->{_extended_links}, prefix => $self->{_node_prefix}, implicit_links => $self->{_implicit_links} ); my %format_tags = ( # chromatic made most of the regex below. I will document it when # I understand it properly. indent => qr/^(?:\t+|\s{4,}|\s*\*?(?=\**\*+))/, newline => "", # avoid bogus <br /> paragraph => [ "<p>", "</p>\n", "", "\n", 1 ], # no bogus <br /> extended_link_delimiters => [ '[[', ']]' ], blocks => { ordered => qr/^\s*([\d]+)\.\s*/, unordered => qr/^\s*\*\s*/, definition => qr/^:\s*/ }, definition => [ "<dl>\n", "</dl>\n", "<dt><dd>", "\n" ], indented => { definition => 0 }, blockorder => [ qw( header line ordered unordered code definition paragraph )], nests => { map { $_ => 1} qw( ordered unordered ) }, link => sub { my ($link, $opts) = @_; $opts ||= {}; my $title; ($link, $title) = split(/\|/, $link, 2) if $opts->{extended}; $title =~ s/^\s*// if $title; # strip leading whitespace $title ||= $link; if ( $self->{_force_ucfirst_nodes} ) { $link = $self->_do_freeupper($link); } $link = $self->_munge_spaces($link); my $editlink_not_link = 0; # See whether the linked-to node exists, if we can. if ( $wiki && !$wiki->node_exists( $link ) ) { $editlink_not_link = 1; } $link =~ s/ /_/g if $self->{_munge_urls}; $link = uri_escape( $link ); if ( $editlink_not_link ) { my $prefix = $self->{_edit_prefix}; my $suffix = $self->{_edit_suffix}; return qq|[$title]<a href="$prefix$link$suffix">?</a>|; } else { my $prefix = $self->{_node_prefix}; my $suffix = $self->{_node_suffix}; return qq|<a href="$prefix$link$suffix">$title</a>|; } }, ); return wikiformat($safe, \%format_tags, \%format_opts ); } # CGI.pm is sometimes awkward about actually performing CGI::escapeHTML # if there's a previous instantiation - in the calling script, for example. # So just do it here. sub _escape_HTML { my ($self, $text) = @_; $text =~ s{&}{&}gso; $text =~ s{<}{<}gso; $text =~ s{>}{>}gso; $text =~ s{"}{"}gso; return $text; } =item B<find_internal_links> my @links_to = $formatter->find_internal_links( $content ); Returns a list of all nodes that the supplied content links to. =cut sub find_internal_links { my ($self, $raw) = @_; @_links_found = (); my %format_opts = ( extended => $self->{_extended_links}, prefix => $self->{_node_prefix}, implicit_links => $self->{_implicit_links} ); my %format_tags = ( extended_link_delimiters => [ '[[', ']]' ], link => sub { my ($link, $opts) = @_; $opts ||= {}; my $title; ($link, $title) = split(/\|/, $link, 2) if $opts->{extended}; if ( $self->{_force_ucfirst_nodes} ) { $link = $self->_do_freeupper($link); } $link = $self->_munge_spaces($link); push @CGI::Wiki::Formatter::UseMod::_links_found, $link; return ""; # don't care about output } ); my $foo = wikiformat($raw, \%format_tags, \%format_opts); my @links = @_links_found; @_links_found = (); return @links; } =item B<node_name_to_node_param> use URI::Escape; $param = $formatter->node_name_to_node_param( "Recent Changes" ); my $url = "wiki.pl?" . uri_escape($param); In usemod, the node name is encoded prior to being used as part of the URL. This method does this encoding (essentially, whitespace is munged into underscores). In addition, if C<force_ucfirst_nodes> is in action then the node names will be forced ucfirst if they weren't already. Note that unless C<munge_urls> was set to true when C<new> was called, this method will do nothing. =cut sub node_name_to_node_param { my ($self, $node_name) = @_; return $node_name unless $self->{_munge_urls}; my $param = $node_name; $param = $self->_munge_spaces($param); $param = $self->_do_freeupper($param) if $self->{_force_ucfirst_nodes}; $param =~ s/ /_/g; return $param; } =item B<node_param_to_node_name> my $node = $q->param('node') || ""; $node = $formatter->node_param_to_node_name( $node ); In usemod, the node name is encoded prior to being used as part of the URL, so we must decode it before we can get back the original node name. Note that unless C<munge_urls> was set to true when C<new> was called, this method will do nothing. =cut sub node_param_to_node_name { my ($self, $param) = @_; return $param unless $self->{_munge_urls}; # Note that this might not give us back exactly what we started with, # since in the encoding we collapse and trim whitespace; but this is # how usemod does it (as of 0.92) and usemod is what we're emulating. $param =~ s/_/ /g; return $param; } sub _do_freeupper { my ($self, $node) = @_; # This is the FreeUpper usemod behaviour, slightly modified from # their regexp, as we need to do it before we check whether the # node exists ie before we substitute the spaces with underscores. $node = ucfirst($node); $node =~ s|([- _.,/])([a-z])|$1.uc($2)|ge; return $node; } sub _munge_spaces { my ($self, $node) = @_; # Yes, we really do only munge spaces, not all whitespace. This is # how usemod does it (as of 0.92). $node =~ s/ +/ /g; $node =~ s/^ //; $node =~ s/ $//; return $node } =head1 AUTHOR Kake Pugh (kake@earth.li). =head1 COPYRIGHT Copyright (C) 2003 Kake Pugh. All Rights Reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 CREDITS The OpenGuides London team (L<http://openguides.org/london/>) sent some very helpful bug reports. A lot of the work of this module is done within chromatic's module, L<Text::WikiFormat>. =head1 CAVEATS This doesn't yet support all of UseMod's formatting features and options, by any means. This really truly I<is> a 0.0* release. Please send bug reports, omissions, patches, and stuff, to me at C<kake@earth.li>. =head1 NOTE ON USEMOD COMPATIBILITY UseModWiki "encodes" node names before making them part of a URL, so for example a node about Wombat Defenestration will have a URL like http://example.com/wiki.cgi?Wombat_Defenestration So if we want to emulate a UseModWiki exactly, we need to munge back and forth between node names as titles, and node names as CGI params. my $formatter = CGI::Wiki::Formatter::UseMod->new( munge_urls => 1 ); my $node_param = $q->param('id') || $q->param('keywords') || ""; my $node_name = $formatter->node_param_to_node_name( $node_param ); use URI::Escape; my $url = "http://example.com/wiki.cgi?" . uri_escape( $formatter->node_name_to_node_param( "Wombat Defenestration" ) ); =head1 SEE ALSO =over 4 =item * L<CGI::Wiki> =item * L<Text::WikiFormat> =item * UseModWiki (L<http://www.usemod.com/cgi-bin/wiki.pl>) =back =cut 1;