Acorn User 10

home *** CD-ROM | disk | FTP | other *** search

/ Acorn User 10 / AU_CD10.iso / Updates / Perl / Non-RPC / !Perl / riscos / POD / Parser / DDF.pm next >

Wrap

Perl POD Document | 1998-04-27 | 30.1 KB | 1,086 lines

package Pod::Parser::DDF; use Pod::Parser; use Carp; # "Impression is loosing" (sp? - is it loosing or loozing cf hakerz ?) because: # 0: The ddf output for styles can't cope with style names containing " # 1: keepregion can't be used on two adjacent regions. # (workaround - put a newline inbetween, with arbitarily small fontsize) # 2: keepnext must be turned on at the *start* of the line. I've found that you # can't put two tabs first. @ISA = qw(Pod::Parser); $VERSION = 0.05; use Text::DDF; use Text::Tabs; use strict; use vars qw($normal $title $head1 $head2 $head3 $verbatim $verbatimend $code $file $error $index $itemkeyword $itemkeywordindex $empty $perlrunopts $bold $italic $keepregion $keepnext %styles %cheats $tabs_on_item_ruler %HTML_Escapes); # Don't downcase these words in headings... %cheats = ( "CPAN\n" => 1, "IO\n" => 1, "LC_TIME\n" => 1 ); use Data::Dumper; # print Dumper(\%Units::factors); ## Hmm. Is this OOP style of programming making things just a little too easy? $empty = new Text::DDF::Effect; $bold = new Text::DDF::Effect 'bold'; $italic = new Text::DDF::Effect 'italic'; $keepregion = new Text::DDF::Effect 'keepregion'; $keepnext = new Text::DDF::Effect 'keepnext'; $normal = new Text::DDF::Style 'Normal', qw( font Trinity.Medium fontsize 12pt linespacep 120% spaceabove 0pt spacebelow 14pt); $title = new Text::DDF::Style 'Title', qw( font Homerton.Bold fontsize 20pt linespacep 120% justify centre contents 0 menuitem on autoparagraph), ''; $head1 = new Text::DDF::Style 'Head1', qw( font Homerton.Medium fontsize 18pt linespacep 120% contents 1 shortcut 385 spacebelow 7pt keepnext on menuitem on autoparagraph), ''; $head2 = new Text::DDF::Style 'Head2', qw( font Homerton.Medium fontsize 14pt linespacep 120% contents 2 shortcut 386 spacebelow 7pt keepnext on menuitem on autoparagraph), ''; $head3 = $head2->Copy('Head3'); $head3->Add( qw(fontsize 10pt)); $code = new Text::DDF::Style 'Code', qw( font Corpus.Medium menuitem on autoparagraph), ''; $file = $code->Copy('File'); $verbatim = $code->Copy('Verbatim'); # Need to squeeze more onto the page - fontsize 10 $verbatim->Add( qw( fontsize 10pt spacebelow 0pt leftmargin 36pt rightmargin 0pt returnmargin 36pt tabs), '72pt,144pt,216pt,288pt,360pt,432pt,504pt,576pt' ); $verbatim->AddTabs( qw() ); # This is a hack to get around Impression's inability to keeptogether adjacent regions $verbatimend = new Text::DDF::Style 'Verbatim End', qw( linespacep 0% ); # This needs to be manually applied to lines m/^perl\t/ (match the patter POD insensitive) $perlrunopts = new Text::DDF::Style 'Perl Run Options', qw( leftmargin 72pt rightmargin 0pt returnmargin 1.8pt ruleleftmargin 0pt rulerightmargin 0pt menuitem on autoparagraph), '', 'tabs', '72pt,144pt,216pt,288pt,360pt,432pt,504pt,576pt,648pt'; $error = new Text::DDF::Style 'Error', qw( overprint off menuitem on), 'fontcolour', 'rgb=(0,1,1)', 'backcolour', 'rgb=(1,0,0)'; $index = new Text::DDF::Style 'Index Entry', qw( menuitem on index), '', 'backcolour', 'rgb=(0,1,0)'; $itemkeyword = new Text::DDF::Style 'Item Keyword', qw( bold on menuitem on keepnext on spacebelow 0pt keepnext on); # So that the syntax can be separated # If you make this a copy (rather than an alias) of $index rember to add it to the foreach # below. $itemkeywordindex = $index; my $style; foreach $style ( qw( title head1 head2 head3 code file verbatim verbatimend error index normal itemkeyword perlrunopts) ) { # Make a hash of the form $style{head} = \$head eval "\$styles{'$style'} = \$$style" } %HTML_Escapes = ( 'amp' => '&', # ampersand 'lt' => '<', # left chevron, less-than 'gt' => '>', # right chevron, greater-than 'quot' => '"', # double quote 'nbsp' => ' ', # no-break space 'iexcl' => '¡', # inverted exclamation mark 'cent' => '¢', # cent sign 'pound' => '£', # pound sterling sign 'curren' => '¤', # general currency sign 'yen' => '¥', # yen sign 'brvbar' => '¦', # broken (vertical) bar 'sect' => '§', # section sign 'uml' => '¨', # umlaut (dieresis) 'copy' => '©', # copyright sign 'ordf' => 'ª', # ordinal indicator, feminine 'laquo' => '«', # angle quotation mark, left 'not' => '¬', # not sign 'shy' => '', # soft hyphen 'reg' => '®', # registered sign 'macr' => '¯', # macron 'deg' => '°', # degree sign 'plusmn' => '±', # plus-or-minus sign 'sup2' => '²', # superscript two 'sup3' => '³', # superscript three 'acute' => '´', # acute accent 'micro' => 'µ', # micro sign 'para' => '¶', # pilcrow (paragraph sign) 'middot' => '·', # middle dot 'cedil' => '¸', # cedilla 'sup1' => '¹', # superscript one 'ordm' => 'º', # ordinal indicator, masculine 'raquo' => '»', # angle quotation mark, right 'frac14' => '¼', # fraction one-quarter 'frac12' => '½', # fraction one-half 'frac34' => '¾', # fraction three-quarters 'iquest' => '¿', # inverted question mark 'Agrave' => 'À', # capital A, grave accent 'Aacute' => 'Á', # capital A, acute accent 'Acirc' => 'Â', # capital A, circumflex accent 'Atilde' => 'Ã', # capital A, tilde 'Auml' => 'Ä', # capital A, dieresis or umlaut mark 'Aring' => 'Å', # capital A, ring 'AElig' => 'Æ', # capital AE diphthong (ligature) 'Ccedil' => 'Ç', # capital C, cedilla 'Egrave' => 'È', # capital E, grave accent 'Eacute' => 'É', # capital E, acute accent 'Ecirc' => 'Ê', # capital E, circumflex accent 'Euml' => 'Ë', # capital E, dieresis or umlaut mark 'Igrave' => 'Ì', # capital I, grave accent 'Iacute' => 'Í', # capital I, acute accent 'Icirc' => 'Î', # capital I, circumflex accent 'Iuml' => 'Ï', # capital I, dieresis or umlaut mark 'ETH' => 'Ð', # capital Eth, Icelandic 'Ntilde' => 'Ñ', # capital N, tilde 'Ograve' => 'Ò', # capital O, grave accent 'Oacute' => 'Ó', # capital O, acute accent 'Ocirc' => 'Ô', # capital O, circumflex accent 'Otilde' => 'Õ', # capital O, tilde 'Ouml' => 'Ö', # capital O, dieresis or umlaut mark 'times' => '×', # multiply sign 'Oslash' => 'Ø', # capital O, slash 'Ugrave' => 'Ù', # capital U, grave accent 'Uacute' => 'Ú', # capital U, acute accent 'Ucirc' => 'Û', # capital U, circumflex accent 'Uuml' => 'Ü', # capital U, dieresis or umlaut mark 'Yacute' => 'Ý', # capital Y, acute accent 'THORN' => 'Þ', # capital Thorn, Icelandic 'szlig' => 'ß', # small sharp s, German (sz ligature) 'agrave' => 'à', # small a, grave accent 'aacute' => 'á', # small a, acute accent 'acirc' => 'â', # small a, circumflex accent 'atilde' => 'ã', # small a, tilde 'auml' => 'ä', # small a, dieresis or umlaut mark 'aring' => 'å', # small a, ring 'aelig' => 'æ', # small ae diphthong (ligature) 'ccedil' => 'ç', # small c, cedilla 'egrave' => 'è', # small e, grave accent 'eacute' => 'é', # small e, acute accent 'ecirc' => 'ê', # small e, circumflex accent 'euml' => 'ë', # small e, dieresis or umlaut mark 'igrave' => 'ì', # small i, grave accent 'iacute' => 'í', # small i, acute accent 'icirc' => 'î', # small i, circumflex accent 'iuml' => 'ï', # small i, dieresis or umlaut mark 'eth' => 'ð', # small eth, Icelandic 'ntilde' => 'ñ', # small n, tilde 'ograve' => 'ò', # small o, grave accent 'oacute' => 'ó', # small o, acute accent 'ocirc' => 'ô', # small o, circumflex accent 'otilde' => 'õ', # small o, tilde 'ouml' => 'ö', # small o, dieresis or umlaut mark 'divide' => '÷', # divide sign 'oslash' => 'ø', # small o, slash 'ugrave' => 'ù', # small u, grave accent 'uacute' => 'ú', # small u, acute accent 'ucirc' => 'û', # small u, circumflex accent 'uuml' => 'ü', # small u, dieresis or umlaut mark 'yacute' => 'ý', # small y, acute accent 'thorn' => 'þ', # small thorn, Icelandic 'yuml' => 'ÿ', # small y, dieresis or umlaut mark 'Wcirc' => 'Ŵ', # capital W, circumflex accent 'wcirc' => 'ŵ', # small w, circumflex accent 'Ycirc' => 'Ŷ', # capital Y, circumflex accent 'ycirc' => 'ŷ', # small y, circumflex accent 'hellip' => '…', # ellipsis 'trade' => '™', # trademark, TM 'permil' => '‰', # per thousand (mille) 'bull' => '•', # bullet 'lsquo' => '‘', # quote left 'rsquo' => '’', # quote right 'lsaquo' => '‹', # guille single left 'rsaquo' => '›', # guille single right 'ldquo' => '“', # quote double left 'rdquo' => '”', # quote double right 'ldquor' => '„', # quote double base 'ndash' => '–', # en dash 'mdash' => '—', # em dash 'minus' => '−', # minus sign 'oelig' => 'œ', # oe ligature 'OElig' => 'Œ', # OE ligature 'dagger' => '†', # dagger 'Dagger' => '‡', # double dagger 'filig' => 'ﬁ', # fi ligature 'fllig' => 'ﬂ', # fl ligature ); # "It hasn't rained on Mars for a very long time. # Several hundred million years of hosepipe bans." ## implementation of appropriate subclass methods ... sub nl2ws { foreach( @_ ) { tr/\n\r/ / } @_; } sub space2hard { foreach( @_ ) { tr/ -/ /; } # Spaces to hard spaces, - to (of all things) soft # hyphens, as Impression won't break on these. # Duh. @_; } sub downcase_words { # Downcase all the words. (FAQ section 4) foreach( @_ ) { s/ ( (^\w) #at the beginning of the line | # or (\s\w) #preceded by whitespace ) /\U$1/xg; s/([\w']+)/\u\L$1/g; } @_; } # Apparently unnecessary "$_" interpolation allows encoding of read-only scalars... sub encode ($@) { my $sub = shift; my @answer; foreach( @_ ) { if( ref ) { my @copy = @$_; my $tail = pop @copy; my $head = shift @copy; push @answer, [$head, encode( $sub, @copy ), $tail]; } else { push @answer, &$sub( "$_" ); } } @answer; } sub __output { my $out_fh = shift; foreach( @_ ) { if( ref ) { my @copy = @$_; my $tail = pop @copy; print $out_fh shift @copy; __output( $out_fh, @copy ); print $out_fh $tail; } else { print $out_fh $_; } } } sub output { my $self = shift; my $out_fh = $self->{OUTPUT}; my $fill = ($self->{PRAGMAS}->{FILL} eq 'on'); print $out_fh $self->{KEEPNEXT_HACK} if defined $self->{KEEPNEXT_HACK}; print $out_fh "\t" x ${$self->{INDENT}}[1]; __output( $out_fh, encode( \&ddfencode, @_ )); # foreach( @_ ) # { # if( ref ) # { # print $out_fh $$_[0] if defined $$_[0]; # print $out_fh ddfencode "$$_[1]" if defined $$_[1]; # print $out_fh $$_[2] if defined $$_[2]; # } # else # { # print $out_fh ddfencode "$_"; # } # } } sub output_raw { my $self = shift; my $out_fh = $self->{OUTPUT}; print $out_fh @_; } sub error_text { my $self = shift; warn "@_"; [$error->StyleWrap(@_)]; } sub error { my $self = shift; my $out_fh = $self->{OUTPUT}; $self->output( [$error->StyleWrap(@_)] ); warn "@_"; } $tabs_on_item_ruler = 2; sub find_item_ruler ($$) { my $self = shift; my $stylename = "Item List $_[0]"; my $style = $styles{$stylename}; unless( defined $style ) { # Here's the clever bit. Throw together a new ruler on the fly. # Magic numbers: default over indent is 4 my $offset = ($_[0] - 4) * 6; my $right = $offset + 18; my $left = $offset + 24; # my $next = $offset + 108; $style = new Text::DDF::Style $stylename, 'leftmargin', "${left}pt", 'rightmargin', '0pt', 'returnmargin', "${offset}pt", 'menuitem', 'on', 'autoparagraph', ''; $style->AddTabs( "r${right}pt", "${left}pt" ); $self->output_raw( $style->Define ); $styles{$stylename} = $style; } return $style; } sub item { my $self = shift; my $item_follows = shift; return unless (defined $self->{ITEM}); my $paratag = $self->{ITEM}; my $prev_indent = $self->{INDENTS}->[$#{$self->{INDEX}} - 1] || $self->{DEF_INDENT}; undef $self->{ITEM}; # Yes, I mean local not my. # Want no automatic tabs in ouput in this subroutine local ${$self->{INDENT}}[1] = 0; # Want previous value restored at end. # The item ruler is roughly # ...<...>.......> # # • Bullet # 13 Numbered item # Spam Some technical term. # # except that perl.pod and perlmodlib.pod currently contain lines of the # form # =item * some text # # perlrun.pod has lines # =item 2. # # perlfaq4.pod has lines # =item 2. some text # # The right tab is for aligning numbers and bullets. # The next left tab starts the paragraph if ($paratag =~ s/^\*\s*//m ) { if( length $paratag ) { # • Text $paratag =~ s/\n*$//m; if( $item_follows ) { $self->output( "\t•\t", $self->interpolate($paratag), "\n" ); } else { $self->output( [$keepnext->StyleWrap("\t•\t", $self->interpolate($paratag), "\n")], @_ ? "\t\t" : ''); } } else { $self->output( "\t•" ); } } elsif ($paratag =~ /^[\d\.]+\s*$/) { $self->output( "\t", $self->interpolate ($paratag) ); } elsif ($paratag =~ /^[\d\.]\s*/) { $paratag =~ /^([\d]+)\.?\s*(.*)/; $self->output( "\t$1" ); # @_ is passed in interpolated. unshift @_, $self->interpolate ("$2\n") if length $2; } else { $paratag =~ s/\n*$//m; $paratag =~ s/( +[A-Z][A-Z0-9(), ]*)$//; my $arguments = $&; my $index = ($self->{PRAGMAS}->{ITEM_INDEX} eq 'on'); # Yes, it's messy. # The plan is that all arguments in perlfunc are uppercase, so we should # be able to identify them, and *not* wrap them in the index tag. $self->output( [$itemkeyword->StyleWrap( ( $index ? [$itemkeywordindex->StyleWrap( $self->interpolate( $paratag ) )] : $self->interpolate( $paratag ) ), $self->interpolate($arguments), ($item_follows || @_) ? "\n" : '') ], scalar @_ ? "\t" : $item_follows ? '' : "\n" ); # $item_follows == 1 ensures a full paragraph break after =item Spam # with no subsequent paragraph or further =item directives. # # ie THERE IS NO GENERAL RULE FOR CONVERTING A LIST INTO A SCALAR! # in perlfunc.pod } return unless @_; if ($_[0] =~ /^=/) { # tricked! warn "Tricked by $_[0]"; } $self->output ("\t",@_); } ## Overloaded methods sub begin_input { my $self = shift; #---------------------------------------------------- # Subclasses may wish to make use of some of the # commented-out code below for initializing pragmas #---------------------------------------------------- $self->{PRAGMAS} = { FILL => 'on', STYLE => 'plain', INDENT => [0, 0, $empty], ITEM_INDEX => 'off', KEEPNEXT_HACK => undef }; ## Initialize all PREVIOUS_XXX pragma values my ($name, $value); for (($name, $value) = each %{$self->{PRAGMAS}}) { $self->{PRAGMAS}->{"PREVIOUS_${name}"} = $value; } #---------------------------------------------------- # Indent no., Tabs to indent by, style $self->{DEF_INDENT} = [4, 0, $empty]; $self->{INDENTS} = []; $self->{INDENT} = $self->{DEF_INDENT}; $self->{DONE_TITLE} = 0; # Define the styles foreach (values %styles) { $self->output_raw( $_->Define ); } return; } =head2 end_input() This method is invoked by B<parse_from_filehandle()> immediately I<after> processing input from a filehandle. The base class implementation does nothing but subclasses may override it to perform any per-file cleanup actions. =cut sub end_input { my $self = shift; $self->item() if (defined $self->{ITEM}); } =head2 pragma($pragma_name, $pragma_value) This method is invoked for each pragma encountered inside an C<=pod> paragraph (see the description of the B<parse_pragmas()> method). The pragma name is passed in C<$pragma_name> (which should always be lowercase) and the corresponding value is C<$pragma_value>. The base class implementation of this method does nothing. Derived class implementations of this method should be able to recognize at least the following pragmas and take any necessary actions when they are encountered: =over 4 =item B<fill=value> The argument I<value> should be one of C<on>, C<off>, or C<previous>. Specifies that "filling-mode" should set to 1, 0, or its previous value (respectively). If I<value> is omitted then the default is C<on>. Derived classes may use this to decide whether or not to perform any filling (wrapping) of subsequent text. =item B<style=value> The argument I<value> should be one of C<bold>, C<italic>, C<code>, C<plain>, or C<previous>. Specifies that the current default paragraph font should be set to C<bold>, C<italic>, C<code>, the empty string C<>, or its previous value (respectively). If I<value> is omitted then the default is C<plain>. Derived classes may use this to determine the default font style to use for subsequent text. =item B<indent=value> The argument I<value> should be an integer value (with an optional sign). Specifies that the current indentation level should be reset to the given value. If a plus (minus) sign precedes the number then the indentation level should be incremented (decremented) by the given number. If only a plus or minus sign is given (without a number) then the current indentation level is incremented or decremented by some default amount (to be determined by subclasses). =back The value returned will be 1 if the pragma name was recognized and 0 if it wasnt (in which case the pragma was ignored). Derived classes should override this method if they wish to implement any pragmas. The base class implementation of this method does nothing but it does contain some commented-out code which subclasses may want to make use of when implementing pragmas. =cut sub pragma { my $self = shift; ## convert remaining args to lowercase my $name = lc shift; my $value = lc shift; my $rc = 1; local($_); #---------------------------------------------------- # Subclasses may wish to make use of some of the # commented-out code below for processing pragmas #---------------------------------------------------- my ($abbrev, %abbrev_table); if ($name eq 'fill' || $name eq 'item_index') { my $NAME = uc $name; %abbrev_table = ('on' => 'on', 'of' => 'off', 'p' => 'previous'); $value = 'on' unless ((defined $value) && ($value ne '')); return $rc unless ($value =~ /^(on|of|p)/io); $abbrev = $1; $value = $abbrev_table{$abbrev}; if ($value eq 'previous') { $self->{PRAGMAS}->{$NAME} = $self->{PRAGMAS}->{"PREVIOUS_$NAME"}; } else { $self->{PRAGMAS}->{"PREVIOUS_$NAME"} = $self->{PRAGMAS}->{$NAME}; $self->{PRAGMAS}->{$NAME} = $value; } } elsif ($name eq 'style') { %abbrev_table = ('b' => 'bold', 'i' => 'italic', 'c' => 'code', 'pl' => 'plain', 'pr' => 'previous'); $value = 'plain' unless ((defined $value) && ($value ne '')); return $rc unless ($value =~ /^(b|i|c|pl|pr)/io); $abbrev = $1; $value = $abbrev_table{$abbrev}; if ($value eq 'previous') { $self->{PRAGMAS}->{STYLE} = $self->{PRAGMAS}->{PREVIOUS_STYLE}; } else { $self->{PRAGMAS}->{PREVIOUS_STYLE} = $self->{PRAGMAS}->{STYLE}; $self->{PRAGMAS}->{STYLE} = $value; } } elsif ($name eq 'indent') { return $rc unless ((defined $value) && ($value =~ /^([-+]?)(\d*)$/o)); my ($sign, $number) = ($1, $2); $value .= "4" unless ((defined $number) && ($number ne '')); $self->{PRAGMAS}->{PREVIOUS_INDENT} = $self->{PRAGMAS}->{INDENT}; if ($sign) { ${$self->{PRAGMAS}->{INDENT}}[0] += (0 + $value); } else { ${$self->{PRAGMAS}->{INDENT}}[0] = $value; } } else { $rc = 0; } #---------------------------------------------------- return $rc; } sub command { my $self = shift; my $cmd = shift; my $text = shift; my $sep = shift; $cmd = '' unless (defined $cmd); $text = '' unless (defined $text); $sep = ' ' unless (defined $sep); nl2ws( $text ); $text =~ s/ *$/\n/; # You're going to have to turn on styles such as 'item' here, and leave # them on until you next come in. (or at close of play) # Textblock should do pragmas. # Basically this stops keepnext for the following line, and two tabs. $self->item( $cmd eq 'item' || $cmd eq 'back' ) if (defined $self->{ITEM}); if( 1 == $self->{DONE_TITLE} ) { $self->output_raw( $title->StyleOff ); $self->{DONE_TITLE} = 2; } if( $cmd =~ /^head\d+/ ) { # Headings. if( $self->{DONE_TITLE} == 0 && $text eq "NAME\n" ) { # it's the magic word 'NAME' $self->{DONE_TITLE} = 1; $self->output_raw( $title->StyleOn ); } else { no strict 'refs'; # Hope there's a style defined to match this heading level... $self->output( [${$cmd}->StyleWrap( # Downcase the string if it is *all* shouty # Special case for known acronyms. ($text =~ /[a-z]/ || defined $cheats{$text} ) ? $self->interpolate($text) : encode( \&downcase_words, $self->interpolate($text) ) )] ); } } elsif ($cmd eq 'over') { push(@{$self->{INDENTS}}, $self->{INDENT}); my $previous = ${$self->{INDENT}}[0]; local ($^W) = 0; # Copy it $self->{INDENT} = [ $previous + ($text + 0) || ${$self->{DEF_INDENT}}[0], $tabs_on_item_ruler, # Because this is how the ruler works. $self->find_item_ruler($previous) ]; $self->output_raw( ${$self->{INDENT}}[2] -> StyleOn() ); } elsif ($cmd eq 'back') { $self->item() if (defined $self->{ITEM}); $self->output_raw( ${$self->{INDENT}}[2] -> StyleOff() ); $self->{INDENT} = pop(@{$self->{INDENTS}}); unless (defined $self->{INDENT}) { $self->error( "Unmatched =back\n" ); $self->{INDENT} = $self->{DEF_INDENT}; } } elsif ($cmd eq 'item') { $self->{ITEM} = $text; } else { $self->error( "Unrecognized directive: $cmd" ); } $self->{DONE_TITLE} = 2 unless $self->{DONE_TITLE} == 1; } sub verbatim { my $self = shift; my $text = shift; $self->item() if (defined $self->{ITEM}); my $out_fh = $self->{OUTPUT}; my @lines = expand split (/\n/, $text); # Text::Tabs::expand my $spaces_at_start; my $spaces_this_line; # Strip off spaces common to the start of all lines for( @lines ) { ($spaces_this_line) = /^( *)/; if( defined $spaces_at_start ) { $spaces_at_start = length $spaces_this_line if $spaces_at_start > length $spaces_this_line; } else { $spaces_at_start = length $spaces_this_line; } } for( @lines ) { $_ = substr $_, $spaces_at_start; } ; # Last line needs space after paragraph so treat it specially local ${$self->{INDENT}}[1] = 0; if (@lines) { $self->output( [$keepregion->StyleWrap( [$verbatim->StyleWrap( join ("\n", @lines) . "\n" )] )] ) } $self->output( [$verbatimend->StyleWrap("\n" )] ); } =head2 textblock($text) This method may be overridden by subclasses to take the appropriate action when a normal block of pod text is encountered (although the base class method will usually do what you want). It is passed the text block C<$text> as a parameter. In order to process interior sequences, subclasses implementations of this method will probably want invoke the B<interpolate()> method, passing it the text block C<$text> as a parameter and then perform any desired processing upon the returned result. The base class implementation of this method simply prints the text block as it occurred in the input stream). =cut sub textblock { my $self = shift; my $text = shift; my @text; $text =~ tr/\n\r/ /; $text =~ s/ +$//; my $keepnext_hack = $self->{KEEPNEXT_HACK}; my $colon = $text =~ /:$/; # Convert -- to em dashes if we are in the title $text =~ s/--/—/ if ($self->{DONE_TITLE} == 1); @text = $self->interpolate($text); if( $colon ) { $self->{KEEPNEXT_HACK} = $keepnext->StyleOn(); # Last non whitespace character is ':' - probably something like: # that } push @text, "\n"; if (defined $self->{ITEM}) { $self->item(0, @text); } else { $self->output(@text) } if( $colon ) { $self->output_raw( $keepnext->StyleOff() ); $self->{KEEPNEXT_HACK} = $keepnext_hack; } } =head2 interior_sequence($seq_cmd, $seq_arg) This method should be overridden by subclasses to take the appropriate action when an interior sequence is encountered. An interior sequence is an embedded command within a block of text which appears as a command name (usually a single uppercase character) followed immediately by a string of text which is enclosed in angle brackets. This method is passed the sequence command C<$seq_cmd> and the corresponding text $seq_arg and is invoked by the B<interpolate()> method for each interior sequence that occurs in the string that it is passed. It should return the desired text string to be used in place of the interior sequence. Subclass implementationss of this method may wish to examine the the array referenced by C<$self-E<gt>{SEQUENCES}> which is a stack of all the interior sequences that are currently being processed (they may be nested). The current interior sequence (the one given by C<$seq_cmdE<lt>$seq_argE<gt>>) should always be at the top of this stack. The base class implementation of the B<interior_sequence()> method simply returns the raw text of the of the interior sequence (as it occurred in the input) to the output filehandle. I<text> italicize text, used for emphasis or variables B<text> embolden text, used for switches and programs S<text> text contains non-breaking spaces C<code> literal code L<name> A link (cross reference) to name L<name> manual page L<name/ident> item in manual page L<name/"sec"> section in other manual page L<"sec"> section in this manual page (the quotes are optional) L</"sec"> ditto F<file> Used for filenames X<index> An index entry Z<> A zero-width character E<escape> A named character (very similar to HTML escapes) E<lt> A literal < E<gt> A literal > (these are optional except in other interior sequences and when preceded by a capital letter) E<n> Character number n (probably in ASCII) E<html> Some non-numeric HTML entity, such as E<Agrave> =cut sub interior_sequence { my $self = shift; my $seq_cmd = shift; if ($seq_cmd eq 'I') { return [$italic->StyleWrap( @_ )]; } elsif ($seq_cmd eq 'B') { return [$bold->StyleWrap( @_ )]; } elsif ($seq_cmd eq 'S') { return encode( \&space2hard, @_); } elsif ($seq_cmd eq 'C') { return [$code->StyleWrap( @_ )]; } elsif ($seq_cmd eq 'F') { return [$file->StyleWrap( @_ )]; } elsif ($seq_cmd eq 'X') { return [$index->StyleWrap( @_ )]; } elsif ($seq_cmd eq 'Z') { return ''; } elsif ($seq_cmd eq 'L' || $seq_cmd eq 'E') { # Must be single scalar. if( @_ != 1 || ref $_[0] ) { return $self->error_text( "Interior sequence $seq_cmd<> must be simple scalar, not ", @_ ) } if ($seq_cmd eq 'L') { # L<name> manual page # L<name/ident> item in manual page # L<name/"sec"> section in other manual page # L<"sec"> section in this manual page # (the quotes are optional) # L</"sec"> ditto # How do I tell name from section when there are no quotes? local($_) = $_[0]; s/\s+/ /g; my ($manpage, $sec, @ref) = ($_, ''); if (/^\s*"\s*(.*)\s*"\s*$/o) { ($manpage, $sec) = ('', "\"$1\""); } elsif (m|\s*/\s*|o) { ($manpage, $sec) = ($`, $'); } if ($sec eq '') { @ref = ('the chapter ', [$bold->StyleWrap( $manpage )]) if ($manpage ne ''); } elsif ($sec =~ /^\s*"\s*(.*)\s*"\s*$/o) { @ref = "the section on \"$1\""; push @ref, (' in the chapter ', [$bold->StyleWrap( $manpage )]) if ($manpage ne ''); } else { @ref = "the \"$sec\" entry"; push @ref, ($manpage eq '') ? " in this chapter" : (' in the chapter ', [$bold->StyleWrap( $manpage )]); } return @ref; } else { return chr $_[0] if $_[0]=~/^\d+$/; return $HTML_Escapes{$_[0]} if defined $HTML_Escapes{$_[0]}; warn "Unknown escape: E<$_[0]>"; # Drop through to the error generator } } return $self->error_text( "${seq_cmd}<",@_,'>' ); } =head2 interpolate($text, $end_re) This method will translate all text (including any embedded interior sequences) in the given text string C<$text> and return the interpolated result. If a second argument is given, then it is taken to be a regular expression that indicates when to quit interpolating the string. Upon return, the C<$text> parameter will have been modified to contain only the un-processed portion of the given string (which will I<not> contain any text matched by C<$end_re>). This method should probably I<not> be overridden by subclasses. It should be noted that this method invokes itself recursively to handle any nested interior sequences. =cut sub interpolate { my $self = shift; my ($text, $end_re) = @_; $text = '' unless (defined $text); $end_re = '$' unless ((defined $end_re) && ($end_re ne '')); local($_) = $text; my @result; my ($seq_cmd, $end, @seq_arg) = ('', undef); while (($_ ne '') && /([A-Z])<|($end_re)/) { push @result, $`; ## Append text before the match to the result $_ = $'; ## Only text after the match remains to be processed ## See if we matched an interior sequence or an end-expression ($seq_cmd, $end) = ($1, $2); last if (defined $end); ## Saw the end - quit loop here ## At this point we have found an interior sequence, ## we need to obtain its argument push(@{$self->{SEQUENCES}}, $seq_cmd); @seq_arg = $self->interpolate($_, '>'); ## Now process the interior sequence push @result, $self->interior_sequence($seq_cmd, @seq_arg); pop(@{$self->{SEQUENCES}}); } ## Handle whatever is left if we didnt match the ending regexp unless ((defined $end) && ($end_re ne '$')) { push @result, $_; $_ = ''; } ## Modify the input parameter to consume the text that was ## processed so far. $_[0] = $_; ## Return the processed-text return @result; }