Acorn User 10

home *** CD-ROM | disk | FTP | other *** search

/ Acorn User 10 / AU_CD10.iso / Updates / Perl / Non-RPC / !Perl / lib / site_perl / POD / ParserText.pm < prev

Wrap

Perl POD Document | 1998-05-02 | 21.7 KB | 684 lines

############################################################################# # Text.pm -- convert POD data to formatted ASCII text # # Derived from Tom Christiansen's Pod::Text module # (with extensive modifications). # # Copyright (C) 1994-1996 Tom Christiansen. All rights reserved. # This file is part of "PodParser". PodParser is free software; # you can redistribute it and/or modify it under the same terms # as Perl itself. ############################################################################# package Pod::ParserText; $VERSION = 2.00; ## Current version of this package require 5.002; ## requires Perl version 5.002 or later =head1 NAME pod2text - function to convert POD data to formatted ASCII text Pod::ParserText - a class for converting POD data to formatted ASCII text =head1 SYNOPSIS use Pod::ParserText; pod2text("perlfunc.pod"); or use Pod::ParserText; package MyParser; @ISA = qw(Pod::ParserText); sub new { ## constructor code ... } ## implementation of appropriate subclass methods ... package main; $parser = new MyParser; @ARGV = ('-') unless (@ARGV > 0); for (@ARGV) { $parser->parse_from_file($_); } =head1 DESCRIPTION Pod::ParserText is a module that can convert documentation in the POD format (such as can be found throughout the Perl distribution) into formatted ASCII. Termcap is optionally supported for boldface/underline, and can be enabled via C<$Pod::ParserText::termcap=1>. If termcap has not been enabled, then backspaces will be used to simulate bold and underlined text. A separate F<pod2text> program is included that is primarily a wrapper for C<Pod::ParserText::pod2text()>. The single function C<pod2text()> can take one or two arguments. The first should be the name of a file to read the pod from, or "<&STDIN" to read from STDIN. A second argument, if provided, should be a filehandle glob where output should be sent. =head1 SEE ALSO L<Pod::Parser>. =head1 AUTHOR Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> Modified to derive from B<Pod::Parser> by Brad Appleton E<lt>Brad_Appleton-GBDA001@email.mot.comE<gt> =cut ############################################################################# use Exporter (); use Term::Cap; use Pod::Parser; @ISA = qw(Exporter Pod::Parser); @EXPORT = qw(&pod2text); %HTML_Escapes = ( 'amp' => '&', # ampersand 'lt' => '<', # left chevron, less-than 'gt' => '>', # right chevron, greater-than 'quot' => '"', # double quote "Aacute" => "\xC1", # capital A, acute accent "aacute" => "\xE1", # small a, acute accent "Acirc" => "\xC2", # capital A, circumflex accent "acirc" => "\xE2", # small a, circumflex accent "AElig" => "\xC6", # capital AE diphthong (ligature) "aelig" => "\xE6", # small ae diphthong (ligature) "Agrave" => "\xC0", # capital A, grave accent "agrave" => "\xE0", # small a, grave accent "Aring" => "\xC5", # capital A, ring "aring" => "\xE5", # small a, ring "Atilde" => "\xC3", # capital A, tilde "atilde" => "\xE3", # small a, tilde "Auml" => "\xC4", # capital A, dieresis or umlaut mark "auml" => "\xE4", # small a, dieresis or umlaut mark "Ccedil" => "\xC7", # capital C, cedilla "ccedil" => "\xE7", # small c, cedilla "Eacute" => "\xC9", # capital E, acute accent "eacute" => "\xE9", # small e, acute accent "Ecirc" => "\xCA", # capital E, circumflex accent "ecirc" => "\xEA", # small e, circumflex accent "Egrave" => "\xC8", # capital E, grave accent "egrave" => "\xE8", # small e, grave accent "ETH" => "\xD0", # capital Eth, Icelandic "eth" => "\xF0", # small eth, Icelandic "Euml" => "\xCB", # capital E, dieresis or umlaut mark "euml" => "\xEB", # small e, dieresis or umlaut mark "Iacute" => "\xCD", # capital I, acute accent "iacute" => "\xED", # small i, acute accent "Icirc" => "\xCE", # capital I, circumflex accent "icirc" => "\xEE", # small i, circumflex accent "Igrave" => "\xCD", # capital I, grave accent "igrave" => "\xED", # small i, grave accent "Iuml" => "\xCF", # capital I, dieresis or umlaut mark "iuml" => "\xEF", # small i, dieresis or umlaut mark "Ntilde" => "\xD1", # capital N, tilde "ntilde" => "\xF1", # small n, tilde "Oacute" => "\xD3", # capital O, acute accent "oacute" => "\xF3", # small o, acute accent "Ocirc" => "\xD4", # capital O, circumflex accent "ocirc" => "\xF4", # small o, circumflex accent "Ograve" => "\xD2", # capital O, grave accent "ograve" => "\xF2", # small o, grave accent "Oslash" => "\xD8", # capital O, slash "oslash" => "\xF8", # small o, slash "Otilde" => "\xD5", # capital O, tilde "otilde" => "\xF5", # small o, tilde "Ouml" => "\xD6", # capital O, dieresis or umlaut mark "ouml" => "\xF6", # small o, dieresis or umlaut mark "szlig" => "\xDF", # small sharp s, German (sz ligature) "THORN" => "\xDE", # capital THORN, Icelandic "thorn" => "\xFE", # small thorn, Icelandic "Uacute" => "\xDA", # capital U, acute accent "uacute" => "\xFA", # small u, acute accent "Ucirc" => "\xDB", # capital U, circumflex accent "ucirc" => "\xFB", # small u, circumflex accent "Ugrave" => "\xD9", # capital U, grave accent "ugrave" => "\xF9", # small u, grave accent "Uuml" => "\xDC", # capital U, dieresis or umlaut mark "uuml" => "\xFC", # small u, dieresis or umlaut mark "Yacute" => "\xDD", # capital Y, acute accent "yacute" => "\xFD", # small y, acute accent "yuml" => "\xFF", # small y, dieresis or umlaut mark "lchevron" => "\xAB", # left chevron (double less than) "rchevron" => "\xBB", # right chevron (double greater than) ); use strict; use diagnostics; use Carp; ##--------------------------------- ## Function definitions begin here ##--------------------------------- sub version { no strict; return $VERSION; } sub pod2text { my ($infile, $outfile) = @_; local $_; my $text_parser = new Pod::ParserText; $text_parser->parse_from_file($infile, $outfile); } ##------------------------------- ## Method definitions begin here ##------------------------------- sub new { my $this = shift; my $class = ref($this) || $this; my %params = @_; my $self = {%params}; bless $self, $class; $self->initialize(); return $self; } sub initialize { my $self = shift; $self->SUPER::initialize(); return; } sub makespace { my $self = shift; my $out_fh = $self->{OUTPUT}; if ($self->{NEEDSPACE}) { print $out_fh "\n"; $self->{NEEDSPACE} = 0; } } sub bold { my $self = shift; my $line = shift; my $map = $self->{FONTMAP}; return $line if $self->{USE_FORMAT}; if ($self->{TERMCAP}) { $line = "$map->{BOLD}$line$map->{NORM}"; } else { $line =~ s/(.)/$1\b$1/g; } # $line = "$map->{BOLD}$line$map->{NORM}" if $self->{ANSIFY}; return $line; } sub italic { my $self = shift; my $line = shift; my $map = $self->{FONTMAP}; return $line if $self->{USE_FORMAT}; if ($self->{TERMCAP}) { $line = "$map->{UNDL}$line$map->{NORM}"; } else { $line =~ s/(.)/$1\b_/g; } # $line = "$map->{UNDL}$line$map->{NORM}" if $self->{ANSIFY}; return $line; } # Fill a paragraph including underlined and overstricken chars. # It's not perfect for words longer than the margin, and it's probably # slow, but it works. sub fill { my $self = shift; local $_ = shift; my $par = ""; my $indent_space = " " x $self->{INDENT}; my $marg = $self->{SCREEN} - $self->{INDENT}; my $line = $indent_space; my $line_length; foreach (split) { my $word_length = length; $word_length -= 2 while /\010/g; # Subtract backspaces if ($line_length + $word_length > $marg) { $par .= $line . "\n"; $line= $indent_space . $_; $line_length = $word_length; } else { if ($line_length) { $line_length++; $line .= " "; } $line_length += $word_length; $line .= $_; } } $par .= "$line\n" if $line; $par .= "\n"; return $par; } ## Handle a pending "item" paragraph. The lone argument (if given) is the ## corresponding item text. (the item tag should be in $self->{ITEM}). sub item { my $self = shift; local($_) = @_; return unless (defined $self->{ITEM}); my $out_fh = $self->{OUTPUT}; my $paratag = $self->{ITEM}; my $prev_indent = $self->{INDENTS}->[$#{$self->{INDEX}} - 1] || $self->{DEF_INDENT}; undef $self->{ITEM}; if ((defined $_) && ($_ ne '') && (length($paratag) + 3) < $self->{INDENT}) { if (/^=/) { # tricked! $self->output($paratag, INDENT => $prev_indent); } else { $self->IP_output($paratag, $_); } } else { $self->output($paratag, INDENT => $prev_indent); $self->output($_, REFORMAT => 1); } } sub remap_whitespace { my $self = shift; local($_) = shift; tr/\000-\177/\200-\377/; return $_; } sub unmap_whitespace { my $self = shift; local($_) = shift; tr/\200-\377/\000-\177/; return $_; } sub IP_output { my $self = shift; my $tag = shift; local($_) = @_; my $out_fh = $self->{OUTPUT}; my $tag_indent = $self->{INDENTS}->[$#{$self->{INDEX}} - 1] || $self->{DEF_INDENT}; my $tag_cols = $self->{SCREEN} - $tag_indent; my $cols = $self->{SCREEN} - $self->{INDENT}; $tag =~ s/\s*$//; s/\s+/ /g; s/^ //; my $fmt_name = '_Pod_Text_IP_output_format_'; my $str = "format $fmt_name = \n" . (" " x ($tag_indent)) . '@' . ('<' x ($self->{INDENT} - $tag_indent - 1)) . "^" . ("<" x ($cols - 1)) . "\n" . '$tag, $_' . "\n~~" . (" " x ($self->{INDENT} - 2)) . "^" . ("<" x ($cols - 5)) . "\n" . '$_' . "\n\n.\n1"; #warn $str; warn "tag is $tag, _ is $_"; { ## reset format (turn off warning about redefining a format) local($^W) = 0; eval $str; croak if ($@); } select((select($out_fh), $~ = $fmt_name)[0]); local($:) = ($self->{HEADINGS}->[0] eq "SYNOPSIS") ? "\n " : $: ; write $out_fh; } sub output { my $self = shift; local $_ = shift; my $out_fh = $self->{OUTPUT}; my %options; if (@_ > 1) { ## usage was $self->output($text, NAME=>VALUE, ...); %options = @_; } elsif (@_ == 1) { if (ref $_[0]) { ## usage was $self->output($text, { NAME=>VALUE, ... } ); %options = %{$_[0]}; } else { ## usage was $self->output($text, $number); $options{"REFORMAT"} = shift; } } $options{"INDENT"} = $self->{INDENT} unless (defined $options{"INDENT"}); if ((defined $options{"REFORMAT"}) && $options{"REFORMAT"}) { my $cols = $self->{SCREEN} - $options{"INDENT"}; s/\s+/ /g; s/^ //; my $fmt_name = '_Pod_Text_output_format_'; my $str = "format $fmt_name = \n~~" . (" " x ($options{"INDENT"} - 2)) . "^" . ("<" x ($cols - 5)) . "\n" . '$_' . "\n\n.\n1"; { ## reset format (turn off warning about redefining a format) local($^W) = 0; eval $str; croak if ($@); } select((select($out_fh), $~ = $fmt_name)[0]); local($:) = ($self->{HEADINGS}->[0] eq "SYNOPSIS") ? "\n " : $: ; write $out_fh; } else { s/^/' ' x $options{"INDENT"}/gem; s/^\s+\n$/\n/gm; print $out_fh $_; } } sub internal_lrefs { my $self = shift; local $_ = shift; s{L</([^>]+)>}{$1}g; my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); my $retstr = "the "; my $i; for ($i = 0; $i <= $#items; $i++) { $retstr .= "C<$items[$i]>"; $retstr .= ", " if @items > 2 && $i != $#items; $retstr .= " and " if $i+2 == @items; } $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) . " elsewhere in this document "; return $retstr; } sub begin_input { my $self = shift; #---------------------------------------------------- # This class may wish to make use of some of the # commented-out code below for initializing pragmas #---------------------------------------------------- # $self->{PRAGMAS} = { # FILL => 'on', # STYLE => 'plain', # INDENT => 0, # }; # ## Initialize all PREVIOUS_XXX pragma values # my ($name, $value); # for (($name, $value) = each %{$self->{PRAGMAS}}) { # $self->{PRAGMAS}->{"PREVIOUS_${name}"} = $value; # } #---------------------------------------------------- $self->{TERMCAP} = 0; #$self->{USE_FORMAT} = 1; $self->{FONTMAP} = { UNDL => "\x1b[4m", INV => "\x1b[7m", BOLD => "\x1b[1m", NORM => "\x1b[0m", }; if ($self->{TERMCAP} and (! defined $self->{SETUPTERMCAP})) { $self->{SETUPTERMCAP} = 1; my ($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 }; $self->{FONTMAP}->{UNDL} = $term->{'_us'}; $self->{FONTMAP}->{INV} = $term->{'_mr'}; $self->{FONTMAP}->{BOLD} = $term->{'_md'}; $self->{FONTMAP}->{NORM} = $term->{'_me'}; } $self->{SCREEN} = ((defined $ENV{TERMCAP}) && ($ENV{TERMCAP} =~ /co#(\d+)/)[0]) || ((defined $ENV{COLUMNS}) && $ENV{COLUMNS}) || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0] || 72; $self->{FANCY} = 0; $self->{DEF_INDENT} = 4; $self->{INDENTS} = []; $self->{INDENT} = $self->{DEF_INDENT}; $self->{INDEX} = []; $self->{NEEDSPACE} = 0; } sub end_input { my $self = shift; $self->item() if (defined $self->{ITEM}); } sub pragma { my $self = shift; ## convert remaining args to lowercase my $name = lc shift; my $value = lc shift; my $rc = 1; local($_); #---------------------------------------------------- # This class may wish to make use of some of the # commented-out code below for processing pragmas #---------------------------------------------------- # my ($abbrev, %abbrev_table); # if ($name eq 'fill') { # %abbrev_table = ('on' => 'on', # 'of' => 'off', # 'p' => 'previous'); # $value = 'on' unless ((defined $value) && ($value ne '')); # return $rc unless ($value =~ /^(on|of|p)/io); # $abbrev = $1; # $value = $abbrev_table{$abbrev}; # if ($value eq 'previous') { # $self->{PRAGMAS}->{FILL} = $self->{PRAGMAS}->{PREVIOUS_FILL}; # } # else { # $self->{PRAGMAS}->{PREVIOUS_FILL} = $self->{PRAGMAS}->{FILL}; # $self->{PRAGMAS}->{FILL} = $value; # } # } # elsif ($name eq 'style') { # %abbrev_table = ('b' => 'bold', # 'i' => 'italic', # 'c' => 'code', # 'pl' => 'plain', # 'pr' => 'previous'); # $value = 'plain' unless ((defined $value) && ($value ne '')); # return $rc unless ($value =~ /^(b|i|c|pl|pr)/io); # $abbrev = $1; # $value = $abbrev_table{$abbrev}; # if ($value eq 'previous') { # $self->{PRAGMAS}->{STYLE} = $self->{PRAGMAS}->{PREVIOUS_STYLE}; # } # else { # $self->{PRAGMAS}->{PREVIOUS_STYLE} = $self->{PRAGMAS}->{STYLE}; # $self->{PRAGMAS}->{STYLE} = $value; # } # } # elsif ($name eq 'indent') { # return $rc unless ((defined $value) && ($value =~ /^([-+]?)(\d*)$/o)); # my ($sign, $number) = ($1, $2); # $value .= 3 unless ((defined $number) && ($number ne '')); # $self->{PRAGMAS}->{PREVIOUS_INDENT} = $self->{PRAGMAS}->{INDENT}; # if ($sign) { # $self->{PRAGMAS}->{INDENT} += $value; # } # else { # $self->{PRAGMAS}->{INDENT} = $value; # } # } # else { # $rc = 0; # } #---------------------------------------------------- return $rc; } sub command { my $self = shift; my $cmd = shift; local $_ = shift; $cmd = '' unless (defined $cmd); $_ = '' unless (defined $_); my $out_fh = $self->{OUTPUT}; $_ = $self->interpolate($_); s/\s*$/\n/; $self->item() if (defined $self->{ITEM}); if ($cmd eq 'head1') { $self->makespace(); print $out_fh $_; # print $out_fh uc($_); } elsif ($cmd eq 'head2' or $cmd eq 'head2') { $self->makespace(); # s/(\w+)/\u\L$1/g; #print ' ' x $self->{DEF_INDENT}, $_; # print "\xA7"; s/(\w)/\xA7 $1/ if $self->{FANCY}; print $out_fh ' ' x ($self->{DEF_INDENT}/2), $_, "\n"; } elsif ($cmd eq 'over') { push(@{$self->{INDENTS}}, $self->{INDENT}); $self->{INDENT} += ($_ + 0) || $self->{DEF_INDENT}; } elsif ($cmd eq 'back') { $self->{INDENT} = pop(@{$self->{INDENTS}}); unless (defined $self->{INDENT}) { carp "Unmatched =back\n"; $self->{INDENT} = $self->{DEF_INDENT}; } $self->{NEEDSPACE} = 1; } elsif ($cmd eq 'item') { $self->makespace(); # s/\A(\s*)\*/$1\xb7/ if $self->{FANCY}; # s/^(\s*\*\s+)/$1 /; $self->{ITEM} = $_; } else { carp "Unrecognized directive: $cmd\n"; } } sub verbatim { my $self = shift; local $_ = shift; $self->item() if (defined $self->{ITEM}); $self->{NEEDSPACE} = 1; $self->output($_); } sub textblock { my $self = shift; my $text = shift; local($_) = $self->interpolate($text); if (defined $self->{ITEM}) { $self->item($_); } else { s/\s*$/\n/; $self->makespace(); $self->output($_, REFORMAT => 1); } } sub interior_sequence { my $self = shift; my $cmd = shift; my $arg = shift; local($_) = $arg; if ($cmd eq 'C') { no strict; ## dont complain about $HTML_Escapes without package prefix my ($pre, $post) = ("`", "'"); ($pre, $post) = ($HTML_Escapes{"lchevron"}, $HTML_Escapes{"rchevron"}) if ((defined $self->{FANCY}) && $self->{FANCY}); $_ = $pre . $_ . $post; } elsif ($cmd eq 'E') { no strict; ## dont complain about $HTML_Escapes without package prefix if (defined $HTML_Escapes{$_}) { $_ = $HTML_Escapes{$_}; } else { carp "Unknown escape: E<$_>"; $_ = "E<$_>"; } # } # elsif ($cmd eq 'B') { # $_ = $self->bold($_); } elsif ($cmd eq 'I') { # $_ = $self->italic($_); $_ = "*" . $_ . "*"; } elsif (($cmd eq 'X') || ($cmd eq 'Z')) { $_ = ''; } elsif ($cmd eq 'S') { # Escape whitespace until we are ready to print #$_ = $self->remap_whitespace($_); } elsif ($cmd eq 'L') { s/\s+/ /g; my ($manpage, $sec, $ref) = ($_, '', ''); if (/^\s*"\s*(.*)\s*"\s*$/o) { ($manpage, $sec) = ('', "\"$1\""); } elsif (m|\s*/\s*|o) { ($manpage, $sec) = ($`, $'); } if ($sec eq '') { $ref .= "the $manpage manpage" if ($manpage ne ''); } elsif ($sec =~ /^\s*"\s*(.*)\s*"\s*$/o) { $ref .= "the section on \"$1\""; $ref .= " in the $manpage manpage" if ($manpage ne ''); } else { $ref .= "the \"$sec\" entry"; $ref .= ($manpage eq '') ? " in this manpage" : " in the $manpage manpage"; } $_ = $ref; #if ( m{^ ([a-zA-Z][^\s\/]+) ($[^$]+\))? $}x ) { # ## LREF: a manpage(3f) # $_ = "the $1$2 manpage"; #} #elsif ( m{^ ([^/]+) / ([:\w]+()?) $}x ) { # ## LREF: an =item on another manpage # $_ = "the \"$2\" entry in the $1 manpage"; #} #elsif ( m{^ / ([:\w]+()?) $}x ) { # ## LREF: an =item on this manpage # $_ = $self->internal_lrefs($1); #} #elsif ( m{^ (?: ([a-zA-Z]\S+?) / )? "?(.*?)"? $}x ) { # ## LREF: a =head2 (head1?), maybe on a manpage, maybe right here # ## the "func" can disambiguate # $_ = ((defined $1) && $1) # ? "the section on \"$2\" in the $1 manpage" # : "the section on \"$2\""; #} } return $_; } 1;