home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Acorn User 10
/
AU_CD10.iso
/
Updates
/
Perl
/
Non-RPC
/
!Perl
/
lib
/
site_perl
/
POD
/
ParserText.pm
< prev
Wrap
Text File
|
1998-05-02
|
22KB
|
684 lines
#############################################################################
# Text.pm -- convert POD data to formatted ASCII text
#
# Derived from Tom Christiansen's Pod::Text module
# (with extensive modifications).
#
# Copyright (C) 1994-1996 Tom Christiansen. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
#############################################################################
package Pod::ParserText;
$VERSION = 2.00; ## Current version of this package
require 5.002; ## requires Perl version 5.002 or later
=head1 NAME
pod2text - function to convert POD data to formatted ASCII text
Pod::ParserText - a class for converting POD data to formatted ASCII text
=head1 SYNOPSIS
use Pod::ParserText;
pod2text("perlfunc.pod");
or
use Pod::ParserText;
package MyParser;
@ISA = qw(Pod::ParserText);
sub new {
## constructor code ...
}
## implementation of appropriate subclass methods ...
package main;
$parser = new MyParser;
@ARGV = ('-') unless (@ARGV > 0);
for (@ARGV) {
$parser->parse_from_file($_);
}
=head1 DESCRIPTION
Pod::ParserText is a module that can convert documentation in the POD
format (such as can be found throughout the Perl distribution) into
formatted ASCII. Termcap is optionally supported for
boldface/underline, and can be enabled via C<$Pod::ParserText::termcap=1>.
If termcap has not been enabled, then backspaces will be used to
simulate bold and underlined text.
A separate F<pod2text> program is included that is primarily a wrapper for
C<Pod::ParserText::pod2text()>.
The single function C<pod2text()> can take one or two arguments. The first
should be the name of a file to read the pod from, or "<&STDIN" to read from
STDIN. A second argument, if provided, should be a filehandle glob where
output should be sent.
=head1 SEE ALSO
L<Pod::Parser>.
=head1 AUTHOR
Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
Modified to derive from B<Pod::Parser> by
Brad Appleton E<lt>Brad_Appleton-GBDA001@email.mot.comE<gt>
=cut
#############################################################################
use Exporter ();
use Term::Cap;
use Pod::Parser;
@ISA = qw(Exporter Pod::Parser);
@EXPORT = qw(&pod2text);
%HTML_Escapes = (
'amp' => '&', # ampersand
'lt' => '<', # left chevron, less-than
'gt' => '>', # right chevron, greater-than
'quot' => '"', # double quote
"Aacute" => "\xC1", # capital A, acute accent
"aacute" => "\xE1", # small a, acute accent
"Acirc" => "\xC2", # capital A, circumflex accent
"acirc" => "\xE2", # small a, circumflex accent
"AElig" => "\xC6", # capital AE diphthong (ligature)
"aelig" => "\xE6", # small ae diphthong (ligature)
"Agrave" => "\xC0", # capital A, grave accent
"agrave" => "\xE0", # small a, grave accent
"Aring" => "\xC5", # capital A, ring
"aring" => "\xE5", # small a, ring
"Atilde" => "\xC3", # capital A, tilde
"atilde" => "\xE3", # small a, tilde
"Auml" => "\xC4", # capital A, dieresis or umlaut mark
"auml" => "\xE4", # small a, dieresis or umlaut mark
"Ccedil" => "\xC7", # capital C, cedilla
"ccedil" => "\xE7", # small c, cedilla
"Eacute" => "\xC9", # capital E, acute accent
"eacute" => "\xE9", # small e, acute accent
"Ecirc" => "\xCA", # capital E, circumflex accent
"ecirc" => "\xEA", # small e, circumflex accent
"Egrave" => "\xC8", # capital E, grave accent
"egrave" => "\xE8", # small e, grave accent
"ETH" => "\xD0", # capital Eth, Icelandic
"eth" => "\xF0", # small eth, Icelandic
"Euml" => "\xCB", # capital E, dieresis or umlaut mark
"euml" => "\xEB", # small e, dieresis or umlaut mark
"Iacute" => "\xCD", # capital I, acute accent
"iacute" => "\xED", # small i, acute accent
"Icirc" => "\xCE", # capital I, circumflex accent
"icirc" => "\xEE", # small i, circumflex accent
"Igrave" => "\xCD", # capital I, grave accent
"igrave" => "\xED", # small i, grave accent
"Iuml" => "\xCF", # capital I, dieresis or umlaut mark
"iuml" => "\xEF", # small i, dieresis or umlaut mark
"Ntilde" => "\xD1", # capital N, tilde
"ntilde" => "\xF1", # small n, tilde
"Oacute" => "\xD3", # capital O, acute accent
"oacute" => "\xF3", # small o, acute accent
"Ocirc" => "\xD4", # capital O, circumflex accent
"ocirc" => "\xF4", # small o, circumflex accent
"Ograve" => "\xD2", # capital O, grave accent
"ograve" => "\xF2", # small o, grave accent
"Oslash" => "\xD8", # capital O, slash
"oslash" => "\xF8", # small o, slash
"Otilde" => "\xD5", # capital O, tilde
"otilde" => "\xF5", # small o, tilde
"Ouml" => "\xD6", # capital O, dieresis or umlaut mark
"ouml" => "\xF6", # small o, dieresis or umlaut mark
"szlig" => "\xDF", # small sharp s, German (sz ligature)
"THORN" => "\xDE", # capital THORN, Icelandic
"thorn" => "\xFE", # small thorn, Icelandic
"Uacute" => "\xDA", # capital U, acute accent
"uacute" => "\xFA", # small u, acute accent
"Ucirc" => "\xDB", # capital U, circumflex accent
"ucirc" => "\xFB", # small u, circumflex accent
"Ugrave" => "\xD9", # capital U, grave accent
"ugrave" => "\xF9", # small u, grave accent
"Uuml" => "\xDC", # capital U, dieresis or umlaut mark
"uuml" => "\xFC", # small u, dieresis or umlaut mark
"Yacute" => "\xDD", # capital Y, acute accent
"yacute" => "\xFD", # small y, acute accent
"yuml" => "\xFF", # small y, dieresis or umlaut mark
"lchevron" => "\xAB", # left chevron (double less than)
"rchevron" => "\xBB", # right chevron (double greater than)
);
use strict;
use diagnostics;
use Carp;
##---------------------------------
## Function definitions begin here
##---------------------------------
sub version {
no strict;
return $VERSION;
}
sub pod2text {
my ($infile, $outfile) = @_;
local $_;
my $text_parser = new Pod::ParserText;
$text_parser->parse_from_file($infile, $outfile);
}
##-------------------------------
## Method definitions begin here
##-------------------------------
sub new {
my $this = shift;
my $class = ref($this) || $this;
my %params = @_;
my $self = {%params};
bless $self, $class;
$self->initialize();
return $self;
}
sub initialize {
my $self = shift;
$self->SUPER::initialize();
return;
}
sub makespace {
my $self = shift;
my $out_fh = $self->{OUTPUT};
if ($self->{NEEDSPACE}) {
print $out_fh "\n";
$self->{NEEDSPACE} = 0;
}
}
sub bold {
my $self = shift;
my $line = shift;
my $map = $self->{FONTMAP};
return $line if $self->{USE_FORMAT};
if ($self->{TERMCAP}) {
$line = "$map->{BOLD}$line$map->{NORM}";
}
else {
$line =~ s/(.)/$1\b$1/g;
}
# $line = "$map->{BOLD}$line$map->{NORM}" if $self->{ANSIFY};
return $line;
}
sub italic {
my $self = shift;
my $line = shift;
my $map = $self->{FONTMAP};
return $line if $self->{USE_FORMAT};
if ($self->{TERMCAP}) {
$line = "$map->{UNDL}$line$map->{NORM}";
}
else {
$line =~ s/(.)/$1\b_/g;
}
# $line = "$map->{UNDL}$line$map->{NORM}" if $self->{ANSIFY};
return $line;
}
# Fill a paragraph including underlined and overstricken chars.
# It's not perfect for words longer than the margin, and it's probably
# slow, but it works.
sub fill {
my $self = shift;
local $_ = shift;
my $par = "";
my $indent_space = " " x $self->{INDENT};
my $marg = $self->{SCREEN} - $self->{INDENT};
my $line = $indent_space;
my $line_length;
foreach (split) {
my $word_length = length;
$word_length -= 2 while /\010/g; # Subtract backspaces
if ($line_length + $word_length > $marg) {
$par .= $line . "\n";
$line= $indent_space . $_;
$line_length = $word_length;
}
else {
if ($line_length) {
$line_length++;
$line .= " ";
}
$line_length += $word_length;
$line .= $_;
}
}
$par .= "$line\n" if $line;
$par .= "\n";
return $par;
}
## Handle a pending "item" paragraph. The lone argument (if given) is the
## corresponding item text. (the item tag should be in $self->{ITEM}).
sub item {
my $self = shift;
local($_) = @_;
return unless (defined $self->{ITEM});
my $out_fh = $self->{OUTPUT};
my $paratag = $self->{ITEM};
my $prev_indent = $self->{INDENTS}->[$#{$self->{INDEX}} - 1]
|| $self->{DEF_INDENT};
undef $self->{ITEM};
if ((defined $_) && ($_ ne '')
&& (length($paratag) + 3) < $self->{INDENT}) {
if (/^=/) { # tricked!
$self->output($paratag, INDENT => $prev_indent);
}
else {
$self->IP_output($paratag, $_);
}
}
else {
$self->output($paratag, INDENT => $prev_indent);
$self->output($_, REFORMAT => 1);
}
}
sub remap_whitespace {
my $self = shift;
local($_) = shift;
tr/\000-\177/\200-\377/;
return $_;
}
sub unmap_whitespace {
my $self = shift;
local($_) = shift;
tr/\200-\377/\000-\177/;
return $_;
}
sub IP_output {
my $self = shift;
my $tag = shift;
local($_) = @_;
my $out_fh = $self->{OUTPUT};
my $tag_indent = $self->{INDENTS}->[$#{$self->{INDEX}} - 1]
|| $self->{DEF_INDENT};
my $tag_cols = $self->{SCREEN} - $tag_indent;
my $cols = $self->{SCREEN} - $self->{INDENT};
$tag =~ s/\s*$//;
s/\s+/ /g;
s/^ //;
my $fmt_name = '_Pod_Text_IP_output_format_';
my $str = "format $fmt_name = \n"
. (" " x ($tag_indent))
. '@' . ('<' x ($self->{INDENT} - $tag_indent - 1))
. "^" . ("<" x ($cols - 1)) . "\n"
. '$tag, $_'
. "\n~~"
. (" " x ($self->{INDENT} - 2))
. "^" . ("<" x ($cols - 5)) . "\n"
. '$_' . "\n\n.\n1";
#warn $str; warn "tag is $tag, _ is $_";
{
## reset format (turn off warning about redefining a format)
local($^W) = 0;
eval $str;
croak if ($@);
}
select((select($out_fh), $~ = $fmt_name)[0]);
local($:) = ($self->{HEADINGS}->[0] eq "SYNOPSIS") ? "\n " : $: ;
write $out_fh;
}
sub output {
my $self = shift;
local $_ = shift;
my $out_fh = $self->{OUTPUT};
my %options;
if (@_ > 1) {
## usage was $self->output($text, NAME=>VALUE, ...);
%options = @_;
}
elsif (@_ == 1) {
if (ref $_[0]) {
## usage was $self->output($text, { NAME=>VALUE, ... } );
%options = %{$_[0]};
}
else {
## usage was $self->output($text, $number);
$options{"REFORMAT"} = shift;
}
}
$options{"INDENT"} = $self->{INDENT} unless (defined $options{"INDENT"});
if ((defined $options{"REFORMAT"}) && $options{"REFORMAT"}) {
my $cols = $self->{SCREEN} - $options{"INDENT"};
s/\s+/ /g;
s/^ //;
my $fmt_name = '_Pod_Text_output_format_';
my $str = "format $fmt_name = \n~~"
. (" " x ($options{"INDENT"} - 2))
. "^" . ("<" x ($cols - 5)) . "\n"
. '$_' . "\n\n.\n1";
{
## reset format (turn off warning about redefining a format)
local($^W) = 0;
eval $str;
croak if ($@);
}
select((select($out_fh), $~ = $fmt_name)[0]);
local($:) = ($self->{HEADINGS}->[0] eq "SYNOPSIS") ? "\n " : $: ;
write $out_fh;
}
else {
s/^/' ' x $options{"INDENT"}/gem;
s/^\s+\n$/\n/gm;
print $out_fh $_;
}
}
sub internal_lrefs {
my $self = shift;
local $_ = shift;
s{L</([^>]+)>}{$1}g;
my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
my $retstr = "the ";
my $i;
for ($i = 0; $i <= $#items; $i++) {
$retstr .= "C<$items[$i]>";
$retstr .= ", " if @items > 2 && $i != $#items;
$retstr .= " and " if $i+2 == @items;
}
$retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
. " elsewhere in this document ";
return $retstr;
}
sub begin_input {
my $self = shift;
#----------------------------------------------------
# This class may wish to make use of some of the
# commented-out code below for initializing pragmas
#----------------------------------------------------
# $self->{PRAGMAS} = {
# FILL => 'on',
# STYLE => 'plain',
# INDENT => 0,
# };
# ## Initialize all PREVIOUS_XXX pragma values
# my ($name, $value);
# for (($name, $value) = each %{$self->{PRAGMAS}}) {
# $self->{PRAGMAS}->{"PREVIOUS_${name}"} = $value;
# }
#----------------------------------------------------
$self->{TERMCAP} = 0;
#$self->{USE_FORMAT} = 1;
$self->{FONTMAP} = {
UNDL => "\x1b[4m",
INV => "\x1b[7m",
BOLD => "\x1b[1m",
NORM => "\x1b[0m",
};
if ($self->{TERMCAP} and (! defined $self->{SETUPTERMCAP})) {
$self->{SETUPTERMCAP} = 1;
my ($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
$self->{FONTMAP}->{UNDL} = $term->{'_us'};
$self->{FONTMAP}->{INV} = $term->{'_mr'};
$self->{FONTMAP}->{BOLD} = $term->{'_md'};
$self->{FONTMAP}->{NORM} = $term->{'_me'};
}
$self->{SCREEN} =
((defined $ENV{TERMCAP}) && ($ENV{TERMCAP} =~ /co#(\d+)/)[0])
|| ((defined $ENV{COLUMNS}) && $ENV{COLUMNS})
|| (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
|| 72;
$self->{FANCY} = 0;
$self->{DEF_INDENT} = 4;
$self->{INDENTS} = [];
$self->{INDENT} = $self->{DEF_INDENT};
$self->{INDEX} = [];
$self->{NEEDSPACE} = 0;
}
sub end_input {
my $self = shift;
$self->item() if (defined $self->{ITEM});
}
sub pragma {
my $self = shift;
## convert remaining args to lowercase
my $name = lc shift;
my $value = lc shift;
my $rc = 1;
local($_);
#----------------------------------------------------
# This class may wish to make use of some of the
# commented-out code below for processing pragmas
#----------------------------------------------------
# my ($abbrev, %abbrev_table);
# if ($name eq 'fill') {
# %abbrev_table = ('on' => 'on',
# 'of' => 'off',
# 'p' => 'previous');
# $value = 'on' unless ((defined $value) && ($value ne ''));
# return $rc unless ($value =~ /^(on|of|p)/io);
# $abbrev = $1;
# $value = $abbrev_table{$abbrev};
# if ($value eq 'previous') {
# $self->{PRAGMAS}->{FILL} = $self->{PRAGMAS}->{PREVIOUS_FILL};
# }
# else {
# $self->{PRAGMAS}->{PREVIOUS_FILL} = $self->{PRAGMAS}->{FILL};
# $self->{PRAGMAS}->{FILL} = $value;
# }
# }
# elsif ($name eq 'style') {
# %abbrev_table = ('b' => 'bold',
# 'i' => 'italic',
# 'c' => 'code',
# 'pl' => 'plain',
# 'pr' => 'previous');
# $value = 'plain' unless ((defined $value) && ($value ne ''));
# return $rc unless ($value =~ /^(b|i|c|pl|pr)/io);
# $abbrev = $1;
# $value = $abbrev_table{$abbrev};
# if ($value eq 'previous') {
# $self->{PRAGMAS}->{STYLE} = $self->{PRAGMAS}->{PREVIOUS_STYLE};
# }
# else {
# $self->{PRAGMAS}->{PREVIOUS_STYLE} = $self->{PRAGMAS}->{STYLE};
# $self->{PRAGMAS}->{STYLE} = $value;
# }
# }
# elsif ($name eq 'indent') {
# return $rc unless ((defined $value) && ($value =~ /^([-+]?)(\d*)$/o));
# my ($sign, $number) = ($1, $2);
# $value .= 3 unless ((defined $number) && ($number ne ''));
# $self->{PRAGMAS}->{PREVIOUS_INDENT} = $self->{PRAGMAS}->{INDENT};
# if ($sign) {
# $self->{PRAGMAS}->{INDENT} += $value;
# }
# else {
# $self->{PRAGMAS}->{INDENT} = $value;
# }
# }
# else {
# $rc = 0;
# }
#----------------------------------------------------
return $rc;
}
sub command {
my $self = shift;
my $cmd = shift;
local $_ = shift;
$cmd = '' unless (defined $cmd);
$_ = '' unless (defined $_);
my $out_fh = $self->{OUTPUT};
$_ = $self->interpolate($_);
s/\s*$/\n/;
$self->item() if (defined $self->{ITEM});
if ($cmd eq 'head1') {
$self->makespace();
print $out_fh $_;
# print $out_fh uc($_);
}
elsif ($cmd eq 'head2' or $cmd eq 'head2') {
$self->makespace();
# s/(\w+)/\u\L$1/g;
#print ' ' x $self->{DEF_INDENT}, $_;
# print "\xA7";
s/(\w)/\xA7 $1/ if $self->{FANCY};
print $out_fh ' ' x ($self->{DEF_INDENT}/2), $_, "\n";
}
elsif ($cmd eq 'over') {
push(@{$self->{INDENTS}}, $self->{INDENT});
$self->{INDENT} += ($_ + 0) || $self->{DEF_INDENT};
}
elsif ($cmd eq 'back') {
$self->{INDENT} = pop(@{$self->{INDENTS}});
unless (defined $self->{INDENT}) {
carp "Unmatched =back\n";
$self->{INDENT} = $self->{DEF_INDENT};
}
$self->{NEEDSPACE} = 1;
}
elsif ($cmd eq 'item') {
$self->makespace();
# s/\A(\s*)\*/$1\xb7/ if $self->{FANCY};
# s/^(\s*\*\s+)/$1 /;
$self->{ITEM} = $_;
}
else {
carp "Unrecognized directive: $cmd\n";
}
}
sub verbatim {
my $self = shift;
local $_ = shift;
$self->item() if (defined $self->{ITEM});
$self->{NEEDSPACE} = 1;
$self->output($_);
}
sub textblock {
my $self = shift;
my $text = shift;
local($_) = $self->interpolate($text);
if (defined $self->{ITEM}) {
$self->item($_);
}
else {
s/\s*$/\n/;
$self->makespace();
$self->output($_, REFORMAT => 1);
}
}
sub interior_sequence {
my $self = shift;
my $cmd = shift;
my $arg = shift;
local($_) = $arg;
if ($cmd eq 'C') {
no strict; ## dont complain about $HTML_Escapes without package prefix
my ($pre, $post) = ("`", "'");
($pre, $post) = ($HTML_Escapes{"lchevron"}, $HTML_Escapes{"rchevron"})
if ((defined $self->{FANCY}) && $self->{FANCY});
$_ = $pre . $_ . $post;
}
elsif ($cmd eq 'E') {
no strict; ## dont complain about $HTML_Escapes without package prefix
if (defined $HTML_Escapes{$_}) {
$_ = $HTML_Escapes{$_};
}
else {
carp "Unknown escape: E<$_>";
$_ = "E<$_>";
}
# }
# elsif ($cmd eq 'B') {
# $_ = $self->bold($_);
}
elsif ($cmd eq 'I') {
# $_ = $self->italic($_);
$_ = "*" . $_ . "*";
}
elsif (($cmd eq 'X') || ($cmd eq 'Z')) {
$_ = '';
}
elsif ($cmd eq 'S') {
# Escape whitespace until we are ready to print
#$_ = $self->remap_whitespace($_);
}
elsif ($cmd eq 'L') {
s/\s+/ /g;
my ($manpage, $sec, $ref) = ($_, '', '');
if (/^\s*"\s*(.*)\s*"\s*$/o) {
($manpage, $sec) = ('', "\"$1\"");
}
elsif (m|\s*/\s*|o) {
($manpage, $sec) = ($`, $');
}
if ($sec eq '') {
$ref .= "the $manpage manpage" if ($manpage ne '');
}
elsif ($sec =~ /^\s*"\s*(.*)\s*"\s*$/o) {
$ref .= "the section on \"$1\"";
$ref .= " in the $manpage manpage" if ($manpage ne '');
}
else {
$ref .= "the \"$sec\" entry";
$ref .= ($manpage eq '') ? " in this manpage"
: " in the $manpage manpage";
}
$_ = $ref;
#if ( m{^ ([a-zA-Z][^\s\/]+) (\([^\)]+\))? $}x ) {
# ## LREF: a manpage(3f)
# $_ = "the $1$2 manpage";
#}
#elsif ( m{^ ([^/]+) / ([:\w]+(\(\))?) $}x ) {
# ## LREF: an =item on another manpage
# $_ = "the \"$2\" entry in the $1 manpage";
#}
#elsif ( m{^ / ([:\w]+(\(\))?) $}x ) {
# ## LREF: an =item on this manpage
# $_ = $self->internal_lrefs($1);
#}
#elsif ( m{^ (?: ([a-zA-Z]\S+?) / )? "?(.*?)"? $}x ) {
# ## LREF: a =head2 (head1?), maybe on a manpage, maybe right here
# ## the "func" can disambiguate
# $_ = ((defined $1) && $1)
# ? "the section on \"$2\" in the $1 manpage"
# : "the section on \"$2\"";
#}
}
return $_;
}
1;