home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/perl -w
-
- # This program will print out all <a href=".."> links in a
- # document together with the text that goes with it.
-
- use HTML::Parser;
-
use strict;

# The document to scan is the first command-line argument.  Test with
# defined/length rather than plain truth so that a file named "0" is
# accepted, and tell the user what was expected instead of a bare "Died".
my $file = shift;
die "Usage: $0 <html-file>\n" unless defined $file && length $file;

# Start events go to a_start_handler() with the parser object, the tag
# name and the attribute hash.  Only <a> and <img> tags are requested via
# report_tags; the handlers below swap themselves in and out while an
# anchor is open.
my $p = HTML::Parser->new(api_version => 3,
                          start_h     => [\&a_start_handler, "self,tagname,attr"],
                          report_tags => [qw(a img)],
                         );

# A false return from parse_file() is treated as failure; $! carries the
# system error, and the file name is included so the message is actionable.
$p->parse_file($file) || die "Can't parse $file: $!\n";
-
# Start-tag handler that is active while the parser is outside an anchor.
#
# Arguments: the parser object, the tag name and a hash ref of attributes.
# For an <a> tag that has an href attribute it prints "A <href>" and then
# re-wires the parser: text is accumulated into a fresh array, start
# events go to img_handler() and end events go to a_end_handler().
# Every other tag is ignored.
sub a_start_handler
{
    my $self = shift;
    my ($tag, $attr) = @_;

    # Only anchors that actually carry an href are of interest.
    unless ($tag eq "a" && exists $attr->{href}) {
        return;
    }

    my $href = $attr->{href};
    print "A " . $href . "\n";

    # Text events are collected in a new, empty array using the
    # '@{dtext}' argspec; a_end_handler() reads it back later.
    $self->handler("text", [], '@{dtext}');

    # No argspec is given for the start event here.
    # NOTE(review): presumably the argspec set at construction time stays
    # in force for img_handler() -- confirm against the HTML::Parser docs.
    $self->handler("start", \&img_handler);

    $self->handler("end", \&a_end_handler, "self,tagname");
}
-
# Start-tag handler that is active while inside an anchor.
#
# Arguments: the parser object, the tag name and a hash ref of attributes.
# For an <img> tag it appends the image's alt text to the array that is
# currently installed as the parser's "text" handler, so the image shows
# up in the anchor text.  The placeholder "[IMG]" is used when there is no
# alt attribute or it is empty.  Any other tag is ignored.
sub img_handler
{
    my($self, $tag, $attr) = @_;
    return unless $tag eq "img";

    # Fall back only for a missing or empty alt.  A plain truth test
    # would also throw away a legitimate alt text of "0".
    my $alt = $attr->{alt};
    $alt = "[IMG]" unless defined $alt && length $alt;

    push(@{$self->handler("text")}, $alt);
}
-
# End-tag handler that is active while inside an anchor.
#
# Arguments: the parser object and the tag name.  On </a> it joins the
# text chunks accumulated in the parser's "text" handler array, trims
# leading and trailing whitespace, collapses internal whitespace runs to
# a single space and prints the result as "T <text>".  It then puts the
# parser back into its outside-an-anchor state: the text and end handlers
# are removed and a_start_handler() becomes the start handler again.
sub a_end_handler
{
    my($self, $tag) = @_;

    # The script asks for both "a" and "img" tags to be reported, so an
    # end tag other than </a> may arrive here.  Only </a> closes the
    # anchor; anything else must not cut the text collection short.
    return unless $tag eq "a";

    my $text = join("", @{$self->handler("text")});
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/\s+/ /g;
    print "T $text\n";

    $self->handler("text", undef);
    $self->handler("start", \&a_start_handler);
    $self->handler("end", undef);
}
-
-