home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/perl -w
-
- # See also HTML::Form module
-
- use HTML::PullParser ();
- use HTML::Entities qw(decode_entities);
- use Data::Dumper qw(Dumper);
-
- my @FORM_TAGS = qw(form input textarea button select option);
-
- my $p = HTML::PullParser->new(file => shift || "xxx.html",
- start => 'tag, attr',
- end => 'tag',
- text => '@{text}',
- report_tags => \@FORM_TAGS,
- ) || die "$!";
-
- # a little helper function
- sub get_text {
- my($p, $stop) = @_;
- my $text;
- while (defined(my $t = $p->get_token)) {
- if (ref $t) {
- $p->unget_token($t) unless $t->[0] eq $stop;
- last;
- }
- else {
- $text .= $t;
- }
- }
- return $text;
- }
-
- my @forms;
- while (defined(my $t = $p->get_token)) {
- next unless ref $t; # skip text
- if ($t->[0] eq "form") {
- shift @$t;
- push(@forms, $t);
- while (defined(my $t = $p->get_token)) {
- next unless ref $t; # skip text
- last if $t->[0] eq "/form";
- if ($t->[0] eq "select") {
- my $sel = $t;
- push(@{$forms[-1]}, $t);
- while (defined(my $t = $p->get_token)) {
- next unless ref $t; # skip text
- last if $t->[0] eq "/select";
- #print "select ", Dumper($t), "\n";
- if ($t->[0] eq "option") {
- my $value = $t->[1]->{value};
- my $text = get_text($p, "/option");
- unless (defined $value) {
- $value = decode_entities($text);
- }
- push(@$sel, $value);
- }
- else {
- warn "$t->[0] inside select";
- }
- }
- }
- elsif ($t->[0] =~ /^\/?option$/) {
- warn "option tag outside select";
- }
- elsif ($t->[0] eq "textarea") {
- push(@{$forms[-1]}, $t);
- $t->[1]{value} = get_text($p, "/textarea");
- }
- elsif ($t->[0] =~ m,^/,) {
- warn "stray $t->[0] tag";
- }
- else {
- push(@{$forms[-1]}, $t);
- }
- }
- }
- else {
- warn "form tag $t->[0] outside form";
- }
- }
-
- print Dumper(\@forms), "\n";
-