home *** CD-ROM | disk | FTP | other *** search
- # $Id: Reader.pm,v 1.9 2002/02/03 12:25:42 matt Exp $
-
- package XML::SAX::PurePerl::Reader;
-
- use strict;
- use XML::SAX::PurePerl::Reader::URI;
- use XML::SAX::PurePerl::Productions qw( $SingleChar $Letter $NameChar );
- use Exporter ();
-
- use vars qw(@ISA @EXPORT_OK);
- @ISA = qw(Exporter);
- @EXPORT_OK = qw(
- EOF
- BUFFER
- INTERNAL_BUFFER
- LINE
- COLUMN
- CURRENT
- ENCODING
- );
-
- use constant EOF => 0;
- use constant BUFFER => 1;
- use constant INTERNAL_BUFFER => 2;
- use constant LINE => 3;
- use constant COLUMN => 4;
- use constant MATCHED => 5;
- use constant CURRENT => 6;
- use constant CONSUMED => 7;
- use constant ENCODING => 8;
- use constant SYSTEM_ID => 9;
- use constant PUBLIC_ID => 10;
-
- require XML::SAX::PurePerl::Reader::Stream;
- require XML::SAX::PurePerl::Reader::String;
-
- if ($] >= 5.007002) {
- require XML::SAX::PurePerl::Reader::UnicodeExt;
- }
- else {
- require XML::SAX::PurePerl::Reader::NoUnicodeExt;
- }
-
- sub new {
- my $class = shift;
- my $thing = shift;
-
- # try to figure if this $thing is a handle of some sort
- if (ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle')) {
- return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
- }
- my $ioref;
- if (tied($thing)) {
- my $class = ref($thing);
- no strict 'refs';
- $ioref = $thing if defined &{"${class}::TIEHANDLE"};
- }
- else {
- eval {
- $ioref = *{$thing}{IO};
- };
- undef $@;
- }
- if ($ioref) {
- return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
- }
-
- if ($thing =~ /</) {
- # assume it's a string
- return XML::SAX::PurePerl::Reader::String->new($thing)->init;
- }
-
- # assume it is a uri
- return XML::SAX::PurePerl::Reader::URI->new($thing)->init;
- }
-
- sub init {
- my $self = shift;
- $self->[LINE] = 1;
- $self->[COLUMN] = 1;
- $self->nextchar;
- return $self;
- }
-
- sub match {
- my $self = shift;
- if ($self->match_nocheck(@_)) {
- if ($self->[MATCHED] =~ $SingleChar) {
- return 1;
- }
- throw XML::SAX::Exception::Parse (
- Message => "Not a valid XML character: '".
- sprintf("%X", ord($self->[MATCHED])).
- ";'"
- );
- }
- return 0;
- }
-
- sub match_char {
- my $self = shift;
-
- if (defined($self->[CURRENT]) && $self->[CURRENT] eq $_[0]) {
- $self->[MATCHED] = $_[0];
- $self->nextchar;
- return 1;
- }
- $self->[MATCHED] = '';
- return 0;
- }
-
- sub match_re {
- my $self = shift;
-
- if ($self->[CURRENT] =~ $_[0]) {
- $self->[MATCHED] = $self->[CURRENT];
- $self->nextchar;
- return 1;
- }
- $self->[MATCHED] = '';
- return 0;
- }
-
- sub match_not {
- my $self = shift;
-
- my $current = $self->[CURRENT];
- return 0 unless defined $current;
-
- for my $m (@_) {
- if ($current eq $m) {
- $self->[MATCHED] = '';
- return 0;
- }
- }
- $self->[MATCHED] = $current;
- $self->nextchar;
- return 1;
- }
-
- my %hist;
- END {
- foreach my $k (sort { $hist{$a} <=> $hist{$b} } keys %hist ) {
- my $x = $k;
- $k =~ s/^(.{80})(.{3}).*/$1\.\.\./s;
- # warn("$k called $hist{$x} times\n");
- }
- }
-
- sub match_nonext {
- my $self = shift;
-
- my $current = $self->[CURRENT];
- return 0 unless defined $current;
-
- foreach my $m (@_) {
- # $hist{$m}++;
- if (my $ref = ref($m)) {
- if ($ref eq 'Regexp' && $current =~ $m) {
- $self->[MATCHED] = $current;
- return 1;
- }
- }
- elsif ($current eq $m) {
- $self->[MATCHED] = $current;
- return 1;
- }
- }
- $self->[MATCHED] = '';
- return 0;
- }
-
- sub match_nocheck {
- my $self = shift;
-
- if ($self->match_nonext(@_)) {
- $self->nextchar;
-
- return 1;
- }
- return 0;
- }
-
- sub matched {
- my $self = shift;
- return $self->[MATCHED];
- }
-
- my $unpack_type = ($] >= 5.007002) ? 'U*' : 'C*';
-
- sub match_string {
- my $self = shift;
- my ($str) = @_;
- my $matched = '';
- # for my $char (map { chr } unpack($unpack_type, $str)) {
- for my $char (split //, $str) {
- if ($self->match_char($char)) {
- $matched .= $self->[MATCHED];
- }
- else {
- $self->buffer($matched);
- return 0;
- }
- }
- return 1;
- }
-
- # avoids split
- sub match_sequence {
- my $self = shift;
- my $matched = '';
- for my $char (@_) {
- if ($self->match_char($char)) {
- $matched .= $self->[MATCHED];
- }
- else {
- $self->buffer($matched);
- return 0;
- }
- }
- return 1;
- }
-
- sub consume_name {
- my $self = shift;
-
- my $current = $self->[CURRENT];
- return unless defined $current; # perhaps die here instead?
-
- my $name;
- if ($current eq '_') {
- $name = '_';
- }
- elsif ($current eq ':') {
- $name = ':';
- }
- else {
- $self->consume($Letter) ||
- throw XML::SAX::Exception::Parse (
- Message => "Name contains invalid start character: '".
- sprintf("%X", ord($self->[CURRENT])).
- ";'", reader => $self );
- $name = $self->[CONSUMED];
- }
-
- $self->consume($NameChar);
- $name .= $self->[CONSUMED];
- return $name;
- }
-
- sub consume {
- my $self = shift;
-
- my $consumed = '';
-
- while(!$self->eof && $self->match_re(@_)) {
- $consumed .= $self->[MATCHED];
- }
- return length($self->[CONSUMED] = $consumed);
- }
-
-
-
- sub consume_not {
- my $self = shift;
-
- my $consumed = '';
-
- while(!$self->[EOF] && $self->match_not(@_)) {
- $consumed .= $self->[MATCHED];
- }
- return length($self->[CONSUMED] = $consumed);
- }
-
- sub consumed {
- my $self = shift;
- return $self->[CONSUMED];
- }
-
- sub current {
- my $self = shift;
- return $self->[CURRENT];
- }
-
- sub buffer {
- my $self = shift;
- # warn("buffering: '$_[0]' + '$self->[CURRENT]' + '$self->[BUFFER]'\n");
- local $^W;
- my $current = $self->[CURRENT];
- if ($] >= 5.006 && $] < 5.007) {
- $current = pack("C0A*", $current);
- }
- $self->[BUFFER] = $_[0] . $current . $self->[BUFFER];
- $self->[COLUMN] -= length($_[0]);
- $self->nextchar;
- }
-
- sub public_id {
- my ($self, $value) = @_;
- if (defined $value) {
- return $self->[PUBLIC_ID] = $value;
- }
- return $self->[PUBLIC_ID];
- }
-
- sub system_id {
- my ($self, $value) = @_;
- if (defined $value) {
- return $self->[SYSTEM_ID] = $value;
- }
- return $self->[SYSTEM_ID];
- }
-
- sub line {
- shift->[LINE];
- }
-
- sub column {
- shift->[COLUMN];
- }
-
- sub get_encoding {
- my $self = shift;
- return $self->[ENCODING];
- }
-
- sub eof {
- return shift->[EOF];
- }
-
- 1;
-
- __END__
-
- =head1 NAME
-
- XML::Parser::PurePerl::Reader - Abstract Reader factory class
-
- =cut
-