CLIX - Fazer Clix Custa Nix

home *** CD-ROM | disk | FTP | other *** search

/ CLIX - Fazer Clix Custa Nix / CLIX-CD.cdr / mac / lib / URI / URL / _generic.pm next >

Wrap

Text File | 1997-04-07 | 14.6 KB | 515 lines | [TEXT/McPL]

##################################################################### # # Internal pre-defined generic scheme support # # In this implementation all schemes are subclassed from # URI::URL::_generic. This turns out to have reasonable mileage. # See also draft-ietf-uri-relative-url-06.txt package URI::URL::_generic; # base support for generic-RL's require URI::URL; @ISA = qw(URI::URL); use URI::Escape qw(uri_escape uri_unescape %escapes); sub new { # inherited by subclasses my($class, $init, $base) = @_; my $url = bless { }, $class; # create empty object $url->_parse($init); # parse $init into components $url->base($base) if $base; $url; } # Generic-RL parser # See draft-ietf-uri-relative-url-06.txt Section 2 sub _parse { my($self, $u, @comps) = @_; return unless defined $u; # Deside which components to parse (scheme & path is manatory) @comps = qw(netloc query params frag) unless (@comps); my %parse = map {$_ => 1} @comps; # This parsing code is based on # draft-ietf-uri-relative-url-06.txt Section 2.4 # 2.4.1 $self->{'frag'} = uri_unescape($1) if $parse{'frag'} && $u =~ s/#(.*)$//; # 2.4.2 $self->{'scheme'} = lc($1) if $u =~ s/^\s*([\w\+\.\-]+)://; # 2.4.3 $self->netloc("$1") # passing $1 directly fails if netloc is autoloaded if $parse{'netloc'} && $u =~ s!^//([^/]*)!!; # 2.4.4 $self->{'query'} = $1 if $parse{'query'} && $u =~ s/\?(.*)//; # 2.4.5 $self->{'params'} = $1 if $parse{'params'} && $u =~ s/;(.*)//; # 2.4.6 # # RFC 1738 says: # # Note that the "/" between the host (or port) and the # url-path is NOT part of the url-path. # # however, RFC 1808, 2.4.6. says: # # Even though the initial slash is not part of the URL path, # the parser must remember whether or not it was present so # that later processes can differentiate between relative # and absolute paths. Often this is done by simply storing # he preceding slash along with the path. # # In version < 4.01 of URI::URL we used to strip the leading # "/" when asked for $self->path(). This created problems for # the consitency of the interface, so now we just consider the # slash to be part of the path and we also make an empty path # default to "/". # we don't test for $parse{path} becase it is mandatory $self->{'path'} = $u; } # Generic-RL stringify # sub as_string { my $self = shift; return $self->{'_str'} if $self->{'_str'}; my($scheme, $netloc, $frag) = @{$self}{qw(scheme netloc frag)}; my $u = $self->full_path(1); # path+params+query # rfc 1808 says: # Note that the fragment identifier (and the "#" that precedes # it) is not considered part of the URL. However, since it is # commonly used within the same string context as a URL, a parser # must be able to recognize the fragment when it is present and # set it aside as part of the parsing process. $u .= "#" . uri_escape($frag, $URI::URL::unsafe) if defined $frag; $u = "//$netloc$u" if defined $netloc; $u = "$scheme:$u" if $scheme; # Inline: uri_escape($u, $URI::URL::unsafe); $u =~ s/([$URI::URL::unsafe])/$escapes{$1}/go; $self->{'_str'} = $u; # set cache and return } # Generic-RL stringify full path "path;params?query" # sub full_path { my($self, $dont_escape) = @_; my($path, $params, $query) = @{$self}{'path', 'params', 'query'}; my $p = ''; $p .= $path if defined $path; # see comment in _parse 2.4.6 about the next line $p = "/$p" if defined($self->{netloc}) && $p !~ m:^/:; $p .= ";$params" if defined $params; $p .= "?$query" if defined $query; return $p if $dont_escape; # Inline: URI::Escape::uri_escape($p, $URI::URL::unsafe); $p =~ s/([$URI::URL::unsafe])/$escapes{$1}/go; $p; } # default_port() # # subclasses will usually want to override this # sub default_port { undef; } ##################################################################### # # Methods to handle URL's elements # These methods always return the current value, # so you can use $url->path to read the current value. # If a new value is passed, e.g. $url->path('foo'), # it also sets the new value, and returns the previous value. # Use $url->path(undef) to set the value to undefined. sub netloc { my $self = shift; my $old = $self->_elem('netloc', @_); return $old unless @_; # update fields derived from netloc my $nl = $self->{'netloc'} || ''; if ($nl =~ s/^([^:@]*):?(.*?)@//){ $self->{'user'} = uri_unescape($1); $self->{'password'} = uri_unescape($2) if $2 ne ''; } if ($nl =~ /^([^:]*):?(\d*)$/){ my $port = $2; # Since this happes so frequently, we inline this call: # my $host = uri_unescape($1); my $host = $1; $host =~ s/%([\dA-Fa-f]{2})/chr(hex($1))/eg; $self->{'host'} = $host; if ($port ne '') { $self->{'port'} = $port; if ($self->default_port == $port) { $self->{'netloc'} =~ s/:\d+//; } } elsif (defined $self->{'netloc'}) { $self->{'netloc'} =~ s/:$//; # handle empty port spec } } $self->{'_str'} = ''; $old; } # A U T O L O A D E R # Don't remove this comment, it keeps AutoSplit happy!! # @ISA = qw(AutoLoader) # # The rest of the methods are only loaded on demand. Stubs are neccesary # for inheritance to work. #sub netloc; # because netloc is used by the _parse() sub user; sub password; sub host; sub port; sub _netloc_elem; sub epath; sub path; sub path_components; sub eparams; sub params; sub equery; sub query; sub frag; sub crack; sub abs; sub rel; sub eq; 1; __END__ # Fields derived from generic netloc: sub user { shift->_netloc_elem('user', @_); } sub password { shift->_netloc_elem('password',@_); } sub host { shift->_netloc_elem('host', @_); } sub port { my $self = shift; my $old = $self->_netloc_elem('port', @_); defined($old) ? $old : $self->default_port; } sub _netloc_elem { my($self, $elem, @val) = @_; my $old = $self->_elem($elem, @val); return $old unless @val; # update the 'netloc' element my $nl = ''; my $host = $self->{'host'}; if (defined $host) { # can't be any netloc without any host my $user = $self->{'user'}; $nl .= uri_escape($user, $URI::URL::reserved) if defined $user; $nl .= ":" . uri_escape($self->{'password'}, $URI::URL::reserved) if defined($user) and defined($self->{'password'}); $nl .= '@' if length $nl; $nl .= uri_escape($host, $URI::URL::reserved); my $port = $self->{'port'}; $nl .= ":$port" if defined($port) && $port != $self->default_port; } $self->{'netloc'} = $nl; $self->{'_str'} = ''; $old; } sub epath { my $self = shift; my $old = $self->_elem('path', @_); return '/' if !defined($old) || !length($old); return "/$old" if $old !~ m|^/| && defined $self->{'netloc'}; $old; } sub path { my $self = shift; my $old = $self->_elem('path', map { uri_escape($_, $URI::URL::reserved_no_slash) } @_); if ($URI::URL::COMPAT_VER_3) { # We used to get rid of the leading "/" in the path if (defined $old) { $old =~ s|^/||; Carp::croak("Path components contain '/' (you must call epath)") if $old =~ /%2[fF]/; return uri_unescape($old); } return undef; } return '/' if !defined($old) || !length($old); Carp::croak("Path components contain '/' (you must call epath)") if $old =~ /%2[fF]/ and !@_; $old = "/$old" if $old !~ m|^/| && defined $self->{'netloc'}; return uri_unescape($old); } sub path_components { my $self = shift; my $old = $self->{'path'}; $old = '' unless defined $old; $old = "/$old" if $old !~ m|^/| && defined $self->{'netloc'}; if (@_) { $self->_elem('path', join("/", map { uri_escape($_, $URI::URL::reserved.".") } @_)); } map { uri_unescape($_) } split("/", $old, -1); } sub eparams { shift->_elem('params', @_); } sub params { my $self = shift; my $old = $self->_elem('params', map {uri_escape($_,$URI::URL::reserved_no_form)} @_); return uri_unescape($old) if defined $old; undef; } sub equery { shift->_elem('query', @_); } sub query { my $self = shift; my $old = $self->_elem('query', map { uri_escape($_, $URI::URL::reserved_no_form) } @_); if (defined $old) { if ($old =~ /%(?:26|2[bB]|3[dD])/) { # contains escaped '=' '&' or '+' my $mess; for ($old) { $mess = "Query contains both '+' and '%2B'" if /\+/ && /%2[bB]/; $mess = "Form query contains escaped '=' or '&'" if /=/ && /%(?:3[dD]|26)/; } if ($mess) { Carp::croak("$mess (you must call equery)"); } } # Now it should be safe to unescape the string without loosing # information return uri_unescape($old); } undef; } # No efrag method because the fragment is always stored unescaped sub frag { shift->_elem('frag', @_); } sub crack { my $self = shift; return $self unless wantarray; my @c = @{$self}{qw(scheme user password host port path params query frag)}; if (!$c[0]) { # try to determine scheme my $base = $self->base; $c[0] = $base->scheme if $base; $c[0] ||= 'http'; # last resort, default in URI::URL::new } $c[4] ||= $self->default_port; @c; } # Generic-RL: Resolving Relative URL into an Absolute URL # # Based on RFC1808 section 4 # sub abs { my($self, $base, $allow_scheme_in_relative_urls) = @_; my $embed = $self->clone; $base = $self->base unless $base; # default to default base return $embed unless $base; # we have no base (step1) $base = new URI::URL $base unless ref $base; # make obj if needed my($scheme, $host, $path, $params, $query, $frag) = @{$embed}{qw(scheme host path params query frag)}; # just use base if we are empty (2a) return $base->clone unless grep(defined($_) && $_ ne '', $scheme,$host,$port,$path,$params,$query,$frag); # if we have a scheme we must already be absolute (2b), # # but sec. 5.2 also says: Some older parsers allow the scheme name # to be present in a relative URL if it is the same as the base # URL scheme. This is considered to be a loophole in prior # specifications of the partial URLs and should be avoided by # future parsers. # # The old behavoir can be enabled by passing a TRUE value to the # $allow_scheme_in_relative_urls parameter. return $embed if $scheme && (!$allow_scheme_in_relative_urls || $scheme ne $base->{'scheme'}); $embed->{'_str'} = ''; # void cached string $embed->{'scheme'} = $base->{'scheme'}; # (2c) return $embed if $embed->{'netloc'}; # (3) $embed->netloc($base->{'netloc'}); # (3) return $embed if $path =~ m:^/:; # (4) if ($path eq '') { # (5) $embed->{'path'} = $base->{'path'}; # (5) return $embed if defined $embed->{'params'}; # (5a) $embed->{'params'} = $base->{'params'}; # (5a) return $embed if defined $embed->{'query'}; # (5b) $embed->{'query'} = $base->{'query'}; # (5b) return $embed; } # (Step 6) # draft 6 suggests stack based approach my $basepath = $base->{'path'}; my $relpath = $embed->{'path'}; $basepath =~ s!^/!!; $basepath =~ s!/$!/.!; # prevent empty segment my @path = split('/', $basepath); # base path into segments pop(@path); # remove last segment $relpath =~ s!/$!/.!; # prevent empty segment push(@path, split('/', $relpath)); # append relative segments my @newpath = (); my $isdir = 0; my $segment; foreach $segment (@path) { # left to right if ($segment eq '.') { # ignore "same" directory $isdir = 1; } elsif ($segment eq '..') { $isdir = 1; my $last = pop(@newpath); if (!defined $last) { # nothing to pop push(@newpath, $segment); # so must append } elsif ($last eq '..') { # '..' cannot match '..' # so put back again, and append push(@newpath, $last, $segment); } #else # it was a component, # keep popped } else { $isdir = 0; push(@newpath, $segment); } } $embed->{'path'} = '/' . join('/', @newpath) . ($isdir && @newpath ? '/' : ''); $embed; } # The oposite of $url->abs. Return a URL as much relative as possible sub rel { my($self, $base) = @_; my $rel = $self->clone; $base = $self->base unless $base; return $rel unless $base; $base = new URI::URL $base unless ref $base; $rel->base($base); my($scheme, $netloc, $path) = @{$rel}{qw(scheme netloc path)}; if (!defined($scheme) && !defined($netloc)) { # it is already relative return $rel; } my($bscheme, $bnetloc, $bpath) = @{$base}{qw(scheme netloc path)}; for ($netloc, $bnetloc, $bpath) { $_ = '' unless defined } $bpath = "/" unless length $bpath; # a slash is default unless ($scheme eq $bscheme && $netloc eq $bnetloc) { # different location, can't make it relative return $rel; } # Make it relative by eliminating scheme and netloc $rel->{'scheme'} = undef; $rel->netloc(undef); # This loop is based on code from Nicolai Langfeldt <janl@ifi.uio.no>. # It will remove all common initial path components. while (1) { #print "PATHS: $path $bpath\n"; my $i = index($path, '/'); last unless $i >=0 && $i == index($bpath, '/') && substr($path,0,$i) eq substr($bpath,0,$i); substr($path, 0, $i+1) = ''; substr($bpath, 0, $i+1) = ''; } # Add one "../" for each path component left in the base path $path = ('../' x $bpath =~ tr|/|/|) . $path; $rel->epath($path); $rel; } # Compare two URLs sub eq { my($self, $other) = @_; local($^W) = 0; # avoid warnings if we compare undef values $other = URI::URL->new($other, $self) unless ref $other; # Compare scheme and netloc return 0 if ref($self) ne ref($other); # must be same class return 0 if $self->scheme ne $other->scheme; # Always lower case return 0 if lc($self->netloc) ne lc($other->netloc); # Case-insensitive # Compare full_path: # According to <draft-ietf-http-v11-spec-05>: # Characters other than those in the "reserved" and "unsafe" sets # are equivalent to their %XX encodings. my $fp1 = $self->full_path; my $fp2 = $other->full_path; for ($fp1, $fp2) { s,%([\dA-Fa-f]{2}), my $x = $1; my $c = chr(hex($x)); $c =~ /^[;\/?:\@&=+\"\#%<>\0-\040\177]/ ? "%\L$x" : $c; ,eg; } return 0 if $fp1 ne $fp2; return 0 if $self->frag ne $other->frag; 1; } 1;