home *** CD-ROM | disk | FTP | other *** search
- package www;
- $version = "951219.9";
-
-
- %http_return_code =
- (200,"OK",
- 201,"Created",
- 202,"Accepted",
- 203,"Partial Information",
- 204,"No Response",
- 301,"Moved",
- 302,"Found",
- 303,"Method",
- 304,"Not modified",
- 400,"Bad request",
- 401,"Unauthorized",
- 402,"Payment required",
- 403,"Forbidden",
- 404,"Not found",
- 500,"Internal error",
- 501,"Not implemented",
- 502,"Service temporarily overloaded",
- 503,"Gateway timeout");
-
- if (length($0) >= 6 && substr($0, -6) eq 'www.pl')
- {
- seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n";
- print "www.pl version $version\n", '=' x 60, "\n";
- while (<DATA>) {
- next unless /^##>/../^##</; ## select lines to print
- s/^##[<> ]?//; ## clean up
- print;
- }
- exit(0);
- }
-
-
-
- %default_port = ('http', 80,
- 'ftp', 21,
- 'gopher', 70,
- 'telnet', 23,
- 'wais', 210,
- );
-
- %name2protocol = (
- 'www', 'http',
- 'wwwcgi','http',
- );
-
- $last_message_length = 0;
- $useragent = "www.pl/$version";
-
- sub open_http_url
- {
- local(*HTTP, $URL, @options) = @_;
- return &open_http_connection(*HTTP, $URL, undef, undef, undef, @options);
- }
-
-
- sub read_http_header
- {
- local(*HTTP) = @_;
- local(%info, $_);
-
- unless (defined($info{'STATUS'} = <HTTP>)) {
- $info{'TYPE'} = "empty";
- return (0, %info);
- }
- chop $info{'STATUS'};
-
- unless ($info{'STATUS'} =~ m/^HTTP\S+\s+(\d\d\d)\s+(.*\S)/i) {
- $info{'TYPE'} = 'unknown';
- return (0, %info);
- }
-
- $info{'CODE'} = $1;
- $info{'TYPE'} = $2;
- $info{'HEADER'} = '';
-
- while (<HTTP>) {
- last if m/^\s*$/;
- $info{'HEADER'} .= $_; ## save whole text of header.
-
- if (m/^([^\n:]+):[ \t]*(.*\S)/) {
- local($field, $value) = ("\L$1", $2);
- if (defined $info{$field}) {
- $info{$field} .= "\n" . $value;
- } else {
- $info{$field} = $value;
- }
- } elsif (defined $info{'UNKNOWN'}) {
- $info{'UNKNOWN'} .= $_;
- } else {
- $info{'UNKNOWN'} = $_;
- }
- }
-
- return ($info{'CODE'}, %info);
- }
-
- sub grok_URL
- {
- local($_, $noproxy, $defaultprotocol) = @_;
- $noproxy = defined($noproxy) && $noproxy;
-
- local($protocol, $address, $port, $path, $target, $user, $password);
-
- return undef unless m%^(([a-zA-Z]+)://|/*)([^/]+)(/.*)?$%;
-
- ($protocol, $address, $path) = ((index($1,":") >= 0 ? $2 : undef), $3, $4);
-
- if (!defined $protocol)
- {
- if (defined $defaultprotocol) {
- $protocol = $defaultprotocol;
- }
- else
- {
- $address =~ m/^[a-zA-Z]+/;
- if (defined($name2protocol{"\L$&"})) {
- $protocol = $name2protocol{"\L$&"};
- } else {
- $protocol = defined($default_port{"\L$&"}) ? $& : 'http';
- }
- }
- }
- $protocol =~ tr/A-Z/a-z/; ## ensure lower-case.
-
- if ($protocol eq 'ftp' || $protocol eq 'telnet' || $protocol eq 'http')
- {
- if ($address =~ s/^(([^\@:]+)(:([^@]+))?\@)//) {
- ($user, $password) = ($2, $4);
- }
- }
-
- if ($address =~ s/:(\d+)$//) {
- $port = $1;
- } else {
- $port = $default_port{$protocol};
- }
-
- $path = '/' if !defined $path;
-
- local($proxy) = $ENV{$protocol."_proxy"};
- if (!$noproxy && defined($proxy) && !&no_proxy($protocol,$address))
- {
- local($dummy);
- local($old_pass, $old_user);
-
- if ($protocol eq 'http' && (defined($password) || defined($user)))
- {
- $path = "http://$address$path";
- $old_pass = $password;
- $old_user = $user;
- } else {
- ($path = $_) =~ s,^($protocol:)?/*,$protocol://,i;
- }
-
- $target = ($port==$default_port{$protocol})?$address:"$address:$port";
-
- ($protocol, $address, $port, $dummy, $dummy, $user, $password)
- = &grok_URL($proxy, 1);
- $password = $old_pass if defined $old_pass;
- $user = $old_user if defined $old_user;
- }
- ($protocol, $address, $port, $path, $target, $user, $password);
- }
-
-
-
- sub no_proxy
- {
- local($protocol, $targethost) = @_;
- local(@dests, $dest, $host, @hosts, $aliases);
- local($proxy) = $ENV{$protocol."_proxy"};
- return 0 if !defined $proxy;
- $targethost =~ tr/A-Z/a-z/; ## ensure all lowercase;
-
- @dests = ($proxy);
- push(@dests,split(/\s*,\s*/,$ENV{'no_proxy'})) if defined $ENV{'no_proxy'};
-
- foreach $dest (@dests)
- {
- $host = (&grok_URL($dest, 1), 'http')[1];
-
- if (!defined $host) {
- warn "can't grok [$dest] from no_proxy env.var.\n";
- next;
- }
- @hosts = ($host); ## throw in original name just to make sure
- ($host, $aliases) = (gethostbyname($host))[0, 1];
-
- if (defined $aliases) {
- push(@hosts, ($host, split(/\s+/, $aliases)));
- } else {
- push(@hosts, $host);
- }
- foreach $host (@hosts) {
- next if !defined $host;
- return 1 if "\L$host" eq $targethost;
- }
- }
- return 0;
- }
-
- sub ensure_proper_network_library
- {
- require 'network.pl' if !defined $network'version;
- warn "WARNING:\n". __FILE__ .
- qq/ needs a newer version of "network.pl"\n/ if
- !defined($network'version) || $network'version < "950311.5";
- }
-
-
-
- sub open_http_connection
- {
- local(*HTTP, $host, $port, $path, $target, @options) = @_;
- local($post_text, @error, %seen);
- local(%info);
-
- &ensure_proper_network_library;
-
- local($post, $retry, $authorization, $nofollow, $noproxy,
- $head, $debug, $ifmodifiedsince, $quiet, ) = (0) x 10;
- foreach $opt (@options)
- {
- next unless defined($opt) && $opt ne '';
- local($var, $val);
- if ($opt =~ m/^(\w+)=(.*)/) {
- ($var, $val) = ($1, $2);
- } else {
- $var = $opt;
- $val = 1;
- }
- $var =~ tr/A-Z/a-z/; ## ensure variable is lowercase.
- local(@error);
-
- eval "if (defined \$$var) { \$$var = \$val; } else { \@error =
- ('error', 'bad open_http_connection option [$opt]'); }";
- return ('error', "open_http_connection eval: $@") if $@;
- return @error if defined @error;
- }
- $quiet = 0 if $debug; ## debug overrides quiet
-
- local($protocol, $error, $code, $URL, %info, $tmp, $aite);
-
- unless (defined($port) && defined($path))
- {
- ($protocol,$host,$port,$path,$target)=&grok_URL($host,$noproxy,'http');
- if ($protocol ne "http") {
- return ('error',"open_http_connection doesn't grok [$protocol]");
- }
- unless (defined($host)) {
- return ('error', "can't grok [$URL]");
- }
- }
-
- return ('error', "no port in URL [$URL]") unless defined $port;
- return ('error', "no path in URL [$URL]") unless defined $path;
-
- RETRY: while(1)
- {
- if ($port == $default_port{'http'}) {
- $URL = "http://$host";
- } else {
- $URL = "http://$host:$default_port{'http'}";
- }
- $URL .= ord($path) eq ord('/') ? $path : "/$path";
-
- $aite = defined($target) ? "$target via $host" : $host;
-
- &message($debug, "connecting to $aite ...") unless $quiet;
-
- local(%preinfo) = (
- 'PROTOCOL', 'http',
- 'HOST', $host,
- 'PORT', $port,
- 'PATH', $path,
- );
- if (defined $target) {
- $preinfo{'TARGET'} = $target;
- } elsif ($default_port{'http'} == $port) {
- $preinfo{'TARGET'} = $host;
- } else {
- $preinfo{'TARGET'} = "$host:$port";
- }
-
- $error = &network'connect_to(*HTTP, $host, $port);
- if (defined $error) {
- return('error', "can't connect to $aite: $error", %preinfo);
- }
-
- if ($post && $path =~ m/\?/) {
- $post_text = $'; ## everything after the '?'
- $path = $`; ## everything before the '?'
- }
-
- $tmp = $head ? 'HEAD' : (defined $post_text ? 'POST' : 'GET');
-
- &message($debug, "sending request to $aite ...") if !$quiet;
- print HTTP $tmp, " $path HTTP/1.0\n";
-
- if ($ifmodifiedsince) {
- print HTTP "If-Modified-Since: $ifmodifiedsince\n";
- }
-
- print HTTP "Accept: */*\n";
- print HTTP "User-Agent: $useragent\n" if defined $useragent;
-
- if ($authorization) {
- print HTTP "Authorization: Basic ",
- &htuu_encode($authorization), "\n";
- }
-
- if (defined $post_text)
- {
- print HTTP "Content-type: application/x-www-form-urlencoded\n";
- print HTTP "Content-length: ", length $post_text, "\n\n";
- print HTTP $post_text, "\n";
- }
- print HTTP "\n";
- &message($debug, "waiting for data from $aite ...") unless $quiet;
-
- binmode(HTTP); ## just in case.
-
- ($code, %info) = &read_http_header(*HTTP);
- &message(1, "header returns code $code ($info{'TYPE'})") if $debug;
-
- local($val, $key);
- while (($val, $key) = each %preinfo) {
- $info{$val} = $key;
- }
-
- if ($code == 0)
- {
- return('error',"empty response for $URL")
- if $info{'TYPE'} eq 'empty';
- return('error', "non-HTTP response for $URL", %info)
- if $info{'TYPE'} eq 'unknown';
- return('error', "unknown zero-code for $URL", %info);
- }
-
- if ($code == 302) ## 302 is magic for "Found"
- {
- if (!defined $info{'location'}) {
- return('error', "No location info for Found URL $URL", %info);
- }
- local($newURL) = $info{'location'};
-
- $newURL =~ s,^(http:/+[^/:]+):80/,$1/,i;
- $info{"NewURL"} = $newURL;
-
- return('follow', $newURL, %info) if
- $nofollow || $newURL!~m/^http:/i;
-
- $seen{$host, $port, $path} = 1;
-
- &message(1, qq/[note: now moved to "$newURL"]/) unless $quiet;
-
-
- ($protocol, $host, $port, $path, $target) =
- &www'grok_URL($newURL, $noproxy);
- &message(1, "[$protocol][$host][$port][$path]") if $debug;
-
- if (defined $seen{$host, $port, $path})
- {
- return('error', "circular reference among:\n ".
- join("\n ", sort grep(/^http/i, keys %seen)), %seen);
- }
- next RETRY;
- }
- elsif ($code == 500) ## 500 is magic for "internal error"
- {
- if ($retry)
- {
- local($_) = $info{'BODY'} = join('', <HTTP>);
- if (/Can't locate remote host:\s*(\S+)/i) {
- local($times) = ($retry == 1) ?
- "once more" : "up to $retry more times";
- &message(0, "can't locate $1, will try $times ...")
- unless $quiet;
- sleep(5);
- $retry--;
- next RETRY;
- }
- }
- }
-
- if ($code != 200) ## 200 is magic for "OK";
- {
- &clear_message;
- if ($info{'TYPE'} eq '')
- {
- if (defined $http_return_code{$code}) {
- $info{'TYPE'} = $http_return_code{$code};
- } else {
- $info{'TYPE'} = "(unknown status code $code)";
- }
- }
- return ('status', $info{'TYPE'}, %info);
- }
-
- &clear_message;
- return ('ok', 'ok', %info);
- }
- }
-
-
- sub htuu_encode
- {
- local(@in) = unpack("C*", $_[0]);
- local(@out);
-
- push(@in, 0, 0); ## in case we need to round off an odd byte or two
- while (@in >= 3) {
- push(@out, $in[0] >> 2);
- push(@out, (($in[0] << 4) & 060) | (($in[1] >> 4) & 017));
- push(@out, (($in[1] << 2) & 074) | (($in[2] >> 6) & 003));
- push(@out, $in[2] & 077);
- splice(@in, 0, 3); ## remove these three
- }
-
- foreach $new (@out) {
- $new = substr(
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
- $new, 1);
- }
-
- if (@in == 2) {
- } elsif (@in == 1) {
- $out[$#out] = '=';
- } else {
- $out[$#out ] = '=';
- $out[$#out -1] = '=';
- }
-
- join('', @out);
- }
-
- sub message
- {
- local($nl) = shift;
- die "oops $nl." unless $nl =~ m/^\d+$/;
- local($text) = join('', @_);
- local($NL) = $nl ? "\n" : "\r";
- $thislength = length($text);
- if ($thislength >= $last_message_length) {
- print STDERR $text, $NL;
- } else {
- print STDERR $text, ' 'x ($last_message_length-$thislength), $NL;
- }
- $last_message_length = $nl ? 0 : $thislength;
- }
-
- sub clear_message
- {
- if ($last_message_length) {
- print STDERR ' ' x $last_message_length, "\r";
- $last_message_length = 0;
- }
- }
-
- 1;
- __END__
-