home *** CD-ROM | disk | FTP | other *** search
Perl Script | 1999-06-25 | 2.8 KB | 131 lines | [TEXT/R*ch] |
#!/usr/bin/perl

# YRTP: the Perl bit
#
# P.D. Magnus
# June 1999
#
# CGI script.  Reads `url` and `cue` from the query string, fetches the page
# at `url`, and then either
#   * re-emits a frameset page with every src="..." routed back through
#     webparser.pl, or
#   * reduces the page to a compact string of word characters plus a list
#     of its links, and splices both into the rawcode template found under
#     DOCUMENT_ROOT/codeweb/.

use strict;
use warnings;
use LWP::Simple;

# Page fetched when the request names no url.
use constant DEFAULT_URL => "http://www.fecundity.com/";
# Always emitted as the first entry of the link table.
use constant CODEWEB_URL => "http://www.fecundity.com/codeweb";

print "Content-type: text/html\n\n";
require 'lock.pl';

my ($url, $jump) = parse_query($ENV{'QUERY_STRING'});
$url = DEFAULT_URL if $url eq '';
# Anchored on purpose: a URL that merely contains "http:" somewhere (for
# example in its own query string) must still get the scheme prefix.
$url = 'http://' . $url unless $url =~ m{\Ahttp://}i;

# LWP::Simple::get returns undef on failure; treat that as an empty page.
my $page = get($url);
$page = '' unless defined $page;

my $root = base_url($url);

# The title shown to the user carries the yrtp: pseudo-scheme.
(my $shown_url = $url) =~ s{\Ahttp:}{yrtp:}i;

if ($page =~ m{<frame}i) {
    print rewrite_frameset($page, $root, $shown_url, $jump);
}
else {
    print_coded_page($page, $root, $shown_url, $jump);
}

# parse_query($query_string) -> ($url, $jump)
#
# Splits the query string on "&" first and percent-decodes each value
# afterwards, so an encoded "&" (%26) inside the url value survives.
# "+" is left alone, as in the original decoder.  The first occurrence of a
# parameter wins.  $url is '' when absent; $jump is true only for cue=jump.
sub parse_query {
    my ($query) = @_;
    $query = '' unless defined $query;

    my %param;
    for my $pair (split /&/, $query) {
        my ($name, $value) = split /=/, $pair, 2;
        next unless defined $name && defined $value;
        next if exists $param{$name};
        $value =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge;
        $param{$name} = $value;
    }

    my $target = defined $param{url} ? $param{url} : '';
    my $is_jump = defined $param{cue} && $param{cue} eq 'jump';
    return ($target, $is_jump);
}

# base_url($url) -> directory part of $url, never ending in "/"
#
# Drops any query string or fragment, then drops the last path segment when
# it looks like a file name (contains a dot), e.g.
#   http://host/dir/page.html -> http://host/dir
#   http://host/dir/          -> http://host/dir
sub base_url {
    my ($address) = @_;

    if ($address =~ m{\A(\w+://[^/?#]*)([^?#]*)}) {
        my ($site, $path) = ($1, $2);
        $path =~ s{/[^/]*\.[^/]*\z}{};
        $path =~ s{/+\z}{};
        return $site . $path;
    }

    $address =~ s{/+\z}{};
    return $address;
}

# absolutize($link, $root) -> link in the form the rest of YRTP passes around
#
#   http://host/x  or  //host/x -> host/x   (scheme dropped; this script
#                                            re-adds http:// on the way in)
#   other-scheme:...            -> unchanged (mailto:, ftp:, ...)
#   /x                          -> <scheme://host of $root>/x
#   x                           -> $root/x
sub absolutize {
    my ($link, $root) = @_;

    return $link if $link =~ s{\A(?:http:)?//}{}i;
    return $link if $link =~ m{\A[A-Za-z][A-Za-z0-9+.-]*:};

    if ($link =~ m{\A/}) {
        my ($site) = $root =~ m{\A(\w+://[^/]*)};
        $site = $root unless defined $site;
        return $site . $link;
    }

    return $root . '/' . $link;
}

# percent_encode($text) -> $text with everything outside the URL-unreserved
# set written as %XX, so it can travel as a single query-string value.
sub percent_encode {
    my ($text) = @_;
    # Wide characters would otherwise produce escapes longer than two digits.
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}

# html_escape($text) -> $text safe to place in HTML element content.
sub html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# js_escape($text) -> $text safe inside a single-quoted JavaScript string.
# "<" is hex-escaped as well; presumably the template places these lines
# inside a <script> element, where "</script" would end it early.
sub js_escape {
    my ($text) = @_;
    $text =~ s/([\\'])/\\$1/g;
    $text =~ s/\r/\\r/g;
    $text =~ s/\n/\\n/g;
    $text =~ s/</\\x3C/g;
    return $text;
}

# frame_src($target, $root, $cue) -> replacement text for one src="..."
# attribute, pointing back at webparser.pl with the target percent-encoded.
sub frame_src {
    my ($target, $root, $cue) = @_;
    return 'src = "webparser.pl?url='
         . percent_encode(absolutize($target, $root))
         . '&cue=' . $cue . '"';
}

# rewrite_frameset($page, $root, $shown_url, $jump) -> altered frameset page
#
# Removes the NOFRAMES section, reroutes every src="..." through
# webparser.pl (cue=jump or cue=delay according to $jump) and replaces the
# title with the yrtp: form of the requested URL.
sub rewrite_frameset {
    my ($page, $root, $shown_url, $jump) = @_;
    my $cue = $jump ? 'jump' : 'delay';

    $page =~ s{<NOFRAMES\b.*?</NOFRAMES\s*>}{}gsi;
    # "$1" is stringified here so the helper's own regexes cannot disturb it.
    $page =~ s{SRC\s*=\s*"(.*?)"}{ frame_src("$1", $root, $cue) }gie;

    my $title = html_escape($shown_url);
    $page =~ s{<TITLE>.*?</TITLE>}{<title>$title</title>}gis;

    return $page;
}

# collect_links($page, $root) -> list of link targets for the outlink table
#
# The codeweb home page is always entry 0, followed by every href="..."
# value in document order.  Links are taken from the page before it is
# lower-cased, because URL paths are case-sensitive.
sub collect_links {
    my ($page, $root) = @_;
    my @found = (CODEWEB_URL, $page =~ m{href="(.*?)"}gi);
    return map { absolutize($_, $root) } @found;
}

# encode_page($page) -> the page reduced to word characters only
#
# Lower-cases the text so that upper-case letters can serve as markers:
#   L / M   opening / closing of an <a...> tag
#   X Y Z W opening <h1> <h2> <h3> <h5>;  N any closing heading tag
#   A...j   brackets a run of digits, whose digits become letters
#           (1-9 -> a-i, 0 -> k)
# Spaces become "_", "&" becomes "and", images with alt text become
# img_<alt>_, and every other tag and non-word character is dropped.
sub encode_page {
    my ($text) = @_;
    $text = lc $text;

    # eliminate title
    $text =~ s{<title\b.*?</title\s*>}{}gs;
    # include images with alt's
    $text =~ s{<img.*?alt="(.+?)".*?>}{img_${1}_}g;
    # convert links
    # NOTE(review): "<a.*?>" also matches other tags starting with "<a"
    # (<area>, <address>, ...).  Kept as is because the consumer of these
    # markers is not visible here -- confirm before tightening.
    $text =~ s{<a.*?>}{L}g;
    $text =~ s{</a>}{M}g;
    # convert special characters
    $text =~ s{"}{}g;
    $text =~ s{&}{and}g;
    $text =~ s{ }{_}g;
    # headlines
    # NOTE(review): <h4> has no marker of its own while </h4> still yields
    # N -- looks like an omission, verify against the template script.
    $text =~ s{<h1>}{X}g;
    $text =~ s{<h2>}{Y}g;
    $text =~ s{<h3>}{Z}g;
    $text =~ s{<h5>}{W}g;
    $text =~ s{</h\d>}{N}g;
    # remaining tags
    $text =~ s{<.*?>}{}gs;
    # shuck out whitespace (and every other non-word character)
    $text =~ s{\W}{}g;
    # convert numbers
    $text =~ s{(\d+)}{A${1}j}g;
    $text =~ tr/0-9/ka-i/;

    return $text;
}

# copy_template_through($marker)
#
# Copies lines from INFILE to STDOUT up to and including the first line
# that matches $marker.  Stops quietly at end of file, so a template
# without the marker cannot spin forever.
sub copy_template_through {
    my ($marker) = @_;
    while (defined(my $line = <INFILE>)) {
        print $line;
        last if $line =~ $marker;
    }
    return;
}

# print_coded_page($page, $root, $shown_url, $jump)
#
# Emits the rawcode template (rawcode_j.html when $jump is true) with the
# title line, the encoded page text and the link table spliced in.  Nothing
# after the "<!-- Never" marker line of the template is emitted.
# Dies if the template cannot be opened.
sub print_coded_page {
    my ($page, $root, $shown_url, $jump) = @_;

    my @links = collect_links($page, $root);
    my $code  = encode_page($page);
    # An empty page is signalled with the single marker B.
    $code = 'B' if $code eq '';

    my $doc_root = defined $ENV{'DOCUMENT_ROOT'} ? $ENV{'DOCUMENT_ROOT'} : '';
    my $template = $doc_root . '/codeweb/rawcode' . ($jump ? '_j' : '') . '.html';

    # output the java-ized file with the appropriate data inside
    open(INFILE, '<', $template) or die "Cannot open $template: $!";
    # lock.pl is handed the handle *name*, exactly as the original bareword
    # argument evaluated to.  The leading "&" is required: a plain lock(...)
    # would call Perl's builtin of the same name.
    # NOTE(review): the meaning of the second argument (0) is defined in
    # lock.pl, which is not visible here.
    &lock('INFILE', 0);

    my $first_line = <INFILE>;
    print $first_line if defined $first_line;
    print '<TITLE>' . html_escape($shown_url) . "</TITLE>\n";

    copy_template_through(qr/Signal Code/);

    # spit the output
    print qq{var output = '$code';\n};
    # spit the links -- all of them, including the last one
    print qq{var outlink = new Array();\n};
    for my $i (0 .. $#links) {
        my $link = js_escape($links[$i]);
        print qq{outlink[$i] = '$link';\n};
    }

    copy_template_through(qr/<!-- Never/);

    &unlock('INFILE');
    close(INFILE);
    return;
}