home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/perl #-*-perl-*-
- #!/u3/thesis/clipper/bin/perl # -*-perl-*-
- # Copyright (C) Khun Yee Fung clipper@csd.uwo.ca 1991
- # You can do anything to this program except selling it for profit or
- # pretending you wrote it. The copyright notice must be preserved in all
- # copies. Absolutely no warranty.
- # $Id: archie,v 2.5 1991/07/14 08:20:23 clipper Exp clipper $
- # Machines I have tried: Sun 3, MIPS 4.51, Sequent Symmetry Dynix.
- eval "exec perl -S $0 $*"
- if $running_under_some_shell;
-
- # To get system dependant sys/socket.h name and the domain server IP
- # if resolver libary is not built in.
- require 'archie.depend';
- require $socket;
- require 'resolver.pl';
- require 'newgetopt.pl';
-
- # To get the options on the command line. Exaplanations in the code
- # handling them
- &NGetOpt('match=i', 'reg', 'exact', 'nocase', 'case', 'host=s',
- 'ffile=s', 'format=s', 'along', 'norc', 'syntax');
-
- # Get the name of this program. The last name is the one.
- @prog = split('/', $0);
- $prog = $prog[$#prog];
-
- # Usage string.
- $usage =
- "Usage: $prog [options] word1 word2 ...
- Where options are one or more of the following:
- -case Case sensitive
- -nocase Case insensitive
- -exact Exact match
- -reg Regular expression match
- -match \# Max hits
- -host Instead of quiche.cs.mcgill.ca
- -ffile filename Use a format file
- -format string Specify a format string
- -along Print output along the way, instead of all at once at the end
- -norc Do not read .archierc file in home directory.
- ";
-
- # Should have at least one query.
- if ($#ARGV < 0) {
- print "Please specify at least one query.\n";
- print $usage;
- exit(255);
- }
- @string = @ARGV;
-
- # For the conversion of date in the subroutine date.
- @month = ('', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
- 'Sep', 'Oct', 'Nov', 'Dec');
-
- # The archie port number is 1525. Should probably try the privileged ports
- # (from 901 onwards or something) first.
- $port = 1525;
-
- # The socketaddr structure. See /usr/include/sys/socket.h for the C
- # version. The hostname is hard-wired. As there is a host option, this
- # is probably not very important. Hostcaps is for a strange entry format.
- # I still don't know exactly how the entries are specified.
- $sockaddr = 'S n a4 x8';
- $hostname = '132.206.2.3';
- $hostcaps = 'QUICHE.CS.MCGILL.CA';
-
- # Defaults: maximum hit is 100. It does not mean there will be exactly
- # 100 entries returned, though. Expect a few entries more or less.
- # The default search option is case insensitive. Also, print all entries
- # at once at the end.
- $match = 100; $case = 'S'; $along = 0;
-
- # To get the user name and user home path.
- @pw = getpwuid($<);
- $user = $pw[0];
- $userpath = $pw[7];
-
- # The default format string. Can be overiden by the -format or -ffile
- # options. Can also specify a default format string in ~/.archierc
- $format = "02%seq Host %host
- Last updated: %arc
-
- Location: %dir
- %10type %mode %08size %date %name
-
- ";
-
- # Read ~/.archierc?
- if (!defined($opt_norc)) {
- &parserc();
- }
-
- # print entries when getting them?
- if (defined($opt_along)) {
- $along = 1;
- }
-
- # What is the number of hits wanted?
- if (defined($opt_match)) {
- $match = $opt_match;
- }
-
- # Read a format string from a file.
- if (defined($opt_ffile)) {
- if (!open(FFILE, "$opt_ffile")) {
- die "Can't open format file $opt_ffile\n";
- }
- $format = '';
- while ($_ = <FFILE>) {
- $format .= $_;
- }
- close(FFILE);
- }
-
- # Read a format string on the command line.
- if (defined($opt_format)) {
- $format = $opt_format;
- }
-
- # Set search option to case sensitive.
- if (defined($opt_case)) {
- $case = 'C';
- }
-
- # Set search option to case insensitive.
- if (defined($opt_nocase)) {
- $case = 'S';
- }
-
- # set the search option to regular expression.
- if (defined($opt_reg)) {
- $case = 'R';
- }
-
- # set the search option to exact match
- if (defined($opt_exact)) {
- $case = '=';
- }
-
- # set a new archie host.
- if (defined($opt_host)) {
- $hostname = $opt_host;
- $hostcaps = y/a-z/A-Z/;
- }
-
- # parse the format string,
- $format = &parseformat($format);
-
- # This is for checking the format etc. Not for external use :-)
- if ($opt_syntax) {
- print "Execution until here.\n";
- exit(0);
- }
-
- # Get the IP address of the archie server.
- if ($hostname =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
- $thataddr = pack("CCCC", $1, $2, $3, $4);
- }
- elsif (!(($name, $aliases, $type, $len, $thataddr) =
- gethostbyname($hostname))) {
- $thataddr = &resolver($hostname, $server) || die "No such host";
- }
- $them = pack($sockaddr, &AF_INET, $port, $thataddr);
- chop($thishost = `hostname`);
- ($name, $aliases, $type, $len, $thisaddr) = gethostbyname($thishost);
- $us = pack($sockaddr, &AF_INET, 0, $thisaddr);
-
- # get and bind a socket.
- socket(DATA, &AF_INET, &SOCK_DGRAM, 0) || die "socket:$!\n";
- bind(DATA, $us) || die "bind: $!\n";
-
- select(STDOUT); $| = 1;
-
- # Get the list of matches.
- $hosts = &list($them, $user, @string);
-
- # Print them.
- if (!$along) {
- &output($hosts);
- }
- close(DATA);
-
- # The subroutine list is the `meat' of the query.
- # It sends the query to the archie server host and parses the entries
- # returned by the server.
- sub list {
- local($them, $user, @words) = @_;
- local($answer, $ans, $timeout, $retries, $lines, @lines);
- local($pktsnum, $pktseq, $seq, $rin, $timeleft, $rout, $nfound);
- local($hdr_len, $id, $dum, $backoff, $ctlptr, $word, $kk);
- local($first, $trailer, $header, $rdp, $index);
- local($packet) = 0;
-
- $timeout = 4;
- $retries = 3;
-
- # Construct the query packet.
- @lines = ("VERSION 1\n", "AUTHENTICATOR UNAUTHENTICATED $user\n");
- foreach $word (@words) {
- push(@lines, "DIRECTORY ASCII ARCHIE/MATCH($match,0,$case)/$word\n");
- push(@lines, "LIST ATTRIBUTES COMPONENTS \n");
- }
- $lines = join('', @lines);
-
- $first = 1;
- retry:
- while ($first) {
- $first = 0;
- send(DATA, $lines, 0, $them);
- $pktsnum = 0;
- while (1) {
- $seq = 0;
-
- # wait for a packet to come back.
- $rin = '';
- vec($rin, fileno(DATA), 1) = 1;
- ($nfound, $timeleft) = select($rout = $rin, '', '', $timeout);
- if (($timeleft == 0 || ord($rout) == 0) && ($retries-- > 0)) {
- $timeout *= 2;
- redo retry;
- }
-
- # Read a packet from the server.
- $ans = '';
- if (!(recv(DATA, $ans, 10000, 0))) {
- die "recv: Can't recv. Die.\n";
- }
- $packet++;
-
- # If the first byte is less than 20, then this is a old-fashioned
- # packet. To be phased out later?
- if (($hdr_len = ord(substr($ans, 0, 1))) < 20) {
- # The header format is:
- # CNNNNN
- # The first byte is length, the second short integer the id,
- # The third short integer the sequence number, then the number
- # of packets on the way, a dummy field not useful for a client,
- # and then the backoff time requested by the server.
- $seq = 0;
- ($hdr_len, $id, $seq, $pktsnum, $dum, $backoff) =
- unpack("Cn*", $ans);
- if ($hdr_len < 5) {
- $seq = $pktsnum = 1;
- }
- if ($hdr_len >= 11 && $backoff != 0) {
- $timeout = $backoff;
- }
- next if ($seq == 0);
- substr($ans, 0, $hdr_len) = '';
- }
- else {
- # New format. Still have very vague about the format of
- # this kind of packets. Could not find it in protocol.txt.
- # Got the information from
- # lib/psrv/reply.c and lib/pfs/dirsend.c
- # and a improved protocol.txt from bcn@cs.washington.edu.
- $id = 0;
- $rdp = ($hdr_len & 0xc0) >> 6;
- $hdr_len = $hdr_len & 0x3F;
- $header = substr($ans, 0, $hdr_len);
- substr($ans, 0, $hdr_len) = '';
- $index = index($ans, "\000");
- $index++;
- $trailer = substr($ans, $index);
- substr($ans, $index) = '';
- ($id, $seq, $kk, $dum, $backoff) = unpack("nnnnn", $trailer);
- if ($kk) {
- $pktsnum = $kk;
- }
- if ($backoff) {
- $timeout = $backoff;
- }
- next if ($seq == 0);
- # Get multi packet sequence and quantity.
- if ($ans =~ /^MULTI-PACKET\s+(\d+)\s+OF\s+(\d+)/) {
- # According to the source code, this is sent
- # only as the last packet.
- print "MULTI-PACKET OF is sent\n";
- $seq = $1;
- $pktsnum = $2;
- }
- elsif ($ans =~ /^MULTI-PACKET\s+(\d+)/) {
- print "MULTI-PACKET is sent\n";
- $seq = $1;
- $pktsnum = 0;
- }
- else {
- # output if the user wants to read the entries when
- # still matching.
- if ($along) {
- &output($ans);
- }
- return($ans);
- }
- $index = index($ans, "\n");
- $ans = substr($ans, $index + 1);
- }
- $pktseq++;
- $retries = 3;
- if ($along) {
- &output($ans);
- }
- $answer .= $ans;
- # The condition for getting more packets.
- if ($pktsnum == 0 || $pktseq < $pktsnum) {
- next;
- }
- last;
- }
- }
- return($answer);
- }
-
- # Print the entries in a packet.
- sub output {
- local($list) = @_;
- local(@lines, $dum, $arcmod, $lastmod, $modes, $host, $size, $dir);
- local($name, @attr, @ainfo, $type);
-
- # split the lines in the packet first.
- @lines = split(/\n/, $list);
- $host = '';
- while ($line = shift(@lines)) {
- # If a LINK L line, then get the initial fields for the
- # entry. Output the last entry if there is one.
- if ($line =~ /^LINK L/) {
- if ($host ne '') {
- &write($host, $type, $dir, $size, $arcmod, $modes,
- $lastmod, $name);
- }
- $type = $name = $host = $dir = '';
- $size = $modes = $lastmod = $arcmod = '';
- $#attr = $#ainfo = -1;
- ($dum, $dum, $type, $name, $dum, $host, $dum, $dir, $dum, $dum) =
- split(/ /, $line);
- }
- elsif ($line =~ /^LINK /) {
- # What should I do if the response is LINK but not L?
- }
- elsif ($line =~ /^LINK-INFO/) {
- # A LINK-INFO line. Get one attribute per line.
- ($dum, $dum, $attr, $dum, @info) = split(/ /, $line);
- if ($attr eq 'SIZE') {
- $size = join(' ', @info);
- }
- elsif ($attr eq 'UNIX-MODES') {
- $modes = join(' ', @info);
- }
- elsif ($attr eq 'ARC-MODTIME') {
- $arcmod = join(' ', @info);
- }
- elsif ($attr eq 'LAST-MODIFIED') {
- $lastmod = join(' ', @info);
- }
- else {
- push(@attr, $attr);
- push(@ainfo, join(' ', @info));
- }
- }
- elsif ($line =~ /^VERSION-NOT-SUPPORTED TRY (\d+)-(\d+),(\d+)/) {
- # What should I do if the version is wrong? Die?
- }
- elsif ($line =~ /^NOT-A-DIRECTORY/) {
- # WHat about this?
- }
- elsif ($line =~ /^UNRESOLVED/) {
- }
- elsif ($line =~ /^FILTER/) {
- }
- elsif ($line =~ /^OBJECT-INFO/) {
- }
- elsif ($line =~ /^NONE-FOUND/) {
- }
- elsif ($line =~ /^SUCCESS/) {
- }
- elsif ($line =~ /^FORDWARDED/) {
- }
- elsif ($line =~ /^FAILURE/) {
- }
- elsif ($line =~ /^NOT-AUTHORIZED/) {
- }
- else {
- # I basically don't know what to do if not LINK L and LINK-INFO.
- # If you know, please tell me.
- }
- }
- if ($host ne '') {
- &write($host, $type, $dir, $size, $arcmod, $modes, $lastmod, $name);
- }
- }
-
- # Write the fields out on terminal using the format string.
- sub write {
- local($host, $type, $dir, $size, $arcmod, $mode, $lastmod, $name) = @_;
- local($seq, @path, $date, $path);
- # Get the type of file.
- if ($type eq 'DIRECTORY') {
- $type = 'Directory';
- }
- else {
- $type = 'File';
- }
-
- # A strange format with the host being $hostcaps and the
- # path being in this form: ARCHIE/HOST/path
- if ($host eq $hostcaps) {
- ($archie, $dum, $host, $dir) =
- ($dir =~ m|([^/]+)/([^/]+)/([^/]+)/(.*)$|);
- $dir = '/' . $dir;
- }
-
- # Convert the date string from 19910713123250Z to
- # 1991 Jul 13 12:32:50 GMT
- $date = &date($lastmod);
- $pnum++;
- $seq = $pnum;
-
- # print the entry. Die if something is wrong. Should I
- # Log the output in a file so the effect is not wasted?
- if ((eval "printf $format") eq '') {
- die "A syntax error when printing the format string\n";
- }
- }
-
- # Convert a string.
- sub date {
- local($date) = @_;
- local($year, $month, $day, $hour, $min, $sec) =
- (0, 1, 0, 0, 0, 0);
- local($zone) = 'Z';
-
- ($year, $month, $day, $hour, $min, $sec, $zone) =
- ($date =~ /^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(.*)/);
-
- # A time zone Z is the same as GMT, right? I am not sure. Tell me
- # if you know.
- if ($zone eq 'Z') {
- $zone = 'GMT';
- }
- return "$year $month[$month] $day $hour:$min:$sec $zone";
- }
-
- # Parse the format string to convert it to a valid perl format
- # string.
- sub parseformat {
- local($string) = @_;
- local($nstring, $index, @plist);
-
- $string =~ s/\$/\\\$/g;
- $string =~ s/\{/\\\{/g;
- $string =~ s/\}/\\\}/g;
- $nstring = '';
- $#plist = -1;
- while (($index = index($string, '%')) >= 0) {
- $nstring .= substr($string, 0, $index);
- substr($string, 0, $index) = '';
- if (substr($string, 1, 1) eq '%') {
- substr($string, 0, 2) = '';
- $nstring .= '%%';
- }
- elsif ($string =~ /^\%(\d*)host/) {
- # %12host means the width of the field is 12.
- push(@plist, '$host');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 5) = '';
- }
- elsif ($string =~ /^\%(\d*)dir/) {
- push(@plist, '$dir');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 4) = '';
- }
- elsif ($string =~ /^\%(\d*)mode/) {
- push(@plist, '$mode');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 5) = '';
- }
- elsif ($string =~ /^\%(\d*)date/) {
- push(@plist, '$date');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 5) = '';
- }
- elsif ($string =~ /^\%(\d*)arc/) {
- push(@plist, '$arcmod');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 4) = '';
- }
- elsif ($string =~ /^\%(\d*)seq/) {
- push(@plist, '$seq');
- $nstring .= "\%$1d";
- substr($string, 0, length($1) + 4) = '';
- }
- elsif ($string =~ /^\%(\d*)size/) {
- push(@plist, '$size');
- $nstring .= "\%$1d";
- substr($string, 0, length($1) + 5) = '';
- }
- elsif ($string =~ /^\%(\d*)name/) {
- push(@plist, '$name');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 5) = '';
- }
- elsif ($string =~ /^\%(\d*)type/) {
- push(@plist, '$type');
- $nstring .= "\%$1s";
- substr($string, 0, length($1) + 5) = '';
- }
- else {
- die sprintf("$prog: Format error. Unknown field: %s\n", $string);
- }
- }
- $nstring .= $string;
- $nstring = '"' . $nstring . '"';
- return join(', ', $nstring, @plist);
- }
-
- # Parse the startup file ~/.archierc
- # The format of the file is very simple:
- # command option
- # The format command must be the last one.
- sub parserc {
- if (-e "$userpath/.archierc" && -r "$userpath/.archierc") {
- open (RC, "$userpath/.archierc");
- while (<RC>) {
- chop;
- if ($_ =~ /^\s*match\s+(\d+)\s*$/) {
- $match = $1;
- }
- elsif ($_ =~/^\s*search\s+([a-z]+)\s*$/) {
- if ($1 eq 'case') {
- $case = 'C';
- }
- elsif ($1 eq 'nocase') {
- $case = 'S';
- }
- elsif ($1 eq 'reg') {
- $case = 'R';
- }
- elsif ($1 eq 'exact') {
- $case = '=';
- }
- else {
- print "$prog: $user/.archierc: unknown search option $1\n";
- }
- }
- elsif ($_ =~ /^\s*host\s+(.+)\s*$/) {
- $hostname = $1;
- }
- elsif ($_ =~ /^\s*format\s*$/) {
- $format = '';
- while (<RC>) {
- $format .= $_;
- }
- return;
- }
- elsif ($_ =~ /^\s*$/ || $_ =~ /^\s*\#/) {
- }
- else {
- print "$prog: Unknown option in $user/.archierc: $_\n";
- }
- }
- close(RC);
- }
- }
-