home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/perl
- 'di';
- 'ig00';
- #
- # $Header: /tmp_mnt/home/netlabs1/lwall/pl/RCS/clip,v 1.1 92/07/13 12:37:09 lwall Exp Locker: lwall $
- #
- # $Log: clip,v $
- # Revision 1.1 92/07/13 12:37:09 lwall
- # Initial revision
- #
-
- $HOME = $ENV{HOME}
- || $ENV{LOGDIR}
- || (getpwuid($<))[7]
- || die "No home directory!!!\n";
-
- # Configurable parameters, may be overridden in .cliprc
-
- $SPOOLDIR = "/usr/spool/news"; # Where news articles are stored.
- $NEWSLIB = "/usr/lib/news"; # Where Cnews keeps history file.
- $HOMETMP = "$HOME/tmp"; # Where clip should put output.
- $HOMEBIN = "$HOME/bin"; # Where clip should install N script.
- $MAXLOAD = 3; # What load average to suspend at.
- $NICE = 16; # What priority to run at.
- $DEBUG = 0; # Whether to be noisy.
-
- $CLIPRC = "$HOME/.cliprc";
- require $CLIPRC;
-
- # Everything from here on should be machine independent.
-
- open(N0, "$HOMETMP/n0");
- $oldpid = <N0> + 0;
- close N0;
- if ($oldpid) {
- die "Already a clip process running ($oldpid)\n" if kill 0, $oldpid;
- }
-
- die "You must call both &NGSKIP and &SCANNER in $CLIPRC\n"
- unless defined &ngskip && defined &scanner;
-
- sub fixmsg {
- local($_, $file, $line) = @_;
- $line -= ($Preamble =~ y/\n//);
- s/ file \(eval\) at line (\d+)/" $file at line " . ($1 + $line)/eg;
- s/ at \(eval\) line (\d+)/" in $file at line " . ($1 + $line)/eg;
- # $* = 1;s/^/$0: /g; # too noisy to have this
- die;
- }
-
- sub NGSKIP {
- local($userstuff) = @_;
- if ($userstuff =~ tr/\n// >= 3 && $userstuff !~ /study/) {
- $study = "\t\tstudy;\n";
- }
- else {
- $study = "";
- }
- $eval = (($Preamble = <<'END1' . $study) . $userstuff . <<'END2');
- sub ngskip {
- local($_) = $nglist;
- eval {
- &skip if /^cancelled$/;
- END1
- };
- if ($@) {
- $@ = "" if $@ eq "You should never see this\n";
- die $@ if $@;
- }
- }
- END2
- print STDERR $eval if $DEBUG & 1;
- eval $eval;
- &fixmsg($@, (caller)[1,2]) if $@;
- 1;
- }
-
- sub SCANNER {
- local($userstuff) = @_;
- $eval = (($Preamble=<<'END1') . $userstuff . <<'END2');
- sub scanner {
- while (<ART>) {
- $totalhits = 0;
- do {
- study;
- $hits = 0;
- END1
- $totalhits += $hits;
- } while $hits;
- &printhit if $totalhits;
- }
- }
- END2
- print STDERR $eval if $DEBUG & 1;
- eval $eval;
- &fixmsg($@, (caller)[1,2]) if $@;
- 1;
- }
-
- require "timelocal.pl";
-
- fork && exit; # avoid nohup behavior
-
- $pid = $$;
-
- $pmeter = fork;
- defined $pmeter || die "can't fork: $!";
- if ($pmeter == 0) {
- &pmeter($MAXLOAD, $pid, $DEBUG & 2);
- die "Not reached";
- }
-
- if ($HOMEBIN) {
- system "echo kill -HUP $pid >$HOMEBIN/N";
- system "(echo ps $pid; echo ps $pmeter) >$HOMEBIN/P";
- chmod 0755, "$HOMEBIN/N";
- chmod 0755, "$HOMEBIN/P";
- }
-
- setpriority(0, 0, $NICE); # set very slow priority
-
- $date = shift;
- chop($date = `cat $HOME/.lastclip`) if !$date && -f "$HOME/.lastclip";
- $date = &lidate($date);
-
- chdir $SPOOLDIR || die "Can't cd: $!\n";
-
- select(STDERR); $| = 1;
- select(STDOUT); $| = 1;
-
- &CATCH;
- $SIG{HUP} = CATCH; # send SIGHUP to switch to new history file
- $SIG{USR1} = RESTART; # send SIGUSR1 to re-execute clip
- $SIG{ALRM} = IGNORE; # so we can send ourselves an alarm safely
-
- $r = "\r" if -t STDOUT;
-
- ($dev,$ino,$mode,$nlink,$uid) = stat STDOUT;
- $origuid = $uid;
- for (;;) {
- LOGLINE:
- while (<LOG>) {
- $pos = tell(LOG);
- chop;
- ($messid,$date,$nglist) = split(/\t/);
- $0 = "clip at $messid" if $DEBUG & 1;
- $wanted = 1;
- &ngskip;
- next LOGLINE unless $wanted;
- $date = &gidate($date);
- ($ng,$art) = split(m![ /]!,$nglist);
- next unless $art;
- $ng =~ y!.!/!;
- open(ART,"$ng/$art") || next;
- next if -s ART > 100_000;
- $count = 0;
- ++$slept; # to force quick update after big batch
- $/ = '';
- $header = <ART>;
- $_ = '';
- &scanner;
- close ART;
- $sleep = 5;
- }
- continue {
- $/ = "\n";
- }
- print STDERR "clip: caught up$r\n" unless $tailing++;
- sleep $sleep;
- $slept += $sleep;
- if ($slept > 300) {
- $slept = 0;
- if ($date != $lastdate) {
- ($dev,$ino,$mode,$nlink,$uid,$gid) = stat STDOUT;
- exit unless $uid == $origuid;
- open(LASTDATE,">$HOME/.lastclip");
- print LASTDATE &cdate($date),"\n";
- close LASTDATE;
- $lastdate = $date;
- }
- ($dev,$ino) = stat("$NEWSLIB/history");
- if ($dev != $logdev || $ino != $logino) {
- print STDERR "************* NEW HISTORY FILE *************$r\n";
- open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n";
- ($logdev, $logino) = stat LOG;
- &seekdate($date);
- $tailing = 0;
- $slept = 300; # force immediate update of lastclip.
- }
- }
- $sleep++ if $sleep < 120;
- seek(LOG,$pos,0);
- }
-
- sub hit {
- ($before,$match,$after) = ($`,$&,$');
- $match =~ s/(.)/_\b$1/g;
- $_ = "$before$match$after";
- study;
- $hits++;
- unless ($count++) {
- $header =~ /From: (.*)/;
- ($from = $1) =~ s/.*\((.*)\).*/$1/;
- print("$r\n\@$ng/$art \tFrom: $from$r\n") || exit;
- $header =~ /Subject: (.*)/ && print "Subject: $1$r\n";
- $0 = "clip " . &cdate($date);
- }
- }
-
- sub skip {
- $wanted = 0;
- die "You should never see this\n";
- }
-
- sub printhit {
- if (length() > 500) {
- $wanted = '';
- for ($line = 0; /.*\n/g; $line++) {
- if ($& =~ /_\010/) {
- for ($w = $line - 3; $w <= $line + 3; $w++) {
- vec($wanted,$w,1) = 1;
- }
- }
- }
- $unlines = 0;
- for ($line = 0; /.*\n/g; $line++) {
- if (vec($wanted,$line,1)) {
- print($&,$r) || exit;
- $unlines = 0;
- }
- else {
- print("...\n$r") || exit unless $unlines++;
- }
- }
- }
- else {
- s/\n/\n$r/g if $r;
- print() || exit;
- }
- }
-
- sub seekdate {
- local($start) = shift;
- if ($start == 9_999_999_999) {
- seek(LOG,0,2);
- $pos = tell(LOG);
- print STDERR "$pid starting at eof...$r\n";
- return;
- }
- ($st_dev,$st_ino,$st_mode,$st_nlink,$st_uid,$st_gid,$st_rdev,$st_size,
- $st_atime,$st_mtime,$st_ctime,$st_blksize,$st_blocks) = stat(LOG);
- for ($offset = $st_size - 100_000; $offset > 0; $offset -= 100_000) {
- if (seek(LOG,$offset,0)) {
- $_ = <LOG>; # probably starts in middle of a line
- $_ = <LOG>;
- ($messid,$date,$nglist) = split(/\t/);
- $date = &gidate($date);
- last if $date < $start;
- }
- else {
- $offset = -1;
- }
- }
- seek(LOG,0,0) if $offset < 0;
- while (<LOG>) {
- ($messid,$date,$nglist) = split(/\t/);
- $date = &gidate($date);
- last if $date >= $start;
- }
- $pos = tell(LOG);
- $pct = int($pos * 100 / $st_size);
- print STDERR "$pid starting at $pct% for $start...$r\n";
- $0 = "clip start at $messid" if $DEBUG & 1;
- }
-
- sub CATCH {
- &openout;
- open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n";
- ($logdev, $logino) = stat LOG;
- $date = 9_999_999_999 unless $date;
- &seekdate($date);
- $tailing = 0;
- $slept = 300; # force immediate update of lastclip.
- $SIG{HUP} = CATCH;
- kill $$, ALRM;
- }
-
- sub RESTART {
- kill 9, $pmeter if $pmeter;
- exec "clip";
- die "Couldn't exec clip: $!\n";
- }
-
- sub cdate {
- ($sec,$min,$hr,$mday,$mon,$year) = localtime($date);
- sprintf("%02d/%02d/%02d %02d:%02d",$mon+1,$mday,$year,$hr,$min);
- }
-
- sub lidate {
- $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)#
- ? &timelocal(0, $5, $4, $2, $1-1, $3)
- : $_[0];
- }
-
- sub gidate {
- $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)#
- ? &timegm(0, $5, $4, $2, $1-1, $3)
- : $_[0];
- }
-
- sub openout {
- rename("$HOMETMP/n8", "$HOMETMP/n9");
- rename("$HOMETMP/n7", "$HOMETMP/n8");
- rename("$HOMETMP/n6", "$HOMETMP/n7");
- rename("$HOMETMP/n5", "$HOMETMP/n6");
- rename("$HOMETMP/n4", "$HOMETMP/n5");
- rename("$HOMETMP/n3", "$HOMETMP/n4");
- rename("$HOMETMP/n2", "$HOMETMP/n3");
- rename("$HOMETMP/n1", "$HOMETMP/n2");
- rename("$HOMETMP/n0", "$HOMETMP/n1");
- open(STDOUT, ">$HOMETMP/n0");
- open(STDERR, ">&STDOUT");
- }
-
- sub pmeter {
- # ($loadavg, $pid, $debug) = @ARGV;
- local($loadavg, $pid, $debug) = @_;
- $running = 1;
- $0 = "pmeter @_";
-
- while (kill 0, $pid) {
- `/usr/ucb/uptime` =~ /load average:\s+([\d.]+)/
- || die "Can't run uptime: $!\n";;
-
- if ($1 > $loadavg) {
- kill 'STOP', $pid;
- if ($running) {
- print STDERR "stopping at $1\r\n" if $debug;
- $0 = "pmeter (stopped $pid at $loadavg)";
- $running = 0;
- }
- }
- else {
- kill 'CONT', $pid;
- if (!$running) {
- print STDERR "starting at $1\r\n" if $debug;
- $0 = "pmeter (started $pid at $loadavg)";
- $running = 1;
- }
- }
- sleep 120;
- }
- }
- ###############################################################
-
- # These next few lines are legal in both Perl and nroff.
-
- .00; # finish .ig
-
- 'di \" finish diversion--previous line must be blank
- .nr nl 0-1 \" fake up transition to first page again
- .nr % 0 \" start at page 1
- '; __END__ ##### From here on it's a standard manual page #####
-
- .TH CLIP 1 "July 14, 1992"
- .de M \" man page reference
- \\fI\\$1\\fR\\|(\\$2\)\\$3
- ..
- .AT 3
- .SH NAME
- clip \- personal news clipping service
- .br
- pmeter \- start and stop a process according to load average
- .SH SYNOPSIS
- .B clip
- [
- .I date
- ]
- .SH DESCRIPTION
- .I Newsclip
- scans incoming news by following the C news history file as it grows
- and examining each article listed there for patterns of interest.
- .PP
- If a date argument is supplied on the command line (in the form
- .IR "``mm/dd/yy hh:mm''" ,
- including the embedded space)
- .I clip
- will read articles since that date. With no arguments, it examines
- the user's ~/.lastclip file to know when it left off scanning;
- if no such file exists, every news article will be scanned.
- .PP
- Patterns and options are specified in a .cliprc file situated
- in your home directory. The .cliprc file is simply a gob of Perl
- code that will be evaluated by clip after it has set its default
- options but before it actually goes out to do anything. The only
- mandatory items are a call to each of two subroutines: &NGSKIP and
- &SCANNER. A typical .cliprc file looks like this:
- .nf
-
- .ne 6
- # Options I want to override.
- $MAXLOAD = 4;
-
- # Newsgroups I always read anyway, so don't bother.
- &NGSKIP( <<'END' );
- &skip if /comp\e.lang\e.perl/;
- &skip if /rec\e.humor\e.funny/;
- END
-
- .ne 8
- # Patterns I'm interested in scanning for.
- &SCANNER( <<'END' );
- &hit if /betty[^\e0]boop/i;
- &hit if /roger[^\e0]rabbit/i;
- &hit if /\ebw.*coyote\eb/i;
-
- if (/\ebacme\eb/i) {{
- next if $` =~ /roadrunner\e@$/;
- &hit;
- }}
-
- if (/\eblooney\eb/i) {{
- next if $' =~ /^\es*bin/;
- next if $` =~ /!$/;
- next if $` =~ /fudd\e@/i;
- next if $nglist =~ /alt\e.crazy\e.people/;
- &hit;
- }}
- END
-
- .fi
- The argument to the &NGSKIP routine is a sequence of zero or more Perl
- statements that call &skip if the current article is crossposted to a
- newsgroup that we don't want to scan (generally because we'll read the
- newsgroup anyway). Before your code is called, the $_ variable is
- automatically set to the list of newsgroups from the history file.
- You can invert the logic and just pick the newsgroups you want by
- saying:
- .nf
-
-
- .ne 9
- &NGSKIP(<<'END');
- study;
- {
- last if /comp\e.lang\e.perl/;
- last if /comp\e.org\e.usenix/;
- last if /comp\e.unix\e.bsd/;
- &skip;
- }
- END
-
- .fi
- .PP
- The argument to the &SCANNER routine is a sequence of one or more Perl
- statements that call &hit if an interesting pattern has been spotted.
- Before your code is called, the $_ variable is set to the current paragraph
- of the current article (articles are read and scanned paragraph by paragraph).
- Newsclip will snip excerpts from any paragraph containing patterns,
- underlining the patterns it found. Note that some patterns are
- unqualified hits, while others are hits only if some other pattern doesn't
- match. (It's important to the underlining algorithm that the value
- of $& continue to contain the value of the last
- .I successful
- match \(em all
- of the exceptions are expected to fail.) For the purposes of exception
- scanning, the variables $` and $' are automatically set to the preceding
- and subsequent text by the successful initial pattern match. In addition,
- .I clip
- sets the variable $nglist to the current list of newsgroups, and
- the variable $header to be the header of the current article.
- .PP
- The use of double curlies is merely to allow the ``next'' command to fall
- through to the next test, since ``next'' will simply fall out of an ordinary
- block. The ``last'' command could also have been used, but would be less
- intuitive. Other special thingies you might want to use include \eb
- to assert a word boundary, [^\e0] to match any character but a null,
- including a newline, and any other regular expression goodies you can
- think up to detect spelling errors and variants. Tom Christiansen
- searches for his name with:
- .nf
-
- .ne 4
- if (/\ebtchrist\eb/i || /tom[^\e0]christ(ia|e)ns[eo]n/i) {{ # grr...
- next if /\en\es*Tom Christiansen\es+tchrist\e@convex.com/;
- &hit;
- }}
-
- .fi
- .PP
- .I Newsclip
- forks another copy of itself,
- called
- .IR pmeter ,
- which runs
- .M uptime 1
- every two minutes to make sure the load hasn't gone too
- high. If it has,
- .I pmeter
- suspends
- .I clip
- with a SIGSTOP,
- waiting until the load average goes back down before
- allowing
- .I clip
- to continue.
- Both
- .I pmeter
- and
- .I clip
- continually
- muck with their own externally-visible argument list
- to keep folks running
- .M ps 1
- amused. This has no effect if your operating system doesn't
- support changes to argv being noticed by ps.
- .PP
- Output by default goes to ~/tmp/n0. When
- .I clip
- is hit by a SIGHUP,
- it renames n8 to n9, n7 to n8, ..., n0 to n1, and creates a new n0
- file. For handiness, when
- .I clip
- starts up, it creates a tiny script
- called ``N'' that will send the SIGHUP to the correct process. Typically
- one runs N in the morning to set up a new n0 file, and then reads ~/tmp/n1
- to see what the scanner found since the last time you ran N. A companion
- script, named ``P'', runs
- .I ps
- on the
- .I clip
- and
- .I pmeter
- processes. (This assumes a BSD-style
- .I ps
- program.)
- .PP
- The first line of each article reference begins with an @, in case you
- want to cut and paste that line to something that looks up the article
- for you. I use the following rn macro. Unfortunately I have to paste
- the line twice because of how rn eats typeahead, but hey, it beats a
- kick in the head. This macro should all be on one line \(em it's broken in
- two so that nroff doesn't get upset:
- .nf
-
- .ne 12
- @ %(%m=n?:%(%m!=a?q:)q)%(%"^Jng/art: "=\e([^ @]*\e)/\e([^ ]*\e)?
- g%`perl -e '($_="%1")=~tr#/#.#;print'`^J.%2^J:%(%m=n?^L:))
-
- .fi
- .PP
- Interesting variables to set in your .cliprc file include the following:
- .TP 15
- .B $SPOOLDIR
- Where news articles are stored; defaults to /usr/spool/news.
- .TP
- .B $NEWSLIB
- Where Cnews keeps its history file; defaults to /usr/lib/news.
- .TP
- .B $HOMETMP
- Where clip should put its output files;
- defaults to ~/tmp.
- .TP
- .B $HOMEBIN
- Where clip should install N script;
- defaults to ~/bin.
- .TP
- .B
- $MAXLOAD
- What load average to suspend at; defaults to 3.
- .TP
- .B $NICE
- What priority to run at; defaults to 16.
- .TP
- .B $DEBUG
- Whether to be noisy; defaults to 0. A value
- with the 1 bit set causes
- .I clip
- to be noisy;
- a value with the 2 bit set causes
- .I pmeter
- to be noisy.
- .SH ENVIRONMENT
- HOME or LOGDIR
- .SH SIGNALS
- .nf
- SIGHUP Cycle log files.
- SIGUSR1 Re-exec oneself in case .cliprc changes.
- SIGALRM Make clip wake up early.
- .fi
- .SH FILES
- .nf
- .ta \w'$HOME/.cliprc 'u
- $HOME/.cliprc Your clipping preferences.
- $HOME/.lastclip Date and time of last article scanned.
- $HOMETMP/n[0-9] Log files.
- $HOMEBIN/N Cycle log files: n8 -> n9, n7 -> n8, etc.
- .fi
- .SH AUTHOR
- Larry Wall, with heckling by Tom Christiansen.
- .SH "SEE ALSO"
- .M perl 1 ,
- .M rn 1 ,
- .M ps 1 ,
- .M uptime 1 ,
- .M newsmaint 8 .
- .SH DIAGNOSTICS
- Obscure diagnostics are an obsolete concept rooted in the notion that
- programs must be shoehorned into memory.
- .SH BUGS
- There ought to be a way for multiple users to share a single clip process.
- .PP
- It doesn't work on NNTP-only systems.
- .ex
-