home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/perl
- #
- # intersect.pl -- find intersection of lq phrases
- #
- # Tom Christiansen
- # tchrist@convex.com
- # 8 Aug 91
-
- $match = '-mh';
- $lqprog = 'lqshow';
-
- sub gensym { 'gensym' . ++$gensym; };
- *choices = &gensym; # init list of lists
-
- $SIG{'INTR'} = 'INTR';
-
- sub usage {
- select(STDERR);
- print <<FINIS;
- usage: $0 [-CAB <num>] [-v] [-q] [-d directory] [-n] [-l | -L | -R]
- [-mp | -mh | -ma] phrase ... [ [-a | -r] phrase ... ] ...
-
- -a add set of phrases
- -r remove set of phrases
-
- -A lines of context after
- -B lines of context before
- -C lines of context before and after
-
- -n sort numerically
-
- -q use lqkwik instead of lqshow
- -v verbose mode
-
- -l list filenames
- -L just list base names
- -R list raw lqshow triples
-
- -d set LQTEXTDIR
-
- -m[pha] lqphrase match fuzziness
- p precise
- h heuristic
- a almost anything goes
-
- FINIS
-
- exit 1;
-
- }
-
- $sort = "sort +2 +0n +1n";
-
- while ($_ = shift) {
- if (/^-m[pha]$/) { $match = $_; }
- elsif (/^-q$/) { $lqprog = 'lqkwik'; }
- elsif (/^-n$/) { $numsort++; }
- elsif (/^-R$/) { $listonly++; $rawlist++;}
- elsif (/^-v$/) { $verbose++; }
- elsif (/^-a$/) { *choices = &gensym; } # start a new list
- elsif (/^-r$/) { *choices = &gensym; $nand[$gensym]++; }
- elsif (/^-d(.*)$/) { $ENV{'LQTEXTDIR'} = $1 || shift; }
- elsif (/^-l$/i) { $listonly = 1; $shortlist = /L/; }
- elsif (/^-[AB](\d*)$/) {
- $hadwin++;
- y/A-Z/a-z/; # gotta pass lower case
- $lqshow_opts .= ' '. $_;
- $lqshow_opts .= ' ' . shift unless $1;
- }
- elsif (/^-C(\d*)$/) {
- $hadwin++;
- $context = $1 || shift;
- $lqshow_opts = "-a $context -b $context";
- }
- elsif (/^-(.*)/) {
- warn "unknown option: $1\n";
- &usage;
- }
- else {
- push(@choices, $_);
- }
- }
-
- &usage unless @choices;
- ### finally the interesting part...
-
- unless ($hadwin) {
- &getwin;
- $context = int($rows - 3)/2;
- $lqshow_opts = "-a $context -b $context";
- }
-
- $func = 'adding';
- &read_choices(*need, 'gensym1');
-
- @fold = @fnew;
- %fold = %fnew;
-
- for ($i = 2; $i <= $gensym; $i++) {
- $func = $nand[$i] ? 'subtracting' : 'adding';
- &read_choices(*want, "gensym$i");
- &$func(*need, *want);
- }
-
- $TMP = "/tmp/inter$$";
- if ($numsort) {
- # too hard to make sort do this
- #print STDERR "sorting the hard way..." if $verbose;
- $i = 0;
- for (@need) {
- $i++;
- s/^ /0/; # convex printf is broken
- ($block, $offset, $file) = split;
- push(@block, $block);
- push(@offset, $offset);
- $file =~ /(\d+)$/;
- push(@file, $1);
- }
- open (TMP, ">$TMP") || die "can't write to $TMP: $!";
- print TMP @need[sort bynum $[..$#need];
- #print STDERR "done\n" if $verbose;
- } else {
- open (TMP, "| $sort > $TMP");
- for (@need) {
- s/^ /0/; # convex printf is broken
- print TMP;
- }
- }
- unless (close TMP) {
- warn "can't close $TMP correctly";
- &done;
- }
-
- $count = @need;
-
- unless ($verbose) {
- local(%seen);
- for (@need) {
- $fcount++ unless $seen{(split)[2]}++;
- }
- print STDERR "total of $count ", &plural('match', $count);
- print STDERR " in $fcount ", &plural('file',$fcount), "\n";
- }
-
-
- if ($listonly) {
- open (TMP, "<$TMP");
- if ($rawlist) {
- print while <TMP>;
- } else {
- while (<TMP>) {
- s/.*\s(\S+)$/$1/;
- $shortlist && s#.*/##;
- print unless $printed{$_}++;
- }
- }
- } else {
- warn "No matches found\n", &done unless $count;
- $cmd = "$lqprog ";
- $cmd .= "$lqshow_opts " if $lqprog =~ /show/;
- $cmd .= "-f $TMP";
- warn "running: $cmd\n" if $verbose;
- system $cmd;
- }
-
- &done;
-
-
- ##############################################################
-
- sub read_choices {
- local(*results, *targets) = @_;
- local(@pretty) = ();
-
- local($fcount, $count) = (0,0);
-
- die "no targets" unless @targets;
-
- @results = ();
-
- for (@targets) { push (@pretty, "\"$_\""); }
- local($pretty) = join(', ', @pretty);
- print STDERR "$func $pretty: " if $verbose;
-
- # don't let the shell get even close to touching my targets
- die "can't fork: $!" unless defined ($pid = open(KID, "-|"));
- unless ($pid) {
- exec 'lqphrase', $match, @targets;
- die "can't exec lqphrase: $!" ;
- }
-
- @fnew = ();
- %fnew = ();
-
- for (<KID>) {
- $count++;
- $file = (split)[2];
- $fcount++ unless $fnew{$file}++;
- next if %fold && !$fold{$file};
- push(@results, $_);
- push(@fnew, $file);
- }
- close KID;
- print STDERR "$count in $fcount\n" if $verbose;
- die "No matches for $pretty\n" unless $count;
- die "lqphrase exited badly" if $?;
- }
-
- sub adding {
- local($mfunc) = 'add2c';
- &merge;
- }
-
- sub subtracting {
- local($mfunc) = 'sub2c';
- &merge;
- }
-
- sub merge {
- local(*a, *b) = @_;
- local($_);
- local(@c, *f1, *f2, *f3);
- local($file);
-
- print STDERR " intersection => " if $verbose;
-
- &$mfunc(*a, *fold);
- &$mfunc(*b, *fnew);
-
- %fold = %f3;
- @fold = @f3;
-
- local($count) = 0+@c;
- local($fcount) = 0;
- $fcount++ while each %fold;
-
- print STDERR " $count in $fcount\n" if $verbose;
-
- @a = @c;
- }
-
- sub plural {
- local($_, $number) = @_;
- local($end) = 's';
- return $_ if $number == 1;
- if (/[cst]h$/) { $end = 'es'; }
- "$_$end";
- }
-
- sub add2c {
- local(*list, *set) = @_;
- local($i);
-
- for ($i = 0; $i < @list; $i++) {
- next unless $fnew{$set[$i]} && $fold{$set[$i]};
- push(@c, $list[$i]);
- push(@f3, $set[$i]);
- $f3{$set[$i]}++;
- }
- }
-
- sub sub2c {
- local(*list, *set) = @_;
- local($i);
-
- for ($i = 0; $i < @list; $i++) {
- next if $fnew{$set[$i]};
- push(@c, $list[$i]);
- push(@f3, $set[$i]);
- $f3{$set[$i]}++;
- }
- }
-
- sub bynum {
- $file[$a] <=> $file[$b] ||
- $block[$a] <=> $block[$b] ||
- $offset[$a] <=> $offset[$b];
- }
-
-
- sub INTR {
- warn "\nInterrupt!\n";
- &done;
- }
-
- sub done {
- #unlink $TMP;
- exit;
- }
-
- sub getwin {
- $TIOCGWINSZ = 0x40087468; # should be require sys/ioctl.pl
- if (ioctl(STDOUT, $TIOCGWINSZ, $winsize)) {
- ($rows, $cols, $xpixel, $ypixel) = unpack('S4', $winsize);
- } else {
- $cols = $ENV{TERMCAP} =~ /:co#(\d+)/ ? $1 : 80;
- }
- }
-