home *** CD-ROM | disk | FTP | other *** search
-
- /* Copyright © 1991, 1992 by Walter Rothe. You may freely use and modify this
- * program, but not for commercial profit. A modest fee for distribution is
- * allowed. Derivative works must be released with source along with the
- * executable or provisions made to provide the user source, if requested.
- * Uploading source to a major bulletin board system within 6 months of the
- * time of the request satisfies this requirement. This copyright notice
- * must not be deleted from the source.
- */
-
- /* */
- /* Scan */
- /* */
- /* This program scans file(s) looking for a pattern(s). It supports many */
- /* wildcard characters in patterns(*,?,[],[^],[-],&,..,+,|) and can scan */
- /* for up to 125 patterns simultaneously with little speed degradation. */
- /* If a match is found, a whole article can be printed out instead of just */
- /* a number of lines around the match. It also supports recursive directory */
- /* scanning and inverted pattern matching. */
- /* */
- /* Limitations: 1. Total file pathname must be less than LONGWIDTH wide. */
- /* Aborts otherwise. */
- /* 2. Article separator must be less than MAXKWSZ and LWIDTH */
- /* characters long. Aborts otherwise. */
- /* 3. Sentence scan only looks OVERLAP chars to left or right */
- /* of keyword match to see if rest of stuff matches. */
- /* 4. Patterns are a max of LWIDTH chars wide. If not, it */
- /* aborts. */
- /* 5. A max of 125 major terms is supported. Aborts otherwise. */
- /* 6. A major term must have at least 1 set of 2 consecutive */
- /* non wildcard characters. Otherwise, it will abort. */
- /* 7. Article separator must be at least 2 chars long and have */
- /* at least 2 unique chars. Otherwise, it will abort. */
- /* 8. If article size is > window size(buffer size), there */
- /* is a chance that not all of it will be printed out. A */
- /* warning will be printed out if this occurs. At least one */
- /* buffer's worth will always be printed out. The part of the */
- /* article in the current buffer will also be printed out. */
- /* 9. The ".." wildcard causes a match on either the left or */
- /* right MT if the LineScan option is set. */
- /* 10.Buffer size must be in longword increments. */
- /* */
- #include <stdio.h>
- #include <exec/types.h>
- #include <exec/memory.h>
- #include <libraries/dos.h>
- #include <libraries/dosextens.h>
- #include <functions.h>
- #include "fcntl.h"
- #include "ctype.h"
- #include <string.h>
- #include "scan.h"
- #include <time.h>
-
- #ifndef LATTICE
- #endif
-
- clock_t StrtTime; /* StrtTime/EndTime/TotTime are not referenced in the visible part of this file; */
- clock_t EndTime; /* presumably scan timing - TODO confirm against the code that uses them */
- clock_t TotTime=0;
-
- extern FastSearch(); /* Inner loop search(returns maj term #); no type given, so implicitly int */
-
- extern Lines *GetMajTrms(); /* Get list of Maj Trms from Min Terms */
-
- extern int NumOfMinTrms;
- extern int NumOfMajTrms;
-
- extern XFI *xfropen(); /* XFI is a project type, not declared in this file */
- extern long xfrread();
- extern xfrclose(); /* Implicitly int; CleanIt() calls it with FHandle */
- extern FindRestOfMT(); /* Implicitly int */
- extern struct Library *OpenLibrary();
-
- char SubPat[65536]; /* Major term index from two char index; zeroed by ClrSubPat() */
- unsigned short CurPat;
- unsigned short SavEPat, SavTEPat;
- unsigned short SavOPat, SavTOPat;
- unsigned short frstuppr, scnduppr;
- /* "rest of" signifies the characters remaining after the 2 char subkey is */
- /* removed. Let's say the keyword is "never" and the subkey is "ne". In this */
- /* case, the rest is "ver". If the subkey was "ev", the rest is "never", */
- /* since it doesn't do separate compares. If the keyword is "ab" then the */
- /* subkey is also "ab" and the rest is "ab". We need at least 2 chars since */
- /* tables FrstBt and ScndBt are used without testing for "finished", for speed */
- unsigned char DsplTb[2*MAXMTS]; /* Displacement to 1st char of rest of key from MTIdx */
- char FrstBt[2*MAXMTS]; /* 1st char of rest of keyword from maj term index */
- char ScndBt[2*MAXMTS]; /* 2nd char of rest of keyword from maj term index */
- char LowrCs[256]; /* Lower case char from mixed case chars */
- int MajTrm2MinTrm[MAXMTS]; /* Minterm number indexed by major term num */
- int MajTrm2BitNum[MAXMTS]; /* Bit number in MinSatTbl from maj trm # */
- int MinSatByMTOnly[MAXMTS]; /* 1 if min term is satisfied by 1 maj trm */
- int MajTrm2MaxKeyLen[MAXMTS]; /* Length of longest keyword in major term; compared by MaxVal() */
- int MTNumSortedByKeyLen[MAXMTS];/* Maj Trm #'s sorted by length of keyword */
- char *FreeItList[256]; /* Max number of bracket expressions; CleanIt() frees each entry as 256 bytes */
- int FrI=0; /* Number of pointers currently held in FreeItList (CleanIt() frees FrI-1 down to 0) */
- int NextMT;
- int PrntPrevPrev = 0;
- int CurLen;
- int TokLen;
- int CurDirModified=0;
- int LineScan;
- int LineNum = 0; /* Print out line number with match if 1 */
- int TermLp = 0;
- int NumDup;
- int FrstPnt;
- int LastInLast; /* zero if nothing printed out in present buf yet */
- int PrntWidth=0;
- int AlwaysPrint = 0; /* if 1, always print file pathname scanned */
- int LenSt;
- int SzPtr;
- int WroteOverIt = 0;
- int Inv;
- int DKWIdx;
- int SavEIdx, SavTEIdx;
- int SavOIdx, SavTOIdx;
- int NumBefore=0, NumAfter=1;
- int RealFile; /* if 1, outputting to real file instead of screen */
- int InvertMatch=0; /* When 1, outputs articles that don't match */
- int ColReq=0; /* Column that article separator must be in. 0 -> ignore */
- int ColOk;
- int FDsplTbl[MAXDUP]; /* Full Displ to strt of keywrd by MT Index */
- int ArtInPrevBuf; /* Start of article is in previous buffer */
- int BlkSize=1; /* Size of block to write to output stream */
- long NumBlksToWrt; /* Num of blocks to write to output stream */
- long NumBlksToWrt2;
- char *WhereToStrt; /* Pntr 2 start of article to write out */
- char *EndOfPrevBuf; /* Pntr 2 end of buffer used b4 current buf */
- char *iii; /* Scratch pointer used by fxwrite()/fywrite() while blanking nonprinting chars */
- int bufchr; /* Scratch: char last examined by that blanking loop; only written there */
- char *SOCB; /* Pntr 2 start of current buf being used */
- char *ASOB; /* Pntr 2 absolute start of current buffer */
- long CurOddOH;
- long CurEvenOH;
- long TotOH;
- long MulFct;
- long OH;
- char *KWTbl[2*MAXMTS][2]; /* Pntrs 2 1st & last char of rest of keywd */
- char *SvTbl[2*MAXMTS]; /* Saves pntr to strt of keyword for later */
- char *DKWTbl[MAXDUP][4]; /* Pntrs 2 1st/last/Dspl/link 2 next dup 4 rest kwd */
- char *EOCB; /* Pntr 2 end last byte in current buf + 1 */
- char *SavEOCB, *SavSOCB; /* End/start of the previous buffer; fywrite() range-checks Buf against them */
- char **TmpPtr;
- char ArticleSep[] = "\nArticle";
- char *ArtSep = ArticleSep; /* Pntr 2 rest of article separator */
- char *EOASep = ArticleSep + 7; /* Pntr 2 last char of rest of article sep */
- unsigned char DFASep[2]; /* Displ for art sep into buffer */
- char *CurArtStrt; /* Pntr 2 strt of article in a buffer */
- char EOBK[] = "$-$"; /* Keyword indicating end of buffer */
- char LineSrchDelim[] = "%!%"; /* Article separator when doing line search */
- char *PntPtr;
- char *RightStrt;
- char *LastPntEnd;
- long WinSiz = 16384; /* Size in bytes of each of the 3 buffers */
- FILE *StrmPtr = stdout; /* Output stream */
- FILE *OutFile=NULL; /* Output file; closed by CleanIt() */
- FILE *ConfigF=NULL; /* Configuration file; closed by CleanIt() */
- char ArtBuf[LWIDTH]; /* Buffer to put article separator in */
- Lines MinArray[MAXMTS];
- Lines *MinTerm = &MinArray[0];
- Lines *EndMin;
- Lines *MinPtr;
- long MinSatTbl[MAXMTS][2]; /* 32 bit fields & masks of MT's satisfied */
- int LastMTTbl[MAXMTS]; /* Daisy chain of major terms found in art */
- int MTIndx;
- int LastMT; /* Index into LastMTTbl of last maj trm fnd */
- Lines Garb1;
- char *Stf1 = (char *)&Garb1;
- Lines Garb2;
- char *Stf2 = (char *)&Garb2;
- Lines Garb3;
- char *Stf3 = (char *)&Garb3;
- char *TmpTrm;
- Lines MTArray[MAXMTS];
- Lines NoBrakArray[MAXMTS]; /* Array of MT's with stuff between [] removed */
- /* Also ] is removed. This array winds up */
- /* having the *,&, and [ chars changed to zero */
- Lines NoBrakAry[MAXMTS]; /* Same as NoBrakArray except the *,&, and [ */
- /* are not overwritten with 0. */
- int TokStrt[2*MAXMTS];
- int TokEnd[2*MAXMTS];
- char *NBAryEnd[MAXMTS]; /* pntrs to end of MT strings with no ] */
- Lines *MajTerm = &MTArray[0];
- Lines *LstMajTrm;
- Lines *EndMT;
- char *InName, *OutName, *LastArg;
- char *Tok, *SavTok, *SavTTok;
- DIB *CurDirPtr=NULL; /* Top of the stack of directory locks (linked through BackLink); unwound by CleanIt() */
- int i, j, k, ll, ci, t1; /* counter variables */
- int FndBrak;
- int StrtBrk;
- BrakTyp1 *BrakIdx=NULL; /* Freed by CleanIt() as MAXMTS*sizeof(BrakTyp1) bytes */
- BrakTyp2 *BrakPtr=NULL; /* Freed by CleanIt() as MAXMTS*sizeof(BrakTyp2) bytes */
- char *myptr;
- int FndALOGKeyWrd;
- int OutArt = 0;
- int Indx;
- int LFCnt;
- int Mtch1st = 0; /* Set by PathPrint() once a match in the current file has been reported */
- int MatchFnd=1;
- int MatchNotFnd=0;
- XFI *FHandle=0; /* Handle that CleanIt() passes to xfrclose() */
- char *FName; /* Pathname that PathPrint() prints */
- char *TmpCS, *TmpCS2;
- long ReadNum;
- char *malloc(), *strcpy(), *strcat(); /* K&R-style declarations of library routines */
- int HasCnfgF;
- unsigned char MTNum; /* Major term number. */
- unsigned char MTN; /* Major term index. MTN = MTIdx>>1 */
- unsigned char MTIdx; /* Major term index. MTNum = MTIdx>>1 */
- char **MTTmp;
- char *BufIdx;
- char *TmpP, *BP;
- int CmdLnArgIdx, LstCmdLnArgIdx, RecursFlg = 0; /* argv index range of filenames, set by InitNextFile() */
- int NumOfCR = 0; /* Num of line feeds in all buffs already scanned */
- int NumOfCRInCurBuf = 0; /* Num of line feeds found so far in current buf */
- char *LstOut = NULL; /* Pntr to last char output in current buffer + 1 */
- long NumTot=-1; /* Zero-based number of the line last written by fxwrite()/fywrite(); printed as NumTot+1 */
- static int LnSz; /* Columns used so far on the current output line when truncating (see Trunc) */
- int Trunc=0; /* If nonzero, fxwrite()/fywrite() cut output lines at TextWidth columns */
- int TextWidth; /* Width of current window in characters */
- int HasKeyWrd=1;
- int NoKeyW=0;
- char *HighLightColor="\x9B\x33\x32m";
- char *PathNmColor="\x9B\x33\x33m";
- char *DlydPntStart=NULL; /* Pntr 2 right context that still needs 2b printed */
- int DlydPntRightContext=0;
- long DlydPntSize=0;
- int DlydPntNextBuf=0;
- char *KeyStrt;
- char *WrdStrt;
- char *DlydPntEnd;
- int SizeDiff;
- int TotTokLen; /* fxwrite() highlights at least this many chars before the end of its output */
- int KeyWrdOvlp = MAXKWSZ; /* Used in asyncread.c for main srch buf overlap */
- int OpenNew = 0; /* 1 if finished with internal LZH archive file */
- int LON;
- int StrtLFC;
- char *StrtPP;
- int CutIt = 0; /* binary file cut since could not find a line feed in time */
- int MayNeedLF = 0;
- char *LZHFileName = ""; /* Internal filename of lzh file being scanned */
- char *WildLZH = "*"; /* Wildcard pattern of which lzh int files to search */
- int EnableLZHDecomp = 0; /* When true, enables decompression of .lzh files */
- int TwoCharArtSep = 0; /* If 1, the article separator is only 2 chars long. */
- int SPIdx;
- int iij;
- int FndX;
- int TmpT;
- long ZeroLong=0;
- extern int ItsALZH; /* Nonzero makes PathPrint() append "(LZHFileName)" to the pathname */
- FILE *zero=NULL; /* NULL stream pointer; CleanIt()'s DEBUGCLEAN code calls fflush(zero) */
-
-
- void ClrSubPat()
- /* This should be done in assembly */
- {
- long i;
- for (i=0; i<65536; i++) { SubPat[i] = 0; }
- }
-
- int MaxVal(a, b)
- int *a, *b;
- {
- if (MajTrm2MaxKeyLen[*a] < MajTrm2MaxKeyLen[*b]) { return(-1); }
- else { return( MajTrm2MaxKeyLen[*a] > MajTrm2MaxKeyLen[*b]); }
- }
-
int WindowSize()
/* Ask the console for its window bounds and return the window width in */
/* characters. The request "CSI 0 q" is written to stdout and the reply is */
/* read from stdin up to its terminating 'r'; the width is taken to be the */
/* last decimal number in the reply (the digits just before the blank and */
/* the 'r'). Returns 0 if no complete reply with a number was received. */
/* The console is put in raw mode by set_raw() while the reply is read and */
/* restored by set_con() afterwards. */
{
    char buffer[32];
    int ch;
    int n = 0;
    int SawEnd = 0;             /* set once the terminating 'r' has been read */
    int width = 0;
    int scale = 1;

    set_raw();
    printf("\2330 q");          /* get window bounds */
    fflush( stdout );           /* request must be out before we wait for the reply */

    /* Never store more than the buffer can hold, and stop on end of file */
    while( n < (int)sizeof(buffer) - 1 ) {
        ch = getchar();
        if( ch == EOF ) break;
        if( ch == 'r' ) { SawEnd = 1; break; }
        buffer[n++] = (char)ch;
    }
    buffer[n] = '\0';
    set_con();

    if( !SawEnd ) return(0);

    /* Skip the separator(s) between the last number and the 'r' */
    while( n > 0 && (buffer[n-1] < '0' || buffer[n-1] > '9') ) n--;

    /* Accumulate the digits right to left; 4 digits is plenty for a width */
    while( n > 0 && buffer[n-1] >= '0' && buffer[n-1] <= '9' && scale <= 1000 ) {
        width += (buffer[n-1] - '0') * scale;
        scale *= 10;
        n--;
    }
    return(width);
}
-
- void fxwrite( Buf, BlkSiz, Count, Strm, KeyWrdPrs )
- /* if "line number" option selected, output line number with output text, */
- /* otherwise just output text. fx is called for present buf, fy for prev. */
- /* Note that at each buffer switch, the number of line feeds in the prev */
- /* buffer is added to the previous total(NumOfCR). Also truncate lines */
- /* that are wider than window, if -t option set. Change color of word */
- /* containing the matched keyword so it's highlighted. */
-
- char *Buf;
- size_t BlkSiz, Count;
- FILE *Strm;
- int KeyWrdPrs; /* set by caller if keyword is present at end of buffer */
- {
- long NumBytInRec;
- int OnSameLine=0;
- static int ColorOn=0;
- int ColorEnable=0;
- int DfSz;
- char *SavLO;
- int TurnColorOff=0;
- int MinBefore; /* min # of chars before keyword needing color highlight */
-
- if (!RealFile) {
- /* set all nonprinting chars to blanks */
- for (iii=Buf; iii<Buf+Count; iii++) {
- bufchr = (int)(*iii);
- if( ( !(*iii > 32 || (*iii < 0 && *iii > -97)) && *iii != '\n')
- || *iii == 0x7E) *iii = ' ';
- }
- }
-
- if( LineNum ) {
- if( Count == 0) return;
- if( Buf < SOCB ) { ErrP("Error: program bug!!! Buf < SOCB \n"); }
- if( Buf > EOCB ) { ErrP("Error: program bug!!! Buf > EOCB \n"); }
- if( LstOut < SOCB ) { ErrP("Error: LstOut < SOCB \n"); }
- if( LstOut > EOCB ) { ErrP("Error: LstOut > EOCB \n"); }
- /* count line feeds from last output to start of current output */
- CntCRInCurBuf( Buf);
- if( NumTot == NumOfCR + NumOfCRInCurBuf ) OnSameLine = 1;
- while( Count != 0 ) {
- SavLO = LstOut;
- if( ColorOn && OnSameLine ) {
- while( Count != 0 && isalnum((int)(*LstOut))){ LstOut++; Count--; }
- fprintf( Strm, HighLightColor );
- if( Count != 0) TurnColorOff = 1;
- }
- else {
- while( Count != 0 && *LstOut != '\n') { LstOut++; Count--; }
- }
- NumTot = NumOfCR + NumOfCRInCurBuf;
- if( Count != 0 && *LstOut == '\n') {
- NumOfCRInCurBuf++;
- LstOut++;
- Count--;
- }
- if( ColorEnable && (!Trunc || LnSz <= TextWidth) ) {
- ColorEnable = 0;
- ColorOn = 1;
- OnSameLine = 1;
- fprintf( Strm, HighLightColor );
- }
- if( KeyWrdPrs && Count == 0 && !ColorOn && !RealFile) {
- MinBefore = TotTokLen;
- while( (isalnum((int)(*(LstOut-1))) || MinBefore > 0)
- && LstOut != SavLO) {
- Count++;
- LstOut--;
- MinBefore--;
- }
- ColorEnable = 1;
- }
- /* if last time we printed a match, we were on the same line as */
- /* current match, don't put out line #. */
- if( !OnSameLine ) {
- if( NumTot > 99999 ) {
- if( Trunc ) { /* truncate line if line is too long */
- DfSz = LstOut - SavLO;
- LnSz = DfSz + 8; /* size of data plus size of line # */
- if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 8; }
- if( LnSz >= TextWidth && ColorEnable ) {
- fprintf( Strm, HighLightColor );
- }
- fprintf( Strm, "%7ld ", NumTot+1 );
- if( LnSz >= TextWidth && ColorEnable ) {
- fprintf( Strm, "\2330m");
- }
- fwrite( SavLO, BlkSiz, DfSz, Strm );
- }
- else {
- fprintf( Strm, "%7ld ", NumTot+1 );
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- }
- }
- else {
- if( Trunc ) {
- DfSz = LstOut - SavLO;
- LnSz = DfSz + 6;
- if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 6; }
- if( LnSz >= TextWidth && ColorEnable ) {
- fprintf( Strm, HighLightColor );
- }
- fprintf( Strm, "%5ld ", NumTot+1 );
- if( LnSz >= TextWidth && ColorEnable ) {
- fprintf( Strm, "\2330m");
- }
- fwrite( SavLO, BlkSiz, DfSz, Strm );
- }
- else {
- fprintf( Strm, "%5ld ", NumTot+1 );
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- }
- }
- OnSameLine = 0;
- }
- else {
- if( Trunc ) {
- DfSz = LstOut - SavLO;
- if( DfSz + LnSz > TextWidth ) {
- DfSz = TextWidth - LnSz;
- LnSz = TextWidth;
- }
- else { LnSz += DfSz; }
- fwrite( SavLO, BlkSiz, DfSz, Strm );
- }
- else {
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- }
- OnSameLine = 0;
- if( ColorOn ) {
- fprintf( Strm, "\2330m");
- if( *(LstOut-1) == '\n' ) {
- ColorOn = 0;
- }
- else {
- if( TurnColorOff ) OnSameLine = 1;
- }
- if( TurnColorOff ) { TurnColorOff = 0; ColorOn = 0; }
- }
- }
- }
- }
- else { /* no line numbers */
- SavLO = Buf;
- if( ColorOn ) {
- /* Change color of the part of matched keyword to the right of */
- /* the two char subpat up to the 1st space or line feed. */
- LstOut = Buf;
- while( Count != 0 && isalnum((int)(*LstOut)) ) {
- LstOut++;
- Count--;
- }
- fprintf( Strm, HighLightColor );
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- SavLO = LstOut;
- LstOut += Count;
- fprintf( Strm, "\2330m");
- if( Count != 0) ColorOn = 0;
- }
- else {
- LstOut = SavLO + Count;
- }
- if( KeyWrdPrs && !RealFile) {
- /* Change color of the part of matched keyword to the left of and */
- /* including the two char subpat. Stop moving left on space or */
- /* start of buffer. Note that when this routine is called with */
- /* KeyWrdPrs set, the two char subpat is always the last item in */
- /* the buffer. The left context is before it. */
- Count = 0;
- MinBefore = TotTokLen;
- while( (isalnum((int)(*(LstOut-1))) || MinBefore > 0)
- && LstOut != SavLO) {
- Count++;
- LstOut--;
- MinBefore--;
- }
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- fprintf( Strm, HighLightColor );
- SavLO = LstOut;
- LstOut += Count;
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- fprintf( Strm, "\2330m");
- if( *(LstOut-1) != '\n' ) ColorOn = 1;
- }
- else {
- fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm );
- }
- }
- if( CutIt ) { if( LnSz != TextWidth) fprintf( Strm, "\n"); CutIt = 0; }
- fflush( Strm );
- return;
- }
-
- void fywrite( Buf, BlkSiz, Count, Strm )
- /* Write Count chars starting at Buf, which lies in the PREVIOUS buffer
-  * (SavSOCB..SavEOCB), to Strm. fxwrite is the counterpart for the present
-  * buffer. When not writing to a real file (RealFile == 0) nonprinting
-  * chars in the range are first overwritten with blanks, in place.
-  * If the "line number" option (LineNum) is set, each output line is
-  * prefixed with its line number unless it continues the line printed last
-  * time (NumTot), and lines are cut at TextWidth columns when Trunc is set.
-  * Otherwise the whole range is written with a single fwrite(). */
- /* BlkSiz is handed to fwrite() as the element size. Unlike fxwrite, this routine does not flush Strm. */
- char *Buf;
- size_t BlkSiz, Count;
- FILE *Strm;
- {
- char *CurPtr;
- int OnSameLine=0;
- long NumCR2EndOfPBuf=0; /* line feeds between the current position and the end of the previous buffer */
- char *SavLO; /* start of the piece about to be written */
- int DfSz; /* number of data chars to write when truncating */
- if (!RealFile) {
- /* set all nonprinting chars to blanks: codes 0..32 (except '\n'), 0x7E, and negative codes below -96 */
- for (iii=Buf; iii<Buf+Count; iii++) {
- bufchr = (int)(*iii);
- if( ( !(*iii > 32 || (*iii < 0 && *iii > -97)) && *iii != '\n')
- || *iii == 0x7E) *iii = ' ';
- }
- }
- if( LineNum ) {
- if( Buf < SavSOCB ) { ErrP("Error: program bug!!! Buf < Prev SOCB \n"); }
- if( Buf > SavEOCB ) { ErrP("Error: program bug!!! Buf > Prev EOCB \n"); }
- if( Count == 0 ) { ErrP("Error2: Count to fywrite = 0 \n"); } /* fxwrite just returns in this case */
- if( LstOut < SOCB ) { ErrP("Error2: LstOut < SOCB \n"); } /* LstOut is checked against the CURRENT buffer */
- if( LstOut > EOCB ) { ErrP("Error2: LstOut > EOCB \n"); }
- /* count line feeds from start of output in prev buf 2 end of prev buf */
- CurPtr = SavEOCB;
- for(; CurPtr>=Buf; CurPtr-- ) { if( *CurPtr == '\n') NumCR2EndOfPBuf++; } /* the scan starts AT SavEOCB, so that byte is examined too */
- CurPtr++; /* the loop left CurPtr at Buf-1; step back onto Buf */
- if( NumTot == NumOfCR - NumCR2EndOfPBuf ) OnSameLine = 1;
- while( Count != 0 ) {
- SavLO = CurPtr;
- while( (--Count != 0) && (*CurPtr != '\n') ) { CurPtr++; } /* Count is decremented before the test, so the char CurPtr stops on is already counted */
- NumTot = NumOfCR - NumCR2EndOfPBuf;
- if( *CurPtr != '\n') NumCR2EndOfPBuf--; /* NOTE(review): decrements when the piece did NOT end in '\n'; '==' looks more natural - confirm */
- CurPtr++; /* include the char stopped on in this piece */
- /* if last time we printed a match, we were on the same line as */
- /* current match, don't put out line #. */
- if( !OnSameLine ) {
- if( NumTot > 99999 ) {
- fprintf( Strm, "%7ld ", NumTot+1 );
- if( Trunc ) { /* truncate line if line is too long */
- DfSz = CurPtr - SavLO;
- LnSz = DfSz + 8; /* size of data plus size of line # */
- if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 8; } /* NOTE(review): DfSz goes negative if TextWidth < 8 */
- fwrite( SavLO, BlkSiz, DfSz, Strm );
- }
- else {
- fwrite( SavLO, BlkSiz, CurPtr - SavLO, Strm );
- }
- }
- else {
- fprintf( Strm, "%5ld ", NumTot+1 );
- if( Trunc ) {
- DfSz = CurPtr - SavLO;
- LnSz = DfSz + 6; /* "%5ld " takes 6 columns */
- if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 6; } /* NOTE(review): DfSz goes negative if TextWidth < 6 */
- fwrite( SavLO, BlkSiz, DfSz, Strm );
- }
- else {
- fwrite( SavLO, BlkSiz, CurPtr - SavLO, Strm );
- }
- }
- OnSameLine = 0;
- }
- else {
- if( Trunc ) {
- DfSz = CurPtr - SavLO;
- if( DfSz + LnSz > TextWidth ) {
- DfSz = TextWidth - LnSz;
- LnSz = 0; /* NOTE(review): fxwrite sets LnSz = TextWidth at this point - confirm which is intended */
- }
- else { LnSz += DfSz; }
- fwrite( SavLO, BlkSiz, DfSz, Strm );
- }
- else {
- fwrite( SavLO, BlkSiz, CurPtr - SavLO, Strm );
- }
- OnSameLine = 0;
- }
- }
- NumTot = NumOfCR; /* everything up to the end of the previous buffer has now been output */
- }
- else {
- fwrite( Buf, BlkSiz, Count, Strm );
- }; /* the ';' after the brace is an empty statement */
- if( CutIt ) { if( LnSz != TextWidth) fprintf( Strm, "\n"); CutIt = 0; }
- return;
- }
-
void _abort()
/* Abnormal-termination path: release everything through CleanIt() and */
/* then end the program with abort(). Exit(2) is only reached should */
/* abort() ever return. */
{
    CleanIt();

    abort();

    Exit(2);
}
-
BackSlash(str)
/* Converts, in place, the string pointed to by str that contains backslash */
/* escapes to a string without backslashes. Recognized after a backslash: */
/* a b f n r t v " ' \ the usual C meanings */
/* xH or xHH one or two hex digits, value 1..255 */
/* D, DD or DDD up to three DECIMAL digits, value 0..255 */
/* (a value of 0 stores a NUL, which ends the string) */
/* Anything else, a hex value of 0 or any value above 255 is reported */
/* through ErrP(). Always returns 0. */
char *str;
{
    int i = 0;          /* read position */
    int k = 0;          /* write position, never ahead of i */
    int t, c;
    unsigned char ch;

    while( str[i] ) {
        if( str[i] == '\\' ) {
            /* Each case leaves i on the last char of the escape with the */
            /* translated char stored there; the copy below picks it up. */
            switch( str[++i] ) {
                case 'a': str[i] = '\a'; break;
                case 'b': str[i] = '\b'; break;
                case 'f': str[i] = '\f'; break;
                case 'n': str[i] = '\n'; break;
                case 'r': str[i] = '\r'; break;
                case 't': str[i] = '\t'; break;
                case 'v': str[i] = '\v'; break;
                case '"':
                case '\'':
                case '\\':
                    break;      /* stands for itself */
                case 'x':
                case 'X':
                    /* Count the digits actually present so that leading */
                    /* zeros (\x0A) are consumed too. */
                    t = 0;
                    for( c = 0; isxdigit((unsigned char)str[i+1+c]); c++ ) {
                        ch = (unsigned char)str[i+1+c];
                        t = (t << 4)
                            | (isdigit(ch) ? ch - '0' : tolower(ch) - 'a' + 10);
                        if( t > 255 ) break;    /* too many digits */
                    }
                    if( c == 0 || t <= 0 || t > 255 ) {
                        ErrP("Error: only 2 hex digit allowed after \\x \n");
                    }
                    i += c;
                    str[i] = (char)t;
                    break;
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    t = str[i] - '0';
                    for( c = 1; c < 3 && str[i+1] >= '0' && str[i+1] <= '9'; c++ ) {
                        t = t * 10 + (str[++i] - '0');
                    }
                    if( t >= 256 ) {
                        ErrP("Error: decimal # after \\ must be < 256\n");
                    }
                    str[i] = (char)t;
                    break;
                default:
                    ErrP("Error: illegal character after backslash\n");
            }
        }
        str[k++] = str[i++];
    }
    str[k] = '\0';
    return(0);
}
-
- CleanIt()
- {
- BPTR TLock;
- DIB *TempPtr;
- #ifdef DEBUGCLEAN
- printf("1st statement in cleanit\n");
- fflush( zero );
- #endif
- /* pop up any dir levels and restore original current directory */
- while ( CurDirPtr != NULL ) {
- if (CurDirPtr->CurLock != 0 ) {
- UnLock( CurDirPtr->CurLock );
- #ifdef DEBUGCLEAN
- printf("unlocked\n");
- fflush( zero );
- #endif
- }
- if (CurDirPtr->OldLock != 0 ) {
- TLock = CurrentDir( CurDirPtr->OldLock );
- #ifdef DEBUGCLEAN
- printf("set currentdir\n");
- fflush( zero );
- #endif
- }
- if (CurDirPtr->BackLink != NULL) {
- TempPtr = CurDirPtr->BackLink;
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = TempPtr;
- #ifdef DEBUGCLEAN
- printf("freed what CurDirPtr points to\n");
- fflush( zero );
- #endif
- }
- else {
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = NULL;
- #ifdef DEBUGCLEAN
- printf("last free of what CurDirPtr points to\n");
- fflush( zero );
- #endif
- }
- }
- xfrclose( FHandle);
- NumOfCR = 0;
- if( ConfigF != NULL ) { fclose( ConfigF ); ConfigF = 0; }
- if( OutFile != NULL ) { fclose( OutFile ); OutFile = 0; }
- if( BrakIdx != NULL ) {
- FreeMem( BrakIdx, (long)(MAXMTS*sizeof(BrakTyp1)));
- BrakIdx = 0;
- #ifdef DEBUGCLEAN
- printf("freed mem for braketidx\n");
- fflush( zero );
- #endif
- }
- if( BrakPtr != NULL ) {
- FreeMem( BrakPtr, (long)(MAXMTS*sizeof(BrakTyp2)));
- BrakPtr = 0;
- #ifdef DEBUGCLEAN
- printf("freed mem for braketptr\n");
- fflush( zero );
- #endif
- }
- while( FrI > 0 ) FreeMem( FreeItList[--FrI], 256 );
- }
-
/* Bounded string copy that always terminates the destination when n > 0: */
/* copies at most n-1 chars of s into d and stores a NUL after them, so no */
/* more than n bytes of d are written. With n == 0 nothing is stored. */
/* Returns d. */
char *mystrncpy(d, s, n)
char *d;
const char *s;
size_t n;

{
    char *out = d;
    size_t left;

    for( left = n; left != 0 && *s != '\0'; left-- ) {
        *out++ = *s++;
    }
    if( left != 0 ) {
        *out = 0;               /* source ended with room to spare */
    }
    else if( out > d ) {
        *(out - 1) = 0;         /* room used up: last char gives way to the NUL */
    }
    return(d);
}
-
ErrP(msg)
/* Fatal error exit: write msg to stderr exactly as given, release */
/* everything through CleanIt() and end the program with status 1. */
/* Does not return. */
char *msg;
{
    /* fputs, not fprintf: msg is text to show, never a format string */
    fputs( msg, stderr );
    CleanIt();
    exit(1);
}
-
- PathPrint(Matched)
- /* Print pathname of file being searched. "AlwaysPrint" signifies that the */
- /* pathname should be printed even if a match is not found in a file. If */
- /* a match is not found in a file, it's pathname is overwritten by the */
- /* next file scanned. When doing screen output, the color of the pathname */
- /* printed is changed. */
- int Matched;
- {
- static unsigned long BlkSz = 1;
- static unsigned long Wrt1 = 1;
- static unsigned long Wrt2 = 2;
- static unsigned long Wrt9 = 9;
- static char LastFileNm[LONGWIDTH] = "\0";
- LLines TempFN;
- extern int Mtch1st; /* flag set after 1st match in a file */
- int pad;
- strcpy( TempFN, FName);
- if( ItsALZH ) {
- strcat( TempFN, "(" );
- strcat( TempFN, LZHFileName );
- strcat( TempFN, ")" );
- }
- if(Matched) { /* a match was found before this routine was called */
- if(RealFile) {
- if(strcmp(LastFileNm, TempFN)) { /* not = so havent printed it yet */
- fwrite("->>>>>>> ",BlkSz,Wrt9,StrmPtr);
- fwrite(TempFN,BlkSz,strlen(TempFN),StrmPtr);
- fwrite("\n",BlkSz,Wrt1,StrmPtr);
- strcpy(LastFileNm, TempFN);
- Mtch1st = 1;
- }
- else {
- if(!Mtch1st) { fwrite("\n",BlkSz,Wrt1,StrmPtr); Mtch1st = 1; }
- }
- }
- else {
- if(strcmp(LastFileNm, TempFN)) { /* not = so havent printed it yet */
- printf(PathNmColor);
- fwrite(TempFN,BlkSz,strlen(TempFN),StrmPtr);
- fwrite("\n",BlkSz,Wrt1,StrmPtr);
- printf("\2330m");
- strcpy(LastFileNm, TempFN);
- Mtch1st = 1;
- }
- else {
- if(!Mtch1st) { fwrite("\n",BlkSz,Wrt1,StrmPtr); Mtch1st = 1; }
- }
- }
- }
- else {
- if(!RealFile && AlwaysPrint) {
- if(strcmp(LastFileNm, TempFN)) { /* not = so havent printed it yet */
- fwrite("\r",BlkSz,Wrt1,StrmPtr);
- printf(PathNmColor);
- fwrite(TempFN,BlkSz,strlen(TempFN),StrmPtr);
- printf("\2330m");
- pad = strlen(TempFN) - strlen(LastFileNm);
- while (pad < 0) { fputs(" ",StrmPtr); pad++; }
- fflush(StrmPtr);
- strcpy(LastFileNm, TempFN);
- Mtch1st = 0;
- }
- }
- }
- }
-
/* This procedure initializes the "beginning" and "end arg" numbers for the */
/* NextFile function. Some command line interpreters */
/* automatically expand wild cards which makes it hard to know which */
/* absolute argument ends the list of file names. The list is taken to */
/* start at the (NumBefore+1)th argument after the command name that does */
/* not begin with "-", and to end NumAfter+1 arguments before the end of */
/* the command line. The results are left in the globals CmdLnArgIdx and */
/* LstCmdLnArgIdx. Aborts through ErrP() if the command line is too short */
/* for either count. Always returns 0. */
/* Input parameters NumBefore and NumAfter are constants used to make this */
/* routine generic for different CLI command formats. */

int InitNextFile(argv, argc, NumBefore, NumAfter)
int argc; char *argv[];
int NumBefore; /* Number of arguments before filenames start but not */
/* including the cmd itself or any "-" arguments. */
/* This is provided to support generic reusability. */
int NumAfter; /* Number of arguments after filenames end. No "-" args */
{
    extern int CmdLnArgIdx, LstCmdLnArgIdx, RecursFlg;
    int NumB, NumA;

    /* Assign CmdLnArgIdx the index of the first filename on the command */
    /* line. Only argv[1]..argv[argc-1] are looked at; argv[argc] is not a */
    /* string and must not be dereferenced. */
    CmdLnArgIdx = 0;
    NumB = NumBefore;
    while( (NumB >= 0) && (CmdLnArgIdx + 1 < argc) ) {
        CmdLnArgIdx++;
        if( argv[CmdLnArgIdx][0] != '-' ) NumB--;
    }
    if( (NumB >= 0) || (CmdLnArgIdx >= argc) ) {
        ErrP("InitNextFile: # before too large\n");
    }

    /* Assign LstCmdLnArgIdx the index of the last filename on the command */
    /* line: step back NumAfter+1 arguments from the end. Every argument */
    /* counts here, whether or not it starts with "-". */
    LstCmdLnArgIdx = argc;
    for( NumA = NumAfter; (NumA >= 0) && (LstCmdLnArgIdx > 0); NumA-- ) {
        LstCmdLnArgIdx--;
    }
    if( LstCmdLnArgIdx == 0 ) { ErrP("InitNextFile: # args after too large\n"); }
    return(0);
}
-
- /* This function is used to parse the command line for filenames. The */
- /* InitNextFile procedure does the initial work to find absolute beginning */
- /* and ending argument numbers and this function uses those numbers to */
- /* retrieves a directory and wildcard pattern from the command line and */
- /* search for any files matching the pattern. Each time it is called, it */
- /* returns a pointer to another full pathname until no more match. Then it */
- /* does the same thing with the next command line argument until all command */
- /* line args are tried. A NULL pointer returned means no more matches exist. */
-
- char *NextFile(ArgVSav)
- char *ArgVSav[];
- {
- extern int CmdLnArgIdx, LstCmdLnArgIdx, RecursFlg;
- static FnshdRecurs = 1;
- static LLines LstDirNm = "\0"; /* Init to empty string */
- static LLines FullPathNm;
- DIB *TmpPtr;
- static int StopOnMatch;
- DIB *NewPtr;
- FIB *TmpFIBPtr;
- BPTR CLock, NewLock, TLock;
- char *PathNmPtr1;
- char *PathNmPtr2;
- char *EndOfDir;
- static char *CurFileNm;
- static char *ScanAll = "*";
- static LLines CurDirName;
- static LLines SavDirName;
- char *StrtOfPathNm;
- int FileNtFnd = 1;
- int Len;
- int ii;
- char *Sidx;
- char *Sidx2;
- char *FName;
-
- do {
- if( CmdLnArgIdx > LstCmdLnArgIdx ) {
- if( LstDirNm[0] != '\0') {
- UnLock( CurDirPtr->CurLock );
- TLock = CurrentDir( CurDirPtr->OldLock );
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = NULL;
- }
- return(NULL);
- }
- if (FnshdRecurs) { /* get and decode next cmd line argument */
- StrtOfPathNm = ArgVSav[ CmdLnArgIdx ];
- if( *StrtOfPathNm == '-' ) {
- CmdLnArgIdx++;
- continue;
- }
- FnshdRecurs = 0;
- if( strpbrk( StrtOfPathNm, ":/") != NULL) { /* has dir info */
- /* get directory pathname from command line */
- if( (unsigned long)(PathNmPtr1 = strrchr( StrtOfPathNm, ':')) >
- (unsigned long)(PathNmPtr2 = strrchr( StrtOfPathNm, '/')) ) {
- EndOfDir = PathNmPtr1 + 1;
- }
- else {
- EndOfDir = PathNmPtr2 + 1;
- }
- if( (EndOfDir - StrtOfPathNm + 1) > LONGWIDTH )
- ErrP("File pathname too long. Aborting...\n");
- mystrncpy( CurDirName, StrtOfPathNm, EndOfDir - StrtOfPathNm + 1);
- /* get pointer to start of filename in pathname */
- CurFileNm = EndOfDir;
- }
- else { /* cmd ln arg only has file info */
- /* get pointer to start of filename in pathname */
- CurFileNm = StrtOfPathNm;
- CurDirName[0] = ' '; /* flag as default directory */
- CurDirName[1] = '\0';
- }
- /* ifonly given dir name, fill in filename to match any file in dir */
- if( CurFileNm[0] == '\0' ) {
- CurFileNm = ScanAll;
- }
- if( strpbrk( CurFileNm, "*?.") == NULL ) {
- if( CurDirName[0] == ' ' ) { strcpy( CurDirName, CurFileNm); }
- else {
- if( strlen(CurDirName) + strlen(CurFileNm) >= LONGWIDTH )
- ErrP("Total file pathname too long. Aborting...\n");
- strcat( CurDirName, CurFileNm);
- }
- strcat( CurDirName, "/" );
- CurFileNm[0] = '*';
- CurFileNm[1] = '\0';
- }
- StopOnMatch = 1;
- if( (strpbrk( CurFileNm, "*?") != NULL) || RecursFlg) {
- StopOnMatch = 0; /* has wildcard or is recursive */
- }
- if(strcmp(LstDirNm,CurDirName)){/*lstdir!sameas cur */
- /* if last dir was locked, then free it */
- if( LstDirNm[0] != '\0') {
- UnLock( CurDirPtr->CurLock );
- TLock = CurrentDir( CurDirPtr->OldLock );
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = NULL;
- }
- strcpy(LstDirNm, CurDirName);
- /* The FileInfoBlock structure has to be long-word alligned, so */
- /* we should use AllocMem to insure this condition is met. */
- CurDirPtr = AllocMem((long)sizeof(struct DirInfoBlock),
- MEMF_PUBLIC | MEMF_CLEAR);
- CurDirPtr->BackLink = NULL; /* anchor */
- /* Get a lock on the upper level Directory */
- if((strpbrk( CurDirName, ":") == NULL)) {
- /* lock the current default directory */
- if(!strcmp(CurDirName, " ")) CurDirName[0] = '\0';
- CurDirPtr->CurLock = Lock( (UBYTE *)CurDirName, ACCESS_READ );
- if (!CurDirPtr->CurLock) {
- fprintf(stderr,"Err with dir-> %s\n",CurDirName);
- ErrP("Could not get lock on upper directory\n");
- }
- CurDirPtr->OldLock = CurrentDir(CurDirPtr->CurLock);
- TmpFIBPtr = (FIB *)AllocMem((long)sizeof(FIB),
- MEMF_PUBLIC | MEMF_CLEAR);
- CLock = CurDirPtr->CurLock;
- ii = LONGWIDTH - 1;
- /* build full pathname of current default directory */
- while (CLock) {
- NewLock = ParentDir(CLock);
- Examine(CLock, TmpFIBPtr);
- FName = TmpFIBPtr->fib_FileName;
- if (*FName == '\0')
- FName = "ram";
- Len = strlen(FName);
- if (NewLock) {
- ii -= Len + 1;
- if( ii < 0 )
- ErrP("Full pathname too long. Aborting...\n");
- memcpy(CurDirName + ii, FName, Len);
- CurDirName[ii+Len] = '/';
- }
- else {
- ii -= Len + 1;
- if( ii < 0 )
- ErrP("Full pathname too long. Aborting...\n");
- memcpy(CurDirName + ii, FName, Len);
- CurDirName[ii+Len] = ':';
- }
- if(CLock != CurDirPtr->CurLock) { UnLock(CLock); }
- CLock = NewLock;
- }
- FreeMem(TmpFIBPtr, (long)sizeof(FIB));
- memmove(CurDirName, CurDirName + ii, LONGWIDTH - ii);
- CurDirName[LONGWIDTH - ii - 1] = '\0';
- strcpy(SavDirName, CurDirName);
- TLock = CurrentDir(CurDirPtr->OldLock);
- }
- else {
- strcpy(SavDirName, CurDirName);
- CurDirPtr->CurLock = Lock((UBYTE *)CurDirName, ACCESS_READ );
- if (!CurDirPtr->CurLock) {
- fprintf(stderr,"Error with dir-> %s\n",CurDirName);
- ErrP("Could not get lock on upper directory\n");
- }
- CurDirPtr->OldLock = CurrentDir(CurDirPtr->CurLock);
- if( CurDirPtr->OldLock == 0) {
- fprintf(stderr,"Error with dir-> %s\n",CurDirName);
- ErrP("Could not set current directory\n");
- }
- }
- }
- else { /* last directory name is same as current so copy it */
- strcpy(CurDirName, SavDirName);
- }
- if ( !Examine( CurDirPtr->CurLock, (FIB *)CurDirPtr )) {
- /* upper directory is empty so terminate this tree */
- FnshdRecurs = 1;
- }
- if( ((FIB *)CurDirPtr)->fib_DirEntryType <= 0) {
- /* Program normally assumes last part of a pathname is a */
- /* directory unless there is a "*", "?", or "." in it. It comes */
- /* here if it thought it is a directory when it's really a file. */
- FnshdRecurs = 0;
- UnLock( CurDirPtr->CurLock );
- CurDirName[strlen(CurDirName)-1] = '\0';
- /* Get position of last "/" or ":" in current dir name, */
- /* whichever is greater */
- Sidx = strchr(CurDirName,':');
- if( (Sidx2=strrchr(CurDirName,'/')) != NULL ) Sidx = Sidx2;
- /* Copy stuff after last "/" or ":" to filename string */
- strcpy( CurFileNm, Sidx+1 );
- /* Delete it from current directory name */
- *(Sidx+1) = '\0';
- CurDirPtr->CurLock = Lock( (UBYTE *)CurDirName, ACCESS_READ );
- if (!CurDirPtr->CurLock) {
- fprintf(stderr,"Err with dir-> %s\n",CurDirName);
- ErrP("Could not get lock on upper directory\n");
- }
- if ( !Examine( CurDirPtr->CurLock, (FIB *)CurDirPtr )) {
- /* upper directory is empty so terminate this tree */
- FnshdRecurs = 1;
- }
- }
- }
- while ( !ExNext( CurDirPtr->CurLock, (FIB *)CurDirPtr)) {
- if (CurDirPtr->BackLink != NULL) {
- /* pop directory */
- UnLock( CurDirPtr->CurLock );
- TLock = CurrentDir( CurDirPtr->OldLock );
- TmpPtr = CurDirPtr->BackLink;
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = TmpPtr;
- }
- else {
- /* Dont pop from upper most dir since we may need it later */
- FnshdRecurs = 1;
- break;
- }
- }
- while (!FnshdRecurs && (((FIB *)CurDirPtr)->fib_DirEntryType > 0)) {
- if (RecursFlg) {
- TmpPtr = CurDirPtr;
- CurDirPtr = AllocMem((long)sizeof(struct DirInfoBlock),
- MEMF_PUBLIC | MEMF_CLEAR);
- CurDirPtr->BackLink = TmpPtr;
- /* Get a lock on the Current Directory */
- CurDirPtr->CurLock =
- Lock((UBYTE *)(((FIB *)TmpPtr)->fib_FileName),ACCESS_READ);
- if (CurDirPtr->CurLock == 0) {
- fprintf(stderr,"Could not get lock on intermediate dir\n");
- TmpPtr = CurDirPtr->BackLink;
- FreeMem( CurDirPtr, ( long )sizeof( struct DirInfoBlock ));
- CurDirPtr = TmpPtr;
- }
- else {
- CurDirPtr->OldLock = CurrentDir(CurDirPtr->CurLock);
- if ( !Examine( CurDirPtr->CurLock, (FIB *)CurDirPtr )) {
- if (CurDirPtr->BackLink != NULL) {
- /* pop dir */
- UnLock( CurDirPtr->CurLock );
- TLock = CurrentDir( CurDirPtr->OldLock );
- TmpPtr = CurDirPtr->BackLink;
- FreeMem( CurDirPtr, ( long )sizeof( struct DirInfoBlock ));
- CurDirPtr = TmpPtr;
- }
- else {
- /* Dont pop from upper most dir since we may need it later */
- FnshdRecurs = 1;
- break;
- }
- }
- }
- }
- while ( !ExNext( CurDirPtr->CurLock, (FIB *)CurDirPtr)) { /* pop dir */
- if (CurDirPtr->BackLink != NULL) {
- UnLock( CurDirPtr->CurLock );
- TLock = CurrentDir( CurDirPtr->OldLock );
- TmpPtr = CurDirPtr->BackLink;
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = TmpPtr;
- }
- else {
- /* Dont pop from upper most dir since we may need it later */
- FnshdRecurs = 1;
- break;
- }
- }
- }
- if( FnshdRecurs ) { CmdLnArgIdx++; }
- else {
- /* test if file matches pattern */
- /* printf("%s\n", (((FIB *)CurDirPtr)->fib_FileName) ); */
- if( newwildcmp( CurFileNm, ((FIB *)CurDirPtr)->fib_FileName)) {
- FileNtFnd = 0;
- }
- }
- }
- while( FileNtFnd );
- ii = LONGWIDTH - 1;
- NewPtr = CurDirPtr;
- do {
- TmpPtr = NewPtr;
- FName = ((FIB *)TmpPtr)->fib_FileName;
- Len = strlen(FName);
- if (ii == LONGWIDTH - 1) {
- ii -= Len;
- if( ii < 0 ) ErrP("Full pathname too long. Aborting...\n");
- memcpy(FullPathNm + ii, FName, Len);
- }
- else {
- ii -= Len + 1;
- if( ii < 0 ) ErrP("Full pathname too long. Aborting...\n");
- memcpy(FullPathNm + ii, FName, Len);
- FullPathNm[ii+Len] = '/';
- }
- NewPtr = TmpPtr->BackLink;
- }
- while( TmpPtr->BackLink != NULL );
- Len = strlen(CurDirName);
- ii -= Len;
- if( ii < 0 ) ErrP("Full pathname too long. Aborting...\n");
- memcpy(FullPathNm + ii, CurDirName, Len);
- memmove(FullPathNm, FullPathNm + ii, LONGWIDTH - ii);
- FullPathNm[LONGWIDTH - ii - 1] = '\0';
- if (StopOnMatch) {
- FnshdRecurs = 1;
- while (CurDirPtr->BackLink != NULL) {
- /* pop directory */
- UnLock( CurDirPtr->CurLock );
- TLock = CurrentDir( CurDirPtr->OldLock );
- TmpPtr = CurDirPtr->BackLink;
- FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) );
- CurDirPtr = TmpPtr;
- }
- CmdLnArgIdx++;
- StopOnMatch = 0;
- }
- return( FullPathNm );
- }
-
/*
 * SubPatFreq
 *
 * Estimate how often a two-character sub pattern is expected to show up in
 * the text being scanned.
 *
 * SubPat  - the sub pattern: first character in the high byte, second
 *           character in the low byte.
 *
 * Returns the product of the expected frequencies of the two characters.
 * Each table entry is in hundredths of a percent, so the LSB of the result
 * is 0.000001 percent.
 *
 * The table is static const so it is not rebuilt on every call, and the
 * product is formed in long so it cannot overflow where int is 16 bits wide
 * (the largest product is 1300 * 1300 = 1690000).
 */
long SubPatFreq(SubPat)
unsigned short SubPat;
{
    /* Expected frequency of each character in hundredths of a percent. */
    /* Upper and lower case letters carry the same values.              */
    static const int Freq[256] = {
        /* 0x00 - 0x1F: control characters */
          10,   10,   10,   10,   10,   10,   10,   10,
          10,   10,   10,   10,   10,   10,   10,   10,
          10,   10,   10,   10,   10,   10,   10,   10,
          10,   10,   10,   10,   10,   10,   10,   10,
        /* ' '   !     "     #     $     %     &     '  */
        1000,   10,  100,   10,  100,   10,   10,   10,
        /*  (    )     *     +     ,     -     .     /  */
         100,  100,  100,   10,  100,   10,  100,  100,
        /*  0    1     2     3     4     5     6     7  */
         500,  500,  500,  500,  500,  500,  500,  500,
        /*  8    9     :     ;     <     =     >     ?  */
         500,  500,  100,   50,  100,  100,  100,  100,
        /*  @    A     B     C     D     E     F     G  */
         100,  810,  140,  270,  380, 1300,  290,  200,
        /*  H    I     J     K     L     M     N     O  */
         520,  630,   13,   40,  340,  250,  710,  790,
        /*  P    Q     R     S     T     U     V     W  */
         190,   11,  680,  610, 1050,  240,   90,  150,
        /*  X    Y     Z     [  bkslsh   ]     ^     _  */
         150,  190,    7,  100,  100,  100,   10,   10,
        /*  `    a     b     c     d     e     f     g  */
         100,  810,  140,  270,  380, 1300,  290,  200,
        /*  h    i     j     k     l     m     n     o  */
         520,  630,   13,   40,  340,  250,  710,  790,
        /*  p    q     r     s     t     u     v     w  */
         190,   11,  680,  610, 1050,  240,   90,  150,
        /*  x    y     z     {     |     }     ~    DEL */
         150,  190,    7,  100,  100,  100,  100, 1000,
        /* 0x80 - 0xFF: characters with the high bit set */
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
        1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000
    };
    long FirstFreq  = Freq[(SubPat >> 8) & 255];   /* high byte: 1st char */
    long SecondFreq = Freq[SubPat & 255];          /* low byte: 2nd char  */

    return( FirstFreq * SecondFreq );
}
-
/*
 * PrntHlp
 *
 * Write the extended help text (pattern wildcard summary and the layout of
 * the config file) to stderr.
 *
 * The return type is spelled out as int, which is what the former implicit
 * declaration meant, so existing callers are unaffected.  Always returns 0.
 * The text is written with fputs since nothing in it is formatted.
 */
int PrntHlp(void)
{
    static const char HelpText[] =
        "scan's pattern matching algorithm:\n"
        " ? Matches any single character except newline\n"
        " [chars] Match any characters within braces\n"
        " [c1-c2] Match any characters from c1 to c2\n"
        " [^chars] Match any characters not within braces\n"
        " \\xYY Matches hex number YY as a character\n"
        " \\Y Matches the standard C escape sequence Y\n"
        " \\YYY Matches the decimal number YYY as a character\n"
        " | Either pattern on left or right must match\n"
        " + Same as | \n"
        " * Pattern on left and right must both match and be in same\n"
        " word. Match on left must come before match on right.\n"
        " #? Same as *.\n"
        " & Pattern on left and right must both match and be in same\n"
        " sentence. Match on left must come before match on right.\n"
        " .. Pattern on left and right must both match and be in same\n"
        " article. Order of left and right matches is not important.\n"
        " This is alot faster than &. This is only useful during\n"
        " article scans.\n"
        "scan's config file format(there is an implicit | after each pattern):\n"
        " article separator\n"
        " column article separator must be in. 0 -> ignore\n"
        " invert match flag. 1 -> invert match. 0 -> normal\n"
        " window size in bytes\n"
        " search pattern1\n"
        " .\n"
        " search patternN\n";

    fputs(HelpText, stderr);
    return 0;
}
-
- main(argc, argv)
- int argc; char *argv[];
- {
- int ArgCSav; char **ArgVSav;
- /* StrtTime = clock(); */
- // printf("%ld %s %s %s %s %s\n",argc,argv[1],argv[2],argv[3],argv[4],argv[5]);
-
- /* Scan the command line and initialize based on different options */
- ArgVSav = argv; ArgCSav = argc;
- HasCnfgF = 0;
- NumAfter = 1;
- LineScan = 1;
- for (++argv; --argc; ++argv) {
- PndQues2Ast( *argv );
- InName = *argv;
- if (*InName++ == '-') {
- TermLp = 0;
- while((*InName != 0) && !TermLp) {
- switch(*InName++) {
- case('a'): LineScan = 0; break;
- case('c'): if (sscanf(InName,"%d",&ColReq)) {
- while( isdigit( (int)(*InName))) InName++;
- break;
- }
- else { ErrP("Bad column for article separator\n"); }
- case('f'): HasCnfgF = 1; NumAfter = 0;
- if ((ConfigF = fopen(InName,"r")) == NULL) {
- if(*InName != 0) ErrP("Bad config file name\n");
- if((ConfigF = fopen("s:scan.config","r")) == NULL)
- ErrP("Could not open s:scan.config\n");
- }
- /* Handle initialization of article separator, */
- /* article column, window size, common words, and */
- /* inverse pattern from configuration file. */
- if (fscanf(ConfigF,"%s\n",ArtBuf) != 1) ErrP("Could"
- " not read article separator from config file\n");
- ArtSep = EOASep = ArtBuf;
- while (*EOASep != '\0') EOASep++;
- --EOASep;
- if (fscanf(ConfigF,"%d\n",&ColReq) != 1)
- ErrP("Bad column for article separator\n");
- if (fscanf(ConfigF,"%d\n",&InvertMatch) != 1)
- ErrP("Err reading match inversion flag\n");
- if (fscanf(ConfigF,"%ld\n",&WinSiz) != 1) ErrP(
- "Could not read window size from config file\n");
- /* Initialize Min Term Array with pattern to */
- /* search for from config file */
- EndMin = &MinArray[0] + sizeof(MinArray);
- while ((fgets(Stf1, LWIDTH-1, ConfigF) > 0) &&
- (MinTerm != EndMin)) if(*Stf1!='\n')
- strcpy((char *)MinTerm++, strtok(Stf1,";\n\0"));
- if( MinTerm == EndMin) ErrP(
- "Too many patterns to search for. Aborting...\n");
- if (ferror(ConfigF)) ErrP("Error reading"
- " config file search pattern. Aborting...\n");
- fclose(ConfigF);
- ConfigF = 0;
- LineScan = 0;
- TermLp = 1;
- break;
- case('h'): if( isdigit( (int)(*InName))) {
- HighLightColor[2] = *InName++;
- if( isdigit( (int)(*InName))) {
- PathNmColor[2] = *InName++;
- }
- }
- break;
- case('i'): InvertMatch = 1; break;
- /* Do a line search instead of an article search. Print out xx lines around */
- /* match, where xx is a number following the -l. */
- case('l'): LineScan = 1; PrntWidth = atoi(InName);
- while( isdigit( (int)(*InName))) InName++;
- break;
- case('n'): LineNum = 1; break;
- /* Without -o option, strmptr defaults to stdout */
- case('o'): if ((OutFile = fopen(InName,"w")) == NULL)
- ErrP("Could not open output file\n");
- RealFile = 1; StrmPtr = OutFile;
- TermLp = 1; break;
- case('p'): AlwaysPrint = 1; break;
- case('r'): RecursFlg = 1; break;
- /* Without -s option, the article separator defaults to "Article" */
- case('s'): ArtSep = InName;
- while(*InName != 0) { InName++; }
- EOASep = --InName; TermLp = 1; break;
- case('t'): Trunc = 1; break;
- case('v'): fprintf(stderr,"Scan version 1.0 by Walter Rothe, "
- "Copyright © 1991,1992.\n");
- CleanIt();
- exit(1);
- /* Without -w option, window size defaults to 16kb */
- case('w'): if(sscanf(InName,"%ld",&WinSiz)) {
- while( isdigit( (int)(*InName))) InName++;
- break;
- }
- else { ErrP("Bad window size\n"); }
- case('x'): PrntHlp(); CleanIt(); exit(1);
- case('z'): if( *InName ) WildLZH = InName;
- EnableLZHDecomp = 1;
- TermLp = 1; break;
- }
- }
- }
- else { LastArg = --InName; } /* save last non "-" arg on cmd line */
- }
-
- if (ArgCSav < 3) {
- fprintf(stderr,
- " Copyright © 1991,1992 by Walter Rothe.\n"
- " + Scan SrchFile(s) Pattern -[hlnoprtwz] OR\n"
- " - Scan SrchFile(s) -f[CnfgFile] -[orz] OR\n"
- " @ Scan SrchFile(s) Pattern -a[cioprswz] OR\n"
- " # Scan -[vx]\n"
- " SrchFile :+-@ Pathname of file(s) to be searched\n"
- " Pattern :+@ What to srch for; Ex. sale..d*paint[3i]+paint&prog\n"
- " -a :@ Article scan. Prints out all articles with matches.\n"
- " -cColumn :@ Column article separtor must be in(1..?)\n"
- " -fCnfgFile:@ Get parms from config file\n"
- " -f :@ Get parms from s:scan.config\n"
- " -hxy :+ Highlight match with x color and pathname, y color.\n"
- " -i :@ Invert matching so nonmatching articles are printed\n"
- " -lxx :+ Line search with xx lines around target printed\n"
- " -n :+ Print line numbers with matched text(slower)\n"
- " -oOutFile :+-@ Send output to file\n"
- " -p :+-@ Always print file pathnames scanned\n"
- " -r :+-@ Recursively scan down directories\n"
- " -sArtSep :@ Article separator(def Article)\n"
- " -t :+ Truncate output to window width. Only works with -n\n"
- " -v :# Print version number. Other options nulled.\n"
- " -wWinSize :+@ Window size(def 16384 bytes). Mod(size,4) must be 0\n"
- " -x :# Print out more help info. Nulls other options\n"
- " -zWildPat :+-@ Decompres .lzh/.lha with int files matching WildPat\n"
- " -z :+-@ Decompress all .lzh and .lha files before scanning.\n"
- );
- CleanIt();
- exit(1);
- }
-
- /* If doing a line search, turn off inverted pattern matching. and set */
- /* article separator to a pattern that should never occur. */
- if (LineScan) {
- InvertMatch = 0;
- ArtSep = EOASep = LineSrchDelim;
- while (*EOASep != '\0') {
- EOASep++;
- }
- --EOASep;
- }
- else {
- if( (ColReq > LWIDTH) || (ColReq < 0) ) {
- ErrP("Column # for article sep too large or neg. Aborting...\n");
- }
- }
-
- /* For LH5 decompression, window size must be at least 8192.
- if( EnableLZHDecomp ) WinSiz = 16384;
-
- /* Init pointers and data so NextFile function can be called sequentially */
- InitNextFile(ArgVSav, ArgCSav, NumBefore, NumAfter);
-
- /* If inverted matching, set to not print out when 1st article header found */
- if (InvertMatch) { OutArt = 1; }
-
- /* Find 1st filename to scan and open it and read in 1st buffer and start */
- /* an asyncronous read to fill the 2nd buffer. */
-
- if ((FName = NextFile(ArgVSav)) != NULL) {
- if ((FHandle = xfropen(FName,&ReadNum)) == NULL) {
- ErrP("Problems opening 1st file to scan\n");
- }
- if (ReadNum < 0) {
- ErrP("Could not read any data from 1st file\n");
- }
- }
- else {
- ErrP("No files found to scan\n");
- }
-
- /* Get text width for later */
- if( !RealFile ) {
- TextWidth = WindowSize();
- }
-
- PathPrint(MatchNotFnd);
-
- LstOut = SOCB;
-
- /* parse out the min terms from command line argument */
- if (!HasCnfgF) {
- TmpTrm = strtok(LastArg++,"+|");
- if( strlen(TmpTrm) >= LWIDTH) {
- ErrP("A minterm in the pattern is too long. Aborting...\n");
- }
- while (TmpTrm != 0) {
- strcpy((char *)(MinTerm++), TmpTrm );
- TmpTrm = strtok(NULL,"+|");
- }
- }
-
- /* Convert any backslashes in minterm array to their equivalent char */
- MinPtr = &MinArray[0];
- while ( MinPtr != MinTerm ) {
- BackSlash(MinPtr);
- PndQues2Ast(MinPtr++);
- }
-
- /* Convert any backslashes in article separator to their equivalent char */
- BackSlash(ArtSep);
- EOASep = ArtSep;
- while (*EOASep != '\0') { EOASep++; } /* get pntr to end of art sep */
- --EOASep;
- if( (EOASep - ArtSep + 1 >= LWIDTH) || (EOASep - ArtSep + 1 >= MAXKWSZ))
- ErrP("Article separator too long. Aborting...\n");
-
- /* printout list of min terms, art sep, win size */
-
- #ifdef DEBUG
- printf("Window size is %ld \n", WinSiz);
- printf("Article separator is %s \n", ArtSep);
- MinPtr = &MinArray[0];
- while ( MinPtr != MinTerm ) {
- printf("%s \n",MinPtr++);
- }
- #endif
-
- /* Generate list of major terms from list of minterms. Note that 1 is */
- /* reserved for the article separator and 2 for the end of buffer key. */
- /* 0 is not used. If there are more than 127, there will be a slowdown */
- /* since the duplicate keyword algorithm must be used. */
-
- /* Create a table, indexed by major term number of what it's minterm */
- /* number is. It is called MajTrm2MinTrm. */
-
- /* Create a table, indexed by major term number, mapping it to a bit */
- /* number in the MinSatTbl. This is a number from 0 to 31 and is */
- /* assigned according to its order; the 1st major term encountered */
- /* for a new minterm in the pattern is assigned 0, and the next 1, etc. */
- /* This table is called MajTrm2BitNum. */
-
- /* Create a table, indexed by major term number, indicating if the */
- /* associated minterm is satisfied by this major term only, or not. */
- /* This table is called MinSatByMTOnly and contains 0 or 1. */
-
- /* Create a table, indexed by minterm number, containing a bit field, */
- /* and mask for the bit field. Every time a major term is found, a bit */
- /* is set in the appropriate bit field and the mask applied. If the */
- /* result is -1, then the minterm is satisfied. This table is called */
- /* MinSatTbl. */
-
- MinPtr = &MTArray[0];
- strcpy((char *)(MinPtr++), ArtSep);
- strcpy((char *)(MinPtr), EOBK);
-
- if ((LstMajTrm = GetMajTrms()) == NULL) {
- ErrP("Error building major term table\n");
- }
-
- MinPtr = &MTArray[0];
-
- #ifdef DEBUG
- while ( MinPtr != LstMajTrm ) {
- printf("%s \n",MinPtr++);
- }
- printf("number of minterms is %d \n",NumOfMinTrms);
- printf("number of majterms is %d \n",NumOfMajTrms);
- #endif
-
- /* Create a table, indexed by major term number, containing daisy */
- /* chained pointers of major terms found in the article. When the */
- /* article delimiter is found, this table is traversed to determine */
- /* which bit fields to reset. This table is initially nulled out so */
- /* detection of loops is possible. When the article delimiter is found, */
- /* the pointers are set back to null again. If a loop is detected, it's */
- /* pointer is not modified and the variable LastMT is not modified. */
- /* This table is called LastMTTbl. */
-
- for (i=0; i<NumOfMajTrms; i++) { LastMTTbl[i] = 0; }
- LastMT = 0;
-
- /* Clear SubPat table */
- ClrSubPat();
-
- /* If we find a two char article separator, increase its length to 3 so */
- /* it will be handled properly. Add a 0x20 char to the end. Later on we */
- /* will copy the subpat value from the 0x20 index to all other possible */
- /* indeces and shorten EOASep back to 2 char from the start. */
- if (strlen(ArtSep) < 3) { /* handle two char article separators */
- TwoCharArtSep = 1;
- *((char *)&MTArray[0] + 2) = 0x20;
- *((char *)&MTArray[0] + 3) = 0x00;
- *(ArtSep + 2) = 0x20;
- *(ArtSep + 3) = 0x00;
- EOASep++;
- }
-
- /* Initialize mixed case to lower case conversion table */
-
- for (i=0; i<256; i++) { LowrCs[i] = tolower(i); }
-
- /* Create table matching major term # to the length of the MT's longest */
- /* keyword containing none of the following characters: ?, &, *, and */
- /* not within brackets. Table is called MajTrm2MaxKeyLen. */
- /* Ignore 1st 3 major terms which are 0(reserved), 1(Article separator), */
- /* and 2(End of buffer). Note that in MTArray, article separator is */
- /* element 0 and EOB is element 1. Also note difference between MT number */
- /* and MT index. MT index of 2 or 3 matches MT number 1 and 4 or 5 */
- /* matches MT number 2. The 1st MT index is double the MT number. This is */
- /* because there is an odd and even keyword associated with each MT. */
-
- BrakIdx = AllocMem( (long)(MAXMTS*sizeof(BrakTyp1)), 0);
- BrakPtr = AllocMem( (long)(MAXMTS*sizeof(BrakTyp2)), 0);
-
- for (i=1; i<=NumOfMajTrms; i++) {
- MajTrm2MaxKeyLen[i] = 0;
- strcpy(Stf1, (char *)&MTArray[i-1]);
- strcpy(Stf3, (char *)&MTArray[i-1]);
- j = 0; k = 0; FndBrak = 0; ll = 0;
- while (Stf1[j]) { /* Remove stuff between */
- if (FndBrak) { /* brackets from major */
- if (Stf1[j] == ']') { /* term and put in Stf1 */
- FndBrak = 0;
- BrakIdx[i][k-1] = (unsigned char)ll; /* Limit size of mem rq */
- BrakPtr[i][ll] = AllocMem((long)256,MEMF_CLEAR);
- FreeItList[FrI++] = (char *)(BrakPtr[i][ll]);
- /* load a 1 into characters that will cause a match and 0 */
- /* into those that wont. */
- Inv = 0;
- if(Stf3[StrtBrk+1] == '^') { Inv = 1; StrtBrk++; }
- for (ci=StrtBrk+1; ci<j; ci++) {
- if (Stf3[ci] == '-') {
- for (t1 = Stf3[ci-1]; t1 != Stf3[ci+1]; t1++) {
- BrakPtr[i][ll][t1] = 1;
- BrakPtr[i][ll][toupper(t1)] = 1;
- }
- }
- else {
- BrakPtr[i][ll][Stf3[ci]] = 1;
- BrakPtr[i][ll][toupper(Stf3[ci])] = 1;
- }
- }
- if (Inv) for (t1=0; t1<256; t1++) BrakPtr[i][ll][t1] ^= 1;
- if(++ll >= MAXBRAKS) {
- ErrP("Too many bracket wildcards in major term\n");
- }
- }
- }
- else {
- if (Stf1[j] == '[') { FndBrak = 1; StrtBrk = j; }
- if (i < 3) {
- Stf1[k++] = Stf1[j]; /* Dont convert art sep */
- } /* to lower case */
- else {
- Stf1[k++] = LowrCs[Stf1[j]]; /* Keep [ in string for */
- } /* use as a delimiter */
- Stf3[j] = LowrCs[Stf3[j]];
- }
- j++;
- }
- Stf1[k] = 0;
- LenSt = strlen(Stf1);
- NBAryEnd[i] = (char *)&NoBrakAry[i] + LenSt - 1;
- strcpy( (char *)&NoBrakArray[i], Stf1); /* Save MT with no */
- /* bracket stuff */
- strcpy( (char *)&NoBrakAry[i], Stf1); /* This array will */
- /* not be overwritten. */
- Tok = strtok( Stf1, "?&*["); /* Tokenize Stf1 with */
- while (Tok != NULL) { /* delimiters of ?&*[. */
- if (strlen(Tok) > MajTrm2MaxKeyLen[i]) { /* Find longest token. */
- MajTrm2MaxKeyLen[i] = strlen(Tok);
- }
- Tok = strtok( NULL, "?&*[");
- }
- if (MajTrm2MaxKeyLen[i] < 2) {
- fprintf(stderr,"No keyword found in MT longer than 1 char, so\n");
- fprintf(stderr,"MT>>> %s <<<will be ignored.\n",Stf1);
- }
- }
-
- #ifdef DEBUG
- for (i=1; i<=NumOfMajTrms; i++) {
- printf("Maj Trm # %d has a max keyword length of %d\n",i,
- MajTrm2MaxKeyLen[i]);
- }
- #endif
-
- /* Sort all major terms except ArtSep and EOB by the width of their */
- /* longest keyword containing none of the following */
- /* characters: ?, &, *, and not within brackets. Throw out any minterms */
- /* with 1 char keywords. Smallest keyword length 1st. */
- /* ArtSep is hard coded to be 1st and EOB 2nd in prioritized list. */
-
- for (i=1; i<=NumOfMajTrms; i++) { MTNumSortedByKeyLen[i] = i; }
- qsort( &MTNumSortedByKeyLen[3], NumOfMajTrms-2,
- sizeof(MTNumSortedByKeyLen[3]), MaxVal);
-
- #ifdef DEBUG
- printf("Major terms ordered by keyword length follows\n");
- for (i=3; i<=NumOfMajTrms; i++) {
- printf("%d\n",MTNumSortedByKeyLen[i]);
- }
- #endif
-
- /* Go thru sorted MT list, looking for two two-character sub patterns */
- /* to indicate the possible presence of the major term in the input */
- /* buffer. Try to find sub patterns that do not appear often in the */
- /* buffer. 2 are needed because the search is word(two char) oriented. */
- /* One for the even byte and one for the odd byte. Note that both */
- /* have to be in the same keyword although not necessarily overlaping. */
-
- /* Starting with the major term with shortest keyword, and proceeding */
- /* to the MT with the largest, find a two-char subpattern within the MT */
- /* that has the lowest processing time overhead associated with it. */
- /* Assume the time overhead per 10000 chars, for a single subpat */
- /* is = SubPatFreq1*t1; where t1 is about 23usec. For a duplicate */
- /* subpat, the overhead is = SubPatFreq2*(t3+(NumOfDupls+1)*t2); */
- /* t3 is about 21usec and t2 about 64usec. These times are for a 68K */
- /* based 7 MHZ system with no wait states. Make sure, if duplicating */
- /* the subpat, that the base keyword is different. For right now, */
- /* print out an error and ignore the minterm if you cant find either */
- /* a nonduplicated subpat or a duplicated one with a different base */
- /* keyword. Force using dupl subpats when exceeding 128 major terms. */
-
- /* Build SubPat table concurrently with assigning two-char subpatterns. */
- /* Use the partially complete SubPat table to find NumOfDupls. */
-
- for (i=1; i<=NumOfMajTrms; i++) {
- NextMT = MTNumSortedByKeyLen[i];
- SavTEPat = 0;
- SavTOPat = 0;
- TotOH = 2147483647; /* max Overhead(usec) for odd and even sub pattern */
- Tok = strtok( (char *)&NoBrakArray[NextMT], "?&*["); /* Tokenize with */
- while (Tok != NULL) { /* delimiters of ?&*[. */
- if((TokLen = strlen(Tok)) >= 3) { /* SubPat must have > 2 chars */
- CurOddOH = 2147483647; /* Init to max pos overhead */
- CurEvenOH = 2147483647;
- FndALOGKeyWrd = 0; /* Found At Least One Good Key Word in token */
- for (j=0; j<TokLen-1; j=j+2) { /* Go thru token a word at a time */
- CurPat = (Tok[j] << 8) | Tok[j+1]; /* Construct test subpat */
- /* If SubPat table does not yet have an entry for CurPat, */
- /* and if the limit of 128 Odd and 128 Even SubPats has not */
- /* been reached, then compute the overhead for CurPat and if */
- /* less than the previous smallest SubPat overhead(for this */
- /* token) then save pointers to CurPat. */
- if (((MTNum = SubPat[CurPat]) == 0) && (i < 128)) {
- if ((OH = SubPatFreq(CurPat)*T1) < CurEvenOH) {
- CurEvenOH = OH;
- SavEPat = CurPat;
- SavTok = Tok;
- SavEIdx = j;
- FndALOGKeyWrd = 1;
- }
- }
- /* If there is currently an element in the SubPat table or if */
- /* we have exceeded the 128 possible major terms then */
- /* determine what the overhead for the subpat would be if it */
- /* would be put in the duplicate keyword table. If less than */
- /* the previous least overhead subpat, then save it. */
- else {
- if (MTNum > 5) { /* Not Article separator or End Of Buf */
- if ( DsplTb[MTNum] != 0) { /* One element in SubPat tbl */
- /* Overhead must take into account extra overhead 4 */
- /* current element and duplicate overhead for new el.*/
- /* OH = SubPatFreq*((t3+t2-t1) + (t3+2*t2)) */
- MulFct = 2*T3 + 3*T2 - T1;
- if ((OH = SubPatFreq(CurPat)*MulFct) < CurEvenOH) {
- /* if token is identical to token currently in */
- /* keyword table, bypass this keyword. */
- if (!memcmp(Tok,SvTbl[MTNum],TokLen)){
- continue;
- }
- CurEvenOH = OH;
- SavEPat = CurPat;
- SavTok = Tok;
- SavEIdx = j;
- FndALOGKeyWrd = 1;
- }
- }
- else { /* More than 1 element in table(duplicates). */
- NumDup = 1;
- TmpPtr = &DKWTbl[MTNum][3];
- /* Find number of duplicated subpatterns */
- while((TmpPtr=(char **)*(TmpPtr+3)) != NULL){NumDup++;}
- /* Overhead is for a duplicated subpattern */
- if( (NumDup+1) << ((1<<29)/1000000/T3) ) {
- MulFct = T2 + ((NumDup+1)*T3);
- }
- else {
- MulFct = (1<<29)/1000000/T3 + NumDup;
- }
- /* If overhead is less than previous least overhead */
- /* for other even subpats, then save pntr to CurPat */
- if ((OH = SubPatFreq(CurPat)*MulFct) < CurEvenOH) {
- /* if token is identical to token of another */
- /* duplicated keyword, bypass this keyword. */
- TmpPtr = &DKWTbl[MTNum][0];
- do {
- if (!memcmp(Tok,TmpPtr,TokLen)){
- continue;
- }
- }
- while((TmpPtr=(char **)*(TmpPtr+3)) != NULL);
- CurEvenOH = OH;
- SavEPat = CurPat;
- SavTok = Tok;
- SavEIdx = j;
- FndALOGKeyWrd = 1;
- }
- }
- }
- }
- }
- /* Now that we're finished finding the even subpattern with the */
- /* least overhead in the token, now find the same thing for the */
- /* odd subpat in the same token. */
- if (FndALOGKeyWrd) {
- FndALOGKeyWrd = 0;
- for (j=1; j<TokLen-1; j=j+2) {
- CurPat = (Tok[j] << 8) | Tok[j+1];
- if ((CurPat == SavEPat) && (i == 1)) {
- /* dont allow article separator 2 use duplicate keywrds */
- continue;
- }
- if (((MTNum = SubPat[CurPat]) == 0) && (i < 128)) {
- if ((OH = SubPatFreq(CurPat)*T1) < CurOddOH) {
- CurOddOH = OH;
- SavOPat = CurPat;
- SavOIdx = j;
- FndALOGKeyWrd = 1;
- }
- }
- else {
- if (MTNum > 5) { /* Not Article separator or End Of Buf */
- if ( DsplTb[MTNum] != 0) {
- if ((OH = SubPatFreq(CurPat)*T1) < CurOddOH) {
- /* if token is identical to token currently in */
- /* keyword table, bypass this keyword. */
- if (!memcmp(Tok,SvTbl[MTNum],TokLen)){
- continue;
- }
- CurOddOH = OH;
- SavOPat = CurPat;
- SavOIdx = j;
- FndALOGKeyWrd = 1;
- }
- }
- else {
- NumDup = 1;
- TmpPtr = &DKWTbl[MTNum][3];
- /* Find number of duplicated subpatterns */
- while((TmpPtr=(char **)*(TmpPtr+3)) != NULL) {
- NumDup++;
- }
- if( (NumDup+1) << ((1<<29)/1000000/T3) ) {
- MulFct = T2 + ((NumDup+1)*T3);
- }
- else {
- MulFct = (1<<29)/1000000/T3 + NumDup;
- }
- if ((OH = SubPatFreq(CurPat)*MulFct) < CurOddOH) {
- /* if token is identical to token of another */
- /* duplicated keyword, bypass this keyword. */
- TmpPtr = &DKWTbl[MTNum][0];
- do {
- if (!memcmp(Tok,TmpPtr,TokLen)){
- continue;
- }
- }
- while((TmpPtr=(char **)*(TmpPtr+3)) != NULL);
- CurOddOH = OH;
- SavOPat = CurPat;
- SavOIdx = j;
- FndALOGKeyWrd = 1;
- }
- }
- }
- }
- }
- }
- /* If the total minimum overhead for the current token is less */
- /* than that of the previously computed tokens then save that */
- /* tokens info. */
- if ((CurEvenOH + CurOddOH < TotOH) && FndALOGKeyWrd) {
- /* Total overhead in usec for 20000 characters searched */
- TotOH = CurEvenOH + CurOddOH;
- SavTOPat = SavOPat; /* Even SubPat with least overhead */
- SavTEPat = SavEPat; /* Odd SubPat with least overhead */
- SavTOIdx = SavOIdx; /* Index into token of odd SubPat */
- SavTEIdx = SavEIdx; /* Index into token of even SubPat */
- /* pntr to start of best token(not subpat) in MT */
- SavTTok = SavTok;
- #ifdef DEBUG
- printf("MT %d has best even subpat of %c %c\n",NextMT,
- SavTTok[SavTEIdx], SavTTok[SavTEIdx+1]);
- printf("MT %d has best odd subpat of %c %c\n",NextMT,
- SavTTok[SavTOIdx], SavTTok[SavTOIdx+1]);
- #endif
- }
- }
- else {
- if(TokLen == 2 && NextMT > 2 ) { /* SubPat must have > 1 chars */
- /* Handle 2 character sub patterns. Dont allow duplicate */
- /* keyword chaining, for now. */
- CurOddOH = 2147483647; /* Init to max pos overhead */
- CurEvenOH = 2147483647;
- FndALOGKeyWrd = 0; /* Fnd At Least One Good Key Word in token */
- CurPat = (Tok[0] << 8) | Tok[1]; /* Construct test subpat */
- /* If SubPat table does not yet have an entry for CurPat, */
- /* and if the limit of 128 Odd and 128 Even SubPats has not */
- /* been reached, then compute the overhead for CurPat and if */
- /* less than the previous smallest SubPat overhead(for this */
- /* token) then save pointers to CurPat. */
- if ((SubPat[CurPat] == 0) && (i < 128)) {
- CurEvenOH = SubPatFreq(CurPat)*T1;
- SavEPat = CurPat;
- SavTok = Tok;
- SavEIdx = 0;
- FndALOGKeyWrd = 1;
- }
- if (FndALOGKeyWrd) {
- for( j=0; j<256; j++ ) {
- CurPat = (Tok[1] << 8) | j;
- if( (CurPat == SavEPat) || (i >= 128)
- || (SubPat[CurPat] != 0) ) FndALOGKeyWrd = 0;
- }
- if( FndALOGKeyWrd == 1 ) {
- CurPat = ((CurPat>>8) << 8) | 0x41;
- CurOddOH = 1000000000 + SubPatFreq(CurPat)*T1;
- SavOPat = CurPat;
- SavOIdx = 1;
- }
- FndX = 1;
- for( j=0; j<256; j++ ) {
- CurPat = Tok[0] | j<<8;
- if( (CurPat == SavEPat) || (i >= 128)
- || (SubPat[CurPat] != 0) ) FndX = 0;
- }
- if( FndX == 1 && !TwoCharArtSep) {
- CurPat = (CurPat & 255) | 0x41<<8;
- if( 1000000000 + SubPatFreq(CurPat)*T1 < CurOddOH ) {
- CurOddOH = 1000000000 + SubPatFreq(CurPat)*T1;
- SavOPat = CurPat;
- SavOIdx = -1;
- FndALOGKeyWrd = 1;
- }
- }
- }
- /* If the total minimum overhead for the current token is less */
- /* than that of the previously computed tokens then save that */
- /* tokens info. */
- if ((CurEvenOH + CurOddOH < TotOH) && FndALOGKeyWrd) {
- /* Total overhead in usec for 20000 characters searched */
- TotOH = CurEvenOH + CurOddOH;
- SavTOPat = SavOPat; /* Even SubPat with least overhead */
- SavTEPat = SavEPat; /* Odd SubPat with least overhead */
- SavTOIdx = SavOIdx; /* Index into token of odd SubPat */
- SavTEIdx = SavEIdx; /* Index into token of even SubPat */
- /* pntr to start of best token(not subpat) in MT */
- SavTTok = SavTok;
- #ifdef DEBUG
- printf("MT %d has best even subpat of %c %c\n",NextMT,
- SavTTok[SavTEIdx], SavTTok[SavTEIdx+1]);
- printf("MT %d has best odd subpat of %c %c\n",NextMT,
- SavTTok[SavTOIdx], SavTTok[SavTOIdx+1]);
- #endif
- }
- }
- }
- Tok = strtok( NULL, "?&*["); /* Get next token of MT to look at */
- }
- Tok = SavTTok;
- /* If we have found the best odd and even SubPat of the MT, then put */
- /* the SubPats in the various tables for faster searching later. */
- if (TotOH != 2147483647) { /* Have indeed found the best SubPats */
- /* 1st do even SubPat */
- if ((MTNum = SubPat[SavTEPat]) == 0) { /* Non duplicated keyword */
- MTN = SubPat[SavTEPat] = NextMT<<1; /* Need to use */
- /* MTNumSortedByKeyLen[SubPat(x)] to */
- /* find Major Term Number later. */
- frstuppr = toupper(SavTEPat >> 8);
- scnduppr = toupper(SavTEPat & 255);
- SubPat[(frstuppr*256) + (SavTEPat&255)] = MTN;
- SubPat[(SavTEPat&0xFF00) + scnduppr] = MTN;
- SubPat[(frstuppr*256) + scnduppr] = MTN;
- TokStrt[MTN] = Tok - (char *)&NoBrakArray[NextMT];
- TokEnd[MTN] = TokStrt[MTN] + strlen(Tok) - 1;
- if (SavTEIdx == 0) { /* SubPat is 1st 2 chars of keyword(token) */
- if (strlen(Tok) < 4) { /* Keyword is 2 or 3 chars long */
- KWTbl[MTN][0] = Tok + 3; /* Pntr to start of rest of keywrd*/
- KWTbl[MTN][1] = Tok + 2; /* Pntr to end of rest of keyword */
- if( strlen(Tok) == 3 ) { /* Keyword is 3 chars long */
- FrstBt[MTN] = Tok[1]; /* 1st byte of rest of keyword */
- ScndBt[MTN] = Tok[2]; /* 2nd byte of rest of keyword */
- /* Displacemnt 2 where "rest of keywrd" should b in buf */
- DsplTb[MTN] = 128 - 1 + 0;
- }
- else {
- FrstBt[MTN] = Tok[0]; /* 1st byte of rest of keyword */
- ScndBt[MTN] = Tok[1]; /* 2nd byte of rest of keyword */
- /* Displacemnt 2 where "rest of keywrd" should b in buf */
- DsplTb[MTN] = 128 - 2 + 0;
- }
- SvTbl[MTN] = Tok;
- /* Displacement to start of keyword in buffer */
- FDsplTbl[MTN] = -2;
- }
- else {
- KWTbl[MTN][0] = Tok + 4;
- KWTbl[MTN][1] = Tok + strlen(Tok) - 1;
- FrstBt[MTN] = Tok[2];
- ScndBt[MTN] = Tok[3];
- DsplTb[MTN] = 128 - 0 + 0;
- SvTbl[MTN] = Tok;
- FDsplTbl[MTN] = -2;
- }
- }
- else {
- KWTbl[MTN][0] = Tok + 2; /* Pntr to start of rest of keyword */
- KWTbl[MTN][1] = Tok + strlen(Tok) - 1;
- FrstBt[MTN] = Tok[0];
- ScndBt[MTN] = Tok[1];
- DsplTb[MTN] = 128 - 2 - SavTEIdx;
- SvTbl[MTN] = Tok;
- FDsplTbl[MTN] = - SavTEIdx - 2;
- }
- if (MTN < 6) {
- DsplTb[MTN] = 0; /* EOB or article separator */
- if (MTN == 2 || MTN == 3) {
- DFASep[0] = 128 - 2 - SavTEIdx;
- }
- }
- }
- else { /* Duplicated keyword(more than 1 keyword for 1 subpattern) */
- MTN = SubPat[SavTEPat]; /* Need to use */
- /* MTNumSortedByKeyLen[SubPat(x)] to */
- /* find Major Term Number later. */
- DKWIdx = NextMT << 1;
- TokStrt[DKWIdx] = Tok - (char *)&NoBrakArray[NextMT];
- TokEnd[DKWIdx] = TokStrt[DKWIdx] + strlen(Tok) - 1;
- if (DsplTb[MTN] != 0) { /* Non Duplicated single entry */
- /* 1st remove the single entry and put it in the dupl tble */
- /* then put the new element in the dupl table also */
- DsplTb[MTN] = 0; /* Adding 2nd keyword that has same subpat */
- DKWTbl[MTN][0] = SvTbl[MTN]; /* Pntr 2 strt of rest of kwd */
- DKWTbl[MTN][1] = KWTbl[MTN][1]; /* Pntr 2 end of rest of kwd */
- DKWTbl[MTN][2] = (char *)FDsplTbl[MTN] + 128;
- DKWTbl[MTN][3] = (char *)&DKWTbl[DKWIdx];
- DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */
- DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */
- DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTEIdx); /* Displace */
- DKWTbl[DKWIdx][3] = NULL; /* link to next */
- FDsplTbl[DKWIdx] = - SavTEIdx - 2;
- }
- else { /* Duplicated entry */
- /* Put another duplicated entry in the dup keyword table */
- TmpPtr = &DKWTbl[MTN][0];
- while((TmpPtr=(char **)*(TmpPtr+3)) != NULL) { MTTmp = TmpPtr; }
- *(MTTmp+3) = (char *)&DKWTbl[DKWIdx];
- DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */
- DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */
- DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTEIdx); /* Displace */
- DKWTbl[DKWIdx][3] = NULL; /* link to next */
- FDsplTbl[DKWIdx] = - SavTEIdx - 2;
- }
- }
- /* Now put stuff from odd SubPat into various tables */
- if ((MTNum = SubPat[SavTOPat]) == 0) {
- if( strlen(SavTTok) > 2 ) {
- MTN = SubPat[SavTOPat] = (NextMT<<1)+1; /* Need to use */
- /* MTNumSortedByKeyLen[SubPat(x)] to*/
- /* find Major Term Number later. */
- frstuppr = toupper(SavTOPat >> 8);
- scnduppr = toupper(SavTOPat & 255);
- SubPat[(frstuppr*256) + (SavTOPat&255)] = MTN;
- SubPat[(SavTOPat&0xFF00) + scnduppr] = MTN;
- SubPat[(frstuppr*256) + scnduppr] = MTN;
- TokStrt[MTN] = Tok - (char *)&NoBrakArray[NextMT];
- TokEnd[MTN] = TokStrt[MTN] + strlen(Tok) - 1;
- KWTbl[MTN][0] = Tok + 2; /* Pntr to strt of rest of keyword */
- if (strlen(Tok) == 3) {
- /* Set to less than pntr 2 start so nothing chckd, since it*/
- KWTbl[MTN][1] = Tok; /* should already have been verified */
- }
- else {
- KWTbl[MTN][1] = Tok + strlen(Tok) - 1; /* Pntr 2 rest kwd */
- }
- FrstBt[MTN] = Tok[0]; /* 1st byte of rest of keyword */
- ScndBt[MTN] = Tok[1]; /* 2nd byte of rest of keyword */
- /* Displacement to where "rest of keywrd" should be in buffer */
- DsplTb[MTN] = 128 - 2 - SavTOIdx;
- SvTbl[MTN] = Tok;
- FDsplTbl[MTN] = - SavTOIdx - 2;
- if (MTN < 6) {
- DsplTb[MTN] = 0; /* EOB or article separator */
- if (MTN == 2 || MTN == 3) {
- DFASep[1] = 128 - 2 - SavTOIdx;
- }
- }
- }
- else {
- MTN = (NextMT<<1)+1;
- for( iij = 0; iij < 256; iij++ ) {
- if( SavTOIdx == -1 ) {
- CurPat = (SavTOPat & 255) | iij<<8;
- }
- else {
- CurPat = ((SavTOPat>>8)<<8) | iij;
- }
- SubPat[CurPat] = MTN;
- frstuppr = toupper(CurPat >> 8);
- scnduppr = toupper(CurPat & 255);
- SubPat[(frstuppr*256) + (CurPat&255)] = MTN;
- SubPat[(CurPat&0xFF00) + scnduppr] = MTN;
- SubPat[(frstuppr*256) + scnduppr] = MTN;
- }
- TokStrt[MTN] = Tok - (char *)&NoBrakArray[NextMT];
- TokEnd[MTN] = TokStrt[MTN] + strlen(Tok) - 1;
- KWTbl[MTN][0] = Tok+1; /* Pntr to strt of rest of keyword */
- /* Set to less than pntr 2 start so nothing chckd, since it */
- KWTbl[MTN][1] = Tok; /* should already have been verified */
- FrstBt[MTN] = (char)(SavTEPat>>8); /* 1st byteofrestof keywrd */
- ScndBt[MTN] = (char)SavTEPat; /* 2nd byte of rest of keyword */
- /* Displacement to where "rest of keywrd" should be in buf */
- DsplTb[MTN] = 128 - 2 - SavTOIdx;
- SvTbl[MTN] = Tok;
- FDsplTbl[MTN] = - 2 - SavTOIdx;
- }
- }
- else { /* Duplicated keyword(more than 1 keyword for 1 subpattern) */
- MTN = SubPat[SavTOPat]; /* Need to use */
- /* MTNumSortedByKeyLen[SubPat(x)] to */
- /* find Major Term Number later. */
- DKWIdx = (NextMT << 1) + 1;
- TokStrt[DKWIdx] = Tok - (char *)&NoBrakArray[NextMT];
- TokEnd[DKWIdx] = TokStrt[DKWIdx] + strlen(Tok) - 1;
- if (DsplTb[MTN] != 0) { /* Non Duplicated single entry */
- /* 1st remove the single entry and put it in the dupl tble */
- /* then put the new element in the dupl table also */
- DsplTb[MTN] = 0; /* Adding 2nd keyword that has same subpat */
- DKWTbl[MTN][0] = SvTbl[MTN]; /* Pntr 2 strt of rest of kwd */
- DKWTbl[MTN][1] = KWTbl[MTN][1]; /* Pntr 2 end of rest of kwd */
- DKWTbl[MTN][2] = (char *)FDsplTbl[MTN] + 128;
- DKWTbl[MTN][3] = (char *)&DKWTbl[DKWIdx];
- DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */
- DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */
- DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTOIdx); /* Displace */
- DKWTbl[DKWIdx][3] = NULL; /* link to next */
- FDsplTbl[DKWIdx] = - SavTOIdx - 2;
- }
- else { /* Duplicated entry */
- /* Put another duplicated entry in the dup keyword table */
- TmpPtr = &DKWTbl[MTN][0];
- while((TmpPtr=(char **)*(TmpPtr+3)) != NULL) {MTTmp = TmpPtr;}
- *(MTTmp+3) = (char *)&DKWTbl[DKWIdx];
- DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */
- DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */
- DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTOIdx); /* Displace */
- DKWTbl[DKWIdx][3] = NULL; /* link to next */
- FDsplTbl[DKWIdx] = - SavTOIdx - 2;
- }
- }
- }
- /* Could not find at least one good keyword in all the tokens of a */
- /* major term. A good keyword has at least one set of unique even and */
- /* odd 2 char subpats and the keyword must not be the same as any */
- /* other keyword already in the table that has the same subpat. */
- /* For right now, print out an error and exit. Needs to be handled */
- /* better later. This should not happen very often at all. */
- else {
- if(i==1) {
- fprintf(stderr,"Article separators must have at least 2 unique\n");
- fprintf(stderr,"characters. Try adding a \\n or space to it.\n");
- ErrP("Aborting...\n");
- }
- else {
- fprintf(stderr,"For search pattern %s, didn't\n",
- &MTArray[NextMT-1]);
- fprintf(stderr,"find a unique even & odd 2 char subpat. MT=%d.\n",
- NextMT);
- fprintf(stderr,"Try adding a unique char to search pattern.\n");
- ErrP("Aborting...\n");
- }
- }
- }
- /* If the article separator is only two chars long, reset its end pointer */
- /* back to what it originally was. Also copy the subpat table value for */
- /* the 0x20 prefix to all other possible prefixes. */
- if( TwoCharArtSep ) {
- EOASep--;
- *(ArtSep + 2) = 0x00;
- SPIdx = *(ArtSep + 1) << 8;
- for( iij = 0; iij < 256; iij++ ) {
- if( SubPat[SPIdx + iij] == 0 ) {
- SubPat[SPIdx + iij] = 3;
- }
- else {
- if( SubPat[SPIdx + iij] > 3 ) {
- fprintf(stderr,"\nOdd Art sep srch pat(hex): %lx",SPIdx+iij);
- if( SubPat[SPIdx+iij] & 1 == 1 ) {
- fprintf(stderr,"\nThe odd part(don't care) of the following "
- "srch pat conflicts(hex): %lx",
- *(short *)SvTbl[SubPat[SPIdx+iij]] );
- }
- else {
- fprintf(stderr,"\nEven srch pat that conflicts is(hex): %lx",
- *SvTbl[SubPat[SPIdx+iij]] );
- }
- fprintf(stderr,"\nSrch pat is in major term %d\n",
- (unsigned int)(SubPat[SPIdx+iij] >> 1));
- ErrP("Err:Try adding another char to conflicting srch pat.\n");
- }
- }
- }
- }
- LastInLast = 0;
- LastPntEnd = NULL;
- /* default start of article so nothing bad will happen if there are no */
- /* article separators in the file */
- CurArtStrt = BufIdx;
-
- /* StrtTime = clock(); */
- while (1==1) {
- MTIdx = FastSearch(BufIdx);
- MTNum = MTIdx >> 1;
- if (MTIdx > 5) {
- /* Since keyword was found, need to make sure the rest of the major term */
- /* matches also, before declaring major term satisfied. */
- if (!OutArt && FindRestOfMT(BufIdx, MTIdx)) {
- if (!MinSatByMTOnly[MTNum] && !LineScan) {
- /* Set bit indicating MT satisfied. */
- TmpT = MajTrm2BitNum[MTNum];
- MinSatTbl[MajTrm2MinTrm[MTNum]][1] |= 1 << TmpT;
- /* Since major term was satisfied, need to check MinSatTbl to see if */
- /* minterm is fully satisfied. */
- Indx = MajTrm2MinTrm[ MTNum];
- if ((MinSatTbl[Indx][0] | MinSatTbl[Indx][1]) == -1 ) {
- /* Since minterm was satisfied, we set a flag "OutArt" so that when next */
- /* article separator or "End of Data" is encountered, the article will */
- /* be printed out. */
- OutArt = 1;
- }
- /* Add major term to chain of those needing to be reset at "End Of Article". */
- /* First check for a loop. A loop is when a major term occurred more than */
- /* once in the article. */
- if (!LastMTTbl[ MTNum] && (MTNum != LastMT)) {
- LastMTTbl[ LastMT] = MTNum;
- LastMT = MTNum;
- }
- }
- /* Comes here if only 1 maj term needs to be satisfied for minterm to be */
- /* satisfied. In this case, set flag to print out article at next EOA. */
- /* Also comes here if the Line Scan flag is set. */
- else {
- if (LineScan) {
- PathPrint(MatchFnd);
- /* if match found in overlapped portion at end of buffer,*/
- /* set next overlap size so the match will not be found */
- /* again once the new buffer is switched to. */
-
- if( EOCB - BufIdx < MAXKWSZ) KeyWrdOvlp = EOCB - BufIdx;
-
- /* Find start of word with match in it for delayed print */
- /* This is needed so that the front part of the matched */
- /* word will be highlighted when right context from the */
- /* previous match overlaps this match. The front would */
- /* not normally be highlighted if the two char keyword */
- /* does not occur at the start of the matched word. */
- KeyStrt = BufIdx + FDsplTbl[MTIdx];
- WrdStrt = KeyStrt - 1;
- while( isalnum((int)(*WrdStrt)) && WrdStrt >= SOCB) {
- WrdStrt--;
- }
- WrdStrt++;
- if( KeyStrt - WrdStrt > 256 ) WrdStrt = KeyStrt - 256;
- SizeDiff = KeyStrt - WrdStrt;
- if( DlydPntNextBuf ) {
- /* Comes here if the right context from the previous match overlapped two */
- /* buffers. The part in the previous buffer has been printed out already. */
- /* Determine where the right context stops in pres buf by counting line */
- /* feeds. */
- DlydPntNextBuf = 0;
- PntPtr = SOCB;
- StrtLFC = LFCnt - 1;
- while((PntPtr < EOCB) && (LFCnt <= PrntWidth)) {
- if(*PntPtr == '\n') { LFCnt++; }
- if( PntPtr - SOCB > 256 * (LFCnt - StrtLFC) ) {
- LFCnt = PrntWidth + 1;
- CutIt = 1;
- }
- PntPtr++;
- }
- DlydPntStart = SOCB;
- DlydPntSize = PntPtr - SOCB;
- DlydPntRightContext = 1;
- LastInLast = 0; /* 1st print in pres buf */
- }
- DlydPntEnd = DlydPntStart + DlydPntSize;
- if( DlydPntRightContext && DlydPntEnd < WrdStrt) {
- /* Comes here if we know that the right context from the previous match */
- /* does not extend up to the present match. Print it out. */
- fxwrite(DlydPntStart,BlkSize,DlydPntSize,StrmPtr,NoKeyW);
- DlydPntRightContext = 0;
- RightStrt = DlydPntEnd;
- LastPntEnd = DlydPntEnd - 1;
- }
- if( DlydPntRightContext && DlydPntEnd >= WrdStrt) {
- /* Comes here if we know that the right context from the previous match */
- /* extends all the way to the present match, and farther. Chop it off so */
- /* it only extends up to the start of the present matched word and print */
- /* the context. */
- DlydPntRightContext = 0;
- DlydPntSize = WrdStrt - DlydPntStart;
- if( DlydPntSize < 0 ) {
- fxwrite(DlydPntStart,BlkSize,ZeroLong,StrmPtr,NoKeyW);
- NumBlksToWrt = TokEnd[MTIdx] - TokStrt[MTIdx] + 1 +
- SizeDiff + DlydPntSize;
- RightStrt = DlydPntStart + NumBlksToWrt;
- TotTokLen = TokEnd[MTIdx] - TokStrt[MTIdx] + 1;
- /* Now print out matched word with keyword in it for present match */
- fxwrite(DlydPntStart,BlkSize,NumBlksToWrt,StrmPtr,HasKeyWrd);
- }
- else {
- /* print stuff between last and present match */
- fxwrite(DlydPntStart,BlkSize,DlydPntSize,StrmPtr,NoKeyW);
- NumBlksToWrt = TokEnd[MTIdx] - TokStrt[MTIdx] + 1 +
- SizeDiff;
- RightStrt = WrdStrt + NumBlksToWrt;
- TotTokLen = TokEnd[MTIdx] - TokStrt[MTIdx] + 1;
- /* Now print out matched word with keyword in it for present match */
- fxwrite(WrdStrt,BlkSize,NumBlksToWrt,StrmPtr,HasKeyWrd);
- }
- }
- else {
- /* Print out plus and minus PrntWidth lines around match. */
- FrstPnt = 1;
- LFCnt = 0;
- StrtLFC = LFCnt - 1;
- PntPtr = BufIdx + FDsplTbl[MTIdx] - 1;
- StrtPP = PntPtr;
- while((PntPtr >= SOCB) && (LFCnt <= PrntWidth) &&
- ((PntPtr > LastPntEnd) || LastInLast)) {
- if(*PntPtr == '\n') { LFCnt++; }
- if( StrtPP - PntPtr > 256 * (LFCnt - StrtLFC) )
- LFCnt = PrntWidth + 1;
- PntPtr--;
- }
- /* print out any context info in previous buffer */
- if((PntPtr < SOCB) && (CurArtStrt != SOCB) &&
- (LFCnt <= PrntWidth)) {
- StrtLFC = LFCnt - 1;
- PntPtr = EndOfPrevBuf + (PntPtr - SOCB);
- StrtPP = PntPtr;
- while((PntPtr >= CurArtStrt) && (LFCnt <= PrntWidth) &&
- ((PntPtr > LastPntEnd) && LastInLast)) {
- if(*PntPtr == '\n') { LFCnt++; }
- if( StrtPP - PntPtr > 256 * (LFCnt - StrtLFC) )
- LFCnt = PrntWidth + 1;
- PntPtr--;
- }
- if(PntPtr + 1 < EndOfPrevBuf) {
- NumBlksToWrt = EndOfPrevBuf - PntPtr - 1;
- if(FrstPnt && (*(PntPtr+1) == '\n')) {
- NumBlksToWrt--;
- PntPtr++;
- FrstPnt = 0;
- }
- if(NumBlksToWrt > 0) {
- fywrite(PntPtr+1,BlkSize,NumBlksToWrt,StrmPtr);
- }
- }
- PntPtr = SOCB - 1;
- LastPntEnd = PntPtr;
- }
- /* print out context info in current buffer to the left of, and including */
- /* the matched keyword. */
- PntPtr++;
- NumBlksToWrt = BufIdx + FDsplTbl[MTIdx] + TokEnd[MTIdx] -
- TokStrt[MTIdx] - PntPtr + 1;
- RightStrt = PntPtr + NumBlksToWrt;
- if(PntPtr + NumBlksToWrt - 1 > LastPntEnd || LastInLast) {
- if (!LastInLast && (PntPtr <= LastPntEnd)) {
- PntPtr = LastPntEnd + 1;
- NumBlksToWrt = RightStrt - PntPtr;
- }
- if(FrstPnt && (*PntPtr == '\n')) {
- NumBlksToWrt--;
- PntPtr++;
- FrstPnt = 0;
- }
- if(NumBlksToWrt > 0) {
- TotTokLen = TokEnd[MTIdx] - TokStrt[MTIdx] + 1;
- fxwrite(PntPtr,BlkSize,NumBlksToWrt,StrmPtr,
- HasKeyWrd);
- RightStrt = PntPtr + NumBlksToWrt;
- }
- }
- }
- /* Save pointer to last item printed so it won't be */
- /* printed again. */
- LastPntEnd = RightStrt - 1;
- /* print out context info in the present buf, to the right of the matched */
- /* keyword. */
- LFCnt = 0;
- StrtLFC = -1;
- PntPtr = RightStrt;
- while((PntPtr < EOCB) && (LFCnt <= PrntWidth)) {
- if(*PntPtr == '\n') { LFCnt++; }
- if( PntPtr - RightStrt > 256 * (LFCnt - StrtLFC) ) {
- LFCnt = PrntWidth + 1;
- CutIt = 1;
- }
- PntPtr++;
- }
- NumBlksToWrt = PntPtr - RightStrt;
- if( NumBlksToWrt > 0 || LastInLast || PntPtr==EOCB) {
- /* Set flag so that when EOCB or next match is */
- /* found, right context will be printed out. */
- DlydPntRightContext = 1;
- DlydPntStart = RightStrt;
- DlydPntSize = NumBlksToWrt;
- }
- LastInLast = 0;
- }
- else {
- OutArt = 1;
- }
- }
- }
- }
- else {
- if (MTIdx < 4) { /* MT of 2 or 3 indicates article separator */
- /* If flag is set specifying that article separator needs to be in a */
- /* certain column, then check for this. */
- ColOk = 1;
- if (ColReq) {
- /* if article separator is not in right column, reset ColOk to 0 */
- if( *(BufIdx + FDsplTbl[MTIdx] - ColReq) == '\n') {
- TmpP = BufIdx + FDsplTbl[MTIdx];
- for( BP=TmpP-ColReq+1; BP<TmpP; BP++) {
- if( *BP == '\n') { ColOk = 0; break; }
- }
- }
- else {
- ColOk = 0;
- }
- }
- if (ColOk && !LineScan) {
- /* If flag "OutArt" is set, then print out article */
- if (OutArt != InvertMatch) {
- if( WroteOverIt ) {
- fprintf(stderr,"Warning: Article was too long. Did "
- "not print out front part of article.\n Suggest"
- " increasing window size if not LHA.\n");
- /* if( !RealFile ) ErrP("Aborting...\n"); */
- }
- PathPrint(MatchFnd);
- TmpCS2 = TmpCS = BufIdx + FDsplTbl[MTIdx];
- /* if article sep is found in the middle of a line, back */
- /* up until a line feed is found and use as strt of art. */
- for( iii=TmpCS2; iii>TmpCS2-LWIDTH; iii--) {
- if( *iii == '\n') { TmpCS = iii + 1; break; }
- }
- if( !PrntPrevPrev ) {
- TmpCS2 = CurArtStrt;
- /* if article sep is found in the middle of line, back */
- /* up until line feed is found and use as strt of art. */
- for( iii=TmpCS2; iii>TmpCS2-LWIDTH; iii--) {
- if( *iii == '\n') { CurArtStrt = iii + 1; break; }
- }
- }
- if (ArtInPrevBuf) {
- /* take care of case where article sep is found at */
- /* the very end of previous buf and the same article*/
- /* separator found before the start of current buf. */
- NumBlksToWrt = EndOfPrevBuf - CurArtStrt;
- NumBlksToWrt2 = TmpCS - SOCB;
- if (NumBlksToWrt + NumBlksToWrt2 > MAXKWSZ) {
- if (!RealFile) {
- /* set all nonprinting chars to blanks */
- for (iii=CurArtStrt; iii<EndOfPrevBuf; iii++) {
- bufchr = (int)(*iii);
- if (!isprint(bufchr) && *iii != '\n') *iii = ' ';
- }
- }
- /* take care of case where article separator */
- /* straddles the end of buffer. */
- if(NumBlksToWrt2 < 0){ NumBlksToWrt += NumBlksToWrt2; }
- /* write out part of article in previous buffer */
- fwrite(CurArtStrt,BlkSize,NumBlksToWrt,StrmPtr);
- }
- CurArtStrt = SOCB;
- }
- NumBlksToWrt = TmpCS - CurArtStrt;
- if (!RealFile) {
- /* set all nonprinting chars to blanks */
- for (iii=CurArtStrt; iii<TmpCS; iii++) {
- bufchr = (int)(*iii);
- if (!isprint(bufchr) && *iii != '\n') {
- *iii = ' ';
- }
- }
- }
- if (NumBlksToWrt > 0) {
- fwrite(CurArtStrt,BlkSize,NumBlksToWrt,StrmPtr);
- }
- }
- WroteOverIt = 0;
- PrntPrevPrev = 0;
- OutArt = 0;
- ArtInPrevBuf = 0;
- /* Reset list of partially satisfied minterms since article separator */
- /* was found. */
- Indx = 0;
- while (MTIndx = LastMTTbl[ Indx]) {
- TmpT = MajTrm2BitNum[MTIndx];
- MinSatTbl[MajTrm2MinTrm[MTIndx]][1] ^= 1 << TmpT;
- LastMTTbl[ Indx] = 0;
- Indx = MTIndx;
- }
- LastMT = 0;
- /* Mark start of article so if pattern is found in it, the whole article */
- /* can be printed out */
- CurArtStrt = BufIdx + FDsplTbl[MTIdx];
- /* If, over 5 articles, a few two-char subpatterns dominate(# of subpat */
- /* hits is > threshold), reselect a new two-char subpat for the */
- /* corresponding major terms. */
- }
- }
- else { /* Must be at End Of Buffer */
- /* Switch buffers since EOB found */
- /* Note that this could possibly be sped up if open on next file can */
- /* occur while last buffer of current file is scanned. */
- /* EndTime = clock(); */
- /* printf("time1= %lu and time2= %lu \n",StrtTime,EndTime); */
- /* TotTime += EndTime - StrtTime; */
-
- /* If we had a match in the buffer before the buffer before the */
- /* one we are about to switch to, or earlier, print out the */
- /* previous buffer's contents. */
- if (ArtInPrevBuf) {
- if (OutArt != InvertMatch && !LineScan) {
- PathPrint(MatchFnd);
- if( WroteOverIt ) {
- fprintf(stderr,"Warning: Article was too long. Did "
- "not print out front part of article.\n Suggest"
- " increasing window size if not LHA.\n");
- /* if( !RealFile ) ErrP("Aborting...\n"); */
- }
- if (!RealFile) {
- /* set all nonprinting chars to blanks */
- for (iii=CurArtStrt; iii<EndOfPrevBuf; iii++) {
- bufchr = (int)(*iii);
- if (!isprint(bufchr) && *iii != '\n') {
- *iii = ' ';
- }
- }
- }
- NumBlksToWrt = EndOfPrevBuf - CurArtStrt;
- fwrite(CurArtStrt,BlkSize,NumBlksToWrt,StrmPtr);
- }
- else {
- WroteOverIt = 1;
- }
- /* If "Start of Article" is in buffer being overwritten by async read, */
- /* set "Start of Article" to beginning of next buffer after one being */
- /* overwritten. This is the same as setting it to the start of the */
- /* buffer just finished being used. */
- CurArtStrt = SOCB;
- PrntPrevPrev = 1;
- }
- else { ArtInPrevBuf = 1; }
- EndOfPrevBuf = EOCB;
- LastInLast = 1;
- if( DlydPntNextBuf ) {
- DlydPntNextBuf = 0;
- PntPtr = SOCB;
- StrtLFC = LFCnt - 1;
- while((PntPtr < EOCB) && (LFCnt <= PrntWidth)) {
- if(*PntPtr == '\n') { LFCnt++; }
- if( PntPtr - SOCB > 256 * (LFCnt - StrtLFC) ) {
- LFCnt = PrntWidth + 1;
- CutIt = 1;
- }
- PntPtr++;
- }
- DlydPntStart = SOCB;
- DlydPntSize = PntPtr - SOCB;
- DlydPntRightContext = 1;
- }
- if( DlydPntRightContext) {
- /* print right context from previous match if line-scan */
- /* option set */
- fxwrite(DlydPntStart,BlkSize,DlydPntSize,StrmPtr,NoKeyW);
- DlydPntRightContext = 0;
- if( LFCnt <= PrntWidth ) DlydPntNextBuf = 1;
- if( (DlydPntStart + DlydPntSize) == EOCB && *(EOCB-1) != '\n')
- MayNeedLF = 1;
- }
- SavEOCB = EOCB;
- SavSOCB = SOCB;
- if( LineNum ) { CountCR(); }
- /* Note that on both normal, LZH archive files, and internal */
- /* LZH archive files, an extra read of -1 bytes follows the */
- /* last read of a nonzero # of bytes. When OpenNew is on return,*/
- /* it sets things up as if it did a read of zero bytes, even */
- /* though it returns -1. */
- if( xfrread(FHandle) <= 0 ) {
- KeyWrdOvlp = MAXKWSZ;
- if (OutArt != InvertMatch && !LineScan) {
- PathPrint(MatchFnd);
- if( !PrntPrevPrev ) {
- TmpCS2 = CurArtStrt;
- /* if article sep is found in the middle of line, back */
- /* up until line feed is found and use as strt of art. */
- for( iii=TmpCS2; iii>TmpCS2-LWIDTH; iii--) {
- if( *iii == '\n') { CurArtStrt = iii + 1; break; }
- }
- }
- NumBlksToWrt = EndOfPrevBuf - CurArtStrt;
- if (!RealFile) {
- /* set all nonprinting chars to blanks */
- for (iii=CurArtStrt; iii<EndOfPrevBuf; iii++) {
- bufchr = (int)(*iii);
- if (!isprint(bufchr) && *iii != '\n') {
- *iii = ' ';
- }
- }
- }
- fwrite(CurArtStrt,BlkSize,NumBlksToWrt,StrmPtr);
- }
-
- if( MayNeedLF ) { fwrite("\n",1,1,StrmPtr); MayNeedLF = 0; }
- if( !LON ) { /* if not doing an lzh or finished with lzh */
- LON = 1;
- xfrclose(FHandle);
- if ((FName=NextFile(ArgVSav)) != NULL) {
- if ((FHandle = xfropen(FName,&ReadNum)) == NULL) {
- ErrP("Problems opening file to scan\n");
- }
- if (ReadNum < 0) {
- ErrP("Could not read any data from file\n");
- }
- }
- else {
- /* printf("time1= %lu ;time2= %lu \n",StrtTime,EndTime);*/
- /* EndTime = clock(); */
- /* TotTime += EndTime - StrtTime; */
- /* printf("total time(50ths of sec) = %lu\n",TotTime); */
- if(!RealFile && AlwaysPrint && !Mtch1st) {
- fwrite("\n",1,1,StrmPtr);
- }
- CleanIt();
- exit(0); /* terminate normally */
- }
- }
-
- /* StrtTime = clock(); */
- PathPrint(MatchNotFnd);
- NumOfCR = 0;
- NumTot = -1;
- LstOut = SOCB;
- OutArt = 0;
- ArtInPrevBuf = 0;
- PrntPrevPrev = 0;
- DlydPntNextBuf = 0;
- WroteOverIt = 0;
- LastInLast = 0;
- LastPntEnd = NULL;
- /* If inverted matching, set to not print out when 1st */
- /* article header found */
- if (InvertMatch) { OutArt = 1; }
- /* default start of article so nothing bad will happen */
- /* if there are no article separators in the file */
- CurArtStrt = BufIdx;
- /* Reset list of partially satisfied minterms since */
- /* article separator was found. */
- Indx = 0;
- while (MTIndx = LastMTTbl[ Indx]) {
- TmpT = MajTrm2BitNum[MTIndx];
- MinSatTbl[MajTrm2MinTrm[MTIndx]][1] ^= 1 << TmpT;
- LastMTTbl[ Indx] = 0;
- Indx = MTIndx;
- }
- LastMT = 0;
- }
- else {
- if( ItsALZH ) PathPrint(MatchNotFnd);
- LstOut = SOCB;
- MayNeedLF = 0;
- }
- }
- }
- }
- }
-