home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
- /* multipattern matcher */
- #include <stdio.h>
- #define MAXPAT 256
- #define MAXLINE 1024
- #define MAXSYM 256
- #define MAXMEMBER1 4096
- #define MAXPATFILE 260000
- #define BLOCKSIZE 8192
- #define MAXHASH 8192
- #define mm 8191
- #define max_num 30000
- #define W_DELIM 252
- #define L_DELIM 10
-
- extern COUNT, FNAME, SILENT, FILENAMEONLY, num_of_matched;
- extern INVERSE;
- extern WORDBOUND, WHOLELINE, NOUPPER;
- extern unsigned char CurrentFileName[], Progname[];
- extern total_line;
-
- int LONG = 0;
- int SHORT = 0;
- int p_size=0;
- unsigned char SHIFT1[MAXMEMBER1];
- unsigned char tr[MAXSYM];
- unsigned char tr1[MAXSYM];
- struct pat_list {
- int index;
- struct pat_list *next;
- } *HASH[MAXHASH];
- struct pat_list *pt, *qt;
- unsigned char buf[MAXPATFILE+BLOCKSIZE];
- unsigned char pat_spool[MAXPATFILE+2*max_num+MAXPAT];
- unsigned char *patt[max_num];
- unsigned char pat_len[max_num];
-
-
- prepf(fp)
- int fp;
- {
- int length=0, i, p=1, pdx=0, num_pat;
- unsigned char *pat_ptr=pat_spool, temp[10];
- unsigned Mask = 15;
- int num_read;
-
- while((num_read = read(fp, buf+length, BLOCKSIZE)) > 0) {
- length = length + num_read;
- if(length > MAXPATFILE) {
- fprintf(stderr, "%s: maximum pattern file size is %d\n", Progname, MAXPATFILE);
- exit(2);
- }
- }
- buf[length] = '\n';
- i = 0; p=1;
- while(i<length) {
- patt[p] = pat_ptr;
- if(WORDBOUND) *pat_ptr++ = W_DELIM;
- if(WHOLELINE) *pat_ptr++ = L_DELIM;
- while((*pat_ptr = buf[i++]) != '\n') pat_ptr++;
- if(WORDBOUND) *pat_ptr++ = W_DELIM;
- if(WHOLELINE) *pat_ptr++ = L_DELIM; /* Can't be both on */
- *pat_ptr++ = 0;
- p++;
- }
- if(p>max_num) {
- fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num);
- exit(2);
- }
- for(i=1; i<20; i++) *pat_ptr = i; /* boundary safety zone */
- for(i=0; i< MAXSYM; i++) tr[i] = i;
- if(NOUPPER) {
- for(i='A'; i<= 'Z'; i++) tr[i] = i + 'a' - 'A';
- }
- if(WORDBOUND) {
- tr['\n'] = tr['\t'] = tr[' '] = tr[','] = tr[';'] = tr[':'] = W_DELIM;
- tr['!'] = tr['"'] = tr['?'] = tr['<'] = tr['>'] = W_DELIM;
- tr['='] = tr['['] = tr[']'] = W_DELIM;
- tr['('] = tr[')'] = tr['$'] = tr['/'] = tr['\\'] = W_DELIM;
- }
- for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask;
- #ifdef DEBUG
- for(i=1; i<p; i++) printf("%s\n", patt[i]); fflush(stdout);
- printf("\n\n");
- #endif
- num_pat = p-1;
- p_size = MAXPAT;
- for(i=1 ; i <= num_pat; i++) {
- p = strlen(patt[i]);
- pat_len[i] = p;
- if(p!=0 && p < p_size) p_size = p;
- }
- if(p_size == 0) {
- fprintf(stderr, "%s: the pattern file is empty\n");
- exit(2);
- }
- if(length > 400 && p_size > 2) LONG = 1;
- if(p_size == 1) SHORT = 1;
- for(i=0; i<MAXMEMBER1; i++) SHIFT1[i] = p_size - 2;
- for(i=0; i<MAXHASH; i++) {
- HASH[i] = 0;
- }
- for(i=1; i<= num_pat; i++) f_prep(i, patt[i]);
- }
-
-
- mgrep(fd)
- int fd;
- {
- register char r_newline = '\n';
- unsigned char text[2*BLOCKSIZE+MAXLINE];
- register int buf_end, num_read, i=0, j, start, end, residue = 0;
-
- text[MAXLINE-1] = '\n'; /* initial case */
- start = MAXLINE-1;
-
- while( (num_read = read(fd, text+MAXLINE, BLOCKSIZE)) > 0)
- {
- if(INVERSE && COUNT) countline(text+MAXLINE, num_read);
- buf_end = end = MAXLINE + num_read -1 ;
- while(text[end] != r_newline && end > MAXLINE) end--;
- residue = buf_end - end + 1 ;
- text[start-1] = r_newline;
- if(SHORT) m_short(text, start, end);
- else monkey1(text, start, end);
- if(FILENAMEONLY && num_of_matched) {
- printf("%s\n", CurrentFileName);
- return;
- }
- start = MAXLINE - residue;
- if(start < 0) {
- start = 1;
- }
- strncpy(text+start, text+end, residue);
- } /* end of while(num_read = ... */
- text[MAXLINE] = '\n';
- text[start-1] = '\n';
- if(residue > 1) {
- if(SHORT) m_short(text, start, end);
- else monkey1(text, start, end);
- }
- return;
- } /* end mgrep */
-
- countline(text, len)
- unsigned char *text; int len;
- {
- int i;
- for (i=0; i<len; i++) if(text[i] == '\n') total_line++;
- }
-
-
- monkey1( text, start, end )
- int start, end; register unsigned char *text;
- {
- register unsigned char *textend;
- register unsigned hash, i;
- register unsigned char shift;
- register int m1, j, Long=LONG;
- int pat_index, m=p_size;
- int MATCHED=0;
- register unsigned char *qx;
- register struct pat_list *p;
- unsigned char *lastout;
- int OUT=0;
-
- textend = text + end;
- m1 = m - 1;
- lastout = text+start+1;
- text = text + start + m1 ;
- while (text <= textend) {
- hash=tr1[*text];
- hash=(hash<<4)+(tr1[*(text-1)]);
- if(Long) hash=(hash<<4)+(tr1[*(text-2)]);
- shift = SHIFT1[hash];
- if(shift == 0) {
- hash=0;
- for(i=0;i<=m1;i++) {
- hash=(hash<<4)+(tr1[*(text-i)]);
- }
- hash=hash&mm;
- p = HASH[hash];
- while(p != 0) {
- pat_index = p->index;
- p = p->next;
- qx = text-m1;
- j = 0;
- while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
- if (j > m1 ) {
- if(pat_len[pat_index] <= j) {
- if(text > textend) return;
- num_of_matched++;
- if(FILENAMEONLY || SILENT) return;
- MATCHED=1;
- if(COUNT) {
- while (*text != '\n') text++;
- }
- else {
- if(!INVERSE) {
- if(FNAME) printf("%s: ",CurrentFileName);
- while(*(--text) != '\n');
- while(*(++text) != '\n') putchar(*text);
- printf("\n");
- }
- else {
- if(FNAME) printf("%s: ",CurrentFileName);
- while(*(--text) != '\n');
- if(lastout < text) OUT=1;
- while(lastout < text) putchar(*lastout++);
- if(OUT) {
- putchar('\n');
- OUT=0;
- }
- while(*(++text) != '\n');
- lastout=text+1;
- }
- }
- /*
- else {
- if(FNAME) printf("%s: ",CurrentFileName);
- while(*(--text) != '\n');
- while(*(++text) != '\n') putchar(*text);
- printf("\n");
- }
- */
- }
- }
- if(MATCHED) break;
- }
- if(!MATCHED) shift = 1;
- else {
- MATCHED = 0;
- shift = m1;
- }
- }
- text = text + shift;
- }
- if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
- }
-
- m_short( text, start, end )
- int start, end; register unsigned char *text;
- {
- register unsigned char *textend;
- register unsigned i;
- register int j;
- register struct pat_list *p, *q;
- register int pat_index;
- int MATCHED=0;
- int OUT=0;
- unsigned char *lastout;
- unsigned char *qx;
-
- textend = text + end;
- lastout = text + start + 1;
- text = text + start - 1 ;
- while (++text <= textend) {
- p = HASH[*text];
- while(p != 0) {
- pat_index = p->index;
- p = p->next;
- qx = text;
- j = 0;
- while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
- if(pat_len[pat_index] <= j) {
- if(text >= textend) return;
- num_of_matched++;
- if(FILENAMEONLY || SILENT) return;
- if(COUNT) {
- while (*text != '\n') text++;
- }
- else {
- if(FNAME) printf("%s: ",CurrentFileName);
- if(!INVERSE) {
- while(*(--text) != '\n');
- while(*(++text) != '\n') putchar(*text);
- printf("\n");
- MATCHED = 1;
- }
- else {
- while(*(--text) != '\n');
- if(lastout < text) OUT=1;
- while(lastout < text) putchar(*lastout++);
- if(OUT) {
- putchar('\n');
- OUT=0;
- }
- while(*(++text) != '\n');
- lastout=text+1;
- MATCHED = 1;
- }
- }
- }
- if(MATCHED) break;
- }
- MATCHED = 0;
- } /* while */
- if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
- }
-
- f_prep(pat_index, Pattern)
- unsigned char *Pattern ; int pat_index;
- {
- int i, j, m;
- register unsigned hash, Mask=15;
- m = p_size;
- for (i = m-1; i>=(1+LONG); i--) {
- hash = (Pattern[i] & Mask);
- hash = (hash << 4) + (Pattern[i-1]& Mask);
- if(LONG) hash = (hash << 4) + (Pattern[i-2] & Mask);
- if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
- }
- if(SHORT) Mask = 255; /* 011111111 */
- hash = 0;
- for(i = m-1; i>=0; i--) {
- hash = (hash << 4) + (tr[Pattern[i]]&Mask);
- }
- /*
- if(INVERSE) hash = Pattern[1];
- */
- hash=hash&mm;
- qt = (struct pat_list *) malloc(sizeof(struct pat_list));
- qt->index = pat_index;
- pt = HASH[hash];
- qt->next = pt;
- HASH[hash] = qt;
- }
-
-
-