home *** CD-ROM | disk | FTP | other *** search
- /*
- * grep.c -- Get Regular Expression and Print
- *
- * To get instructions for use invoke as:
- * grep ?
- *
- * The following was extensively modified for use with BDS C.
- * However, the code which constructs a match pattern and that which
- * checks for a match within a line (i.e the hard part) suffered only
- * minor cosmetic changes.
- *
- * Functionally, the pattern mark ": " was changed to ":_" to avoid an
- * inconvenience when using MicroShell.
- *
- * By the way, if you are running CP/M 2.x and are not running MicroShell,
- * you are doing yourself a great disservice. Among MANY other things,
- * it makes "users" useful. No, I have no financial interest in MicroShell.
- *
- * H.R.Moran, Jr. 2/28/83
- *
- * P.S.
- * If you are not convinced in re Microshell, you will want to uncomment
- * the #include "dio.h" and undefine MICROSHELL to get output redirection
- * internal to grep. Input re-direction doesn't work in spite of what the
- * instructions for use say. I haven't missed it.
- * H.R.M.
- */
-
- /*
- *
- *
- * The information in this document is subject to change
- * without notice and should not be construed as a commitment
- * by Digital Equipment Corporation or by DECUS.
- *
- * Neither Digital Equipment Corporation, DECUS, nor the authors
- * assume any responsibility for the use or reliability of this
- * document or the described software.
- *
- * Copyright (C) 1980, DECUS
- *
- *
- * General permission to copy or modify, but not for profit, is
- * hereby granted, provided that the above copyright notice is
- * included and reference made to the fact that reproduction
- * privileges were granted by DECUS.
- *
- */
-
-
-
- #include <bdscio.h>
- /*
- * #include "dio.h"
- */
- #define MICROSHELL /* don't use dio, Microshell will provide it */
-
-
- #define FILE struct _buf /* buffered-file handle type; struct _buf is not declared in this file (presumably bdscio.h -- confirm) */
- #define BOOL int /* Boolean i.e. YES or NO */
- #define PROC int /* PROCedure i.e. function returning no value */
-
- #define YES 1 /* BOOL true */
- #define NO 0 /* BOOL false */
- #define NIL 0 /* pointer to nothing */
-
- /*
- * CP/M Stuff
- *
- * Constants and the file control block layout used by fwild() and
- * fcb2nam() to expand a wild-card file specification into names[].
- */
-
- #define TBUF 0x80 /* Address of the Transient BUFfer */
- #define MAXENTRIES 256 /* max # of file name entries permitted at a time */
- #define NAMSIZ 8 /* bytes in the name field of an FCB */
- #define TYPSIZ 3 /* bytes in the type field of an FCB */
- #define NAMESIZE (NAMSIZ+TYPSIZ+4) /* chars in a full file name: "d:" + name + "." + type + NUL */
- #define FCB struct file_control_block
-
-
- FCB { /* CP/M File Control Block structure */
- char drive; /* 0 when no drive was given; fwild() then stores default disk + 1 */
- char name[NAMSIZ]; /* file name; fcb2nam() copies it up to the first ' ' */
- char type[TYPSIZ]; /* file type; fcb2nam() masks bit 7 of each byte and stops at ' ' */
- char extent; /* fwild() zeroes this to match only the first extent */
- char s[2]; /* fwild() zeroes these; required by some versions of CP/M */
- char rec_cnt; /* not referenced in this file */
- char d[16]; /* not referenced in this file */
- char reccnt; /* not referenced in this file; NOTE(review): name nearly duplicates rec_cnt -- confirm which CP/M field each maps to */
- char r[3]; /* I think this is required to use setfcb() */
- };
-
- #define setdisk(d) bdos(14, d) /* select a disk (fwild() passes a 0-based drive number) */
- #define srch1st(f) bdos(17, f) /* find 1'st match BDOS call; fwild() treats 255 as "no match" */
- #define srchnxt(f) bdos(18, f) /* find next match BDOS call; fwild() treats 255 as "no more" */
- #define getdisk() bdos(25) /* find current default disk */
- #define setdma(p) bdos(26, p) /* set dma address BDOS call */
- #define getkey() (bios(3) & 0x7f) /* non-echoing getchar(), bit 7 stripped */
-
- /* These definitions from DECUS */
-
- #define EOF (-1)
- #define NULL 0 /* I prefer NIL hrm */
- #define TRUE 1 /* I prefer YES hrm */
- #define FALSE 0 /* I prefer NO hrm */
-
- #define LMAX 512 /* Line Length (size of lbuf[]) */
- #define PMAX 256 /* Pattern Length (size of pbuf[]) */
-
- /*
- * Pattern Tokens
- *
- * compile() translates the pattern text into a string of these codes in
- * pbuf[]; pmatch() interprets them.
- */
-
- #define CHAR 1 /* followed by one literal character (stored lower-cased) */
- #define BOL 2 /* '^' beginning of line */
- #define EOL 3 /* '$' end of line */
- #define ANY 4 /* '.' any character except the terminating NUL */
- #define CLASS 5 /* '[...]' followed by a count byte and the members */
- #define NCLASS 6 /* '[^...]' same layout as CLASS, sense inverted */
- #define STAR 7 /* '*' prefix: zero or more of the sub-pattern that follows, up to ENDPAT */
- #define PLUS 8 /* '+' prefix: one or more of the sub-pattern that follows */
- #define MINUS 9 /* '-' prefix: the sub-pattern that follows is optional */
- #define ALPHA 10 /* ":a" alphabetic */
- #define DIGIT 11 /* ":d" digit */
- #define NALPHA 12 /* ":n" alphanumeric */
- #define PUNCT 13 /* ":_" any non-NUL character <= ' ' */
- #define RANGE 14 /* inside a class: followed by the low and high bounds */
- #define ENDPAT 15 /* end of a (sub-)pattern */
-
- /*
- * GLOBAL Declarations
- */
-
- char *mbufcpy(); /* copy a buffer masking msbit of each byte (declared only; no definition or call is visible in this file) */
- int strcmp(); /* compare 2 strings; passed to qsort() in fwild() */
-
- char *patt_str; /* copy of the argv[] which holds specified pattern */
- char names[MAXENTRIES][NAMESIZE]; /* directory name entry array */
- FCB fcb, *fcb_ptr; /* file control block stuff */
- char curr_disk; /* currently selected default disk */
- int num_entries; /* # of matched file entries in directory */
- int curr_entry; /* current matched entry */
-
- /* These declarations from DECUS */
-
- BOOL cflag; /* count of matching lines flag */
- BOOL fflag; /* file name for matching names flag */
- BOOL nflag; /* line number flag */
- BOOL vflag; /* inVert i.e. show non-matching lines flag */
- BOOL debug; /* print Debugging information */
-
- int nfile; /* number of command-line arguments that are file specifications (set in main()) */
- char *pp; /* pattern buffer pointer */
- char *file_name; /* name returned by fnext() for the file being searched */
- char lbuf[LMAX]; /* current input line, filled by fgetss() */
- char pbuf[PMAX]; /* Pattern buffer */
-
-
- /*
- * main()
- *
- * Collect the options and call the real worker routines
- */
-
- PROC
- main(argc, argv)
- int argc;
- char *argv[];
- {
- FILE *f, fff;
- BOOL gotpattern, gotcha;
- char *p;
- int c, i;
-
- #ifndef MICROSHELL
- dioinit(&argc, argv);
- #endif
- f = &fff;
- debug = cflag = nflag = vflag = fflag = NO;
- if ( argc <= 1 )
- usage("No arguments");
- if ( argc == 2 && argv[1][0] == '?' && argv[1][1] == 0 ) {
- help();
- exit(1);
- }
- nfile = argc-1; /* assume they are all file specifications for now */
- gotpattern = NO;
- for ( i = 1; i < argc; ++i ) {
- p = argv[i];
- if ( *p == '-' ) {
- ++p;
- while ( c = *p++ ) {
- switch ( tolower(c ) ) {
- case '?':
- help();
- break;
-
- case 'c':
- cflag = YES;
- break;
-
- case 'd':
- debug = YES;
- break;
-
- case 'f':
- fflag = YES;
- break;
-
- case 'n':
- nflag = YES;
- break;
-
- case 'v':
- vflag = YES;
- break;
-
- default:
- usage("Unknown flag");
- }
- }
- argv[i] = NIL;
- --nfile;
- }
- else if ( ! gotpattern ) {
- patt_str = p;
- compile(p);
- argv[i] = NIL;
- gotpattern = YES;
- --nfile;
- }
- }
- if ( ! gotpattern )
- usage("No pattern");
- fflag ^= ( nfile > 0 );
- for ( i = 1; i < argc; ++i ) {
- if ( p = argv[i] ) {
- if ( fwild(p) == 0 )
- printf("\nNo file: <%s>", p);
- else {
- for ( gotcha = NO; (file_name = fnext(f)); gotcha = YES )
- grep(f, file_name);
- if ( ! gotcha )
- cant(p);
- }
- }
- }
- #ifndef MICROSHELL
- dioflush();
- #endif
- }
-
- /*
- * compile()
- *
- * Compile the pattern into global pbuf[]
- */
-
- PROC
- compile(source)
- char *source; /* Pattern to compile */
- {
- char *cclass(); /* Compile class routine */
-
- char *s; /* Source string pointer */
- char *lp; /* Last pattern pointer */
- int c; /* Current character */
- int o; /* Temp */
- char *spp; /* Save beginning of pattern */
-
- s = source;
- if ( debug )
- printf("Pattern = \"%s\"\n", s);
- pp = pbuf;
- while ( c = *s++ ) {
- /*
- * STAR, PLUS and MINUS are special.
- */
- if ( c == '*' || c == '+' || c == '-' ) {
- if ( pp == pbuf || (o=pp[-1]) == BOL ||
- o == EOL || o == STAR ||
- o == PLUS || o == MINUS)
- badpat("Illegal occurrance op.", source, s);
- store(ENDPAT);
- store(ENDPAT);
- spp = pp; /* Save pattern end */
- while ( --pp > lp ) /* Move pattern down */
- *pp = pp[-1]; /* one byte */
- *pp = (c == '*') ? STAR :
- (c == '-') ? MINUS : PLUS;
- pp = spp; /* Restore pattern end */
- continue;
- }
- /*
- * All the rest.
- */
- lp = pp; /* Remember start */
- switch ( c ) {
-
- case '^':
- store(BOL);
- break;
-
- case '$':
- store(EOL);
- break;
-
- case '.':
- store(ANY);
- break;
-
- case '[':
- s = cclass(source, s);
- break;
-
- case ':':
- if ( *s ) {
- c = *s++;
- switch ( tolower(c) ) {
-
- case 'a':
- store(ALPHA);
- break;
-
- case 'd':
- store(DIGIT);
- break;
-
- case 'n':
- store(NALPHA);
- break;
-
- case '_':
- store(PUNCT);
- break;
-
- default:
- badpat("Unknown : type", source, s);
-
- }
- break;
- }
- else
- badpat("No : type", source, s);
-
- case '\\':
- if ( *s )
- c = *s++;
-
- default:
- store(CHAR);
- store(tolower(c));
- }
- }
- store(ENDPAT);
- store(0); /* Terminate string */
- if ( debug ) {
- for ( lp = pbuf; lp < pp; ) {
- if ( (c = (*lp++ & 0xff) ) < ' ' || c > 0x7f )
- printf("\\%02x ", c);
- else
- printf("%c ", c);
- }
- printf("\n");
- }
- }
-
-
- /*
- * grep()
- *
- * Scan the file for the pattern in pbuf[]
- */
-
- PROC
- grep(fp, fn)
- FILE *fp; /* File to process */
- char *fn; /* File name (for -f option) */
- {
- BOOL m;
- int lno, count;
-
- lno = count = 0;
- while ( fgetss(lbuf, LMAX, fp) ) {
- ++lno;
- if ( kbhit() && (getkey() == '\3') ) /* CTRL-C abort ? */
- exit(1);
- m = match();
- if ( (m && ! vflag) || (! m && vflag) ) {
- ++count;
- if ( ! cflag ) {
- if ( fflag && fn ) {
- file(fn);
- fn = NIL;
- }
- if ( nflag )
- printf("%4d ", lno);
- if ( lbuf[strlen(lbuf) - 1] == '\n' )
- printf("%s", lbuf);
- else
- printf("%s\n", lbuf);
- }
- }
- }
- if ( cflag ) {
- if ( fflag && fn )
- file(fn);
- printf("%d\n", count);
- }
- }
-
- /*
- * cclass()
- *
- * Compile a class (within [])
- */
-
- char *
- cclass(source, src)
- char *source; /* Pattern start -- for error msg. */
- char *src; /* Class start */
- {
- char *s; /* Source pointer */
- char *cp; /* Pattern start */
- int c; /* Current character */
- int o; /* Temp */
-
- s = src;
- o = CLASS;
- if ( *s == '^' ) {
- ++s;
- o = NCLASS;
- }
- store(o);
- cp = pp;
- store(0); /* Byte count */
- while ( (c = *s++) && c != ']' ) {
- if ( c == '\\' ) { /* Store quoted char */
- if ( (c = *s++) == '\0' ) /* Gotta get something */
- badpat("Class terminates badly", source, s);
- else
- store(tolower(c));
- }
- else if ( c == '-' && (pp - cp) > 1 && *s != ']' && *s != '\0' ) {
- c = pp[-1]; /* Range start */
- pp[-1] = RANGE; /* Range signal */
- store(c); /* Re-store start */
- c = *s++; /* Get end char and */
- store(tolower(c)); /* Store it */
- }
- else {
- store(tolower(c)); /* Store normal char */
- }
- }
- if ( c != ']' )
- badpat("Unterminated class", source, s);
- if ( (c = (pp - cp)) >= 256 )
- badpat("Class too large", source, s);
- if ( c == 0 )
- badpat("Empty class", source, s);
- *cp = c;
- return (s);
- }
-
- /*
- * store()
- *
- * append sub-pattern to pattern if there is room
- */
-
- PROC
- store(op)
- char op;
- {
- if ( pp >= &pbuf[PMAX] )
- error("Pattern too complex\n");
- *pp++ = op;
- }
-
-
- /*
- * badpat()
- *
- * Identify a problem with the search pattern
- */
-
- PROC
- badpat(message, source, stop)
- char *message; /* Error message */
- char *source; /* Pattern start */
- char *stop; /* Pattern end */
- {
- int c;
-
- printf("-GREP-E-%s, pattern is\"%s\"\n", message, source);
- printf("-GREP-E-Stopped at byte %d, '%c'\n", stop-source, stop[-1]);
- error("?GREP-E-Bad pattern\n");
- }
-
- /*
- * match()
- *
- * Match the current line (in lbuf[]), return YES if it does.
- */
-
- BOOL
- match()
- {
- char *pmatch();
-
- char *l; /* Line pointer */
-
- for ( l = lbuf; *l; l++ ) {
- if ( pmatch(l, pbuf) != NIL )
- return (YES);
- }
- return (NO);
- }
-
-
- /*
- * attempt to match a pattern with a line of text
- * if sucessful, return a pointer to the match
- * else return NIL
- *
- * Uses RECURSION
- */
-
- char *
- pmatch(line, pattern)
- char *line; /* (partial) line to match */
- char *pattern; /* (partial) pattern to match */
- {
- char *l; /* Current line pointer */
- char *p; /* Current pattern pointer */
- char c; /* Current character */
- char *e; /* End for STAR and PLUS match */
- int op; /* Pattern operation */
- int n; /* Class counter */
- char *are; /* Start of STAR match */
-
- l = line;
- if ( debug )
- printf("pmatch(\"%s\")\n", line);
- p = pattern;
- while ( (op = *p++) != ENDPAT ) {
- if ( debug )
- printf("byte[%d] = %02x, '%c', op = %02x\n", l-line, *l, *l, op);
- switch ( op ) {
- case CHAR:
- if ( tolower(*l++) != *p++ )
- return (NIL);
- break;
-
- case BOL:
- if ( l != lbuf )
- return (NIL);
- break;
-
- case EOL:
- if ( *l != '\0' )
- return (NIL);
- break;
-
- case ANY:
- if ( *l++ == '\0' )
- return (NIL);
- break;
-
- case DIGIT:
- c = *l++;
- if ( ! isdigit(c) )
- return (NIL);
- break;
-
- case ALPHA:
- c = tolower(*l++);
- if ( ! islower(c) )
- return (NIL);
- break;
-
- case NALPHA:
- c = tolower(*l++);
- if ( ! islower(c) && ! isdigit(c) )
- return (NIL);
- break;
-
- case PUNCT:
- c = *l++;
- if ( c == '\0' || c > ' ' )
- return (NIL);
- break;
-
- case CLASS:
- case NCLASS:
- c = tolower(*l++);
- n = *p++ & 0xff;
- do {
- if ( *p == RANGE ) {
- p += 3;
- n -= 2;
- if ( c >= p[-2] && c <= p[-1] )
- break;
- }
- else if ( c == *p++ )
- break;
- } while ( --n > 1 );
- if ( (op == CLASS) == (n <= 1) )
- return (NIL);
- if ( op == CLASS )
- p += n - 2;
- break;
-
- case MINUS:
- e = pmatch(l, p); /* Look for a match */
- while ( *p++ != ENDPAT ) /* Skip over pattern */
- ;
- if ( e ) /* Got a match? */
- l = e; /* Yes, update string */
- break; /* Always succeeds */
-
- case PLUS: /* One or more ... */
- if ( (l = pmatch(l, p) ) == 0 )
- return (NIL); /* Gotta have a match */
- case STAR: /* Zero or more ... */
- are = l; /* Remember line start */
- while ( *l && (e = pmatch(l, p)) )
- l = e; /* Get longest match */
- while ( *p++ != ENDPAT ) /* Skip over pattern */
- ;
- while ( l >= are ) { /* Try to match rest */
- if ( e = pmatch(l, p) )
- return (e);
- --l; /* Nope, try earlier */
- }
- return (NIL); /* Nothing else worked */
-
- default:
- printf("Bad op code %d\n", op);
- error("Cannot happen -- match\n");
- }
- }
- return (l);
- }
-
- /*
- * file()
- *
- * Identify the file under consideration
- */
-
- PROC
- file(s)
- char *s;
- {
- printf("File %s: Pattern \"%s\"\n", s, patt_str);
- }
-
- /*
- * cant()
- *
- * Identify the offending file specification
- */
-
- PROC
- cant(s)
- char *s;
- {
- printf("can't find any files matching <%s>\n", s);
- exit(1);
- }
-
- /*
- * fwild()
- *
- * Construct an array of file names which match
- * the wild card file specification in global array names[]
- * return the number of entries made in names[]
- */
-
- fwild(wildname)
- char *wildname;
- {
- int i, j, k; /* general purpose indices */
-
- num_entries = 0;
- setfcb(&fcb, wildname);
- fcb.extent = '\0'; /* match only first extent */
- fcb.s[0] = fcb.s[1] = '\0'; /* required by some versions of CP/M */
- curr_disk = getdisk();
- if ( ! fcb.drive ) /* no explicit drive spec, make it explicit */
- fcb.drive = curr_disk + 1;
- setdisk(fcb.drive-1);
- setdma(TBUF); /* set DMA address to tbuff */
- for ( i = srch1st(&fcb); i != 255; i = srchnxt(&fcb),++num_entries ) {
- if ( num_entries >= MAXENTRIES )
- break;
- fcb_ptr = (TBUF + 32*(i & 3));
- fcb2nam(names[num_entries], fcb_ptr, fcb.drive);
- }
- if ( i != 255 )
- printf("\nToo many files, only %d will be processed", MAXENTRIES);
- if ( num_entries > 1 )
- qsort(names, num_entries, NAMESIZE, &strcmp);
- curr_entry = -1; /* will be incremented BEFORE use */
- if ( debug ) {
- printf("\nDebug of fwild().. it found:\n");
- for ( i = 0; i < num_entries; ++i )
- printf("\n<%s>", names[i]);
- printf("\n");
- }
- setdisk(curr_disk); /* restore default disk drive */
- return (num_entries);
- }
-
- /*
- * fnext()
- *
- * close current file -- if there is one open
- * open next one
- * and return a pointer to its name
- */
-
- char *
- fnext(f)
- FILE *f;
- {
- if ( curr_entry >= 0 )
- fclose(f);
- if ( ++curr_entry < num_entries ) {
- if ( fopen(&names[curr_entry], f) < 0 )
- return (NIL);
- return (&names[curr_entry]);
- }
- return (NIL);
- }
-
- /*
- * error()
- *
- * print a message and die
- */
-
- PROC
- error(s)
- {
- puts(s);
- exit(1);
- }
-
- /*
- * fgetss()
- *
- * fgets() with a max size of buffer
- * ignore buffer size issue for now (hrm)
- */
-
- char *
- fgetss(buf, bufsiz, f)
- char *buf;
- int bufsiz;
- FILE *f;
- {
-
- return (fgets(buf, f));
- }
-
- /*
- * fcb2nam()
- *
- * extract the file name from fcb f and put it in string n
- * pre-pend a drive specifier
- */
-
- PROC
- fcb2nam(n, f, d)
- char *n; /* name */
- FCB *f; /* file control block */
- char d; /* drive */
- {
- char c;
- int i;
-
- *n++ = d + '@';
- *n++ = ':';
- for ( i = 0; i < NAMSIZ; ++i, ++n )
- if ( (c = f->name[i]) == ' ' )
- break;
- else
- *n = c;
- *n++ = '.';
- for ( i = 0; i < TYPSIZ; ++i, ++n )
- if ( (c = (f->type[i] & 0x7f)) == ' ' )
- break;
- else
- *n = c;
- *n = '\0';
- }
-
- /*
- * usage()
- *
- * Give an error message and "how things should be invoked"
- */
-
- PROC
- usage(s)
- char *s;
- {
- printf("?GREP-E-%s\n", s);
- printf("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n");
- exit(1);
- }
-
-
- /*
- * help()
- *
- * Show instructions for use
- *
- * Every piece of text is written with pscrt(), which pauses for a key
- * each time a screenful of lines has gone by.  main() calls this for
- * "grep ?" and for the -? flag.
- */
-
- PROC
- help()
- {
-
- pscrt("grep searches a file for a given pattern. Execute by\n");
- pscrt("\tgrep [flags] regular_expression file_list\n\n");
- pscrt("Flags are single characters preceeded by '-':\n");
- pscrt("\t-c\tOnly a count of matching lines is printed\n");
- pscrt("\t-f\tPrint file name for matching lines switch, see below\n");
- pscrt("\t-n\tEach line is preceeded by its line number\n");
- pscrt("\t-v\tOnly print non-matching lines\n\n");
- pscrt("The file_list is a list of files");
- pscrt(" (wildcards are acceptable).\n");
- pscrt("If no files are given, input comes from the terminal.");
- pscrt(" There is no prompting.\n");
- pscrt("The file name is normally printed if there is a file given.\n");
- pscrt("The -f flag reverses this action");
- pscrt(" (print name no file, not if more).\n\n");
- pscrt("The regular_expression defines the pattern to search for.");
- pscrt(" Upper- and\n");
- pscrt("lower-case are always ignored. Blank lines never match.");
- pscrt(" The expression\n");
- pscrt("should be quoted to make it a single argv[].\n");
- pscrt("x\tAn ordinary character (not mentioned below)");
- pscrt(" matches that character.\n");
- pscrt("'\\'\tThe backslash quotes any character.");
- pscrt(" \"\\$\" matches a dollar-sign.\n");
- pscrt("'^'\tA circumflex at the beginning of an expression\n");
- pscrt("\tmatches the beginning of a line.\n");
- pscrt("'$'\tA dollar-sign at the end of an expression matches the");
- pscrt(" end of a line.\n");
- pscrt("'.'\tA period matches any character except \"new-line\".\n\n");
- pscrt("\tA colon matches a class of characters described below\n\n");
- pscrt("\t\t\":a\"\tmatches any alphabetic\n");
- pscrt("\t\t\":d\"\tmatches digits,\n");
- pscrt("\t\t\":n\"\tmatches alphanumerics\n");
- pscrt("\t\t\":_\"\tmatches spaces, tabs, and\n");
- pscrt("\t\t\tother control characters, such as new-line.\n");
- pscrt("'*'\tAn expression followed by an asterisk matches zero or\n");
- pscrt("\tmore occurrances of that expression:\n\n");
- pscrt("\t\t\"fo*\" matches \"f\", \"fo\" \"foo\", etc.\n\n");
- pscrt("'+'\tAn expression followed by a plus sign matches one\n");
- pscrt("\tor more occurrances of that expression:\n\n");
- pscrt("\t\t\"fo+\" matches \"fo\", etc.\n\n");
- pscrt("'-'\tAn expression followed by a minus sign optionally\n");
- pscrt("\tmatches the expression.\n");
- pscrt("'[]'\tA string enclosed in square brackets matches any\n");
- pscrt("\t\tcharacter in that string, but no others.\n\n");
- pscrt("\tIf the first character in the string is a circumflex,\n");
- pscrt("\tthe expression matches any character except \"new-line\"\n");
- pscrt("\tand the characters in the string. For example, \"[xyz]\"\n");
- pscrt("\tmatches \"xx\" and \"zyx\", while \"[^xyz]\"\n");
- pscrt("\tmatches \"abc\" but not \"axb\".\n");
- pscrt("\tA range of characters may be specified by two characters\n");
- pscrt("\tseperated by \"-\".\n");
- pscrt("\tNote that,");
- pscrt(" [a-z] matches alphabetics, while [z-a] never matches.\n\n");
- pscrt("The concatenation of regular expressions");
- pscrt(" is a regular expression.\n");
- }
-
-
- /*
- * put a string to the crt (really standard out)
- * count the newlines. If CRT size is reached, prompt for continuation
- * then continue.
- */
-
- #define MAXCRT 22
-
- PROC
- pscrt(s)
- char *s;
- {
- int c;
- char *count;
-
- count = "\0"; /* This is a fakeout to achieve STATIC */
- for ( ; *s; ++s ) {
- putchar(*s);
- if ( *s == '\n' && ++*count >= MAXCRT ) {
- *count = '\0';
- puts("--STRIKE SPACE BAR TO CONTINUE");
- getkey();
- putchar('\r');
- }
- }
- }
- ny ch