home *** CD-ROM | disk | FTP | other *** search
- /* grep.c
- =============================================================
- UNIX grep utility
- =============================================================
- Note: Compiles with Zortech C++ v 2.0 (__ZTC__ defined)
- Guidelines 2.0 + Manx CG65 (MPU6502 defined)
- */
-
- #include <stdio.h>
- #include <ctype.h>
- #include <string.h>
- #include <dos.h>
-
- int main (int argc, char ** argv);
- void file (char * s);
- void cant (char * s);
- void help (char ** hp);
- void Usage (void);
- void compile (char * source);
- char * cclass (char * source,char * src);
- void store (int op);
- void badpat (char * message, char * source, char * stop);
- void grep (FILE * fp,char * fn);
- int match (void);
- char * pmatch (char * line, char * pattern);
- void error (char * s);
- char ToLower (char c);
-
- #define LMAX 512 // Size of input line buffer
- #define PMAX 512 // Maximum compiled pattern length
-
- enum { CHAR=1, // Literal character; the character itself follows the opcode
- BOL, // Start of line ('^')
- EOL, // End of line ('$')
- ANY, // Wildcard character ('.'), never matches end of line
- CLASS, // Class of characters; a count byte and the members follow
- NOTCLASS, // Not Class ("[^...]"); same layout as CLASS
- ZERO_OR_MORE, // '*' - prefix opcode, sub-pattern ends with ENDPAT
- ONE_OR_MORE, // '+' - prefix opcode, sub-pattern ends with ENDPAT
- OR, // '-' - sub-pattern is optional, ends with ENDPAT
- ALPHA, // ":a" - alphabetic character
- DIGIT, // ":d" - decimal digit
- HEXDIGIT, // ":x" - hexadecimal digit
- QUOTE, // ":q" - single or double quote character
- PUNCT, // ":p" - Punctuation character ,.; etc.
- NALPHA, // ":n" - Alphanumeric
- WHITESPACE, // ":b" - Whitespace
- RANGE, // Inside a class: range start and end characters follow
- ENDPAT // End of a pattern or sub-pattern
- };
-
- int cflag = 0; // Default. Don't just count lines
- int fflag = 1; // Default. Show filenames
- int nflag = 0; // Default. Don't show line numbers
- int vflag = 0; // Default. Matching lines
- int caseflag = 1; // Default. Case sensitive
-
- int nfile; // Arguments left after options and the pattern are removed
-
- char *pp; // Next free byte of pbuf while the pattern is compiled
- char lbuf[LMAX+2]; // Current input line, filled by fgets() in grep()
- char pbuf[PMAX+2]; // Compiled pattern, written through store()
-
- #ifdef MONITOR
- #include <prof.hpp>
- Profiler GrepProf("Grep");
- #endif
-
-
- int main(int argc, char **argv)
- {
- register char * p, *cp;
- register int c, i;
- int gotpattern;
- FILE * f;
- struct FIND * finfo;
- char path[64];
-
-
- if (argc <= 2)
- Usage();
-
- nfile = argc-1;
- gotpattern = 0;
-
- for (i=1; i < argc; ++i) {
-
- p = argv[i];
-
- if (*p == '-') {
- ++p;
- while ( (c = *p++) != 0) {
-
- switch(tolower(c)) {
-
- case 'c':
-
- ++cflag;
- break;
-
- case 'f':
-
- ++fflag;
- break;
-
- case 'n':
-
- ++nflag;
- break;
-
- case 'v':
-
- ++vflag;
- break;
-
- case 'y': // -y -> make case insensitive
-
- caseflag = 0;
- break;
-
- default:
-
- printf("Unknown Option : \"-%c\"\n",c);
- exit(1);
- }
- }
- argv[i] = 0;
- --nfile;
-
- } else if (!gotpattern) {
- compile(p);
- argv[i] = 0;
- ++gotpattern;
- --nfile;
- }
- }
- if (!gotpattern)
- error("No pattern");
-
- if (nfile != 0) {
-
- for (i=1; i < argc; ++i) {
-
- if ((p = argv[i]) != 0) {
-
- finfo = findfirst(p, 0);
-
- strcpy(path,p);
- #ifndef MPU6502
- cp = strrchr(path,'\\');
- if (cp == 0)
- #endif
- cp = strchr(path,':');
-
- if (cp == 0)
- cp = path;
- else
- cp++;
-
- while(finfo != 0) {
-
- strcpy(cp,finfo->name); // name = path+name
-
- if ((f=fopen(path, "r")) == 0)
- cant(path);
- else {
- grep(f, path);
- fclose(f);
- }
- finfo = findnext();
- }
- }
- }
- }
- exit(0);
- }
-
-
-
-
/* Print the "File : <name>" banner; the name is left justified
   in a 16 character field and no newline is written. */
void file(char * s)
{
    fputs("File : ", stdout);
    printf("%-16s ", s);
}
-
-
/* Report on standard output that the file named s could not be opened. */
void cant(char * s)
{
    fputs("Can't open \"", stdout);
    fputs(s, stdout);
    fputs("\"\n", stdout);
}
-
-
-
/* Print the command summary on standard output and terminate
   the program with exit status 1. */
void Usage()
{
    static const char * const text[] = {
        "GREP [-options] reg_expr file(s)\n\n",
        " -c = Show # of matches only. -n = Show line numbers\n",
        " -f = Don't show filenames -v = Show only nonmatching lines\n",
        " -y = Ignore case\n\n"
    };
    unsigned k;

    for (k = 0; k < sizeof text / sizeof text[0]; ++k)
        fputs(text[k], stdout);

    exit(1);
}
-
- /* -----------------------------------------------
- Compile the pattern 'source' into global pbuf[]
- -----------------------------------------------
- */
-
- void compile(char * source)
- {
- register char * s; // Source string pointer
- register char * lp; // Last pattern pointer
- register int c; // Current character
- int o; // Temp
- char * spp; // Save beginning of pattern
-
- s = source;
-
- pp = pbuf;
- lp = pp;
-
- while ((c = *s++) != 0) { // ZERO_OR_MORE, ONE_OR_MORE and OR are special
-
- if (c == '*' || c == '+' || c == '-') {
-
- if (pp==pbuf || (o=pp[-1])==BOL || o==EOL || o==ZERO_OR_MORE || o==ONE_OR_MORE || o==OR)
- badpat("Illegal op ", source, s);
-
- store(ENDPAT);
- store(ENDPAT);
- spp = pp; // Save pattern end
-
- while (--pp > lp) // Move pattern down
- *pp = pp[-1]; // one byte
-
- *pp = (c == '*') ? ZERO_OR_MORE : (c == '-') ? OR : ONE_OR_MORE;
-
- pp = spp; // Restore pattern end
- continue;
- }
-
- // All the rest.
-
- lp = pp; // Remember start
-
- switch(c) {
-
- case '^':
-
- store(BOL);
- break;
-
- case '$':
-
- store(EOL);
- break;
-
- case '.':
-
- store(ANY);
- break;
-
- case '[':
-
- s = cclass(source, s);
- break;
-
- case ':':
-
- if (*s) {
- c = *s++;
- switch(tolower(c)) {
- case 'a':
- store(ALPHA);
- break;
- case 'd':
- store(DIGIT);
- break;
- case 'n':
- store(NALPHA);
- break;
- case 'b':
- store(WHITESPACE);
- break;
- case 'x':
- store(HEXDIGIT);
- break;
- case 'p':
- store(PUNCT);
- break;
- case 'q':
- store(QUOTE);
- break;
- default:
- badpat("Unknown predefined class", source, s);
- }
- break;
- }
- else
- badpat("':' needs class", source, s);
-
- case '\\':
-
- if (*s)
- c = *s++;
-
- default:
-
- store(CHAR);
- store(ToLower(c));
- }
- }
- store(ENDPAT);
- store(0); // Terminate string
-
- }
-
-
- /* ------------------------------------------
- Compile class within []
-
- source = Pattern start - for error message
- src = Class start
- ------------------------------------------
- */
-
- char * cclass(char * source, char * src)
- {
- register char * s; // Source pointer
- register char * cp; // Pattern start
- register int c; // Current character
- int o; // Temp
-
- s = src;
- o = CLASS;
-
- if (*s == '^') {
- ++s;
- o = NOTCLASS;
- }
-
- store(o);
- cp = pp;
- store(0); // Byte count
-
- while (((c = *s++) != 0) && c!=']') {
-
- if (c == '\\') { // Store quoted char
- if ((c = *s++) == '\0') // Gotta get something
- badpat("Bad terminator", source, s);
- else
- store(ToLower(c));
- }
- else if (c == '-' && (pp - cp) > 1 && *s != ']' && *s != '\0') {
- c = pp[-1]; // Range start
- pp[-1] = RANGE; // Range signal
- store(c); // Re-store start
- c = *s++; // Get end char and
- store(ToLower(c)); // Store it
- }
- else {
- store(ToLower(c)); // Store normal char
- }
- }
-
- if (c != ']')
- badpat("Need ']'", source, s);
-
- if ((c = (pp - cp)) >= 256)
- badpat("Class too large", source, s);
-
- if (c == 0)
- badpat("Empty class", source, s);
-
- *cp = c;
- return(s);
- }
-
-
- void store(int op)
- {
- if (pp >= &pbuf[PMAX])
- error("Pattern too complex\n");
- *pp++ = op;
- }
-
-
/* ------------------------------------
   Report a pattern error and exit(1).

   message = error message
   source  = start of offending pattern
   stop    = end of offending pattern

   The pattern is echoed and a '^' is
   printed on the next line, preceded by
   one blank per character between
   source and stop.
   ------------------------------------
*/

void badpat(char * message, char * source, char * stop)
{
    long blanks;

    printf("\"%s\" - Bad Pattern - %s\n", source, message);

    blanks = (long) (stop - source);
    while (blanks > 0) {
        putchar(' ');
        --blanks;
    }
    putchar('^');
    exit(1);
}
-
-
- /* -------------------------------------
- Scan the file for the pattern in ebuf
- -------------------------------------
- fp = file to process
- fn = File name for -f option
- */
-
- void grep(FILE * fp, char * fn)
- {
- register int lno, count, m;
-
- lno = 0;
- count = 0;
-
- while (fgets(lbuf, LMAX, fp)) {
-
- m = strlen(lbuf) - 1; // Drop CR-LF from end of line
-
- while( m && lbuf[m] == '\x0a' || lbuf[m] == '\x0d') {
- lbuf[m--] = 0;
- }
-
- ++lno;
- m = match();
- if ((m && !vflag) || (!m && vflag)) {
- ++count;
- if (!cflag) {
- if (fflag && fn) {
- file(fn);
- fn = 0;
- printf("\n");
- }
- if (nflag)
- printf("%d\t", lno);
- printf("%s\n", lbuf);
- }
- }
- }
- if (cflag) {
- if (fflag && fn)
- file(fn);
- printf("%u match(es)\n", count);
- }
- }
-
-
- /* -------------------------------------------------------
- Match the current line (in lbuf[]), return 1 if it does
- -------------------------------------------------------
- */
-
- int match()
- {
- register char * l; // Line pointer
-
- for (l = lbuf; *l; l++) {
- if (pmatch(l, pbuf))
- return(1);
- }
- return(0);
- }
-
-
- /* --------------------------------------------------------------
- Match a (partial) pattern in a (partial) line
- --------------------------------------------------------------
- This routine, match() and fgets() account for the bulk of the
- program's execution time
- */
-
- char * pmatch(char * line, char * pattern)
- {
- register char * l; // Current line pointer
- register char * p; // Current pattern pointer
- register char c; // Current character
- char * e; // End for ZERO_OR_MORE and ONE_OR_MORE match
- int op; // Pattern operation
- int n; // Class counter
- char * are; // Start of ZERO_OR_MORE match
-
- l = line;
- p = pattern;
-
- while ((op = *p++) != ENDPAT) {
-
- switch(op) {
-
- case CHAR:
-
- if (ToLower(*l) != *p++)
- return(0);
- l++;
- break;
-
- case BOL:
-
- if (l != lbuf)
- return(0);
- break;
-
- case EOL:
-
- if (*l != '\0')
- return(0);
- break;
-
- case ANY:
-
- if (*l++ == '\0')
- return(0);
- break;
-
- case DIGIT:
-
- if ((c = *l++) < '0' || (c > '9'))
- return(0);
- break;
-
- case HEXDIGIT:
-
- c = *l++;
- if (!isxdigit(c))
- return 0;
- break;
-
- case PUNCT:
-
- c = *l++;
- if (!ispunct(c))
- return 0;
- break;
-
- case QUOTE:
-
- c = *l++;
- if ( !(c == '\"' || c == '\'') )
- return 0;
- break;
-
- case ALPHA:
-
- c = *l++;
- if (!isalpha(c))
- return 0;
- break;
-
- case NALPHA:
-
- c=*l++;
- if (!isalnum(c))
- return(0);
- break;
-
- case WHITESPACE:
-
- c = *l++;
- if (c == 0 || !isspace(c))
- return(0);
- break;
-
- case CLASS:
- case NOTCLASS:
-
- c = ToLower(*l);
- l++;
- n = *p++ & 0xff;
-
- do {
- if (*p == RANGE) {
- p += 3;
- n -= 2;
- if (c >= p[-2] && c <= p[-1])
- break;
- }
- else if (c == *p++)
- break;
- } while (--n > 1);
-
- if ((op == CLASS) == (n <= 1))
- return(0);
-
- if (op == CLASS)
- p += n - 2;
- break;
-
- case OR:
-
- e = pmatch(l, p); // Look for a match
-
- while (*p++ != ENDPAT)
- ; // Skip over pattern
- if (e) // Got a match?
- l = e; // Yes, update string
- break; // Always succeeds
-
- case ONE_OR_MORE: // One or more ...
-
- if ((l = pmatch(l, p)) == 0)
- return(0); // Gotta have a match
-
- case ZERO_OR_MORE: // Zero or more ...
-
- are = l; // Remember line start
-
- while (*l && ((e = pmatch(l, p)) != 0))
- l = e; // Get longest match
-
- while (*p++ != ENDPAT)
- ; // Skip over pattern
-
- while (l >= are) { // Try to match rest
- if ((e = pmatch(l, p)) != 0)
- return(e);
- --l; // Nope, try earlier
- }
- return(0); // Nothing else worked
-
- default:
-
- printf("Bad op\n");
- exit(1);
- }
- }
- return(l);
- }
-
-
/* Write the message s to standard error, exactly as given, and
   terminate the program with exit status 1. */
void error(char * s)
{
    fputs(s, stderr);
    exit(1);
}
-
-
- char ToLower(char c)
- {
- return caseflag ? c : tolower(c);
- }
-
-