home *** CD-ROM | disk | FTP | other *** search
- /*
- * mult.c
- * dennis bednar 08 08 85 Original creation.
- * dennis bednar 01 09 86 Added -F flag, added debug flag.
- * report bugs/suggestions etc. to dennis@rlgvax.uucp
- *
- * mult reads the input (stdin or file(s)), comparing adjacent lines.
- * In the normal case, the second, and succeeding copies of repeated
- * lines are output to stdout.
- * Note that repeated lines must be adjacent, see sort(1).
- * This tool is sort of the opposite of uniq.
- *
- * -fn = use field number n in each line for the comparison, n = 1 = first.
- * Note - in the 2 lines " abc def" and "abc def", "abc" is field # 1,
- * and "def" is field number 2, multiple white space chars are field separators.
- *
- * -a = also output the 1st of multiple occurrences
- * Note - this flag is very useful in conjunction with -fn flag.
- * Example: trying to find all include files which are in multiple dirs:
- * with input sorted by 1st column:
- stdio.h /usr/include
- stdio.h /tmp/junk
- * we would use both "-f1 -a" flags to print only those lines in which
- * include files were in more than one directory, but not outputting
- * those lines in which include files were in only one directory.
- *
- */
-
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
/* Program name (argv[0]); prefixed to diagnostics. */
char *cmd;

/* Option state, filled in by main() while it parses the command line. */
int aflag;		/* 1 if -a: also print the first line of a repeated group */
int dflag;		/* non-zero if -d: trace the fields being compared */
int fflag;		/* 1 if -fn: compare one field instead of the whole line */
char Fflag = '\0';	/* -Fc separator character; '\0' means white space */
int fieldnum;		/* n from -fn (1 = first field); valid only if fflag == 1 */

/*
 * Prototypes for the functions defined in this file, so that no call
 * relies on an implicit declaration.  The int-returning helpers return a
 * value nobody uses; they are declared int because that is the type an
 * undeclared call assumes.
 */
char *u_errmesg(void);
char *find_field(char *line, int num);
int mult(FILE *infp);
int swapline(void);
int same_field(char *oldline, char *newline, int fieldnum);
int field_dlm(int c);
int stripnl(char *s);
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- int i;
- FILE *infp;
-
-
- cmd = argv[0];
-
- /* loop thru args, stopping at end of args or first file name */
- for (i = 1; i < argc; ++i)
- {
- if (argv[i][0] != '-')
- break; /* found first non-option, ie 1st filename */
- if (strcmp(argv[i], "-a") == 0)
- {
- aflag=1;
- continue;
- }
-
- /* get debug flag */
- if (strcmp(argv[i], "-d") == 0)
- {
- ++dflag; /* enable debugging */
- printf("Debugging on\n");
- continue; /* goto next argument */
- }
-
- /* get field number */
- if (strncmp(argv[i], "-f", 2) == 0)
- {
- if (fflag)
- goto usage; /* only one -fn allowed */
- fflag = 1;
- if (argv[i][2] == '\0')
- goto usage;
- fieldnum = atoi(argv[i]+2);
- if (fieldnum <= 0)
- {
- fprintf(stderr, "%s: 'field number' must be positive\n", cmd);
- goto usage;
- }
- continue;
- }
-
- /* get field separator character */
- if (strncmp(argv[i], "-F", 2) == 0)
- {
- if (Fflag)
- goto usage; /* only one -Fc allowed */
- Fflag = argv[i][2]; /* save field separator char */
- if (argv[i][2] == '\0')
- goto usage; /* no field separator */
- continue;
- }
- usage:
- fprintf(stderr, "usage: %s [-a] [-d] [-fn] [-Fc] [file ...]\n", cmd);
- fprintf(stderr, " outputs 2nd, 3rd, ... of multiple lines\n");
- fprintf(stderr, " -a = also output 1st one of multiple lines\n");
- fprintf(stderr, " -d = debug\n");
- fprintf(stderr, " -fn = use field number n to compare instead of line, 1=1st field,\n");
- fprintf(stderr, " with white space as field separator\n");
- fprintf(stderr, " -Fc = means use character 'c' as the field separator\n");
- exit(1);
- }
-
- if (i == argc) /* no file names given */
- mult(stdin); /* so read from stdin */
- else
- for ( ;i < argc; ++i) /* use given file names */
- {
- infp = fopen( argv[i], "r");
- if (infp == (FILE *)NULL)
- {
- fprintf(stderr, "%s: cant open %s: %s\n", cmd, argv[i], u_errmesg());
- continue;
- }
- mult( infp );
- fclose(infp);
- }
- }
-
/*
 * Line storage.
 *
 * Two fixed-size buffers hold the previous line and the line just read.
 * Instead of copying text between them, the roles are exchanged by
 * swapping the indices `old' and `new'.
 */
#define LINESIZE 2048		/* bytes per line buffer */

struct t_line
{
	char linebuf[LINESIZE];
} line[2];

int old = 0;			/* index of the buffer holding the old line */
int new = 1;			/* index of the buffer holding the new line */

/* First character of each line buffer, in its current role. */
#define OLDLINE (line[old].linebuf)
#define NEWLINE (line[new].linebuf)

/* Input state, used to decide what to do on each state transition. */
enum
{
	S_START = 0,		/* initial value */
	S_UNIQLINE = 1,		/* saw 1st line, or a new one different than the old */
	S_MULTLINE = 2		/* saw a new line which is the same as the old */
};
int state = S_START;
-
- mult( infp )
- FILE *infp;
- {
- int isdiff; /* 1 iff old line != new line */
-
- /* keep reading lines until eof */
- while (1)
- {
-
- /* this is not very efficient, but its the only way
- * I could think of, otherwise main() gets ugly.
- */
-
- /* read in next line from input */
- if (fgets(NEWLINE, LINESIZE, infp) == NULL)
- return; /* EOF - no state transition */
-
- stripnl(NEWLINE); /* remove ending newline from string */
-
- /* first time mult() is called, we must save the 1st line
- * read as the 'oldline' for comparing against future 'newline's
- */
- if (state == S_START)
- {
- swapline(); /* copy new line to old line */
- state = S_UNIQLINE;
- continue; /* get next line */
- }
-
- /* compare the old vs new line, since needed in both states */
- /* compute it once to make code more efficient */
-
- #define DIFF strcmp
- if (fflag) /* compare by field ? */
- /* yes, pass the global fieldnum so that same_field()
- * is kept modular, and reusable in other applications
- */
- isdiff = !same_field(OLDLINE, NEWLINE, fieldnum);
- else /* no compare entire line */
- isdiff = (DIFF(OLDLINE, NEWLINE));
-
-
- if (state == S_UNIQLINE)
- {
- if (isdiff)
- {
- swapline();
- /* stay in same state */
- }
- else
- {
- if (aflag)
- printf("%s\n", OLDLINE);
- printf("%s\n", NEWLINE);
- swapline();
- state = S_MULTLINE;
- }
- }
- else if (state == S_MULTLINE)
- {
- if (isdiff)
- {
- swapline();
- state = S_UNIQLINE;
- }
- else
- {
- printf("%s\n", NEWLINE);
- swapline();
- /* stay in multiple line state */
- }
- }
- }
- }
-
-
- /*
- * swap old line with new line
- * Called after read into new line, so that effect is same as copying
- * newline to old line, and discarding newline.
- */
- swapline()
- {
- register int t; /* temp */
-
- t = old;
- old = new;
- new = t;
- }
-
-
- /*
- * return 1 iff field number 'fieldnum' (1=1st) is same in
- * old line vs. new line.
- */
- same_field(oldline, newline, fieldnum)
- char *oldline,
- *newline;
- int fieldnum;
-
- {
- char *op, /* old field ptr */
- *np; /* new field ptr */
-
- op = find_field(oldline, fieldnum);
- if (dflag) /* debug */
- {
- /* dump out the fields being compared */
- char *cp;
- printf("Old field %d = <", fieldnum);
- if (*op == '\0') /* past last field in line */
- printf("UNDEF");
- else
- for (cp = op; *cp && !field_dlm(*cp); ++cp)
- printf("%c", *cp);
- printf("> ");
- printf("Old line = <%s>\n", oldline);
- }
- np = find_field(newline, fieldnum);
- if (dflag)
- {
- char *cp;
- printf("New field %d = <", fieldnum);
- if (*np == '\0') /* past last field in line */
- printf("UNDEF");
- else
- for (cp = np; *cp && !field_dlm(*cp); ++cp)
- printf("%c", *cp);
- printf("> ");
- printf("New line = <%s>\n", newline);
- }
-
- if (*op == '\0' || *np == '\0') /* is either field non-existent ? */
- return 0; /* assume failed to match */
-
- /* compare fields until either one ends */
- /* a field ends with either a non-zero delimiter or a '\0' char */
- for ( ; *op || *np; ++op, ++np) /* both strings not exhausted */
- {
- /* Important: Please note that field_dlm() checks for '\0' also */
- if (field_dlm(*op) && field_dlm(*np)) /* both reached end */
- return 1; /* hit end of field */
- /* next cmp will handle case when only one field delimiter */
- if (*op != *np) /* cmp both chars in the field */
- return 0; /* failed to match */
- /* both matched, keep going */
- }
-
- /* both strings hit EOS, so matched that way */
- return 1; /* matched */
- }
-
-
- /*
- * return 1 iff a field delimiter such as white space or end of string
- * a null char is always a field delimiter, because the null replaces
- * the last newline after the line has been read in.
- */
- field_dlm(c)
- char c;
- {
- if (c == '\0') /* is it a null at End of String ? */
- return 1; /* yes, return true, because a delimiter */
- if (Fflag) /* field separator defined ? */
- return (c == Fflag); /* yes, see if it matches the one given */
- else /* no, must check for white space */
- return (c == ' ' || c == '\t' || c == '\n');
- }
-
-
- /*
- * return ptr to 'num' nth field, 1 = first field in the buffer.
- * return ptr to '\0' if ask for a field not present
- */
- char *
- find_field (line, num)
- char *line;
- int num;
- {
- char *cp; /* ptr to return */
-
- /* must ask for valid field number */
- if (num < 1)
- return (line+strlen(line)); /* '\0' */
-
- /* beginning of line */
- cp = line;
-
- while ( num-- > 0)
- {
- /* skip poss leading white space */
-
- #define iswhite(c) ( (((c) & 0xff) == '\t') || (((c) & 0xff) == ' ') )
-
- if (Fflag) /* using non-white field delimiter */
- ; /* so first char is field 1 */
- else /* using white space field dlm */
- {
- while (*cp && iswhite(*cp))
- ++cp;
- /* cp is now at '\0' EOS or 1st non-white */
- }
-
-
- /* stop if at beginning of desired field */
- if (num <= 0)
- break;
-
- /* else skip over this symbol to either End of String
- * or next white space , or next delimiter.
- */
- /* now find the last char of this symbol */
- if (Fflag) /* non-white field delimiter */
- {
- while (*cp && !field_dlm(*cp))
- ++cp;
- /* hit '\0' EOS or field delimiter */
- if (*cp) /* fld */
- ++cp; /* so make it point to begin of next field */
- else
- ; /* don't go past end of string !!! */
- }
- else /* white space delimiter */
- {
- while (*cp && !iswhite(*cp))
- ++cp;
- /* cp points to EOS or next white space char */
- }
- }
-
- return cp;
- }
-
-
- /*
- * strip ending new line from string returned by fgets.
- * If not present as last char , then line too long.
- */
- stripnl(s)
- char *s;
- {
- char *cp;
-
- cp = &s[strlen(s) - 1]; /* ptr to last char of string */
- if (*cp == '\n') /* is last char a new line */
- *cp = '\0'; /* yes, remove it */
- else
- {
- fprintf(stderr, "%s: error line <%s>... was too long\n", cmd, s);
- exit(1);
- }
- }
-
/*
 * Return the error message string for the current value of errno.
 * More flexible than perror(3), because the caller decides how and where
 * to print it.
 *
 * The string comes from strerror(), which may overwrite it on a later
 * call, so use it or copy it before calling again.  Call this before
 * anything else that may change errno.
 */
char *
u_errmesg(void)
{
	return strerror(errno);
}
-