/*
 * chop - chop selected fields out of text.
 * G. L. Sicherman.  August 7, 1990.  February 10, 1991.
 *
 *     chop -cLIST [file ...]
 *     chop -fLIST [-dCHARS] [-b] [file ...]
 *
 * Chop writes the specified fields (-f) or columns (-c)
 * to the standard output.
 * The default output field separator is horizontal tab.
 * The default input field separator is white space;
 * -dx... specifies the characters x... as input field separators
 * and the first x as the output field separator.
 * If the input separator is white space, leading white space
 * is ignored unless you specify -b.
 *
 * Unlike cut(1), chop processes *all* input lines as specified,
 * and a line can be as long as you like.
 *
 * Carriage returns, line feeds, and form feeds all act as
 * line separators.  Other control characters (other than
 * separators) are treated as ordinary characters in fields.
 * In columns, BS moves back one column and HT jumps to the next
 * standard 8-column tab stop.
 *
 * EXIT CODES: 0 on success;
 *             1 on syntax errors;
 *             2 on file access failures.
 */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Sentinel stored in rangetype.last for an open-ended range such as "3-".
 * NOTE: <math.h> also defines a macro named INFINITY; this file does not
 * include that header.
 */
#define INFINITY (-1)
/* Default output field separator (horizontal tab). */
#define DEFOUTSEP '\t'

/*
 * One item of a -c or -f list: the inclusive span first..last of
 * 1-based column or field numbers.  last == INFINITY means the span
 * has no upper bound.
 */
struct rangetype {
    int first;
    int last;
};

/* Option state, filled in by main(). */
static int bflag = 0;           /* -b given: do not skip leading white space */
static char *cflag = NULL;      /* list text that followed -c, or NULL */
static char *dflag = NULL;      /* separator characters that followed -d, or NULL */
static char *fflag = NULL;      /* list text that followed -f, or NULL */

static FILE *Input;             /* input file currently being processed */
static int nranges;             /* number of entries filled in range[] */
static char *progn;             /* program name used in diagnostics */
static struct rangetype *range; /* parsed -c/-f list, nranges entries */

/* Per-stream worker selected by main(): cprocess for -c, fprocess for -f. */
static int (*process)(FILE *stream);
static int cprocess(FILE *stream);
static int fprocess(FILE *stream);

/* Functions that are called before their definitions appear. */
int catchup(void);
int wantit(int f);
int setupranges(char *list);
int onerange(char *r);

- usage()
- {
- fprintf(stderr, "usage: %s -cLIST [file ...]\n", progn);
- fprintf(stderr, " %s -fLIST [-dCHARS] [-b] [file ...]\n", progn);
- exit(1);
- }
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- progn = *argv;
- while (--argc) {
- if ('-'!=**++argv) break;
- switch(*++*argv) {
- case 'b':
- bflag = 1;
- break;
- case 'c':
- if (!*(cflag = ++*argv)) {
- fprintf(stderr,
- "%s: column list required after -c\n", progn);
- exit(1);
- }
- break;
- case 'd':
- if (!*(dflag = ++*argv)) {
- fprintf(stderr,
- "%s: character required after -d\n", progn);
- exit(1);
- }
- break;
- case 'f':
- if (!*(fflag = ++*argv)) {
- fprintf(stderr,
- "%s: field list required after -f\n", progn);
- exit(1);
- }
- break;
- default:
- usage();
- }
- }
- if (!fflag == !cflag) usage();
- if (cflag) {
- process = cprocess;
- setupranges(cflag);
- }
- else {
- process = fprocess;
- setupranges(fflag);
- }
- if (cflag && dflag) fprintf(stderr, "%s: -d option ignored with -c\n",
- progn);
- if (cflag && bflag) fprintf(stderr, "%s: -b option ignored with -c\n",
- progn);
- if (fflag && dflag && bflag) {
- fprintf(stderr, "%s: -b option ignored with -d\n", progn);
- bflag = 0;
- }
- if (argc) while (argc--) {
- Input = fopen(*argv++, "r");
- if (!Input) {
- fprintf(stderr, "%s: cannot read %s\n",
- progn, *--argv);
- exit(2);
- }
- (*process)(Input);
- fclose(Input);
- }
- else (*process)(stdin);
- }
-
/*
 * chop -c
 */

/*
 * Column bookkeeping for the current line.  incol is how many columns
 * the input has advanced; outcol is the last column number already
 * tested with wantit().  Both are reset to 0 at each line separator.
 */
static int incol, outcol;

/*
 * cprocess - copy the wanted columns of every line of stream to stdout.
 *
 * BS moves the input column back by one (never below 0) and HT moves
 * it to the next multiple of 8; neither is copied directly.  Before a
 * printing character or a line separator is handled, catchup() brings
 * outcol level with incol.  LF, CR and FF are always copied and start
 * a new line.
 *
 * Always returns 0.
 */
static int
cprocess(FILE *stream)
{
    int k;

    incol = outcol = 0;
    while ((k = getc(stream)) != EOF) {
        switch (k) {
        case '\b':
            if (incol > 0)
                incol--;
            break;
        case '\t':
            incol = (incol + 8) & ~7;
            break;
        case '\n':
        case '\r':
        case '\f':
            catchup();
            putchar(k);
            incol = outcol = 0;
            break;
        default:
            catchup();
            /* columns are numbered from 1, hence the pre-increment */
            if (wantit(++outcol))
                putchar(k);
            incol++;
            break;
        }
    }
    return 0;
}

- catchup()
- {
- while (incol > outcol) if (wantit(++outcol)) putchar(' ');
- while (incol < outcol) if (wantit(outcol--)) putchar('\b');
- }
-
- /*
- * chop -f
- */
-
- static int
- fprocess(stream)
- FILE *stream;
- {
- int field, go;
- int k;
- int later;
- char *seps;
- int want;
-
- field = later = 0;
- seps = dflag? dflag: "\t ";
- for (;;) {
- newfield:
- field++;
- go = 0;
- want = wantit(field);
- if (want && later++) putchar(*seps);
- for (;;) { /* process one field */
- k = getc(stream);
- if (EOF==k) return;
- if (strchr(seps, k)) {
- if (field==1 && !go
- && !dflag && !bflag) continue;
- else break;
- }
- if (strchr("\n\r\f", k)) {
- putchar(k);
- field = later = 0;
- goto newfield; /* i.e., break 2 */
- }
- go = 1;
- if (want) putchar(k);
- }
- /*
- * If white space, skip it.
- */
- if (!dflag) for (;;) {
- k = getc(stream);
- if (EOF==k) {
- putchar(*seps);
- return;
- }
- if (strchr(seps, k)) continue;
- ungetc(k, stream);
- break;
- }
- }
- }
-
- int
- wantit(f)
- int f;
- {
- int r;
-
- for (r=0; r<nranges; r++) {
- if (range[r].last != INFINITY && range[r].last < f) continue;
- if (range[r].first <= f) return 1;
- }
- return 0;
- }
-
- setupranges(list)
- char *list;
- {
- char *cursor;
- /*
- * How many commas?
- */
- nranges = 1;
- for (cursor = list; *cursor; cursor++)
- if (',' == *cursor) nranges++;
- range = (struct rangetype *)calloc(nranges, sizeof(struct rangetype));
- nranges = 0;
- for (cursor = list; cursor; ) {
- onerange(cursor);
- if (cursor = strchr(cursor,',')) cursor++;
- }
- }
-
- onerange(r)
- char *r;
- {
- if (isdigit(*r)) {
- range[nranges].first = atoi(r);
- while (isdigit(*r)) r++;
- if (','==*r || !*r) {
- range[nranges].last = range[nranges].first;
- nranges++;
- return;
- }
- if ('-'!=*r++) {
- fprintf(stderr,
- "%s: bad character '%c' in field list\n",
- progn, *--r);
- exit(1);
- }
- }
- else if ('-'==*r++) {
- range[nranges].first = 1;
- }
- else {
- fprintf(stderr, "%s: bad character '%c' in field list\n",
- progn, *--r);
- exit(1);
- }
- if (','==*r || !*r) {
- range[nranges++].last = INFINITY;
- return;
- }
- if (!isdigit(*r)) {
- fprintf(stderr, "%s: bad character '%c' in field list\n",
- progn, *r);
- exit(1);
- }
- range[nranges++].last = atoi(r);
- while (isdigit(*r)) r++;
- if (*r && ','!=*r) {
- fprintf(stderr, "%s: bad character '%c' in field list\n",
- progn, *r);
- exit(1);
- }
- }
-