home *** CD-ROM | disk | FTP | other *** search
- /* TDS v1.0 - Transliteration, Deletion, and Squeeze filter
- * Copyright 1990 by Edward Lee
- * edlee@chinet.chi.il.us
- *
- * TDS is a fast superset of the UNIX Sys V(tm) TR program.
- *
- * Suggested compilation:
- * MSDOS Turbo C v2.0: tcc -K -O -f- -mt -lt tds.c
- * Sys V: cc -O -s tds.c -o tds
- * Sys V with shared libs: cc -O -s tds.c -o tds -lc_s
- *
- * This program uses a prefix string length indicator to allow ALL characters
- * to be handled. Examine the MYSTR type definition if you do not know what
- * is meant by a prefix string length indicator. This idea is borrowed from
- * Texas Instruments by the author of this program, and it is implemented
- * here in a more general way. As far as I know, the concept previously had
- * no name. The advantage of using a prefix string length indicator is that
- * it requires no special character to terminate a string.
- */
-
- /*
- * PROGRAM HISTORY
- *~01:00-04:20 08Jun1990, Program performs multiple char translation
- * 16:15-18:33 ", Added option switch processing
- * 10:00-11:11 09Jun, Added -c)omplement operation in setup_t()
- * 12:20-12:33 ", Added -s)queeze operation in main()
- * 12:33-13:33 ", Moved -c)omplement operation to its own routine
- * 23:00-23:30 ", Added -d)elete operation in main()
- * 23:30-23:52 ", Tied the various options together
- * 21:31-22:46 10Jun, Changed prefixed string type into a structure
- * 16:50-17:19 11Jun, Parenthesized conditional, corrected off-by-one error
- * 17:19-18:56 ", Induced and incorporated undocumented length rules
- * 18:56-17:42 ", Cleaned up unneeded variables, tested program
- * 01:05-01:20 14Jun, Added code for -ds option combination
- * 02:30-02:42 Optimized -ds code
- * 01:55 17Jun, Added #ifndef ushort, compiled under Turbo C v2.0
- * 15:30-16:56 23Aug, Began to incorporate esch() and range() routines which
- * were developed and tested separately over four days
- * 01:15-01:39 24Aug, Modified range() for variable parameters
- * 01:00-03:06 27Aug, Revised comments, checked code consistency & compromised
- * 15:34-18:32 29Aug, Modified range() and range2() for MYSTR, added error msgs,
- * compiled, tested, corrected off-by-one error in range2(),
- * 03:17-03:25 01Sep, Corrected a change in getopts() which introduced an
- * indefinite pointer condition.
- * 02:40 25Nov, Corrected the update of s2->i[] in setup_t()
- * 20:34 25Nov, Cleaned up unused variables, unneeded brackets
- * 11:25 21Dec, Test
- *~23:30-23:47 21Dec, Deleted unused function, minor loop optimization, test
- *~12:00-12:45 22Dec, Deleted unnecessary complications in setup_t, test,
- * optimized (delete && squeeze) section, test
- *~21:45-22:07 22Dec, Optimized setup_t(), test
- *~ 11:20 23Dec, Cleanup, test
- * 11:20 26Dec, Replaced char with unchar to prevent crashes on *nix
- * systems; spent two days tracking this down
- * Deleted range() and replaced function with range2()
- * 14:15 26Dec, Final test before release
- */
-
#include <stdio.h>
#include <stdlib.h>

#ifndef __TURBOC__
#include <sys/types.h>
#endif
-
/*
 * Short type names.  Each is only defined here when no macro of that name
 * already exists.
 */
#if !defined(unchar)
#  define unchar unsigned char
#endif

#if !defined(ushort)
#  define ushort unsigned short
#endif

/* Boolean values used for the option flags and the MYSTR membership map. */
#define FALSE (0)
#define TRUE  (1)

/* Number of bytes in the shared input/output buffer. */
#define SIZE (32768)

/* Capacity, in bytes, of the s[] array inside a MYSTR. */
#define MAXMYSLEN (256)
- typedef struct {
- int len;
- unchar s [MAXMYSLEN];
- ushort i [256]; /* Map of unique characters in {string}, */
- /* a.k.a the i)ntersection of {string} */
- /* with the character set {0...255} */
- } MYSTR;
-
- unchar a[SIZE]; /* Input/Output character buffer */
-
- unchar t[] = { /* Translation table for character codes 0-255 */
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
- 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
- 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
- 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
- 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
- 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
- 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
- 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
- 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
- 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
- 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
- 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
- 250, 251, 252, 253, 254, 255
- }; /* Translation table for character codes 0-255 */
-
- int delete, invert, squeeze=FALSE; /* Boolean variables */
-
-
- void error(s)
- unchar *s;
- {
- (void)fputs(s, stderr);
- (void)fputs("\n", stderr);
- exit(1);
- } /* error */
-
-
- /* Routine to decode backslash escape codes at run-time */
- int esch(p)
- unchar **p;
- {
- int n;
-
- if (**p != '\\') { /* Backslash escape code? */
- n=(int)**p; /* No */
-
- if (**p=='\000') /* Real end-of-string? */
- return(-1); /* Yes */
-
- ++(*p); /* Some other character */
- return(n);
- }
-
- ++(*p); /* Skip over '\' */
-
- /* Octal escape character */
- if (**p >= '0' && **p <= '7') {
- n = (**p-48);
-
- ++(*p);
-
- if (**p >= '0' && **p <= '7') {
- n = (n<<3) + (**p-48); /* (n<<3) = (n*8) */
-
- ++(*p);
-
- if (**p >= '0' && **p <= '7') {
- n = (n<<3) + (**p-48);
- ++(*p);
- }
- }
-
- if (n>255)
- error ("tds: the maximum octal escape code is \\377");
-
- return(n);
- } /* Octal */
-
- /* Decimal escape character */
- if (**p == 'd' || **p == 'D') {
- ++(*p);
-
- if (**p >= '0' && **p <= '9') {
- n = (**p-48);
-
- ++(*p);
-
- if (**p >= '0' && **p <= '9') {
- n = n*10 + (**p-48);
-
- ++(*p);
-
- if (**p >= '0' && **p <= '9') {
- n = n*10 + (**p-48);
- ++(*p);
- }
- }
-
- if (n>255)
- error ("tds: the maximum decimal escape code is \\d255");
-
- return(n);
- }
- } /* Decimal */
-
- /* Hexadecimal escape character */
- if (**p=='h' || **p=='H' || **p=='x' || **p=='X') {
- ++(*p);
-
- if((**p >= '0' && **p <= '9') ||
- (**p >= 'A' && **p <= 'F') || (**p >= 'a' && **p <= 'f')) {
-
- if (**p <= '9') /* Make ASCII hex digit into an integer */
- n = (**p-48); /* 48 = ((int)'0') */
- else
- if (**p <= 'F')
- n = (**p-55); /* 55 = ((int)'A' - 10) */
- else
- n = (**p-87); /* 87 = ((int)'a' - 10) */
-
- ++(*p);
-
- if((**p >= '0' && **p <= '9') ||
- (**p >= 'A' && **p <= 'F') || (**p >= 'a' && **p <= 'f')) {
-
- if (**p <= '9')
- n = (n<<4) + (**p-48); /* (n<<4) = (n*16) */
- else
- if (**p <= 'F')
- n = (n<<4) + (**p-55);
- else
- n = (n<<4) + (**p-87);
-
- ++(*p);
- }
-
- return(n);
- }
- } /* Hexadecimal */
-
- /* Miscellaneous escape codes */
- if (**p=='a' || **p=='A') { /* A)udible bell */
- ++(*p);
- return(7);
- }
-
- if (**p=='b' || **p=='B') { /* B)ackspace */
- ++(*p);
- return(8);
- }
-
- if (**p=='t' || **p=='T') { /* T)ab */
- ++(*p);
- return(9);
- }
-
- if (**p=='n' || **p=='N') { /* N)ewline, linefeed */
- ++(*p);
- return(10);
- }
-
- if (**p=='v' || **p=='V') { /* V)ertical tab */
- ++(*p);
- return(11);
- }
-
- if (**p=='f' || **p=='F') { /* F)ormfeed */
- ++(*p);
- return(12);
- }
-
- if (**p=='r' || **p=='R') { /* carriage R)eturn */
- ++(*p);
- return(13);
- }
-
- if (**p=='s' || **p=='S') { /* S)pace */
- ++(*p);
- return(32);
- }
-
- if (**p=='\\') { /* backslash */
- ++(*p);
- return(92);
- }
-
- if (**p=='\000') /* real end-of-string */
- return(-1);
-
- n=(int)**p; /* non-escape code */
- ++(*p);
- return(n);
- } /* esch */
-
-
- /* Append a character to a prefix string */
- void pcappend(c, s)
- unchar c;
- MYSTR *s;
- {
- if (s->len >= MAXMYSLEN)
- error ("tds: maximum string length exceeded");
-
- s -> s[s->len] = c;
- s -> len += 1; /* Update length indicator */
- s -> i[(int)c] = TRUE; /* Update intersection map */
- } /* pcappend */
-
-
- void range2(s, d)
- unchar **s;
- MYSTR *d;
- {
- int c1, c2, n;
-
- if (**s=='[')
- ++(*s);
-
- while (**s != ']') {
- c1 = esch (s); /* Get the 1st character */
-
- if (c1 < 0)
- error ("tds: was expecting a character map in string2, e.g. [a-zr*5]");
-
- if (**s=='-') {
- ++(*s);
-
- c2 = esch (s); /* Get the 2nd character */
- if (c2 < 0)
- error ("tds: was expecting end of character range after '-', e.g. [a-zZ-A], in string2");
-
- if (c1 < c2) /* Does the range ascend or descend? */
- while (c1 <= c2) /* It ascends */
- pcappend (c1++, d); /* Fill up the destination buffer */
- else
- while (c1 >= c2) /* It descends */
- pcappend (c1--, d); /* Fill up the destination buffer */
- } else
- if (**s=='*') {
- ++(*s);
-
- n=(-1);
-
- if (**s >= '0' && **s <= '9') {
- n = (**s-48);
-
- ++(*s);
-
- if (**s >= '0' && **s <= '9') {
- n = n*10 + (**s-48);
-
- ++(*s);
-
- if (**s >= '0' && **s <= '9') {
- n = n*10 + (**s-48);
-
- ++(*s);
- }
- }
- }
-
- if (n>256)
- error ("tds: character multiplier may not exceed 256, e.g. [a*256], in string2");
-
- if (n<0)
- n= 256 - (d -> len);
-
- while (n--)
- (void)pcappend(c1, d);
-
- } else
- error ("tds: was expecting a '-' or '*' in character range, e.g. [a-zr*5], in string2");
- } /* while */
-
- ++(*s); /* Skip over closing ']' */
- } /* range2 */
-
-
- void clr_mystr(s)
- MYSTR *s;
- {
- int i;
-
- s -> len = 0;
-
- i=256;
- do {
- --i;
- s -> i[i] = FALSE;
- } while (i);
-
- } /* clr_mystr */
-
-
- void getopts(argc, argv, as1, as2)
- int argc;
- unchar *argv[];
- MYSTR *as1, *as2;
- {
- unchar *c;
- int flag, i;
-
- flag = 0;
-
- for (i=1; i<argc; i++) {
- c=argv[i];
-
- if (*c == '-')
- while (*c++) {
- if (*c == 'c')
- invert=TRUE;
- else
- if (*c == 'd')
- delete=TRUE;
- else
- if (*c == 's')
- squeeze=TRUE;
- } /* while */
- else
- if (flag==0) {
- while (*c)
- if (*c == '[')
- (void)range2 (&c, as1);
- else
- (void)pcappend(esch (&c), as1);
-
- ++flag;
- } else
- while (*c)
- if (*c == '[')
- (void)range2 (&c, as2);
- else
- (void)pcappend(esch (&c), as2);
-
- } /* for */
- } /* getopts */
-
-
- /* Complement prefixed string: s = {0...255 character set} - {s.i[]} */
- void complement(s)
- MYSTR *s;
- {
- int i=256;
- int j=0;
-
- do { /* Invert the intersection map */
- --i;
- s -> i[i] = 1 - (s -> i[i]);
- } while (i);
-
- s -> len = 256 - (s -> len); /* Update the length */
-
- for (i=0; i<256; i++)
- if (s -> i[i] == TRUE)
- s -> s[j++] = (char)i; /* Make string reflect new map and length */
- } /* complement */
-
-
- /* Set up translation table */
- void setup_t(s1, s2)
- MYSTR *s1, *s2;
- {
- int i;
- int len = s1->len;
-
- if (s2->len == 0) /* Added for compatibility with tr */
- *s2 = *s1;
- else
- if (s1->len > s2->len) /* Length of s1 > length of s2 ? */
- len = s2->len;
-
- for (i=0; i<len; i++) /* Make {t} = s/{s1}/{s2}/ */
- t[ (int)(s1->s[i]) ] = s2->s[i];
- } /* setup_t */
-
-
- int main (argc, argv)
- int argc;
- unchar *argv[];
- {
- int ch, i, j, n;
- int lastch=(-1); /* Initialize to a number outside the 0-255 character set */
- unchar tch;
- MYSTR s1, s2;
-
- (void)clr_mystr(&s1);
- (void)clr_mystr(&s2);
-
- (void)getopts(argc, argv, &s1, &s2);
-
-
- if (invert)
- (void)complement(&s1);
-
-
- (void)setup_t(&s1, &s2);
-
-
- if (delete && squeeze) {
- do {
- n=fread(a, 1, SIZE, stdin);
-
- j=0;
- for (i=0; i<n; i++) {
- if (s1.i[ (int)a[i] ] == FALSE) {
- ch=(int)a[i]; /* Present, untranslated input character */
-
- if ( (ch != lastch) ||
- (s2.i[ch] == FALSE) ) {
- a[j++] = ch;
- lastch = ch;
- } /* if */
-
- } /* if */
-
- }
-
- (void)fwrite(a, 1, j, stdout);
- } while (n!=0);
-
- return(0);
- } /* if (delete && squeeze) */
-
-
- if (delete) {
- do {
- n=fread(a, 1, SIZE, stdin);
-
- j=0;
- for (i=0; i<n; i++)
- if (s1.i[ (int)a[i] ] == FALSE)
- a[j++] = a[i];
-
- (void)fwrite(a, 1, j, stdout);
- } while (n!=0);
-
- return(0);
- } /* if (delete) */
-
-
- if (squeeze) {
- do {
- n=fread(a, 1, SIZE, stdin);
-
- j=0;
- for (i=0; i<n; i++) {
- ch=(int)a[i]; /* Present, untranslated input character */
- tch=t[ch]; /* Present, translated input character */
-
- if ( (tch != lastch) ||
- (s2.i[tch] == FALSE) ) {
- a[j++] = tch;
- lastch = tch;
- }
- } /* for */
-
- (void)fwrite(a, 1, j, stdout);
- } while (n!=0);
-
- return(0);
- } /* if (squeeze) */
-
-
- /* A straight-forward translation filter when there are no option flags: */
- do {
- n=fread(a, 1, SIZE, stdin);
-
- for (i=0; i<n; i++)
- a[i] = t[ (int)a[i] ];
-
- (void)fwrite(a, 1, n, stdout);
- } while (n!=0);
- return(0);
- } /* main */
-