home *** CD-ROM | disk | FTP | other *** search
- #define L 512
- #define N 7
- #define C 20
- #define MEM (16*2048)
- #define NF 10
-
- int ibuf[259];
- int obuf[259];
- char *file;
- char *filep;
- int nfiles;
- int nlines;
- int ntext;
- int *lspace;
- char *tspace;
- int cmp();
- int term();
- int mflg;
- char *outfil;
- char tabchar;
- int eargc;
- char **eargv;
-
- char fold[128] {
- 0000,0001,0002,0003,0004,0005,0006,0007,
- 0010,0011,0012,0013,0014,0015,0016,0017,
- 0020,0021,0022,0023,0024,0025,0026,0027,
- 0030,0031,0032,0033,0034,0035,0036,0037,
- 0040,0041,0042,0043,0044,0045,0046,0047,
- 0050,0051,0052,0053,0054,0055,0056,0057,
- 0060,0061,0062,0063,0064,0065,0066,0067,
- 0070,0071,0072,0073,0074,0075,0076,0077,
- 0100,0101,0102,0103,0104,0105,0106,0107,
- 0110,0111,0112,0113,0114,0115,0116,0117,
- 0120,0121,0122,0123,0124,0125,0126,0127,
- 0130,0131,0132,0133,0134,0134,0136,0137,
- 0140,0101,0102,0103,0104,0105,0106,0107,
- 0110,0111,0112,0113,0114,0115,0116,0117,
- 0120,0121,0122,0123,0124,0125,0126,0127,
- 0130,0131,0132,0173,0174,0175,0176,0177
- };
- char nofold[128];
- char dict[128] {
- 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
- 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,
- 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1
- };
- char nodict[128] { 1 };
-
- struct field {
- char *code;
- char *ignore;
- int nflg;
- int rflg;
- int bflg;
- char *m[2];
- char *n[2];
- } fields[NF];
- int proto[9] {
- &fold,
- &nodict,
- 0,
- 1,
- 0,
- 0,-1,
- 0,0
- };
- int nfields;
- int error 1;
-
- main(argc, argv)
- char **argv;
- {
- register a, i;
- char *arg;
- register int *p;
- int *q;
-
- for(a=0; a<128; a++)
- nofold[a] = a;
- nodict[127] = 1;
- copyproto();
- eargv = argv;
- while (--argc > 0) {
- if(**++argv == '-') for(arg = *argv;;) {
- switch(*++arg) {
- case '\0':
- if(arg[-1] == '-')
- eargv[eargc++] = "-";
- break;
-
- case 'm':
- mflg++;
- continue;
-
- case 'o':
- if(--argc > 0)
- outfil = *++argv;
- continue;
-
- default:
- field(++*argv,1);
- break;
- }
- break;
- } else if (**argv == '+') {
- if(++nfields>=NF) {
- mess("Too many keys\n");
- exit(1);
- }
- copyproto();
- field(++*argv,0);
- } else
- eargv[eargc++] = *argv;
- }
- q = &fields[0];
- for(a=1; a<=nfields; a++) {
- p = &fields[a];
- for(i=0; i<5; i++) /*sensitive to sizeof(proto)*/
- if(p[i] != proto[i])
- goto next;
- for(i=0; i<5; i++)
- p[i] = q[i];
- next: ;
- }
- if(eargc == 0)
- eargv[eargc++] = "-";
-
- a = MEM;
- i = lspace = sbrk(0);
- while(brk(a) == -1)
- a =- 512;
- brk(a =- 512); /* for recursion */
- a =- i;
- nlines = ((a-L)>>1) & 077777;
- nlines =/ 5;
- ntext = nlines*8;
- tspace = lspace+nlines;
- file = "/usr/tmp/stmXaa";
- loop:
- filep = file;
- while(*filep != 'X')
- filep++;
- for(*filep = 'a';;(*filep)++) {
- if(stat(file, lspace) < 0) {
- a = creat(file, 0600);
- if(a >= 0)
- break;
- }
- if(*filep == 'z') {
- if(file[1] != 't') {
- file = "/tmp/stmXaa";
- goto loop;
- }
- mess("Cannot locate temp\n");
- exit(1);
- }
- }
- close(a);
- filep++;
- if ((signal(2, 1) & 01) == 0)
- signal(2, term);
- nfiles = eargc;
- if(!mflg) {
- ibuf[0] = -1;
- sort();
- close(0);
- }
- for(a = mflg?0:eargc; a+N < nfiles; a=+N) {
- newfile();
- merge(a, a+N);
- }
- if(a != nfiles) {
- oldfile();
- merge(a, nfiles);
- }
- error = 0;
- term();
- }
-
- sort()
- {
- register char *cp;
- register *lp, c;
- int done;
- int i;
- int f;
-
- done = 0;
- i = 0;
- do {
- cp = tspace;
- lp = lspace;
- while(lp < lspace+nlines && cp < tspace+ntext) {
- *lp++ = cp;
- while((*cp++ = c = getc(ibuf)) != '\n') {
- if(c >= 0) continue;
- cp--;
- close(ibuf[0]);
- if(i < eargc) {
- if((f = setfil(i++)) == 0)
- ibuf[0] = 0;
- else if(fopen(f, ibuf) < 0)
- cant(f);
- } else
- break;
- }
- if(c < 0) {
- done++;
- lp--;
- break;
- }
- }
- qsort(lspace, lp-lspace, 2, cmp);
- if(done == 0 || nfiles != eargc)
- newfile(); else
- oldfile();
- while(lp > lspace) {
- cp = *--lp;
- if(*cp)
- do
- putc(*cp, obuf);
- while(*cp++ != '\n');
- }
- fflush(obuf);
- close(obuf[0]);
- } while(done == 0);
- }
-
- struct merg
- {
- char l[L];
- int b[259];
- };
-
- merge(a, b)
- {
- register struct merg *p;
- register char *cp;
- register i;
- struct { int *ip;};
- int f;
- int j;
- int k,l,c;
-
- p = lspace;
- j = 0;
- for(i=a; i<b; i++) {
- f = setfil(i);
- if(f == 0)
- p->b[0] = dup(0);
- else if(fopen(f, p->b) < 0)
- cant(f);
- ibuf[j] = p;
- if(!rline(p)) j++;
- p++;
- }
-
- do {
- i = j;
- qsort(ibuf, i, 2, cmp);
- l = 0;
- while(i--) {
- cp = ibuf[i];
- if(*cp == '\0') {
- l = 1;
- if(rline(ibuf[i])) {
- k = i;
- while(++k < j)
- ibuf[k-1] = ibuf[k];
- j--;
- }
- }
- }
- } while(l);
-
- i = j;
- if(i > 0) for(;;) {
- cp = ibuf[i-1];
- if(i == 1 || cmp(&ibuf[i-1], &ibuf[i-2]))
- do
- putc(*cp, obuf);
- while(*cp++ != '\n');
- if(rline(ibuf[i-1])) {
- i--;
- if(i == 0)
- break;
- }
- cp = &ibuf[i];
- while (--cp.ip > ibuf && cmp(cp.ip, cp.ip-1) < 0) {
- p = *cp.ip;
- *cp.ip = *(cp.ip-1);
- *(cp.ip-1) = p;
- }
- }
- p = lspace;
- for(i=a; i<b; i++) {
- close(p->b[0]);
- p++;
- if(i >= eargc)
- close(creat(setfil(i)));
- }
- fflush(obuf);
- close(obuf[0]);
- }
-
- rline(mp)
- struct merg *mp;
- {
- register char *cp;
- register *bp, c;
-
- bp = mp->b;
- cp = mp->l;
- do {
- c = getc(bp);
- if(c < 0)
- return(1);
- *cp++ = c;
- } while(c != '\n');
- *cp = '\0';
- return(0);
- }
-
- newfile()
- {
-
- if(fcreat(setfil(nfiles), obuf) < 0) {
- mess("Can't create temp\n");
- term();
- }
- nfiles++;
- }
-
- char *
- setfil(i)
- {
-
- if(i < eargc)
- if(eargv[i][0] == '-' && eargv[i][1] == '\0')
- return(0);
- else
- return(eargv[i]);
- i =- eargc;
- filep[0] = i/26 + 'a';
- filep[1] = i%26 + 'a';
- return(file);
- }
-
- oldfile()
- {
-
- if(outfil) {
- if(fcreat(outfil, obuf) < 0) {
- mess("Can't create output\n");
- term();
- }
- } else
- obuf[0] = 1;
- }
-
- cant(f)
- {
- mess("Can't open ");
- mess(f);
- mess("\n");
- term();
- }
-
- term()
- {
- register i;
-
- if(nfiles == eargc)
- nfiles++;
- for(i=eargc; i<nfiles; i++) {
- unlink(setfil(i));
- }
- exit(error);
- }
-
- cmp(a, b)
- int *a,*b;
- {
- register char *ra, *rb;
-
- ra = *a - 1;
- rb = *b - 1;
-
- while(*++ra == *++rb)
- if(*ra == '\n')
- return(0);
-
- return(*rb - *ra);
- }
-
-
- skip(pp, fp, j)
- struct field *fp;
- char *pp;
- {
- register i;
- register char *p;
-
- p = pp;
- if( (i=fp->m[j]) < 0)
- return(-1);
- while(i-- > 0) {
- if(tabchar != 0) {
- while(*p != tabchar)
- if(*p != '\n')
- p++;
- else goto ret;
- p++;
- } else {
- while(blank(*p))
- p++;
- while(!blank(*p))
- if(*p != '\n')
- p++;
- else goto ret;
- }
- }
- if(fp->bflg)
- while(blank(*p))
- p++;
- i = fp->n[j];
- while(i-- > 0) {
- if(*p != '\n')
- p++;
- else goto ret;
- }
- ret:
- return(p);
- }
-
- digit(c)
- {
-
- return(c <= '9' && c >= '0');
- }
-
- mess(s)
- char *s;
- {
- while(*s)
- write(2, s++, 1);
- }
-
- copyproto()
- {
- register int i, *p, *q;
-
- p = proto;
- q = &fields[nfields];
- for(i=0; i<sizeof(proto)/2; i++)
- *q++ = *p++;
- }
-
- field(s,k)
- char *s;
- {
- register struct field *p;
- p = &fields[nfields];
- for(; *s!=0; s++) {
- switch(*s) {
- case '\0':
- return;
-
- case 'a':
- p->code = nofold;
- break;
-
- case 'b':
- p->bflg++;
- break;
-
- case 'd':
- p->ignore = dict;
- break;
-
- case 'n':
- p->nflg++;
- break;
- case 't':
- tabchar = *++s;
- if(tabchar == 0) s--;
- break;
-
- case 'r':
- p->rflg = -1;
- break;
-
- default:
- p->m[k] = number(&s);
- if(*s == '.')
- s++;
- p->n[k] = number(&s);
- s--;
- }
- }
- }
-
- number(ppa)
- char **ppa;
- {
- int n;
- register char *pa;
- pa = *ppa;
- n = 0;
- while(digit(*pa))
- n = n*10 + *pa++ - '0';
- *ppa = pa;
- return(n);
- }
-
- blank(c)
- {
- if(c==' ' || c=='\t')
- return(1);
- return(0);
- }
-
- int (*qscmp)();
- int qses;
-
- qsort(a, n, es, fc)
- char *a;
- int n, es;
- int (*fc)();
- {
- qscmp = fc;
- qses = es;
- qs1(a, a+n*es);
- }
-
- qs1(a, l)
- char *a, *l;
- {
- register char *i, *j, *es;
- char **k;
- char *lp, *hp;
- int n, c;
-
-
- es = qses;
-
- start:
- if((n=l-a) <= es)
- return;
-
-
- n = ((n/(2*es))*es) & 077777;
- hp = lp = a+n;
- i = a;
- j = l-es;
-
-
- for(;;) {
- if(i < lp) {
- if((c = (*qscmp)(i, lp)) == 0) {
- qsexc(i, lp =- es);
- continue;
- }
- if(c < 0) {
- i =+ es;
- continue;
- }
- }
-
- loop:
- if(j > hp) {
- if((c = (*qscmp)(hp, j)) == 0) {
- qsexc(hp =+ es, j);
- goto loop;
- }
- if(c > 0) {
- if(i == lp) {
- qstexc(i, hp =+ es, j);
- i = lp =+ es;
- goto loop;
- }
- qsexc(i, j);
- j =- es;
- i =+ es;
- continue;
- }
- j =- es;
- goto loop;
- }
-
-
- if(i == lp) {
- for(k=lp+2; k<=hp;) *(*k++)='\0';
- if(lp-a >= l-hp) {
- qs1(hp+es, l);
- l = lp;
- } else {
- qs1(a, lp);
- a = hp+es;
- }
- goto start;
- }
-
-
- qstexc(j, lp =- es, i);
- j = hp =- es;
- }
- }
-
- qsexc(i, j)
- char *i, *j;
- {
- register char *ri, *rj, c;
- int n;
-
- n = qses;
- ri = i;
- rj = j;
- do {
- c = *ri;
- *ri++ = *rj;
- *rj++ = c;
- } while(--n);
- }
-
- qstexc(i, j, k)
- char *i, *j, *k;
- {
- register char *ri, *rj, *rk;
- char c;
- int n;
-
- n = qses;
- ri = i;
- rj = j;
- rk = k;
- do {
- c = *ri;
- *ri++ = *rk;
- *rk++ = *rj;
- *rj++ = c;
- } while(--n);
- }
-