home *** CD-ROM | disk | FTP | other *** search
/* join: relational join of files F1 and F2 on a common field */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define F1	0	/* index of the first input file in f[], buf[] and ppi[] */
#define F2	1	/* index of the second input file */
#define NFLD	20	/* max field per line (size of each ppi[] row) */
/* Compare the current join fields of the two files; result is as for cmp(). */
#define comp()	cmp(ppi[F1][j1], ppi[F2][j2])

/*
 * Forward declarations: main() calls these before their definitions, and
 * implicit function declarations are no longer valid C.
 */
int input(int n);
int output(int on1, int on2);
int cmp(char *s1, char *s2);
int error();	/* called with a varying number of arguments, so no prototype */

FILE *f[2];		/* the two input streams, indexed by F1/F2 */
char buf[2][BUFSIZ];	/* input lines */
char *ppi[2][NFLD];	/* pointers to fields in lines */
char *s1, *s2;		/* assigned once in main() from ppi[][]; not read in this file */
int j1 = 1;		/* join of this field of file 1 */
int j2 = 1;		/* join of this field of file 2 */
int olist[2*NFLD];	/* output these fields */
int olistf[2*NFLD];	/* from these files */
int no;			/* number of entries in olist */
int sep1 = ' ';		/* default field separator */
int sep2 = '\t';	/* second separator accepted on input; -t sets both */
char *null = "";	/* printed in place of an empty or missing -o field (-e) */
int unpub1;		/* not referenced anywhere in this file */
int unpub2;		/* not referenced anywhere in this file */
int aflg;		/* -a bits: 1 = print unpaired file 1 lines, 2 = file 2 */
-
- main(argc, argv)
- char *argv[];
- {
- int i;
- int n1, n2;
- long top2, bot2;
- long ftell();
-
- while (argc > 1 && argv[1][0] == '-') {
- if (argv[1][1] == '\0')
- break;
- switch (argv[1][1]) {
- case 'a':
- switch(argv[1][2]) {
- case '1':
- aflg |= 1;
- break;
- case '2':
- aflg |= 2;
- break;
- default:
- aflg |= 3;
- }
- break;
- case 'e':
- null = argv[2];
- argv++;
- argc--;
- break;
- case 't':
- sep1 = sep2 = argv[1][2];
- break;
- case 'o':
- for (no = 0; no < 2*NFLD; no++) {
- if (argv[2][0] == '1' && argv[2][1] == '.') {
- olistf[no] = F1;
- olist[no] = atoi(&argv[2][2]);
- } else if (argv[2][0] == '2' && argv[2][1] == '.') {
- olist[no] = atoi(&argv[2][2]);
- olistf[no] = F2;
- } else
- break;
- argc--;
- argv++;
- }
- break;
- case 'j':
- if (argv[1][2] == '1')
- j1 = atoi(argv[2]);
- else if (argv[1][2] == '2')
- j2 = atoi(argv[2]);
- else
- j1 = j2 = atoi(argv[2]);
- argc--;
- argv++;
- break;
- }
- argc--;
- argv++;
- }
- for (i = 0; i < no; i++)
- olist[i]--; /* 0 origin */
- if (argc != 3)
- error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
- j1--;
- j2--; /* everyone else believes in 0 origin */
- s1 = ppi[F1][j1];
- s2 = ppi[F2][j2];
- if (argv[1][0] == '-')
- f[F1] = stdin;
- else if ((f[F1] = fopen(argv[1], "r")) == NULL)
- error("can't open %s", argv[1]);
- if ((f[F2] = fopen(argv[2], "r")) == NULL)
- error("can't open %s", argv[2]);
-
- #define get1() n1=input(F1)
- #define get2() n2=input(F2)
- get1();
- bot2 = ftell(f[F2]);
- get2();
- while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) {
- if(n1>0 && n2>0 && comp()>0 || n1==0) {
- if(aflg&2) output(0, n2);
- bot2 = ftell(f[F2]);
- get2();
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
- if(aflg&1) output(n1, 0);
- get1();
- } else /*(n1>0 && n2>0 && comp()==0)*/ {
- while(n2>0 && comp()==0) {
- output(n1, n2);
- top2 = ftell(f[F2]);
- get2();
- }
- fseek(f[F2], bot2, 0);
- get2();
- get1();
- for(;;) {
- if(n1>0 && n2>0 && comp()==0) {
- output(n1, n2);
- get2();
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
- fseek(f[F2], bot2, 0);
- get2();
- get1();
- } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
- fseek(f[F2], top2, 0);
- bot2 = top2;
- get2();
- break;
- }
- }
- }
- }
- return(0);
- }
-
- input(n) /* get input line and split into fields */
- {
- register int i, c;
- char *bp;
- char **pp;
-
- bp = buf[n];
- pp = ppi[n];
- if (fgets(bp, BUFSIZ, f[n]) == NULL)
- return(0);
- for (i = 0; ; i++) {
- if (sep1 == ' ') /* strip multiples */
- while ((c = *bp) == sep1 || c == sep2)
- bp++; /* skip blanks */
- else
- c = *bp;
- if (c == '\n' || c == '\0')
- break;
- *pp++ = bp; /* record beginning */
- while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
- bp++;
- *bp++ = '\0'; /* mark end by overwriting blank */
- /* fails badly if string doesn't have \n at end */
- }
- *pp = 0;
- return(i);
- }
-
- output(on1, on2) /* print items from olist */
- int on1, on2;
- {
- int i;
- char *temp;
-
- if (no <= 0) { /* default case */
- printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]);
- for (i = 0; i < on1; i++)
- if (i != j1)
- printf("%c%s", sep1, ppi[F1][i]);
- for (i = 0; i < on2; i++)
- if (i != j2)
- printf("%c%s", sep1, ppi[F2][i]);
- printf("\n");
- } else {
- for (i = 0; i < no; i++) {
- temp = ppi[olistf[i]][olist[i]];
- if(olistf[i]==F1 && on1<=olist[i] ||
- olistf[i]==F2 && on2<=olist[i] ||
- *temp==0)
- temp = null;
- printf("%s", temp);
- if (i == no - 1)
- printf("\n");
- else
- printf("%c", sep1);
- }
- }
- }
-
/*
 * Print "join: ", then the printf-style message s1 formatted with
 * s2..s5, then a newline, all on stderr, and exit with status 1.
 * Never returns.
 *
 * Callers pass between one and five arguments, so the definition is kept
 * in the old style (a prototype would reject those calls).  All format
 * arguments are declared as pointers: left undeclared they would default
 * to int and lose the upper half of a pointer wherever int is narrower,
 * before it ever reached a %s conversion.
 */
int
error(s1, s2, s3, s4, s5)
char *s1, *s2, *s3, *s4, *s5;
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
}
-
/*
 * Order two join fields.  Returns a value <0, 0 or >0 as s1 sorts
 * before, equal to, or after s2, using strcmp() byte ordering.
 *
 * A null pointer stands for a field the line does not have (input()
 * stores a null pointer after the last field) and is compared as the
 * empty string instead of being handed to strcmp().
 */
int
cmp(char *s1, char *s2)
{
	if (s1 == NULL)
		s1 = "";
	if (s2 == NULL)
		s2 = "";
	return (strcmp(s1, s2));
}
-