home *** CD-ROM | disk | FTP | other *** search
/*************************************************************************\
  cookietool: remove duplicate entries from a cookie file

  Various options for sorting the output. The expected file format is
  plain text with a "%%" line ending each cookie.

  Usage:
    cookietool [options] <database> [logfile]

  options:   meaning:
    -s         sort output
    -sl          " , looking at the last line only
    -sw          " , looking at the last word only
    -s<sep>      " , starting after the last occurrence of <sep>, e.g. '--'
    -p         passive, don't delete anything
    -c         case-sensitive comparisons (for both sorting and deleting)
    -d[0-3]    how fussy about word delimiters? (default: 2)
    -a         delete cookies that are "abbreviations" of another, too
    -o         overwrite the input file directly (no tempfile)

  Note: Option -o is risky and should only be used if no disk space for a
        tempfile is available.
\*************************************************************************/
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include "strstuff.h"
-
- char version[] = "$VER: cookietool 2.1 (19.11.96)";
-
- struct cookie {
- UBYTE *text;
- UBYTE *sorthook;
- long number;
- };
-
- struct cookie *clist;
-
- long listsize = 1000; /* will be adjusted dynamically */
- long listed = 0;
-
- #define FBUFSIZE 16384 /* we'll use larger file buffers */
- #define CBUFSIZE 20000
- #define LBUFSIZE 2000
- UBYTE cbuf[CBUFSIZE]; /* large enough to hold one complete cookie */
- UBYTE line[LBUFSIZE]; /* large enough to hold the longest line */
-
-
- void help(UBYTE *s)
- /* print a help text and nag about illegal parameter <s> */
- {
- if (s) printf("illegal option '%s'\n", s);
- printf("usage: cookietool [options] <database> \n");
- printf("where options are:\n");
- printf(" -p passive, don't delete anything\n");
- printf(" -a treat 'abbreviations' as doubles (i.e. delete them, too)\n");
- printf(" -s sort cookies\n");
- printf(" -sl \" , looking at the last line only\n");
- printf(" -sw \" , looking at the last word only\n");
- printf(" -s<sep> \" , starting after the last <sep>, e.g. '-s--'\n");
- printf(" -d[0-3] how fussy about word delimiters? (default: 2)\n");
- printf(" -c case sensitive comparisons\n");
- printf(" -o overwrite directly, no tempfile (caution!)\n");
- }
-
-
- int cookie_cmp(struct cookie *a, struct cookie *b)
- /* compares cookies by string and, if these are identical, by number */
- {
- int c;
- if (c = str_cmp(a->sorthook, b->sorthook))
- return c;
- else
- return (a->number) - (b->number);
- }
-
- void sift(struct cookie v[], long i, long m, int mode)
- /* centre routine to heapsort() */
- /* mode==0: sort by number, mode==±1: sort by name (ascending/descending) */
- {
- long j;
- struct cookie temp;
-
- if (mode != 0) { /* by name */
- while ((j = 2*(i+1)-1) <= m) {
- if (j < m && mode*cookie_cmp(&v[j], &v[j+1]) < 0)
- j++;
- if (mode*cookie_cmp(&v[i], &v[j]) < 0) {
- temp = v[i]; v[i] = v[j]; v[j] = temp;
- i = j;
- } else
- i = m; /* done */
- }
- } else { /* by number */
- while ((j = 2*(i+1)-1) <= m) {
- if (j < m && (v[j].number < v[j+1].number) )
- j++;
- if (v[i].number < v[j].number) {
- temp = v[i]; v[i] = v[j]; v[j] = temp;
- i = j;
- } else
- i = m; /* done */
- }
- }
- }
-
-
- void my_heapsort(struct cookie v[], long n, int mode)
- /* mode==0: sort by number, mode==±1: sort by name (ascending/descending) */
- {
- long i;
- struct cookie temp;
-
- if (n<2) /* no sorting necessary */
- return;
- for (i = n/2-1; i >= 0; i--)
- sift(v, i, n-1, mode);
- for (i = n-1; i >= 1; i--) {
- temp = v[0]; v[0] = v[i]; v[i] = temp;
- sift(v, 0, i-1, mode);
- }
- }
-
-
- UBYTE hooktarget[16];
-
- void set_hooks(int mode)
- /* adjust sorthooks for the final sort, according to the desired mode */
- {
- long l;
- int hot;
- UBYTE *s;
-
- printf("adjusting sort hooks"); fflush(stdout);
- for (l=0; l<listed; l++) {
- s = clist[l].text;
- switch (mode) {
- case 2: /* start of last line */
- hot = 1;
- while (*s) {
- if (*s == '\n')
- hot = 1;
- else if (hot) {
- clist[l].sorthook = s; hot = 0;
- }
- s++;
- } break;
- case 3: /* start of last word */
- hot = 1;
- while (*s) {
- if (isspace(*s))
- hot = 1;
- else if (hot) {
- clist[l].sorthook = s; hot = 0;
- }
- s++;
- } break;
- case 4: /* at last occurence of <hooktarget> */
- while (s) {
- clist[l].sorthook = s++;
- s = strstr(s, hooktarget);
- } break;
- default:
- }
- }
- printf(", done.\n");
- }
-
-
- void one_cookie(int doubles, int abbrevs, int sortmode, FILE *fp)
- /* delete cookies and log them to a file */
- {
- long i, j, dbl = 0, abr = 0;
- int c;
-
- if (doubles) {
- printf("removing double entries");
- if (abbrevs) printf(" + 'abbreviations'");
- fflush(stdout);
- my_heapsort(clist, listed, -1); /* sort descending by string */
- for (i = listed-1; i > 0; i = j) {
- for (j = i-1; j >= 0 && ( (c=str_cmp(clist[j].text,clist[i].text)) == 0
- || (abbrevs && c == STR_SHORTER) ); j--) {
- if (fp)
- if (fprintf(fp, "%s\n%%%%\n", clist[j].text) <= 0) {
- printf("\nfile error, aborted !!!\n");
- exit(20);
- }
- free(clist[j].text);
- clist[j] = clist[--listed];
- if (c == 0) dbl++; else abr++;
- }
- }
- printf(", done. (%ld + %ld found)\n",dbl,abr);
- }
- if (sortmode>0) {
- if (sortmode>1) set_hooks(sortmode);
- printf("sorting"); fflush(stdout);
- my_heapsort(clist, listed, 1); /* sort ascending by string */
- } else {
- printf("restoring order"); fflush(stdout);
- my_heapsort(clist, listed, 0); /* sort by number */
- }
- printf(", done.\n");
- }
-
-
- void read_cookies(FILE *fp)
- {
- long cbuflen, ignored=0;
- int lines;
- UBYTE *s;
-
- printf("reading cookies"); fflush(stdout);
- strcpy(cbuf,""); lines=0; cbuflen=0;
- while (fgets(line,LBUFSIZE,fp)) {
- if (strncmp(line,"%%",2)==0) { /* "end of cookie"-marker */
- if (lines>0) { /* store the cookie */
- /* but drop the last LF, to avoid trouble in recognizing abbrev's: */
- cbuflen = strlen(cbuf);
- if (cbuf[cbuflen-1] == '\n')
- cbuf[--cbuflen] = '\0';
- if (clist[listed].text = malloc(cbuflen+1)) { /* mind the '\0'! */
- clist[listed].number = listed+ignored;
- strcpy(clist[listed].text, cbuf);
- s = clist[listed].sorthook = clist[listed].text;
- } else {
- printf("\nout of memory\n");
- exit(20);
- }
- if (++listed == listsize) {
- listsize = 3 * listsize / 2;
- clist = realloc(clist, listsize * sizeof(struct cookie));
- if (!clist) {
- printf("\nlist reallocation failed\n");
- exit(20);
- }
- }
- } else {
- ignored++; /* or ignore it */
- }
- /* start a new one */
- strcpy(cbuf,""); lines=0; cbuflen=0;
- } else {
- if ((cbuflen += strlen(line)) >= CBUFSIZE) {
- printf("\ncookie too big (>%ld chars)\n", CBUFSIZE);
- exit(20);
- }
- strcat(cbuf,line); lines++;
- }
- }
- printf(", done. (%ld read, %ld empty)\n", listed, ignored);
- }
-
-
- write_cookies(FILE *fp)
- /* also frees the allocated memory! */
- {
- long l;
-
- printf("writing cookies"); fflush(stdout);
- for (l=0; l<listed; l++) {
- if (fprintf(fp, "%s\n%%%%\n", clist[l].text) <= 0) {
- printf("\nfile error, aborted !!!\n");
- exit(20);
- }
- free(clist[l].text);
- }
- printf(", done. (%ld written)\n", listed);
- }
-
-
- int main(int argc, char *argv[])
- {
- UBYTE *s;
- int dirty = 0, passive = 0, abbrevs = 0, finalsort = 0;
- int case_sense = 0, bordermode = 2;
- UBYTE name1[100], name2[100], name3[100];
- FILE *infile, *outfile, *logfile;
-
- name1[0] = name2[0] = name3[0] = '\0';
- if (argc<2) {
- help(NULL);
- return 5;
- }
- while (--argc) {
- s = *++argv;
- if (*s != '-') {
- if (name1[0] == '\0')
- strcpy(name1, s);
- else
- strcpy(name3, s);
- } else {
- switch (*++s) {
- case 's':
- switch (*++s) {
- case '\0': finalsort = 1; break;
- case 'l': finalsort = 2; break;
- case 'w': finalsort = 3; break;
- default: if ispunct(*s) {
- finalsort = 4; strncpy(hooktarget,s,15);
- } else {
- help(argv[0]); return 5;
- }
- } break;
- case 'd': if isdigit(*++s)
- bordermode = atoi(s);
- else {
- help(argv[0]); return 5;
- } break;
- case 'c': case_sense = 1; break;
- case 'a': abbrevs = 1; break;
- case 'p': passive = 1; break;
- case 'o': dirty = 1; break;
- default:
- help(argv[0]); return 5;
- }
- }
- }
- /* important, before calling anything from strstuff: */
- str_setup(bordermode, case_sense);
- if (name1[0] == '\0') {
- help(NULL);
- return 5;
- }
- if (dirty) {
- strcpy(name2, name1);
- printf("Warning! You have enabled direct writeback mode!\n");
- printf("\e[2mDon't break (or crash) cookietool now, ");
- printf("or you will inevitably lose data!\e[0m\n");
- } else
- strcpy(name2, "ct_temp_crunchfile");
- printf("cookietool "); print_strstat();
- clist = malloc(listsize * sizeof(struct cookie));
- if (!clist) {
- printf("list allocation failed\n");
- return 20;
- }
- if (!(infile = fopen(name1,"r"))) {
- printf("Can't open %s for input!\n", name1);
- return 10;
- }
- setvbuf(infile, NULL, _IOFBF, FBUFSIZE);
- if (name3[0] != '\0') {
- if (!(logfile = fopen(name3,"w"))) {
- printf("Can't open %s for output!\n", name3);
- return 10;
- }
- } else
- logfile = NULL;
- read_cookies(infile); fclose(infile);
- one_cookie(!passive, abbrevs, finalsort, logfile);
- if (logfile) fclose(logfile);
- if (!(outfile = fopen(name2,"w"))) {
- printf("Can't open %s for output!\n", name2);
- return 10;
- }
- setvbuf(outfile, NULL, _IOFBF, FBUFSIZE);
- write_cookies(outfile); fclose(outfile);
- free(clist);
- if (!dirty) { /* replace the input file */
- if (remove(name1) != 0 || rename(name2, name1) != 0) {
- printf("Couldn't overwrite the input file! Your cookies are in '%s'.\n", name2);
- return 5;
- }
- }
- return 0;
- }
-
-