home *** CD-ROM | disk | FTP | other *** search
- /*************************************************************************\
- cdbsplit: split parts off your cookie database,
- by keyword, by line length, by number of lines, or as groups of
- "similar" cookies.
- Expected file format is plain text with a "%%" line ending each cookie.
- Usage:
- cdbsplit [options] <database> <newfile>
- options: meaning:
- -l<lines> / -L<lines> range for number of lines in a cookie
- -w<width> / -W<width> range for cookie line width
- -k<keywd> / -K<keywd> search for (or avoid) a keyword
- -f<n> / -F<n> extract the first <n> / all but the first <n> cookies
- -m<n> find groups of cookies starting with <n> matching characters
- -d[0-3] how fussy about word delimiters? (default: 2)
- -c case-sensitive comparisons (for both keywords and groups)
- -a append, if output file exists (instead of failing)
- \*************************************************************************/
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include "strstuff.h"
-
/* Program identification string. */
char version[] = "$VER: cdbsplit 2.1 (19.11.96)";

/* Buffer sizes, in bytes. */
#define FBUFSIZE 16384      /* stdio buffer size passed to setvbuf() */
#define CBUFSIZE 20000      /* room for one complete cookie */
#define LBUFSIZE 2000       /* room for the longest input line */

/* Work buffers shared by the functions below. */
char line[LBUFSIZE];        /* the input line read most recently */
char cbuf[CBUFSIZE];        /* the cookie currently being collected */
char cbak[CBUFSIZE];        /* copy of the previous cookie, for group search */
char uppercase[256];        /* conversion table */

/* Selection criteria, set from the command line in main(). */
int l_min = 0;              /* -l: minimum number of lines */
int l_max = 0;              /* -L: maximum number of lines, 0 = no limit */
int w_min = 0;              /* -w: minimum line width */
int w_max = 0;              /* -W: maximum line width, 0 = no limit */
int matchlen = 0;           /* -m: number of leading chars that must match */
int avoid = 0;              /* set by -K: select cookies WITHOUT the keyword */
long firstonly = 0;         /* -f: positive count, -F: negated count */
char target[100];           /* -k / -K keyword, empty = no keyword test */
-
-
/*
 * Print the usage summary on stdout.
 * When <s> is not NULL it is reported first as an illegal option.
 */
void help(char *s)
{
    static const char *const usage[] = {
        "usage: cdbsplit [options] <cookiefile> <newfile>\n",
        "where options are:\n",
        " -l<lines> / -L<lines> range for number of lines in a cookie\n",
        " -w<width> / -W<width> range for cookie line width\n",
        " -k<keywd> / -K<keywd> search for (or avoid) a keyword\n",
        " -f<n> / -F<n> extract / avoid the first <n> cookies\n",
        " -m<m> extract groups of cookies with <m> matching chars\n",
        " -d[0-3] how fussy about word delimiters? (default: 2)\n",
        " -c case sensitive comparisons\n",
        " -a append to an existing output file\n"
    };
    size_t i;

    if (s != NULL)
        printf("illegal option '%s'\n", s);

    /* none of the usage lines contains a conversion, so fputs() emits
       exactly what printf() would */
    for (i = 0; i < sizeof usage / sizeof usage[0]; i++)
        fputs(usage[i], stdout);
}
-
-
- void filter_cookies(FILE *fp1, FILE *fp2, FILE *fp3)
- {
- long count=0, hits=0, cbuflen, result;
- int ok=0, ok2, lines, width, w;
-
- strcpy(cbak,""); strcpy(cbuf,""); cbuflen = lines = width = 0;
- while (fgets(line,LBUFSIZE,fp1)) {
- if (strncmp(line,"%%",2)==0) { /* "end of cookie"-marker */
- /* perform the checks: */
- if (matchlen) {
- ok2 = ok; ok = (strn_cmp(cbak, cbuf, matchlen) == 0);
- if (*cbak) { /* skip the first loop */
- if (ok || ok2) {
- result = fprintf(fp2, "%s%%%%\n", cbak); hits++;
- } else
- result = fprintf(fp3, "%s%%%%\n", cbak);
- if (result<=0) { printf("\nfile error, aborted !!!\n"); exit(20); }
- }
- strcpy(cbak, cbuf);
- } else {
- ok = (lines >= l_min) && (width >= w_min);
- if (l_max) ok = ok && (lines <= l_max);
- if (w_max) ok = ok && (width <= w_max);
- if (firstonly>0) ok = ok && (count < firstonly);
- if (firstonly<0) ok = ok && (count >= -firstonly);
- if (target[0]) {
- ok2 = str_str(cbuf, target) != NULL;
- ok = (avoid) ? ok && !ok2 : ok && ok2;
- }
- if (ok) { /* "good" cookie, copy it */
- result = fprintf(fp2, "%s%%%%\n", cbuf); hits++;
- } else /* dump "bad" cookies to the other file */
- result = fprintf(fp3, "%s%%%%\n", cbuf);
- if (result<=0) { printf("\nfile error, aborted !!!\n"); exit(20); }
- }
- count++;
- if (count%100 == 0) {
- printf("Copying cookies, %ld hits, %ld misses.\r", hits, count-hits);
- fflush(stdout);
- }
- strcpy(cbuf,""); /* start a new cookie */
- cbuflen = lines = width = 0;
- } else {
- w = strlen(line);
- if ((cbuflen += w) >= CBUFSIZE) {
- printf("\ncookie too big (>%ld chars)\n", CBUFSIZE);
- exit(20);
- }
- strcat(cbuf,line); lines++;
- if (w > width) width = w;
- }
- }
- if (matchlen) { /* one cookie still pending in this mode */
- if (ok) {
- result = fprintf(fp2, "%s%%%%\n", cbak); hits++;
- } else
- result = fprintf(fp3, "%s%%%%\n", cbak);
- if (result<=0) { printf("\nfile error, aborted !!!\n"); exit(20); }
- }
- printf("\nDone, %ld hits out of %ld.\n", hits, count);
- }
-
-
- int main(int argc, char *argv[])
- {
- char *s;
- char name1[100], name2[100];
- char name3[] = "cdb_temp_kickme";
- int append = 0, case_sense = 0, bordermode = 2;
- FILE *infile, *hitfile, *dumpfile;
-
- name1[0] = name2[0] = target[0] = '\0';
- if (argc < 3) {
- help(NULL);
- return 5;
- }
- while (--argc) {
- s = *++argv;
- if (*s != '-') {
- if (name1[0] == '\0')
- strcpy(name1, s);
- else
- strcpy(name2, s);
- } else {
- switch (*++s) {
- case 'k': strcpy(target, ++s); break;
- case 'K': strcpy(target, ++s); avoid=1; break;
- case 'm': matchlen = atoi(++s); break;
- case 'l': l_min = atoi(++s); break;
- case 'L': l_max = atoi(++s); break;
- case 'w': w_min = atoi(++s); break;
- case 'W': w_max = atoi(++s); break;
- case 'f': firstonly = atol(++s); break;
- case 'F': firstonly = -atol(++s); break;
- case 'a': append = 1; break;
- case 'c': case_sense = 1; break;
- case 'd': if isdigit(*++s)
- bordermode = atoi(s);
- else {
- help(argv[0]); return 5;
- } break;
- default: help(argv[0]); return 5;
- }
- }
- }
- str_setup(bordermode, case_sense); /* !!! */
- if (name1[0] == '\0' || name2[0] == '\0') {
- help(NULL);
- return 5;
- }
- if (!(infile = fopen(name1,"r"))) {
- printf("Can't open '%s' for input!\n", name1);
- return 10;
- }
- setvbuf(infile, NULL, _IOFBF, FBUFSIZE);
- if (!append && (hitfile = fopen(name2,"r"))) {
- printf("Error: '%s' exists! Use -a to append.\n", name2);
- return 10;
- }
- if (!(hitfile = fopen(name2,"a"))) {
- printf("Can't open '%s' for output!\n", name2);
- return 10;
- }
- setvbuf(hitfile, NULL, _IOFBF, FBUFSIZE);
- if (!(dumpfile = fopen(name3,"w"))) {
- printf("Can't open '%s' for output!\n", name3);
- return 10;
- }
- setvbuf(dumpfile, NULL, _IOFBF, FBUFSIZE);
- printf("cdbsplit "); print_strstat();
- printf("Extracting from '%s' to '%s',\n", name1, name2);
- if (matchlen) {
- printf(" searching for groups of cookies with %d matching characters.\n",
- matchlen);
- } else {
- if (target[0] != '\0')
- printf(" %s is \"%s\".\n",
- (avoid) ? "string to avoid" : "search string", target);
- if (l_max)
- printf(" looking for cookies %d - %d lines long.\n", l_min, l_max);
- else if (l_min)
- printf(" looking for cookies at least %d lines long.\n", l_min);
- if (w_max)
- printf(" looking for cookies %d - %d columns wide.\n", w_min, w_max);
- else if (w_min)
- printf(" looking for cookies at least %d columns wide.\n", w_min);
- if (firstonly>0)
- printf(" stopping after %ld cookies.\n", firstonly);
- else if (firstonly<0)
- printf(" starting after %ld cookies.\n", -firstonly);
- }
- /* OK, here we go: */
- filter_cookies(infile,hitfile,dumpfile);
- fclose(infile); fclose(hitfile); fclose(dumpfile);
- if (remove(name1) != 0 || rename(name3, name1) != 0) {
- printf("Couldn't overwrite the input file! Your cookies are in '%s'.\n", name3);
- return 5;
- }
- return 0;
- }
-
-