home *** CD-ROM | disk | FTP | other *** search
- Date: Wed, 23 Apr 86 20:03:28 EST
- From: Edward_Vielmetti%UMich-MTS.Mailnet@MIT-MULTICS.ARPA
- To: info-ibmpc@USC-ISIB.ARPA
- Subject: Breakup.C
-
- /*** BREAKUP. Break up a (presumably large) file into smaller pieces,
- / about a set of breakpoints. Invoked as:
- / BREAKUP BigFile.Ext -C1 A1 -C2 A2 -C3 A3 etc...
- / where the arguments (breakpoints) are:
- / -B nnn break after next nnn bytes
- / -L nnn " " " " lines
- / -S str break after next occurrence of "str"
- / -LB nnn break after newline after next nnn bytes
- / -LS str break after newline after next occurrence of "str"
- / -R repeat last breakpoint until eof on BigFile
- /
- / Written by Charles Roth, December 1983. This program is in the public
- / domain. */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*** Breakpoint types, as returned by PARSE and tested in MAIN.  NONE means
/    the breakpoint list is exhausted. */
enum {
    DASH_B  = 0,    /* -B  nnn */
    DASH_S  = 1,    /* -S  str */
    DASH_L  = 2,    /* -L  nnn */
    DASH_LB = 3,    /* -LB nnn */
    DASH_LS = 4,    /* -LS str */
    NONE    = 5
};

/*** FUNCTION expands to nothing; it only marks the start of a function
/    definition.  NOT is a readable spelling of the ! operator. */
#define FUNCTION
#define NOT !
-
- FUNCTION main (argc, argv)
- int argc; char *argv[];
- {
- FILE *in, *out, *fopen();
- int outnum, c, breaktest, next, ringlen, ringpos, i, r, strfound;
- long breaknumb, count;
- char fname[80], fext[80], outfile[80], breakstr[80], ringbuf[80];
-
-
- /*** Mark the end of the argument list so PARSE knows when to stop. */
- argv[argc] = NULL;
-
- if (argc <= 1) {
- printf ("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
- "Usage: BREAKUP File.Ext -C1 A1 -C2 A2 -C3 A3 etc...",
- " where each -Cn An means break after...",
- " -B nnn next nnn bytes",
- " -L nnn next nnn lines",
- " -S str next occurrence of \"str\"",
- " -LB nnn end-of-line after next nnn bytes",
- " -LS str end-of-line after next occurrence of \"str\"",
- " -R repeat last breakpoint indefinitely.");
- exit(1);
- }
-
- /*** Make sure the input file exists, and open it.*/
- if ( (in = fopen (argv[1], "r")) == NULL) {
- printf ("No such file %s\n", argv[1]);
- exit(1);
- }
- xstrbreak (argv[1], fname, fext, ".");
-
- /*** Prepare for main character-by-character loop. NEXT means "close
- / file and process next breakpoint." STRFOUND is used by the -LS
- / option to remember if the string was found somewhere in the line. */
- next = 1;
- out = NULL;
- outnum = 0;
- strfound = 0;
-
- while ( (c = getc (in)) >= 0) {
-
- if (next) {
- /*** Reset the counts, close the old file, open the new one. */
- next = 0;
- count = 0;
- if (out != NULL) {fclose (out); printf ("%s\n", outfile);}
- sprintf (outfile, "%s.%03d", fname, outnum++);
- out = fopen (outfile, "w");
-
- /*** Parse the next breakpoint and return its type. */
- breaktest = parse (argv, breakstr, &breaknumb);
-
- /*** Initialize the ring buffer for testing -s, -ls strings */
- if (breaktest==DASH_S || breaktest==DASH_LS) {
- ringlen = strlen (breakstr);
- ringpos = 0;
- }
- }
-
- putc (c, out);
-
- /*** For each possible type of breakpoint, test the type and see if
- / the appropriate condition has happened to break off a new piece. */
- if (breaktest==DASH_B && ++count >= breaknumb) next = 1;
- if (breaktest==DASH_L && c=='\n' && ++count >= breaknumb) next = 1;
- if (breaktest==DASH_LB && ++count >= breaknumb && c=='\n') next = 1;
- if (breaktest==DASH_S || breaktest==DASH_LS) {
- /*** The "str" test is the most difficult. Keep a ring buffer of
- / the characters encountered so far, size equal to the size of
- / the break string. Each time around the main character loop,
- / add the new char to the end of the ring buffer, and compare
- / the ring buffer against the break string. */
- ringbuf[ringpos] = c;
- ringpos = (ringpos + 1) % ringlen;
- for (i=0, r=ringpos; breakstr[i]; ++i, r = (r+1) % ringlen)
- if (breakstr[i] != ringbuf[r]) break;
- if (breaktest==DASH_S) next = NOT breakstr[i];
- if (breaktest==DASH_LS) strfound = NOT breakstr[i] || strfound;
- }
-
- if (breaktest==DASH_LS && c=='\n') {
- next = strfound;
- strfound = 0;
- }
-
- }
- fclose (out);
- printf ("%s\n", outfile);
- }
-
-
- /*** PARSE breakpoint commands. On each call, returns 'next' breakpoint
- / type, sets BREAKNUMB to 'nnn' part of -l, -b, -lb breakpoints, and
- / BREAKSTR to string part of -s, -ls breakpoint. */
-
- FUNCTION parse (argv, breakstr, breaknumb)
- char breakstr[], *argv[];
- long *breaknumb;
- {
- static int a = 1;
- static char realstr[2] = {0, 0};
- char comstr[80], octalstr[10];
- int type, p, octalval;
-
- if (argv[++a] == NULL) {--a; return(NONE);}
-
- xstrlower (argv[a]);
- if (xstreq (argv[a], "-r")) a = a-2;
-
- if (xstreq (argv[a], "-l")) type = DASH_L;
- else if (xstreq (argv[a], "-b")) type = DASH_B;
- else if (xstreq (argv[a], "-s")) type = DASH_S;
- else if (xstreq (argv[a], "-lb")) type = DASH_LB;
- else if (xstreq (argv[a], "-ls")) type = DASH_LS;
-
- ++a;
- if (type==DASH_S || type==DASH_LS) {
- /*** Convert the various \ escape sequences to their proper form. */
- strcpy (breakstr, argv[a]);
- while (xstralter (breakstr, "\\\"", "\"")) ;
- while (xstralter (breakstr, "\\n", "\n")) ;
- while (xstralter (breakstr, "\\\\", "\\")) ;
-
- /*** Convert \ddd octal strings to the actual characters. */
- for (p=0; breakstr[p]; ++p) {
- if (breakstr[p] == '\\') {
- xstrsub (octalstr, breakstr, p, 4);
- if (NOT sscanf (octalstr, "\\%3o", &octalval)) continue;
- realstr[0] = octalval;
- xstralter (breakstr, octalstr, realstr);
- }
- }
- }
-
- if (type==DASH_B || type==DASH_LB || type==DASH_L)
- sscanf (argv[a], "%ld", breaknumb);
-
- return(type);
- }
-