home *** CD-ROM | disk | FTP | other *** search
- static char Uni_id[] = "@(#)28.1";
- /* UNISRC_ID: @(#)bsearchstr.c 28.1 86/01/04 */
- /*
- * Binary search routine for sorted file of strings (lines).
- * Compile with -DDEBUG for standalone testing and extra output.
- */
-
- #include <stdio.h>
- #include <ctype.h>
-
- #define chNull ('\0')
- #define cpNull ((char *) NULL)
-
-
- #ifdef DEBUG
- /*
- * Test usage: cmd file pattern [caseins [tellnext]]
- *
- * If there is a third argument, sends caseins == 1.
- * If there is a fourth argument, sends tellnext == 1.
- */
-
- main (argc, argv)
- int argc;
- char **argv;
- {
- FILE *fp;
- char buf[BUFSIZ];
-
- fp = fopen (argv[1], "r");
- printf ("==min=== ==max=== ==pos1== ==pos2== cmp match\n");
- printf ("Search returns %ld\n",
- bsearchstr (fp, argv[2], (argc > 3), (argc > 4)));
- printf ("%s\n", fgets (buf, BUFSIZ, fp));
- }
- #endif /* DEBUG */
-
-
- /***************************************************************************
- * B S E A R C H S T R
- *
- * Binary search a stream consisting of sorted lines of data for the first
- * line that starts with the given pattern, or the next line if none and
- * tellnext == 1. Set the file location to the first byte of that line
- * and return the offset.
- *
- * When caseins is set, takes pains not to modify the memory pointed to by
- * pattern. Uses multi-statement macros for efficiency.
- *
- * See bsearchstr(3) for more information.
- *
- * Author: Alan Silverstein, Hewlett-Packard Fort Collins Systems Division
- */
-
- #define LT 0
- #define EQ 1
- #define GT 2
-
- #define RETURN(value) { if (caseins) free (pattern); return (value); }
- #define SETPOS { if (fseek (filep, pos, 0) < 0) RETURN (-2L); }
-
- long bsearchstr (filep, pattern, caseins, tellnext)
- FILE *filep; /* file to search */
- char *pattern; /* start of line to match */
- int caseins; /* case insensitive? */
- int tellnext; /* tell next if not found? */
- {
- /* start of first line not yet compared (lower limit) */
- long min = 0;
- /* start of line after last line not yet compared (upper limit) */
- long max;
-
- long pos; /* current offset in file */
- char *patp; /* pattern pointer */
- /* warning: this must be an int, not a char */
- register int ch; /* current char to compare */
- register int cmp; /* results of comparison */
- int match = 0; /* true if match found */
-
- char *malloc();
-
- /*
- * PREPARE PATTERN FOR CASE-INSENSITIVE SEARCH:
- *
- * Use toupper(), not tolower(), to be compatible with the actual function
- * of sort -f.
- */
-
- if (caseins)
- {
- int len = strlen (pattern) + 1; /* chars to uppercase */
- char *cp; /* for copying data */
- char *cpend; /* end of copy range */
-
- if ((cp = patp = malloc (len)) == cpNull)
- return (-2L);
-
- /* now remember to free (pattern) before returning */
- /* can use the RETURN() macro to accomplish this */
-
- cpend = cp + len;
-
- while (cp < cpend) /* copy chNull also */
- *cp++ = toupper (*pattern++); /* toupper() is not a macro! */
-
- pattern = patp; /* "move" to new location */
- }
-
- /*
- * SET MAX TO END OF FILE (byte past last) by seeking there:
- */
-
- if ((fseek (filep, 0L, 2) < 0) || ((max = ftell (filep)) < 0))
- RETURN (-2L);
-
- /*
- * SET NEXT STARTING POSITION (where to find string and compare):
- */
-
- while (min < max) /* do until closure happens */
- {
- pos = (min + max) / 2;
- SETPOS;
-
- #ifdef DEBUG
- printf ("%8ld %8ld %8ld ", min, max, pos);
- #endif
-
- /*
- * LOOK FOR THE NEXT END OF LINE IN THE FORWARD DIRECTION:
- */
-
- while ((pos < max) && (getc (filep) != '\n'))
- pos++;
-
- pos++; /* skip newline */
-
- /*
- * If we ran into max, we are nearly done, assuming the current last line is
- * not really huge compared to all the others (but this will work out OK too).
- * Simply reset pos = min and check the first current line.
- */
-
- if (pos >= max)
- pos = min;
-
- /*
- * COMPARE THE CURRENT LINE TO THE PATTERN:
- *
- * If pattern[0] == chNull, never does the for loop, so it matches anything.
- */
-
- SETPOS;
-
- for (cmp = EQ, patp = pattern; (*patp != chNull); patp++)
- {
- ch = caseins ? toupper (getc (filep)) : getc (filep);
-
- if ((ch < *patp) || (ch == '\n') || (ch == EOF))
- { /* line < pattern */
- cmp = LT;
- break;
- }
- else if (ch > *patp) /* line > pattern */
- {
- cmp = GT;
- break;
- }
- }
-
- match += (cmp == EQ); /* note a match */
-
- #ifdef DEBUG
- printf ("%8ld %c %5d\n", pos,
- ((cmp == LT) ? '<' : ((cmp == EQ) ? '=' : '>')), match);
- #endif
-
- /*
- * MOVE MIN OR MAX according to the results of comparison:
- *
- * If pattern[0] == chNull, cmp == EQ, which is "safe".
- */
-
- if (cmp == LT) /* min => next line */
- {
- min = pos + (patp - pattern);
-
- while ((ch != '\n') && (min < max)) /* find end */
- {
- ch = getc (filep);
- min++;
- }
- min++; /* skip newline */
- }
- else /* max => this line */
- {
- max = pos;
- }
-
- } /* while */
-
- /*
- * FINISH UP:
- *
- * Note that min could be one too large if the last line is unterminated
- * and less than pattern, so we use max instead.
- */
-
- if (match || tellnext) /* success */
- {
- pos = max;
- SETPOS;
- RETURN (pos);
- }
- else /* failure */
- {
- RETURN (-1L);
- }
-
- } /* bsearchstr */
-