home *** CD-ROM | disk | FTP | other *** search
/* Identification string of this source file; print_version() writes it out. */
static char *sccsid = "@(#) strings.c (v1.6.9 5/23/89)";
- /*
- * strings
- * =======
- *
- * Find and output the printable strings with a certain minimal length or
- * more in any files.
- *
- * This is a rewrite of 4BSD strings, which had some errors:
- * - ^L is a printable character (4.3BSD, SUN OS)
- * - 0x80 is a printable character (4.3BSD, SUN OS)
- * - on some systems (MX2) it does not get the segment of initialized data
- * correctly
- *
- * A printable string is any string of printable characters. A printable
- * character is any obvious one, plus blank. If flag -t was specified TAB
- * is considered a printable character too. If flag -c was specified
- * on the command line, only strings are output, which are followed
- * by LF or NUL, just like the original BSD manual entry claimed.
- *
- */
-
- # include "strings.h"
-
- # include <ctype.h>
-
- char isp [256];
- char * isp_mid;
- # define IS_PRINTABLE(c) (isp_mid[c])
-
- int fd;
-
- CHAR_TYPE buf [IN_BUF_LEN];
-
- int ind_whole = 0; /* Shall we examine whole file ? */
- int ind_offset = 0; /* Offsets required ? */
- int min_str_len = 4; /* If a string has more characters, it is output */
- int ind_file = 0; /* # of filenames in command */
- int ind_c = 0; /* Only strings ending with NUL or LF ? */
- int ind_tab = 0; /* Is TAB a printable character ? */
- int ind_prefix = 0; /* should the filename be added before string ? */
- int ind_version = 0; /* print version information ? */
-
- LSEEK_TYPE offset;
-
- char * cur_file_name;
-
- extern CHAR_TYPE * out_buf, * level;
- extern int num_out_buf;
- extern int saved;
-
- # define SEARCH 1
- # define DECIDE 2
- # define TRY 3
-
/*
 * usage
 *
 * Write the option summary, one out() call per line, and terminate the
 * program with exit status n.  This function does not return.
 * Which lines are shown depends on whether I_SPECIAL is defined.
 */
usage (n)
int n;
{
    static char * text [] = {
        "usage: strings [options] [file ...]\n",
# ifdef I_SPECIAL
        "-a : look in whole file. Default : only look in initialized data\n",
        "- : the same as -a\n",
# endif
        "-o : output offset in decimal before each string.\n",
        "-N : only output sequences of length >= N; N is a number > 0.\n",
        "-c : only output C strings; terminated by \\0 or \\n\n",
        "-e : the next word is taken as a filename, even if it starts with '-'.\n",
        "-t : TAB is considered a printable character too.\n",
        "-p : the name of the input file is output before each string.\n",
        "-v : only print version information, examine no files.\n",
# ifdef I_SPECIAL
        "Options can be combined like '-a20ot'.\n",
# else
        "Options can be combined like '-20o'.\n",
        "The whole file is scanned.\n",
# endif
        (char *) 0          /* end marker */
    };
    register char ** line;

    for (line = text; *line != (char *) 0; line++)
        out (*line);
    exit (n);
}
-
/*
 * out_int
 *
 * Write the decimal representation of n through out().
 *
 * Negative values are written with a leading '-'.  The digits are
 * produced from the magnitude held in an unsigned variable, so the
 * remainder is never negative and the most negative int does not
 * overflow when its sign is removed.
 */
out_int (n)
register int n;
{
    char s [24];                /* sign, digits of a 64-bit int, NUL */
    register char * p;
    register unsigned int u;

    u = (unsigned int) n;
    if (n < 0)
        u = 0 - u;              /* magnitude, computed without overflow */

    /* Build the text backwards from the end of the buffer. */
    p = s + sizeof (s) - 1;
    *p = '\0';
    do {
        *--p = (char) ('0' + (int) (u % 10));
        u /= 10;
    } while (u != 0);
    if (n < 0)
        *--p = '-';

    out (p);
}
-
- print_version()
- {
- out (sccsid); out ("\n\n");
- out ("Compilation flags:\n");
- out ("FOUND = \""); out (FOUND); out ("\"\n");
- # ifdef FCNTL
- out ("FCNTL = "); out_int (FCNTL); out ("\n");
- # else
- out ("FCNTL is not defined.\n");
- # endif FCNTL
- # ifdef FAST_COPY
- out ("FAST_COPY is defined.\n");
- # else
- out ("FAST_COPY is not defined.\n");
- # endif FAST_COPY
- # ifdef WHAT_LSEEK
- out ("WHAT_LSEEK = "); out_int (WHAT_LSEEK); out ("\n");
- # else
- out ("WHAT_LSEEK is not defined.\n");
- # endif WHAT_LSEEK
- # ifdef I_SPECIAL
- out ("I_SPECIAL is defined.\n");
- # else
- out ("I_SPECIAL is not defined.\n");
- # endif I_SPECIAL
- out ("IN_BUFLEN = "); out_int (IN_BUF_LEN); out ("\n");
- out ("OUT_BUFLEN = "); out_int (OUT_BUF_LEN); out ("\n");
- out ("THRESHOLD = "); out_int (THRESHOLD); out ("\n");
- }
-
- options (ac, av, f)
- int ac;
- char * av [];
- char ** f;
- {
- register int i, j;
- int take_file = 0; /* is the next word a file ? */
- int had_whole, had_offset, had_min_str_len;
- int had_c, had_tab, had_prefix, had_version;
-
- had_whole = had_offset = had_min_str_len = had_c = had_tab = 0;
- had_prefix = had_version = 0;
- for (i = 1; i < ac; i++) {
- if (take_file == 0 && av[i][0] == '-') {
- if (av[i][1] == '\0') {
- if (had_whole == 1)
- usage (5);
- had_whole = 1;
- ind_whole = 1;
- } else {
- for (j = 1; av[i][j] != '\0'; j++) {
- if ('0' <= av[i][j] && av[i][j] <= '9') {
- if (had_min_str_len == 1)
- usage (3);
- for (min_str_len = 0; '0' <= av[i][j] && av[i][j] <= '9'; j++)
- min_str_len = min_str_len * 10 + av[i][j] - '0';
- j--; /* So we don't lose a character */
- had_min_str_len = 1;
- } else
- switch (av[i][j]) {
- # ifdef I_SPECIAL
- case 'a':
- case '-':
- if (had_whole == 1)
- usage (5);
- had_whole = 1;
- ind_whole = 1;
- break;
- # endif
- case 'o':
- if (had_offset == 1)
- usage (6);
- had_offset = ind_offset = 1;
- break;
- case 'c':
- if (had_c == 1)
- usage (7);
- had_c = ind_c = 1;
- break;
- case 'f':
- take_file = 1;
- break;
- case 't':
- if (had_tab == 1)
- usage (8);
- had_tab = ind_tab = 1;
- break;
- case 'p':
- if (had_prefix == 1)
- usage (9);
- had_prefix = ind_prefix = 1;
- break;
- case 'v':
- if (had_version == 1)
- usage (10);
- had_version = ind_version = 1;
- break;
- default:
- usage (8);
- break;
- }
- }
- }
- } else {
- f [ind_file++] = av[i];
- take_file = 0;
- }
- }
- if (min_str_len <= 0)
- usage (4);
- # ifdef DEBUG
- fprintf (prot, "ind_offset = %3d\n", ind_offset);
- fprintf (prot, "ind_whole = %3d\n", ind_whole);
- fprintf (prot, "ind_file = %3d\n", ind_file);
- fprintf (prot, "ind_c = %3d\n", ind_c);
- fprintf (prot, "min_str_len = %3d\n", min_str_len);
- fprintf (prot, "ind_tab = %3d\n", ind_tab);
- fprintf (prot, "ind_prefix = %3d\n", ind_prefix);
- fprintf (prot, "ind_version = %3d\n", ind_version);
- if (ind_file == 0)
- fprintf (prot, "had no files on command line\n");
- else {
- fprintf (prot, "had %1d files on command line\n", ind_file);
- for (i = 0; i < ind_file; i++)
- fprintf (prot, "%s\n", f[i]);
- }
- # endif
- }
-
- init ()
- {
- register int i;
- int min;
- char c;
-
- min = 0;
- for (i = 0; i < 256; i++) {
- c = i;
- if (c < min)
- min = c;
- }
- isp_mid = isp - min;
- for (i = 0; i < 256; i++) {
- c = i;
- isp_mid [c] = isascii(c) && isprint(c);
- }
- if (ind_tab)
- isp_mid['\t'] = 1;
-
- init_output ();
- }
-
- main (argc, argv)
- int argc;
- char * argv[];
- {
- register int i;
- char ** f;
-
- # ifdef DEBUG
- if ((prot = fopen ("prot", "w")) == NULL) {
- fprintf (stderr, "could not open prot\n");
- exit (1);
- }
- # endif
- f = (char **) malloc ((unsigned)(sizeof (char *) * argc));
- options (argc, argv, f);
- if (ind_version) {
- print_version ();
- exit (0);
- }
- init ();
-
- if (ind_file == 0)
- strings ((char*)NULL);
- else
- for (i = 0; i < ind_file; i++) {
- if (ind_file != 1)
- out_name (f[i]);
- strings (f[i]);
- }
- exit (0);
- }
-
- out_name (b)
- register CHAR_TYPE * b;
- {
- CHAR_TYPE s [45];
- CHAR_TYPE * s2 = (CHAR_TYPE*) " ";
- register int n, i;
-
- for (i = 0; i < 45; i++)
- s [i] = '-';
- n = strlen (s);
- i = strlen (b);
- # ifdef DEBUG
- fprintf (prot, "out_name :: n = %d, i = %d\n", n, i);
- # endif
- if (n*2 + (i+2) > 80)
- n = (80 - (i+2)) / 2;
- # ifdef DEBUG
- fprintf (prot, "out_name :: first string is %d long.\n", n);
- # endif
- (void) append (s, s+n, 0);
-
- (void) append (s2, s2+1, 0);
- (void) append (b, b+i, 0);
- (void) append (s2, s2+1, 0);
-
- if (2*n + (i+2) < 80)
- n++;
- # ifdef DEBUG
- fprintf (prot, "out_name :: second string is %d long.\n", n);
- # endif
- (void) append (s, s+n, 1);
- }
-
- int
- examine (state, n)
- register int state;
- int n;
- /*
- * Find strings of printable characters in buf and append them to
- * the output buffer, if they meet certain conditions.
- *
- * The main part of this routine is a DFA (deterministic finite automaton) with
- * three states.
- * These states are
- * SEARCH : search for a printable character by examining characters in
- * distance min_str_len. If found, set b1 to the start
- * of the sequence and enter state TRY.
- * TRY : We have found a printable character. Set b2 to the first character
- * after the end of the sequence by single stepping. Set state to
- * DECIDE.
- * DECIDE : We have found a sequence of printable characters. If the first
- * character after the sequence is in the buffer, then we can decide
- * what to do with the sequence (even if flag -c was not specified).
- * If not then the stuff is buffered, state set to TRY, and returned
- * to the caller to read a new block of input.
- * It is tested whether the sequence meets the requirements.
- * Either it is output by placing it permanently into the output
- * buffer, or it is forgotten.
- */
- {
- register CHAR_TYPE * b, * b1, * b2, * end;
-
- end = buf + n;
-
- b = b1 = b2 = buf;
- for (;b < end; b = b2+1) {
- # ifdef DEBUG
- fprintf (prot, "state = %s; b at %d\n",
- state == SEARCH ? "SEARCH" : (state == DECIDE ? "DECIDE" : "TRY"),
- (int)(b - buf));
- # endif
- b1 = b;
- switch (state) {
- case SEARCH:
- /*
- * Search a character which might be in a sequence of
- * printable characters. Note that it suffices to examine
- * characters in distance min_str_len.
- */
- for (;b2 < end && !IS_PRINTABLE(*b2); b2 += min_str_len);
- /*
- * If we have stepped outside the buffer, we must examine
- * the end of the buffer yet.
- */
- if (b2 >= end)
- b2 = end;
- b1 = b2-1;
- /*
- * Find the start of the current sequence.
- */
- while (b1 >= buf && IS_PRINTABLE(*b1))
- b1--;
- b1++;
- if (b1 >= end)
- return (SEARCH);
- /* FALL THROUGH */
- case TRY:
- /*
- * Find the end of the current sequence. Set b2 one beyond.
- */
- while (b2 < end && IS_PRINTABLE(*b2))
- b2++;
- # ifdef DEBUG
- fprintf (prot, "found seq between %1d and %1d -->",
- (int)(b1-buf), (int)(b2-buf));
- { CHAR_TYPE * tmp;
- for (tmp = b1; tmp < b2; tmp++)
- if (IS_PRINTABLE(*tmp))
- fputc (*tmp, prot);
- else
- fputc ('.', prot);
- }
- fprintf (prot, "<--\n");
- # endif
- /*
- * Should set state to DECIDE; but we don't need it.
- * state will be reset anyway.
- */
- /* FALL THROUGH */
- case DECIDE:
- /*
- * Can we decide what to do with the sequence which
- * we have found? We cannot, if we are at the end of
- * the block, because we need just one more character.
- */
- if (b2 >= end) {
- # ifdef DEBUG
- fprintf (prot, "I cannot decide. Must read a new block.\n");
- # endif
- (void) append (b1, b2, 0);
- return (TRY);
- }
- # ifdef DEBUG
- fprintf (prot, "I can decide.\n");
- if (ind_c) {
- if (*b2 == '\0' || *b2 == '\n')
- fprintf (prot, "String is a C string; followed by %s\n",
- *b2 == '\0'?"NUL":"\\n");
- }
- # endif
- if (((int)(b2-b1)+saved >= min_str_len) &&
- (!ind_c || (*b2 == '\0' || *b2 == '\n'))) {
- /*
- * String is accepted. Copy it to the output buffer.
- */
- # ifdef DEBUG
- fprintf (prot, "Accept string.\n");
- # endif
- (void) append (b1, b2, 1);
- } else {
- /*
- * String is refused. Forget any temporarily buffered
- * stuff in output buffer.
- */
- # ifdef DEBUG
- fprintf (prot, "String refused.\n");
- # endif
- level = out_buf + num_out_buf;
- saved = 0;
- }
- state = SEARCH;
- } /* switch */
- } /* for (;b < end; ... */
- return (state);
- }
-
- strings (name)
- char * name;
- /*
- * Find strings in a file or an input stream.
- * This routine sets the limits to handle a file, either to the
- * whole file, or to the initialized data only.
- * In a loop it reads blocks from the file and calls the DFA ('examine').
- * Examine returns its state, so that it can be reentered at the
- * right place.
- */
- {
- register int n, state;
- LSEEK_TYPE l, first, last;
-
- if (name == NULL) {
- fd = 0;
- } else
- if ((fd = open (name, O_RDONLY, 0)) == -1) {
- perror (name);
- return;
- }
- if (name == NULL || ind_whole == 1) {
- first = (LSEEK_TYPE)0;
- last = (LSEEK_TYPE)(-1); /* --> no limit */
- } else {
- # ifdef I_SPECIAL
- /*
- * Get the limits for reading.
- * If the file is not an object, then we look at whole file.
- */
- get_limits (fd, &first, &last);
- # ifdef DEBUG
- fprintf (prot, "lseek to %ld; last = %ld\n", (long)first, (long)last);
- # endif
- if (lseek (fd, first, 0) != first) {
- perror ("lseek");
- return;
- }
- # else I_SPECIAL
- first = (LSEEK_TYPE)0;
- last = (LSEEK_TYPE)(-1); /* --> no limit */
- # endif I_SPECIAL
- }
-
- cur_file_name = name;
- offset = first;
- state = SEARCH;
- for (;;) {
- /*
- * Do we really have to read a block ?
- * How much should we read? The difficult thing here
- * is to watch out not to read beyond the limits of
- * initialized data.
- */
- if (last != (LSEEK_TYPE)(-1)) {
- l = last - offset;
- if (l <= 0)
- break;
- if (l > IN_BUF_LEN)
- l = IN_BUF_LEN;
- } else
- l = IN_BUF_LEN;
- # ifdef DEBUG
- fprintf (prot, "reading %1ld chars\n", l);
- # endif
- if ((n = read (fd, buf, (int)l)) <= 0)
- break;
- # ifdef DEBUG
- fprintf (prot, "read %1d characters\n", n);
- fflush (prot);
- # endif
-
- state = examine (state, n);
-
- offset += n;
- }
- if (n == -1)
- perror ("read");
- /*
- * If the piece of the file ended with a string of printable characters,
- * we must check whether this string is valid.
- * We need not peek at the first character after the strings, as we know
- * that it cannot be \0 or \n.
- */
- if (saved > 0 && !ind_c)
- if (saved >= min_str_len)
- (void) append (buf, buf, 1);
- /*
- * We must flush the output buffer.
- */
- flush_output ();
- if (name != 0)
- (void)close (fd);
- }
-
# ifndef FAST_COPY
/*
 * FAST_COPY
 *
 * Fallback used when the build does not supply a FAST_COPY macro.
 *
 * Copies count bytes from 'from' to 'to', lowest address first, and
 * returns 'to'.  A count of zero or less copies nothing.
 */
char *
FAST_COPY (from, to, count)
register char * from, * to;
register int count;
{
    register char * tmp;

    tmp = to;
    /* "> 0" keeps a negative count from running through memory */
    while (count-- > 0)
        *to++ = *from++;
    return (tmp);
}
# endif /* FAST_COPY */
-