diff/diff.c 644 473 0 40471 4704135041 6073 /* GNU DIFF main routine. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ /* GNU DIFF was written by Mike Haertel, David Hayes, Richard Stallman and Len Tower. */ #define GDIFF_MAIN #include "regex.h" #include "diff.h" #include "getopt.h" /* Nonzero for -r: if comparing two directories, compare their common subdirectories recursively. */ int recursive; /* For debugging: don't do discard_confusing_lines. */ int no_discards; /* Return a string containing the command options with which diff was invoked. Spaces appear between what were separate ARGV-elements. There is a space at the beginning but none at the end. If there were no options, the result is an empty string. Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, the length of that vector. */ static char * option_list (optionvec, count) char **optionvec; /* Was `vector', but that collides on Alliant. */ int count; { int i; int length = 0; char *result; for (i = 0; i < count; i++) length += strlen (optionvec[i]) + 1; result = (char *) xmalloc (length + 1); result[0] = 0; for (i = 0; i < count; i++) { strcat (result, " "); strcat (result, optionvec[i]); } return result; } static struct option longopts[] = { {"ignore-blank-lines", 0, 0, 'B'}, {"context", 2, 0, 129}, {"ifdef", 1, 0, 'D'}, {"show-function-line", 1, 0, 'F'}, {"speed-large-files", 0, 0, 'H'}, {"ignore-matching-lines", 1, 0, 'I'}, {"entire-new-file", 0, 0, 'N'}, {"starting-file", 1, 0, 'S'}, {"initial-tab", 0, 0, 'T'}, {"text", 0, 0, 'a'}, {"all-text", 0, 0, 'a'}, {"ascii", 0, 0, 'a'}, {"ignore-space-change", 0, 0, 'b'}, {"minimal", 0, 0, 'd'}, {"ed", 0, 0, 'e'}, {"reversed-ed", 0, 0, 'f'}, {"ignore-case", 0, 0, 'i'}, {"print", 0, 0, 'l'}, {"rcs", 0, 0, 'n'}, {"show-c-function", 0, 0, 'p'}, {"binary", 0, 0, 'q'}, {"brief", 0, 0, 'q'}, {"recursive", 0, 0, 'r'}, {"report-identical-files", 0, 0, 's'}, {"expand-tabs", 0, 0, 't'}, {"ignore-all-space", 0, 0, 'w'}, {"version", 0, 0, 'v'}, {0, 0, 0, 0} }; main (argc, argv) int argc; char *argv[]; { int val; int c; int prev = -1; int longind; extern char *version_string; program = argv[0]; /* Do our initializations. */ output_style = OUTPUT_NORMAL; always_text_flag = FALSE; ignore_space_change_flag = FALSE; ignore_all_space_flag = FALSE; length_varies = FALSE; ignore_case_flag = FALSE; ignore_blank_lines_flag = FALSE; ignore_regexp = 0; function_regexp = 0; print_file_same_flag = FALSE; entire_new_file_flag = FALSE; no_details_flag = FALSE; context = -1; line_end_char = '\n'; tab_align_flag = FALSE; tab_expand_flag = FALSE; recursive = FALSE; paginate_flag = FALSE; ifdef_string = NULL; heuristic = FALSE; dir_start_file = NULL; msg_chain = NULL; msg_chain_end = NULL; no_discards = 0; /* Decode the options. */ while ((c = getopt_long (argc, argv, "0123456789abBcC:dD:efF:hHiI:lnNpqrsS:tTvw", longopts, &longind)) != EOF) { if (c == 0) /* Long option. */ c = longopts[longind].val; switch (c) { /* All digits combine in decimal to specify the context-size. */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': if (context == -1) context = 0; /* If a context length has already been specified, more digits allowed only if they follow right after the others. Reject two separate runs of digits, or digits after -C. */ else if (prev < '0' || prev > '9') fatal ("context length specified twice"); context = context * 10 + c - '0'; break; case 'a': /* Treat all files as text files; never treat as binary. */ always_text_flag = 1; break; case 'b': /* Ignore changes in amount of whitespace. */ ignore_space_change_flag = 1; length_varies = 1; break; case 'B': /* Ignore changes affecting only blank lines. */ ignore_blank_lines_flag = 1; break; case 'C': case 129: /* +context[=lines] */ if (optarg) { if (context >= 0) fatal ("context length specified twice"); { char *p; for (p = optarg; *p; p++) if (*p < '0' || *p > '9') fatal ("invalid context length argument"); } context = atoi (optarg); } /* Falls through. */ case 'c': /* Make context-style output. */ specify_style (OUTPUT_CONTEXT); break; case 'd': /* Don't discard lines. This makes things slower (sometimes much slower) but will find a guaranteed minimal set of changes. */ no_discards = 1; break; case 'D': /* Make merged #ifdef output. */ specify_style (OUTPUT_IFDEF); ifdef_string = optarg; break; case 'e': /* Make output that is a valid `ed' script. */ specify_style (OUTPUT_ED); break; case 'f': /* Make output that looks vaguely like an `ed' script but has changes in the order they appear in the file. */ specify_style (OUTPUT_FORWARD_ED); break; case 'F': /* Show, for each set of changes, the previous line that matches the specified regexp. Currently affects only context-style output. */ function_regexp = optarg; break; case 'h': /* Split the files into chunks of around 1500 lines for faster processing. Usually does not change the result. This currently has no effect. */ break; case 'H': /* Turn on heuristics that speed processing of large files with a small density of changes. */ heuristic = 1; break; case 'i': /* Ignore changes in case. */ ignore_case_flag = 1; break; case 'I': /* Ignore changes affecting only lines that match the specified regexp. */ ignore_regexp = optarg; break; case 'l': /* Pass the output through `pr' to paginate it. */ paginate_flag = 1; break; case 'n': /* Output RCS-style diffs, like `-f' except that each command specifies the number of lines affected. */ specify_style (OUTPUT_RCS); break; case 'N': /* When comparing directories, if a file appears only in one directory, treat it as present but empty in the other. */ entire_new_file_flag = 1; break; case 'p': /* Make context-style output and show name of last C function. */ specify_style (OUTPUT_CONTEXT); function_regexp = "^[_a-zA-Z]"; break; case 'q': no_details_flag = 1; break; case 'r': /* When comparing directories, recursively compare any subdirectories found. */ recursive = 1; break; case 's': /* Print a message if the files are the same. */ print_file_same_flag = 1; break; case 'S': /* When comparing directories, start with the specified file name. This is used for resuming an aborted comparison. */ dir_start_file = optarg; break; case 't': /* Expand tabs to spaces in the output so that it preserves the alignment of the input files. */ tab_expand_flag = 1; break; case 'T': /* Use a tab in the output, rather than a space, before the text of an input line, so as to keep the proper alignment in the input line without changing the characters in it. */ tab_align_flag = 1; break; case 'v': printf ("GNU diff version %s\n", version_string); break; case 'w': /* Ignore horizontal whitespace when comparing lines. */ ignore_all_space_flag = 1; length_varies = 1; break; default: usage (); } prev = c; } if (optind != argc - 2) usage (); if (ignore_regexp) { char *val; bzero (&ignore_regexp_compiled, sizeof ignore_regexp_compiled); val = re_compile_pattern (ignore_regexp, strlen (ignore_regexp), &ignore_regexp_compiled); if (val != 0) error ("regexp to ignore: ", val); ignore_regexp_compiled.fastmap = (char *) xmalloc (256); } if (function_regexp) { char *val; bzero (&function_regexp_compiled, sizeof function_regexp_compiled); val = re_compile_pattern (function_regexp, strlen (function_regexp), &function_regexp_compiled); if (val != 0) error ("regexp for previous line: ", val); function_regexp_compiled.fastmap = (char *) xmalloc (256); } if (output_style != OUTPUT_CONTEXT) context = 0; else if (context == -1) /* Default amount of context for -c. */ context = 3; switch_string = option_list (argv + 1, optind - 1); val = compare_files (0, argv[optind], 0, argv[optind + 1], 0); /* Print any messages that were saved up for last. */ print_message_queue (); exit (val); } usage () { fprintf (stderr, "\ Usage: diff [-#] [-abBcdefhHilnNprstTvw] [-C lines] [-F regexp] [-I regexp]\n\ [-S file] [-D symbol] [+ignore-blank-lines] [+context[=lines]]\n\ [+ifdef symbol] [+show-function-line regexp] [+speed-large-files]\n"); fprintf (stderr, "\ [+ignore-matching-lines regexp] [+entire-new-file] [+initial-tab]\n\ [+starting-file file] [+text] [+all-text] [+ascii] [+minimal]\n\ [+ignore-space-change] [+ed] [+reversed-ed] [+ignore-case] [+print]\n"); fprintf (stderr, "\ [+rcs] [+show-c-function] [+binary] [+brief] [+recursive]\n\ [+report-identical-files] [+expand-tabs] [+ignore-all-space]\n\ [+version] path1 path2\n"); exit (1); } specify_style (style) enum output_style style; { if (output_style != OUTPUT_NORMAL && output_style != style) error ("conflicting specifications of output style"); output_style = style; } /* Compare two files (or dirs) with specified names DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion. (if DIR0 is 0, then the name is just NAME0, etc.) This is self-contained; it opens the files and closes them. Value is 0 if files are identical, 1 if different, 2 if there is a problem opening them. */ int compare_files (dir0, name0, dir1, name1, depth) char *dir0, *dir1; char *name0, *name1; int depth; { struct file_data inf[2]; register int i; int val; int errorcount = 0; int stat_result[2]; /* If this is directory comparison, perhaps we have a file that exists only in one of the directories. If so, just print a message to that effect. */ if (! entire_new_file_flag && (name0 == 0 || name1 == 0)) { char *name = name0 == 0 ? name1 : name0; char *dir = name0 == 0 ? dir1 : dir0; message ("Only in %s: %s\n", dir, name); /* Return 1 so that diff_dirs will return 1 ("some files differ"). */ return 1; } /* Mark any nonexistent file with -1 in the desc field. */ inf[0].desc = name0 == 0 ? -1 : 0; inf[1].desc = name1 == 0 ? -1 : 0; /* Now record the full name of each file, including nonexistent ones. */ if (name0 == 0) name0 = name1; if (name1 == 0) name1 = name0; inf[0].name = dir0 == 0 ? name0 : concat (dir0, "/", name0); inf[1].name = dir1 == 0 ? name1 : concat (dir1, "/", name1); /* Stat the files. Record whether they are directories. Record in stat_result whether stat fails. */ for (i = 0; i <= 1; i++) { bzero (&inf[i].stat, sizeof(struct stat)); inf[i].dir_p = 0; stat_result[i] = 0; if (inf[i].desc != -1 && strcmp (inf[i].name, "-")) { char *filename = inf[i].name; stat_result[i] = stat (filename, &inf[i].stat); if (stat_result[i] < 0) { perror_with_name (filename); errorcount = 1; } else inf[i].dir_p = (S_IFDIR == (inf[i].stat.st_mode & S_IFMT)); } } /* See if the two named files are actually the same physical file. If so, we know they are identical without actually reading them. */ if (inf[0].stat.st_ino == inf[1].stat.st_ino && inf[0].stat.st_dev == inf[1].stat.st_dev && stat_result[0] == 0 && stat_result[1] == 0) { val = 0; goto done; } if (name0 == 0) inf[0].dir_p = inf[1].dir_p; if (name1 == 0) inf[1].dir_p = inf[0].dir_p; /* Open the files and record their descriptors. */ for (i = 0; i <= 1; i++) { if (inf[i].desc == -1) ; else if (!strcmp (inf[i].name, "-")) { inf[i].desc = 0; inf[i].name = "Standard Input"; } /* Don't bother opening if stat already failed. */ else if (stat_result[i] == 0 && ! inf[i].dir_p) { char *filename = inf[i].name; inf[i].desc = open (filename, O_RDONLY, 0); if (0 > inf[i].desc) { perror_with_name (filename); errorcount = 1; } } } if (errorcount) { /* If either file should exist but fails to be opened, return 2. */ val = 2; } else if (inf[0].dir_p && inf[1].dir_p) { if (output_style == OUTPUT_IFDEF) fatal ("-D option not supported with directories"); /* If both are directories, compare the files in them. */ if (depth > 0 && !recursive) { /* But don't compare dir contents one level down unless -r was specified. */ message ("Common subdirectories: %s and %s\n", inf[0].name, inf[1].name); val = 0; } else { val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth, 0, 0); } } else if (depth == 0 && (inf[0].dir_p || inf[1].dir_p)) { /* If only one is a directory, and it was specified in the command line, use the file in that dir whose basename matches the other file. */ int dir_arg = (inf[0].dir_p ? 0 : 1); int fnm_arg = (inf[0].dir_p ? 1 : 0); char *p = rindex (inf[fnm_arg].name, '/'); char *filename = concat (inf[dir_arg].name, "/", (p ? p+1 : inf[fnm_arg].name)); inf[dir_arg].desc = open (filename, O_RDONLY, 0); if (0 > inf[dir_arg].desc) { perror_with_name (filename); val = 2; } else { /* JF: patch from the net to check and make sure we can really free this. If it's from argv[], freeing it is a *really* bad idea */ if (0 != (dir_arg ? dir1 : dir0)) free (inf[dir_arg].name); inf[dir_arg].name = filename; if (fstat (inf[dir_arg].desc, &inf[dir_arg].stat) < 0) pfatal_with_name (inf[dir_arg].name); inf[dir_arg].dir_p = (S_IFDIR == (inf[dir_arg].stat.st_mode & S_IFMT)); if (inf[dir_arg].dir_p) { error ("%s is a directory but %s is not", inf[dir_arg].name, inf[fnm_arg].name); val = 1; } else val = diff_2_files (inf, depth); } } else if (depth > 0 && (inf[0].dir_p || inf[1].dir_p)) { /* Perhaps we have a subdirectory that exists only in one directory. If so, just print a message to that effect. */ if (inf[0].desc == -1 || inf[1].desc == -1) { if (entire_new_file_flag && recursive) val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth, inf[0].desc == -1, inf[1].desc == -1); else { char *dir = (inf[0].desc == -1) ? dir1 : dir0; message ("Only in %s: %s\n", dir, name0); val = 1; } } else { /* We have a subdirectory in one directory and a file in the other. */ if (inf[0].dir_p) message ("%s is a directory but %s is not\n", inf[0].name, inf[1].name); else message ("%s is a directory but %s is not\n", inf[1].name, inf[0].name); /* This is a difference. */ val = 1; } } else { /* Both exist and both are ordinary files. */ val = diff_2_files (inf, depth); } /* Now the comparison has been done, if no error prevented it, and VAL is the value this function will return. */ if (inf[0].desc > 0) close (inf[0].desc); if (inf[1].desc > 0) close (inf[1].desc); done: if (val == 0 && !inf[0].dir_p) { if (print_file_same_flag) message ("Files %s and %s are identical\n", inf[0].name, inf[1].name); } else fflush (stdout); if (dir0 != 0) free (inf[0].name); if (dir1 != 0) free (inf[1].name); return val; } } /* Now the comparison has been done, if no error prevented it, and VAL is the value this function will return. */ if (inf[0].desc > 0) close (inf[0].desc); if (inf[1].desc > 0) diff/analyze.c 644 473 0 57046 4704135041 6634 /* Analyze file differences for GNU DIFF. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ /* The basic algorithm is described in: "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, p 251. */ #include "diff.h" struct change *find_change (); extern int no_discards; static int *xvec, *yvec; /* Vectors being compared. */ static int *fdiag; /* Vector, indexed by diagonal, containing the X coordinate of the point furthest along the given diagonal in the forward search of the edit matrix. */ static int *bdiag; /* Vector, indexed by diagonal, containing the X coordinate of the point furthest along the given diagonal in the backward search of the edit matrix. */ /* Find the midpoint of the shortest edit script for a specified portion of the two files. We scan from the beginnings of the files, and simultaneously from the ends, doing a breadth-first search through the space of edit-sequence. When the two searches meet, we have found the midpoint of the shortest edit sequence. The value returned is the number of the diagonal on which the midpoint lies. The diagonal number equals the number of inserted lines minus the number of deleted lines (counting only lines before the midpoint). The edit cost is stored into *COST; this is the total number of lines inserted or deleted (counting only lines before the midpoint). This function assumes that the first lines of the specified portions of the two files do not match, and likewise that the last lines do not match. The caller must trim matching lines from the beginning and end of the portions it is going to specify. Note that if we return the "wrong" diagonal value, or if the value of bdiag at that diagonal is "wrong", the worst this can do is cause suboptimal diff output. It cannot cause incorrect diff output. */ static int diag (xoff, xlim, yoff, ylim, cost) int xoff, xlim, yoff, ylim; int *cost; { int *const fd = fdiag; /* Give the compiler a chance. */ int *const bd = bdiag; /* Additional help for the compiler. */ int *const xv = xvec; /* Still more help for the comiler. */ int *const yv = yvec; /* And more and more . . . */ const int dmin = xoff - ylim; /* Minimum valid diagonal. */ const int dmax = xlim - yoff; /* Maximum valid diagonal. */ const int fmid = xoff - yoff; /* Center diagonal of top-down search. */ const int bmid = xlim - ylim; /* Center diagonal of bottom-up search. */ int fmin = fmid, fmax = fmid; /* Limits of top-down search. */ int bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */ int c; /* Cost. */ int odd = fmid - bmid & 1; /* True if southeast corner is on an odd diagonal with respect to the northwest. */ fd[fmid] = xoff; bd[bmid] = xlim; for (c = 1;; ++c) { int d; /* Active diagonal. */ int big_snake = 0; /* Extend the top-down search by an edit step in each diagonal. */ fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin; fmax < dmax ? fd[++fmax + 1] = -1 : --fmax; for (d = fmax; d >= fmin; d -= 2) { int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1]; if (tlo >= thi) x = tlo + 1; else x = thi; oldx = x; y = x - d; while (x < xlim && y < ylim && xv[x] == yv[y]) ++x, ++y; if (x - oldx > 20) big_snake = 1; fd[d] = x; if (odd && bmin <= d && d <= bmax && bd[d] <= fd[d]) { *cost = 2 * c - 1; return d; } } /* Similar extend the bottom-up search. */ bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin; bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax; for (d = bmax; d >= bmin; d -= 2) { int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1]; if (tlo < thi) x = tlo; else x = thi - 1; oldx = x; y = x - d; while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) --x, --y; if (oldx - x > 20) big_snake = 1; bd[d] = x; if (!odd && fmin <= d && d <= fmax && bd[d] <= fd[d]) { *cost = 2 * c; return d; } } /* Heuristic: check occasionally for a diagonal that has made lots of progress compared with the edit distance. If we have any such, find the one that has made the most progress and return it as if it had succeeded. With this heuristic, for files with a constant small density of changes, the algorithm is linear in the file size. */ if (c > 200 && big_snake && heuristic) { int best; int bestpos; best = 0; for (d = fmax; d >= fmin; d -= 2) { int dd = d - fmid; if ((fd[d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd))) { if (fd[d] * 2 - dd > best && fd[d] - xoff > 20 && fd[d] - d - yoff > 20) { int k; int x = fd[d]; /* We have a good enough best diagonal; now insist that it end with a significant snake. */ for (k = 1; k <= 20; k++) if (xvec[x - k] != yvec[x - d - k]) break; if (k == 21) { best = fd[d] * 2 - dd; bestpos = d; } } } } if (best > 0) { *cost = 2 * c - 1; return bestpos; } best = 0; for (d = bmax; d >= bmin; d -= 2) { int dd = d - bmid; if ((xlim - bd[d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd))) { if ((xlim - bd[d]) * 2 + dd > best && xlim - bd[d] > 20 && ylim - (bd[d] - d) > 20) { /* We have a good enough best diagonal; now insist that it end with a significant snake. */ int k; int x = bd[d]; for (k = 0; k < 20; k++) if (xvec[x + k] != yvec[x - d + k]) break; if (k == 20) { best = (xlim - bd[d]) * 2 + dd; bestpos = d; } } } } if (best > 0) { *cost = 2 * c - 1; return bestpos; } } } } /* Compare in detail contiguous subsequences of the two files which are known, as a whole, to match each other. The results are recorded in the vectors files[N].changed_flag, by storing a 1 in the element for each line that is an insertion or deletion. The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. Note that XLIM, YLIM are exclusive bounds. All line numbers are origin-0 and discarded lines are not counted. */ static void compareseq (xoff, xlim, yoff, ylim) int xoff, xlim, yoff, ylim; { /* Slide down the bottom initial diagonal. */ while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) ++xoff, ++yoff; /* Slide up the top initial diagonal. */ while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) --xlim, --ylim; /* Handle simple cases. */ if (xoff == xlim) while (yoff < ylim) files[1].changed_flag[files[1].realindexes[yoff++]] = 1; else if (yoff == ylim) while (xoff < xlim) files[0].changed_flag[files[0].realindexes[xoff++]] = 1; else { int c, d, f, b; /* Find a point of correspondence in the middle of the files. */ d = diag (xoff, xlim, yoff, ylim, &c); f = fdiag[d]; b = bdiag[d]; if (c == 1) { /* This should be impossible, because it implies that one of the two subsequences is empty, and that case was handled above without calling `diag'. Let's verify that this is true. */ abort (); #if 0 /* The two subsequences differ by a single insert or delete; record it and we are done. */ if (d < xoff - yoff) files[1].changed_flag[files[1].realindexes[b - d - 1]] = 1; else files[0].changed_flag[files[0].realindexes[b]] = 1; #endif } else { /* Use that point to split this problem into two subproblems. */ compareseq (xoff, b, yoff, b - d); /* This used to use f instead of b, but that is incorrect! It is not necessarily the case that diagonal d has a snake from b to f. */ compareseq (b, xlim, b - d, ylim); } } } /* Discard lines from one file that have no matches in the other file. A line which is discarded will not be considered by the actual comparison algorithm; it will be as if that line were not in the file. The file's `realindexes' table maps virtual line numbers (which don't count the discarded lines) into real line numbers; this is how the actual comparison algorithm produces results that are comprehensible when the discarded lines are counted. When we discard a line, we also mark it as a deletion or insertion so that it will be printed in the output. */ void discard_confusing_lines (filevec) struct file_data filevec[]; { int f, i; char *discarded[2]; int *equiv_count[2]; /* Allocate our results. */ for (f = 0; f < 2; f++) { filevec[f].undiscarded = (int *) xmalloc (filevec[f].buffered_lines * sizeof (int)); filevec[f].realindexes = (int *) xmalloc (filevec[f].buffered_lines * sizeof (int)); } /* Set up equiv_count[F][I] as the number of lines in file F that fall in equivalence class I. */ equiv_count[0] = (int *) xmalloc (filevec[0].equiv_max * sizeof (int)); bzero (equiv_count[0], filevec[0].equiv_max * sizeof (int)); equiv_count[1] = (int *) xmalloc (filevec[1].equiv_max * sizeof (int)); bzero (equiv_count[1], filevec[1].equiv_max * sizeof (int)); for (i = 0; i < filevec[0].buffered_lines; ++i) ++equiv_count[0][filevec[0].equivs[i]]; for (i = 0; i < filevec[1].buffered_lines; ++i) ++equiv_count[1][filevec[1].equivs[i]]; /* Set up tables of which lines are going to be discarded. */ discarded[0] = (char *) xmalloc (filevec[0].buffered_lines); discarded[1] = (char *) xmalloc (filevec[1].buffered_lines); bzero (discarded[0], filevec[0].buffered_lines); bzero (discarded[1], filevec[1].buffered_lines); /* Mark to be discarded each line that matches no line of the other file. If a line matches many lines, mark it as provisionally discardable. */ for (f = 0; f < 2; f++) { int end = filevec[f].buffered_lines; char *discards = discarded[f]; int *counts = equiv_count[1 - f]; int *equivs = filevec[f].equivs; for (i = 0; i < end; i++) { int nmatch = counts[equivs[i]]; if (equivs[i] == 0) continue; if (nmatch == 0) discards[i] = 1; else if (nmatch > 5) discards[i] = 2; } } /* Don't really discard the provisional lines if there are less than ten discardable lines in a row. */ for (f = 0; f < 2; f++) { int end = filevec[f].buffered_lines; char *discards = discarded[f]; for (i = 0; i < end; i++) if (discards[i]) { register int j; int length; int provisional = 0; /* Cancel provisional discards at the beginning. */ while (i < end && discards[i] == 2) discards[i] = 0; /* Find end of this run of discardable lines. Count how many are provisionally discardable. */ for (j = i; j < end; j++) { if (discards[j] == 0) break; if (discards[j] == 2) ++provisional; } /* Cancel provisional discards at the end, and shrink the run. */ while (j > i && discards[j - 1] == 2) discards[j - 1] = 0, --provisional; /* Now we have the length of a run of discardable lines whose first and last are not provisional. */ length = j - i; /* If half the lines in the run are provisional, cancel discarding of all provisional lines in the run. */ if (provisional * 2 > length) { while (j > i) if (discards[--j] == 2) discards[j] = 0; } else { /* Cancel provisional discards that are near either end. */ for (j = 0; j < 5 && j < length; j++) if (discards[i + j] == 2) discards[i + j] = 0; /* Meanwhile, I advances to the last line of the run. */ i += length - 1; length -= 5; for (j = 0; j < 5 && j < length; j++) if (discards[i - j] == 2) discards[i - j] = 0; } } } /* Discard from file 0. */ for (f = 0; f < 2; f++) { char *discards = discarded[f]; int end = filevec[f].buffered_lines; int j = 0; for (i = 0; i < end; ++i) if (no_discards || discards[i] == 0) { filevec[f].undiscarded[j] = filevec[f].equivs[i]; filevec[f].realindexes[j++] = i; } else filevec[f].changed_flag[i] = 1; filevec[f].nondiscarded_lines = j; } free (discarded[1]); free (discarded[0]); free (equiv_count[1]); free (equiv_count[0]); } /* Adjust inserts/deletes of blank lines to join changes as much as possible. We do something when a run of changed lines include a blank line at one end and have an excluded blank line at the other. We are free to choose which blank line is included. `compareseq' always chooses the one at the beginning, but usually it is cleaner to consider the following blank line to be the "change". The only exception is if the preceding blank line would join this change to other changes. */ int inhibit; static void shift_boundaries (filevec) struct file_data filevec[]; { int f; if (inhibit) return; for (f = 0; f < 2; f++) { char *changed = filevec[f].changed_flag; char *other_changed = filevec[1-f].changed_flag; int i = 0; int j = 0; int i_end = filevec[f].buffered_lines; int preceding = -1; int other_preceding = -1; while (1) { int start, end, other_start; /* Scan forwards to find beginning of another run of changes. Also keep track of the corresponding point in the other file. */ while (i < i_end && changed[i] == 0) { while (other_changed[j++]) /* Non-corresponding lines in the other file will count as the preceding batch of changes. */ other_preceding = j; i++; } if (i == i_end) break; start = i; other_start = j; while (1) { /* Now find the end of this run of changes. */ while (i < i_end && changed[i] != 0) i++; end = i; /* If the first changed line matches the following unchanged one, and this run does not follow right after a previous run, and there are no lines deleted from the other file here, then classify the first changed line as unchanged and the following line as changed in its place. */ /* You might ask, how could this run follow right after another? Only because the previous run was shifted here. */ if (end != i_end && files[f].equivs[start] == files[f].equivs[end] && !other_changed[j] && end != i_end && !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding))) { changed[end++] = 1; changed[start++] = 0; ++i; /* Since one line-that-matches is now before this run instead of after, we must advance in the other file to keep in synch. */ ++j; } else break; } preceding = i; other_preceding = j; } } } /* Cons an additional entry onto the front of an edit script OLD. LINE0 and LINE1 are the first affected lines in the two files (origin 0). DELETED is the number of lines deleted here from file 0. INSERTED is the number of lines inserted here in file 1. If DELETED is 0 then LINE0 is the number of the line before which the insertion was done; vice versa for INSERTED and LINE1. */ static struct change * add_change (line0, line1, deleted, inserted, old) int line0, line1, deleted, inserted; struct change *old; { struct change *new = (struct change *) xmalloc (sizeof (struct change)); new->line0 = line0; new->line1 = line1; new->inserted = inserted; new->deleted = deleted; new->link = old; return new; } /* Scan the tables of which lines are inserted and deleted, producing an edit script in reverse order. */ static struct change * build_reverse_script (filevec) struct file_data filevec[]; { struct change *script = 0; char *changed0 = filevec[0].changed_flag; char *changed1 = filevec[1].changed_flag; int len0 = filevec[0].buffered_lines; int len1 = filevec[1].buffered_lines; /* Note that changedN[len0] does exist, and contains 0. */ int i0 = 0, i1 = 0; while (i0 < len0 || i1 < len1) { if (changed0[i0] || changed1[i1]) { int line0 = i0, line1 = i1; /* Find # lines changed here in each file. */ while (changed0[i0]) ++i0; while (changed1[i1]) ++i1; /* Record this change. */ script = add_change (line0, line1, i0 - line0, i1 - line1, script); } /* We have reached lines in the two files that match each other. */ i0++, i1++; } return script; } /* Scan the tables of which lines are inserted and deleted, producing an edit script in forward order. */ static struct change * build_script (filevec) struct file_data filevec[]; { struct change *script = 0; char *changed0 = filevec[0].changed_flag; char *changed1 = filevec[1].changed_flag; int len0 = filevec[0].buffered_lines; int len1 = filevec[1].buffered_lines; int i0 = len0, i1 = len1; /* Note that changedN[-1] does exist, and contains 0. */ /* In RCS comparisons, making the existence or nonexistence of trailing newlines really matter. */ if (output_style == OUTPUT_RCS && filevec[0].missing_newline != filevec[1].missing_newline) changed0[len0 - 1] = changed1[len1 - 1] = 1; while (i0 >= 0 || i1 >= 0) { if (changed0[i0 - 1] || changed1[i1 - 1]) { int line0 = i0, line1 = i1; /* Find # lines changed here in each file. */ while (changed0[i0 - 1]) --i0; while (changed1[i1 - 1]) --i1; /* Record this change. */ script = add_change (i0, i1, line0 - i0, line1 - i1, script); } /* We have reached lines in the two files that match each other. */ i0--, i1--; } return script; } /* Report the differences of two files. DEPTH is the current directory depth. */ int diff_2_files (filevec, depth) struct file_data filevec[]; int depth; { int diags; int i; struct change *e, *p; struct change *script; int binary; int changes; /* See if the two named files are actually the same physical file. If so, we know they are identical without actually reading them. */ if (filevec[0].stat.st_ino == filevec[1].stat.st_ino && filevec[0].stat.st_dev == filevec[1].stat.st_dev) return 0; binary = read_files (filevec); /* If we have detected that file 0 is a binary file, compare the two files as binary. This can happen only when the first chunk is read. Also, -q means treat all files as binary. */ if (binary || no_details_flag) { int differs = (filevec[0].buffered_chars != filevec[1].buffered_chars || bcmp (filevec[0].buffer, filevec[1].buffer, filevec[1].buffered_chars)); if (differs) message (binary ? "Binary files %s and %s differ\n" : "Files %s and %s differ\n", filevec[0].name, filevec[1].name); for (i = 0; i < 2; ++i) if (filevec[i].buffer) free (filevec[i].buffer); return differs; } if (filevec[0].missing_newline != filevec[1].missing_newline && output_style != OUTPUT_RCS) { if (filevec[0].missing_newline) message ("No newline at end of file %s\n", filevec[0].name, ""); if (filevec[1].missing_newline) message ("No newline at end of file %s\n", filevec[1].name, ""); } /* Allocate vectors for the results of comparison: a flag for each line of each file, saying whether that line is an insertion or deletion. Allocate an extra element, always zero, at each end of each vector. */ filevec[0].changed_flag = (char *) xmalloc (filevec[0].buffered_lines + 2); filevec[1].changed_flag = (char *) xmalloc (filevec[1].buffered_lines + 2); bzero (filevec[0].changed_flag, filevec[0].buffered_lines + 2); bzero (filevec[1].changed_flag, filevec[1].buffered_lines + 2); filevec[0].changed_flag++; filevec[1].changed_flag++; /* Some lines are obviously insertions or deletions because they don't match anything. Detect them now, and avoid even thinking about them in the main comparison algorithm. */ discard_confusing_lines (filevec); /* Now do the main comparison algorithm, considering just the undiscarded lines. */ xvec = filevec[0].undiscarded; yvec = filevec[1].undiscarded; diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3; fdiag = (int *) xmalloc (diags * sizeof (int)); fdiag += filevec[1].nondiscarded_lines + 1; bdiag = (int *) xmalloc (diags * sizeof (int)); bdiag += filevec[1].nondiscarded_lines + 1; files[0] = filevec[0]; files[1] = filevec[1]; compareseq (0, filevec[0].nondiscarded_lines, 0, filevec[1].nondiscarded_lines); bdiag -= filevec[1].nondiscarded_lines + 1; free (bdiag); fdiag -= filevec[1].nondiscarded_lines + 1; free (fdiag); /* Modify the results slightly to make them prettier in cases where that can validly be done. */ shift_boundaries (filevec); /* Get the results of comparison in the form of a chain of `struct change's -- an edit script. */ if (output_style == OUTPUT_ED) script = build_reverse_script (filevec); else script = build_script (filevec); if (script) { setup_output (files[0].name, files[1].name, depth); if (output_style == OUTPUT_CONTEXT) print_context_header (files); switch (output_style) { case OUTPUT_CONTEXT: print_context_script (script); break; case OUTPUT_ED: print_ed_script (script); break; case OUTPUT_FORWARD_ED: pr_forward_ed_script (script); break; case OUTPUT_RCS: print_rcs_script (script); break; case OUTPUT_NORMAL: print_normal_script (script); break; case OUTPUT_IFDEF: print_ifdef_script (script); break; } finish_output (); } /* Set CHANGES if we had any diffs that were printed. If some changes are being ignored, we must scan the script to decide. */ if (ignore_blank_lines_flag || ignore_regexp) { struct change *next = script; changes = 0; while (next && changes == 0) { struct change *this, *end; int first0, last0, first1, last1, deletes, inserts; /* Find a set of changes that belong together. */ this = next; end = find_change (next); /* Disconnect them from the rest of the changes, making them a hunk, and remember the rest for next iteration. */ next = end->link; end->link = NULL; /* Determine whether this hunk was printed. */ analyze_hunk (this, &first0, &last0, &first1, &last1, &deletes, &inserts); /* Reconnect the script so it will all be freed properly. */ end->link = next; if (deletes || inserts) changes = 1; } } else changes = (script != 0); for (i = 1; i >= 0; --i) { free (filevec[i].realindexes); free (filevec[i].undiscarded); } for (i = 1; i >= 0; --i) free (--filevec[i].changed_flag); for (i = 1; i >= 0; --i) free (filevec[i].equivs); for (i = 0; i < 2; ++i) { if (filevec[i].buffer != 0) free (filevec[i].buffer); free (filevec[i].linbuf); } for (e = script; e; e = p) { p = e->link; free (e); } return changes; } } } else changes = (script != 0); for (i = 1; i >= 0; --i) { free (filevec[i].realindexes); free (filevec[i].undiscarded); } for (i = 1; i >= 0; --i) free (--filevec[i].changed_flag); for (i = 1; i >= 0; --i) free (filevec[i].equivs); for (i = 0; i < 2; ++i) { if (filevec[i].buffer != 0) free (filevec[i].buffer); free (filevec[i].linbuf); } for (e = script; e; e = p) { p = e->link; diff/io.c 644 473 0 40771 4704135042 5576 /* File I/O for GNU DIFF. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" /* Rotate a value n bits to the left. */ #define UINT_BIT (sizeof (unsigned) * CHAR_BIT) #define ROL(v, n) ((v) << (n) | (v) >> UINT_BIT - (n)) /* Given a hash value and a new character, return a new hash value. */ #define HASH(h, c) ((c) + ROL (h, 7)) /* Current file under consideration. */ struct file_data *current; /* Check for binary files and compare them for exact identity. */ /* Return 1 if BUF contains a 0 or a character with the 0200 bit set. SIZE is the number of characters in BUF. */ static int binary_file_p (buf, size) char *buf; int size; { while (--size >= 0) if (*buf == 0 || (*buf++ & 0200) != 0) return 1; return 0; } int binary_file_threshold = 512; /* Slurp the current file completely into core. Return nonzero if it appears to be a binary file. */ static int slurp () { /* If we have a nonexistent file at this stage, treat it as empty. */ if (current->desc < 0) { current->bufsize = 0; current->buffered_chars = 0; current->buffer = 0; } /* If it's a regular file, we can just get the size out of the stat block and slurp it in all at once. */ /* In all cases, we leave room in the buffer for 2 extra chars beyond those that current->bufsize describes: one for a newline (in case the text does not end with one) and one for a sentinel in find_identical_ends. */ else if ((current->stat.st_mode & S_IFMT) == S_IFREG) { current->bufsize = current->stat.st_size; current->buffer = (char *) xmalloc (current->bufsize + 2); current->buffered_chars = read (current->desc, current->buffer, current->bufsize); if (current->buffered_chars < 0) pfatal_with_name (current->name); } else { int cc; current->bufsize = 4096; current->buffer = (char *) xmalloc (current->bufsize + 2); current->buffered_chars = 0; /* Not a regular file; read it in a little at a time, growing the buffer as necessary. */ while ((cc = read (current->desc, current->buffer + current->buffered_chars, current->bufsize - current->buffered_chars)) > 0) { current->buffered_chars += cc; if (current->buffered_chars == current->bufsize) { current->bufsize = current->bufsize * 2; current->buffer = (char *) xrealloc (current->buffer, current->bufsize + 2); } } if (cc < 0) pfatal_with_name (current->name); } /* Check first part of file to see if it's a binary file. */ if (! always_text_flag && binary_file_p (current->buffer, min (current->buffered_chars, binary_file_threshold))) return 1; /* If not binary, make sure text ends in a newline, but remember that we had to add one. */ if (current->buffered_chars > 0 && current->buffer[current->buffered_chars - 1] != '\n') { current->missing_newline = 1; current->buffer[current->buffered_chars++] = '\n'; } else current->missing_newline = 0; /* Don't use uninitialized storage. */ if (current->buffer != 0) current->buffer[current->buffered_chars] = '\0'; return 0; } /* Split the file into lines, simultaneously computing the hash codes for each line. */ void find_and_hash_each_line () { unsigned h; int i; unsigned char *p = (unsigned char *) current->prefix_end, *ip, c; /* Attempt to get a good initial guess as to the number of lines. */ current->linbufsize = current->buffered_chars / 50 + 5; current->linbuf = (struct line_def *) xmalloc (current->linbufsize * sizeof (struct line_def)); if (function_regexp || output_style == OUTPUT_IFDEF) { /* If the -C, -D or -F option is used, we need to find the lines of the matching prefix. At least we will need to find the last few, but since we don't know how many, it's easiest to find them all. If -D is specified, we need all the lines of the first file. */ current->buffered_lines = 0; p = (unsigned char *) current->buffer; } else { /* Skip the identical prefixes, except be prepared to handle context. In fact, handle 1 more preceding line than the context says, in case shift_boundaries moves things backwards in this file. */ current->buffered_lines = current->prefix_lines - context - 1; if (current->buffered_lines < 0) current->buffered_lines = 0; for (i = 0; i < context + 1; ++i) /* Unless we are at the beginning, */ if ((char *) p != current->buffer) /* Back up at least 1 char until at the start of a line. */ while ((char *) --p != current->buffer && p[-1] != '\n') ; } while ((char *) p < current->suffix_begin) { h = 0; ip = p; if (current->prefix_end <= (char *) p) { /* Hash this line until we find a newline. */ if (ignore_case_flag) { if (ignore_all_space_flag) while ((c = *p) != '\n') { if (! isspace (c)) if (isupper (c)) h = HASH (h, tolower (c)); else h = HASH (h, c); ++p; } else if (ignore_space_change_flag) while ((c = *p) != '\n') { if (c == ' ' || c == '\t') { while ((c = *p) == ' ' || c == '\t') ++p; if (c == '\n') break; h = HASH (h, ' '); } /* C is now the first non-space. */ if (isupper (c)) h = HASH (h, tolower (c)); else h = HASH (h, c); ++p; } else while ((c = *p) != '\n') { if (isupper (c)) h = HASH (h, tolower (c)); else h = HASH (h, c); ++p; } } else { if (ignore_all_space_flag) while ((c = *p) != '\n') { if (! isspace (c)) h = HASH (h, c); ++p; } else if (ignore_space_change_flag) while ((c = *p) != '\n') { if (c == ' ' || c == '\t') { while ((c = *p) == ' ' || c == '\t') ++p; if (c == '\n') break; h = HASH (h, ' '); } /* C is not the first non-space. */ h = HASH (h, c); ++p; } else while ((c = *p) != '\n') { h = HASH (h, c); ++p; } } } else /* This line is part of the matching prefix, so we don't need to hash it. */ while (*p != '\n') ++p; /* Maybe increase the size of the line table. */ if (current->buffered_lines >= current->linbufsize) { while (current->buffered_lines >= current->linbufsize) current->linbufsize *= 2; current->linbuf = (struct line_def *) xrealloc (current->linbuf, current->linbufsize * sizeof (struct line_def)); } current->linbuf[current->buffered_lines].text = (char *) ip; current->linbuf[current->buffered_lines].length = p - ip + 1; current->linbuf[current->buffered_lines].hash = h; ++current->buffered_lines; ++p; } if (output_style == OUTPUT_RCS && current->missing_newline) --current->linbuf[current->buffered_lines - 1].length; i = 0; while ((i < context || output_style == OUTPUT_IFDEF) && (char *) p < current->buffer + current->buffered_chars) { ip = p; while (*p++ != '\n') ; /* Maybe increase the size of the line table. */ if (current->buffered_lines >= current->linbufsize) { while (current->buffered_lines >= current->linbufsize) current->linbufsize *= 2; current->linbuf = (struct line_def *) xrealloc (current->linbuf, current->linbufsize * sizeof (struct line_def)); } current->linbuf[current->buffered_lines].text = (char *) ip; current->linbuf[current->buffered_lines].length = p - ip; current->linbuf[current->buffered_lines].hash = 0; ++current->buffered_lines; ++i; } } /* Given a vector of two file_data objects, find the identical prefixes and suffixes of each object. */ static void find_identical_ends (filevec) struct file_data filevec[]; { char *p0, *p1, *end0, *beg0; int lines; if (filevec[0].buffered_chars == 0 || filevec[1].buffered_chars == 0) { filevec[0].prefix_end = filevec[0].buffer; filevec[1].prefix_end = filevec[1].buffer; filevec[0].prefix_lines = filevec[1].prefix_lines = 0; filevec[0].suffix_begin = filevec[0].buffer + filevec[0].buffered_chars; filevec[1].suffix_begin = filevec[1].buffer + filevec[1].buffered_chars; filevec[0].suffix_lines = filevec[1].suffix_lines = 0; return; } /* Find identical prefix. */ p0 = filevec[0].buffer; p1 = filevec[1].buffer; lines = 0; /* Insert end "sentinels", in this case characters that are guarranteed to make the equality test false, and thus terminate the loop. */ if (filevec[0].buffered_chars < filevec[1].buffered_chars) p0[filevec[0].buffered_chars] = ~p1[filevec[0].buffered_chars]; else p1[filevec[1].buffered_chars] = ~p0[filevec[1].buffered_chars]; /* Loop until first mismatch, or to the sentinel characters. */ while (1) { char c = *p0++; if (c != *p1++) break; if (c == '\n') ++lines; } /* Don't count missing newline as part of prefix in RCS mode. */ if (output_style == OUTPUT_RCS && ((filevec[0].missing_newline && p0 - filevec[0].buffer > filevec[0].buffered_chars) || (filevec[1].missing_newline && p1 - filevec[1].buffer > filevec[1].buffered_chars))) --p0, --p1, --lines; /* If the sentinel was passed, and lengths are equal, the files are identical. */ if (p0 - filevec[0].buffer > filevec[0].buffered_chars && filevec[0].buffered_chars == filevec[1].buffered_chars) { filevec[0].prefix_end = p0 - 1; filevec[1].prefix_end = p1 - 1; filevec[0].prefix_lines = filevec[1].prefix_lines = lines; filevec[0].suffix_begin = filevec[0].buffer; filevec[1].suffix_begin = filevec[1].buffer; filevec[0].suffix_lines = filevec[1].suffix_lines = lines; return; } /* Point at first nonmatching characters. */ --p0, --p1; /* Skip back to last line-beginning in the prefix. */ while (p0 != filevec[0].buffer && p0[-1] != '\n') --p0, --p1; /* Record the prefix. */ filevec[0].prefix_end = p0; filevec[1].prefix_end = p1; filevec[0].prefix_lines = filevec[1].prefix_lines = lines; /* Find identical suffix. */ /* P0 and P1 point beyond the last chars not yet compared. */ p0 = filevec[0].buffer + filevec[0].buffered_chars; p1 = filevec[1].buffer + filevec[1].buffered_chars; lines = 0; if (output_style != OUTPUT_RCS || filevec[0].missing_newline == filevec[1].missing_newline) { end0 = p0; /* Addr of last char in file 0. */ /* Get value of P0 at which we should stop scanning backward: this is when either P0 or P1 points at the last char of the identical prefix. */ if (filevec[0].buffered_chars < filevec[1].buffered_chars) beg0 = filevec[0].prefix_end - 1; else /* Figure out where P0 will be when P1 is at the end of the prefix. Thus we only need to test P0. */ beg0 = (filevec[0].prefix_end - 1 + filevec[0].buffered_chars - filevec[1].buffered_chars); /* Scan back until chars don't match or we reach that point. */ while (p0 != beg0) { char c = *--p0; if (c != *--p1) break; if (c == '\n') ++lines; } /* Make P0 and P1 point at the first char of the matching suffix. */ ++p0, ++p1; /* Advance to next place that is a line-beginning in both files. */ while (p0 != end0 && !((p0 == filevec[0].buffer || p0[-1] == '\n') && (p1 == filevec[1].buffer || p1[-1] == '\n'))) ++p0, ++p1; /* Subtract one, since the last newline isn't followed by a line. */ --lines; } /* Record the suffix. */ filevec[0].suffix_begin = p0; filevec[1].suffix_begin = p1; filevec[0].suffix_lines = filevec[1].suffix_lines = lines; } /* Lines are put into equivalence classes (of lines that match in line_cmp). Each equivalence class is represented by one of these structures, but only while the classes are being computed. Afterward, each class is represented by a number. */ struct equivclass { struct equivclass *next; /* Next item in this bucket. */ struct line_def line; /* A line that fits this class. */ }; /* Hash-table: array of buckets, each being a chain of equivalence classes. */ static struct equivclass **buckets; /* Size of the bucket array. */ static int nbuckets; /* Array in which the equivalence classes are allocated. The bucket-chains go through the elements in this array. The number of an equivalence class is its index in this array. */ static struct equivclass *equivs; /* Index of first free element in the array `equivs'. */ static int equivs_index; /* Size allocated to the array `equivs'. */ static int equivs_alloc; /* Largest primes less than some power of two, for nbuckets. Values range from useful to preposterous. If one of these numbers isn't prime after all, don't blame it on me, blame it on primes (6) . . . */ static int primes[] = { 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 16777213, 33554393, 67108859, /* Preposterously large . . . */ -1 }; /* Index of current nbuckets in primes. */ static int primes_index; /* Find the equiv class associated with line N of the current file. */ static int find_equiv_class (n) int n; { int bucket; struct equivclass *b, *p = NULL; /* Equivalence class 0 is permanently allocated to lines that were not hashed because they were parts of identical prefixes or suffixes. */ if (n < current->prefix_lines || current->linbuf[n].text >= current->suffix_begin) return 0; /* Check through the appropriate bucket to see if there isn't already an equivalence class for this line. */ bucket = current->linbuf[n].hash % nbuckets; b = buckets[bucket]; while (b) { if (b->line.hash == current->linbuf[n].hash && (b->line.length == current->linbuf[n].length /* Lines of different lengths can match with certain options. */ || length_varies) && !line_cmp (&b->line, ¤t->linbuf[n])) return b - equivs; p = b, b = b->next; } /* Create a new equivalence class in this bucket. */ p = &equivs[equivs_index++]; p->next = buckets[bucket]; buckets[bucket] = p; p->line = current->linbuf[n]; return equivs_index - 1; } /* Given a vector of two file_data objects, read the file associated with each one, and build the table of equivalence classes. Return nonzero if either file appears to be a binary file. */ int read_files (filevec) struct file_data filevec[]; { int i, j; int binary = 0; int this_binary; current = &filevec[0]; binary = this_binary = slurp (); current = &filevec[1]; this_binary = slurp (); if (binary || this_binary) return 1; find_identical_ends (filevec); for (i = 0; i < 2; ++i) { current = &filevec[i]; find_and_hash_each_line (); } /* This is guaranteed to be enough space. */ equivs_alloc = filevec[0].buffered_lines + filevec[1].buffered_lines + 1; equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass)); /* Equivalence class 0 is permanently safe for lines that were not hashed. Real equivalence classes start at 1. */ equivs_index = 1; primes_index = 0; while (primes[primes_index] < equivs_alloc / 3) primes_index++; buckets = (struct equivclass **) xmalloc (primes[primes_index] * sizeof (struct equivclass *)); bzero (buckets, primes[primes_index] * sizeof (struct equivclass *)); nbuckets = primes[primes_index]; for (i = 0; i < 2; ++i) { current = &filevec[i]; current->equivs = (int *) xmalloc (current->buffered_lines * sizeof (int)); for (j = 0; j < current->buffered_lines; ++j) current->equivs[j] = find_equiv_class (j); } filevec[0].equiv_max = filevec[1].equiv_max = equivs_index; free (equivs); free (buckets); return 0; } mes_inddiff/context.c 644 473 0 20553 4704135043 6650 /* Context-format output routines for GNU DIFF. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" #include "regex.h" static void pr_context_hunk (); static struct change *find_hunk (); static void mark_ignorable (); static int find_function (); /* Last place find_function started searching from. */ static int find_function_last_search; /* The value find_function returned when it started searching there. */ static int find_function_last_match; /* Print a header for a context diff, with the file names and dates. */ void print_context_header (inf) struct file_data *inf; { fprintf (outfile, "*** %s\t%s", inf[0].name, ctime (&inf[0].stat.st_mtime)); fprintf (outfile, "--- %s\t%s", inf[1].name, ctime (&inf[1].stat.st_mtime)); } /* Print an edit script in context format. */ void print_context_script (script) struct change *script; { if (ignore_blank_lines_flag || ignore_regexp) mark_ignorable (script); else { struct change *e; for (e = script; e; e = e->link) e->ignore = 0; } find_function_last_search = 0; find_function_last_match = -1; print_script (script, find_hunk, pr_context_hunk); } /* Print a pair of line numbers with a comma, translated for file FILE. If the second number is smaller, use the first in place of it. Args A and B are internal line numbers. We print the translated (real) line numbers. */ static void print_context_number_range (file, a, b) struct file_data *file; int a, b; { int trans_a, trans_b; translate_range (file, a, b, &trans_a, &trans_b); /* Note: we can have B < A in the case of a range of no lines. In this case, we should print the line number before the range, which is B. */ if (trans_b >= trans_a) fprintf (outfile, "%d,%d", trans_a, trans_b); else fprintf (outfile, "%d", trans_b); } /* Print a portion of an edit script in context format. HUNK is the beginning of the portion to be printed. The end is marked by a `link' that has been nulled out. Prints out lines from both files, and precedes each line with the appropriate flag-character. */ static void pr_context_hunk (hunk) struct change *hunk; { int first0, last0, first1, last1, show_from, show_to, i; struct change *next; char *prefix; char *function; int function_length; /* Determine range of line numbers involved in each file. */ analyze_hunk (hunk, &first0, &last0, &first1, &last1, &show_from, &show_to); if (!show_from && !show_to) return; /* Include a context's width before and after. */ first0 = max (first0 - context, 0); first1 = max (first1 - context, 0); last0 = min (last0 + context, files[0].buffered_lines - 1); last1 = min (last1 + context, files[1].buffered_lines - 1); /* If desired, find the preceding function definition line in file 0. */ function = 0; if (function_regexp) find_function (&files[0], first0, &function, &function_length); /* If we looked for and found a function this is part of, include its name in the header of the diff section. */ fprintf (outfile, "***************"); if (function) { fprintf (outfile, " "); fwrite (function, 1, min (function_length - 1, 40), outfile); } fprintf (outfile, "\n*** "); print_context_number_range (&files[0], first0, last0); fprintf (outfile, " ****\n"); if (show_from) { next = hunk; for (i = first0; i <= last0; i++) { /* Skip past changes that apply (in file 0) only to lines before line I. */ while (next && next->line0 + next->deleted <= i) next = next->link; /* Compute the marking for line I. */ prefix = " "; if (next && next->line0 <= i) /* The change NEXT covers this line. If lines were inserted here in file 1, this is "changed". Otherwise it is "deleted". */ prefix = (next->inserted > 0 ? "!" : "-"); print_1_line (prefix, &files[0].linbuf[i]); } } fprintf (outfile, "--- "); print_context_number_range (&files[1], first1, last1); fprintf (outfile, " ----\n"); if (show_to) { next = hunk; for (i = first1; i <= last1; i++) { /* Skip past changes that apply (in file 1) only to lines before line I. */ while (next && next->line1 + next->inserted <= i) next = next->link; /* Compute the marking for line I. */ prefix = " "; if (next && next->line1 <= i) /* The change NEXT covers this line. If lines were deleted here in file 0, this is "changed". Otherwise it is "inserted". */ prefix = (next->deleted > 0 ? "!" : "+"); print_1_line (prefix, &files[1].linbuf[i]); } } } /* Scan a (forward-ordered) edit script for the first place that at least 2*CONTEXT unchanged lines appear, and return a pointer to the `struct change' for the last change before those lines. */ static struct change * find_hunk (start) struct change *start; { struct change *prev; int top0, top1; int thresh; do { /* Computer number of first line in each file beyond this changed. */ top0 = start->line0 + start->deleted; top1 = start->line1 + start->inserted; prev = start; start = start->link; /* Threshold distance is 2*CONTEXT between two non-ignorable changes, but only CONTEXT if one is ignorable. */ thresh = ((prev->ignore || (start && start->ignore)) ? context : 2 * context); /* It is not supposed to matter which file we check in the end-test. If it would matter, crash. */ if (start && start->line0 - top0 != start->line1 - top1) abort (); } while (start /* Keep going if less than THRESH lines elapse before the affected line. */ && start->line0 < top0 + thresh); return prev; } /* Set the `ignore' flag properly in each change in SCRIPT. It should be 1 if all the lines inserted or deleted in that change are ignorable lines. */ static void mark_ignorable (script) struct change *script; { while (script) { struct change *next = script->link; int first0, last0, first1, last1, deletes, inserts; /* Turn this change into a hunk: detach it from the others. */ script->link = 0; /* Determine whether this change is ignorable. */ analyze_hunk (script, &first0, &last0, &first1, &last1, &deletes, &inserts); /* Reconnect the chain as before. */ script->link = next; /* If the change is ignorable, mark it. */ script->ignore = (!deletes && !inserts); /* Advance to the following change. */ script = next; } } /* Find the last function-header line in FILE prior to line number LINENUM. This is a line containing a match for the regexp in `function_regexp'. Store the address of the line text into LINEP and the length of the line into LENP. Do not store anything if no function-header is found. */ static int find_function (file, linenum, linep, lenp) struct file_data *file; int linenum; char **linep; int *lenp; { int i = linenum; int last = find_function_last_search; find_function_last_search = i; while (--i >= last) { /* See if this line is what we want. */ if (0 <= re_search (&function_regexp_compiled, files[0].linbuf[i].text, files[0].linbuf[i].length, 0, files[0].linbuf[i].length, 0)) { *linep = files[0].linbuf[i].text; *lenp = files[0].linbuf[i].length; find_function_last_match = i; return 1; } } /* If we search back to where we started searching the previous time, find the line we found last time. */ if (find_function_last_match >= 0) { i = find_function_last_match; *linep = files[0].linbuf[i].text; *lenp = files[0].linbuf[i].length; return 1; } return 0; } es[0].linbuf[i].length, 0, files[0].linbuf[i].length, 0)) { *linep = files[0].linbuf[i].text; *lenp = files[0].linbuf[i].length; diff/ed.c 644 473 0 12206 4704135043 5550 /* Output routines for ed-script format. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" static void print_rcs_hunk (); static void print_ed_hunk (); static void pr_forward_ed_hunk (); void translate_range (); struct change *find_change (); struct change *find_reverse_change (); /* Print our script as ed commands. */ void print_ed_script (script) struct change *script; { print_script (script, find_reverse_change, print_ed_hunk); } /* Print a hunk of an ed diff */ static void print_ed_hunk (hunk) struct change *hunk; { int f0, l0, f1, l1; int deletes, inserts; #if 0 hunk = flip_script (hunk); #endif #ifdef DEBUG debug_script (hunk); #endif /* Determine range of line numbers involved in each file. */ analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts); if (!deletes && !inserts) return; /* Print out the line number header for this hunk */ print_number_range (',', &files[0], f0, l0); fprintf (outfile, "%c\n", change_letter (inserts, deletes)); /* Print new/changed lines from second file, if needed */ if (inserts) { int i; int inserting = 1; for (i = f1; i <= l1; i++) { /* Resume the insert, if we stopped. */ if (! inserting) fprintf (outfile, "%da\n", i - f1 + translate_line_number (&files[0], f0) - 1); inserting = 1; /* If the file's line is just a dot, it would confuse `ed'. So output it with a double dot, and set the flag LEADING_DOT so that we will output another ed-command later to change the double dot into a single dot. */ if (files[1].linbuf[i].text[0] == '.' && files[1].linbuf[i].text[1] == '\n') { fprintf (outfile, "..\n"); fprintf (outfile, ".\n"); /* Now change that double dot to the desired single dot. */ fprintf (outfile, "%ds/^\\.\\././\n", i - f1 + translate_line_number (&files[0], f0)); inserting = 0; } else /* Line is not `.', so output it unmodified. */ print_1_line ("", &files[1].linbuf[i]); } /* End insert mode, if we are still in it. */ if (inserting) fprintf (outfile, ".\n"); } } /* Print change script in the style of ed commands, but print the changes in the order they appear in the input files, which means that the commands are not truly useful with ed. */ void pr_forward_ed_script (script) struct change *script; { print_script (script, find_change, pr_forward_ed_hunk); } static void pr_forward_ed_hunk (hunk) struct change *hunk; { int i; int f0, l0, f1, l1; int deletes, inserts; /* Determine range of line numbers involved in each file. */ analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts); if (!deletes && !inserts) return; fprintf (outfile, "%c", change_letter (inserts, deletes)); print_number_range (' ', files, f0, l0); fprintf (outfile, "\n"); /* If deletion only, print just the number range. */ if (!inserts) return; /* For insertion (with or without deletion), print the number range and the lines from file 2. */ for (i = f1; i <= l1; i++) print_1_line ("", &files[1].linbuf[i]); fprintf (outfile, ".\n"); } /* Print in a format somewhat like ed commands except that each insert command states the number of lines it inserts. This format is used for RCS. */ void print_rcs_script (script) struct change *script; { print_script (script, find_change, print_rcs_hunk); } /* Print a hunk of an RCS diff */ static void print_rcs_hunk (hunk) struct change *hunk; { int i; int f0, l0, f1, l1; int deletes, inserts; int tf0, tl0, tf1, tl1; /* Determine range of line numbers involved in each file. */ analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts); if (!deletes && !inserts) return; translate_range (&files[0], f0, l0, &tf0, &tl0); if (deletes) { fprintf (outfile, "d"); /* For deletion, print just the starting line number from file 0 and the number of lines deleted. */ fprintf (outfile, "%d %d\n", tf0, (tl0 >= tf0 ? tl0 - tf0 + 1 : 1)); } if (inserts) { fprintf (outfile, "a"); /* Take last-line-number from file 0 and # lines from file 1. */ translate_range (&files[1], f1, l1, &tf1, &tl1); fprintf (outfile, "%d %d\n", tl0, (tl1 >= tf1 ? tl1 - tf1 + 1 : 1)); /* Print the inserted lines. */ for (i = f1; i <= l1; i++) print_1_line ("", &files[1].linbuf[i]); } } eted. */ fprintf (outfile, "%d %d\n", tf0, (tl0 >= tf0 ? tl0 - tf0 + 1 : 1)); } if (inserts) { fprintf (outfile, "a"); /* Take last-line-number from file 0 and # lines from file 1. */ translate_range (&files[1], f1, l1, &tf1, &tl1); fprintf (outfile, "%d %d\n", tl0, (tl1 >= tf1 ? tl1 - tf1 + 1 :diff/normal.c 644 473 0 4242 4704135043 6431 /* Normal-format output routines for GNU DIFF. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" void print_normal_hunk (); void print_number_range (); struct change *find_change (); /* Print the edit-script SCRIPT as a normal diff. INF points to an array of descriptions of the two files. */ void print_normal_script (script) struct change *script; { print_script (script, find_change, print_normal_hunk); } /* Print a hunk of a normal diff. This is a contiguous portion of a complete edit script, describing changes in consecutive lines. */ void print_normal_hunk (hunk) struct change *hunk; { int first0, last0, first1, last1, deletes, inserts; register int i; /* Determine range of line numbers involved in each file. */ analyze_hunk (hunk, &first0, &last0, &first1, &last1, &deletes, &inserts); if (!deletes && !inserts) return; /* Print out the line number header for this hunk */ print_number_range (',', &files[0], first0, last0); fprintf (outfile, "%c", change_letter (inserts, deletes)); print_number_range (',', &files[1], first1, last1); fprintf (outfile, "\n"); /* Print the lines that the first file has. */ if (deletes) for (i = first0; i <= last0; i++) print_1_line ("<", &files[0].linbuf[i]); if (inserts && deletes) fprintf (outfile, "---\n"); /* Print the lines that the second file has. */ if (inserts) for (i = first1; i <= last1; i++) print_1_line (">", &files[1].linbuf[i]); } st0); fprintf (outfile, "%c", change_letter (inserts, deletes)); print_number_range (',', &files[1], first1, last1); fprintf (outfile, "\n"); /* Print the lines that the first file has. */ if (deletes) for (i = first0; i <= last0; i++) print_1_line ("<", &files[0].linbuf[i]); if (inserts && deletes) fprintf (outfile, "-diff/ifdef.c 644 473 0 5004 4704135044 6214 /* #ifdef-format output routines for GNU DIFF. Copyright (C) 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY. No author or distributor accepts responsibility to anyone for the consequences of using it or for whether it serves any particular purpose or works at all, unless he says so in writing. Refer to the GNU DIFF General Public License for full details. Everyone is granted permission to copy, modify and redistribute GNU DIFF, but only under the conditions described in the GNU DIFF General Public License. A copy of this license is supposed to have been given to you along with GNU DIFF so you can know your rights and responsibilities. It should be in a file named COPYING. Among other things, the copyright notice and this notice must be preserved on all copies. */ #include "diff.h" static void print_ifdef_hunk (); struct change *find_change (); static int next_line; /* Print the edit-script SCRIPT as a merged #ifdef file. */ void print_ifdef_script (script) struct change *script; { next_line = 0; print_script (script, find_change, print_ifdef_hunk); while (next_line < files[0].buffered_lines) print_1_line ("", &files[0].linbuf[next_line++]); } /* Print a hunk of an ifdef diff. This is a contiguous portion of a complete edit script, describing changes in consecutive lines. */ static void print_ifdef_hunk (hunk) struct change *hunk; { int first0, last0, first1, last1, deletes, inserts; register int i; /* Determine range of line numbers involved in each file. */ analyze_hunk (hunk, &first0, &last0, &first1, &last1, &deletes, &inserts); if (!deletes && !inserts) return; /* Print out lines up to this change. */ while (next_line < first0) print_1_line ("", &files[0].linbuf[next_line++]); /* Print out stuff deleted from first file. */ if (deletes) { fprintf (outfile, "#ifndef %s\n", ifdef_string); for (i = first0; i <= last0; i++) print_1_line ("", &files[0].linbuf[i]); next_line = i; } /* Print out stuff inserted from second file. */ if (inserts) { if (deletes) fprintf (outfile, "#else /* %s */\n", ifdef_string); else fprintf (outfile, "#ifdef %s\n", ifdef_string); for (i = first1; i <= last1; i++) print_1_line ("", &files[1].linbuf[i]); } if (inserts) fprintf (outfile, "#endif /* %s */\n", ifdef_string); else fprintf (outfile, "#endif /* not %s */\n", ifdef_string); } i++) print_1_line ("", &files[0].linbuf[i]); next_line = i; } /* Print out stuff inserted from second file. */ if (inserts) { if (deletes) fprintf (outfile, "#else /* %s */\n", ifdef_string); else fprintf (outfile, "#ifdef %s\n", ifdef_string); for (i = first1; i <= last1; i++) print_1_line ("", &files[1].linbuf[i]); } if (inserts) fprintf (outfile, "#endif /* %s */\n", ifdef_string); else fprintf (outfile, "#endif /* not %s */\n", ifdef_string)diff/util.c 644 473 0 34465 4704135044 6151 /* Support routines for GNU DIFF. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" /* Use when a system call returns non-zero status. TEXT should normally be the file name. */ void perror_with_name (text) char *text; { fprintf (stderr, "%s: ", program); perror (text); } /* Use when a system call returns non-zero status and that is fatal. */ void pfatal_with_name (text) char *text; { print_message_queue (); fprintf (stderr, "%s: ", program); perror (text); exit (2); } /* Print an error message from the format-string FORMAT with args ARG1 and ARG2. */ void error (format, arg, arg1) char *format; char *arg; char *arg1; { fprintf (stderr, "%s: ", program); fprintf (stderr, format, arg, arg1); fprintf (stderr, "\n"); } /* Print an error message containing the string TEXT, then exit. */ void fatal (message) char *message; { print_message_queue (); error (message, ""); exit (2); } /* Like printf, except if -l in effect then save the message and print later. This is used for things like "binary files differ" and "Only in ...". */ void message (format, arg1, arg2) char *format, *arg1, *arg2; { if (paginate_flag) { struct msg *new = (struct msg *) xmalloc (sizeof (struct msg)); if (msg_chain_end == 0) msg_chain = msg_chain_end = new; else { msg_chain_end->next = new; msg_chain_end = new; } new->format = format; new->arg1 = concat (arg1, "", ""); new->arg2 = concat (arg2, "", ""); new->next = 0; } else printf (format, arg1, arg2); } /* Output all the messages that were saved up by calls to `message'. */ void print_message_queue () { struct msg *m; for (m = msg_chain; m; m = m->next) printf (m->format, m->arg1, m->arg2); } /* Call before outputting the results of comparing files NAME0 and NAME1 to set up OUTFILE, the stdio stream for the output to go to. Usually, OUTFILE is just stdout. But when -l was specified we fork off a `pr' and make OUTFILE a pipe to it. `pr' then outputs to our stdout. */ void setup_output (name0, name1, depth) char *name0, *name1; int depth; { char *name; /* Construct the header of this piece of diff. */ name = (char *) xmalloc (strlen (name0) + strlen (name1) + strlen (switch_string) + 15); strcpy (name, "diff"); strcat (name, switch_string); strcat (name, " "); strcat (name, name0); strcat (name, " "); strcat (name, name1); if (paginate_flag) { int pipes[2]; int desc; /* For a `pr' and make OUTFILE a pipe to it. */ pipe (pipes); fflush (stdout); desc = vfork (); if (desc < 0) pfatal_with_name ("vfork"); if (desc == 0) { close (pipes[1]); close (fileno (stdin)); if (dup2 (pipes[0], fileno (stdin)) < 0) pfatal_with_name ("dup2"); close (pipes[0]); if (execl (PR_FILE_NAME, PR_FILE_NAME, "-f", "-h", name, 0) < 0) pfatal_with_name (PR_FILE_NAME); } else { close (pipes[0]); outfile = fdopen (pipes[1], "w"); } } else { /* If -l was not specified, output the diff straight to `stdout'. */ outfile = stdout; /* If handling multiple files (because scanning a directory), print which files the following output is about. */ if (depth > 0) printf ("%s\n", name); } free (name); } /* Call after the end of output of diffs for one file. Close OUTFILE and get rid of the `pr' subfork. */ void finish_output () { if (outfile != stdout) { fclose (outfile); wait (0); } } /* Compare two lines (typically one from each input file) according to the command line options. Each line is described by a `struct line_def'. Return 1 if the lines differ, like `bcmp'. */ int line_cmp (s1, s2) struct line_def *s1, *s2; { register char *t1, *t2; register char end_char = line_end_char; int savechar; /* Check first for exact identity. If that is true, return 0 immediately. This detects the common case of exact identity faster than complete comparison would. */ t1 = s1->text; t2 = s2->text; /* Alter the character following line 2 so it doesn't match that following line 1. (We used to alter the character after line 1, but that caused trouble if line 2 directly follows line 1.) */ savechar = s2->text[s2->length]; s2->text[s2->length] = s1->text[s1->length] + 1; /* Now find the first mismatch; this won't go past the character we just changed. */ while (*t1++ == *t2++); /* Undo the alteration. */ s2->text[s2->length] = savechar; /* If the comparison stopped at the alteration, the two lines are identical. */ if (t2 == s2->text + s2->length + 1) return 0; /* Not exactly identical, but perhaps they match anyway when case or whitespace is ignored. */ if (ignore_case_flag || ignore_space_change_flag || ignore_all_space_flag) { t1 = s1->text; t2 = s2->text; while (1) { register char c1 = *t1++; register char c2 = *t2++; /* Ignore horizontal whitespace if -b or -w is specified. */ if (ignore_all_space_flag) { /* For -w, just skip past any spaces or tabs. */ while (c1 == ' ' || c1 == '\t') c1 = *t1++; while (c2 == ' ' || c2 == '\t') c2 = *t2++; } else if (ignore_space_change_flag) { /* For -b, advance past any sequence of whitespace in line 1 and consider it just one Space, or nothing at all if it is at the end of the line. */ if (c1 == ' ' || c1 == '\t') { while (1) { c1 = *t1++; if (c1 == end_char) break; if (c1 != ' ' && c1 != '\t') { --t1; c1 = ' '; break; } } } /* Likewise for line 2. */ if (c2 == ' ' || c2 == '\t') { while (1) { c2 = *t2++; if (c2 == end_char) break; if (c2 != ' ' && c2 != '\t') { --t2; c2 = ' '; break; } } } } /* Upcase all letters if -i is specified. */ if (ignore_case_flag) { if (islower (c1)) c1 = toupper (c1); if (islower (c2)) c2 = toupper (c2); } if (c1 != c2) break; if (c1 == end_char) return 0; } } return (1); } /* Find the consecutive changes at the start of the script START. Return the last link before the first gap. */ struct change * find_change (start) struct change *start; { return start; } struct change * find_reverse_change (start) struct change *start; { return start; } /* Divide SCRIPT into pieces by calling HUNKFUN and print each piece with PRINTFUN. Both functions take one arg, an edit script. HUNKFUN is called with the tail of the script and returns the last link that belongs together with the start of the tail. PRINTFUN takes a subscript which belongs together (with a null link at the end) and prints it. */ void print_script (script, hunkfun, printfun) struct change *script; struct change * (*hunkfun) (); void (*printfun) (); { struct change *next = script; while (next) { struct change *this, *end; /* Find a set of changes that belong together. */ this = next; end = (*hunkfun) (next); /* Disconnect them from the rest of the changes, making them a hunk, and remember the rest for next iteration. */ next = end->link; end->link = NULL; #ifdef DEBUG debug_script (this); #endif /* Print this hunk. */ (*printfun) (this); /* Reconnect the script so it will all be freed properly. */ end->link = next; } } /* Print the text of a single line LINE, flagging it with the characters in LINE_FLAG (which say whether the line is inserted, deleted, changed, etc.). */ void print_1_line (line_flag, line) char *line_flag; struct line_def *line; { fprintf (outfile, "%s", line_flag); /* If -T was specified, use a Tab between the line-flag and the text. Otherwise use a Space (as Unix diff does). Print neither space nor tab if line-flags are empty. */ if (line_flag[0] != 0) { if (tab_align_flag) fprintf (outfile, "\t"); else fprintf (outfile, " "); } /* Now output the contents of the line. If -t was specified, expand tabs to spaces. Otherwise output verbatim. */ if (tab_expand_flag) { register int column = 0; register int i; for (i = 0; i < line->length; i++) { register char c = line->text[i]; if (c == '\t') { putc (' ', outfile); column++; while (column & 7) { putc (' ', outfile); column++; } } else if (c == '\b') { column--; putc (c, outfile); } else { column++; putc (c, outfile); } } } else fwrite (line->text, sizeof (char), line->length, outfile); if (line_end_char != '\n') putc ('\n', outfile); } change_letter (inserts, deletes) int inserts, deletes; { if (!inserts) return 'd'; else if (!deletes) return 'a'; else return 'c'; } /* Translate an internal line number (an index into diff's table of lines) into an actual line number in the input file. The internal line number is LNUM. FILE points to the data on the file. Internal line numbers count from 0 within the current chunk. Actual line numbers count from 1 within the entire file; in addition, they include lines ignored for comparison purposes. The `ltran' feature is no longer in use. */ int translate_line_number (file, lnum) struct file_data *file; int lnum; { return lnum + 1; } void translate_range (file, a, b, aptr, bptr) struct file_data *file; int a, b; int *aptr, *bptr; { *aptr = translate_line_number (file, a - 1) + 1; *bptr = translate_line_number (file, b + 1) - 1; } /* Print a pair of line numbers with SEPCHAR, translated for file FILE. If the two numbers are identical, print just one number. Args A and B are internal line numbers. We print the translated (real) line numbers. */ void print_number_range (sepchar, file, a, b) char sepchar; struct file_data *file; int a, b; { int trans_a, trans_b; translate_range (file, a, b, &trans_a, &trans_b); /* Note: we can have B < A in the case of a range of no lines. In this case, we should print the line number before the range, which is B. */ if (trans_b > trans_a) fprintf (outfile, "%d%c%d", trans_a, sepchar, trans_b); else fprintf (outfile, "%d", trans_b); } /* Look at a hunk of edit script and report the range of lines in each file that it applies to. HUNK is the start of the hunk, which is a chain of `struct change'. The first and last line numbers of file 0 are stored in *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1. Note that these are internal line numbers that count from 0. If no lines from file 0 are deleted, then FIRST0 is LAST0+1. Also set *DELETES nonzero if any lines of file 0 are deleted and set *INSERTS nonzero if any lines of file 1 are inserted. If only ignorable lines are inserted or deleted, both are set to 0. */ void analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts) struct change *hunk; int *first0, *last0, *first1, *last1; int *deletes, *inserts; { int f0, l0, f1, l1, show_from, show_to; int i; int nontrivial = !(ignore_blank_lines_flag || ignore_regexp); struct change *next; show_from = show_to = 0; f0 = hunk->line0; f1 = hunk->line1; for (next = hunk; next; next = next->link) { l0 = next->line0 + next->deleted - 1; l1 = next->line1 + next->inserted - 1; show_from += next->deleted; show_to += next->inserted; for (i = next->line0; i <= l0 && ! nontrivial; i++) if ((!ignore_blank_lines_flag || files[0].linbuf[i].length > 1) && (!ignore_regexp || 0 > re_search (&ignore_regexp_compiled, files[0].linbuf[i].text, files[0].linbuf[i].length, 0, files[0].linbuf[i].length, 0))) nontrivial = 1; for (i = next->line1; i <= l1 && ! nontrivial; i++) if ((!ignore_blank_lines_flag || files[1].linbuf[i].length > 1) && (!ignore_regexp || 0 > re_search (&ignore_regexp_compiled, files[1].linbuf[i].text, files[1].linbuf[i].length, 0, files[1].linbuf[i].length, 0))) nontrivial = 1; } *first0 = f0; *last0 = l0; *first1 = f1; *last1 = l1; /* If all inserted or deleted lines are ignorable, tell the caller to ignore this hunk. */ if (!nontrivial) show_from = show_to = 0; *deletes = show_from; *inserts = show_to; } /* malloc a block of memory, with fatal error message if we can't do it. */ VOID * xmalloc (size) unsigned size; { register VOID *value; if (size == 0) size = 1; value = (VOID *) malloc (size); if (!value) fatal ("virtual memory exhausted"); return value; } /* realloc a block of memory, with fatal error message if we can't do it. */ VOID * xrealloc (old, size) VOID *old; unsigned int size; { register VOID *value; if (size == 0) size = 1; value = (VOID *) realloc (old, size); if (!value) fatal ("virtual memory exhausted"); return value; } /* Concatenate three strings, returning a newly malloc'd string. */ char * concat (s1, s2, s3) char *s1, *s2, *s3; { int len = strlen (s1) + strlen (s2) + strlen (s3); char *new = (char *) xmalloc (len + 1); strcpy (new, s1); strcat (new, s2); strcat (new, s3); return new; } debug_script (sp) struct change *sp; { fflush (stdout); for (; sp; sp = sp->link) fprintf (stderr, "%3d %3d delete %d insert %d\n", sp->line0, sp->line1, sp->deleted, sp->inserted); fflush (stderr); } atenate three strings, returning a newly malloc'd string. */ char * concat (s1, s2, s3) char *s1, *s2, *s3; { int len = strlen (s1) + strlen (s2) + strlen (s3); char *new = (char *) xmalloc (ldiff/dir.c 644 473 0 13133 4704135044 5737 /* Read, sort and compare two directories. Used for GNU DIFF. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" static int compare_names (); /* Read the directory named DIRNAME and return a sorted vector of filenames for its contents. NONEX nonzero means this directory is known to be nonexistent, so return zero files. */ struct dirdata { int length; /* # elements in `files' */ char **files; /* Sorted names of files in the dir */ }; static struct dirdata dir_sort (dirname, nonex) char *dirname; int nonex; { register DIR *reading; register struct direct *next; struct dirdata dirdata; /* Address of block containing the files that are described. */ char **files; /* Length of block that `files' points to, measured in files. */ int nfiles; /* Index of first unused in `files'. */ int files_index; if (nonex) { dirdata.length = 0; dirdata.files = 0; return dirdata; } /* Open the directory and check for errors. */ reading = opendir (dirname); if (!reading) { perror_with_name (dirname); dirdata.length = -1; return dirdata; } /* Initialize the table of filenames. */ nfiles = 100; files = (char **) xmalloc (nfiles * sizeof (char *)); files_index = 0; /* Read the directory entries, and insert the subfiles into the `files' table. */ while (next = readdir (reading)) { /* Ignore the files `.' and `..' */ if (next->d_name[0] == '.' && (next->d_name[1] == 0 || (next->d_name[1] == '.' && next->d_name[2] == 0))) continue; if (files_index == nfiles) { nfiles *= 2; files = (char **) xrealloc (files, sizeof (char *) * nfiles); } files[files_index++] = concat (next->d_name, "", ""); } closedir (reading); /* Sort the table. */ qsort (files, files_index, sizeof (char *), compare_names); /* Return a description of location and length of the table. */ dirdata.files = files; dirdata.length = files_index; return dirdata; } /* Sort the files now in the table. */ static int compare_names (file1, file2) char **file1, **file2; { return strcmp (*file1, *file2); } /* Compare the contents of two directories named NAME1 and NAME2. This is a top-level routine; it does everything necessary for diff on two directories. NONEX1 nonzero says directory NAME1 doesn't exist, but pretend it is empty. Likewise NONEX2. HANDLE_FILE is a caller-provided subroutine called to handle each file. It gets five operands: dir and name (rel to original working dir) of file in dir 1, dir and name pathname of file in dir 2, and the recursion depth. For a file that appears in only one of the dirs, one of the name-args to HANDLE_FILE is zero. DEPTH is the current depth in recursion. Returns the maximum of all the values returned by HANDLE_FILE, or 2 if trouble is encountered in opening files. */ int diff_dirs (name1, name2, handle_file, depth, nonex1, nonex2) char *name1, *name2; int (*handle_file) (); int nonex1, nonex2; { struct dirdata data1, data2; register int i1, i2; int val = 0; int v1; /* Get sorted contents of both dirs. */ data1 = dir_sort (name1, nonex1); data2 = dir_sort (name2, nonex2); if (data1.length == -1 || data2.length == -1) { if (data1.length >= 0) free (data1.files); if (data2.length >= 0) free (data2.files); return 2; } i1 = 0; i2 = 0; /* If -Sname was specified, and this is the topmost level of comparison, ignore all file names less than the specified starting name. */ if (dir_start_file && depth == 0) { while (i1 < data1.length && strcmp (data1.files[i1], dir_start_file) < 0) i1++; while (i2 < data2.length && strcmp (data2.files[i2], dir_start_file) < 0) i2++; } /* Loop while files remain in one or both dirs. */ while (i1 < data1.length || i2 < data2.length) { int nameorder; /* Compare next name in dir 1 with next name in dir 2. At the end of a dir, pretend the "next name" in that dir is very large. */ if (i1 == data1.length) nameorder = 1; else if (i2 == data2.length) nameorder = -1; else nameorder = strcmp (data1.files[i1], data2.files[i2]); if (nameorder == 0) { /* We have found a file (or subdir) in common between both dirs. Compare the two files. */ v1 = (*handle_file) (name1, data1.files[i1], name2, data2.files[i2], depth + 1); i1++, i2++; } if (nameorder < 0) { /* Next filename in dir 1 is less; that is a file in dir 1 only. */ v1 = (*handle_file) (name1, data1.files[i1], name2, 0, depth + 1); i1++; } if (nameorder > 0) { /* Next filename in dir 2 is less; that is a file in dir 2 only. */ v1 = (*handle_file) (name1, 0, name2, data2.files[i2], depth + 1); i2++; } if (v1 > val) val = v1; } free (data1.files); free (data2.files); return val; } iles[i2], depth + 1); i1++, i2++; } if (nameorder < 0) { /* Next filename in dir 1 is less; that is a file in dir 1 only. */ v1 = (*handle_file) (name1, data1.files[i1], name2, 0, depth + 1); i1++; } if (nameorder > 0) { /* Next filename in dir 2 is less; that is a file in dir 2 only. */ v1 = (*handle_file) (name1, 0, name2, data2.files[i2], depth + 1); i2++; } idiff/version.c 644 473 0 103 4704135044 6577 /* Version number of GNU diff. */ char *version_string = "1.14"; iles); return val; } iles[i2], depth + 1); i1++, i2++; } if (nameorder < 0) { /* Next filename in dir 1 is less; that is a file in dir 1 only. */ v1 = (*handle_file) (name1, data1.files[i1], name2, 0, depth + 1); i1++; } if (nameorder > 0) { /* Next filename in dir 2 is less; that is a file in dir 2 only. */ v1 = (*handle_file) (name1, 0, name2, data2.files[i2], depth + 1); i2++; } idiff/diff.h 644 473 0 22747 4704135044 6111 /* Shared definitions for GNU DIFF Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #ifdef USG #include #ifdef HAVE_NDIR #include #else #include #endif /* not HAVE_NDIR */ #include #ifndef HAVE_DIRECT #define direct dirent #endif #else /* not USG */ #include #include #include #endif #ifdef USG /* Define needed BSD functions in terms of sysV library. */ #define bcopy(s,d,n) memcpy((d),(s),(n)) #define bcmp(s1,s2,n) memcmp((s1),(s2),(n)) #define bzero(s,n) memset((s),0,(n)) #define dup2(f,t) (close(t),fcntl((f),F_DUPFD,(t))) #define vfork fork #define index strchr #define rindex strrchr #endif #include extern int errno; extern int sys_nerr; extern char *sys_errlist[]; #define EOS (0) #define FALSE (0) #define TRUE 1 #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) #ifndef PR_FILE_NAME #define PR_FILE_NAME "/bin/pr" #endif /* Support old-fashioned C compilers. */ #if defined (__STDC__) || defined (__GNUC__) #include "limits.h" #else #define INT_MAX 2147483647 #define CHAR_BIT 8 #endif /* Support old-fashioned C compilers. */ #if !defined (__STDC__) && !defined (__GNUC__) #define const #endif /* Variables for command line options */ #ifndef GDIFF_MAIN #define EXTERN extern #else #define EXTERN #endif enum output_style { /* Default output style. */ OUTPUT_NORMAL, /* Output the differences with lines of context before and after (-c). */ OUTPUT_CONTEXT, /* Output the differences as commands suitable for `ed' (-e). */ OUTPUT_ED, /* Output the diff as a forward ed script (-f). */ OUTPUT_FORWARD_ED, /* Like -f, but output a count of changed lines in each "command" (-n). */ OUTPUT_RCS, /* Output merged #ifdef'd file (-D). */ OUTPUT_IFDEF }; EXTERN enum output_style output_style; /* Number of lines of context to show in each set of diffs. This is zero when context is not to be shown. */ EXTERN int context; /* Consider all files as text files (-a). Don't interpret codes over 0177 as implying a "binary file". */ EXTERN int always_text_flag; /* Ignore changes in horizontal whitespace (-b). */ EXTERN int ignore_space_change_flag; /* Ignore all horizontal whitespace (-w). */ EXTERN int ignore_all_space_flag; /* Ignore changes that affect only blank lines (-B). */ EXTERN int ignore_blank_lines_flag; /* Ignore changes that affect only lines matching this regexp (-I). */ EXTERN char *ignore_regexp; /* Result of regex-compilation of `ignore_regexp'. */ EXTERN struct re_pattern_buffer ignore_regexp_compiled; /* 1 if lines may match even if their lengths are different. This depends on various options. */ EXTERN int length_varies; /* Ignore differences in case of letters (-i). */ EXTERN int ignore_case_flag; /* Regexp to identify function-header lines (-F). */ EXTERN char *function_regexp; /* Result of regex-compilation of `function_regexp'. */ EXTERN struct re_pattern_buffer function_regexp_compiled; /* Say only whether files differ, not how (-q). */ EXTERN int no_details_flag; /* Report files compared that match (-s). Normally nothing is output when that happens. */ EXTERN int print_file_same_flag; /* character that ends a line. Currently this is always `\n'. */ EXTERN char line_end_char; /* Output the differences with exactly 8 columns added to each line so that any tabs in the text line up properly (-T). */ EXTERN int tab_align_flag; /* Expand tabs in the output so the text lines up properly despite the characters added to the front of each line (-t). */ EXTERN int tab_expand_flag; /* In directory comparison, specify file to start with (-S). All file names less than this name are ignored. */ EXTERN char *dir_start_file; /* If a file is new (appears in only one dir) include its entire contents (-N). Then `patch' would create the file with appropriate contents. */ EXTERN int entire_new_file_flag; /* Pipe each file's output through pr (-l). */ EXTERN int paginate_flag; /* String to use for #ifdef (-D). */ EXTERN char * ifdef_string; /* String containing all the command options diff received, with spaces between and at the beginning but none at the end. If there were no options given, this string is empty. */ EXTERN char * switch_string; /* Nonzero means use heuristics for better speed. */ EXTERN int heuristic; /* Name of program the user invoked (for error messages). */ EXTERN char * program; /* The result of comparison is an "edit script": a chain of `struct change'. Each `struct change' represents one place where some lines are deleted and some are inserted. LINE0 and LINE1 are the first affected lines in the two files (origin 0). DELETED is the number of lines deleted here from file 0. INSERTED is the number of lines inserted here in file 1. If DELETED is 0 then LINE0 is the number of the line before which the insertion was done; vice versa for INSERTED and LINE1. */ struct change { struct change *link; /* Previous or next edit command */ int inserted; /* # lines of file 1 changed here. */ int deleted; /* # lines of file 0 changed here. */ int line0; /* Line number of 1st deleted line. */ int line1; /* Line number of 1st inserted line. */ char ignore; /* Flag used in context.c */ }; /* Structures that describe the input files. */ /* Data on one line of text. */ struct line_def { char *text; int length; unsigned hash; }; /* Data on one input file being compared. */ struct file_data { int desc; /* File descriptor */ char *name; /* File name */ struct stat stat; /* File status from fstat() */ int dir_p; /* 1 if file is a directory */ /* Buffer in which text of file is read. */ char * buffer; /* Allocated size of buffer. */ int bufsize; /* Number of valid characters now in the buffer. */ int buffered_chars; /* Array of data on analyzed lines of this chunk of this file. */ struct line_def *linbuf; /* Allocated size of linbuf array (# of elements). */ int linbufsize; /* Number of elements of linbuf containing valid data. */ int buffered_lines; /* Pointer to end of prefix of this file to ignore when hashing. */ char *prefix_end; /* Count of lines in the prefix. */ int prefix_lines; /* Pointer to start of suffix of this file to ignore when hashing. */ char *suffix_begin; /* Count of lines in the suffix. */ int suffix_lines; /* Vector, indexed by line number, containing an equivalence code for each line. It is this vector that is actually compared with that of another file to generate differences. */ int *equivs; /* Vector, like the previous one except that the elements for discarded lines have been squeezed out. */ int *undiscarded; /* Vector mapping virtual line numbers (not counting discarded lines) to real ones (counting those lines). Both are origin-0. */ int *realindexes; /* Total number of nondiscarded lines. */ int nondiscarded_lines; /* Vector, indexed by real origin-0 line number, containing 1 for a line that is an insertion or a deletion. The results of comparison are stored here. */ char *changed_flag; /* 1 if file ends in a line with no final newline. */ int missing_newline; /* 1 more than the maximum equivalence value used for this or its sibling file. */ int equiv_max; /* Table translating diff's internal line numbers to actual line numbers in the file. This is needed only when some lines have been discarded. The allocated size is always linbufsize and the number of valid elements is buffered_lines. */ int *ltran; }; /* Describe the two files currently being compared. */ struct file_data files[2]; /* Queue up one-line messages to be printed at the end, when -l is specified. Each message is recorded with a `struct msg'. */ struct msg { struct msg *next; char *format; char *arg1; char *arg2; }; /* Head of the chain of queues messages. */ EXTERN struct msg *msg_chain; /* Tail of the chain of queues messages. */ EXTERN struct msg *msg_chain_end; /* Stdio stream to output diffs to. */ EXTERN FILE *outfile; /* Declare various functions. */ #ifdef __STDC__ #define VOID void #else #define VOID char #endif VOID *xmalloc (); VOID *xrealloc (); char *concat (); void free (); char *rindex (); char *index (); void analyze_hunk (); void error (); void fatal (); void message (); void perror_with_name (); void pfatal_with_name (); void print_1_line (); void print_message_queue (); void print_script (); messages. */ EXTERN stdiff/regex.c 644 473 0 131243 4704135045 6317 /* Extended regular expression matching and search library. Copyright (C) 1985, 1989 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* To test, compile with -Dtest. This Dtestable feature turns this into a self-contained program which reads a pattern, describes how it compiles, then reads a string and searches for it. */ #ifdef emacs /* The `emacs' switch turns on certain special matching commands that make sense only in emacs. */ #include "config.h" #include "lisp.h" #include "buffer.h" #include "syntax.h" #else /* not emacs */ #ifdef USG #ifndef BSTRING #define bcopy(s,d,n) memcpy((d),(s),(n)) #define bcmp(s1,s2,n) memcmp((s1),(s2),(n)) #define bzero(s,n) memset((s),0,(n)) #endif #endif char *malloc (); char *realloc (); /* Make alloca work the best possible way. */ #ifdef __GNUC__ #define alloca __builtin_alloca #else #ifdef sparc #include #else char *alloca (); #endif #endif /* Define the syntax stuff, so we can do the \<...\> things. */ #ifndef Sword /* must be non-zero in some of the tests below... */ #define Sword 1 #endif #define SYNTAX(c) re_syntax_table[c] #ifdef SYNTAX_TABLE char *re_syntax_table; #else static char re_syntax_table[256]; static void init_syntax_once () { register int c; static int done = 0; if (done) return; bzero (re_syntax_table, sizeof re_syntax_table); for (c = 'a'; c <= 'z'; c++) re_syntax_table[c] = Sword; for (c = 'A'; c <= 'Z'; c++) re_syntax_table[c] = Sword; for (c = '0'; c <= '9'; c++) re_syntax_table[c] = Sword; done = 1; } #endif /* SYNTAX_TABLE */ #endif /* not emacs */ #include "regex.h" /* Number of failure points to allocate space for initially, when matching. If this number is exceeded, more space is allocated, so it is not a hard limit. */ #ifndef NFAILURES #define NFAILURES 80 #endif /* NFAILURES */ /* width of a byte in bits */ #define BYTEWIDTH 8 #ifndef SIGN_EXTEND_CHAR #define SIGN_EXTEND_CHAR(x) (x) #endif static int obscure_syntax = 0; /* Specify the precise syntax of regexp for compilation. This provides for compatibility for various utilities which historically have different, incompatible syntaxes. The argument SYNTAX is a bit-mask containing the two bits RE_NO_BK_PARENS and RE_NO_BK_VBAR. */ int re_set_syntax (syntax) { int ret; ret = obscure_syntax; obscure_syntax = syntax; return ret; } /* re_compile_pattern takes a regular-expression string and converts it into a buffer full of byte commands for matching. PATTERN is the address of the pattern string SIZE is the length of it. BUFP is a struct re_pattern_buffer * which points to the info on where to store the byte commands. This structure contains a char * which points to the actual space, which should have been obtained with malloc. re_compile_pattern may use realloc to grow the buffer space. The number of bytes of commands can be found out by looking in the struct re_pattern_buffer that bufp pointed to, after re_compile_pattern returns. */ #define PATPUSH(ch) (*b++ = (char) (ch)) #define PATFETCH(c) \ {if (p == pend) goto end_of_pattern; \ c = * (unsigned char *) p++; \ if (translate) c = translate[c]; } #define PATFETCH_RAW(c) \ {if (p == pend) goto end_of_pattern; \ c = * (unsigned char *) p++; } #define PATUNFETCH p-- #define EXTEND_BUFFER \ { char *old_buffer = bufp->buffer; \ if (bufp->allocated == (1L<<16)) goto too_big; \ bufp->allocated *= 2; \ if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \ bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \ if (bufp->buffer == 0) \ goto memory_exhausted; \ b = (b - old_buffer) + bufp->buffer; \ if (fixup_jump) \ fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \ if (laststart) \ laststart = (laststart - old_buffer) + bufp->buffer; \ begalt = (begalt - old_buffer) + bufp->buffer; \ if (pending_exact) \ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ } static int store_jump (), insert_jump (); char * re_compile_pattern (pattern, size, bufp) char *pattern; int size; struct re_pattern_buffer *bufp; { register char *b = bufp->buffer; register char *p = pattern; char *pend = pattern + size; register unsigned c, c1; char *p1; unsigned char *translate = (unsigned char *) bufp->translate; /* address of the count-byte of the most recently inserted "exactn" command. This makes it possible to tell whether a new exact-match character can be added to that command or requires a new "exactn" command. */ char *pending_exact = 0; /* address of the place where a forward-jump should go to the end of the containing expression. Each alternative of an "or", except the last, ends with a forward-jump of this sort. */ char *fixup_jump = 0; /* address of start of the most recently finished expression. This tells postfix * where to find the start of its operand. */ char *laststart = 0; /* In processing a repeat, 1 means zero matches is allowed */ char zero_times_ok; /* In processing a repeat, 1 means many matches is allowed */ char many_times_ok; /* address of beginning of regexp, or inside of last \( */ char *begalt = b; /* Stack of information saved by \( and restored by \). Four stack elements are pushed by each \(: First, the value of b. Second, the value of fixup_jump. Third, the value of regnum. Fourth, the value of begalt. */ int stackb[40]; int *stackp = stackb; int *stacke = stackb + 40; int *stackt; /* Counts \('s as they are encountered. Remembered for the matching \), where it becomes the "register number" to put in the stop_memory command */ int regnum = 1; bufp->fastmap_accurate = 0; #ifndef emacs #ifndef SYNTAX_TABLE /* * Initialize the syntax table. */ init_syntax_once(); #endif #endif if (bufp->allocated == 0) { bufp->allocated = 28; if (bufp->buffer) /* EXTEND_BUFFER loses when bufp->allocated is 0 */ bufp->buffer = (char *) realloc (bufp->buffer, 28); else /* Caller did not allocate a buffer. Do it for him */ bufp->buffer = (char *) malloc (28); if (!bufp->buffer) goto memory_exhausted; begalt = b = bufp->buffer; } while (p != pend) { if (b - bufp->buffer > bufp->allocated - 10) /* Note that EXTEND_BUFFER clobbers c */ EXTEND_BUFFER; PATFETCH (c); switch (c) { case '$': { char *p1 = p; /* When testing what follows the $, look past the \-constructs that don't consume anything. */ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) while (p1 != pend) { if (*p1 == '\\' && p1 + 1 != pend && (p1[1] == '<' || p1[1] == '>' || p1[1] == '`' || p1[1] == '\'' #ifdef emacs || p1[1] == '=' #endif || p1[1] == 'b' || p1[1] == 'B')) p1 += 2; else break; } if (obscure_syntax & RE_TIGHT_VBAR) { if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend) goto normal_char; /* Make operand of last vbar end before this `$'. */ if (fixup_jump) store_jump (fixup_jump, jump, b); fixup_jump = 0; PATPUSH (endline); break; } /* $ means succeed if at end of line, but only in special contexts. If randomly in middle of pattern, it is a normal character. */ if (p1 == pend || *p1 == '\n' || (obscure_syntax & RE_CONTEXT_INDEP_OPS) || (obscure_syntax & RE_NO_BK_PARENS ? *p1 == ')' : *p1 == '\\' && p1[1] == ')') || (obscure_syntax & RE_NO_BK_VBAR ? *p1 == '|' : *p1 == '\\' && p1[1] == '|')) { PATPUSH (endline); break; } goto normal_char; } case '^': /* ^ means succeed if at beg of line, but only if no preceding pattern. */ if (laststart && p[-2] != '\n' && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; if (obscure_syntax & RE_TIGHT_VBAR) { if (p != pattern + 1 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; PATPUSH (begline); begalt = b; } else PATPUSH (begline); break; case '+': case '?': if (obscure_syntax & RE_BK_PLUS_QM) goto normal_char; handle_plus: case '*': /* If there is no previous pattern, char not special. */ if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; /* If there is a sequence of repetition chars, collapse it down to equivalent to just one. */ zero_times_ok = 0; many_times_ok = 0; while (1) { zero_times_ok |= c != '+'; many_times_ok |= c != '?'; if (p == pend) break; PATFETCH (c); if (c == '*') ; else if (!(obscure_syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')) ; else if ((obscure_syntax & RE_BK_PLUS_QM) && c == '\\') { int c1; PATFETCH (c1); if (!(c1 == '+' || c1 == '?')) { PATUNFETCH; PATUNFETCH; break; } c = c1; } else { PATUNFETCH; break; } } /* Star, etc. applied to an empty pattern is equivalent to an empty pattern. */ if (!laststart) break; /* Now we know whether 0 matches is allowed, and whether 2 or more matches is allowed. */ if (many_times_ok) { /* If more than one repetition is allowed, put in a backward jump at the end. */ store_jump (b, maybe_finalize_jump, laststart - 3); b += 3; } insert_jump (on_failure_jump, laststart, b + 3, b); pending_exact = 0; b += 3; if (!zero_times_ok) { /* At least one repetition required: insert before the loop a skip over the initial on-failure-jump instruction */ insert_jump (dummy_failure_jump, laststart, laststart + 6, b); b += 3; } break; case '.': laststart = b; PATPUSH (anychar); break; case '[': while (b - bufp->buffer > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) /* Note that EXTEND_BUFFER clobbers c */ EXTEND_BUFFER; laststart = b; if (*p == '^') PATPUSH (charset_not), p++; else PATPUSH (charset); p1 = p; PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); /* Clear the whole map */ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); /* Read in characters and ranges, setting map bits */ while (1) { PATFETCH (c); /* If awk, \ escapes characters when inside [...]. */ if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\') { PATFETCH(c1); b[c1 / BYTEWIDTH] |= 1 << (c1 % BYTEWIDTH); continue; } if (c == ']' && p != p1 + 1) break; if (*p == '-' && p[1] != ']') { PATFETCH (c1); PATFETCH (c1); while (c <= c1) b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++; } else { b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH); } } /* Discard any bitmap bytes that are all 0 at the end of the map. Decrement the map-length byte too. */ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; b += b[-1]; break; case '(': if (! (obscure_syntax & RE_NO_BK_PARENS)) goto normal_char; else goto handle_open; case ')': if (! (obscure_syntax & RE_NO_BK_PARENS)) goto normal_char; else goto handle_close; case '\n': if (! (obscure_syntax & RE_NEWLINE_OR)) goto normal_char; else goto handle_bar; case '|': if (! (obscure_syntax & RE_NO_BK_VBAR)) goto normal_char; else goto handle_bar; case '\\': if (p == pend) goto invalid_pattern; PATFETCH_RAW (c); switch (c) { case '(': if (obscure_syntax & RE_NO_BK_PARENS) goto normal_backsl; handle_open: if (stackp == stacke) goto nesting_too_deep; if (regnum < RE_NREGS) { PATPUSH (start_memory); PATPUSH (regnum); } *stackp++ = b - bufp->buffer; *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; *stackp++ = regnum++; *stackp++ = begalt - bufp->buffer; fixup_jump = 0; laststart = 0; begalt = b; break; case ')': if (obscure_syntax & RE_NO_BK_PARENS) goto normal_backsl; handle_close: if (stackp == stackb) goto unmatched_close; begalt = *--stackp + bufp->buffer; if (fixup_jump) store_jump (fixup_jump, jump, b); if (stackp[-1] < RE_NREGS) { PATPUSH (stop_memory); PATPUSH (stackp[-1]); } stackp -= 2; fixup_jump = 0; if (*stackp) fixup_jump = *stackp + bufp->buffer - 1; laststart = *--stackp + bufp->buffer; break; case '|': if (obscure_syntax & RE_NO_BK_VBAR) goto normal_backsl; handle_bar: insert_jump (on_failure_jump, begalt, b + 6, b); pending_exact = 0; b += 3; if (fixup_jump) store_jump (fixup_jump, jump, b); fixup_jump = b; b += 3; laststart = 0; begalt = b; break; #ifdef emacs case '=': PATPUSH (at_dot); break; case 's': laststart = b; PATPUSH (syntaxspec); PATFETCH (c); PATPUSH (syntax_spec_code[c]); break; case 'S': laststart = b; PATPUSH (notsyntaxspec); PATFETCH (c); PATPUSH (syntax_spec_code[c]); break; #endif /* emacs */ case 'w': laststart = b; PATPUSH (wordchar); break; case 'W': laststart = b; PATPUSH (notwordchar); break; case '<': PATPUSH (wordbeg); break; case '>': PATPUSH (wordend); break; case 'b': PATPUSH (wordbound); break; case 'B': PATPUSH (notwordbound); break; case '`': PATPUSH (begbuf); break; case '\'': PATPUSH (endbuf); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c1 = c - '0'; if (c1 >= regnum) goto normal_char; for (stackt = stackp - 2; stackt > stackb; stackt -= 4) if (*stackt == c1) goto normal_char; laststart = b; PATPUSH (duplicate); PATPUSH (c1); break; case '+': case '?': if (obscure_syntax & RE_BK_PLUS_QM) goto handle_plus; default: normal_backsl: /* You might think it would be useful for \ to mean not to translate; but if we don't translate it it will never match anything. */ if (translate) c = translate[c]; goto normal_char; } break; default: normal_char: if (!pending_exact || pending_exact + *pending_exact + 1 != b || *pending_exact == 0177 || *p == '*' || *p == '^' || ((obscure_syntax & RE_BK_PLUS_QM) ? *p == '\\' && (p[1] == '+' || p[1] == '?') : (*p == '+' || *p == '?'))) { laststart = b; PATPUSH (exactn); pending_exact = b; PATPUSH (0); } PATPUSH (c); (*pending_exact)++; } } if (fixup_jump) store_jump (fixup_jump, jump, b); if (stackp != stackb) goto unmatched_open; bufp->used = b - bufp->buffer; return 0; invalid_pattern: return "Invalid regular expression"; unmatched_open: return "Unmatched \\("; unmatched_close: return "Unmatched \\)"; end_of_pattern: return "Premature end of regular expression"; nesting_too_deep: return "Nesting too deep"; too_big: return "Regular expression too big"; memory_exhausted: return "Memory exhausted"; } /* Store where `from' points a jump operation to jump to where `to' points. `opcode' is the opcode to store. */ static int store_jump (from, opcode, to) char *from, *to; char opcode; { from[0] = opcode; from[1] = (to - (from + 3)) & 0377; from[2] = (to - (from + 3)) >> 8; } /* Open up space at char FROM, and insert there a jump to TO. CURRENT_END gives te end of the storage no in use, so we know how much data to copy up. OP is the opcode of the jump to insert. If you call this function, you must zero out pending_exact. */ static int insert_jump (op, from, to, current_end) char op; char *from, *to, *current_end; { register char *pto = current_end + 3; register char *pfrom = current_end; while (pfrom != from) *--pto = *--pfrom; store_jump (from, op, to); } /* Given a pattern, compute a fastmap from it. The fastmap records which of the (1 << BYTEWIDTH) possible characters can start a string that matches the pattern. This fastmap is used by re_search to skip quickly over totally implausible text. The caller must supply the address of a (1 << BYTEWIDTH)-byte data area as bufp->fastmap. The other components of bufp describe the pattern to be used. */ void re_compile_fastmap (bufp) struct re_pattern_buffer *bufp; { unsigned char *pattern = (unsigned char *) bufp->buffer; int size = bufp->used; register char *fastmap = bufp->fastmap; register unsigned char *p = pattern; register unsigned char *pend = pattern + size; register int j, k; unsigned char *translate = (unsigned char *) bufp->translate; unsigned char *stackb[NFAILURES]; unsigned char **stackp = stackb; bzero (fastmap, (1 << BYTEWIDTH)); bufp->fastmap_accurate = 1; bufp->can_be_null = 0; while (p) { if (p == pend) { bufp->can_be_null = 1; break; } #ifdef SWITCH_ENUM_BUG switch ((int) ((enum regexpcode) *p++)) #else switch ((enum regexpcode) *p++) #endif { case exactn: if (translate) fastmap[translate[p[1]]] = 1; else fastmap[p[1]] = 1; break; case begline: case before_dot: case at_dot: case after_dot: case begbuf: case endbuf: case wordbound: case notwordbound: case wordbeg: case wordend: continue; case endline: if (translate) fastmap[translate['\n']] = 1; else fastmap['\n'] = 1; if (bufp->can_be_null != 1) bufp->can_be_null = 2; break; case finalize_jump: case maybe_finalize_jump: case jump: case dummy_failure_jump: bufp->can_be_null = 1; j = *p++ & 0377; j += SIGN_EXTEND_CHAR (*(char *)p) << 8; p += j + 1; /* The 1 compensates for missing ++ above */ if (j > 0) continue; /* Jump backward reached implies we just went through the body of a loop and matched nothing. Opcode jumped to should be an on_failure_jump. Just treat it like an ordinary jump. For a * loop, it has pushed its failure point already; if so, discard that as redundant. */ if ((enum regexpcode) *p != on_failure_jump) continue; p++; j = *p++ & 0377; j += SIGN_EXTEND_CHAR (*(char *)p) << 8; p += j + 1; /* The 1 compensates for missing ++ above */ if (stackp != stackb && *stackp == p) stackp--; continue; case on_failure_jump: j = *p++ & 0377; j += SIGN_EXTEND_CHAR (*(char *)p) << 8; p++; *++stackp = p + j; continue; case start_memory: case stop_memory: p++; continue; case duplicate: bufp->can_be_null = 1; fastmap['\n'] = 1; case anychar: for (j = 0; j < (1 << BYTEWIDTH); j++) if (j != '\n') fastmap[j] = 1; if (bufp->can_be_null) return; /* Don't return; check the alternative paths so we can set can_be_null if appropriate. */ break; case wordchar: for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) == Sword) fastmap[j] = 1; break; case notwordchar: for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) != Sword) fastmap[j] = 1; break; #ifdef emacs case syntaxspec: k = *p++; for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) == (enum syntaxcode) k) fastmap[j] = 1; break; case notsyntaxspec: k = *p++; for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) != (enum syntaxcode) k) fastmap[j] = 1; break; #endif /* emacs */ case charset: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) { if (translate) fastmap[translate[j]] = 1; else fastmap[j] = 1; } break; case charset_not: /* Chars beyond end of map must be allowed */ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) if (translate) fastmap[translate[j]] = 1; else fastmap[j] = 1; for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) { if (translate) fastmap[translate[j]] = 1; else fastmap[j] = 1; } break; } /* Get here means we have successfully found the possible starting characters of one path of the pattern. We need not follow this path any farther. Instead, look at the next alternative remembered in the stack. */ if (stackp != stackb) p = *stackp--; else break; } } /* Like re_search_2, below, but only one string is specified. */ int re_search (pbufp, string, size, startpos, range, regs) struct re_pattern_buffer *pbufp; char *string; int size, startpos, range; struct re_registers *regs; { return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range, regs, size); } /* Like re_match_2 but tries first a match starting at index STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of places to try before giving up. If RANGE is negative, the starting positions tried are STARTPOS, STARTPOS - 1, etc. It is up to the caller to make sure that range is not so large as to take the starting position outside of the input strings. The value returned is the position at which the match was found, or -1 if no match was found, or -2 if error (such as failure stack overflow). */ int re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop) struct re_pattern_buffer *pbufp; char *string1, *string2; int size1, size2; int startpos; register int range; struct re_registers *regs; int mstop; { register char *fastmap = pbufp->fastmap; register unsigned char *translate = (unsigned char *) pbufp->translate; int total = size1 + size2; int val; /* Update the fastmap now if not correct already */ if (fastmap && !pbufp->fastmap_accurate) re_compile_fastmap (pbufp); /* Don't waste time in a long search for a pattern that says it is anchored. */ if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf && range > 0) { if (startpos > 0) return -1; else range = 1; } while (1) { /* If a fastmap is supplied, skip quickly over characters that cannot possibly be the start of a match. Note, however, that if the pattern can possibly match the null string, we must test it at each starting point so that we take the first null string we get. */ if (fastmap && startpos < total && pbufp->can_be_null != 1) { if (range > 0) { register int lim = 0; register unsigned char *p; int irange = range; if (startpos < size1 && startpos + range >= size1) lim = range - (size1 - startpos); p = ((unsigned char *) &(startpos >= size1 ? string2 - size1 : string1)[startpos]); if (translate) { while (range > lim && !fastmap[translate[*p++]]) range--; } else { while (range > lim && !fastmap[*p++]) range--; } startpos += irange - range; } else { register unsigned char c; if (startpos >= size1) c = string2[startpos - size1]; else c = string1[startpos]; c &= 0xff; if (translate ? !fastmap[translate[c]] : !fastmap[c]) goto advance; } } if (range >= 0 && startpos == total && fastmap && pbufp->can_be_null == 0) return -1; val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop); if (0 <= val) return startpos; if (val == -2) return -2; #ifdef C_ALLOCA alloca (0); #endif /* C_ALLOCA */ advance: if (!range) break; if (range > 0) range--, startpos++; else range++, startpos--; } return -1; } #ifndef emacs /* emacs never uses this */ int re_match (pbufp, string, size, pos, regs) struct re_pattern_buffer *pbufp; char *string; int size, pos; struct re_registers *regs; { return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size); } #endif /* emacs */ /* Maximum size of failure stack. Beyond this, overflow is an error. */ int re_max_failures = 2000; static int bcmp_translate(); /* Match the pattern described by PBUFP against data which is the virtual concatenation of STRING1 and STRING2. SIZE1 and SIZE2 are the sizes of the two data strings. Start the match at position POS. Do not consider matching past the position MSTOP. If pbufp->fastmap is nonzero, then it had better be up to date. The reason that the data to match are specified as two components which are to be regarded as concatenated is so this function can be used directly on the contents of an Emacs buffer. -1 is returned if there is no match. -2 is returned if there is an error (such as match stack overflow). Otherwise the value is the length of the substring which was matched. */ int re_match_2 (pbufp, string1_arg, size1, string2_arg, size2, pos, regs, mstop) struct re_pattern_buffer *pbufp; char *string1_arg, *string2_arg; int size1, size2; int pos; struct re_registers *regs; int mstop; { register unsigned char *p = (unsigned char *) pbufp->buffer; register unsigned char *pend = p + pbufp->used; unsigned char *string1 = (unsigned char *) string1_arg; unsigned char *string2 = (unsigned char *) string2_arg; /* End of first string */ unsigned char *end1; /* End of second string */ unsigned char *end2; /* Pointer just past last char to consider matching */ unsigned char *end_match_1, *end_match_2; register unsigned char *d, *dend; register int mcnt; unsigned char *translate = (unsigned char *) pbufp->translate; /* Failure point stack. Each place that can handle a failure further down the line pushes a failure point on this stack. It consists of two char *'s. The first one pushed is where to resume scanning the pattern; the second pushed is where to resume scanning the strings. If the latter is zero, the failure point is a "dummy". If a failure happens and the innermost failure point is dormant, it discards that failure point and tries the next one. */ unsigned char *initial_stack[2 * NFAILURES]; unsigned char **stackb = initial_stack; unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES]; /* Information on the "contents" of registers. These are pointers into the input strings; they record just what was matched (on this attempt) by some part of the pattern. The start_memory command stores the start of a register's contents and the stop_memory command stores the end. At that point, regstart[regnum] points to the first character in the register, regend[regnum] points to the first character beyond the end of the register, regstart_seg1[regnum] is true iff regstart[regnum] points into string1, and regend_seg1[regnum] is true iff regend[regnum] points into string1. */ unsigned char *regstart[RE_NREGS]; unsigned char *regend[RE_NREGS]; unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS]; /* Set up pointers to ends of strings. Don't allow the second string to be empty unless both are empty. */ if (!size2) { string2 = string1; size2 = size1; string1 = 0; size1 = 0; } end1 = string1 + size1; end2 = string2 + size2; /* Compute where to stop matching, within the two strings */ if (mstop <= size1) { end_match_1 = string1 + mstop; end_match_2 = string2; } else { end_match_1 = end1; end_match_2 = string2 + mstop - size1; } /* Initialize \) text positions to -1 to mark ones that no \( or \) has been seen for. */ for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++) regend[mcnt] = (unsigned char *) -1; /* `p' scans through the pattern as `d' scans through the data. `dend' is the end of the input string that `d' points within. `d' is advanced into the following input string whenever necessary, but this happens before fetching; therefore, at the beginning of the loop, `d' can be pointing at the end of a string, but it cannot equal string2. */ if (pos <= size1) d = string1 + pos, dend = end_match_1; else d = string2 + pos - size1, dend = end_match_2; /* Write PREFETCH; just before fetching a character with *d. */ #define PREFETCH \ while (d == dend) \ { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \ d = string2; /* end of string1 => advance to string2. */ \ dend = end_match_2; } /* This loop loops over pattern commands. It exits by returning from the function if match is complete, or it drops through if match fails at this starting point in the input data. */ while (1) { if (p == pend) /* End of pattern means we have succeeded! */ { /* If caller wants register contents data back, convert it to indices */ if (regs) { regs->start[0] = pos; if (dend == end_match_1) regs->end[0] = d - string1; else regs->end[0] = d - string2 + size1; for (mcnt = 1; mcnt < RE_NREGS; mcnt++) { if (regend[mcnt] == (unsigned char *) -1) { regs->start[mcnt] = -1; regs->end[mcnt] = -1; continue; } if (regstart_seg1[mcnt]) regs->start[mcnt] = regstart[mcnt] - string1; else regs->start[mcnt] = regstart[mcnt] - string2 + size1; if (regend_seg1[mcnt]) regs->end[mcnt] = regend[mcnt] - string1; else regs->end[mcnt] = regend[mcnt] - string2 + size1; } } if (dend == end_match_1) return (d - string1 - pos); else return d - string2 + size1 - pos; } /* Otherwise match next pattern command */ #ifdef SWITCH_ENUM_BUG switch ((int) ((enum regexpcode) *p++)) #else switch ((enum regexpcode) *p++) #endif { /* \( is represented by a start_memory, \) by a stop_memory. Both of those commands contain a "register number" argument. The text matched within the \( and \) is recorded under that number. Then, \ turns into a `duplicate' command which is followed by the numeric value of as the register number. */ case start_memory: regstart[*p] = d; regstart_seg1[*p++] = (dend == end_match_1); break; case stop_memory: regend[*p] = d; regend_seg1[*p++] = (dend == end_match_1); break; case duplicate: { int regno = *p++; /* Get which register to match against */ register unsigned char *d2, *dend2; d2 = regstart[regno]; dend2 = ((regstart_seg1[regno] == regend_seg1[regno]) ? regend[regno] : end_match_1); while (1) { /* Advance to next segment in register contents, if necessary */ while (d2 == dend2) { if (dend2 == end_match_2) break; if (dend2 == regend[regno]) break; d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ } /* At end of register contents => success */ if (d2 == dend2) break; /* Advance to next segment in data being matched, if necessary */ PREFETCH; /* mcnt gets # consecutive chars to compare */ mcnt = dend - d; if (mcnt > dend2 - d2) mcnt = dend2 - d2; /* Compare that many; failure if mismatch, else skip them. */ if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt)) goto fail; d += mcnt, d2 += mcnt; } } break; case anychar: /* fetch a data character */ PREFETCH; /* Match anything but a newline. */ if ((translate ? translate[*d++] : *d++) == '\n') goto fail; break; case charset: case charset_not: { /* Nonzero for charset_not */ int not = 0; register int c; if (*(p - 1) == (unsigned char) charset_not) not = 1; /* fetch a data character */ PREFETCH; if (translate) c = translate [*d]; else c = *d; if (c < *p * BYTEWIDTH && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) not = !not; p += 1 + *p; if (!not) goto fail; d++; break; } case begline: if (d == string1 || d[-1] == '\n') break; goto fail; case endline: if (d == end2 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) break; goto fail; /* "or" constructs ("|") are handled by starting each alternative with an on_failure_jump that points to the start of the next alternative. Each alternative except the last ends with a jump to the joining point. (Actually, each jump except for the last one really jumps to the following jump, because tensioning the jumps is a hassle.) */ /* The start of a stupid repeat has an on_failure_jump that points past the end of the repeat text. This makes a failure point so that, on failure to match a repetition, matching restarts past as many repetitions have been found with no way to fail and look for another one. */ /* A smart repeat is similar but loops back to the on_failure_jump so that each repetition makes another failure point. */ case on_failure_jump: if (stackp == stacke) { unsigned char **stackx; if (stacke - stackb > re_max_failures * 2) return -2; stackx = (unsigned char **) alloca (2 * (stacke - stackb) * sizeof (char *)); bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); stackp = stackx + (stackp - stackb); stacke = stackx + 2 * (stacke - stackb); stackb = stackx; } mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; p++; *stackp++ = mcnt + p; *stackp++ = d; break; /* The end of a smart repeat has an maybe_finalize_jump back. Change it either to a finalize_jump or an ordinary jump. */ case maybe_finalize_jump: mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; p++; { register unsigned char *p2 = p; /* Compare what follows with the begining of the repeat. If we can establish that there is nothing that they would both match, we can change to finalize_jump */ while (p2 != pend && (*p2 == (unsigned char) stop_memory || *p2 == (unsigned char) start_memory)) p2++; if (p2 == pend) p[-3] = (unsigned char) finalize_jump; else if (*p2 == (unsigned char) exactn || *p2 == (unsigned char) endline) { register int c = *p2 == (unsigned char) endline ? '\n' : p2[2]; register unsigned char *p1 = p + mcnt; /* p1[0] ... p1[2] are an on_failure_jump. Examine what follows that */ if (p1[3] == (unsigned char) exactn && p1[5] != c) p[-3] = (unsigned char) finalize_jump; else if (p1[3] == (unsigned char) charset || p1[3] == (unsigned char) charset_not) { int not = p1[3] == (unsigned char) charset_not; if (c < p1[4] * BYTEWIDTH && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) not = !not; /* not is 1 if c would match */ /* That means it is not safe to finalize */ if (!not) p[-3] = (unsigned char) finalize_jump; } } } p -= 2; if (p[-1] != (unsigned char) finalize_jump) { p[-1] = (unsigned char) jump; goto nofinalize; } /* The end of a stupid repeat has a finalize-jump back to the start, where another failure point will be made which will point after all the repetitions found so far. */ case finalize_jump: stackp -= 2; case jump: nofinalize: mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; p += mcnt + 1; /* The 1 compensates for missing ++ above */ break; case dummy_failure_jump: if (stackp == stacke) { unsigned char **stackx = (unsigned char **) alloca (2 * (stacke - stackb) * sizeof (char *)); bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); stackp = stackx + (stackp - stackb); stacke = stackx + 2 * (stacke - stackb); stackb = stackx; } *stackp++ = 0; *stackp++ = 0; goto nofinalize; case wordbound: if (d == string1 /* Points to first char */ || d == end2 /* Points to end */ || (d == end1 && size2 == 0)) /* Points to end */ break; if ((SYNTAX (d[-1]) == Sword) != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) break; goto fail; case notwordbound: if (d == string1 /* Points to first char */ || d == end2 /* Points to end */ || (d == end1 && size2 == 0)) /* Points to end */ goto fail; if ((SYNTAX (d[-1]) == Sword) != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) goto fail; break; case wordbeg: if (d == end2 /* Points to end */ || (d == end1 && size2 == 0) /* Points to end */ || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ goto fail; if (d == string1 /* Points to first char */ || SYNTAX (d[-1]) != Sword) /* prev char not letter */ break; goto fail; case wordend: if (d == string1 /* Points to first char */ || SYNTAX (d[-1]) != Sword) /* prev char not letter */ goto fail; if (d == end2 /* Points to end */ || (d == end1 && size2 == 0) /* Points to end */ || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */ break; goto fail; #ifdef emacs case before_dot: if (((d - string2 <= (unsigned) size2) ? d - bf_p2 : d - bf_p1) <= point) goto fail; break; case at_dot: if (((d - string2 <= (unsigned) size2) ? d - bf_p2 : d - bf_p1) == point) goto fail; break; case after_dot: if (((d - string2 <= (unsigned) size2) ? d - bf_p2 : d - bf_p1) >= point) goto fail; break; case wordchar: mcnt = (int) Sword; goto matchsyntax; case syntaxspec: mcnt = *p++; matchsyntax: PREFETCH; if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; break; case notwordchar: mcnt = (int) Sword; goto matchnotsyntax; case notsyntaxspec: mcnt = *p++; matchnotsyntax: PREFETCH; if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; break; #else case wordchar: PREFETCH; if (SYNTAX (*d++) == 0) goto fail; break; case notwordchar: PREFETCH; if (SYNTAX (*d++) != 0) goto fail; break; #endif /* not emacs */ case begbuf: if (d == string1) /* Note, d cannot equal string2 */ break; /* unless string1 == string2. */ goto fail; case endbuf: if (d == end2 || (d == end1 && size2 == 0)) break; goto fail; case exactn: /* Match the next few pattern characters exactly. mcnt is how many characters to match. */ mcnt = *p++; if (translate) { do { PREFETCH; if (translate[*d++] != *p++) goto fail; } while (--mcnt); } else { do { PREFETCH; if (*d++ != *p++) goto fail; } while (--mcnt); } break; } continue; /* Successfully matched one pattern command; keep matching */ /* Jump here if any matching operation fails. */ fail: if (stackp != stackb) /* A restart point is known. Restart there and pop it. */ { if (!stackp[-2]) { /* If innermost failure point is dormant, flush it and keep looking */ stackp -= 2; goto fail; } d = *--stackp; p = *--stackp; if (d >= string1 && d <= end1) dend = end_match_1; } else break; /* Matching at this starting point really fails! */ } return -1; /* Failure to match */ } static int bcmp_translate (s1, s2, len, translate) unsigned char *s1, *s2; register int len; unsigned char *translate; { register unsigned char *p1 = s1, *p2 = s2; while (len) { if (translate [*p1++] != translate [*p2++]) return 1; len--; } return 0; } /* Entry points compatible with bsd4.2 regex library */ #ifndef emacs static struct re_pattern_buffer re_comp_buf; char * re_comp (s) char *s; { if (!s) { if (!re_comp_buf.buffer) return "No previous regular expression"; return 0; } if (!re_comp_buf.buffer) { if (!(re_comp_buf.buffer = (char *) malloc (200))) return "Memory exhausted"; re_comp_buf.allocated = 200; if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) return "Memory exhausted"; } return re_compile_pattern (s, strlen (s), &re_comp_buf); } int re_exec (s) char *s; { int len = strlen (s); return 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); } #endif /* emacs */ #ifdef test #include /* Indexed by a character, gives the upper case equivalent of the character */ static char upcase[0400] = { 000, 001, 002, 003, 004, 005, 006, 007, 010, 011, 012, 013, 014, 015, 016, 017, 020, 021, 022, 023, 024, 025, 026, 027, 030, 031, 032, 033, 034, 035, 036, 037, 040, 041, 042, 043, 044, 045, 046, 047, 050, 051, 052, 053, 054, 055, 056, 057, 060, 061, 062, 063, 064, 065, 066, 067, 070, 071, 072, 073, 074, 075, 076, 077, 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 }; main (argc, argv) int argc; char **argv; { char pat[80]; struct re_pattern_buffer buf; int i; char c; char fastmap[(1 << BYTEWIDTH)]; /* Allow a command argument to specify the style of syntax. */ if (argc > 1) obscure_syntax = atoi (argv[1]); buf.allocated = 40; buf.buffer = (char *) malloc (buf.allocated); buf.fastmap = fastmap; buf.translate = upcase; while (1) { gets (pat); if (*pat) { re_compile_pattern (pat, strlen(pat), &buf); for (i = 0; i < buf.used; i++) printchar (buf.buffer[i]); putchar ('\n'); printf ("%d allocated, %d used.\n", buf.allocated, buf.used); re_compile_fastmap (&buf); printf ("Allowed by fastmap: "); for (i = 0; i < (1 << BYTEWIDTH); i++) if (fastmap[i]) printchar (i); putchar ('\n'); } gets (pat); /* Now read the string to match against */ i = re_match (&buf, pat, strlen (pat), 0, 0); printf ("Match value %d.\n", i); } } #ifdef NOTDEF print_buf (bufp) struct re_pattern_buffer *bufp; { int i; printf ("buf is :\n----------------\n"); for (i = 0; i < bufp->used; i++) printchar (bufp->buffer[i]); printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); printf ("Allowed by fastmap: "); for (i = 0; i < (1 << BYTEWIDTH); i++) if (bufp->fastmap[i]) printchar (i); printf ("\nAllowed by translate: "); if (bufp->translate) for (i = 0; i < (1 << BYTEWIDTH); i++) if (bufp->translate[i]) printchar (i); printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); } #endif printchar (c) char c; { if (c < 041 || c >= 0177) { putchar ('\\'); putchar (((c >> 6) & 3) + '0'); putchar (((c >> 3) & 7) + '0'); putchar ((c & 7) + '0'); } else putchar (c); } error (string) char *string; { puts (string); exit (1); } #endif /* test */ if (bufp->translate[i]) printchar (i); printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); } #endif printchar (c) char c; { if (c < 041 || c >= 0177) { putchar ('\\'); putchar (((c >> 6) & 3) + '0'); putchar (((c >> 3) diff/regex.h 644 473 0 21477 4704135045 6313 /* Definitions for data structures callers pass the regex library. Copyright (C) 1985, 1989 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Define number of parens for which we record the beginnings and ends. This affects how much space the `struct re_registers' type takes up. */ #ifndef RE_NREGS #define RE_NREGS 10 #endif /* These bits are used in the obscure_syntax variable to choose among alternative regexp syntaxes. */ /* 1 means plain parentheses serve as grouping, and backslash parentheses are needed for literal searching. 0 means backslash-parentheses are grouping, and plain parentheses are for literal searching. */ #define RE_NO_BK_PARENS 1 /* 1 means plain | serves as the "or"-operator, and \| is a literal. 0 means \| serves as the "or"-operator, and | is a literal. */ #define RE_NO_BK_VBAR 2 /* 0 means plain + or ? serves as an operator, and \+, \? are literals. 1 means \+, \? are operators and plain +, ? are literals. */ #define RE_BK_PLUS_QM 4 /* 1 means | binds tighter than ^ or $. 0 means the contrary. */ #define RE_TIGHT_VBAR 8 /* 1 means treat \n as an _OR operator 0 means treat it as a normal character */ #define RE_NEWLINE_OR 16 /* 0 means that special characters may act as normal characters in some contexts. Specifically, this applies to: ^ - only special at the beginning, or after ( or | $ - only special at the end, or before ) or | *, +, ? - only special when not after the beginning, (, or |. 1 means that a special characters (such as *, ^, and $) always have their special meaning regardless of the surrounding context. */ #define RE_CONTEXT_INDEP_OPS 32 /* 0 means that \ before anything inside [ and ] is taken as a real \. 1 means that such a \ escapes the following character. This is a special case for AWK. */ #define RE_AWK_CLASS_HACK 64 /* Now define combinations of bits for the standard possibilities. */ #define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | RE_CONTEXT_INDEP_OPS) #define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK) #define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR) #define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) #define RE_SYNTAX_EMACS 0 /* This data structure is used to represent a compiled pattern. */ struct re_pattern_buffer { char *buffer; /* Space holding the compiled pattern commands. */ long allocated; /* Size of space that buffer points to */ long used; /* Length of portion of buffer actually occupied */ char *fastmap; /* Pointer to fastmap, if any, or zero if none. */ /* re_search uses the fastmap, if there is one, to skip quickly over totally implausible characters */ char *translate; /* Translate table to apply to all characters before comparing. Or zero for no translation. The translation is applied to a pattern when it is compiled and to data when it is matched. */ char fastmap_accurate; /* Set to zero when a new pattern is stored, set to one when the fastmap is updated from it. */ char can_be_null; /* Set to one by compiling fastmap if this pattern might match the null string. It does not necessarily match the null string in that case, but if this is zero, it cannot. 2 as value means can match null string but at end of range or before a character listed in the fastmap. */ }; /* Structure to store "register" contents data in. Pass the address of such a structure as an argument to re_match, etc., if you want this information back. start[i] and end[i] record the string matched by \( ... \) grouping i, for i from 1 to RE_NREGS - 1. start[0] and end[0] record the entire string matched. */ struct re_registers { int start[RE_NREGS]; int end[RE_NREGS]; }; /* These are the command codes that appear in compiled regular expressions, one per byte. Some command codes are followed by argument bytes. A command code can specify any interpretation whatever for its arguments. Zero-bytes may appear in the compiled regular expression. */ enum regexpcode { unused, exactn, /* followed by one byte giving n, and then by n literal bytes */ begline, /* fails unless at beginning of line */ endline, /* fails unless at end of line */ jump, /* followed by two bytes giving relative address to jump to */ on_failure_jump, /* followed by two bytes giving relative address of place to resume at in case of failure. */ finalize_jump, /* Throw away latest failure point and then jump to address. */ maybe_finalize_jump, /* Like jump but finalize if safe to do so. This is used to jump back to the beginning of a repeat. If the command that follows this jump is clearly incompatible with the one at the beginning of the repeat, such that we can be sure that there is no use backtracking out of repetitions already completed, then we finalize. */ dummy_failure_jump, /* jump, and push a dummy failure point. This failure point will be thrown away if an attempt is made to use it for a failure. A + construct makes this before the first repeat. */ anychar, /* matches any one character */ charset, /* matches any one char belonging to specified set. First following byte is # bitmap bytes. Then come bytes for a bit-map saying which chars are in. Bits in each byte are ordered low-bit-first. A character is in the set if its bit is 1. A character too large to have a bit in the map is automatically not in the set */ charset_not, /* similar but match any character that is NOT one of those specified */ start_memory, /* starts remembering the text that is matched and stores it in a memory register. followed by one byte containing the register number. Register numbers must be in the range 0 through NREGS. */ stop_memory, /* stops remembering the text that is matched and stores it in a memory register. followed by one byte containing the register number. Register numbers must be in the range 0 through NREGS. */ duplicate, /* match a duplicate of something remembered. Followed by one byte containing the index of the memory register. */ before_dot, /* Succeeds if before dot */ at_dot, /* Succeeds if at dot */ after_dot, /* Succeeds if after dot */ begbuf, /* Succeeds if at beginning of buffer */ endbuf, /* Succeeds if at end of buffer */ wordchar, /* Matches any word-constituent character */ notwordchar, /* Matches any char that is not a word-constituent */ wordbeg, /* Succeeds if at word beginning */ wordend, /* Succeeds if at word end */ wordbound, /* Succeeds if at a word boundary */ notwordbound, /* Succeeds if not at a word boundary */ syntaxspec, /* Matches any character whose syntax is specified. followed by a byte which contains a syntax code, Sword or such like */ notsyntaxspec /* Matches any character whose syntax differs from the specified. */ }; #ifdef __STDC__ extern char *re_compile_pattern (char *, int, struct re_pattern_buffer *); /* Is this really advertised? */ extern void re_compile_fastmap (struct re_pattern_buffer *); extern int re_search (struct re_pattern_buffer *, char*, int, int, int, struct re_registers *); extern int re_search_2 (struct re_pattern_buffer *, char *, int, char *, int, int, int, struct re_registers *, int); extern int re_match (struct re_pattern_buffer *, char *, int, int, struct re_registers *); extern int re_match_2 (struct re_pattern_buffer *, char *, int, char *, int, int, struct re_registers *, int); /* 4.2 bsd compatibility (yuck) */ extern char *re_comp (char *); extern int re_exec (char *); #else /* !__STDC__ */ extern char *re_compile_pattern (); /* Is this really advertised? */ extern void re_compile_fastmap (); extern int re_search (), re_search_2 (); extern int re_match (), re_match_2 (); /* 4.2 bsd compatibility (yuck) */ extern char *re_comp (); extern int re_exec (); #endif /* __STDC__ */ #ifdef SYNTAX_TABLE extern char *re_syntax_table; #endif har *, int, int, struct re_registers *, int); /* 4.2 bsd compatibility (yuck) */ extern char *re_comp (char *); extern int re_exec (char *); #else /* !__STDC__ */ extern char *re_compile_pattediff/diff3.c 644 473 0 121410 4704135046 6174 /* Three-way file comparison program (diff3) for Project GNU Copyright (C) 1988, 1989 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Written by Randy Smith */ #ifdef __STDC__ #define VOID void #else #define VOID char #endif /* * Include files. */ #include #include #ifdef USG #include /* Define needed BSD functions in terms of sysV library. */ #define bcopy(s,d,n) memcpy((d),(s),(n)) #define bcmp(s1,s2,n) memcmp((s1),(s2),(n)) #define bzero(s,n) memset((s),0,(n)) #define dup2(f,t) (close(t),fcntl((f),F_DUPFD,(t))) #define vfork fork #define index strchr #define rindex strrchr #endif /* * Internal data structures and macros for the diff3 program; includes * data structures for both diff3 diffs and normal diffs. */ /* * Different files within a diff */ #define FILE0 0 #define FILE1 1 #define FILE2 2 /* * Three way diffs are build out of two two-way diffs; the file which * the two two-way diffs share is: */ #define FILEC FILE0 /* The ranges are indexed by */ #define START 0 #define END 1 enum diff_type { ERROR, /* Should not be used */ ADD, /* Two way diff add */ CHANGE, /* Two way diff change */ DELETE, /* Two way diff delete */ DIFF_ALL, /* All three are different */ DIFF_1ST, /* Only the first is different */ DIFF_2ND, /* Only the second */ DIFF_3RD /* Only the third */ }; /* Two-way diff */ struct diff_block { int ranges[2][2]; /* Ranges are inclusive */ char **lines[2]; /* The actual lines (may contain nulls) */ int *lengths[2]; /* The lengths of the lines (since nulls) */ struct diff_block *next; }; /* Three-way diff */ struct diff3_block { enum diff_type correspond; /* Type of diff */ int ranges[3][2]; /* Ranges are inclusive */ char **lines[3]; /* The actual lines (may contain nulls) */ int *lengths[3]; /* The lengths of the lines (since nulls) */ struct diff3_block *next; }; /* * Access the ranges on a diff block. */ #define D_LOWLINE(diff, filenum) \ ((diff)->ranges[filenum][START]) #define D_HIGHLINE(diff, filenum) \ ((diff)->ranges[filenum][END]) #define D_NUMLINES(diff, filenum) \ (D_HIGHLINE((diff), (filenum)) - D_LOWLINE((diff), (filenum)) + 1) /* * Access the line numbers in a file in a diff by absolute line number * (i.e. line number within the original file). Note that these are * lvalues and can be used for assignment. */ #define D_LINENUM(diff, filenum, linenum) \ (*((diff)->lines[filenum] + linenum - D_LOWLINE((diff), (filenum)))) #define D_LINELEN(diff, filenum, linenum) \ (*((diff)->lengths[filenum] + linenum - D_LOWLINE((diff), (filenum)))) /* * Access the line numbers in a file in a diff by relative line * numbers (i.e. line number within the diff itself). Note that these * are lvalues and can be used for assignment. */ #define D_RELNUM(diff, filenum, linenum) \ (*((diff)->lines[filenum] + linenum)) #define D_RELLEN(diff, filenum, linenum) \ (*((diff)->lengths[filenum] + linenum)) /* * And get at them directly, when that should be necessary. */ #define D_LINEARRAY(diff, filenum) \ ((diff)->lines[filenum]) #define D_LENARRAY(diff, filenum) \ ((diff)->lengths[filenum]) /* * Next block. */ #define D_NEXT(diff) ((diff)->next) /* * Access the type of a diff3 block. */ #define D3_TYPE(diff) ((diff)->correspond) /* * Line mappings based on diffs. The first maps off the top of the * diff, the second off of the bottom. */ #define D_HIGH_MAPLINE(diff, fromfile, tofile, lineno) \ ((lineno) \ - D_HIGHLINE ((diff), (fromfile)) \ + D_HIGHLINE ((diff), (tofile))) #define D_LOW_MAPLINE(diff, fromfile, tofile, lineno) \ ((lineno) \ - D_LOWLINE ((diff), (fromfile)) \ + D_LOWLINE ((diff), (tofile))) /* * General memory allocation function. */ #define ALLOCATE(number, type) \ (type *) xmalloc ((number) * sizeof (type)) /* * Options variables for flags set on command line. * * EDSCRIPT: Write out an ed script instead of the standard diff3 format. * * FLAGGING: Indicates that in the case of overlapping diffs (type * DIFF_ALL), the lines which would normally be deleted from file 1 * should be preserved with a special flagging mechanism. * * DONT_WRITE_OVERLAP: 1 if information for overlapping diffs should * not be output. * * DONT_WRITE_SIMPLE: 1 if information for non-overlapping diffs * should not be output. * * FINALWRITE: 1 if a :wq should be included at the end of the script * to write out the file being edited. */ int edscript; int flagging; int dont_write_overlap; int dont_write_simple; int finalwrite; extern int optind; char *argv0; /* * Forward function declarations. */ struct diff_block *process_diff (); struct diff3_block *make_3way_diff (); void output_diff3 (); int output_diff3_edscript (); void usage (); struct diff3_block *using_to_diff3_block (); int copy_stringlist (); struct diff3_block *create_diff3_block (); int compare_line_list (); int read_diff (); enum diff_type process_diff_control (); char *scan_diff_line (); struct diff3_block *reverse_diff3_blocklist (); VOID *xmalloc (); VOID *xrealloc (); /* * No options take arguments. "i" is my own addition; it stands for * "include write command", to emulate system V behavior. */ #define ARGSTRING "eix3EX" char diff_program[] = DIFF_PROGRAM; /* * Main program. Calls diff twice on two pairs of input files, * combines the two diffs, and outputs them. */ main (argc, argv) int argc; char **argv; { int c; int mapping [3]; int shiftmap; int incompat; int overlap_count; struct diff_block *thread1, *thread2; struct diff3_block *diff; edscript = flagging = dont_write_overlap = dont_write_simple = finalwrite = 0; incompat = shiftmap = 0; argv0 = argv[0]; while ((c = getopt (argc, argv, ARGSTRING)) != EOF) { edscript = 1; switch (c) { case 'x': dont_write_simple = 1; incompat++; break; case '3': dont_write_overlap = 1; incompat++; break; case 'i': finalwrite = 1; incompat++; break; case 'X': dont_write_simple = 1; /* Falls through */ case 'E': flagging = 1; /* Falls through */ case 'e': incompat++; break; case '?': default: usage (); /* NOTREACHED */ } } if (incompat > 1) /* Make sure you only have one of a */ /* set of arguments */ usage (); if (argc - optind != 3) usage (); if (*argv[optind] == '-' && *(argv[optind] + 1) == '\0') { /* Sigh. We've got standard input as the first arg. We can't */ /* call diff twice on stdin */ mapping [0] = 1; mapping [1] = 2; mapping [2] = 0; shiftmap = 1; } else { /* Normal, what you'd expect */ mapping [0] = 0; mapping [1] = 1; mapping [2] = 2; shiftmap = 0; } if (shiftmap) { thread1 = process_diff (argv[optind + 2], argv[optind]); thread2 = process_diff (argv[optind + 2], argv[optind + 1]); } else { thread1 = process_diff (argv[optind], argv[optind + 1]); thread2 = process_diff (argv[optind], argv[optind + 2]); } diff = make_3way_diff (thread1, thread2); if (edscript) overlap_count = output_diff3_edscript (stdout, diff, mapping, argv[optind], argv[optind + 1], argv[optind + 2]); else output_diff3 (stdout, diff, mapping); if (edscript && overlap_count > 0) /* We don't return overlap_count since it could overflow; exit status is just 8 bits. */ exit (1); exit (0); } /* * Explain, patiently and kindly, how to use this program. Then exit. */ void usage () { fprintf (stderr, "Usage:\t%s [ -exEX3 ] [ -i ] file1 file2 file3\n", argv0); fprintf (stderr, "\tOnly one of [exEX3] allowed\n"); exit (1); } /* * Routines that combine the two diffs together into one. The * algorithm used follows: * * File0 is shared in common between the two diffs. * Diff01 is the diff between 0 and 1. * Diff02 is the diff between 0 and 2. * * 1) Find the range for the first block in File0. * a) Take the lowest of the two ranges (in File0) in the two * current blocks (one from each diff) as being the low * water mark. Assign the upper end of this block as * being the high water mark and move the current block up * one. Mark the block just moved over as to be used. * b) Check the next block in the diff that the high water * mark is *not* from. * * *If* the high water mark is above * the low end of the range in that block, * * mark that block as to be used and move the current * block up. Set the high water mark to the max of * the high end of this block and the current. Repeat b. * * 2) Find the corresponding ranges in Files1 (from the blocks * in diff01; line per line outside of diffs) and in File2. * Create a diff3_block, reserving space as indicated by the ranges. * * 3) Copy all of the pointers for file0 in. At least for now, * do bcmp's between corresponding strings in the two diffs. * * 4) Copy all of the pointers for file1 and 2 in. Get what you * need from file0 (when there isn't a diff block, it's * identical to file0 within the range between diff blocks). * * 5) If the diff blocks you used came from only one of the two * strings of diffs, then that file (i.e. the one other than * file 0 in that diff) is the odd person out. If you used * diff blocks from both sets, check to see if files 1 and 2 match: * * Same number of lines? If so, do a set of bcmp's (if a * bcmp matches; copy the pointer over; it'll be easier later * if you have to do any compares). If they match, 1 & 2 are * the same. If not, all three different. * * Then you do it again, until you run out of blocks. * */ /* * This routine makes a three way diff (chain of diff3_block's) from two * two way diffs (chains of diff_block's). It is assumed that each of * the two diffs passed are off of the same file (i.e. that each of the * diffs were made "from" the same file). The three way diff pointer * returned will have numbering 0--the common file, 1--the other file * in diff1, and 2--the other file in diff2. */ struct diff3_block * make_3way_diff (thread1, thread2) struct diff_block *thread1, *thread2; { /* * This routine works on the two diffs passed to it as threads. * Thread number 0 is diff1, thread number 1 is diff2. The USING * array is set to the base of the list of blocks to be used to * construct each block of the three way diff; if no blocks from a * particular thread are to be used, that element of the using array * is set to 0. The elements LAST_USING array are set to the last * elements on each of the using lists. * * The HIGH_WATER_MARK is set to the highest line number in File 0 * described in any of the diffs in either of the USING lists. The * HIGH_WATER_THREAD names the thread. Similarly the BASE_WATER_MARK * and BASE_WATER_THREAD describe the lowest line number in File 0 * described in any of the diffs in either of the USING lists. The * HIGH_WATER_DIFF is the diff from which the HIGH_WATER_MARK was * taken. * * The HIGH_WATER_DIFF should always be equal to LAST_USING * [HIGH_WATER_THREAD]. The OTHER_DIFF is the next diff to check for * higher water, and should always be equal to * CURRENT[HIGH_WATER_THREAD ^ 0x1]. The OTHER_THREAD is the thread * in which the OTHER_DIFF is, and hence should always be equal to * HIGH_WATER_THREAD ^ 0x1. * * The variable LAST_DIFF is kept set to the last diff block produced * by this routine, for line correspondence purposes between that diff * and the one currently being worked on. It is initialized to * ZERO_DIFF before any blocks have been created. */ struct diff_block *using[2], *last_using[2], *current[2]; int high_water_mark; int high_water_thread, base_water_thread, other_thread; struct diff_block *high_water_diff, *other_diff; struct diff3_block *result, *tmpblock, *result_last, *last_diff; static struct diff3_block zero_diff = { ERROR, { {0, 0}, {0, 0}, {0, 0} }, { (char **) 0, (char **) 0, (char **) 0 }, { (int *) 0, (int *) 0, (int *) 0 }, (struct diff3_block *) 0 }; /* Initialization */ result = result_last = (struct diff3_block *) 0; current[0] = thread1; current[1] = thread2; last_diff = &zero_diff; /* Sniff up the threads until we reach the end */ while (current[0] || current[1]) { using[0] = using[1] = last_using[0] = last_using[1] = (struct diff_block *) 0; /* Setup low and high water threads, diffs, and marks. */ if (!current[0]) base_water_thread = 1; else if (!current[1]) base_water_thread = 0; else base_water_thread = (D_LOWLINE (current[0], FILE0) > D_LOWLINE (current[1], FILE0)); high_water_thread = base_water_thread; high_water_diff = current[high_water_thread]; #if 0 /* low and high waters start off same diff */ base_water_mark = D_LOWLINE (high_water_diff, FILE0); #endif high_water_mark = D_HIGHLINE (high_water_diff, FILE0); /* Make the diff you just got info from into the using class */ using[high_water_thread] = last_using[high_water_thread] = high_water_diff; current[high_water_thread] = high_water_diff->next; last_using[high_water_thread]->next = (struct diff_block *) 0; /* And mark the other diff */ other_thread = high_water_thread ^ 0x1; other_diff = current[other_thread]; /* Shuffle up the ladder, checking the other diff to see if it needs to be incorporated */ while (other_diff && D_LOWLINE (other_diff, FILE0) <= high_water_mark + 1) { /* Incorporate this diff into the using list. Note that this doesn't take it off the current list */ if (using[other_thread]) last_using[other_thread]->next = other_diff; else using[other_thread] = other_diff; last_using[other_thread] = other_diff; /* Take it off the current list. Note that this following code assumes that other_diff enters it equal to current[high_water_thread ^ 0x1] */ current[other_thread] = current[other_thread]->next; other_diff->next = (struct diff_block *) 0; /* Set the high_water stuff If this comparison is equal, then this is the last pass through this loop; since diff blocks within a given thread cannot overlap, the high_water_mark will be *below* the range_start of either of the next diffs. */ if (high_water_mark < D_HIGHLINE (other_diff, FILE0)) { high_water_thread ^= 1; high_water_diff = other_diff; high_water_mark = D_HIGHLINE (other_diff, FILE0); } /* Set the other diff */ other_thread = high_water_thread ^ 0x1; other_diff = current[other_thread]; } /* The using lists contain a list of all of the blocks to be included in this diff3_block. Create it. */ tmpblock = using_to_diff3_block (using, last_using, base_water_thread, high_water_thread, last_diff); if (!tmpblock) fatal ("internal: screwup in format of diff blocks"); /* Put it on the list */ if (result) result_last->next = tmpblock; else result = tmpblock; result_last = tmpblock; /* Setup corresponding lines correctly */ last_diff = tmpblock; } return result; } /* * using_to_diff3_block: * This routine takes two lists of blocks (from two separate diff * threads) and puts them together into one diff3 block. * It then returns a pointer to this diff3 block or 0 for failure. * * All arguments besides using are for the convenience of the routine; * they could be derived from the using array. * LAST_USING is a pair of pointers to the last blocks in the using * structure. * LOW_THREAD and HIGH_THREAD tell which threads contain the lowest * and highest line numbers for File0. * last_diff contains the last diff produced in the calling routine. * This is used for lines mappings which would still be identical to * the state that diff ended in. * * A distinction should be made in this routine between the two diffs * that are part of a normal two diff block, and the three diffs that * are part of a diff3_block. */ struct diff3_block * using_to_diff3_block (using, last_using, low_thread, high_thread, last_diff) struct diff_block *using[2], *last_using[2]; int low_thread, high_thread; struct diff3_block *last_diff; { int lowc, highc, low1, high1, low2, high2; struct diff3_block *result; struct diff_block *ptr; int i; int current0line; /* Find the range in file0 */ lowc = using[low_thread]->ranges[0][START]; highc = last_using[high_thread]->ranges[0][END]; /* Find the ranges in the other files. If using[x] is null, that means that the file to which that diff refers is equivalent to file 0 over this range */ if (using[0]) { low1 = D_LOW_MAPLINE (using[0], FILE0, FILE1, lowc); high1 = D_HIGH_MAPLINE (last_using[0], FILE0, FILE1, highc); } else { low1 = D_HIGH_MAPLINE (last_diff, FILEC, FILE1, lowc); high1 = D_HIGH_MAPLINE (last_diff, FILEC, FILE1, highc); } /* * Note that in the following, we use file 1 relative to the diff, * and file 2 relative to the corresponding lines struct. */ if (using[1]) { low2 = D_LOW_MAPLINE (using[1], FILE0, FILE1, lowc); high2 = D_HIGH_MAPLINE (last_using[1], FILE0, FILE1, highc); } else { low2 = D_HIGH_MAPLINE (last_diff, FILEC, FILE2, lowc); high2 = D_HIGH_MAPLINE (last_diff, FILEC, FILE2, highc); } /* Create a block with the appropriate sizes */ result = create_diff3_block (lowc, highc, low1, high1, low2, high2); /* Copy over all of the information for File 0. Return with a zero if any of the compares failed. */ for (ptr = using[0]; ptr; ptr = D_NEXT (ptr)) { int result_offset = D_LOWLINE (ptr, FILE0) - lowc; int copy_size = D_HIGHLINE (ptr, FILE0) - D_LOWLINE (ptr, FILE0) + 1; if (!copy_stringlist (D_LINEARRAY (ptr, FILE0), D_LENARRAY (ptr, FILE0), D_LINEARRAY (result, FILEC) + result_offset, D_LENARRAY (result, FILEC) + result_offset, copy_size)) return 0; } for (ptr = using[1]; ptr; ptr = D_NEXT (ptr)) { int result_offset = D_LOWLINE (ptr, FILEC) - lowc; int copy_size = D_HIGHLINE (ptr, FILEC) - D_LOWLINE (ptr, FILEC) + 1; if (!copy_stringlist (D_LINEARRAY (ptr, FILE0), D_LENARRAY (ptr, FILE0), D_LINEARRAY (result, FILEC) + result_offset, D_LENARRAY (result, FILEC) + result_offset, copy_size)) return 0; } /* Copy stuff for file 1. First deal with anything that might be before the first diff. */ for (i = 0; i + low1 < (using[0] ? D_LOWLINE (using[0], FILE1) : high1 + 1); i++) { D_RELNUM (result, FILE1, i) = D_RELNUM (result, FILEC, i); D_RELLEN (result, FILE1, i) = D_RELLEN (result, FILEC, i); } for (ptr = using[0]; ptr; ptr = D_NEXT (ptr)) { int result_offset = D_LOWLINE (ptr, FILE1) - low1; int copy_size = D_HIGHLINE (ptr, FILE1) - D_LOWLINE (ptr, FILE1) + 1; if (!copy_stringlist (D_LINEARRAY (ptr, FILE1), D_LENARRAY (ptr, FILE1), D_LINEARRAY (result, FILE1) + result_offset, D_LENARRAY (result, FILE1) + result_offset, copy_size)) return 0; /* Catch the lines between here and the next diff */ current0line = D_HIGHLINE (ptr, FILE0) + 1 - lowc; for (i = D_HIGHLINE (ptr, FILE1) + 1 - low1; i < (D_NEXT (ptr) ? D_LOWLINE (D_NEXT (ptr), FILE1) : high1 + 1) - low1; i++) { D_RELNUM (result, FILE1, i) = D_RELNUM (result, FILEC, current0line); D_RELLEN (result, FILE1, i) = D_RELLEN (result, FILEC, current0line++); } } /* Copy stuff for file 2. First deal with anything that might be before the first diff. */ for (i = 0; i + low2 < (using[1] ? D_LOWLINE (using[1], FILE1) : high2 + 1); i++) { D_RELNUM (result, FILE2, i) = D_RELNUM (result, FILEC, i); D_RELLEN (result, FILE2, i) = D_RELLEN (result, FILEC, i); } for (ptr = using[1]; ptr; ptr = D_NEXT (ptr)) { int result_offset = D_LOWLINE (ptr, FILE1) - low2; int copy_size = D_HIGHLINE (ptr, FILE1) - D_LOWLINE (ptr, FILE1) + 1; if (!copy_stringlist (D_LINEARRAY (ptr, FILE1), D_LENARRAY (ptr, FILE1), D_LINEARRAY (result, FILE2) + result_offset, D_LENARRAY (result, FILE2) + result_offset, copy_size)) return 0; /* Catch the lines between here and the next diff */ current0line = D_HIGHLINE (ptr, FILE0) + 1 - lowc; for (i = D_HIGHLINE (ptr, FILE1) + 1 - low2; i < (D_NEXT (ptr) ? D_LOWLINE (D_NEXT (ptr), FILE1) : high2 + 1) - low2; i++) { D_RELNUM (result, FILE2, i) = D_RELNUM (result, FILEC, current0line); D_RELLEN (result, FILE2, i) = D_RELLEN (result, FILEC, current0line++); } } /* Set correspond */ if (!using[0]) D3_TYPE (result) = DIFF_3RD; else if (!using[1]) D3_TYPE (result) = DIFF_2ND; else { int nl1 = D_HIGHLINE (result, FILE1) - D_LOWLINE (result, FILE1) + 1; int nl2 = D_HIGHLINE (result, FILE2) - D_LOWLINE (result, FILE2) + 1; if (nl1 != nl2 || !compare_line_list (D_LINEARRAY (result, FILE1), D_LENARRAY (result, FILE1), D_LINEARRAY (result, FILE2), D_LENARRAY (result, FILE2), nl1)) D3_TYPE (result) = DIFF_ALL; else D3_TYPE (result) = DIFF_1ST; } return result; } /* * This routine copies pointers from a list of strings to a different list * of strings. If a spot in the second list is already filled, it * makes sure that it is filled with the same string; if not it * returns 0, the copy incomplete. * Upon successful completion of the copy, it returns 1. */ int copy_stringlist (fromptrs, fromlengths, toptrs, tolengths, copynum) char *fromptrs[], *toptrs[]; int *fromlengths, *tolengths; int copynum; { register char **f = fromptrs, **t = toptrs; register int *fl = fromlengths, *tl = tolengths; while (copynum--) { if (*t) { if (*fl != *tl || bcmp (*f, *t, *fl)) return 0; } else { *t = *f ; *tl = *fl; } t++; f++; tl++; fl++; } return 1; } /* * Create a diff3_block, with ranges as specified in the arguments. * Allocate the arrays for the various pointers (and zero them) based * on the arguments passed. Return the block as a result. */ struct diff3_block * create_diff3_block (low0, high0, low1, high1, low2, high2) register int low0, high0, low1, high1, low2, high2; { struct diff3_block *result = ALLOCATE (1, struct diff3_block); int numlines; D3_TYPE (result) = ERROR; D_NEXT (result) = 0; /* Assign ranges */ D_LOWLINE (result, FILE0) = low0; D_HIGHLINE (result, FILE0) = high0; D_LOWLINE (result, FILE1) = low1; D_HIGHLINE (result, FILE1) = high1; D_LOWLINE (result, FILE2) = low2; D_HIGHLINE (result, FILE2) = high2; /* Allocate and zero space */ numlines = D_NUMLINES (result, FILE0); if (numlines) { D_LINEARRAY (result, FILE0) = ALLOCATE (numlines, char *); D_LENARRAY (result, FILE0) = ALLOCATE (numlines, int); bzero (D_LINEARRAY (result, FILE0), (numlines * sizeof (char *))); bzero (D_LENARRAY (result, FILE0), (numlines * sizeof (int))); } else { D_LINEARRAY (result, FILE0) = (char **) 0; D_LENARRAY (result, FILE0) = (int *) 0; } numlines = D_NUMLINES (result, FILE1); if (numlines) { D_LINEARRAY (result, FILE1) = ALLOCATE (numlines, char *); D_LENARRAY (result, FILE1) = ALLOCATE (numlines, int); bzero (D_LINEARRAY (result, FILE1), (numlines * sizeof (char *))); bzero (D_LENARRAY (result, FILE1), (numlines * sizeof (int))); } else { D_LINEARRAY (result, FILE1) = (char **) 0; D_LENARRAY (result, FILE1) = (int *) 0; } numlines = D_NUMLINES (result, FILE2); if (numlines) { D_LINEARRAY (result, FILE2) = ALLOCATE (numlines, char *); D_LENARRAY (result, FILE2) = ALLOCATE (numlines, int); bzero (D_LINEARRAY (result, FILE2), (numlines * sizeof (char *))); bzero (D_LENARRAY (result, FILE2), (numlines * sizeof (int))); } else { D_LINEARRAY (result, FILE2) = (char **) 0; D_LENARRAY (result, FILE2) = (int *) 0; } /* Return */ return result; } /* * Compare two lists of lines of text. * Return 1 if they are equivalent, 0 if not. */ int compare_line_list (list1, lengths1, list2, lengths2, nl) char *list1[], *list2[]; int *lengths1, *lengths2; int nl; { char **l1 = list1, **l2 = list2; int *lgths1 = lengths1, *lgths2 = lengths2; while (nl--) if (!*l1 || !*l2 || *lgths1 != *lgths2++ || bcmp (*l1++, *l2++, *lgths1++)) return 0; return 1; } /* * Routines to input and parse two way diffs. */ extern char **environ; #define DIFF_CHUNK_SIZE 10000 struct diff_block * process_diff (filea, fileb) char *filea, *fileb; { char *diff_contents; int diff_size; char *scan_diff; enum diff_type dt; int i; struct diff_block *block_list, *block_list_end, *bptr; diff_size = read_diff (filea, fileb, &diff_contents); scan_diff = diff_contents; bptr = block_list_end = block_list = (struct diff_block *) 0; while (scan_diff - diff_contents < diff_size) { bptr = ALLOCATE (1, struct diff_block); bptr->next = 0; bptr->lines[0] = bptr->lines[1] = (char **) 0; bptr->lengths[0] = bptr->lengths[1] = (int *) 0; dt = process_diff_control (&scan_diff, bptr); if (dt == ERROR) fatal ("Bad format in diff output"); if (*scan_diff != '\n') fatal ("Bad format in diff output"); scan_diff++; /* Force appropriate ranges to be null, if necessary */ switch (dt) { case ADD: bptr->ranges[0][0]++; break; case DELETE: bptr->ranges[1][0]++; break; case CHANGE: break; default: fatal ("internal: Bad diff type in process_diff"); break; } /* Allocate space for the pointers for the lines from filea, and parcel them out among these pointers */ if (dt != ADD) { bptr->lines[0] = ALLOCATE ((bptr->ranges[0][END] - bptr->ranges[0][START] + 1), char *); bptr->lengths[0] = ALLOCATE ((bptr->ranges[0][END] - bptr->ranges[0][START] + 1), int); for (i = 0; i <= (bptr->ranges[0][END] - bptr->ranges[0][START]); i++) scan_diff = scan_diff_line (scan_diff, &(bptr->lines[0][i]), &(bptr->lengths[0][i]), diff_contents + diff_size, '<'); } /* Get past the separator for changes */ if (dt == CHANGE) { if (strncmp (scan_diff, "---\n", 4)) fatal ("Bad diff format: bad change separator"); scan_diff += 4; } /* Allocate space for the pointers for the lines from fileb, and parcel them out among these pointers */ if (dt != DELETE) { bptr->lines[1] = ALLOCATE ((bptr->ranges[1][END] - bptr->ranges[1][START] + 1), char *); bptr->lengths[1] = ALLOCATE ((bptr->ranges[1][END] - bptr->ranges[1][START] + 1), int); for (i = 0; i <= (bptr->ranges[1][END] - bptr->ranges[1][START]); i++) scan_diff = scan_diff_line (scan_diff, &(bptr->lines[1][i]), &(bptr->lengths[1][i]), diff_contents + diff_size, '>'); } /* Place this block on the blocklist */ if (block_list_end) block_list_end->next = bptr; else block_list = bptr; block_list_end = bptr; } if (scan_diff - diff_contents != diff_size) fatal ("bad diff format; incomplete last line"); return block_list; } /* * This routine will parse a normal format diff control string. It * returns the type of the diff (ERROR if the format is bad). All of * the other important information is filled into to the structure * pointed to by db, and the string pointer (whose location is passed * to this routine) is updated to point beyond the end of the string * parsed. Note that only the ranges in the diff_block will be set by * this routine. * * If some specific pair of numbers has been reduced to a single * number, then both corresponding numbers in the diff block are set * to that number. In general these numbers are interpetted as ranges * inclusive, unless being used by the ADD or DELETE commands. It is * assumed that these will be special cased in a superior routine. */ enum diff_type process_diff_control (string, db) char **string; struct diff_block *db; { char *s = *string; int holdnum; enum diff_type type; /* These macros are defined here because they can use variables defined in this function. Don't try this at home kids, we're trained professionals! Also note that SKIPWHITE only recognizes tabs and spaces, and that READNUM can only read positive, integral numbers */ #define SKIPWHITE(s) { while (*s == ' ' || *s == '\t') s++; } #define READNUM(s, num) \ { if (!isdigit (*s)) return ERROR; holdnum = 0; \ do { holdnum = (*s++ - '0' + holdnum * 10); } \ while (isdigit (*s)); (num) = holdnum; } /* Read first set of digits */ SKIPWHITE (s); READNUM (s, db->ranges[0][START]); /* Was that the only digit? */ SKIPWHITE(s); if (*s == ',') { /* Get the next digit */ s++; READNUM (s, db->ranges[0][END]); } else db->ranges[0][END] = db->ranges[0][START]; /* Get the letter */ SKIPWHITE (s); switch (*s) { case 'a': type = ADD; break; case 'c': type = CHANGE; break; case 'd': type = DELETE; break; default: return ERROR; /* Bad format */ } s++; /* Past letter */ /* Read second set of digits */ SKIPWHITE (s); READNUM (s, db->ranges[1][START]); /* Was that the only digit? */ SKIPWHITE(s); if (*s == ',') { /* Get the next digit */ s++; READNUM (s, db->ranges[1][END]); SKIPWHITE (s); /* To move to end */ } else db->ranges[1][END] = db->ranges[1][START]; *string = s; return type; } int read_diff (filea, fileb, output_placement) char *filea, *fileb; char **output_placement; { char *argv[4]; int fds[2]; char *diff_result; long current_chunk_size; int bytes; int total; argv[0] = diff_program; argv[1] = filea; argv[2] = fileb; argv[3] = (char *) 0; pipe (fds); if (!fork ()) { /* Child */ dup2 (fds[1], 1); /* Make stdout the pipe to the parent */ /* Leave stdin alone; the diff may need it */ execve (diff_program, argv, environ); perror_with_exit ("Exec failed"); } close (fds[1]); /* Prevent erroneous lack of EOF */ current_chunk_size = DIFF_CHUNK_SIZE; diff_result = (char *) xmalloc (current_chunk_size); total = 0; do { bytes = myread (fds[0], diff_result + total, current_chunk_size - total); total += bytes; if (total == current_chunk_size) diff_result = (char *) xrealloc (diff_result, (current_chunk_size *= 2)); } while (bytes); *output_placement = diff_result; return total; } /* * Scan a regular diff line (consisting of > or <, followed by a * space, followed by text (including nulls) up to a newline. * * This next routine began life as a macro and many parameters in it * are used as call-by-reference values. */ char * scan_diff_line (scan_ptr, set_start, set_length, limit, firstchar) char *scan_ptr, **set_start; int *set_length; char *limit; char firstchar; { char *line_ptr = scan_ptr + 2; if (!(scan_ptr[0] == (firstchar) && scan_ptr[1] == ' ')) fatal ("Bad diff format; incorrect leading line chars"); *set_start = line_ptr; while (*line_ptr != '\n') line_ptr++; if (line_ptr >= limit) fatal ("Bad diff format; overflow in parse"); /* Don't include newline, but do return the beginning of the next line */ *set_length = line_ptr - *set_start; return line_ptr + 1; } /* * This routine outputs a three way diff passed as a list of * diff3_block's. * The argument MAPPING is indexed by external file number (in the * argument list) and contains the internal file number (from the * diff passed). This is important because the user expects his * outputs in terms of the argument list number, and the diff passed * may have been done slightly differently (if the first argument in * the argument list was the standard input, for example). */ void output_diff3 (outputfile, diff, mapping) FILE *outputfile; struct diff3_block *diff; int mapping[3]; { int rev_mapping[3]; static int eliminate[3] = { 1, 0, 0}; int i; int oddoneout; char *cp; struct diff3_block *ptr; int line; int dontprint; static int skew_increment[3] = { 2, 3, 1 }; /* 0==>2==>1==>3 */ for (i = 0; i < 3; i++) rev_mapping [mapping[i]] = i; for (ptr = diff; ptr; ptr = D_NEXT (ptr)) { char x[2]; switch (ptr->correspond) { case DIFF_ALL: x[0] = '\0'; dontprint = 3; /* Print them all */ oddoneout = 3; /* Nobody's odder than anyone else */ break; case DIFF_1ST: case DIFF_2ND: case DIFF_3RD: oddoneout = rev_mapping[(int) ptr->correspond - (int) DIFF_1ST]; x[0] = oddoneout + '1'; x[1] = '\0'; dontprint = eliminate [oddoneout]; break; default: fatal ("internal: Bad diff type passed to output"); } fprintf (outputfile, "====%s\n", x); /* Go 0, 2, 1 if the first and third outputs are equivalent. */ for (i = 0; i < 3; i = (oddoneout == 1 ? skew_increment[i] : i + 1)) { int realfile = mapping[i]; int lowt = D_LOWLINE (ptr, realfile), hight = D_HIGHLINE (ptr, realfile); fprintf (outputfile, "%d:", i + 1); switch (lowt - hight) { case 1: fprintf (outputfile, "%da\n", lowt - 1); break; case 0: fprintf (outputfile, "%dc\n", lowt); break; default: fprintf (outputfile, "%d,%dc\n", lowt, hight); break; } if (i == dontprint) continue; for (line = 0; line < hight - lowt + 1; line++) { fprintf (outputfile, " "); for (cp = D_RELNUM (ptr, realfile, line); cp < (D_RELNUM (ptr, realfile, line) + D_RELLEN (ptr, realfile, line)); cp++) putc (*cp, outputfile); putc ('\n', outputfile); } } } } /* * This routine outputs a diff3 set of blocks as an ed script. This * script applies the changes between file's 2 & 3 to file 1. It * takes the precise format of the ed script to be output from global * variables set during options processing. Note that it does * destructive things to the set of diff3 blocks it is passed; it * reverses their order (this gets around the problems involved with * changing line numbers in an ed script). * * Note that this routine has the same problem of mapping as the last * one did; the variable MAPPING maps from file number according to * the argument list to file number according to the diff passed. All * files listed below are in terms of the argument list. * * Also, occasionally this routine needs the real names of the files * on which it works. Thus file0, file1, and file2 are the filenames * passed on the command line. * * Returns the number of overlaps. * * See options.h for documentation on the global variables which this * routine pays attention to. */ int output_diff3_edscript (outputfile, diff, mapping, file0, file1, file2) FILE *outputfile; struct diff3_block *diff; int mapping[3]; char *file0, *file1, *file2; { int rev_mapping[3]; int i; int leading_dot; int overlap_count = 0; struct diff3_block *newblock, *thisblock; char *cp; leading_dot = 0; for (i = 0; i < 3; i++) rev_mapping [mapping [i]] = i; newblock = reverse_diff3_blocklist (diff); for (thisblock = newblock; thisblock; thisblock = thisblock->next) { /* Must do mapping correctly. */ enum diff_type type = ((thisblock->correspond == DIFF_ALL) ? DIFF_ALL : ((enum diff_type) (((int) DIFF_1ST) + rev_mapping [(int) thisblock->correspond - (int) DIFF_1ST]))); /* If we aren't supposed to do this output block, skip it */ if (type == DIFF_2ND || type == DIFF_1ST || (type == DIFF_3RD && dont_write_simple) || (type == DIFF_ALL && dont_write_overlap)) continue; if (flagging && type == DIFF_ALL) /* Do special flagging */ { /* Put in lines from FILE2 with bracket */ fprintf (outputfile, "%da\n", D_HIGHLINE (thisblock, mapping [FILE0])); fprintf (outputfile, "=======\n"); for (i = 0; i < D_NUMLINES (thisblock, mapping [FILE2]); i++) { if (D_RELNUM (thisblock, mapping[FILE2], i)[0] == '.') { leading_dot = 1; fprintf(outputfile, "."); } for (cp = D_RELNUM (thisblock, mapping [FILE2], i); cp < (D_RELNUM (thisblock, mapping [FILE2], i) + D_RELLEN (thisblock, mapping [FILE2], i)); cp++) putc (*cp, outputfile); putc ('\n', outputfile); } fprintf (outputfile, ">>>>>>> %s\n.\n", file2); overlap_count++; /* Add in code to take care of leading dots, if necessary. */ if (leading_dot) { fprintf (outputfile, "%d,%ds/^\\.\\./\\./\n", D_HIGHLINE (thisblock, mapping [FILE0]) + 1, (D_HIGHLINE (thisblock, mapping [FILE0]) + D_NUMLINES (thisblock, mapping [FILE2]))); leading_dot = 0; } /* Put in code to do initial bracket of lines from FILE0 */ fprintf (outputfile, "%da\n<<<<<<< %s\n.\n", D_LOWLINE (thisblock, mapping[FILE0]) - 1, file0); } else if (D_NUMLINES (thisblock, mapping [FILE2]) == 0) /* Write out a delete */ { if (D_NUMLINES (thisblock, mapping [FILE0]) == 1) fprintf (outputfile, "%dd\n", D_LOWLINE (thisblock, mapping [FILE0])); else fprintf (outputfile, "%d,%dd\n", D_LOWLINE (thisblock, mapping [FILE0]), D_HIGHLINE (thisblock, mapping [FILE0])); } else /* Write out an add or change */ { switch (D_NUMLINES (thisblock, mapping [FILE0])) { case 0: fprintf (outputfile, "%da\n", D_HIGHLINE (thisblock, mapping [FILE0])); break; case 1: fprintf (outputfile, "%dc\n", D_HIGHLINE (thisblock, mapping [FILE0])); break; default: fprintf (outputfile, "%d,%dc\n", D_LOWLINE (thisblock, mapping [FILE0]), D_HIGHLINE (thisblock, mapping [FILE0])); break; } for (i = 0; i < D_NUMLINES (thisblock, mapping [FILE2]); i++) { if (D_RELNUM (thisblock, mapping [FILE2], i)[0] == '.') { leading_dot = 1; fprintf (outputfile, "."); } for (cp = D_RELNUM (thisblock, mapping [FILE2], i); cp < (D_RELNUM (thisblock, mapping [FILE2], i) + D_RELLEN (thisblock, mapping [FILE2], i)); cp++) putc (*cp, outputfile); putc ('\n', outputfile); } fprintf (outputfile, ".\n"); /* Add in code to take care of leading dots, if necessary. */ if (leading_dot) { fprintf (outputfile, "%d,%ds/^\\.\\./\\./\n", D_HIGHLINE (thisblock, mapping [FILE0]) + 1, (D_HIGHLINE (thisblock, mapping [FILE0]) + D_NUMLINES (thisblock, mapping [FILE2]))); leading_dot = 0; } } } if (finalwrite) fprintf (outputfile, "w\nq\n"); return overlap_count; } /* * Reverse the order of the list of diff3 blocks. */ struct diff3_block * reverse_diff3_blocklist (diff) struct diff3_block *diff; { register struct diff3_block *tmp, *next, *prev; for (tmp = diff, prev = (struct diff3_block *) 0; tmp; tmp = next) { next = tmp->next; tmp->next = prev; prev = tmp; } return prev; } int myread (fd, ptr, size) int fd, size; char *ptr; { int result = read (fd, ptr, size); if (result < 0) perror_with_exit ("Read failed"); return result; } VOID * xmalloc (size) int size; { VOID *result = (VOID *) malloc (size); if (!result) fatal ("Malloc failed"); return result; } VOID * xrealloc (ptr, size) VOID *ptr; int size; { VOID *result = (VOID *) realloc (ptr, size); if (!result) fatal ("Malloc failed"); return result; } fatal (string) char *string; { fprintf (stderr, "%s: %s\n", argv0, string); exit (1); } perror_with_exit (string) char *string; { perror (string); exit (1); } turn result; } VOID * xmalloc (size) int size; { VOID *result = (VOID *) malloc (size); if (!result) fatal ("Malloc failed"); return result; } VOID * xrealloc (ptr, size) VOID *ptr; int size; { VOID *result = (VOID *) diff/getopt.c 644 473 0 37540 4704135046 6475 /* Getopt for GNU. Copyright (C) 1987, 1989 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* This version of `getopt' appears to the caller like standard Unix `getopt' but it behaves differently for the user, since it allows the user to intersperse the options with the other arguments. As `getopt' works, it permutes the elements of `argv' so that, when it is done, all the options precede everything else. Thus all application programs are extended to handle flexible argument order. Setting the environment variable _POSIX_OPTION_ORDER disables permutation. Then the behavior is completely standard. GNU application programs can use a third alternative mode in which they can distinguish the relative order of options and other arguments. */ #include /* If compiled with GNU C, use the built-in alloca */ #ifdef __GNUC__ #define alloca __builtin_alloca #else /* not __GNUC__ */ #ifdef sparc #include #else char *alloca (); #endif #endif /* not __GNUC__ */ #ifdef USG #define bcopy(s, d, l) memcpy((d), (s), (l)) #define index strchr #endif char *getenv (); char *index (); char *malloc (); /* For communication from `getopt' to the caller. When `getopt' finds an option that takes an argument, the argument value is returned here. Also, when `ordering' is RETURN_IN_ORDER, each non-option ARGV-element is returned here. */ char *optarg = 0; /* Index in ARGV of the next element to be scanned. This is used for communication to and from the caller and for communication between successive calls to `getopt'. On entry to `getopt', zero means this is the first call; initialize. When `getopt' returns EOF, this is the index of the first of the non-option elements that the caller should itself scan. Otherwise, `optind' communicates from one call to the next how much of ARGV has been scanned so far. */ int optind = 0; /* The next char to be scanned in the option-element in which the last option character we returned was found. This allows us to pick up the scan where we left off. If this is zero, or a null string, it means resume the scan by advancing to the next ARGV-element. */ static char *nextchar; /* Callers store zero here to inhibit the error message for unrecognized options. */ int opterr = 1; /* Describe how to deal with options that follow non-option ARGV-elements. If the caller did not specify anything, the default is REQUIRE_ORDER if the environment variable _POSIX_OPTION_ORDER is defined, PERMUTE otherwise. REQUIRE_ORDER means don't recognize them as options. Stop option processing when the first non-option is seen. This is what Unix does. PERMUTE is the default. We permute the contents of ARGV as we scan, so that eventually all the options are at the end. This allows options to be given in any order, even with programs that were not written to expect this. RETURN_IN_ORDER is an option available to programs that were written to expect options and other ARGV-elements in any order and that care about the ordering of the two. We describe each non-option ARGV-element as if it were the argument of an option with character code one. Using `-' as the first character of the list of option characters requests this mode of operation. The special argument `--' forces an end of option-scanning regardless of the value of `ordering'. In the case of RETURN_IN_ORDER, only `--' can cause `getopt' to return EOF with `optind' != ARGC. */ static enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering; /* Describe the long-named options requested by the application. _GETOPT_LONG_OPTIONS is a vector of `struct option' terminated by an element containing a name which is zero. The field `has_arg' is 1 if the option takes an argument, 2 if it takes an optional argument. */ struct option { char *name; int has_arg; int *flag; int val; }; struct option *_getopt_long_options; int _getopt_long_only = 0; #if 0 /* This is an ugly kludge. Programs should use the opt_index argument to getopt_long instead. */ /* Name of long-named option actually found. */ char *_getopt_option_name; #endif /* Index in _GETOPT_LONG_OPTIONS of the long-named option actually found. Only valid when a long-named option was found. */ int option_index; /* Handle permutation of arguments. */ /* Describe the part of ARGV that contains non-options that have been skipped. `first_nonopt' is the index in ARGV of the first of them; `last_nonopt' is the index after the last of them. */ static int first_nonopt; static int last_nonopt; /* Exchange two adjacent subsequences of ARGV. One subsequence is elements [first_nonopt,last_nonopt) which contains all the non-options that have been skipped so far. The other is elements [last_nonopt,optind), which contains all the options processed since those non-options were skipped. `first_nonopt' and `last_nonopt' are relocated so that they describe the new indices of the non-options in ARGV after they are moved. */ static void exchange (argv) char **argv; { int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *); char **temp = (char **) alloca (nonopts_size); /* Interchange the two blocks of data in argv. */ bcopy (&argv[first_nonopt], temp, nonopts_size); bcopy (&argv[last_nonopt], &argv[first_nonopt], (optind - last_nonopt) * sizeof (char *)); bcopy (temp, &argv[first_nonopt + optind - last_nonopt], nonopts_size); /* Update records for the slots the non-options now occupy. */ first_nonopt += (optind - last_nonopt); last_nonopt = optind; } /* Scan elements of ARGV (whose length is ARGC) for option characters given in OPTSTRING. If an element of ARGV starts with '-', and is not exactly "-" or "--", then it is an option element. The characters of this element (aside from the initial '-') are option characters. If `getopt' is called repeatedly, it returns successively each of the option characters from each of the option elements. If `getopt' finds another option character, it returns that character, updating `optind' and `nextchar' so that the next call to `getopt' can resume the scan with the following option character or ARGV-element. If there are no more option characters, `getopt' returns `EOF'. Then `optind' is the index in ARGV of the first ARGV-element that is not an option. (The ARGV-elements have been permuted so that those that are not options now come last.) OPTSTRING is a string containing the legitimate option characters. If an option character is seen that is not listed in OPTSTRING, return '?' after printing an error message. If you set `opterr' to zero, the error message is suppressed but we still return '?'. If a char in OPTSTRING is followed by a colon, that means it wants an arg, so the following text in the same ARGV-element, or the text of the following ARGV-element, is returned in `optarg'. Two colons mean an option that wants an optional arg; if there is text in the current ARGV-element, it is returned in `optarg', otherwise `optarg' is set to zero. If OPTSTRING starts with `-', it requests a different method of handling the non-option ARGV-elements. See the comments about RETURN_IN_ORDER, above. Long-named options begin with `+' instead of `-'. Their names may be abbreviated as long as the abbreviation is unique or is an exact match for some defined option. If they have an argument, it follows the option name in the same ARGV-element, separated from the option name by a `=', or else the in next ARGV-element. `getopt' returns 0 when it finds a long-named option. */ int getopt (argc, argv, optstring) int argc; char **argv; char *optstring; { optarg = 0; /* Initialize the internal data when the first call is made. Start processing options with ARGV-element 1 (since ARGV-element 0 is the program name); the sequence of previously skipped non-option ARGV-elements is empty. */ if (optind == 0) { first_nonopt = last_nonopt = optind = 1; nextchar = 0; /* Determine how to handle the ordering of options and nonoptions. */ if (optstring[0] == '-') ordering = RETURN_IN_ORDER; else if (getenv ("_POSIX_OPTION_ORDER") != 0) ordering = REQUIRE_ORDER; else ordering = PERMUTE; } if (nextchar == 0 || *nextchar == 0) { if (ordering == PERMUTE) { /* If we have just processed some options following some non-options, exchange them so that the options come first. */ if (first_nonopt != last_nonopt && last_nonopt != optind) exchange (argv); else if (last_nonopt != optind) first_nonopt = optind; /* Now skip any additional non-options and extend the range of non-options previously skipped. */ while (optind < argc && (argv[optind][0] != '-' || argv[optind][1] == 0) && (_getopt_long_options == 0 || argv[optind][0] != '+' || argv[optind][1] == 0)) optind++; last_nonopt = optind; } /* Special ARGV-element `--' means premature end of options. Skip it like a null option, then exchange with previous non-options as if it were an option, then skip everything else like a non-option. */ if (optind != argc && !strcmp (argv[optind], "--")) { optind++; if (first_nonopt != last_nonopt && last_nonopt != optind) exchange (argv); else if (first_nonopt == last_nonopt) first_nonopt = optind; last_nonopt = argc; optind = argc; } /* If we have done all the ARGV-elements, stop the scan and back over any non-options that we skipped and permuted. */ if (optind == argc) { /* Set the next-arg-index to point at the non-options that we previously skipped, so the caller will digest them. */ if (first_nonopt != last_nonopt) optind = first_nonopt; return EOF; } /* If we have come to a non-option and did not permute it, either stop the scan or describe it to the caller and pass it by. */ if ((argv[optind][0] != '-' || argv[optind][1] == 0) && (_getopt_long_options == 0 || argv[optind][0] != '+' || argv[optind][1] == 0)) { if (ordering == REQUIRE_ORDER) return EOF; optarg = argv[optind++]; #if 0 _getopt_option_name = 0; #endif return 1; } /* We have found another option-ARGV-element. Start decoding its characters. */ nextchar = argv[optind] + 1; } if (_getopt_long_options != 0 && (argv[optind][0] == '+' || (_getopt_long_only && argv[optind][0] == '-')) ) { struct option *p; char *s = nextchar; int exact = 0; int ambig = 0; struct option *pfound = 0; int indfound; while (*s && *s != '=') s++; /* Test all options for either exact match or abbreviated matches. */ for (p = _getopt_long_options, option_index = 0; p->name; p++, option_index++) if (!strncmp (p->name, nextchar, s - nextchar)) { if (s - nextchar == strlen (p->name)) { /* Exact match found. */ pfound = p; indfound = option_index; exact = 1; break; } else if (pfound == 0) { /* First nonexact match found. */ pfound = p; indfound = option_index; } else /* Second nonexact match found. */ ambig = 1; } if (ambig && !exact) { fprintf (stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[optind]); nextchar += strlen (nextchar); return '?'; } if (pfound != 0) { option_index = indfound; optind++; if (*s) { if (pfound->has_arg > 0) optarg = s + 1; else { fprintf (stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[optind - 1][0], pfound->name); nextchar += strlen (nextchar); return '?'; } } else if (pfound->has_arg) { if (optind < argc) optarg = argv[optind++]; else if (pfound->has_arg != 2) { fprintf (stderr, "%s: option `%s' requires an argument\n", argv[0], argv[optind - 1]); nextchar += strlen (nextchar); return '?'; } } #if 0 _getopt_option_name = pfound->name; #endif nextchar += strlen (nextchar); if (pfound->flag) *(pfound->flag) = pfound->val; return 0; } /* Can't find it as a long option. If this is getopt_long_only, and the option starts with '-' and is a valid short option, then interpret it as a short option. Otherwise it's an error. */ if (_getopt_long_only == 0 || argv[optind][0] == '+' || index (optstring, *nextchar) == 0) { if (opterr != 0) fprintf (stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[optind][0], nextchar); nextchar += strlen (nextchar); return '?'; } } /* Look at and handle the next option-character. */ { char c = *nextchar++; char *temp = index (optstring, c); /* Increment `optind' when we start to process its last character. */ if (*nextchar == 0) optind++; if (temp == 0 || c == ':') { if (opterr != 0) { if (c < 040 || c >= 0177) fprintf (stderr, "%s: unrecognized option, character code 0%o\n", argv[0], c); else fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c); } return '?'; } if (temp[1] == ':') { if (temp[2] == ':') { /* This is an option that accepts an argument optionally. */ if (*nextchar != 0) { optarg = nextchar; optind++; } else optarg = 0; nextchar = 0; } else { /* This is an option that requires an argument. */ if (*nextchar != 0) { optarg = nextchar; /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now. */ optind++; } else if (optind == argc) { if (opterr != 0) fprintf (stderr, "%s: option `-%c' requires an argument\n", argv[0], c); c = '?'; } else /* We already incremented `optind' once; increment it again when taking next ARGV-elt as argument. */ optarg = argv[optind++]; nextchar = 0; } } return c; } } #ifdef TEST /* Compile with -DTEST to make an executable for use in testing the above definition of `getopt'. */ int main (argc, argv) int argc; char **argv; { char c; int digit_optind = 0; while (1) { int this_option_optind = optind; if ((c = getopt (argc, argv, "abc:d:0123456789")) == EOF) break; switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (digit_optind != 0 && digit_optind != this_option_optind) printf ("digits occur in two different argv-elements.\n"); digit_optind = this_option_optind; printf ("option %c\n", c); break; case 'a': printf ("option a\n"); break; case 'b': printf ("option b\n"); break; case 'c': printf ("option c with value `%s'\n", optarg); break; case '?': break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind < argc) { printf ("non-option ARGV-elements: "); while (optind < argc) printf ("%s ", argv[optind++]); printf ("\n"); } return 0; } #endif /* TEST */ tind; printf ("option %c\n", c); break; case 'a': printf ("option a\n"); break; case 'b': printf ("option b\n"); break; case 'c': pridiff/getopt1.c 644 473 0 6333 4704135047 6533 /* Getopt for GNU. Copyright (C) 1987, 1989 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "getopt.h" int getopt_long (argc, argv, options, long_options, opt_index) int argc; char **argv; char *options; struct option *long_options; int *opt_index; { int val; _getopt_long_options = long_options; val = getopt (argc, argv, options); if (val == 0) *opt_index = option_index; return val; } /* Like getopt_long, but there are no short options. That is, '-' as well as '+' indicates a long option. Of course, long_options can contain single character options but '-ab' is not the same as '-a -b'. */ int getopt_long_only (argc, argv, options, long_options, opt_index) int argc; char **argv; char *options; struct option *long_options; int *opt_index; { int val; _getopt_long_options = long_options; _getopt_long_only = 1; val = getopt (argc, argv, options); if (val == 0) *opt_index = option_index; return val; } #ifdef TEST #include int main (argc, argv) int argc; char **argv; { char c; int digit_optind = 0; while (1) { int this_option_optind = optind; char *name = '\0'; int option_index = 0; static struct option long_options[] = {{ "add", 1, 0, 0 }, { "append", 0, 0, 0 }, { "delete", 1, 0, 0 }, { "verbose", 0, 0, 0 }, { "create", 0, 0, 0 }, { "file", 1, 0, 0 }, { 0, 0, 0, 0}}; c = getopt_long (argc, argv, "abc:d:0123456789", long_options, &option_index); if (c == EOF) break; switch (c) { case 0: printf ("option %s", (long_options[option_index]).name); if (optarg) printf (" with arg %s", optarg); printf ("\n"); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (digit_optind != 0 && digit_optind != this_option_optind) printf ("digits occur in two different argv-elements.\n"); digit_optind = this_option_optind; printf ("option %c\n", c); break; case 'a': printf ("option a\n"); break; case 'b': printf ("option b\n"); break; case 'c': printf ("option c with value `%s'\n", optarg); break; case '?': break; default: printf ("?? getopt returned character code 0%o ??\n", c); } } if (optind < argc) { printf ("non-option ARGV-elements: "); while (optind < argc) printf ("%s ", argv[optind++]); printf ("\n"); } return 0; } #endif /* TEST */ , c); break; case 'a': printf ("option a\n"); break; case 'b': printf ("option b\n"); break; case 'c': printf ("option c with value `%s'\n", optarg); break; case '?': break; default: printf ("?? getopt returned character code 0%diff/getopt.h 644 473 0 6031 4704135047 6452 /* declarations for getopt Copyright (C) 1989 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* For communication from `getopt' to the caller. When `getopt' finds an option that takes an argument, the argument value is returned here. Also, when `ordering' is RETURN_IN_ORDER, each non-option ARGV-element is returned here. */ extern char *optarg; /* Index in ARGV of the next element to be scanned. This is used for communication to and from the caller and for communication between successive calls to `getopt'. On entry to `getopt', zero means this is the first call; initialize. When `getopt' returns EOF, this is the index of the first of the non-option elements that the caller should itself scan. Otherwise, `optind' communicates from one call to the next how much of ARGV has been scanned so far. */ extern int optind; /* Callers store zero here to inhibit the error message `getopt' prints for unrecognized options. */ extern int opterr; /* Describe the long-named options requested by the application. _GETOPT_LONG_OPTIONS is a vector of `struct option' terminated by an element containing a name which is zero. The field `has_arg' is: 0 if the option does not take an argument, 1 if the option requires an argument, 2 if the option takes an optional argument. If the field `flag' is nonzero, it points to a variable that is set to the value given in the field `val' when the option is found, but left unchanged if the option is not found. */ struct option { char *name; int has_arg; int *flag; int val; }; extern struct option *_getopt_long_options; /* If nonzero, tell getopt that '-' means a long option. Set by getopt_long_only. */ extern int _getopt_long_only; #if 0 /* Name of long-named option actually found. Only changed when a long-named option is found. Set to zero when returning a non-option arg in `optarg'. */ extern char *_getopt_option_name; #endif /* The index in GETOPT_LONG_OPTIONS of the long-named option found. Only valid when a long-named option has been found by the most recent call to `getopt'. */ extern int option_index; #ifdef __STDC__ int getopt (int, char **, char *); int getopt_long (int, char **, char *, struct option *, int *); int getopt_long_only (int, char **, char *, struct option *, int *); #else int getopt (); int getopt_long (); int getopt_long_only (); #endif g in `optarg'. */ extern char *_getopt_option_name; #endif /* The index in GETOPT_LONG_OPTIONS of the long-named option found. Only valid when a long-named option has been found by the most recent call to `getopt'. */ extern int option_index; #ifdef __STDC__ int getopt (int, char **, char *); int getopt_long (int, char **, char *, struct option *, int *); int getopt_long_only (int, char **, char *, struct option *, int *); #else int getopt (); int getopt_long (); int getdiff/README 644 473 0 4340 4704135047 5660 This directory contains the GNU DIFF and DIFF3 utilities, version 1.14. See file COPYING for copying conditions. To compile and install on system V, you must edit the makefile according to comments therein. Report bugs to bug-gnu-utils@prep.ai.mit.edu This version of diff provides all the features of BSD's diff. It has these additional features: -a Always treat files as text and compare them line-by-line, even if they do not appear to be ASCII. -B ignore changes that just insert or delete blank lines. -C # request -c format and specify number of context lines. -F regexp in context format, for each unit of differences, show some of the last preceding line that matches the specified regexp. -H use heuristics to speed handling of large files that have numerous scattered small changes. The algorithm becomes asymptotically linear for such files! -I regexp ignore changes that just insert or delete lines that match the specified regexp. -N in directory comparison, if a file is found in only one directory, treat it as present but empty in the other directory. -p equivalent to -c -F'^[_a-zA-Z]'. This is useful for C code because it shows which function each change is in. -T print a tab rather than a space before the text of a line in normal or context format. This causes the alignment of tabs in the line to look normal. GNU DIFF was written by Mike Haertel, David Hayes, Richard Stallman and Len Tower. The basic algorithm is described in: "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, p 251. Suggested projects for improving GNU DIFF: * Handle very large files by not keeping the entire text in core. One way to do this is to scan the files sequentally to compute hash codes of the lines and put the lines in equivalence classes based only on hash code. Then compare the files normally. This will produce some false matches. Then scan the two files sequentially again, checking each match to see whether it is real. When a match is not real, mark both the "matching" lines as changed. Then build an edit script as usual. The output routines would have to be changed to scan the files sequentially looking for the text to print. to scan the files sequentally to compute hash codes of the lines and put the lines in equivalence classes based only on hash code. Then compare the files normally. This will produce some false matches. Then scan the two files sequentially again, checking each match to see whether it diff/diagmeet.note 644 473 0 2055 4704135047 7447 Here is a comparison matrix which shows a case in which it is possible for the forward and backward scan in `diag' to meet along a nonzero length of diagonal simultaneous (so that bdiag[d] and fdiag[d] are not equal) even though there is no snake on that diagonal at the meeting point. 85 1 1 1 159 1 1 17 1 2 3 4 60 1 2 1 2 2 3 4 71 3 3 4 5 85 4 3 4 5 17 5 4 5 1 6 4 5 6 183 7 5 6 7 10 8 6 7 1 9 6 7 8 12 7 8 9 10 13 10 8 9 10 14 10 9 10 17 10 10 1 10 9 10 1 8 10 10 10 183 8 7 9 9 9 10 7 6 8 9 8 8 1 6 5 7 7 1 5 6 6 1 5 5 5 50 5 4 4 4 1 4 3 3 85 5 4 3 2 2 1 2 1 17 5 4 3 2 1 1 1 1 0 85 1 1 1 159 1 1 17 7 8 9 10 13 10 8 9 10 14 10 9 10 17 10 10 1 10 9 10 1 8 10 10 10 183 8 7 9 9 9 10 7 6 8 9 8 8 1 6 5 7 7 1 5 6 6 1 5 5 5 50 5 4 4 4 1 4 3 3 85 5 4 3 2 2 1 2 1 17 5 4 3 2 1 1 1 1 0 diff/Makefile 644 473 0 6075 4704136537 6454 # Makefile for GNU DIFF # Copyright (C) 1988, 1989 Free Software Foundation, Inc. # This file is part of GNU DIFF. # GNU DIFF is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 1, or (at your option) # any later version. # # GNU DIFF is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GNU DIFF; see the file COPYING. If not, write to # the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. # You can compile this with ordinary cc as well, # but gcc makes it faster. # Also, gcc supports -O and -g together. CC=gcc -O CFLAGS = -g INSTALL = install # On system V, enable these three lines: CFLAGS = -g -DUSG -DHAVE_NDIR -DHAVE_DIRECT # LIBS = -lPW INSTALL = cp LIBS = -lx # (If you compile with GCC, you don't need to define LIBS.) # And, depending on the names and contents of your header files, # add either -DHAVE_NDIR or -DHAVE_DIRECT or both to CFLAGS. # Add -DHAVE_NDIR to CFLAGS if your system used ndir.h instead of dirent.h # Add -DHAVE_DIRECT to CFLAGS if your system uses 'struct direct' instead of # 'struct dirent' (this is the case at least with one add-on ndir library). bindir=/usr/local/bin prefix= # All source files srcs=diff.c analyze.c io.c context.c ed.c normal.c ifdef.c util.c dir.c \ version.c diff.h regex.c regex.h limits.h diff3.c \ getopt.c getopt1.c getopt.h # Object files for diff only. objs=$(archpfx)diff.o $(archpfx)analyze.o $(archpfx)io.o $(archpfx)context.o \ $(archpfx)ed.o $(archpfx)normal.o $(archpfx)util.o $(archpfx)dir.o \ $(archpfx)regex.o $(archpfx)ifdef.o $(archpfx)version.o \ $(archpfx)getopt.o $(archpfx)getopt1.o tapefiles = $(srcs) README diagmeet.note Makefile COPYING ChangeLog all: $(archpfx)diff $(archpfx)diff3 $(archpfx)diff3: $(archpfx)diff3.o $(CC) -o $(archpfx)diff3 $(CFLAGS) $(LDFLAGS) $(archpfx)diff3.o $(LIBS) $(archpfx)diff: $(objs) $(CC) -o $(archpfx)diff $(CFLAGS) $(LDFLAGS) $(objs) $(LIBS) $(objs): diff.h $(archpfx)context.o $(archpfx)diff.o: regex.h $(archpfx)diff3.o: diff3.c $(CC) -c $(CFLAGS) -DDIFF_PROGRAM=\"$(bindir)/diff\" diff3.c \ $(OUTPUT_OPTION) clean: rm -f *.o $(archpfx)diff $(archpfx)diff3 diff.tar diff.tar.Z install: install-diff install-diff3 install-diff: $(prefix)$(bindir)/gnudiff $(prefix)$(bindir)/gnudiff: $(archpfx)diff $(INSTALL) $(archpfx)diff $(prefix)$(bindir)/gnudiff install-diff3: $(prefix)$(bindir)/gnudiff3 $(prefix)$(bindir)/gnudiff3: $(archpfx)diff3 $(INSTALL) $(archpfx)diff3 $(prefix)$(bindir)/gnudiff3 diff.tar: $(tapefiles) mkdir tmp mkdir tmp/diff -ln $(tapefiles) tmp/diff for file in $(tapefiles); do \ if [ ! -r tmp/diff/$$file ]; then cp $$file tmp/diff; fi \ done cd tmp; tar cf ../diff.tar diff rm -rf tmp diff.tar.Z: diff.tar compress < diff.tar > diff.tar.Z $(bindir)/gnudiff: $(archpfx)diff $(INSTALL) $(archpfx)diff $(prefix)$(bindir)/gnudiff install-diff3: $(prefix)$(bindir)/gnudiff3 $(prefix)$(bindir)/gnudiff3: $(archpfx)diff3 $(INSTALL) $(archpfx)diff3 $(prefix)$(bindir)/gnudiff3 diff.tar: $(tapefiles) mkdir tmp mkdir tmp/diff -ln $(tapefiles) tmp/diff for file in $(tapefiles); do \ if [ ! -r tmp/diff/$$file ]; then cp $$file tmp/diff; fi \ done cd tmp; tar cf ../diff.tar diff rm -rfdiff/COPYING 644 473 0 30310 4704135047 6047 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 675 Mass Ave, Cambridge, MA 02139, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the programdiff/ChangeLog 644 473 0 24676 4704135050 6602 Thu Mar 1 17:19:23 1990 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * analyze.c (diff_2_files): `message' requires three args. Fri Feb 23 10:56:50 1990 David J. MacKenzie (djm at albert.ai.mit.edu) * diff.h, util.c, diff3.c: Change 'void *' to 'VOID *', with VOID defined as void if __STDC__, char if not. Sun Feb 18 20:31:58 1990 David J. MacKenzie (djm at albert.ai.mit.edu) * Makefile: Add rules for getopt.c, getopt1.c, getopt.h. * getopt.c, getopt.h, getopt1.c: New files. * main.c (main, usage): Add long options. * analyze.c (shift_boundaries): Remove unused var 'j_end'. Thu Feb 8 02:43:16 1990 Jim Kingdon (kingdon at pogo.ai.mit.edu) * GNUmakefile: include ../Makerules before Makefile. Fri Feb 2 23:21:38 1990 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * analyze.c (diif_2_files): If -B or -I, don't return 1 if all changes were ignored. Wed Jan 24 20:43:57 1990 Richard Stallman (rms at albert.ai.mit.edu) * diff3.c (fatal): Output to stderr. Thu Jan 11 00:25:56 1990 David J. MacKenzie (djm at hobbes.ai.mit.edu) * diff.c (usage): Mention -v. Wed Jan 10 16:06:38 1990 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff3.c (output_diff3_edscript): Return number of overlaps. (main): If have overlaps, exit with status 1. Sun Dec 24 10:29:20 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * io.c (find_equiv_class): Fix typo that came from changing init of B to an assigment. * version.c: New file. * diff.c (main): -v prints version number. * io.c (binary_file_p): Null char implies binary file. Fri Nov 17 23:44:55 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * util.c (print_1_line): Fix off by 1 error. Thu Nov 16 13:51:10 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * util.c (xcalloc): Function deleted. * io.c (slurp): Null-terminate the buffer. * io.c (read_files): Delete unused vars. * io.c (find_equiv_class): Don't index by N if too low. * dir.c (dir_sort): Delete the extra declaration of compare_names. * diff.h: Don't declare xcalloc. Declare some other functions. * analyze.c (shift_boundaries): Test for END at end of range before indexing by it. Fix typo `preceeding' in var names. Sat Nov 11 14:04:16 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff3.c (using_to_diff3_block): Delete unused vars. (make_3way_diff, process_diff_control, read_diff, output_diff3): Likewise. Mon Nov 6 18:15:50 EST 1989 Jay Fenlason (hack@ai.mit.edu) * README Fix typo. Fri Nov 3 15:27:47 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (usage): Mention -D. * ifdef.c (print_ifdef_hunk): Write comments on #else and #endif. Sun Oct 29 16:41:07 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (compare_files): Don't fflush for identical files. Wed Oct 25 17:57:12 1989 Randy Smith (randy at apple-gunkies.ai.mit.edu) * diff3.c (using_to_diff3_block): When defaulting lines from FILE0, only copy up to just under the *lowest* line mentioned in the next diff. * diff3.c (fatal): Add \n to error messages. Wed Oct 25 15:05:49 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * Makefile (tapefiles): Add ChangeLog. Tue Oct 3 00:51:17 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff3.c (process_diff, create_diff3_block): Init ->next field. Fri Sep 29 08:16:45 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * util.c (line_cmp): Alter end char of line 2, not line 1. Wed Sep 20 00:12:37 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * Makefile (diff.tar): Expect ln to fail on some files; copy them with cp. Mon Sep 18 02:54:29 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * Handle -D option: * io.c (find_and_hash_each_line): Keep all lines of 1st file. * diff.c (main): Handle -D option. (compare_files): Reject -D if files spec'd are directories. * analyze.c (diff_2_files): Handle OUTPUT_IFDEF case. Fri Sep 1 20:15:50 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (option_list): Rename arg VECTOR as OPTIONVEC. Mon Aug 28 17:58:27 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (compare_files): Clear entire inf[i].stat. Wed Aug 23 17:48:47 1989 Richard Stallman (rms at apple-gunkies.ai.mit.edu) * io.c (find_identical_ends): Sign was backward determining where to bound the scan for the suffix. Wed Aug 16 12:49:16 1989 Richard Stallman (rms at hobbes.ai.mit.edu) * analyze.c (diff_2_files): If -q, treat all files as binary. * diff.c (main): Detect -q, record in no_details_flag. Sun Jul 30 23:12:00 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (usage): New function. (main): Call it. Wed Jul 26 02:02:19 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (main): Make -C imply -c. Thu Jul 20 17:57:51 1989 Chris Hanson (cph at kleph) * io.c (find_and_hash_each_line): Bug fix in context handling, introduced by last change. Fri Jul 14 17:39:20 1989 Chris Hanson (cph at kleph) * analyze.c: To make RCS work correctly on files that don't necessarily end in newline, introduce some changes that cause diffs to be sensitive to missing final newline. Because non-RCS modes don't want to be affected by these changes, they are conditional on `output_style == OUTPUT_RCS'. (diff_2_files) [OUTPUT_RCS]: Suppress the "File X missing newline" message. (build_script) [OUTPUT_RCS]: Cause the last line to compare as different if exactly one of the files is missing its final newline. * io.c (find_and_hash_each_line): Bug fix in ignore_space_change mode. Change line's length to include the newline. For OUTPUT_RCS, decrement last line's length if there is no final newline. (find_identical_ends) [OUTPUT_RCS]: If one of the files is missing a final newline, make sure it's not included in either the prefix or suffix. * util.c (print_1_line): Change line output routine to account for line length including the newline. Tue Jun 27 02:35:28 1989 Roland McGrath (roland at hobbes.ai.mit.edu) * Makefile: Inserted $(archpfx) where appropriate. Wed May 17 20:18:43 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff3.c [USG]: Include fcntl.h. * diff.h [USG]: New compilation flags HAVE_NDIR, HAVE_DIRECT. Wed Apr 26 15:35:57 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * dir.c (diff_dirs): Two new args, NONEX1 and NONEX2, say to pretend nonex dirs are empty. (dir_sort): New arg NONEX, likewise. * diff.c (compare_files): Pass those args. Sometimes call diff_dirs if subdir exists in just one place. Wed Apr 12 01:10:27 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * io.c (find_identical_ends): Set END0 *after* last char during backward scan for suffix. Sat Apr 8 15:49:49 1989 Randall Smith (randy at apple-gunkies.ai.mit.edu) * diff3.c (using_to_diff3_block): Now find high marks in files 1 and 2 through mapping off of the last difference instead of the first. * diff3.c: Many trivial changes to spelling inside comments. Fri Feb 24 12:38:03 1989 Randall Smith (randy at gluteus.ai.mit.edu) * util.c, normal.c, io.c, ed.c, dir.c, diff.h, diff.c, context.c, analyze.c, Makefile: Changed copyright header to conform with new GNU General Public license. * diff3.c: Changed copyright header to conform with new GNU General Public license. * COPYING: Made a hard link to /gp/rms/COPYING. Fri Feb 24 10:01:58 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * io.c (slurp): Leave 2 chars space at end of buffer, not one. (find_identical_ends): Special case if either file is empty; don't try to make a sentinel since could crash. Wed Feb 15 14:24:48 1989 Jay Fenlason (hack at apple-gunkies.ai.mit.edu) * diff3.c (message) Re-wrote routine to avoid using alloca() Wed Feb 15 06:19:14 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * io.c (find_identical_ends): Delete the variable `bytes'. Sun Feb 12 11:50:36 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * io.c (slurp): ->bufsize is nominal amount we have room for; add room for sentinel when calling xmalloc or xrealloc. * io.c (find_identical_ends): Do need overrun check in finding suffix. Fri Feb 10 01:28:15 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.c (main): -C now takes arg to specify context length. Now -p to show C function name--Damned IEEE! Fatal error if context length spec'd twice. * ed.c (print_ed_hunk): Now special treatment only for lines containing precisely a dot and nothing else. Output `..', end the insert, substitute that one line, then resume the insert if nec. * io.c (find_and_hash_lines): When backing up over starting context, don't move past buffer-beg. * io.c (find_identical_ends): Use sentinels to make the loops faster. If files are identical, skip the 2nd loop and return quickly. (slurp): Leave 1 char extra space after each buffer. * analyze.c (diff_2_files): Mention difference in final newlines. Wed Jan 25 22:44:44 1989 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * dir.c (diff_dirs): Use * when calling fcn ptr variable. Sat Dec 17 14:12:06 1988 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * Makefile: New vars INSTALL and LIBS used in some rules; provide default defns plus commented-put defns for sysV. Thu Nov 17 16:42:53 1988 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * dir.c (dir_sort): Open-trouble not fatal; just say # files is -1. (diff_dirs): If dir_sort does that, give up and return 2. * diff.c (compare_files): Don't open directories. Don't close them specially either. Cross-propagate inf[i].dir_p sooner. Sun Nov 13 11:19:36 1988 Richard Stallman (rms at sugar-bombs.ai.mit.edu) * diff.h: Declare index, rindex. * diff.c (compare_files): If comparing foodir with b/f, use foodir/f, not foodir/b/f. * diff.c (compare_files): Don't print "are identical" msg for 2 dirs. Status now 1 if one file is a dir and the other isn't, etc. Thu Nov 3 16:30:24 1988 Randall Smith (randy at gluteus.ai.mit.edu) * Makefile: Added a define for diff3 to define DIFF_PROGRAM. * util.c: Added hack to make sure that perror was not called with a null pointer. * diff.c: Changed S_IFDIR to S_IFMT in masking type of file bits out. * diff3.c: Included USG compatibility defines. * diff.h: Moved sys/file.h into #else USG section (not needed or wanted on System V). * ed.c, analyze.c, context.c: Shortened names to 12 characters for the sake of System V (too simple not to do). Local Variables: mode: indented-text left-margin: 8 version-control: never End: ine DIFF_PROGRAM. * util.c: Added hack to make sure that perror diff/limits.h 644 473 0 2222 4704135050 6441 /* Number of bits in a `char'. */ #define CHAR_BIT 8 /* No multibyte characters supported yet. */ #define MB_LEN_MAX 1 /* Minimum and maximum values a `signed char' can hold. */ #define SCHAR_MIN (-128) #define SCHAR_MAX 127 /* Maximum value an `unsigned char' can hold. (Minimum is 0). */ #define UCHAR_MAX 255U /* Minimum and maximum values a `char' can hold. */ #ifdef __CHAR_UNSIGNED__ #define CHAR_MIN 0 #define CHAR_MAX 255U #else #define CHAR_MIN (-128) #define CHAR_MAX 127 #endif /* Minimum and maximum values a `signed short int' can hold. */ #define SHRT_MIN (-32768) #define SHRT_MAX 32767 /* Maximum value an `unsigned short int' can hold. (Minimum is 0). */ #define USHRT_MAX 65535U /* Minimum and maximum values a `signed int' can hold. */ #define INT_MIN (-INT_MAX-1) #define INT_MAX 2147483647 /* Maximum value an `unsigned int' can hold. (Minimum is 0). */ #define UINT_MAX 4294967295U /* Minimum and maximum values a `signed long int' can hold. (Same as `int'). */ #define LONG_MIN (-LONG_MAX-1) #define LONG_MAX 2147483647 /* Maximum value an `unsigned long int' can hold. (Minimum is 0). */ #define ULONG_MAX 4294967295U n hold. (Minimum is 0). */ #define USHRT_MAX 65535U /* Minimum and maximum values a `signed int' can hold. */ #define INT_MIN (-INT_MAX-1) #define INT_MAX 2147483647 /* Maximum value an `unsigned int' can hold. (Minimum is 0). */ #define UINT_MAX 4294967295U /* Minimum and maximum values a `signed long int' can hold. (Same as `int'). */ #define LONG_MIdiff/.,resv.vak 644 473 0 236 4704324321 6567 macro 1 Go "$1" . tabset -1 "" 9 17 *8 set keys "-el+i-I+s-af+tT-S+zF" set search "dir.h" set indentcol 32 status P0,-1,0,-2,1,0 PZ,0 f436,0,19,32, z C1 Z0 . nimum is 0). */ #define USHRT_MAX 65535U /* Minimum and maximum values a `signed int' can hold. */ #define INT_MIN (-INT_MAX-1) #define INT_MAX 2147483647 /* Maximum value an `unsigned int' can hold. (Minimum is 0). */ #define UINT_MAX 4294967295U /* Minimum and maximum values a `signed long int' can hold. (Same as `int'). */ #define LONG_MI