home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The C Users' Group Library 1994 August
/
wc-cdrom-cusersgrouplibrary-1994-08.iso
/
vol_300
/
355_03
/
slk3.exe
/
SDIF
/
SDIF.C
< prev
Wrap
C/C++ Source or Header
|
1991-06-09
|
14KB
|
656 lines
/*
Sherlock File Comparison Program.
source: sdif.c
started: February 11, 1988
version: see below
PUBLIC DOMAIN SOFTWARE
Sherlock, including the SPP, SDEL and SDIF programs, was placed in
the public domain on June 15, 1991, by its author,
Edward K. Ream
166 North Prospect Ave.
Madison, WI 53705.
(608) 257-0802
Sherlock may be used for any commercial or non-commercial purpose.
DISCLAIMER OF WARRANTIES
Edward K. Ream (Ream) specifically disclaims all warranties,
expressed or implied, with respect to this computer software,
including but not limited to implied warranties of merchantability
and fitness for a particular purpose. In no event shall Ream be
liable for any loss of profit or any commercial damage, including
but not limited to special, incidental consequential or other damages.
*/
/*
Define the compiler to be used (usually from the command line.)
MICRO_SOFT Use MicroSoft v4.00
TURBOC Use Turbo C v1.0
*/
/*
Miscellaneous global constants.
*/
/* Truth values stored in variables of the bool type defined below. */
#define TRUE (1)
#define FALSE (0)
/* Status passed to exit() whenever the program stops early. */
#define BAD_EXIT 1
/* Boolean type: a plain int holding TRUE or FALSE. */
typedef int bool;
/*
Include subsidiary header files.
SL.H MUST be included even if SHERLOCK.C is not linked in.
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <process.h>
#include <io.h>
#include "sl.h"
/* Sign-on banner; main() always prints it at startup. */
#define SIGNON "SDIF v1.7: June 15, 1991"
/*
Usage text. main() prints USAGE1 through USAGE5 as one message when
too few arguments are given. USAGE1 mentions the ++/--tracepoint
arguments only when SHERLOCK is defined.
*/
#ifdef SHERLOCK
#define USAGE1\
"usage: SDIF in1(with macros) in2(without) [options] ++/--tracepoint\n\n"
#else
#define USAGE1\
"usage: SDIF in1(with macros) in2(without) [options]\n\n"
#endif
#define USAGE2 "-b Report inserted blank lines.\n"
#define USAGE3 "-s Report detailed status of comparison.\n"
#define USAGE4 "-v List all lines of in1 file.\n"
#define USAGE5 "-? Print version number and exit.\n"
/*
There are two windows, one for each file. Each window holds up to
WINDOW_LINES lines and up to WINDOW_CHARS characters. These windows
are used to do "look-ahead" comparisons of lines.
Lines are inserted from the back of the window buffers and deleted from
the front. When the back of the window buffer can not hold the next
line, the non-deleted lines are moved to the front of the buffer. The
window buffer is made much larger than required lines so this moving of
lines in the window buffer doesn't happen often.
We expect an average line size of less than 40, so that WINDOW_LINES
(20) lines will take less than about 800 characters. Thus, the
WINDOW_CHARS (7000) character window will have to be repacked only
about once in every 150 inserted lines. This will not slow down the
program in any noticeable way.
*/
/* Maximum number of lines held in each window at one time. */
#define WINDOW_LINES 20
/* Size, in characters, of each window's text buffer. */
#define WINDOW_CHARS 7000
/* Global flags. Each is set to TRUE by main() when its option is given. */
bool b_flag = FALSE; /* -b: report inserted blank lines. */
bool v_flag = FALSE; /* -v: list all lines of the in1 file. */
bool s_flag = FALSE; /* -s: report detailed status of the comparison. */
/*
Define the windows.
A w_type holds one input file together with its look-ahead window.
The text of every line in the window is stored NUL-terminated in
window[]; index[k] is the offset in window[] at which line k starts.
*/
typedef struct {
FILE * file; /* File handle. */
bool eof; /* End of file flag; set by fill_buf(). */
int line; /* File line number of window line 0 (starts at 1). */
int nlines; /* # of lines in window. */
int index[WINDOW_LINES]; /* Start offsets of the lines in window[]. */
char window[WINDOW_CHARS]; /* Chars of window. */
int first; /* Index of first character still in use. */
int last; /* Index of last character+1. */
} w_type;
/* w1 is the window for file in1, w2 the window for file in2. */
w_type w1, w2;
/*
Global file names. main() sets them from the first two command line
arguments that are not options; they stay NULL until then.
*/
char *in1 = NULL; /* Name of the file read through w1. */
char *in2 = NULL; /* Name of the file read through w2. */
/*
    Function prototypes.

    Every function defined in this file is declared here so that each
    call is checked against a prototype.  compare() is called by sdif()
    and resynch() ahead of its definition, so it must be declared too.
*/
void advance (w_type *wp);
bool compare (int n1, int n2);
bool fill_buf (char * buffer, w_type *wp);
void insert (w_type *wp);
void print_change (int n);
void print_insert (int n1, int n2);
void print_match (void);
bool resynch (int n1, int n2);
void sdif (void);
/*
    Main routine.  Process command line arguments, open the two input
    files, initialize both windows and run the comparison.

    The options -b, -s and -v set the matching global flag and may
    appear anywhere on the command line.  The first two arguments that
    are not options become in1 and in2; a third one is an error.

    The program exits with BAD_EXIT when only -? is given, when the
    command line is incomplete, or when either file can not be opened.
*/
int
main(int argc, char **argv)
{
    char *arg;
    int i;

    /* These two calls MUST come before any others. */
    SL_INIT();
    SL_PARSE(argc, argv, "++", "--");

    TRACEPB("main", printf("(%d, %p)\n", argc, argv));

    /* Always put out the sign on message. */
    printf("%s\n", SIGNON);

    /* Make first test for correct command line. */
    if (argc == 2 && (strcmp(argv[1], "-?")==0)) {
        /* The sign on message above is the version report. */
        exit(BAD_EXIT);
    }
    else if (argc < 3) {
        printf("%s%s%s%s%s", USAGE1, USAGE2, USAGE3, USAGE4, USAGE5);
        exit(BAD_EXIT);
    }

    /* Process all the arguments on the command line. */
    argc--;
    argv++;
    while (argc-- > 0) {
        arg = *argv++;
        if (strcmp(arg, "-b")==0) {
            b_flag = TRUE;
        }
        else if (strcmp(arg, "-s")==0) {
            s_flag = TRUE;
        }
        else if (strcmp(arg, "-v")==0) {
            v_flag = TRUE;
        }
        else if (strcmp(arg, "-?")==0) {
            /* Ignore it. */
            ;
        }
        else if (in1 == NULL) {
            in1 = arg;
        }
        else if (in2 == NULL) {
            in2 = arg;
        }
        else {
            printf("Extra file argument: %s\n", arg);
            exit(BAD_EXIT);
        }
    }

    /*
        The argument count test above also counts options, so a command
        line such as "SDIF -b -s" gets this far without naming two
        files.  Stop here instead of handing a NULL name to fopen().
    */
    if (in1 == NULL || in2 == NULL) {
        printf("%s%s%s%s%s", USAGE1, USAGE2, USAGE3, USAGE4, USAGE5);
        exit(BAD_EXIT);
    }

    /* Open the input files. */
    w1.file = fopen(in1, "r");
    if (w1.file == NULL) {
        printf("Can not open %s\n", in1);
        exit(BAD_EXIT);
    }
    w2.file = fopen(in2, "r");
    if (w2.file == NULL) {
        printf("Can not open %s\n", in2);
        /* w2.file is NULL here; the stream to release is the first one. */
        fclose(w1.file);
        exit(BAD_EXIT);
    }

    /* Initialize the windows. */
    w1.line = 1;
    w2.line = 1;
    w1.eof = FALSE;
    w2.eof = FALSE;
    w1.nlines = 0;
    w2.nlines = 0;
    w1.first = 0;
    w2.first = 0;
    w1.last = 0;
    w2.last = 0;
    for (i = 0; i < WINDOW_LINES; i++) {
        w1.index[i] = 0;
        w2.index[i] = 0;
    }

    /* Compare the two files and print out differences. */
    sdif();

    /* Close the files. */
    fclose(w1.file);
    fclose(w2.file);

    /* Print out statistics. */
    TRACE("dump", SL_DUMP());

    /*
        NOTE(review): main is declared int but leaves through
        RETURN_VOID; confirm in sl.h what the macro expands to.
    */
    RETURN_VOID("main");
}
/*
Compare two files line by line.
Print lines that do not match.
Assume that file 1 contains any inserted lines.

Both windows (w1 and w2) are filled first. The code at the label
"loop" is then repeated until a RETURN_VOID is reached: each pass
consumes at least one line from at least one window and jumps back
to the label.
*/
void
sdif(void)
{
int i, j;
TICKB("sdif");
/* Fill up the window buffers. */
for (i = 0; i < WINDOW_LINES; i++) {
insert(&w1);
insert(&w2);
}
loop:
/* Both windows are empty: the comparison is finished. */
if (w1.nlines == 0 && w2.nlines == 0) {
RETURN_VOID("sdif");
}
/*
One window is empty while the other still holds 10 or more lines:
report which file ended first and stop. With fewer than 10 lines
left, control falls through and the remaining lines are handled by
the code below.
*/
else if (w1.nlines == 0 && w2.nlines >= 10) {
printf("\nFile %s ends before file %s\n", in1, in2);
RETURN_VOID("sdif");
}
else if (w2.nlines == 0 && w1.nlines >= 10) {
printf("\nFile %s ends before file %s\n", in2, in1);
RETURN_VOID("sdif");
}
if(compare(0, 0)) {
/* Lines match. */
print_match();
advance(&w1);
advance(&w2);
goto loop;
}
/*
Look for some changed or inserted lines.
For each look-ahead distance i, the "changed" test is tried before
the "inserted" test, so the smallest i that satisfies either wins.
*/
for (i = 1; i < WINDOW_LINES; i++) {
/* 3/9/89: don't resynch on duplicated lines. */
/*
Line i matches in both windows, and line i of file 1 differs from
line i+1 of file 2: treat the first i lines of both files as
changed.
*/
if (resynch(i, i) && !compare(i, i+1)) {
if (s_flag) {
printf("----- %d changed lines\n", i);
}
/* Print all i lines before advancing; advance() shifts the window. */
for (j = 0; j < i; j++) {
print_change(j);
}
for (j = 0; j < i; j++) {
advance(&w1);
advance(&w2);
}
goto loop;
}
/*
Line i of file 1 matches line 0 of file 2: treat the first i lines
of file 1 as inserted and skip them in file 1 only.
*/
if (resynch(i, 0)) {
if (s_flag) {
printf("----- %d inserted lines\n", i);
}
for (j = 0; j < i; j++) {
print_insert(j, -1);
}
for (j = 0; j < i; j++) {
advance(&w1);
}
goto loop;
}
}
/*
Look for lines inserted in file 2.
This can happen as a result of previous erroneous advances.
*/
for (i = 1; i < WINDOW_LINES; i++) {
/* Line 0 of file 1 matches line i of file 2: skip i lines of file 2. */
if (resynch(0, i)) {
if (s_flag) {
printf("----- %d back inserted lines\n", i);
}
for (j = 0; j < i; j++) {
print_insert(-1, j);
}
for (j = 0; j < i; j++) {
advance(&w2);
}
goto loop;
}
}
/*
We haven't identified either a single group of insertions or
a single group of changed lines. We have probably just seen
a combination of changes and insertions. Just advance both
files one line each. We'll get back in synch quickly.
*/
if (s_flag) {
printf("----- failure advance\n");
}
/* Report line 0 of file 1 if it has one, otherwise line 0 of file 2. */
if (w1.nlines) {
print_insert(0, -1);
}
else {
print_insert(-1, 0);
}
/* advance() does nothing for a window that is already empty. */
advance(&w1);
advance(&w2);
goto loop;
}
/*
    Drop the first line of the indicated window.

    The characters of the dropped line are released by moving the
    window's "first" offset past them, the remaining line offsets are
    shifted down one slot, and the file line number of the window is
    bumped.  If the window was full before the call, one new line is
    read in to replace the dropped one.  An empty window is left alone.
*/
void
advance(w_type *wp)
{
    int consumed;
    int remaining;

    TRACEPB("advance", printf("(%p)\n", wp));

    if (wp -> nlines == 0) {
        RETURN_VOID("advance");
    }

    /* Release the text of line 0, including its terminating NUL. */
    consumed = strlen(&wp -> window[wp -> index[0]]) + 1;
    wp -> first += consumed;

    /* Slide the offsets of the surviving lines down by one slot. */
    remaining = wp -> nlines - 1;
    memmove(&wp -> index[0], &wp -> index[1],
        remaining * sizeof wp -> index[0]);

    wp -> nlines = remaining;
    wp -> line++;

    /* Refill the buffer. */
    if (remaining == WINDOW_LINES-1) {
        insert(wp);
    }

    TICKX("advance");
}
/*
Return TRUE if the indicated lines match.
*/
bool
compare(int n1, int n2)
{
char *p1, *p2;
int i;
TRACEPB("compare", printf("(%d, %d)\n", n1, n2));
if (n1 >= w1.nlines || n2 >= w2.nlines) {
RETURN_BOOL("compare", FALSE);
}
else {
p1 = &w1.window[w1.index[n1]];
p2 = &w2.window[w2.index[n2]];
RETURN_BOOL("compare", strcmp(p1, p2) == 0);
}
}
/*
    Size, including the terminating NUL, of the line buffer that
    insert() hands to fill_buf().  fill_buf() never stores more than
    this many characters.
*/
#define LINE_BUF_SIZE 1000

/*
    Fill a buffer from a file.
    Set the end of file flag if appropriate.

    buffer  Receives the next line of wp's file as a NUL-terminated
            string.  It must hold at least LINE_BUF_SIZE characters.
    wp      The window whose file is read.

    Carriage returns are dropped.  The newline that ends a line is
    kept in the buffer.  A line too long for the buffer is returned in
    pieces: each call delivers at most LINE_BUF_SIZE-1 characters and
    the next call continues where this one stopped, so nothing is
    written past the end of the buffer and no input is discarded.

    Returns TRUE when the buffer holds text, FALSE when the end of the
    file was already reached or is reached before any character is
    stored.
*/
bool
fill_buf(char *buffer, w_type *wp)
{
    int c;
    int i;

    TRACEPB("fill_buf", printf("(%p, %p)\n", buffer, wp));

    if (wp -> eof) {
        RETURN_BOOL("fill_buf", FALSE);
    }

    /* Stop one short of the buffer size to leave room for the NUL. */
    for (i = 0; i < LINE_BUF_SIZE - 1;) {
        c = fgetc(wp -> file);
        if (c == '\r') {
            continue;
        }
        if (c == EOF) {
            wp -> eof = TRUE;
            if (i == 0) {
                RETURN_BOOL("fill_buf", FALSE);
            }
            /* The last line of the file has no newline. */
            break;
        }
        buffer[i++] = (char) c;
        if (c == '\n') {
            break;
        }
    }
    buffer[i] = '\0';

    RETURN_BOOL("fill_buf", TRUE);
}

/*
    Insert a line at the end of the window.
    Pack the buffer if required.

    wp  The window that receives the next line of its file.

    Nothing happens when the file is exhausted.  When the window
    already holds WINDOW_LINES lines a message is printed and nothing
    is read, so no line of the file is lost.  The program exits with
    BAD_EXIT if the line does not fit even after packing.
*/
void
insert(w_type *wp)
{
    char buffer [LINE_BUF_SIZE];
    int size, avail;
    int i;

    TRACEPB("insert", printf("(%p)\n", wp));

    /* Check for a free slot before consuming a line from the file. */
    if (wp -> nlines >= WINDOW_LINES) {
        printf("insert: too many lines.\n");
        RETURN_VOID("insert");
    }

    if (!fill_buf(buffer, wp)) {
        RETURN_VOID("insert");
    }

    size = (int) strlen(buffer) + 1;
    avail = WINDOW_CHARS - wp -> last;

    /* Compact buffer. */
    if (size >= avail) {

        /* Adjust indices. */
        for (i = 0; i < wp -> nlines; i++) {
            wp -> index[i] -= wp -> first;
        }

        /* Move the live characters to the front; the ranges may overlap. */
        memmove(wp -> window, &wp -> window[wp -> first],
            (size_t) (wp -> last - wp -> first));

        /* Adjust counts. */
        wp -> last -= wp -> first;
        avail += wp -> first;
        wp -> first = 0;
    }

    /* Insert the buffer at the end of the window. */
    if (size < avail) {
        strcpy(&wp -> window[wp -> last], buffer);
        wp -> index[wp -> nlines] = wp -> last;
        wp -> last += size;
        wp -> nlines++;
    }
    else {
        printf("not enough room in window!!\n");
        exit(BAD_EXIT);
    }

    TICKX("insert");
}
/*
Print a changed line (from file 1).
*/
void
print_change(int n)
{
int i;
char *p, *p1, *p2;
TRACEPB("print_change", printf("(%d)\n", n));
p1 = &w1.window[w1.index[n]];
p2 = &w2.window[w2.index[n]];
p = p1;
/* Do not print mismatches that involve only white space. */
if (!b_flag && !v_flag) {
while (*p1) {
if (*p1 != ' ' && *p1 != '\t' && *p1 != '\n') {
goto print;
}
p1++;
}
while (*p2) {
if (*p2 != ' ' && *p2 != '\t' && *p2 != '\n') {
goto print;
}
p2++;
}
RETURN_VOID("print_change");
}
print:
if (v_flag) {
printf("%3d %3d* %s", w1.line+n, w2.line+n, p);
}
else {
printf("%3d %3d: %s", w1.line+n, w2.line+n, p);
}
TICKX("print_change");
}
/*
    Print an inserted line.

    The line comes from window w1 (file in1) if n1 >= 0, otherwise
    from window w2 (file in2) at position n2.  The column for the
    other file's line number is left blank.  Unless -b or -v was
    given, a line made up only of spaces, tabs and newlines is not
    printed.
*/
void
print_insert(int n1, int n2)
{
    const char *text;

    TRACEPB("print_insert", printf("(%d, %d)\n", n1, n2));

    /* Do not print blank lines. */
    if (!b_flag && !v_flag) {
        if (n1 >= 0) {
            text = &w1.window[w1.index[n1]];
        }
        else if (n2 >= 0) {
            text = &w2.window[w2.index[n2]];
        }
        else {
            text = NULL;
        }
        if (text != NULL && text[strspn(text, " \t\n")] == '\0') {
            RETURN_VOID("print_insert");
        }
    }

    if (n1 >= 0) {
        text = &w1.window[w1.index[n1]];
        printf("%3d %3s: %s", w1.line+n1, " ", text);
    }
    else {
        text = &w2.window[w2.index[n2]];
        printf("%3s %3d: %s", " ", w2.line+n2, text);
    }

    TICKX("print_insert");
}
/*
    Print the matched line at the front of both windows, with its line
    number in each file, but only when the -v option was given.  The
    text is taken from window w1.
*/
void
print_match(void)
{
    TICKB("print_match");

    if (v_flag) {
        const char *text = &w1.window[w1.index[0]];

        printf("%3d %3d: %s", w1.line, w2.line, text);
    }

    TICKX("print_match");
}
/*
    Return TRUE if line n1 of window w1 and line n2 of window w2 match
    and can be used to resynchronize the files.

    A match on a line holding at least one character other than space,
    tab or newline is accepted at once.  So is a match on the last
    line of both windows once both files have reached end of file.
    Any other match is trivial: the decision is then taken from the
    next pair of lines (n1+1, n2+1).
*/
bool
resynch(int n1, int n2)
{
    const char *text;
    bool has_content;
    bool at_end;

    TRACEPB("resynch", printf("(%d, %d)\n", n1, n2));

    if (!compare(n1, n2)) {
        RETURN_BOOL("resynch", FALSE);
    }

    /* Make sure we have a non-trivial resynch point. */
    text = &w1.window[w1.index[n1]];
    has_content = text[strspn(text, " \t\n")] != '\0';

    at_end = w1.eof && w2.eof &&
        n1 == w1.nlines-1 && n2 == w2.nlines-1;

    if (!has_content && !at_end) {
        /* Trivial matched lines.  Look ahead for an answer. */
        RETURN_BOOL("resynch", resynch(n1+1, n2+1));
    }
    else {
        /* Non-trivial matched lines or match to end of file. */
        RETURN_BOOL("resynch", TRUE);
    }
}