home *** CD-ROM | disk | FTP | other *** search
- /* File I/O for Eclectus integration utilities.
- Copyright (C) 1992-1996 Eclectus (D. John Anderson, Alan B. Harper).
-
- This file is part of the Eclectus integration utilities.
-
- Eclectus integration utilities are free software; you can redistribute
- it and/or modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 1, or
- (at your option) any later version.
-
- Eclectus integration utilities is distributed in the hope that it
- will be useful, but WITHOUT ANY WARRANTY; without even the implied
- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See the GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with the Eclectus integration utilities; see the file COPYING.
- If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge,
- MA 02139, USA. */
-
- #include "diff.h"
- #include <ctype.h>
- #include <string.h>
- #include <stddef.h>
- #include <stdlib.h>
-
- #ifdef DF_MACHINE_MACINTOSH
- #include <Errors.h>
- #include <Files.h>
- #include <CursorCtl.h>
- #endif
-
- #ifdef DF_MACHINE_WINDOWS
- #define WIN32_LEAN_AND_MEAN
- #include <Windows.h>
- #endif
-
- #ifdef DF_MACHINE_NEXT
- #include <sys/types.h>
- #include <sys/dir.h>
- #include <sys/stat.h>
- #include <sys/time.h>
- /*
- * These defines are normally in libc.h. However, libc.h won't compile correctly
- * when the ansi flag is on and there are too many problems to fix so we'll just declare
- * these items here.
- */
- extern int mkdir(const char *, int);
- extern int chmod(const char *, int);
- #endif
-
- /*
- * You can modify this list of extensions to specify which files are binary on machines
- * other than Macintosh OS. Macintosh stores attributes with a file which indicate
- * binary files.
- *
- * You can also specify extensions to ignore.
- *
- * The program will treat the case of extensions as insignificant.
- *
- * Note: Remember to update MAX_EXTENSION_LENGTH to reflect the longest extension in
- * both lists. You must enter these extensions in lower case. The last extension must
- * be the empty string.
- */
-
- #define MAX_EXTENSION_LENGTH 3
-
- #ifndef DF_MACHINE_MACINTOSH
/*
 * Extensions (lower case, without the dot) of files that are treated as
 * binary on machines that do not store a file type with the file. The
 * table is terminated by the empty string.
 *
 * The elements are `const char *' so that the table can be handed to
 * PathHasExtension (), whose second parameter is `const char **', without
 * an incompatible pointer conversion.
 */
static const char *binaryExtensions [] = {
    "aps",
    "bmp",
    "bsc",
    "dll",
    "exe",
    "hlp",
    "ico",
    "ink",
    "lib",
    "mdp",
    "mpw",
    "mwk",
    "ncb",
    "obj",
    "os",
    "osx",
    "pch",
    "pdb",
    "prj",
    "res",
    "sbr",
    "scr",
    "vcp",
    "wri",
    "8bf",
    ""                      /* end-of-list marker */
};
- #endif
-
/*
 * Extensions (lower case, without the dot) of files that IgnoreFile ()
 * reports as ignorable. The table is terminated by the empty string.
 *
 * The elements are `const char *' so that the table can be handed to
 * PathHasExtension (), whose second parameter is `const char **', without
 * an incompatible pointer conversion.
 */
static const char *ignoreExtensions [] = {
    "i",
    "idb",                  /* MSVC 4.0 intermediate file */
    "ilk",                  /* MSVC intermediate link file */
    "ncb",                  /* MSVC 4.0 binary project file cache */
    "obj",                  /* MSVC Object files */
    "pch",                  /* MSVC precompiled header files */
    "pdb",                  /* MSVC program database */
    "tmp",                  /* MSVC temporary file */
    ""                      /* end-of-list marker */
};
-
- /* Lines are put into equivalence classes (of lines that match in line_cmp).
- Each equivalence class is represented by one of these structures,
- but only while the classes are being computed.
- Afterward, each class is represented by a number. */
- struct equivclass
- {
- struct equivclass *next; /* Next item in this bucket. */
- struct line_def line; /* A line that fits this class. */
- };
-
- /* Hash-table: array of buckets, each being a chain of equivalence classes. */
- static struct equivclass **buckets;
-
- /* Size of the bucket array. */
- static int nbuckets;
-
- /* Array in which the equivalence classes are allocated.
- The bucket-chains go through the elements in this array.
- The number of an equivalence class is its index in this array. */
- static struct equivclass *equivs;
-
- /* Index of first free element in the array `equivs'. */
- static int equivs_index;
-
/* Candidate bucket counts for the hash table: for a run of consecutive
   powers of two, the largest prime below that power. The upper entries are
   preposterously large. The list ends with -1. Should any entry turn out
   not to be prime, blame primes (6), not the author. */
static int primes[] =
{
    509,      1021,     2039,     4093,
    8191,     16381,    32749,    65521,
    131071,   262139,   524287,   1048573,
    2097143,  4194301,  8388593,  16777213,
    33554393, 67108859,
    -1                          /* end-of-list marker */
};
-
/* Forward declarations of the file-local helpers defined further down. */
static void find_and_hash_each_line (struct file_data *filePtr);
static void find_equiv_class (struct file_data *filePtr);
static void find_identical_ends (struct file_data *file0Ptr,
                                 struct file_data *file1Ptr);
static int PathHasExtension (const char *pathCharPtr,
                             const char **pathExtensions);
-
-
- /*
- * This is a machine dependent routine that copies selected file attributes
- * from the file named sourceNamePtr to the file named destNamePtr according
- * to fileAttributes, which may contain any of the following bits:
- * LOCK_MASK - locks the destination file
- * DATE_MASK - copies the date from the source file to the destination file
- * RESOURCE_MASK - copies the resource fork on Mac, copies the FILE_ATTRIBUTE_HIDDEN
- * and FILE_ATTRIBUTE_SYSTEM attribute on Windows and the execute
- * bit on NeXT.
- * We all know that this routine isn't reliable because of "race conditions."
- * The time that occurs between when you read a filename and when you access
- * it allows the filename to change in a multi-user environment. However, the
- * typical brainless operating systems don't let you refer to files using an
- * open reference number for many operations, such as those used in this
- * routine.
- */
-
- #ifdef DF_MACHINE_MACINTOSH
- void
- CopyFileAttributes (const char *sourceNamePtr, const char *destNamePtr, const int fileAttributes)
- {
- HParamBlockRec destFInfoHParamBlock;
- HParamBlockRec destLockHParamBlock;
- Str255 destNamePString;
- ParamBlockRec destResourceParamBlk;
- int ioResult;
- int pathLength;
- int sourceFInfoExists;
- HParamBlockRec sourceFInfoHParamBlock;
- Str255 sourceNamePString;
- ParamBlockRec sourceResourceParamBlk;
-
- if (fileAttributes != 0) {
- pathLength = strlen (destNamePtr);
- if (pathLength > FILENAME_MAX) {
- fprintf (stderr, "%s\n", destNamePtr);
- Error (PATH_TOO_LONG);
- }
- destNamePString [0] = (unsigned char) pathLength;
- memcpy (destNamePString + 1, destNamePtr, pathLength);
-
- pathLength = strlen (sourceNamePtr);
- if (pathLength > FILENAME_MAX) {
- fprintf (stderr, "%s\n", sourceNamePtr);
- Error (PATH_TOO_LONG);
- }
- sourceNamePString [0] = (unsigned char) pathLength;
- memcpy (sourceNamePString + 1, sourceNamePtr, pathLength);
-
- sourceFInfoExists = FALSE;
- if ((fileAttributes & (DATE_MASK | RESOURCE_MASK)) != 0) {
- bzero (&destFInfoHParamBlock, sizeof (HParamBlockRec));
- destFInfoHParamBlock.fileParam.ioNamePtr = destNamePString;
- ioResult = PBHGetFInfoSync (&destFInfoHParamBlock);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
-
- bzero (&sourceFInfoHParamBlock, sizeof (HParamBlockRec));
- sourceFInfoHParamBlock.fileParam.ioNamePtr = sourceNamePString;
- ioResult = PBHGetFInfoSync (&sourceFInfoHParamBlock);
- sourceFInfoExists = ioResult == noErr;
- }
-
- if ((fileAttributes & RESOURCE_MASK) != 0) {
- bzero (&sourceResourceParamBlk, sizeof (ParamBlockRec));
- sourceResourceParamBlk.fileParam.ioNamePtr = sourceNamePString;
- sourceResourceParamBlk.ioParam.ioPermssn = fsRdPerm;
- ioResult = PBOpenRFSync (&sourceResourceParamBlk);
- if (ioResult == noErr) { /* Don't copy if source doesn't exist */
- bzero (&destResourceParamBlk, sizeof (ParamBlockRec));
- destResourceParamBlk.fileParam.ioNamePtr = destNamePString;
- destResourceParamBlk.ioParam.ioPermssn = fsWrPerm;
- ioResult = PBOpenRFSync (&destResourceParamBlk);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- ioResult = PBGetEOFSync (&sourceResourceParamBlk);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- sourceResourceParamBlk.ioParam.ioBuffer = xmalloc ((size_t) sourceResourceParamBlk.ioParam.ioMisc);
- sourceResourceParamBlk.ioParam.ioReqCount = (long) sourceResourceParamBlk.ioParam.ioMisc;
- sourceResourceParamBlk.ioParam.ioPosMode = fsFromStart;
- ioResult = PBReadSync (&sourceResourceParamBlk);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- destResourceParamBlk.ioParam.ioBuffer = sourceResourceParamBlk.ioParam.ioBuffer;
- destResourceParamBlk.ioParam.ioReqCount = (long) sourceResourceParamBlk.ioParam.ioMisc;
- destResourceParamBlk.ioParam.ioPosMode = fsFromStart;
- ioResult = PBWriteSync (&destResourceParamBlk);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- free (sourceResourceParamBlk.ioParam.ioBuffer);
- ioResult = PBCloseSync (&sourceResourceParamBlk);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- ioResult = PBCloseSync (&destResourceParamBlk);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
- if (sourceFInfoExists) {
- destFInfoHParamBlock.fileParam.ioFlFndrInfo.fdType =
- sourceFInfoHParamBlock.fileParam.ioFlFndrInfo.fdType;
- destFInfoHParamBlock.fileParam.ioFlFndrInfo.fdCreator =
- sourceFInfoHParamBlock.fileParam.ioFlFndrInfo.fdCreator;
- destFInfoHParamBlock.fileParam.ioFlFndrInfo.fdLocation =
- sourceFInfoHParamBlock.fileParam.ioFlFndrInfo.fdLocation;
- }
- }
- if ((fileAttributes & DATE_MASK) != 0 && sourceFInfoExists) {
- destFInfoHParamBlock.fileParam.ioFlCrDat = sourceFInfoHParamBlock.fileParam.ioFlCrDat;
- destFInfoHParamBlock.fileParam.ioFlMdDat = sourceFInfoHParamBlock.fileParam.ioFlMdDat;
- }
- if ((fileAttributes & LOCK_MASK) != 0) {
- bzero (&destLockHParamBlock, sizeof (HParamBlockRec));
- destLockHParamBlock.fileParam.ioNamePtr = destNamePString;
- ioResult = PBHSetFLockSync (&destLockHParamBlock);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
-
- if ((fileAttributes & (DATE_MASK | RESOURCE_MASK)) != 0) {
- destFInfoHParamBlock.fileParam.ioVRefNum = 0;
- destFInfoHParamBlock.fileParam.ioDirID = 0;
- ioResult = PBHSetFInfoSync (&destFInfoHParamBlock);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
-
- }
- }
- #endif
-
- #ifdef DF_MACHINE_WINDOWS
- void
- CopyFileAttributes (const char *sourceNamePtr, const char *destNamePtr, const int fileAttributes)
- {
- FILETIME creationTime;
- DWORD destFileAttributes;
- HANDLE destFileHandle;
- FILETIME lastAccessTime;
- FILETIME lastWriteTime;
- DWORD sourceFileAttributes;
- HANDLE sourceFileHandle;
-
- if (fileAttributes != 0) {
- if ((fileAttributes & DATE_MASK) != 0) {
- sourceFileHandle = CreateFile(sourceNamePtr, /* file name pointer */
- GENERIC_READ, /* open for reading */
- FILE_SHARE_READ, /* share reads */
- (LPSECURITY_ATTRIBUTES) NULL, /* no security */
- OPEN_EXISTING, /* existing file only */
- FILE_ATTRIBUTE_NORMAL, /* normal file */
- (HANDLE) NULL); /* no attr. template */
- if (sourceFileHandle == INVALID_HANDLE_VALUE)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- if (!GetFileTime (sourceFileHandle, &creationTime,
- &lastAccessTime,
- &lastWriteTime))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- if (!CloseHandle (sourceFileHandle))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- destFileHandle = CreateFile (destNamePtr, /* file name pointer */
- GENERIC_WRITE, /* open for writing */
- FILE_SHARE_WRITE, /* share writes */
- (LPSECURITY_ATTRIBUTES) NULL, /* no security */
- OPEN_EXISTING, /* existing file only */
- FILE_ATTRIBUTE_NORMAL, /* normal file */
- (HANDLE) NULL); /* no attr. template */
- if (destFileHandle == INVALID_HANDLE_VALUE)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, sourceNamePtr);
- if (!SetFileTime (destFileHandle, &creationTime,
- &lastAccessTime,
- &lastWriteTime))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- if (!CloseHandle (destFileHandle))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destFileHandle);
- }
-
- if ((fileAttributes & (LOCK_MASK + RESOURCE_MASK)) != 0) {
- destFileAttributes = GetFileAttributes (destNamePtr);
- if (destFileAttributes == -1)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- if ((fileAttributes & RESOURCE_MASK) != 0) {
- sourceFileAttributes = GetFileAttributes (sourceNamePtr);
- if (sourceFileAttributes != -1)
- destFileAttributes =
- (destFileAttributes & ~(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) |
- (sourceFileAttributes & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM));
- }
- if ((fileAttributes & LOCK_MASK) != 0)
- destFileAttributes |= FILE_ATTRIBUTE_READONLY;
- if (!SetFileAttributes (destNamePtr, destFileAttributes))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
- }
- }
- #endif
-
- #ifdef DF_MACHINE_NEXT
- void
- CopyFileAttributes (const char *sourceNamePtr, const char *destNamePtr, const int fileAttributes)
- {
- struct stat destStatBuffer;
- int ioResult;
- int sourceIOResult;
- struct timeval timeValue[2];
- struct stat sourceStatBuffer;
-
- if (fileAttributes != 0) {
- sourceIOResult = 0;
- if ((fileAttributes & (LOCK_MASK + RESOURCE_MASK)) != 0) {
- ioResult = stat(destNamePtr, &destStatBuffer);
- if (ioResult != 0)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
-
- if ((fileAttributes & (DATE_MASK + RESOURCE_MASK)) != 0)
- sourceIOResult = stat(sourceNamePtr, &sourceStatBuffer);
-
- if (sourceIOResult == 0 && (fileAttributes & RESOURCE_MASK) != 0)
- destStatBuffer.st_mode = (destStatBuffer.st_mode & ~0111) + (sourceStatBuffer.st_mode & 0111);
-
- if ((fileAttributes & LOCK_MASK) != 0)
- destStatBuffer.st_mode &= ~0222;
-
- if (sourceIOResult == 0 && (fileAttributes & DATE_MASK) != 0) {
- timeValue[0].tv_sec = sourceStatBuffer.st_atime;
- timeValue[0].tv_usec = 0;
- timeValue[1].tv_sec = sourceStatBuffer.st_mtime;
- timeValue[1].tv_usec = 0;
- ioResult = utimes (destNamePtr, timeValue);
- if (ioResult != 0)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
-
- if ((fileAttributes & (LOCK_MASK + RESOURCE_MASK)) != 0) {
- ioResult = chmod (destNamePtr, destStatBuffer.st_mode);
- if (ioResult != 0)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, destNamePtr);
- }
- }
- }
- #endif
-
- FILE *
- CreateTypedFile (const char *pathCharPtr, int binary)
- /*
- * It is well known that getting and setting the file type after creating the file
- * is non-atomic and suffers from potential race conditions. There is no
- * alternative because we are required to return a FILE *.
- */
- {
- char *fileModeCharPtr;
- FILE *filePtr;
- #ifdef DF_MACHINE_MACINTOSH
- HParamBlockRec hParamBlock;
- int ioResult;
- Str255 pathNamePString;
- int pathLength;
- #endif
-
- #ifdef DF_MACHINE_MACINTOSH
- SpinCursor (1);
- bzero (&hParamBlock, sizeof (HParamBlockRec));
- pathLength = strlen (pathCharPtr);
- if (pathLength > FILENAME_MAX)
- return (BAD_FILE_TYPE);
- pathNamePString [0] = (unsigned char) pathLength;
- memcpy (pathNamePString + 1, pathCharPtr, pathLength);
- hParamBlock.fileParam.ioNamePtr = pathNamePString;
- #endif
- fileModeCharPtr = "w";
- if (binary)
- fileModeCharPtr = "wb";
- filePtr = fopen (pathCharPtr, fileModeCharPtr);
- if (filePtr == NULL)
- ErrorWithStringArgument (CANT_CREATE_FILE, pathCharPtr);
- #ifdef DF_MACHINE_MACINTOSH
- ioResult = PBHGetFInfoSync (&hParamBlock);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, pathCharPtr);
- hParamBlock.fileParam.ioVRefNum = 0;
- hParamBlock.fileParam.ioDirID = 0;
- if (binary) {
- hParamBlock.fileParam.ioFlFndrInfo.fdCreator = (OSType) '\?\?\?\?';
- hParamBlock.fileParam.ioFlFndrInfo.fdType = (OSType) '\?\?\?\?';
- } else /* ASCII */
- hParamBlock.fileParam.ioFlFndrInfo.fdCreator = (OSType) 'MPS ';
- ioResult = PBHSetFInfoSync (&hParamBlock);
- if (ioResult != noErr)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, pathCharPtr);
- #endif
- return (filePtr);
- }
-
- /*
- * A machine dependent routine that tests a path and returns what it can figure out
- * about what type of file it is
- */
-
- #ifdef DF_MACHINE_MACINTOSH
- int
- FileType (const char *pathCharPtr)
- {
- OSErr ioResult;
- CInfoPBRec paramBlk;
- int pathLength;
- Str255 pathNamePString;
-
- bzero (¶mBlk, sizeof (CInfoPBRec));
- pathLength = strlen (pathCharPtr);
- if (pathLength > FILENAME_MAX)
- return (BAD_FILE_TYPE);
- pathNamePString [0] = (unsigned char) pathLength;
- memcpy (pathNamePString + 1, pathCharPtr, pathLength);
- paramBlk.hFileInfo.ioNamePtr = pathNamePString;
- ioResult = PBGetCatInfoSync (¶mBlk);
- if (ioResult != noErr)
- return (BAD_FILE_TYPE);
-
- if ((paramBlk.hFileInfo.ioFlAttrib & 0X10) != 0)
- return (DIRECTORY_TYPE);
- if (paramBlk.hFileInfo.ioFlFndrInfo.fdType == (OSType) 'TEXT')
- return (TEXT_TYPE);
- return (BINARY_TYPE);
- }
- #endif
-
- #ifndef DF_MACHINE_MACINTOSH
- int
- FileType (const char *pathCharPtr)
- {
- #ifdef DF_MACHINE_WINDOWS
- {
- DWORD fileAttributes;
-
- fileAttributes = GetFileAttributes (pathCharPtr);
- if (fileAttributes == -1)
- return (BAD_FILE_TYPE);
- if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY)
- return (DIRECTORY_TYPE);
- }
- #elif defined (DF_MACHINE_NEXT)
- {
- int ioResult;
- struct stat statBuffer;
-
- ioResult = stat(pathCharPtr, &statBuffer);
- if (ioResult != 0)
- return (BAD_FILE_TYPE);
- if ((statBuffer.st_mode & S_IFMT) == S_IFDIR)
- return (DIRECTORY_TYPE);
- }
- #endif
- if (PathHasExtension (pathCharPtr, binaryExtensions))
- return (BINARY_TYPE);
- return (TEXT_TYPE);
- }
- #endif
-
- /* Split the file into lines, simultaneously computing the hash codes for each line. */
-
- void
- find_and_hash_each_line (register struct file_data *filePtr)
- {
- unsigned char c;
- char *endCharPtr;
- unsigned h;
- unsigned char *ip;
- unsigned char *p;
-
- p = (unsigned char *) filePtr->buffer;
-
- if (filePtr->buffer != NULL) {
- /* Attempt to get a good initial guess as to the number of lines. */
- if (filePtr->linbuf == NULL) {
- filePtr->linbufsize = filePtr->buffered_chars / 50 + 5;
- filePtr->linbuf = (struct line_def *) xmalloc (filePtr->linbufsize * sizeof (struct line_def));
- }
-
- filePtr->buffered_lines = 0;
- endCharPtr = filePtr->buffer + filePtr->buffered_chars;
-
- while ((char *) p < endCharPtr)
- {
- h = 0;
- ip = p;
-
- if (filePtr->prefix_end <= (char *) p && filePtr->suffix_begin >= (char *) p)
- {
- /* Hash this line until we find a newline. */
- while ((c = *p) != '\n')
- {
- h = HASH (h, c);
- ++p;
- }
- }
- else
- /* This line is part of the matching prefix,
- so we don't need to hash it. */
- while (*p != '\n')
- ++p;
-
- /* Maybe increase the size of the line table. */
- if (filePtr->buffered_lines >= filePtr->linbufsize)
- {
- while (filePtr->buffered_lines >= filePtr->linbufsize)
- filePtr->linbufsize *= 2;
- filePtr->linbuf = (struct line_def *) xrealloc (filePtr->linbuf,
- filePtr->linbufsize * sizeof (struct line_def));
- }
- filePtr->linbuf[filePtr->buffered_lines].text = (char *) ip;
- filePtr->linbuf[filePtr->buffered_lines].length = p - ip + 1;
- filePtr->linbuf[filePtr->buffered_lines].hash = h;
- ++filePtr->buffered_lines;
- ++p;
- }
-
- filePtr->linbufsize = filePtr->buffered_lines;
- filePtr->linbuf = (struct line_def *) xrealloc (filePtr->linbuf,
- filePtr->linbufsize * sizeof (struct line_def));
-
- /*
- The following if statement from the GNU sources doesn't work when merging files that
- don't end in a return and no changes are detected. I've modified the if as shown below
- and I'm not 100% sure that this change won't introduce bugs, however it does pass my
- test suite. -- DJA
-
- if (filePtr->missing_newline && filePtr->suffix_begin == endCharPtr)
- */
-
- if (filePtr->missing_newline && filePtr->buffer[filePtr->buffered_chars-1] == '\n')
- --filePtr->linbuf[filePtr->buffered_lines - 1].length;
- }
- }
-
- /* Find the equiv class associated with all lines in filePtr. */
-
- void
- find_equiv_class (register struct file_data *filePtr)
- {
- struct equivclass *b;
- int bucket;
- int equivsValue;
- unsigned int lineIndex;
- struct equivclass *p;
-
- if (filePtr->equivs == NULL)
- filePtr->equivs = (int *) xmalloc (filePtr->buffered_lines * sizeof (int));
-
- for (lineIndex = 0; lineIndex < filePtr->buffered_lines; ++lineIndex)
- {
- p = NULL;
- /* Equivalence class 0 is permanently allocated to lines that were
- not hashed because they were parts of identical prefixes or
- suffixes. */
- if (lineIndex < filePtr->prefix_lines || filePtr->linbuf[lineIndex].text >= filePtr->suffix_begin)
- {
- equivsValue = 0;
- goto SetEquivsValue;
- }
-
- /* Check through the appropriate bucket to see if there isn't already
- an equivalence class for this line. */
- bucket = filePtr->linbuf[lineIndex].hash % nbuckets;
- b = buckets[bucket];
- while (b)
- {
- if (b->line.hash == filePtr->linbuf[lineIndex].hash &&
- b->line.length == filePtr->linbuf[lineIndex].length &&
- !line_cmp (&b->line, &filePtr->linbuf[lineIndex]))
- {
- equivsValue = b - equivs;
- goto SetEquivsValue;
- }
- p = b, b = b->next;
- }
-
- /* Create a new equivalence class in this bucket. */
-
- p = &equivs[equivs_index++];
- p->next = buckets[bucket];
- buckets[bucket] = p;
- p->line = filePtr->linbuf[lineIndex];
- equivsValue = equivs_index - 1;
-
- SetEquivsValue:
- filePtr->equivs[lineIndex] = equivsValue;
-
- }
- }
-
- /* Given a vector of two file_data objects, find the identical prefixes and suffixes of each object. */
-
- static void
- find_identical_ends (register struct file_data *file0Ptr, register struct file_data *file1Ptr)
- {
- char *beg0;
- char *end0;
- int lines;
- register char *p0;
- register char *p1;
-
- if (file0Ptr->buffered_chars == 0 || file1Ptr->buffered_chars == 0)
- {
- file0Ptr->prefix_end = file0Ptr->buffer;
- file1Ptr->prefix_end = file1Ptr->buffer;
- file0Ptr->prefix_lines = file1Ptr->prefix_lines = 0;
- file0Ptr->suffix_begin = file0Ptr->buffer + file0Ptr->buffered_chars;
- file1Ptr->suffix_begin = file1Ptr->buffer + file1Ptr->buffered_chars;
- file0Ptr->suffix_lines = file1Ptr->suffix_lines = 0;
- return;
- }
-
- /* Find identical prefix. */
-
- p0 = file0Ptr->buffer;
- p1 = file1Ptr->buffer;
- lines = 0;
-
- /* Insert end "sentinels", in this case characters that are guaranteed
- to make the equality test false, and thus terminate the loop. */
-
- if (file0Ptr->buffered_chars < file1Ptr->buffered_chars)
- p0[file0Ptr->buffered_chars] = (char) ~p1[file0Ptr->buffered_chars];
- else
- p1[file1Ptr->buffered_chars] = (char) ~p0[file1Ptr->buffered_chars];
-
- /* Loop until first mismatch, or to the sentinel characters. */
- while (1)
- {
- char c = *p0++;
- if (c != *p1++)
- break;
- if (c == '\n')
- ++lines;
- }
-
- /* Don't count missing newline as part of prefix. */
- if ( (file0Ptr->missing_newline && (unsigned int) (p0 - file0Ptr->buffer) > file0Ptr->buffered_chars) ||
- (file1Ptr->missing_newline && (unsigned int) (p1 - file1Ptr->buffer) > file1Ptr->buffered_chars) )
- --p0, --p1, --lines;
-
- /* If the sentinel was passed, and lengths are equal, the
- files are identical. */
-
- if ((unsigned int) (p0 - file0Ptr->buffer) > file0Ptr->buffered_chars
- && file0Ptr->buffered_chars == file1Ptr->buffered_chars)
- {
- file0Ptr->prefix_end = p0 - 1;
- file1Ptr->prefix_end = p1 - 1;
- file0Ptr->prefix_lines = file1Ptr->prefix_lines = lines;
- file0Ptr->suffix_begin = file0Ptr->buffer;
- file1Ptr->suffix_begin = file1Ptr->buffer;
- file0Ptr->suffix_lines = file1Ptr->suffix_lines = lines;
- return;
- }
-
- /* Point at first nonmatching characters. */
- --p0, --p1;
-
- /* Skip back to last line-beginning in the prefix. */
- while (p0 != file0Ptr->buffer && p0[-1] != '\n')
- --p0, --p1;
-
- /* Record the prefix. */
- file0Ptr->prefix_end = p0;
- file1Ptr->prefix_end = p1;
- file0Ptr->prefix_lines = file1Ptr->prefix_lines = lines;
-
- /* Find identical suffix. */
-
- /* P0 and P1 point beyond the last chars not yet compared. */
- p0 = file0Ptr->buffer + file0Ptr->buffered_chars;
- p1 = file1Ptr->buffer + file1Ptr->buffered_chars;
- lines = 0;
-
- if (file0Ptr->missing_newline == file1Ptr->missing_newline)
- {
- end0 = p0; /* Addr of last char in file 0. */
-
- /* Get value of P0 at which we should stop scanning backward:
- this is when either P0 or P1 points just past the last char
- of the identical prefix. */
- if (file0Ptr->buffered_chars < file1Ptr->buffered_chars)
- beg0 = file0Ptr->prefix_end;
- else
- /* Figure out where P0 will be when P1 is at the end of the prefix.
- Thus we only need to test P0. */
- beg0 = (file0Ptr->prefix_end + file0Ptr->buffered_chars - file1Ptr->buffered_chars);
-
- /* Scan back until chars don't match or we reach that point. */
- while (p0 != beg0)
- {
- char c = *--p0;
- if (c != *--p1)
- {
- /* Point at the first char of the matching suffix. */
- ++p0, ++p1;
- break;
- }
- if (c == '\n')
- ++lines;
- }
-
- /* Are we at a line-beginning in both files? */
- if (p0 != end0
- && !((p0 == file0Ptr->buffer || p0[-1] == '\n')
- &&
- (p1 == file1Ptr->buffer || p1[-1] == '\n')))
- {
- /* No. We counted one line too many. */
- --lines;
- /* Advance to next place that is a line-beginning in both files. */
- do
- {
- ++p0, ++p1;
- }
- while (p0 != end0 && p0[-1] != '\n');
- }
- }
-
- /* Record the suffix. */
- file0Ptr->suffix_begin = p0;
- file1Ptr->suffix_begin = p1;
- file0Ptr->suffix_lines = file1Ptr->suffix_lines = lines;
- }
-
- IgnoreFile (const char *nameSuffixPtr)
- {
- return (*nameSuffixPtr == '.' || PathHasExtension (nameSuffixPtr, ignoreExtensions));
- }
-
- /*
- * A machine dependent routine that creates a directory. The result returned is zero if
- * it succeeds otherwise non-zero.
- */
-
- #ifdef DF_MACHINE_MACINTOSH
- int
- MakeDirectory (const char *pathCharPtr)
- {
- OSErr ioResult;
- HParamBlockRec paramBlk;
- int pathLength;
- Str255 pathNamePString;
-
- bzero (¶mBlk, sizeof (HParamBlockRec));
- pathLength = strlen (pathCharPtr);
- if (pathLength > 255)
- return (BAD_FILE_TYPE);
- pathNamePString [0] = (unsigned char) pathLength;
- memcpy (pathNamePString + 1, pathCharPtr, pathLength);
-
- paramBlk.ioParam.ioNamePtr = pathNamePString;
- ioResult = PBDirCreateSync (¶mBlk);
- if (ioResult != noErr)
- return (-1);
- else
- return (0);
- }
- #endif
-
- #ifdef DF_MACHINE_WINDOWS
- int
- MakeDirectory (const char *pathCharPtr)
- {
- if (!CreateDirectory (pathCharPtr, (LPSECURITY_ATTRIBUTES) NULL))
- return (-1);
- return (0);
- }
- #endif
-
- #ifdef DF_MACHINE_NEXT
/*
 * NeXT version of MakeDirectory (): hands back the result of mkdir ()
 * called with mode 0777.
 */
int
MakeDirectory (const char *pathCharPtr)
{
    int ioResult;

    ioResult = mkdir (pathCharPtr, 0777);
    return (ioResult);
}
- #endif
-
/*
 * Reports whether the extension of pathCharPtr -- the text after the last
 * '.' -- appears in pathExtensions, ignoring the case of the path.
 *
 * pathCharPtr     the path or file name to examine
 * pathExtensions  table of lower-case extensions, terminated by ""
 *
 * Returns non-zero on a match and zero otherwise. A path without a '.' or
 * with nothing after the last '.' never matches.
 *
 * The comparison is done in place, so no extension-length limit applies,
 * and every character goes through `unsigned char' before tolower () so
 * that negative `char' values are handled safely.
 */
int
PathHasExtension (const char *pathCharPtr, const char **pathExtensions)
{
    const char **candidatePtr;
    const char *extensionPtr;

    extensionPtr = strrchr (pathCharPtr, '.');
    if (extensionPtr == NULL)
        return (0);
    extensionPtr++;             /* step over the dot */

    for (candidatePtr = pathExtensions; **candidatePtr != '\0'; candidatePtr++) {
        const char *pathCh = extensionPtr;
        const char *listCh = *candidatePtr;

        /* Advance while the lower-cased path character equals the table
           character; a '\0' in either string ends the loop. */
        while (*pathCh != '\0'
               && tolower ((unsigned char) *pathCh) == (unsigned char) *listCh) {
            pathCh++;
            listCh++;
        }
        if (*pathCh == '\0' && *listCh == '\0')
            return (1);
    }
    return (0);
}
-
- /*
- * A machine dependent routine that reads a list of filenames from a directory. A pointer
- * to a contiguous list of null terminated strings is returned. The list of strings is
- * terminated with "". The caller is responsible for freeing the pointer that is returned.
- * Returns NULL if an error occurs.
- */
-
- #ifdef DF_MACHINE_MACINTOSH
- fileNameListPType
- ReadDirectory (const char *pathCharPtr)
- {
- long ioDirID;
- OSErr ioResult;
- short ioVRefNum;
- int nameLength;
- fileNameListPType nameListPtr;
- int nameListLength;
- int nameListTotalSpace;
- CInfoPBRec paramBlk;
- int pathLength;
- Str255 pathNamePString;
-
- bzero (¶mBlk, sizeof (CInfoPBRec));
- pathLength = strlen (pathCharPtr);
- if (pathLength > 255)
- return (NULL);
- pathNamePString [0] = (unsigned char) pathLength;
- memcpy (pathNamePString + 1, pathCharPtr, pathLength);
- paramBlk.hFileInfo.ioNamePtr = pathNamePString;
- ioResult = PBGetCatInfoSync (¶mBlk);
- if (ioResult != noErr || (paramBlk.hFileInfo.ioFlAttrib & 0X10) == 0)
- return (NULL);
-
- /*
- * Set nameListLength to the size of a name list with no elements in it. Since ANSI
- * C requires us to put one item in the array we use offsetof to get the size.
- */
- nameListLength = offsetof (fileNameListType, names);
- nameListTotalSpace = 4096;
- nameListPtr = xmalloc (nameListTotalSpace);
- nameListPtr->numberOfItems = 0;
- ioVRefNum = paramBlk.hFileInfo.ioVRefNum;
- ioDirID = paramBlk.hFileInfo.ioDirID;
- do {
- paramBlk.hFileInfo.ioFDirIndex ++;
- paramBlk.hFileInfo.ioVRefNum = ioVRefNum;
- paramBlk.hFileInfo.ioDirID = ioDirID;
- ioResult = PBGetCatInfoSync (¶mBlk);
- if (ioResult == fnfErr)
- break;
- if (ioResult != noErr)
- Error (UNEXPECTED_IO_ERROR);
- nameLength = pathNamePString[0];
- if (nameLength != 0) {
- /*
- * We add 2 to the length: one for null termination of this file name and one for
- * possible end of list null termination, which avoids a rare realloc.
- */
- if (nameListLength + nameLength + 2 > nameListTotalSpace) {
- nameListTotalSpace += nameListLength + nameLength + 2048;
- nameListPtr = xrealloc (nameListPtr, nameListTotalSpace);
- }
- memcpy ((char *) nameListPtr + nameListLength,
- (char *) pathNamePString + 1,
- nameLength);
- nameListLength += nameLength;
- *((char *) nameListPtr + nameListLength) = '\0';
- nameListLength++;
- nameListPtr->numberOfItems++;
- }
- } while (TRUE);
- *((char *) nameListPtr + nameListLength) = '\0';
- nameListLength++;
-
- nameListPtr = xrealloc (nameListPtr, nameListLength);
-
- return (nameListPtr);
- }
- #endif
-
- #ifdef DF_MACHINE_WINDOWS
- fileNameListPType
- ReadDirectory (const char *pathCharPtr)
- {
- char fileSearchPath [FILENAME_MAX];
- WIN32_FIND_DATA findData;
- int nameLength;
- int nameListLength;
- fileNameListPType nameListPtr;
- int nameListTotalSpace;
- HANDLE searchHandle;
-
- if (strlen (pathCharPtr) >= FILENAME_MAX - 2) {
- fprintf (stderr, "%s\n", pathCharPtr);
- Error (PATH_TOO_LONG);
- }
- strcpy (fileSearchPath, pathCharPtr);
- strcat (fileSearchPath, "\\*");
- searchHandle = FindFirstFile (fileSearchPath, &findData);
- if (searchHandle == INVALID_HANDLE_VALUE)
- return (NULL);
- /*
- * Read past the first two directories: "." and ".."
- */
- if (!FindNextFile (searchHandle, &findData))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, pathCharPtr);
- /*
- * Set nameListLength to the size of a name list with no elements in it. Since ANSI
- * C requires us to put one item in the array we use offsetof to get the size.
- */
- nameListLength = offsetof (fileNameListType, names);
- nameListTotalSpace = 4096;
- nameListPtr = xmalloc (nameListTotalSpace);
- nameListPtr->numberOfItems = 0;
- while (FindNextFile (searchHandle, &findData)) {
- if ((findData.dwFileAttributes & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0) {
- nameLength = strlen (findData.cFileName);
- /*
- * We add 2 to the length: one for null termination of this file name and one for
- * possible end of list null termination, which avoids a rare realloc.
- */
- if (nameListLength + nameLength + 2 > nameListTotalSpace) {
- nameListTotalSpace += nameListLength + nameLength + 2048;
- nameListPtr = xrealloc (nameListPtr, nameListTotalSpace);
- }
- memcpy ((char *) nameListPtr + nameListLength, findData.cFileName, nameLength);
- nameListLength += nameLength;
- *((char *) nameListPtr + nameListLength) = '\0';
- nameListLength++;
- nameListPtr->numberOfItems++;
- }
- }
- *((char *) nameListPtr + nameListLength) = '\0';
- nameListLength++;
-
- nameListPtr = xrealloc (nameListPtr, nameListLength);
- if (!FindClose (searchHandle))
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, pathCharPtr);
-
- return (nameListPtr);
- }
- #endif
-
- #ifdef DF_MACHINE_NEXT
- fileNameListPType
- ReadDirectory (const char *pathCharPtr)
- {
- DIR *directoryPtr;
- register struct direct *entryPtr;
- fileNameListPType nameListPtr;
- int nameListLength;
- int nameListTotalSpace;
-
- directoryPtr = opendir(pathCharPtr);
- if (directoryPtr == NULL)
- return (NULL);
- /*
- * Read past the first two directories: "." and ".."
- */
- entryPtr = readdir (directoryPtr);
- entryPtr = readdir (directoryPtr);
-
- /*
- * Set nameListLength to the size of a name list with no elements in it. Since ANSI
- * C requires us to put one item in the array we use offsetof to get the size.
- */
- nameListLength = offsetof (fileNameListType, names);
- nameListTotalSpace = 4096;
- nameListPtr = xmalloc (nameListTotalSpace);
- nameListPtr->numberOfItems = 0;
- do {
- entryPtr = readdir (directoryPtr);
- if (entryPtr == NULL)
- break;
- if (entryPtr->d_namlen != 0) {
- /*
- * We add 2 to the length: one for null termination of this file name and one for
- * possible end of list null termination, which avoids a rare realloc.
- */
- if (nameListLength + entryPtr->d_namlen + 2 > nameListTotalSpace) {
- nameListTotalSpace += nameListLength + entryPtr->d_namlen + 2048;
- nameListPtr = xrealloc (nameListPtr, nameListTotalSpace);
- }
- memcpy ((char *) nameListPtr + nameListLength, entryPtr->d_name, entryPtr->d_namlen);
- nameListLength += entryPtr->d_namlen;
- *((char *) nameListPtr + nameListLength) = '\0';
- nameListLength++;
- nameListPtr->numberOfItems++;
- }
- } while (TRUE);
- *((char *) nameListPtr + nameListLength) = '\0';
- nameListLength++;
-
- nameListPtr = xrealloc (nameListPtr, nameListLength);
- closedir (directoryPtr);
-
- return (nameListPtr);
- }
- #endif
-
- /* Given a vector of two file_data objects, read the file associated
- with each one, and build the table of equivalence classes. */
-
- void
- read_files (register struct file_data *file0Ptr, register struct file_data *file1Ptr)
- {
- int equivs_alloc; /* Size allocated to the array `equivs'. */
- int primes_index; /* Index of current nbuckets in primes. */
-
- slurp (file0Ptr);
- slurp (file1Ptr);
-
- find_identical_ends (file0Ptr, file1Ptr);
-
- find_and_hash_each_line (file0Ptr);
- find_and_hash_each_line (file1Ptr);
-
- if (file0Ptr->buffer != NULL && file1Ptr->buffer != NULL) {
- /* This is guaranteed to be enough space. */
- equivs_alloc = file0Ptr->buffered_lines + file1Ptr->buffered_lines + 1;
- equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
- /* Equivalence class 0 is permanently safe for lines that were not
- hashed. Real equivalence classes start at 1. */
- equivs_index = 1;
-
- primes_index = 0;
- while (primes[primes_index] < equivs_alloc / 3)
- primes_index++;
-
- buckets = (struct equivclass **) xmalloc (primes[primes_index] * sizeof (struct equivclass *));
- bzero (buckets, primes[primes_index] * sizeof (struct equivclass *));
- nbuckets = primes[primes_index];
-
- find_equiv_class (file0Ptr);
- find_equiv_class (file1Ptr);
-
- file0Ptr->equiv_max = file1Ptr->equiv_max = equivs_index;
-
- free (equivs);
- free (buckets);
- }
- }
-
- /* Slurp the file completely into memory. */
-
- void
- slurp (struct file_data *filePtr)
- {
- int ioResult;
-
- #ifdef DF_MACHINE_MACINTOSH
- SpinCursor (1);
- #endif
-
- if (filePtr->desc == NULL /* file doesn't exist */) {
- filePtr->buffered_chars = 0;
- if (filePtr->buffer != NULL)
- free (filePtr->buffer);
- filePtr->buffer = NULL;
- } else { /* file exists */
- ioResult = fseek (filePtr->desc, 0, SEEK_END);
- if (ioResult != 0)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, filePtr->namePtr);
-
- filePtr->bufsize = (int) ftell (filePtr->desc);
- if (filePtr->bufsize == -1)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, filePtr->namePtr);
-
- ioResult = fseek (filePtr->desc, 0, SEEK_SET);
- if (ioResult != 0)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, filePtr->namePtr);
-
- /* Leave room in the buffer for 2 extra chars beyond those
- that filePtr->bufsize describes:
- one for a newline (in case the text does not end with one)
- and one for a sentinel in find_identical_ends. */
- if (filePtr->buffer == NULL)
- filePtr->buffer = (char *) xmalloc (filePtr->bufsize + 2);
- else
- filePtr->buffer = (char *) xrealloc (filePtr->buffer, filePtr->bufsize + 2);
- /*
- * We reset bufsize to the actual number of characters in the buffer rather
- * than the size malloced. They may differ because we read text files. For
- * example, text files can store end of lines as more than one character.
- */
- filePtr->buffered_chars = fread (filePtr->buffer, 1, filePtr->bufsize, filePtr->desc);
- if (ferror (filePtr->desc) != 0)
- ErrorWithStringArgument (UNEXPECTED_IO_ERROR, filePtr->namePtr);
- if (filePtr->buffered_chars != filePtr->bufsize) {
- filePtr->bufsize = filePtr->buffered_chars;
- filePtr->buffer = (char *) xrealloc (filePtr->buffer, filePtr->bufsize + 2);
- }
-
- /* Make sure text ends in a newline,
- but remember that we had to add one. */
- if (filePtr->buffered_chars > 0 && filePtr->buffer[filePtr->buffered_chars - 1] != '\n') {
- filePtr->missing_newline = TRUE;
- filePtr->buffer[filePtr->buffered_chars++] = '\n';
- }
-
- /* Don't use uninitialized storage. */
- filePtr->buffer[filePtr->buffered_chars] = '\0';
- }
- }
-
-