home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-385-Vol-1of3.iso
/
t
/
tags18.zip
/
SORT.C
< prev
next >
Wrap
C/C++ Source or Header
|
1992-05-03
|
50KB
|
1,831 lines
/* sort.c - sort lines of text (with all kinds of options).
Copyright 1989 Free Software Foundation
Written December 1988 by Mike Haertel.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
The author may be reached (Email) at the address mike@ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
/* MS-DOS port (c) 1990 by Thorsten Ohl, td12@ddagsi3.bitnet
This port is also distributed under the terms of the
GNU General Public License as published by the
Free Software Foundation.
$Header: e:/gnu/sort/RCS/sort.c'v 0.3.0.4 90/08/26 19:03:05 tho Exp $
*/
static char version[] = "GNU sort, version 0.3";
#include "std.h"
#ifndef MSDOS
#include "unix.h"
#endif /* not MSDOS */
#ifdef SORT_MODULE
#include "sort.h"
#endif
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#ifdef MSDOS
#include <process.h>
static void cleanup (void);
static void assert_lines (int lines);
void *xmalloc (unsigned int size);
void *xrealloc (void *ptr, unsigned int size);
static FILE *xfopen (char *file, char *how);
static void xfclose (FILE *fp);
static void xfwrite (char *buf, int size, int nelem, FILE *fp);
static char *tempname (void);
static void zaptemp (char *name);
static void inittables (void);
static void initbuf (struct buffer *buf, int alloc);
static int fillbuf (struct buffer *buf, FILE *fp);
static void initlines (struct lines *lines, int alloc);
static char *begfield (struct line *line, struct keyfield *key);
static char *limfield (struct line *line, struct keyfield *key);
static void findlines (struct buffer *buf, struct lines *lines);
static int fraccompare (char *a, char *b);
static int numcompare (char *a, char *b);
static int getmonth (char *s, int len);
static int keycompare (struct line *a, struct line *b);
static int compare (struct line *a, struct line *b);
static int checkfp (FILE *fp);
static void mergefps (FILE **fps, int nfps, FILE *ofp);
static void sortlines (struct line *lines, int nlines, struct line *temp);
static int check (char **files, int nfiles);
static void merge (char **files, int nfiles, FILE *ofp);
static void sort (char **files, int nfiles, FILE *ofp);
static void insertkey (struct keyfield *key);
static void usage (void);
static void badfieldspec (char *s);
static void __cdecl inthandler (void);
#ifndef SORT_MODULE
int main (int argc, char **argv);
#endif
#endif /* MSDOS */
/* A few useful macros. */
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define UCHAR_LIM (UCHAR_MAX + 1)
#define UCHAR(c) ((unsigned char) (c))
/* Table of digits. */
static int digits[UCHAR_LIM];
/* Table of white space. */
static int blanks[UCHAR_LIM];
/* Table of non-printing characters. */
static int nonprinting[UCHAR_LIM];
/* Table of non-dictionary characters (not letters, digits, or blanks). */
static int nondictionary[UCHAR_LIM];
/* Translation table folding upper case to lower. */
static char fold_tolower[UCHAR_LIM];
/* Table mapping 3-letter month names to integers.
Alphabetic order allows binary search. */
static struct month {
char *name;
int val;
} monthtab[] = {
"apr", 4,
"aug", 8,
"dec", 12,
"feb", 2,
"jan", 1,
"jul", 7,
"jun", 6,
"mar", 3,
"may", 5,
"nov", 11,
"oct", 10,
"sep", 9
};
/* During the merge phase, the number of files to merge at once. */
#ifdef MSDOS
/* 14 (= 20 - 5 - 1) is a hard upper limit for MeSsy DOS versions <3.2
(and a soft one for later versions...) */
#define NMERGE 12
#else /* not MSDOS */
#define NMERGE 16
#endif /* not MSDOS */
/* Initial buffer size for in core sorting. Will not grow unless a
line longer than this is seen. */
#ifdef MSDOS
static int sortalloc = 32767;
#else /* not MSDOS */
static int sortalloc = 524288;
#endif /* not MSDOS */
/* Initial buffer size for in core merge buffers. Bear in mind that
up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
static int mergealloc = 16384;
/* Guess of average line length. */
static int linelength = 30;
/* Prefix for temporary file names. */
#ifdef MSDOS
static char *prefix;
#else /* not MSDOS */
static char *prefix = "/tmp";
#endif /* not MSDOS */
/* Flag to reverse the order of all comparisons. */
static int reverse;
/* Tab character separating fields. If NUL, then fields are separated
by the empty string between a non-whitespace character and a whitespace
character. */
static char tab;
/* Flag to remove consecutive duplicate lines from the output.
Only the last of a sequence of equal lines will be output. */
static int unique;
/* Lines are held in core as counted strings. */
struct line
{
char *text; /* Text of the line. */
int length; /* Length not including final newline. */
char *keybeg; /* Start of first key. */
char *keylim; /* Limit of first key. */
};
#ifdef MSDOS
/* Using _huge line arrays under MS-DOS is terribly inefficient, so we
impose an upper limit on the number of lines cosidered at once. The
user can break the input into digestable pieces by using the `-S' option
to adjust the input buffer size.
This only matters for files with very short lines ( < 8 chars). */
static int maxlines
= (int) (((1L << 16) - 1L) / sizeof (struct line));
void
assert_lines (int lines)
{
if (lines >= maxlines)
{
fprintf (stderr,
"sort: the number of lines per input buffer is restricted in\n"
" the MS-DOS version. For files with short lines, use\n"
" the `-S <num>' option to reduce the buffer size.\n");
cleanup ();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit (-1);
}
}
#endif /* MSDOS */
/* Arrays of lines. */
struct lines
{
struct line *lines; /* Dynamically allocated array of lines. */
int used; /* Number of slots used. */
int alloc; /* Number of slots allocated. */
};
/* Input buffers. */
struct buffer
{
char *buf; /* Dynamically allocated buffer. */
int used; /* Number of bytes used. */
int alloc; /* Number of bytes allocated. */
int left; /* Number of bytes left after line parsing. */
};
/* Lists of key field comparisons to be tried. */
static struct keyfield
{
int sword; /* Zero-origin 'word' to start at. */
int schar; /* Additional characters to skip. */
int skipsblanks; /* Skip leading white space at start. */
int eword; /* Zero-origin first word after field. */
int echar; /* Additional characters in field. */
int skipeblanks; /* Skip trailing white space at finish. */
int *ignore; /* Boolean array of characters to ignore. */
char *translate; /* Translation applied to characters. */
int numeric; /* Flag for numeric comparison. */
int month; /* Flag for comparison by month name. */
int reverse; /* Reverse the sense of comparison. */
struct keyfield *next; /* Next keyfield to try. */
} keyhead;
/* The list of temporary files. */
static struct tempnode
{
char *name;
struct tempnode *next;
} temphead;
/* Clean up any remaining temporary files. */
static void
cleanup()
{
struct tempnode *node;
#ifdef MSDOS
_fcloseall();
#endif
for (node = temphead.next; node; node = node->next)
remove(node->name);
}
/* Interfaces to library routines. */
#ifdef MSDOS
void *
xmalloc (size_t size)
{
void *r = malloc (size);
if (r)
return r;
fprintf (stderr, "sort: memory exausted\n");
cleanup ();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit (-1);
}
void *
xrealloc (void *ptr, size_t size)
{
void *r = realloc (ptr, size);
if (r)
return r;
fprintf (stderr, "sort: memory exhausted\n");
cleanup ();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit (-1);
}
#else /* not MSDOS */
static char *
xmalloc(n)
int n;
{
char *r = malloc(n);
if (r)
return r;
fprintf(stderr, "sort: memory exausted\n");
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit(-1);
}
static char *
xrealloc(p, n)
char *p;
int n;
{
char *r = realloc(p, n);
if (r)
return r;
fprintf(stderr, "sort: memory exhausted\n");
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit(-1);
}
#endif /* not MSDOS */
static FILE *
xfopen(file, how)
char *file, *how;
{
FILE *fp = strcmp(file, "-") ? fopen(file, how) : stdin;
if (fp)
return fp;
fprintf(stderr, "sort: cannot open %s (%s): %s\n", file, how,
strerror(errno));
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit(-1);
}
static void
xfclose(fp)
FILE *fp;
{
fflush(fp);
if (fp != stdin && fp != stdout)
if (fclose(fp) != 0)
{
fprintf(stderr, "sort: error in fclose: %s\n", strerror(errno));
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit(-1);
}
}
static void
xfwrite(buf, size, nelem, fp)
char *buf;
int size, nelem;
FILE *fp;
{
#ifdef MSDOS
if (fwrite(buf, size, nelem, fp) < (unsigned) nelem)
#else /* not MSDOS */
if (fwrite(buf, size, nelem, fp) < 0)
#endif /* not MSDOS */
{
fprintf(stderr, "sort: error in fwrite: %s\n", strerror(errno));
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit(-1);
}
}
/* Return a name for a temporary file. */
static char *
tempname()
{
static int seq;
int len = strlen(prefix);
char *name = xmalloc(len + 16);
struct tempnode *node =
(struct tempnode *) xmalloc(sizeof (struct tempnode));
#ifdef MSDOS
if (len && prefix[len - 1] != '/' && prefix[len - 1] != '\\')
sprintf(name, "%s/sort%4.4x.%3.3x", prefix, _getpid(), ++seq);
else
sprintf(name, "%ssort%4.4x.%3.3x", prefix, _getpid(), ++seq);
#else /* not MSDOS */
if (len && prefix[len - 1] != '/')
sprintf(name, "%s/sort%5.5d%5.5d", prefix, _getpid(), ++seq);
else
sprintf(name, "%ssort%5.5d%5.5d", prefix, _getpid(), ++seq);
#endif /* not MSDOS */
node->name = name;
node->next = temphead.next;
temphead.next = node;
return name;
}
/* Search through the list of temporary files for the given name;
remove it if it is found on the list. */
static void
zaptemp(name)
char *name;
{
struct tempnode *node, *temp;
for (node = &temphead; node->next; node = node->next)
if (!strcmp(name, node->next->name))
break;
if (node->next)
{
temp = node->next;
remove(temp->name);
free(temp->name);
node->next = temp->next;
free((char *) temp);
}
}
/* Initialize the character class tables. */
static void
inittables()
{
int i;
for (i = 0; i < UCHAR_LIM; ++i)
{
if (ISBLANK(i))
blanks[i] = 1;
if (ISDIGIT(i))
digits[i] = 1;
if (!ISPRINT(i))
nonprinting[i] = 1;
if (!ISALNUM(i) && !ISBLANK(i))
nondictionary[i] = 1;
if (ISUPPER(i))
fold_tolower[i] = tolower(i);
else
fold_tolower[i] = i;
}
}
/* Initialize BUF allocating ALLOC bytes initially. */
static void
initbuf(buf, alloc)
struct buffer *buf;
int alloc;
{
buf->alloc = alloc;
buf->buf = xmalloc(buf->alloc);
buf->used = buf->left = 0;
}
/* Fill BUF reading from FP, moving buf->left bytes from the end
of buf->buf to the beginning first. If EOF is reached and the
file wasn't terminated by a newline, supply one. Return a count
of bytes actually read. */
static int
fillbuf(buf, fp)
struct buffer *buf;
FILE *fp;
{
int cc, total = 0;
memmove(buf->buf, buf->buf + buf->used - buf->left, buf->left);
buf->used = buf->left;
while (!feof(fp) && !memchr(buf->buf, '\n', buf->used))
{
if (buf->used == buf->alloc)
#ifdef MSDOS
{
fprintf (stderr,
"sort: lines longer than 32k are not supported under\n"
" MS-DOS because of performance considerations.\n");
cleanup ();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit (-1);
}
#else /* not MSDOS */
buf->buf = xrealloc(buf->buf, buf->alloc *= 2);
#endif /* not MSDOS */
cc = fread(buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
if (cc < 0)
{
fprintf(stderr, "sort: read error (%s)\n", strerror(errno));
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit(-1);
}
buf->used += cc;
total += cc;
}
if (feof(fp) && buf->used && buf->buf[buf->used - 1] != '\n')
{
if (buf->used == buf->alloc)
#ifdef MSDOS
{
fprintf (stderr,
"sort: lines longer than 32k are not supported under\n"
" MS-DOS because of performance considerations.\n");
cleanup ();
#ifdef SORT_MODULE
external_cleanup();
#endif
exit (-1);
}
#else /* not MSDOS */
buf->buf = xrealloc(buf->buf, buf->alloc *= 2);
#endif /* not MSDOS */
buf->buf[buf->used++] = '\n';
++total;
}
return total;
}
/* Initialize LINES, allocating space for ALLOC lines initially. */
static void
initlines(lines, alloc)
struct lines *lines;
int alloc;
{
lines->alloc = alloc;
#ifdef MSDOS
assert_lines (lines->alloc);
#endif /* MSDOS */
lines->lines = (struct line *) xmalloc(lines->alloc * sizeof (struct line));
lines->used = 0;
}
/* Return a pointer to the first character of a field. */
static char *
begfield(line, key)
struct line *line;
struct keyfield *key;
{
register char *ptr = line->text, *lim = ptr + line->length;
register int sword = key->sword, schar = key->schar;
if (tab)
while (ptr < lim && sword--)
{
while (ptr < lim && *ptr != tab)
++ptr;
if (ptr < lim)
++ptr;
}
else
while (ptr < lim && sword--)
{
while (ptr < lim && blanks[UCHAR(*ptr)])
++ptr;
while (ptr < lim && !blanks[UCHAR(*ptr)])
++ptr;
}
if (key->skipsblanks)
while (ptr < lim && blanks[UCHAR(*ptr)])
++ptr;
while (ptr < lim && schar--)
++ptr;
return ptr;
}
/* Find the limit of a field; i.e., a pointer to the first character
after the field. */
static char *
limfield(line, key)
struct line *line;
struct keyfield *key;
{
register char *ptr = line->text, *lim = ptr + line->length;
register int eword = key->eword, echar = key->echar;
if (tab)
while (ptr < lim && eword--)
{
while (ptr < lim && *ptr != tab)
++ptr;
if (ptr < lim && (eword || key->skipeblanks))
++ptr;
}
else
while (ptr < lim && eword--)
{
while (ptr < lim && blanks[UCHAR(*ptr)])
++ptr;
while (ptr < lim && !blanks[UCHAR(*ptr)])
++ptr;
}
if (key->skipeblanks)
while (ptr < lim && blanks[UCHAR(*ptr)])
++ptr;
while (ptr < lim && echar--)
++ptr;
return ptr;
}
/* Find the lines in BUF, storing pointers and lengths in LINES.
Also replace newlines with NULs. */
static void
findlines(buf, lines)
struct buffer *buf;
struct lines *lines;
{
register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
struct keyfield *key = keyhead.next;
lines->used = 0;
while (beg < lim && (ptr = memchr(beg, '\n', lim - beg)))
{
/* There are various places in the code that rely on a NUL
being at the end of in-core lines; NULs inside the lines
will not cause trouble, though. */
*ptr = '\0';
if (lines->used == lines->alloc)
lines->lines =
(struct line *) xrealloc((char *) lines->lines,
(lines->alloc *= 2) * sizeof (struct line));
#ifdef MSDOS
assert_lines (lines->alloc);
#endif /* MSDOS */
lines->lines[lines->used].text = beg;
lines->lines[lines->used].length = ptr - beg;
/* Precompute the position of the first key for efficiency. */
if (key)
{
if (key->eword >= 0)
lines->lines[lines->used].keylim =
limfield(&lines->lines[lines->used], key);
else
lines->lines[lines->used].keylim = ptr;
if (key->sword >= 0)
lines->lines[lines->used].keybeg =
begfield(&lines->lines[lines->used], key);
else
{
if (key->skipsblanks)
while (blanks[UCHAR(*beg)])
++beg;
lines->lines[lines->used].keybeg = beg;
}
}
++lines->used;
beg = ptr + 1;
}
buf->left = lim - beg;
}
/* Compare two strings containing decimal fractions < 1. Each string
should begin with a decimal point followed immediately by the digits
of the fraction. Strings not of this form are considered to be zero. */
static int
fraccompare(a, b)
register char *a, *b;
{
register tmpa = UCHAR(*a), tmpb = UCHAR(*b);
if (tmpa == '.' && tmpb == '.')
{
do
tmpa = UCHAR(*++a), tmpb = UCHAR(*++b);
while (tmpa == tmpb && digits[tmpa]);
if (digits[tmpa] && digits[tmpb])
return tmpa - tmpb;
if (digits[tmpa])
{
while (tmpa == '0')
tmpa = UCHAR(*++a);
if (digits[tmpa])
return 1;
return 0;
}
if (digits[tmpb])
{
while (tmpb == '0')
tmpb = UCHAR(*++b);
if (digits[tmpb])
return -1;
return 0;
}
return 0;
}
else if (tmpa == '.')
{
do
tmpa = UCHAR(*++a);
while (tmpa == '0');
if (digits[tmpa])
return 1;
return 0;
}
else if (tmpb == '.')
{
do
tmpb = UCHAR(*++b);
while (tmpb == '0');
if (digits[tmpb])
return -1;
return 0;
}
return 0;
}
/* Compare two strings as numbers without explicitly converting them to
machine numbers. Comparatively slow for short strings, but asymptotically
hideously fast. */
static int
numcompare(a, b)
register char *a, *b;
{
register int tmpa, tmpb, loga, logb, tmp;
tmpa = UCHAR(*a), tmpb = UCHAR(*b);
if (tmpa == '-')
{
tmpa = UCHAR(*++a);
if (tmpb != '-')
{
if (digits[tmpa] && digits[tmpb])
return -1;
return 0;
}
tmpb = UCHAR(*++b);
while (tmpa == '0')
tmpa = UCHAR(*++a);
while (tmpb == '0')
tmpb = UCHAR(*++b);
while (tmpa == tmpb && digits[tmpa])
tmpa = UCHAR(*++a), tmpb = UCHAR(*++b);
if (tmpa == '.' && !digits[tmpb] || tmpb == '.' && !digits[tmpa])
return -fraccompare(a, b);
if (digits[tmpa])
for (loga = 1; digits[UCHAR(*++a)]; ++loga)
;
else
loga = 0;
if (digits[tmpb])
for (logb = 1; digits[UCHAR(*++b)]; ++logb)
;
else
logb = 0;
if (tmp = logb - loga)
return tmp;
if (! loga)
return 0;
return tmpb - tmpa;
}
else if (tmpb == '-')
{
if (digits[UCHAR(tmpa)] && digits[UCHAR(*++b)])
return 1;
return 0;
}
else
{
while (tmpa == '0')
tmpa = UCHAR(*++a);
while (tmpb == '0')
tmpb = UCHAR(*++b);
while (tmpa == tmpb && digits[tmpa])
tmpa = UCHAR(*++a), tmpb = UCHAR(*++b);
if (tmpa == '.' && !digits[tmpb] || tmpb == '.' && !digits[tmpa])
return fraccompare(a, b);
if (digits[tmpa])
for (loga = 1; digits[UCHAR(*++a)]; ++loga)
;
else
loga = 0;
if (digits[tmpb])
for (logb = 1; digits[UCHAR(*++b)]; ++logb)
;
else
logb = 0;
if (tmp = loga - logb)
return tmp;
if (! loga)
return 0;
return tmpa - tmpb;
}
}
/* Return an integer <= 12 associated with a month name (0 if the name
is not recognized. */
static int
getmonth(s, len)
char *s;
int len;
{
char month[4];
register int i, lo = 0, hi = 12;
if (len < 3)
return 0;
for (i = 0; i < 3; ++i)
month[i] = fold_tolower[UCHAR(s[i])];
month[3] = '\0';
while (hi - lo > 1)
if (strcmp(month, monthtab[(lo + hi) / 2].name) < 0)
hi = (lo + hi) / 2;
else
lo = (lo + hi) / 2;
if (!strcmp(month, monthtab[lo].name))
return monthtab[lo].val;
return 0;
}
/* Compare two lines trying every key in sequence until there
are no more keys or a difference is found. */
static int
keycompare(a, b)
struct line *a, *b;
{
register char *texta, *textb, *lima, *limb, *translate;
register int *ignore;
struct keyfield *key;
int diff = 0, iter = 0, lena, lenb;
for (key = keyhead.next; key; key = key->next, ++iter)
{
ignore = key->ignore;
translate = key->translate;
/* Find the beginning and limit of each field. */
if (iter)
{
if (key->eword >= 0)
lima = limfield(a, key), limb = limfield(b, key);
else
lima = a->text + a->length, limb = b->text + b->length;
if (key->sword >= 0)
texta = begfield(a, key), textb = begfield(b, key);
else
{
texta = a->text, textb = b->text;
if (key->skipsblanks)
{
while (texta < lima && blanks[UCHAR(*texta)])
++texta;
while (textb < limb && blanks[UCHAR(*textb)])
++textb;
}
}
}
else
{
/* For the first iteration only, the key positions have
been precomputed for us. */
texta = a->keybeg, lima = a->keylim;
textb = b->keybeg, limb = b->keylim;
}
/* Find the lengths. */
lena = lima - texta, lenb = limb - textb;
if (lena < 0)
lena = 0;
if (lenb < 0)
lenb = 0;
/* Actually compare the fields. */
if (key->numeric)
{
if (*lima || *limb)
{
char savea = *lima, saveb = *limb;
*lima = *limb = '\0';
diff = numcompare(texta, textb);
*lima = savea, *limb = saveb;
}
else
diff = numcompare(texta, textb);
if (diff)
return key->reverse ? -diff : diff;
continue;
}
else if (key->month)
{
diff = getmonth(texta, lena) - getmonth(textb, lenb);
if (diff)
return key->reverse ? -diff : diff;
continue;
}
else if (ignore && translate)
while (texta < lima && textb < limb)
{
while (texta < lima && ignore[UCHAR(*texta)])
++texta;
while (textb < limb && ignore[UCHAR(*textb)])
++textb;
if (texta < lima && textb < limb &&
translate[UCHAR(*texta++)] != translate[UCHAR(*textb++)])
{
diff = translate[UCHAR(*--texta)] - translate[UCHAR(*--textb)];
break;
}
}
else if (ignore)
while (texta < lima && textb < limb)
{
while (texta < lima && ignore[UCHAR(*texta)])
++texta;
while (textb < limb && ignore[UCHAR(*textb)])
++textb;
if (texta < lima && textb < limb && *texta++ != *textb++)
{
diff = *--texta - *--textb;
break;
}
}
else if (translate)
while (texta < lima && textb < limb)
{
if (translate[UCHAR(*texta++)] != translate[UCHAR(*textb++)])
{
diff = translate[UCHAR(*--texta)] - translate[UCHAR(*--textb)];
break;
}
}
else
diff = memcmp(texta, textb, MIN(lena, lenb));
if (diff)
return key->reverse ? -diff : diff;
if (diff = lena - lenb)
return key->reverse ? -diff : diff;
}
return 0;
}
/* Compare two lines, returning negative, zero, or positive depending on
whether A compares less than, equal to, or greater than B. */
static int
compare(a, b)
register struct line *a, *b;
{
int diff, tmpa, tmpb, min;
if (keyhead.next)
{
if (diff = keycompare(a, b))
return diff;
if (!unique)
{
tmpa = a->length, tmpb = b->length;
diff = memcmp(a->text, b->text, MIN(tmpa, tmpb));
if (! diff)
diff = tmpa - tmpb;
}
}
else
{
tmpa = a->length, tmpb = b->length;
min = MIN(tmpa, tmpb);
if (min == 0)
diff = tmpa - tmpb;
else
{
char *ap = a->text, *bp = b->text;
diff = *ap - *bp;
if (diff == 0)
{
diff = memcmp(ap, bp, min);
if (diff == 0)
diff = tmpa - tmpb;
}
}
}
return reverse ? -diff : diff;
}
/* Check that the lines read from the given FP come in order. Return
1 if they do and 0 if there is a disorder. */
static int
checkfp(fp)
FILE *fp;
{
struct buffer buf; /* Input buffer. */
struct lines lines; /* Lines scanned from the buffer. */
struct line temp; /* Copy of previous line. */
int cc; /* Character count. */
int cmp; /* Result of calling compare. */
int alloc, i, success = 1;
initbuf(&buf, mergealloc);
initlines(&lines, mergealloc / linelength + 1);
alloc = linelength;
temp.text = xmalloc(alloc);
cc = fillbuf(&buf, fp);
findlines(&buf, &lines);
if (cc)
do
{
/* Compare each line in the buffer with its successor. */
for (i = 0; i < lines.used - 1; ++i)
{
cmp = compare(&lines.lines[i], &lines.lines[i + 1]);
if (unique && cmp >= 0 || cmp > 0)
{
success = 0;
goto finish;
}
}
/* Save the last line of the buffer and refill the buffer. */
if (lines.lines[lines.used - 1].length > alloc)
{
while (lines.lines[lines.used - 1].length + 1 > alloc)
alloc *= 2;
temp.text = xrealloc(temp.text, alloc);
}
memcpy(temp.text, lines.lines[lines.used - 1].text,
lines.lines[lines.used - 1].length + 1);
temp.length = lines.lines[lines.used - 1].length;
cc = fillbuf(&buf, fp);
if (cc)
{
findlines(&buf, &lines);
/* Make sure the line saved from the old buffer contents is
less than or equal to the first line of the new buffer. */
cmp = compare(&temp, &lines.lines[0]);
if (unique && cmp >= 0 || cmp > 0)
{
success = 0;
break;
}
}
}
while (cc);
finish:
xfclose(fp);
free(buf.buf);
free((char *) lines.lines);
free(temp.text);
return success;
}
/* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
Close FPS before returning. */
static void
mergefps(fps, nfps, ofp)
FILE *fps[], *ofp;
register int nfps;
{
struct buffer buffer[NMERGE]; /* Input buffers for each file. */
struct lines lines[NMERGE]; /* Line tables for each buffer. */
struct line saved; /* Saved line for unique check. */
int savedflag = 0; /* True if there is a saved line. */
int savealloc; /* Size allocated for the saved line. */
int cur[NMERGE]; /* Current line in each line table. */
int ord[NMERGE]; /* Table representing a permutation of fps,
such that lines[ord[0]].lines[cur[ord[0]]]
is the smallest line and will be next
output. */
register int i, j, t;
/* Allocate space for a saved line if necessary. */
if (unique)
saved.text = xmalloc(savealloc = linelength);
/* Read initial lines from each input file. */
for (i = 0; i < nfps; ++i)
{
initbuf(&buffer[i], mergealloc);
/* If a file is empty, eliminate it from future consideration. */
while (i < nfps && !fillbuf(&buffer[i], fps[i]))
{
xfclose(fps[i]);
--nfps;
for (j = i; j < nfps; ++j)
fps[j] = fps[j + 1];
}
if (i == nfps)
free(buffer[i].buf);
else
{
initlines(&lines[i], mergealloc / linelength + 1);
findlines(&buffer[i], &lines[i]);
cur[i] = 0;
}
}
/* Set up the ord table according to comparisons among input lines.
Since this only reorders two items if one is strictly greater than
the other, it is stable. */
for (i = 0; i < nfps; ++i)
ord[i] = i;
for (i = 1; i < nfps; ++i)
if (compare(&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
&lines[ord[i]].lines[cur[ord[i]]]) > 0)
t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
/* Repeatedly output the smallest line until no input remains. */
while (nfps)
{
/* If uniqified output is turned out, output only the last of
an identical series of lines. */
if (unique)
{
if (savedflag && compare(&saved, &lines[ord[0]].lines[cur[ord[0]]]))
{
xfwrite(saved.text, 1, saved.length, ofp);
putc('\n', ofp);
}
if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
{
while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
savealloc *= 2;
saved.text = xrealloc(saved.text, savealloc);
}
saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
memcpy(saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
saved.length + 1);
saved.keybeg = lines[ord[0]].lines[cur[ord[0]]].keybeg;
saved.keylim = lines[ord[0]].lines[cur[ord[0]]].keylim;
savedflag = 1;
}
else
{
xfwrite(lines[ord[0]].lines[cur[ord[0]]].text, 1,
lines[ord[0]].lines[cur[ord[0]]].length, ofp);
putc('\n', ofp);
}
/* Check if we need to read more lines into core. */
if (++cur[ord[0]] == lines[ord[0]].used)
if (fillbuf(&buffer[ord[0]], fps[ord[0]]))
{
findlines(&buffer[ord[0]], &lines[ord[0]]);
cur[ord[0]] = 0;
}
else
{
/* We reached EOF on fps[ord[0]]. */
for (i = 1; i < nfps; ++i)
if (ord[i] > ord[0])
--ord[i];
--nfps;
xfclose(fps[ord[0]]);
free(buffer[ord[0]].buf);
free((char *) lines[ord[0]].lines);
for (i = ord[0]; i < nfps; ++i)
{
fps[i] = fps[i + 1];
buffer[i] = buffer[i + 1];
lines[i] = lines[i + 1];
cur[i] = cur[i + 1];
}
for (i = 0; i < nfps; ++i)
ord[i] = ord[i + 1];
continue;
}
/* The new line just read in may be larger than other lines
already in core; push it back in the queue until we encounter
a line larger than it. */
for (i = 1; i < nfps; ++i)
{
t = compare(&lines[ord[0]].lines[cur[ord[0]]],
&lines[ord[i]].lines[cur[ord[i]]]);
if (! t)
t = ord[0] - ord[i];
if (t < 0)
break;
}
t = ord[0];
for (j = 1; j < i; ++j)
ord[j - 1] = ord[j];
ord[i - 1] = t;
}
if (unique && savedflag)
{
xfwrite(saved.text, 1, saved.length, ofp);
putc('\n', ofp);
free(saved.text);
}
}
/* Sort the array LINES using TEMP for temporary space. */
static void
sortlines(lines, nlines, temp)
struct line *lines, *temp;
int nlines;
{
register struct line *lo, *hi, *t;
register int nlo, nhi;
if (nlines == 2)
{
if (compare(&lines[0], &lines[1]) > 0)
*temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
return;
}
nlo = nlines / 2;
lo = lines;
nhi = nlines - nlo;
hi = lines + nlo;
if (nlo > 1)
sortlines(lo, nlo, temp);
if (nhi > 1)
sortlines(hi, nhi, temp);
t = temp;
while (nlo && nhi)
if (compare(lo, hi) <= 0)
*t++ = *lo++, --nlo;
else
*t++ = *hi++, --nhi;
while (nlo--)
*t++ = *lo++;
for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
*lo++ = *t++;
}
/* Check that each of the given FILES is ordered.
Return a count of disordered files. */
static int
check(files, nfiles)
char *files[];
int nfiles;
{
int i, disorders = 0;
FILE *fp;
for (i = 0; i < nfiles; ++i)
{
fp = xfopen(files[i], "r");
if (! checkfp(fp))
{
printf("sort: disorder on %s\n", files[i]);
++disorders;
}
}
return disorders;
}
/* Merge any number of FILES onto the given OFP. */
static void
merge(files, nfiles, ofp)
char *files[];
int nfiles;
FILE *ofp;
{
int i, j, t;
char *temp;
FILE *fps[NMERGE], *tfp;
while (nfiles > NMERGE)
{
t = 0;
for (i = 0; i < nfiles / NMERGE; ++i)
{
for (j = 0; j < NMERGE; ++j)
fps[j] = xfopen(files[i * NMERGE + j], "r");
tfp = xfopen(temp = tempname(), "w");
mergefps(fps, NMERGE, tfp);
xfclose(tfp);
for (j = 0; j < NMERGE; ++j)
zaptemp(files[i * NMERGE + j]);
files[t++] = temp;
}
for (j = 0; j < nfiles % NMERGE; ++j)
fps[j] = xfopen(files[i * NMERGE + j], "r");
tfp = xfopen(temp = tempname(), "w");
mergefps(fps, nfiles % NMERGE, tfp);
xfclose(tfp);
for (j = 0; j < nfiles % NMERGE; ++j)
zaptemp(files[i * NMERGE + j]);
files[t++] = temp;
nfiles = t;
}
for (i = 0; i < nfiles; ++i)
fps[i] = xfopen(files[i], "r");
mergefps(fps, i, ofp);
for (i = 0; i < nfiles; ++i)
zaptemp(files[i]);
}
/* Sort any number of FILES onto the given OFP. */
static void
sort(files, nfiles, ofp)
char **files;
int nfiles;
FILE *ofp;
{
struct buffer buf;
struct lines lines;
struct line *tmp;
int i, ntmp;
FILE *fp, *tfp;
struct tempnode *node;
int ntemp = 0;
char **tempfiles;
initbuf(&buf, sortalloc);
initlines(&lines, sortalloc / linelength + 1);
ntmp = lines.alloc;
#ifdef MSDOS
assert_lines (ntmp);
#endif /* MSDOS */
tmp = (struct line *) xmalloc(ntmp * sizeof (struct line));
while (nfiles--)
{
fp = xfopen(*files++, "r");
while (fillbuf(&buf, fp))
{
findlines(&buf, &lines);
if (lines.used > ntmp)
{
while (lines.used > ntmp)
ntmp *= 2;
#ifdef MSDOS
assert_lines (ntmp);
#endif /* MSDOS */
tmp = (struct line *) xrealloc((char *) tmp,
ntmp * sizeof (struct line));
}
sortlines(lines.lines, lines.used, tmp);
if (feof(fp) && !nfiles && !ntemp)
tfp = ofp;
else
{
++ntemp;
tfp = xfopen(tempname(), "w");
}
for (i = 0; i < lines.used; ++i)
if (!unique || i == lines.used - 1
|| compare(&lines.lines[i], &lines.lines[i + 1]))
{
xfwrite(lines.lines[i].text, 1, lines.lines[i].length, tfp);
putc('\n', tfp);
}
if (tfp != ofp)
xfclose(tfp);
}
xfclose(fp);
}
free(buf.buf);
free((char *) lines.lines);
free((char *) tmp);
if (ntemp)
{
tempfiles = (char **) xmalloc(ntemp * sizeof (char *));
i = ntemp;
for (node = temphead.next; node; node = node->next)
tempfiles[--i] = node->name;
merge(tempfiles, ntemp, ofp);
free((char *) tempfiles);
}
}
/* Insert a key at the end of the list. */
static void
insertkey(key)
struct keyfield *key;
{
struct keyfield *k = &keyhead;
while (k->next)
k = k->next;
k->next = key;
key->next = NULL;
}
static void
usage()
{
fprintf(stderr,
"usage: sort [ -cmu ] [ -tc ] [ -o file ] [ -T dir ]\n");
fprintf(stderr,
" [ -bdfiMnr ] [ +n [ -m ] . . . ] [ files . . . ]\n");
exit(-1);
}
static void
badfieldspec(s)
char *s;
{
fprintf(stderr, "sort: bad field specification %s\n", s);
exit(-1);
}
/* Handle interrupts and hangups. */
static void
#ifdef MSDOS
__cdecl inthandler()
#else
inthandler()
#endif
{
signal(SIGINT, SIG_DFL);
cleanup();
#ifdef SORT_MODULE
external_cleanup();
#endif
#ifdef MSDOS
abort ();
#else /* not MSDOS */
kill(_getpid(), SIGINT);
#endif /* not MSDOS */
}
#ifndef MSDOS
static void
huphandler()
{
signal(SIGHUP, SIG_DFL);
cleanup();
kill(_getpid(), SIGHUP);
}
#endif /* not MSDOS */
int
#ifdef SORT_MODULE
sort_main(argc, argv)
#else
main(argc, argv)
#endif
int argc;
char *argv[];
{
struct keyfield *key = NULL, gkey;
char *s;
int i, t, t2;
int checkonly = 0, mergeonly = 0, nfiles;
char *minus = "-", *outfile = minus, **files, *tmp;
FILE *ofp;
#ifdef MSDOS
prefix = getenv ("TMP");
if (!prefix)
prefix = ".";
#endif /* MSDOS */
inittables();
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, inthandler);
#ifndef MSDOS
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, huphandler);
#endif /* not MSDOS */
gkey.sword = gkey.eword = -1;
gkey.ignore = NULL;
gkey.translate = NULL;
gkey.numeric = gkey.month = gkey.reverse = 0;
gkey.skipsblanks = gkey.skipeblanks = 0;
for (i = 1; i < argc; ++i)
{
if (argv[i][0] == '+')
{
if (key)
insertkey(key);
key = (struct keyfield *) xmalloc(sizeof (struct keyfield));
key->eword = -1;
key->ignore = NULL;
key->translate = NULL;
key->skipsblanks = key->skipeblanks = 0;
key->numeric = key->month = key->reverse = 0;
s = argv[i] + 1;
if (!digits[UCHAR(*s)])
badfieldspec(argv[i]);
for (t = 0; digits[UCHAR(*s)]; ++s)
t = 10 * t + *s - '0';
t2 = 0;
if (*s == '.')
for (++s; digits[UCHAR(*s)]; ++s)
t2 = 10 * t2 + *s - '0';
if (t2 || t)
{
key->sword = t;
key->schar = t2;
}
else
key->sword = -1;
while (*s)
{
switch (*s)
{
case 'b':
key->skipsblanks = 1;
break;
case 'd':
key->ignore = nondictionary;
break;
case 'f':
key->translate = fold_tolower;
break;
case 'i':
key->ignore = nonprinting;
case 'M':
key->skipsblanks = key->skipeblanks = key->month = 1;
break;
case 'n':
key->skipsblanks = key->skipeblanks = key->numeric = 1;
break;
case 'r':
key->reverse = 1;
break;
default:
badfieldspec(argv[i]);
break;
}
++s;
}
}
else if (argv[i][0] == '-')
{
if (!strcmp("-", argv[i]))
break;
s = argv[i] + 1;
if (digits[UCHAR(*s)])
{
if (! key)
usage();
for (t = 0; digits[UCHAR(*s)]; ++s)
t = t * 10 + *s - '0';
t2 = 0;
if (*s == '.')
for (++s; digits[UCHAR(*s)]; ++s)
t2 = t2 * 10 + *s - '0';
key->eword = t;
key->echar = t2;
while (*s)
{
switch (*s)
{
case 'b':
key->skipeblanks = 1;
break;
case 'd':
key->ignore = nondictionary;
break;
case 'f':
key->translate = fold_tolower;
break;
case 'i':
key->ignore = nonprinting;
case 'M':
key->skipsblanks = key->skipeblanks = key->month = 1;
break;
case 'n':
key->skipsblanks = key->skipeblanks = key->numeric = 1;
break;
case 'r':
key->reverse = 1;
break;
default:
badfieldspec(argv[i]);
break;
}
++s;
}
insertkey(key);
key = NULL;
}
else
while (*s)
{
switch (*s)
{
case 'b':
gkey.skipsblanks = gkey.skipeblanks = 1;
break;
case 'c':
checkonly = 1;
break;
case 'd':
gkey.ignore = nondictionary;
break;
case 'f':
gkey.translate = fold_tolower;
break;
case 'i':
gkey.ignore = nonprinting;
break;
case 'M':
gkey.skipsblanks = gkey.skipeblanks = gkey.month = 1;
break;
case 'm':
mergeonly = 1;
break;
case 'n':
gkey.skipsblanks = gkey.skipeblanks = gkey.numeric = 1;
break;
case 'o':
if (s[1])
outfile = s + 1;
else
{
if (i == argc - 1)
{
fprintf(stderr, "sort: missing argument to -o\n");
exit(-1);
}
else
outfile = argv[++i];
}
goto outer;
case 'r':
gkey.reverse = reverse = 1;
break;
case 'T':
if (s[1])
prefix = s + 1;
else
{
if (i == argc - 1)
{
fprintf(stderr, "sort: missing argument to -T\n");
exit(-1);
}
else
prefix = argv[++i];
}
goto outer;
case 't':
if (s[1])
tab = *++s;
else if (i < argc - 1)
{
tab = *argv[++i];
goto outer;
}
else
{
fprintf(stderr, "sort: missing character for -tc\n");
exit(-1);
}
break;
case 'u':
unique = 1;
break;
case 'V':
fprintf(stderr, "%s\n", version);
break;
#ifdef MSDOS
case 'S':
{
long num;
if (s[1])
num = atol (s + 1);
else
{
if (i == argc - 1)
{
fprintf(stderr, "sort: missing argument to -S\n");
exit(-1);
}
else
num = atol (argv[++i]);
}
if (num > 32767 || num <= 0)
fprintf(stderr, "sort: argument to -S must be < 32k\n");
else
sortalloc = (int) num;
}
goto outer;
#endif /* MSDOS */
default:
usage();
exit(-1);
}
++s;
}
}
else
break;
outer:;
}
if (key)
insertkey(key);
/* Inheritance of global options to individual keys. */
for (key = keyhead.next; key; key = key->next)
if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
&& !key->skipeblanks && !key->month && !key->numeric)
{
key->ignore = gkey.ignore;
key->translate = gkey.translate;
key->skipsblanks = gkey.skipsblanks;
key->skipeblanks = gkey.skipeblanks;
key->month = gkey.month;
key->numeric = gkey.numeric;
key->reverse = gkey.reverse;
}
if (! keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
|| gkey.reverse || gkey.skipeblanks
|| gkey.month || gkey.numeric))
insertkey(&gkey);
if (i < argc)
{
nfiles = argc - i;
files = &argv[i];
}
else
{
nfiles = 1;
files = −
}
if (checkonly)
exit(check(files, nfiles));
if (strcmp(outfile, "-"))
{
for (i = 0; i < nfiles; ++i)
if (!strcmp(outfile, files[i]))
break;
if (i == nfiles)
ofp = xfopen(outfile, "w");
else
{
char buf[8192];
FILE *fp = xfopen(outfile, "r");
int cc;
tmp = tempname();
ofp = xfopen(tmp, "w");
while (cc = fread(buf, 1, sizeof buf, fp))
if (cc < 0)
{
fprintf(stderr, "sort: error in fread (%s)\n",
strerror(errno));
cleanup();
exit(-1);
}
else
xfwrite(buf, 1, cc, ofp);
xfclose(ofp);
xfclose(fp);
files[i] = tmp;
ofp = xfopen(outfile, "w");
}
}
else
ofp = stdout;
if (mergeonly)
merge(files, nfiles, ofp);
else
sort(files, nfiles, ofp);
/* close the file */
if (ofp != stdout)
fclose(ofp);
cleanup();
#ifdef SORT_MODULE
return(0);
#else /* not SORT_MODULE */
exit(0);
#endif /* not SORT_MODULE */
}