home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Source Code 1994 March
/
Source_Code_CD-ROM_Walnut_Creek_March_1994.iso
/
netsrcs
/
fastsort
< prev
next >
Wrap
Internet Message Format
|
1989-03-07
|
9KB
From tmdonahu@athena.mit.edu Tue Mar 7 14:36:58 1989
Path: uunet!labrea!bloom-beacon!athena.mit.edu!tmdonahu
From: tmdonahu@athena.mit.edu (Terence M Donahue)
Newsgroups: alt.sources
Subject: A faster sort
Message-ID: <9667@bloom-beacon.MIT.EDU>
Date: 7 Mar 89 19:36:58 GMT
Sender: daemon@bloom-beacon.MIT.EDU
Reply-To: tmdonahu@athena.mit.edu (Terence M Donahue)
Distribution: usa
Organization: Massachusetts Institute of Technology
Lines: 304
I was curious about all of these "better faster stronger" sorting
programs, so I decided to try them out myself using a Vaxstation 2000
running Unix.
Functionality
-------------
The man page for sort(1) says that it merges all the
files together and sorts the entire mess. Both fastsort and
fastersort simply sort each file separately.
In addition, fastsort produced different output from sort and
fastersort, as reported by diff.
Improvements
------------
Use fputs() and putc() to output the sorted lines instead of fprintf()
inline the strcmp function into compare().
Ideally the compare() function should be built in to the qsort
routine, eliminating the significant amount of procedure call
overhead in the program. This was not done in fastestsort
because my homemade quicksort is significantly slower than the
qsort in the C library here. The qsort sources are not freely
distributable. The inlining of the compare function would
result in a significant speed improvement.
Speed
-----
All programs were compiled with gcc with optimization on.
termcap is the first 1000 lines of /etc/termcap, a file with long lines
dict is the first 10000 lines of /usr/dict/words in reverse order, a
file with short lines.
All times are in seconds
Method termcap dict
------ ------- ----
sort 1.6u 0.5s 0:03 11.7u 0.4s 0:13
fastsort 13.3u 1.2s 0:15 22.0u 1.9s 0:25
fastersort 3.0u 0.2s 0:03 36.4u 0.4s 0:38
fastestsort 1.2u 0.2s 0:01 13.1u 0.5s 0:14
So good old sort beats out fastsort and fastersort. The latest version,
fastestsort, does about as well as sort.
Profiling
---------
fastsort on termcap
% cumulative self self total
time seconds seconds calls ms/call ms/call name
65.3 9.05 9.05 1 9050.00 9522.34 _qst [4]
10.3 10.48 1.43 1 1430.00 11010.00 _qsort [3]
7.6 11.54 1.06 mcount (30)
5.8 12.34 0.80 2001 0.40 0.46 _fgets [5]
4.6 12.98 0.64 1007 0.64 0.68 __doprnt [7]
3.8 13.51 0.53 11095 0.05 0.05 _strcmp [8]
fastsort on dict
% cumulative self self total
time seconds seconds calls ms/call ms/call name
39.4 10.20 10.20 mcount (25)
24.3 16.48 6.28 125441 0.05 0.05 _strcmp [5]
13.2 19.91 3.43 1 3430.00 9209.27 _qst [4]
10.2 22.54 2.63 10007 0.26 0.27 __doprnt [7]
4.8 23.77 1.23 20001 0.06 0.07 _fgets [8]
3.4 24.66 0.89 1 890.00 15690.00 _main [2]
fastersort on termcap
% cumulative self self total
time seconds seconds calls ms/call ms/call name
35.7 1.27 1.27 mcount (21)
16.0 1.84 0.57 1000 0.57 0.62 __doprnt [7]
14.0 2.34 0.50 10900 0.05 0.05 _strcmp [8]
9.3 2.67 0.33 10900 0.03 0.08 _compare [5]
8.4 2.97 0.30 1 300.00 1037.79 _qst [4]
8.1 3.26 0.29 1 290.00 2290.00 _main [2]
fastersort on dict
% cumulative self self total
time seconds seconds calls ms/call ms/call name
47.6 23.96 23.96 mcount (23)
21.0 34.54 10.58 218572 0.05 0.05 _strcmp [6]
12.5 40.83 6.29 218572 0.03 0.08 _compare [5]
9.9 45.82 4.99 1 4990.00 21027.27 _qst [4]
fastestsort on termcap
% cumulative self self total
time seconds seconds calls ms/call ms/call name
27.1 0.61 0.61 mcount (22)
19.1 1.04 0.43 10900 0.04 0.04 _compare [5]
18.7 1.46 0.42 1 420.00 802.23 _qst [4]
10.7 1.70 0.24 1 240.00 1640.00 _main [2]
8.9 1.90 0.20 1 200.00 200.00 _read [6]
4.9 2.01 0.11 1000 0.11 0.17 _fputs [7]
4.0 2.10 0.09 6 15.00 15.00 _write [9]
2.2 2.15 0.05 2 25.00 25.00 _open [10]
2.2 2.20 0.05 1 50.00 900.00 _qsort [3]
fastestsort on dict
% cumulative self self total
time seconds seconds calls ms/call ms/call name
44.9 12.34 12.34 mcount (21)
29.2 20.38 8.04 218572 0.04 0.04 _compare [5]
17.8 25.26 4.88 1 4880.00 12523.14 _qst [4]
2.8 26.02 0.76 1 760.00 15150.00 _main [2]
2.2 26.62 0.60 10000 0.06 0.07 _fputs [6]
1.0 26.89 0.27 1 270.00 13190.00 _qsort [3]
Fastestsort
-----------
--------------------------- fastestsort.c ------------------------------
/*
*
* fastsort - sort a file in place - fast!
*
* Written 03/01/89 by Edwin R. Carp
*
* Copyright 1989 by Edwin R. Carp
*
*
* John F. Haugh II, modified 3/3/89
*
* Completely rehacked to remove the two pass garbage
* and quit pushing strings about in memory. Reads
* entire file in with one call, splits into lines and
* saves pointers to each. Then sorts pointers and
* outputs lines.
*
* No big deal ...
*
*
* Terence M. Donahue, modified 3/4/89
*
* Uses fputs() instead of fprintf() to output the sorted lines
* Inlined the string compare into the compare() function.
*
* It is now about as fast as sort on my machine...
*
* There is a slow homemade quicksort routine #ifdef'ed out.
* Once it is fast enough, compile -DHOMEMADE to have it replace qsort
*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
nomem (s)
char *s;
{
fprintf (stderr, "Can't get memory for %s\n", s);
exit (1);
}
#ifdef HOMEMADE
/*
** This homemade quicksort is currently much slower than qsort,
** especially for large arrays.
**
** Future improvements:
**
** inline the strcmps
** do the recursive sort call on the smaller partition
** switch to an insertion sort on partitions smaller than 8 elements
*/
#define exch(x,y) (temp = x, x = y, y = temp)
void sort(v,left,right)
char *v[];
int left,right;
{
int i, last;
char *temp;
while (left < right) {
/* Determine pivot by taking the median of left, middle, and right */
i = (left+right)>>1;
if (strcmp(v[left],v[right]) > 0) {
if (strcmp(v[left],v[i]) < 0) i = left;
else if (strcmp(v[right],v[i]) > 0) i = right;
}
else {
if (strcmp(v[left],v[i]) > 0) i = left;
else if (strcmp(v[right],v[i]) < 0) i = right;
}
exch(v[left],v[i]);
last = left;
for (i=left+1; i <= right; i++)
if (strcmp(v[i],v[left]) < 0)
if (i != ++last) { exch(v[last],v[i]); }
exch(v[left],v[last]);
if (left < last-1) sort(v, left, last-1);
left = last+1;
}
}
#else
compare(sp1,sp2)
char **sp1,**sp2;
{
char *s1,*s2;
s1 = *sp1; s2 = *sp2;
while(*s1 == *s2++)
if(*s1++ == '\0') return 0;
return(*s1 - *--s2);
}
#endif
main(argc, argv)
int argc;
char **argv;
{
int fd;
char *malloc ();
char *realloc ();
char *cp;
char *buf;
char **lines;
int cnt, cur, max;
struct stat statbuf;
FILE *fp;
if (argc < 2) {
fprintf (stderr, "usage: fastsort files ...\n");
exit (1);
}
while (*++argv) {
if (stat (*argv, &statbuf)) {
perror(*argv);
continue;
}
if (! (buf = malloc ((unsigned) statbuf.st_size + 1)))
nomem (*argv);
if ((fd = open (*argv, O_RDONLY)) < 0) {
perror (*argv);
continue;
}
if (read (fd, buf, statbuf.st_size) != statbuf.st_size) {
perror (*argv);
free (buf);
continue;
}
close (fd);
*(cp = &buf[statbuf.st_size]) = '\0';
cur = 0;
max = 10;
if (! (lines = (char **) malloc (sizeof (char *) * max)))
nomem (*argv);
while (--cp != buf) {
if (*cp == '\n') {
*cp = '\0';
if (cur == max)
if (! (lines = (char **) realloc (lines, sizeof (char *) * (max += 10))))
nomem (*argv);
lines[cur++] = cp + 1;
}
}
lines[0] = buf; /* fix our earlier mistake :-) */
#ifdef HOMEMADE
sort (lines, 0, cur-1);
#else
qsort ((char *) lines, cur, sizeof (char *), compare);
#endif
if (! (fp = fopen (*argv, "w"))) {
perror (*argv);
continue;
}
for (max = cur, cur = 0;cur < max;cur++) {
fputs (lines[cur], fp);
putc ('\n', fp);
}
fflush (fp);
fclose (fp);
free (lines);
free (buf);
}
exit (0);
}
------------------------- end of fastestsort.c ---------------------------
Terry Donahue tmdonahu@athena.mit.edu