home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-385-Vol-1of3.iso
/
m
/
msc7.zip
/
QSORT.C
< prev
next >
Wrap
C/C++ Source or Header
|
1992-02-17
|
18KB
|
640 lines
/* ----------------------------------------------------------------
* QSORT.C
*
* QSORT is a demonstration program which reads data
* into memory, and sorts it using the Quicksort algorithm.
*
* The data to be sorted is limited only by memory size.
*
* Note: unlike DOS SORT, the QSORT is CASE-SENSITIVE.
*
* QSORT may be built to use either real mode or protected
* mode. The protected mode version uses DPMI to allocate
* extended memory from the DPMI host. Assuming that sufficient
* extended memory is present, the protected mode version
* can sort much larger data sets than the real mode version,
* and even the real mode version outstrips the standard DOS
* sort command.
*
* This program is intended to demonstrate how to access large
* data sets from within a small model C program.
*
* ----------------------------------------------------------------
* Program usage
* Each line in the input stream is one record to be sorted.
* Each line may be up to 254 characters long.
*
* Arguments:
*
* /R reverse (descending) sort
* /+nnn sort on column nnn
* < input input is always from standard in
* > output output is always to standard out
*
* Possible errors: record not found
* input file error
* output file error
* bad argument
* input line too long (greater than 254 bytes)
*
*
*
* ----------------------------------------------------------------
* Implementation notes:
*
* o QSORT can be configured to run in real mode or
* in protected mode, by defining the USEREAL symbol.
* The program was developed and tested in real mode
* in order to make debugging easy.
*
* o QSORT has been tested with Borland C version 2.0 and 3.0
* and with Microsoft C version 6.0 and 7.0.
*
* ----------------------------------------------------------------
*
* QSORT internals, data structures, theory of operation
*
* The Quicksort algorithm is described by Donald Knuth in
* "The Art of Computer Programming Vol 3: Searching and Sorting"
*
* o Memory Management
*
*
* Input record storage:
* Input records are stored in what we call "raw arrays" which
* are dynamically allocated 64K blocks. No record crosses
* a 64K boundary. The input records are stored as counted
* strings, which are also null terminated to make the sort
* comparisons more efficient.
*
* Pointer storage:
* We store and sort pointers to the input records.
* The pointers are also stored in dynamically allocated
* 64K blocks. Each pointer is 4 bytes, thus each 64K block
* holds 16K pointers. Unlike the "huge" memory model, we
* do not assume that the blocks are referenced by contiguous
* selectors. Instead, we maintain a master table (the Array
* of Arrays, or AA[n]) which is used to locate a pointer.
*
* Indices:
* An INDEX is essentially a pointer to a pointer. The INDEX
* selects a memory block and a pointer within the block.
*
* Graphically, with 32K pointers (requiring 128K to hold pointers)
*
AA
[0] ----------> PointerBlock0 RawBlock0
[1] ------+ [0] ---------------> DataString[0]
| [1] ---------+
| [2] ... |
| . +-----> DataString[1]
| .
| .
|
+---> PointerBlock1
[0] ----+
[1] -+ |
. | | RawBlockN
. | +---------> DataString[16K]
. |
+------------> DataString[16K+1]
*
*
* So, to get a data pointer from an index, e.g. Index[anum=1, aidx=0]
*
* char **PointerToPointer = AA[anum]
* char *PointerToData = PointerToPointer[aidx]
*
*
*/
/*
* If USEREAL is defined, QSORT does not enter protected mode,
* and uses real mode memory allocation for dynamic storage
*/
// #define USEREAL 1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dos.h>
#include "dpmi.h"
#ifdef USEREAL
#include <alloc.h>
#endif
#define FARNULL ((void *) 0L)
/* ----------------------------------------------------------------
* Define Data Structures
* ----------------------------------------------------------------
*/
/* A record is a string of 1-254 bytes
* byte 0 = len
* byte 1-255 = null-terminated data
*
* An Index refers to a string stored in far memory
* anum = array number (lookup address in AddrPtrArrays)
* aidx = index within array
*
* We maintain aidx as an offset into array,
* i.e. aidx = 0, 4, 8, 12, ...
*
* The real advantage is that therefore the index wraps automatically
* into the array number when it overflows, and we needn't check for
* overflow. Remember that we are dereferencing the array each time
* we need to get a pointer from an index.
*
*/
union Index_t
{
unsigned long L; // Use as long
struct stag { // so aidx overflows into anum.
unsigned int aidx; //
unsigned int anum; // Use as words for direct access.
} W;
};
typedef union Index_t Index; // Index = long or two words
typedef char far *CFPTR; // CFPTR = far pointer to char
typedef CFPTR far *CFPTRP; // CFPTRP = pointer to CFPTR
/* ----------------------------------------------------------------
* Global Data
* ----------------------------------------------------------------
*/
// The Raw Arrays hold the raw input data, converted into string records
int Ascending; // sort order
int SortColumn; // input column for sort key
Index NumRecs; // number of records read into database ( * 4)
uLong NumComps = 0; // Statistics: number of comparisons made
uLong NumSwaps = 0; // Statistics: number of swaps made
#define NumRawArrays 512 // 512 * 64K = 32Meg maximum data
CFPTR AddrRawArrays[NumRawArrays]; // array of pointers raw data blocks
uShort FreeRawArrays[NumRawArrays]; // Next free byte in each array
uShort SizeRawArrays[NumRawArrays]; // Size in bytes of each array
int CurrentRawArray = 0; // current array
// The AA (Address Array) is an array of pointers to 64K blocks.
#define NumAA 256 // maximum number of address arrays
CFPTRP AA[NumAA]; // pointers to array of pointers
uShort NextFreePtr = 0; // next free in current array
uShort CurrentAA = 0; // current array being filled
/* ----------------------------------------------------------------
* Print usage info and exit
* ----------------------------------------------------------------
*/
void PrintUsage() {
fprintf(stderr, "QSORT [/R] [/+nnn] [< INFILE] [> OUTFILE]\n");
fprintf(stderr, " /R for reverse (descending) sort\n");
fprintf(stderr, " /+nnn to sort on column nnn\n");
fprintf(stderr, " < INFILE to specify input file\n");
fprintf(stderr, " > OUTFILE to specify output file\n");
exit(1);
}
/* ----------------------------------------------------------------
* Parse Arguments from Command Line
* args:
* argc number of args on command line
* argv vector of argument strings
* res:
* Exit if cannot parse arguments
*
* Load global data and flags
* ----------------------------------------------------------------
*/
void ParseArgs(int argc, char *argv[])
{
int i;
// defaults:
SortColumn = 1;
Ascending = TRUE;
// parse command line
if (argc == 1) return; // no args, just program name
if (argc > 3) {
PrintUsage();
}
for (i=1; i<argc; i++) {
if (stricmp (argv[i], "/R") == 0) {
Ascending = FALSE;
continue;
}
if ((argv[i][0] == '/') && argv[i][1] == '+') {
SortColumn = atoi( argv[i] + 2);
continue;
}
PrintUsage();
}
if (SortColumn < 1) PrintUsage();
}
/* ----------------------------------------------------------------
* AllocateRawArray
* ----------------------------------------------------------------
*/
void AllocateRawArray(int which)
{
#ifndef USEREAL
struct DPMImemory_t MemStruc;
#endif
if (which >= NumRawArrays) {
fprintf(stderr, "Allocate Raw Array: exceeded internal limit after %lu records, exiting\n", NumRecs.L/4);
exit(1);
}
// For now, we always allocate 64K-1 byte arrays
#define RAWSIZE ((uShort) 65535)
#ifdef USEREAL
if ((AddrRawArrays[which] = farmalloc(RAWSIZE)) == 0) {
#else
if ((AddrRawArrays[which] = DPMImalloc(RAWSIZE, &MemStruc)) == 0) {
#endif
fprintf(stderr, "Allocate Raw Array: Out of room after %lu records, exiting\n", NumRecs.L/4);
exit(1);
}
FreeRawArrays[which] = 0;
SizeRawArrays[which] = RAWSIZE;
}
/* ----------------------------------------------------------------
* Initialize Arrays
* ----------------------------------------------------------------
*/
void InitializeArrays()
{
int i;
NumRecs.L = 0;
for (i=0; i<NumAA; i++) AA[i] = FARNULL;
for (i=0; i<NumRawArrays; i++) {
SizeRawArrays[i] = 0;
FreeRawArrays[i] = 0;
}
AllocateRawArray(0);
}
/* ----------------------------------------------------------------
* Store Pointer
* ----------------------------------------------------------------
*/
void StorePointer(CFPTR ptr)
{
#ifndef USEREAL
struct DPMImemory_t MemStruc;
#endif
CFPTRP PtrArray;
PtrArray = AA[NumRecs.W.anum];
if (PtrArray == FARNULL) {
#ifdef USEREAL
PtrArray = farmalloc(65536);
#else
PtrArray = DPMImalloc(0, &MemStruc);
#endif
AA[NumRecs.W.anum] = PtrArray;
if (PtrArray == FARNULL) {
fprintf(stderr, "Store Pointer: Out of room after %ld records, exiting\n", NumRecs.L / 4);
exit(1);
}
}
/* store the pointer at index indicated by NumRecs */
*((CFPTRP) ( ((CFPTR) AA[NumRecs.W.anum]) + NumRecs.W.aidx)) = ptr;
}
/* ----------------------------------------------------------------
* Read Next Record
* args:
* none
* ret:
* FALSE if end of file
* otherwise, new record is loaded with input data
* ----------------------------------------------------------------
*/
int ReadNextRecord()
{
char far *ptr; // pointer to store text
char far *lenptr; // pointer to length field
int len = 0; // accumulated length
int c; // input character
if (SizeRawArrays[CurrentRawArray] - FreeRawArrays[CurrentRawArray] < 256)
AllocateRawArray(++CurrentRawArray);
ptr = AddrRawArrays[CurrentRawArray] + FreeRawArrays[CurrentRawArray];
lenptr = ptr++; // remember string start, and advance
while (TRUE) {
c = getchar();
if (c == EOF) break;
if (++len > 254) {
fprintf(stderr, "Input line too long at %lu, exiting\n", NumRecs.L/4);
exit(1);
}
*ptr++ = c;
if (c == '\n') break;
}
*ptr++ = 0; // terminate with null
if (len != 0) {
*ptr = 0; // mark (len) next string empty
*lenptr = len; // record size of this string
FreeRawArrays[CurrentRawArray] += (len + 3); // record number of bytes consumed
StorePointer(lenptr); // store the pointer in the database
return TRUE;
}
else return FALSE; // no string parsed
}
/* ----------------------------------------------------------------
* Read Data
* ----------------------------------------------------------------
*/
void ReadData()
{
while (!feof(stdin)) {
if (ReadNextRecord()) NumRecs.L += 4;
}
}
/* ----------------------------------------------------------------
* Write Data
* ----------------------------------------------------------------
*/
void WriteData()
{
Index Idx;
CFPTR Ptr;
int len;
for (Idx.L = 0; Idx.L < NumRecs.L; Idx.L += 4) {
Ptr = *((CFPTRP) ( ((CFPTR) AA[Idx.W.anum]) + Idx.W.aidx));
len = *Ptr++;
while (len-- > 0) putchar(*Ptr++); // write counted string
}
fflush(stdout); // because we don't go through cleanup code
}
#ifndef USEREAL
/* ----------------------------------------------------------------
* Enter protected mode
* ----------------------------------------------------------------
*/
void DPMIGoProtectedMode()
{
uShort flags;
uChar processor;
uChar majorVersion;
uChar minorVersion;
uShort nPrivateDataParas;
void (far* entryAddress)();
int Result;
Result = DPMIObtainSwitchEntryPoint(&flags, &processor, &majorVersion,
&minorVersion, &nPrivateDataParas, &entryAddress);
if (Result != 0) {
fprintf(stderr, "No DPMI host found, exiting\n");
exit(Result);
}
Result = DPMIEnterProtectedMode(entryAddress, 0,
nPrivateDataParas);
if (Result != 0) {
fprintf(stderr, "Error attempting to enter protected mode, exiting\n");
exit(Result);
}
}
#endif
/* ----------------------------------------------------------------
* Compare record referenced by idx1, idx2
* ret:
* =0 if idx1 = idx2
* >0 if idx1 > idx2
* <0 if idx1 < idx2
*
* Note: this comparison is CASE SENSITIVE!!!
*
* ----------------------------------------------------------------
*/
int CMP(Index idx1, Index idx2)
{
CFPTR ptr1;
CFPTR ptr2;
int len;
int result;
NumComps++;
ptr1 = *((CFPTRP) ( ((CFPTR) AA[idx1.W.anum]) + idx1.W.aidx));
ptr2 = *((CFPTRP) ( ((CFPTR) AA[idx2.W.anum]) + idx2.W.aidx));
len = min(*ptr1, *ptr2);
ptr1 += min(*ptr1, SortColumn);
ptr2 += min(*ptr2, SortColumn);
/*
* Code the inner loop of this comparison in assembler for speed.
* This is not strictly necessary, of course, but the performance
* improvement is dramatic.
*/
_asm mov dx, ds
_asm les di, ptr1
_asm lds si, ptr2
_asm mov cx, len
_asm cld
_asm repe cmpsb
_asm mov al, [si-1]
_asm mov bl, es:[di-1]
_asm xor ah, ah
_asm mov bh, ah
_asm sub ax, bx
_asm mov ds, dx
_asm mov result, ax
return (Ascending ? -result : result);
}
void Swap(Index idx1, Index idx2)
{
CFPTR temp; // temp string pointer
CFPTRP p1; // point to idx1 string pointer
CFPTRP p2; // point to idx2 string pointer
NumSwaps++;
p1 = (CFPTRP) (((CFPTR) AA[idx1.W.anum]) + idx1.W.aidx);
p2 = (CFPTRP) (((CFPTR) AA[idx2.W.anum]) + idx2.W.aidx);
temp = *p1;
*p1 = *p2;
*p2 = temp;
}
/* ----------------------------------------------------------------
*
* QuickSort works by picking a "pivot", then dividing the data
* into records less than the pivot and records greater than the pivot.
*
* Then, call Quicksort recursively - once to sort the data below
* the pivot, and once to sort the data above the pivot.
*
* If there are only 2 elements in the range, sort them and return
* from the recursion.
*
* ----------------------------------------------------------------
*/
void Quicksort(Index Low, Index High)
{
Index Up; // Up starts at the bottom of the range and walks up
Index Down; // Down starts at the top of the range and walks down
Index Temp1, Temp2;
/* called with bad argument? */
if ( Low.L >= High.L) return;
/* if only two elements, swap if needed to get them in order */
if ( (High.L - Low.L) == 4 ) {
if ( CMP(Low, High) > 0 )
Swap(Low, High );
return;
}
/* if only three elements, swap as needed */
if ( (High.L - Low.L) == 8 ) {
Temp1.L = Low.L + 4;
Temp2.L = Low.L + 8;
if ( CMP(Low, Temp1) > 0) Swap(Low, Temp1);
if ( CMP(Low, Temp2) > 0) Swap(Low, Temp2);
if ( CMP(Temp1, Temp2) > 0) Swap(Temp1, Temp2);
return;
}
/* if more than three elements, Quicksort them */
do {
/* Move in from both sides towards the pivot element */
Up = Low;
Down = High;
while( (Up.L < Down.L) && (CMP(Up, High) <= 0))
Up.L += 4;
while( (Down.L > Up.L) && (CMP(Down, High) >= 0))
Down.L -= 4;
/* If we haven't reached the pivot, it means that two
* elements on either side are out of order, so swap them.
*/
if( Up.L < Down.L ) Swap( Up, Down );
} while ( Up.L < Down.L );
/* Move pivot element back to its proper place in the array. */
Swap( Up, High );
/* call for partitions above and below the pivot */
/* UNLESS the partition in question contains < 2 elements */
/* if lower partition contains 0 or 1 elements, just sort upper */
if (Up.L <= Low.L+4) {
Up.L += 4;
Quicksort(Up, High);
return;
}
/* if higher partition contains 0 or 1 elements, just sort lower */
if (Up.L >= High.L-4) {
Up.L -= 4;
Quicksort(Low, Up);
return;
}
/* Sort smaller partition first to conserve recursion stack */
if( (Up.L - Low.L) < (High.L - Up.L) ) {
Up.L -= 4;
Quicksort( Low, Up );
Up.L += 8;
Quicksort( Up , High );
}
else {
Up.L += 4;
Quicksort( Up, High );
Up.L -= 8;
Quicksort( Low, Up );
}
}
/* ----------------------------------------------------------------
* Main
* ----------------------------------------------------------------
*/
void main(int argc, char *argv[])
{
Index zero;
uLong secs; // elapsed time in seconds
#ifndef USEREAL
DPMIGoProtectedMode();
#endif
#ifdef USEREAL
fprintf(stderr, "QSORT running in real mode\n");
#else
fprintf(stderr, "QSORT running in protected mode\n");
#endif
ParseArgs(argc, argv);
InitializeArrays();
ReadData();
fprintf(stderr, "Sorting %lu records\n", NumRecs.L/4);
if (NumRecs.L > 1) {
NumRecs.L -= 4;
zero.L = 0;
Quicksort(zero, NumRecs);
NumRecs.L += 4;
}
WriteData();
fprintf(stderr, "Comps: %lu Swaps: %lu\n", NumComps, NumSwaps);
}