home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.barnyard.co.uk
/
2015.02.ftp.barnyard.co.uk.tar
/
ftp.barnyard.co.uk
/
cpm
/
walnut-creek-CDROM
/
CPM
/
PROGRAMS
/
SORT
/
B-SORT.ARK
/
B-SORT.C
next >
Wrap
C/C++ Source or Header
|
1983-09-09
|
12KB
|
433 lines
/*
B-SORT a sorting program
This is a self-contained file except for the standard included
definitions and is part of the larger development of a message-
retrieval system. Since the algorithms involved are well known
it is felt that putting this program in public domain would serve
a useful purpose, namely provide some examples of some programming
techniques using the 'C' language. The compiler used is the BDS
vs 1.46 and use is made of at least one special feature of this
compiler(see 'printf' function).
Nominally this is a sorting program and can certainly be used for
that purpose. However, since the main benefit of using a B-tree is
when using disk-based structures, it is quite possible that this
will not be your hot-dog sort utility. It is relatively fast though
and I would be interested in any comparisons anyone makes. A later
effort is planned to build a virtual memory support package which
will then allow the 'nodes' of the b-tree to be stored as disk
sectors and at that point this program will allow sorting of
arbitrarily large text files. Again, that is still not the main
objective for it, information retrieval is.
I will not attempt to give even an introduction to B-trees here. My
primary reference and, in fact, the bareface source of this program
is N. Wirth's wonderful book "Algorithms+Data Structures=Programs".
I passed this book up several times in the past, but when I was
looking for a treatment of this subject, I found that it had a very
nice balance between explanation and example(the best way I learn)
and since 'C' and PASCAL are 'kissin cousins', it was 'straight
forward' to do the transcribing. So go to god and read section
4.51 if you want to understand what is going on here. Also you can
get a little contrast of the styles which result from the different
features of the two languages(especially w re pointer variables).
Things to note in this program which you may not already be
familiar with are the following:
1) A nested data structure(PAGE) is used. This is possible
in PASCAL and PL/I too and makes life a lot more pleasant.
Try doing the same thing in BASIC or FORTRAN, what a
mess! In a sense it would be easier in assembler.
2) Note that the tree structure used is a balanced one
so that no single branch gets long at the expense of
others. To see the depth level of the tree, turn on
the SPRINT parameter and note the first column of the
output on any sort. There is a trade-off here, though
in that it takes longer to build the tree than if it
were just a binary tree(not an AVL though). Thats why
b-trees are best for retrieval rather than simple
sorts.
3) The logic uses a rather interesting recursive structure
in which 'emerging' items are handed back through higher
levels until perhaps to the root of the tree. See the
variable called 'u' in function 'search'.
4) The parameter KEYPTR allows the option of including the
string storage area in the nodes themselves as opposed
to allocated areas. The latter is faster since no move-
ment of strings is necessary after they have been given
initial allocations. However, for disk-based use the
strings(keys) would need to be in the nodes. To see the
difference in performance(big!) just undefine(comment
out) this parameter.
5) For further perfomance comparison play with the FAST
parameter. If undefined, the compiler will build a
version of B-sort with a high-level version of two
block move operations. These same functions are imple-
mented in assembler code in the file BDSNEW.CSM. By
linking with that special library, you can get a version
of this program which runs quite a bit faster. Let me
know if you get any amazing results.
6) One peculiarity of the 'C' language came up in an earlier
version of B-sort, it would not sort itself! Clue, this
had to do with the way that 'printf' works. See if you
can figure out how this happened and how it was fixed.
I hope this program is useful for some in learning more about the
'C' language and an interesting algorithm(the key to better software).
If you make any improvements give me a copy. If you mutilate it, keep
it to yourself.
Jack Riley (303) 499-9169 RCPM, Boulder, CO.
*/
#include "bdscio1.h" /* version with ALLOC_ON true */
#DEFINE N 2 /* half-size of b-tree node */
/* options for customizing the program */
#DEFINE FAST /* uses external 'move' functions for assemlber vs */
/* #DEFINE SPRINT */ /* provides statistics on keys in output */
#DEFINE KEYPTR /* uses pointer references to strings instead of
arrays in the b-tree nodes */
/* #DEFINE DIAGNOSE */ /* turns on voluminous trace information on actions
taken by program logic.. perhaps instructive */
/* special assigned values */
#DEFINE KEYSIZE 80
#DEFINE MAX_LEN 20
#DEFINE MAXPRINT 3000
/* dependent parameters */
#DEFINE NN N+N
#DEFINE NM1 N-1
#DEFINE NM2 N-2
#DEFINE NP1 N+1
/* structure definitions */
#DEFINE ITEM struct sitem
#DEFINE PAGE struct spage
ITEM {
#ifndef KEYPTR
CHAR KEY[KEYSIZE];
#endif
#ifdef KEYPTR
CHAR *KEY;
#endif
PAGE *P;
UNSIGNED COUNT;
} oneitem;
PAGE {
UNSIGNED M;
PAGE *P0;
ITEM E[NN];
} onepage;
/* external variables */
PAGE *root,*q;
ITEM g;
FILE infile,l_buffer;
CHAR infilnam[MAX_LEN], instrg[MAXLINE], o_flg;
INT sizitem,sizpage, maxcount, nkeys, tokcount;
/* beginning of programs */
use_err() /* Usage message: */
{ printfc("\nERROR: Invalid parameter specification\n\n");
printfc("Usage: b-sort <filename> <flag(s)>\n\n");
printfc(" -o <filename> = Write output to named file\n");
exit(0); }
main(argc,argv)
int argc;
char **argv;
{
char *arg;
o_flg=FALSE; /* output file flag */
if (argc < 2) use_err();
strcpy(infilnam,*++argv);
if( fopen(infilnam,infile) == ERROR )
{ printfc("\nERROR: Unable to open input file: %s\n",infilnam);
EXIT(0); }
if(--argc > 0)
if(*(arg=*++argv) == '-')
if(tolower(*++arg) == 'o')
{ o_flg++;
if(--argc == 0)
{printfc("\nneed output file name");use_err();}
if(fcreat(*++argv,l_buffer) == ERROR)
{printfc("ERROR: Unable to create output file - %s\n",
*argv); exit(0);}
}
_allocp=NULL;
root=NULL; sizitem=sizeof(oneitem); sizpage=sizeof(onepage);
tokcount=nkeys=0;
#ifdef DIAGNOSE
printf("\n&root=%x,g=%x,sizi=%d,sizp=%d",&root,g,sizitem,sizpage);
#endif
while( fgets(instrg,infile) )
{
if( trim(instrg) <= 0 ) continue;
instrg[KEYSIZE-1]='\0';
#ifdef DIAGNOSE
printf("\n\nsearch key= %s",instrg);
#endif
if( search(instrg,root,g) )
{ q=root;
if( (root=alloc(sizpage)) == NULL)
{ printfc("\nERROR unable to allocate page");
EXIT(0); }
#ifdef DIAGNOSE
printf(" root=%x, q=%x",root,q);
#endif
root->M=1; root->P0=q; moveb(root->E[0] , g ,sizitem);
}
}
printfc("\nEnd of input\n");
printfc("\nnumber of unique keys=%d, total key count=%d\n",
nkeys,tokcount);
if(!o_flg) pause();
maxcount=MAXPRINT; printtree(root,1);
printf("\n");
if(o_flg)
{ putc(CPMEOF,l_buffer); fflush(l_buffer); fclose(l_buffer); }
}
CHAR search(x,a,v)
CHAR *x;
PAGE *a;
ITEM *v;
{
INT i,k,l,r,cmp; PAGE *q,*b; ITEM u; CHAR *t;
/* Search for key x on page a */
if(a==NULL) /* ITEM with key x is not in tree */
{
++tokcount; ++nkeys; defkey(&v->KEY,x);
#ifdef DIAGNOSE
printf("\n a == null v(=%x)->KEY=%s",v,v->KEY);
#endif
v->COUNT=1;
v->P=NULL; return (TRUE) ; /* TRUE means not found */
}
else
{ l=0; r=a->M-1; /* binary array search */
do {
k=(l+r)/2;
cmp= strcmp(x,t=(a->E[k].KEY));
#ifdef DIAGNOSE
printf("\ncmp=%d,r=%d,l=%d,a(=%x)->P0=%x/E[k=%d].P=%x/E[k].KEY=%x=%s",
cmp,r,l,a,a->P0,k,a->E[k].P,t,t );
#endif
if( cmp <= 0) r=k-1;
if( cmp >= 0) l=k+1;
} while ( r >= l );
if( cmp == 0 ) /* found it, bump counter */
{ ++tokcount; ++a->E[k].COUNT; return(FALSE); }
else /* test if item is not on this page */
{
q = ( r < 0 ) ? a->P0 : a->E[r].P;
if( !search(x,q,u) ) return(FALSE);
}
}
/* ---- insert an item */
if (a->M < NN)
{ /* page not full yet, add to it. 'Bump' items from r+1 to
M-1 */
movdnb( a->E[r+2] , a->E[r+1] , sizitem*((a->M++)-r-1) );
moveb( a->E[r+1] , u , sizitem );
return(FALSE);
}
else
{ /* page full, split it and push center item upward in tree */
if( (b = alloc(sizpage)) == NULL )
printf("\nOut of memory");
#ifdef DIAGNOSE
printf("\n\n ****** new node at %x",b);
#endif
if ( r <= NM1 ) /* put new item in old page */
{
if ( r == NM1 ) moveb( v , u , sizitem );
else
{ /* 'bump' down items from r+2 to N-1 */
moveb( v , a->E[NM1] , sizitem );
movdnb( a->E[r+2] , a->E[r+1] ,
sizitem*(NM2-r) );
moveb( a->E[r+1] , u , sizitem );
}
moveb( b->E[0] , a->E[N] , sizitem*N );
}
else
{/* move upper N items and new item to new page */
moveb( v , a->E[N] , sizitem ) ;
if( (r = r - N) > 0 )
moveb( b->E[0] , a->E[NP1] , sizitem*r );
moveb( b->E[r] , u , sizitem );
if( (i = NM1-r) > 0 )
moveb( b->E[r+1], a->E[NP1+r], sizitem * i );
}
a->M = b->M = N ; b->P0 = v->P; v->P = b ;
}
return (TRUE);
}
trim(strg)
char *strg;
{
INT l;
l=strlen(strg);
while ( strg[l] <= ' ' )
{ if( l <= 0 ) break; strg[l--]='\0'; }
return(l);
}
moveb(a,b,c)
char *a,*b;
int c;
{
int i;
#ifdef FAST /* use potentially faster move routine */
move(a,b,c); return;
#endif
#ifndef FAST
for ( i=0 ; i < c ; ++i ) a[i]=b[i] ;
#endif
}
movdnb(a,b,c)
char *a,*b;
int c;
{
int i;
#ifdef FAST /* use potentially faster move routine */
movdn(a,b,c); return;
#endif
#ifndef FAST
for ( ; --c >= 0 ; c ) a[c]=b[c] ;
#endif
}
printtree(p,l)
PAGE *p;
INT l;
{
INT i,j; CHAR *t;
if(maxcount <= 0) return;
if ( p != NULL )
{
printtree(p->P0, l+1 );
for ( i=0; i <= (j=p->M-1) ; ++i )
{ --maxcount;
printf("\n");
#ifdef SPRINT
printf(" %d %d ",l,p->E[i].COUNT );
#endif
prints(p->E[i].KEY);
printtree(p->E[i].P,l+1);
}
}
}
defkey(a,b)
char **a,*b;
{
#ifdef KEYPTR
if ( (*a=alloc(strlen(b)+1)) == NULL )
{printfc("\ninsufficient string storage in defkey\n");EXIT(0);}
strcpy(*a,b);
#endif
#ifndef KEYPTR
strcpy(a,b);
#endif
}
prints(str)
char *str;
{
if(o_flg)
fputs(str,l_buffer);
else
puts(str); /* and print out the line */
}
/* Note: The following two functions where obtained from the BDS STDLIB2.C
file and may be quite dependent on this compiler since the 'C'
language does specify where arguments are to be found. */
printfc(format)
char *format;
{
char line[MAXLINE];
_spr(line,&format); /* use "_spr" to form the output */
puts(line); /* and print out the line */
}
printf(format)
char *format;
{
char line[MAXLINE];
_spr(line,&format); /* use "_spr" to form the output */
prints(line);
}
fputs(s,iobuf)
char *s;
struct _buf *iobuf;
{
char c;
while (c = *s++) {
if (c == '\r' || c == '\0') return;
if (c == '\n') putc('\r',iobuf);
if (putc(c,iobuf) == ERROR) return ERROR;
}
return OK;
}