ftptest.leeds.ac.uk

home *** CD-ROM | disk | FTP | other *** search

/ ftptest.leeds.ac.uk / 2015.02.ftptest.leeds.ac.uk.tar / ftptest.leeds.ac.uk / bionet / CAE-GROUP / SCL-WIN3x / FED_PLUS.EXE / HASH.C < prev next >

Wrap

C/C++ Source or Header | 1994-07-23 | 14KB | 549 lines

static char rcsid[] = "$Id: hash.c,v 1.6 1994/03/23 20:06:51 libes Exp $"; /* * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson. * Coded into C, with minor code improvements, and with hsearch(3) interface, * by ejp@ausmelb.oz, Jul 26, 1988: 13:16; * also, hcreate/hdestroy routines added to simulate hsearch(3). * * move to local hash table, cleanup abstraction and storage allocation; * convert to ANSII C. * by snc (clark@cme.nbs.gov), 10-Mar-1989 * * these routines simulate hsearch(3) and family, with the important * difference that the hash table is dynamic - can grow indefinitely * beyond its original size (as supplied to hcreate()). * * Performance appears to be comparable to that of hsearch(3). * The 'source-code' options referred to in hsearch(3)'s 'man' page * are not implemented; otherwise functionality is identical. * * Compilation controls: * HASH_DEBUG controls some informative traces, mainly for debugging. * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained; * when combined with HASH_DEBUG, these are displayed by hdestroy(). * * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor * concatenation property, in probably unnecessary code 'optimisation'. * * snc: fixed concatenation for ANSII C, 10-Mar-1989 * * $Log: hash.c,v $ * Revision 1.6 1994/03/23 20:06:51 libes * botched free in hash destroy * * Revision 1.5 1993/10/15 18:49:55 libes * CADDETC certified * * Revision 1.4 1992/08/18 18:27:10 libes * changed DEBUG to HASH_DEBUG * * Revision 1.3 1992/08/18 17:16:22 libes * rm'd extraneous error messages * * Revision 1.2 1992/05/31 08:37:18 libes * multiple files * * Revision 1.1 1992/05/28 03:56:55 libes * Initial revision * * Revision 1.4 1992/05/14 10:15:04 libes * don't remember * * Revision 1.3 1992/02/12 07:06:49 libes * y * y * do sub/supertype * * Revision 1.2 1992/02/09 00:51:02 libes * does ref/use correctly * * Revision 1.1 1992/02/05 08:34:24 libes * Initial revision * * Revision 1.0.1.1 1992/01/22 02:40:04 libes * copied from ~pdes * * Revision 1.9 1992/01/22 02:26:58 libes * added code to test hash package * * Revision 1.8 1991/09/17 02:53:23 libes * fixed bug in HASHcopy (new hash tables were coming out empty) * * Revision 1.7 1991/08/30 16:34:36 libes * fixed HASHlist to use state from parameter instead of static * * Revision 1.6 1991/08/13 22:19:39 libes * forgot to declare s, s2, pp, and q. Oops. * * Revision 1.5 1991/08/06 19:03:07 libes * fixed bugs relating to my previous addition (delete function) * added HASHcopy to support DICT_copy via OBJcopy * * Revision 1.4 1991/07/19 03:57:46 libes * added action HASH_DELETE * made action HASH_INSERT return failure upon duplicate entry * * Revision 1.3 1991/02/26 19:40:45 libes * Added functions for visiting every member of the hash table, to duplicate * similar functionality of linked lists. Note that current implementation * can only walk through one hash table at a time. This can be improved, but * the application code doesn't currently need such a feature. * * Revision 1.2 1990/09/06 11:06:56 clark * BPR 2.1 alpha * * Revision 1.1 90/06/11 16:44:43 clark * Initial revision * */ #define HASH_C #include "hash.h" #include <string.h> /* ** Internal routines */ static_inline Address HASHhash(char*, Hash_Table); static void HASHexpand_table(Hash_Table); /* ** Local data */ # if HASH_STATISTICS static long HashAccesses, HashCollisions; # endif /* ** Code */ void HASHinitialize() { MEMinitialize(&HASH_Table_fl,sizeof(struct Hash_Table),50,50); MEMinitialize(&HASH_Element_fl,sizeof(struct Element),500,100); } Hash_Table HASHcreate(unsigned count) { int i; Hash_Table table; /* ** Adjust Count to be nearest higher power of 2, ** minimum SEGMENT_SIZE, then convert into segments. */ i = SEGMENT_SIZE; while (i < count) i <<= 1; count = DIV(i, SEGMENT_SIZE); table = HASH_Table_new(); #if 0 table->in_use = 0; #endif table->SegmentCount = table->p = table->KeyCount = 0; /* ** Allocate initial 'i' segments of buckets */ for (i = 0; i < count; i++) CALLOC(table->Directory[i], SEGMENT_SIZE, Element) /*, "segment in HASHcreate");*/ table->SegmentCount = count; table->maxp = MUL(count, SEGMENT_SIZE); table->MinLoadFactor = 1; table->MaxLoadFactor = MAX_LOAD_FACTOR; # if HASH_DEBUG fprintf(stderr, "[HASHcreate] table %x count %d maxp %d SegmentCount %d\n", table, count, table->maxp, table->SegmentCount); # endif # if HASH_STATISTICS HashAccesses = HashCollisions = 0; # endif return(table); } /* initialize pointer to beginning of hash table so we can step through it */ /* on repeated calls to HASHlist - DEL */ void HASHlistinit(Hash_Table table,HashEntry *he) { he->i = he->j = 0; he->p = 0; he->table = table; he->type = '*'; #if 0 table->in_use = 1; #endif } void HASHlistinit_by_type(Hash_Table table,HashEntry *he,char type) { he->i = he->j = 0; he->p = 0; he->table = table; he->type = type; #if 0 table->in_use = 1; #endif } #if 0 /* if you don't step to the end, you can clear the flag this way */ void HASHlistend(HashEntry *he) { he->table->in_use = 0; } #endif /* provide a way to step through the hash */ Element HASHlist(HashEntry *he) { int i2 = he->i; int j2 = he->j; Segment s; he->e = 0; for (he->i = i2; he->i < he->table->SegmentCount; he->i++) { /* test probably unnecessary */ if ((s = he->table->Directory[he->i]) != NULL) { for (he->j = j2; he->j < SEGMENT_SIZE; he->j++) { if (!he->p) he->p = s[he->j]; /* if he->p is defined, prepare to return it (by setting it to he->e) and begin looking for a new value for he->p */ retry: if (he->p) { if ((he->type != '*') && (he->type != he->p->type)) { he->p = he->p->next; goto retry; } if (he->e) return(he->e); he->e = he->p; he->p = he->p->next; } /* avoid incrementing he->j by returning here */ if (he->p) return(he->e); } j2 = 0; } } /* if he->e was set then it is last one */ #if 0 he->table->in_use = 0; #endif return(he->e); } #if 0 /* this verifies no one else is walking through the table that we might screw up */ /* it should be called before adding, deleting or destroying a table */ HASH_in_use(Hash_Table table,char *action) { fprintf(stderr,"HASH: attempted to %s but hash table in use\n",action); } #endif void HASHdestroy(Hash_Table table) { int i, j; Segment s; Element p, q; if (table != HASH_NULL) { #if 0 if (table->in_use) HASH_in_use(table,"destroy hash table"); #endif for (i = 0; i < table->SegmentCount; i++) { /* test probably unnecessary */ if ((s = table->Directory[i]) != NULL) { for (j = 0; j < SEGMENT_SIZE; j++) { p = s[j]; while (p != NULL) { q = p->next; HASH_Element_destroy(p); p = q; } } free(table->Directory[i]); } } HASH_Table_destroy(table); # if HASH_STATISTICS && HASH_DEBUG fprintf(stderr, "[hdestroy] Accesses %ld Collisions %ld\n", HashAccesses, HashCollisions); # endif } } Element HASHsearch(Hash_Table table, Element item, Action action) { Address h; Segment CurrentSegment; int SegmentIndex; int SegmentDir; Segment p; Element q; Element deleteme; assert(table != HASH_NULL); /* Kinder really than return(NULL); */ # if HASH_STATISTICS HashAccesses++; # endif h = HASHhash(item->key, table); SegmentDir = DIV(h, SEGMENT_SIZE); SegmentIndex = MOD(h, SEGMENT_SIZE); /* ** valid segment ensured by HASHhash() */ CurrentSegment = table->Directory[SegmentDir]; assert(CurrentSegment != NULL); /* bad failure if tripped */ p = CurrentSegment + SegmentIndex; q = *p; /* ** Follow collision chain ** Now and after we finish this loop ** p = &element, and ** q = element */ while (q != NULL && strcmp(q->key, item->key)) { p = &q->next; q = *p; # if HASH_STATISTICS HashCollisions++; # endif } /* at this point, we have either found the element or it doesn't exist */ switch (action) { case HASH_FIND: return((Element)q); case HASH_DELETE: if (!q) return(0); /* at this point, element exists and action == DELETE */ #if 0 if (table->in_use) HASH_in_use(table,"insert element"); #endif deleteme = q; *p = q->next; /*STRINGfree(deleteme->key);*/ HASH_Element_destroy(deleteme); --table->KeyCount; return(deleteme); /* of course, user shouldn't deref this! */ case HASH_INSERT: /* if trying to insert it (twice), let them know */ if (q != NULL) return(q); /* was return(0);!!!!!?!?! */ #if 0 if (table->in_use) HASH_in_use(table,"delete element"); #endif /* at this point, element does not exist and action == INSERT */ q = HASH_Element_new(); *p = q; /* link into chain */ /* ** Initialize new element */ /* I don't see the point of copying the key!!!! */ /* q->key = STRINGcopy(item->key);*/ q->key = item->key; q->data = item->data; q->symbol = item->symbol; q->type = item->type; q->next = NULL; /* ** table over-full? */ if (++table->KeyCount / MUL(table->SegmentCount, SEGMENT_SIZE) > table->MaxLoadFactor) HASHexpand_table(table); /* doesn't affect q */ } return((Element)0); /* was return (Element)q */ } /* ** Internal routines */ static_inline Address HASHhash(char* Key, Hash_Table table) { Address h, address; register unsigned char *k = (unsigned char*)Key; h = 0; /* ** Convert string to integer */ /*SUPPRESS 112*/ while (*k) /*SUPPRESS 8*/ /*SUPPRESS 112*/ h = h*PRIME1 ^ (*k++ - ' '); h %= PRIME2; address = MOD(h, table->maxp); if (address < table->p) address = MOD(h, (table->maxp << 1)); /* h % (2*table->maxp) */ return(address); } static void HASHexpand_table(Hash_Table table) { Address NewAddress; int OldSegmentIndex, NewSegmentIndex; int OldSegmentDir, NewSegmentDir; Segment OldSegment, NewSegment; Element Current, *Previous, *LastOfNew; if (table->maxp + table->p < MUL(DIRECTORY_SIZE, SEGMENT_SIZE)) { /* ** Locate the bucket to be split */ OldSegmentDir = DIV(table->p, SEGMENT_SIZE); OldSegment = table->Directory[OldSegmentDir]; OldSegmentIndex = MOD(table->p, SEGMENT_SIZE); /* ** Expand address space; if necessary create a new segment */ NewAddress = table->maxp + table->p; NewSegmentDir = DIV(NewAddress, SEGMENT_SIZE); NewSegmentIndex = MOD(NewAddress, SEGMENT_SIZE); if (NewSegmentIndex == 0) CALLOC(table->Directory[NewSegmentDir], SEGMENT_SIZE, Element); /* "segment in HASHexpand_table");*/ NewSegment = table->Directory[NewSegmentDir]; /* ** Adjust state variables */ table->p++; if (table->p == table->maxp) { table->maxp <<= 1; /* table->maxp *= 2 */ table->p = 0; } table->SegmentCount++; /* ** Relocate records to the new bucket */ Previous = &OldSegment[OldSegmentIndex]; Current = *Previous; LastOfNew = &NewSegment[NewSegmentIndex]; *LastOfNew = NULL; while (Current != NULL) { if (HASHhash(Current->key, table) == NewAddress) { /* ** Attach it to the end of the new chain */ *LastOfNew = Current; /* ** Remove it from old chain */ *Previous = Current->next; LastOfNew = &Current->next; Current = Current->next; *LastOfNew = NULL; } else { /* ** leave it on the old chain */ Previous = &Current->next; Current = Current->next; } } } } /* make a complete copy of a hash table */ /* Note that individual objects are shallow-copied. OBJcopy is not called! */ /* But then, it isn't called when objects are inserted/deleted so this seems */ /* reasonable - DEL */ Hash_Table HASHcopy(Hash_Table oldtable) { Hash_Table newtable; Segment s, s2; Element *pp; /* old element */ Element *qq; /* new element */ int i, j; newtable = HASH_Table_new(); for (i = 0; i < oldtable->SegmentCount; i++) { CALLOC(newtable->Directory[i], SEGMENT_SIZE, Element); /* "segment in HASHcopy");*/ } newtable->p = oldtable->p; newtable->SegmentCount = oldtable->SegmentCount; newtable->maxp = oldtable->maxp; newtable->MinLoadFactor = oldtable->MinLoadFactor; newtable->MaxLoadFactor = oldtable->MaxLoadFactor; newtable->KeyCount = oldtable->KeyCount; for (i=0; i<oldtable->SegmentCount; i++) { /* test probably unnecessary */ if ((s = oldtable->Directory[i]) != NULL) { s2 = newtable->Directory[i]; for (j = 0; j < SEGMENT_SIZE; j++) { qq = &s2[j]; for (pp = &s[j];*pp;pp = &(*pp)->next) { *qq = HASH_Element_new(); /* (*qq)->key = STRINGcopy((*pp)->key);*/ /* I really doubt it is necessary to copy the key!!! */ (*qq)->key = (*pp)->key; (*qq)->data = (*pp)->data; (*qq)->symbol = (*pp)->symbol; (*qq)->type = (*pp)->type; (*qq)->next = NULL; qq = &((*qq)->next); } } } } return(newtable); } /* following code is for testing hash package */ #ifdef HASHTEST struct Element e1, e2, e3, *e; struct Hash_Table *t; HashEntry he; main() { e1.key = "foo"; e1.data = (char *)1; e2.key = "bar"; e2.data = (char *)2; e3.key = "herschel"; e3.data = (char *)3; t = HASHcreate(100); e = HASHsearch(t,&e1,HASH_INSERT); e = HASHsearch(t,&e2,HASH_INSERT); e = HASHsearch(t,&e3,HASH_INSERT); HASHlistinit(t,&he); for (;;) { e = HASHlist(&he); if (!e) exit(0); printf("found key %s, data %d\n",e->key,(int)e->data); } } #endif