home *** CD-ROM | disk | FTP | other *** search
- /*
- *******************************************************************************
- * *
- * COPYRIGHT: *
- * (C) Copyright Taligent, Inc., 1997 *
- * (C) Copyright International Business Machines Corporation, 1997-1999 *
- * Licensed Material - Program-Property of IBM - All Rights Reserved. *
- * US Government Users Restricted Rights - Use, duplication, or disclosure *
- * restricted by GSA ADP Schedule Contract with IBM Corp. *
- * *
- *******************************************************************************
- *
- * File uhash.c
- *
- * Modification History:
- *
- * Date Name Description
- * 03/12/99 stephen Creation.
- *******************************************************************************
- */
-
- #include "uhash.h"
- #include "ustring.h"
- #include "cmemory.h"
-
- /* Private function prototypes */
- void
- uhash_initialize(UHashtable *hash,
- int32_t primeIndex,
- UErrorCode *status);
-
- int32_t
- uhash_leastGreaterPrimeIndex(int32_t source);
-
- void
- uhash_rehash(UHashtable *hash,
- UErrorCode *status);
-
- void
- uhash_putInternal(UHashtable *hash,
- int32_t hashCode,
- void *value);
-
- int32_t
- uhash_find(const UHashtable *hash,
- int32_t hashCode);
-
-
-
- /*
- INVARIANT: the size of the table MUST be prime for this algorithm to work!
- Prime table can be tuned for different performance/storage characteristics
- We avoid computing primes by precomputing a table that we use.
- */
- int32_t UHASH_PRIMES [] =
- {
- 17, 37, 67, 131, 257,
- 521, 1031, 2053, 4099, 8209, 16411, 32771, 65537,
- 131101, 262147, 524309, 1048583, 2097169, 4194319, 8388617, 16777259,
- 33554467, 67108879, 134217757, 268435459, 536870923, 1073741827, 2147483647
- };
-
- #define UHASH_PRIMES_LENGTH 28
-
- #define UHASH_SLOT_DELETED ((int32_t) 0x80000000)
- #define UHASH_SLOT_EMPTY ((int32_t) UHASH_SLOT_DELETED + 1)
- #define UHASH_MAX_UNUSED ((int32_t) UHASH_SLOT_EMPTY)
-
- /*
- INVARIANTS:
- DELETED <= MAX_UNUSED
- EMPTY <= MAX_UNUSED
- Any hash > MAX_UNUSED*
- * hashcodes may not start out this way, but internally,
- they are adjusted so that they are always positive, and this is always true.
- Note here that we are assuming 32-bit ints.
- */
-
- U_CAPI UHashtable*
- uhash_open(UHashFunction func,
- UErrorCode *status)
- {
- UHashtable* myUHT = uhash_openSize(func, 3, status);
- if (U_SUCCESS(*status)) myUHT->isGrowable = TRUE;
-
- return myUHT;
- }
-
- U_CAPI UHashtable*
- uhash_openSize(UHashFunction func,
- int32_t size,
- UErrorCode *status)
- {
- UHashtable *result;
-
- if(U_FAILURE(*status)) return NULL;
-
- result = (UHashtable*) icu_malloc(sizeof(UHashtable));
- if(result == 0) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- result->highWaterFactor = 0.5F;
- result->lowWaterFactor = 0.0F;
- result->hashFunction = func;
- result->valueDelete = NULL;
- result->toBeDeleted = NULL;
- result->toBeDeletedCount = 0;
- result->isGrowable = FALSE;
-
- uhash_initialize(result, uhash_leastGreaterPrimeIndex(size), status);
-
- if(U_FAILURE(*status)) {
- icu_free(result);
- return 0;
- }
-
- return result;
- }
-
- U_CAPI void
- uhash_setValueDeleter(UHashtable *hash, ValueDeleter del )
- {
- hash->valueDelete = del;
- }
-
- U_CAPI void
- uhash_close(UHashtable *hash)
- {
- if (hash->valueDelete)
- {
- ValueDeleter my_free = hash->valueDelete;
- void** vals = hash->values;
- void** toBeDeleted = hash->toBeDeleted;
- int32_t i;
- int32_t count = hash->count;
- int32_t toBeDeletedCount = hash->toBeDeletedCount;
- for (i = 0; i < count; i++) my_free(vals[i]);
- while (toBeDeletedCount--) my_free(toBeDeleted[toBeDeletedCount]);
-
- }
- icu_free(hash->values);
- icu_free(hash->hashes);
- icu_free(hash->toBeDeleted);
- }
-
- U_CAPI int32_t
- uhash_size(const UHashtable *hash)
- {
- return hash->count;
- }
- U_CAPI int32_t
- uhash_putKey(UHashtable *hash,
- int32_t valueKey,
- void *value,
- UErrorCode *status)
- {
- /* Put finds the position in the table for the new value. */
-
- int32_t hashCode;
- int32_t index;
-
- if(U_FAILURE(*status)) return UHASH_INVALID;
-
- if(hash->count > hash->highWaterMark) {
- if (hash->isGrowable) uhash_rehash(hash, status);
- else {
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return UHASH_INVALID;
- }
- }
-
- hashCode = valueKey;
- index = uhash_find(hash, hashCode);
-
- /* deleted or empty */
- if(hash->hashes[index] <= UHASH_MAX_UNUSED) {
- /* make new object */
- hash->hashes[index] = hashCode;
- ++(hash->count);
- }
-
- /* delete old value? */
- if (hash->valueDelete)
- {
- void * result = hash->values[index];
- if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
- {
- hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount)));
- hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
- }
- hash->values[index] = 0;
- }
-
-
- /* store value */
- hash->values[index] = value;
-
- /* return the hash code to the user */
- return hashCode;
- }
-
- U_CAPI int32_t
- uhash_put(UHashtable *hash,
- void *value,
- UErrorCode *status)
- {
- /* Put finds the position in the table for the new value. */
-
- int32_t hashCode;
- int32_t index;
-
- if(U_FAILURE(*status)) return UHASH_INVALID;
-
- if(hash->count > hash->highWaterMark) {
- if (hash->isGrowable) uhash_rehash(hash, status);
- else {
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return UHASH_INVALID;
- }
- }
-
- hashCode = (hash->hashFunction)(value);
- index = uhash_find(hash, hashCode);
-
- /* deleted or empty */
- if(hash->hashes[index] <= UHASH_MAX_UNUSED) {
- /* make new object */
- hash->hashes[index] = hashCode;
- ++(hash->count);
- }
-
- /* delete old value? */
- if (hash->valueDelete)
- {
- void* result = hash->values[index];
- if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
- {
- hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted,
- sizeof(void*)*(++(hash->toBeDeletedCount)));
- hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
- }
- hash->values[index] = 0;
- }
-
-
- /* store value */
- hash->values[index] = value;
-
- /* return the hash code to the user */
- return hashCode;
- }
-
- U_CAPI void*
- uhash_get(const UHashtable *hash,
- int32_t key)
- {
- /* srl: Shouldn't we check to see if hash->hashes[uhash_find(hash, key)] == UHASH_SLOT_DELETED ?
- Perhaps in theory hash->values[...] should have been set to 0, but can we depend
- on this?
- */
-
- void *result = hash->values[uhash_find(hash, key)];
- return result;
- }
-
- U_CAPI void*
- uhash_remove(UHashtable *hash,
- int32_t key,
- UErrorCode *status)
- {
- /*
- First find the position of the key in the table
- If the object has not be removed already, remove it.
- We have to put a special value in that position that means that
- something has been deleted, since when we do a find,
- we have to continue PAST any deleted values
- */
- int32_t index = uhash_find(hash, key);
- void *result = 0;
-
- /* neither deleted nor empty */
- if(hash->hashes[index] > UHASH_MAX_UNUSED) {
- /* set to deleted */
- hash->hashes[index] = UHASH_SLOT_DELETED;
- result = hash->values[index];
-
- /* delete old value? */
- if (hash->valueDelete)
- {
- hash->valueDelete(result);
- }
- hash->values[index] = 0; /* srl .. always null out the value even if there's no deletor!! */
-
- --(hash->count);
-
- if(hash->count < hash->lowWaterMark) {
- uhash_rehash(hash, status);
- }
- }
-
- return result;
- }
-
- U_CAPI void*
- uhash_nextElement(const UHashtable *hash,
- int32_t *pos)
- {
- /*
- Walk through the array until you find an element that is not EMPTY and
- not DELETED
- */
-
- int32_t i;
- void *value;
-
- for(i = *pos + 1; i < hash->length; ++i) {
- if(hash->hashes[i] > UHASH_MAX_UNUSED) {
- *pos = i;
- value = hash->values[i];
- return value;
- }
- }
-
- /* No more elements */
- return 0;
- }
-
- /* ================================================== */
- /* Private functions */
- /* ================================================== */
- void
- uhash_initialize(UHashtable *hash,
- int32_t primeIndex,
- UErrorCode *status)
- {
- int32_t i;
-
- if(U_FAILURE(*status)) return;
-
- if(primeIndex < 0) {
- primeIndex = 0;
- }
- else if(primeIndex >= UHASH_PRIMES_LENGTH) {
- primeIndex = UHASH_PRIMES_LENGTH - 1;
- }
-
- hash->primeIndex = primeIndex;
- hash->length = UHASH_PRIMES[primeIndex];
-
- hash->values = (void**) icu_malloc(sizeof(void*) * hash->length);
- if(hash->values == 0) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- hash->hashes = (int32_t*) icu_malloc(sizeof(int32_t) * hash->length);
- if(hash->values == 0) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- icu_free(hash->values);
- return;
- }
-
- for(i = 0; i < hash->length; ++i) {
- hash->hashes[i] = UHASH_SLOT_EMPTY;
- hash->values[i] = 0;
- }
-
- hash->count = 0;
- hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterFactor);
- hash->highWaterMark = (int32_t)(hash->length * hash->highWaterFactor);
- }
-
- int32_t
- uhash_leastGreaterPrimeIndex(int32_t source)
- {
- int32_t i;
- for(i = 0; i < UHASH_PRIMES_LENGTH; ++i) {
- if(source < UHASH_PRIMES[i]) {
- break;
- }
- }
- return (i == 0 ? 0 : (i - 1));
- }
-
- void
- uhash_rehash(UHashtable *hash,
- UErrorCode *status)
- {
- /*
- Rebuild the table from the start. This clears out deadwood, in case
- we have a lot of deleted values. See Find
- It is also used when the table grows or shrinks a lot.
- INVARIANT: The size of the table MUST be prime for this algorithm to work!
- */
-
- void **oldValues = hash->values;
- int32_t *oldHashList = hash->hashes;
- int32_t old_length = hash->length;
- int32_t newPrimeIndex = hash->primeIndex;
- int32_t i;
-
- if(U_FAILURE(*status)) return;
-
- if(hash->count > hash->highWaterMark) {
- ++newPrimeIndex;
- }
- else if(hash->count < hash->lowWaterMark) {
- newPrimeIndex -= 2;
- }
-
- uhash_initialize(hash, newPrimeIndex, status);
- for(i = old_length - 1; i >= 0; --i) {
- void *value = oldValues[i];
- if(value != 0) {
- uhash_putInternal(hash, oldHashList[i], value);
- }
- }
-
- icu_free(oldValues);
- icu_free(oldHashList);
- }
-
- void
- uhash_putInternal(UHashtable *hash,
- int32_t hashCode,
- void *value)
- {
- int32_t index = uhash_find(hash, hashCode);
- if(hash->hashes[index] <= UHASH_MAX_UNUSED) {
- /* deleted or empty */
- hash->hashes[index] = hashCode;
- ++(hash->count);
- }
-
- /* reset value */
- hash->values[index] = value;
- }
-
- int32_t
- uhash_find(const UHashtable *hash,
- int32_t hashCode)
- {
- /*
- This is the key routine. It looks for a particular key in the following
- way. First find the start position, which is basically the key modulo the
- length. Test it to see if it is
- a. Identical (same hash values)
- b. Deleted
- c. Empty
- Stop if it is identical or empty, otherwise continue by adding a "jump"
- value (moduloing by the length again to keep it within range) and
- retesting. For efficiency, it needs enough empty values so that the
- searches stop within a reasonable amount of time. This can be changed by
- changing the high/low water marks.
- INVARIANT: the size of the table MUST be prime for this algorithm to work!
- */
-
- int32_t firstDeleted = -1;
- int32_t index = (hashCode ^ 0x4000000) % hash->length;
- int32_t jump = 0;
-
- while(TRUE) {
- int32_t tableHash = hash->hashes[index];
-
- /* Compare hash codes */
- if(tableHash == hashCode) {
- return index;
- }
-
- /* neither correct nor unused */
- else if(tableHash > UHASH_MAX_UNUSED) {
- /* ignore */
- }
-
- /* empty, end o' the line */
- else if(tableHash == UHASH_SLOT_EMPTY) {
- if(firstDeleted >= 0) {
- /* reset if had deleted slot */
- index = firstDeleted;
- }
- return index;
- }
-
- /* remember first deleted */
- else if(firstDeleted < 0) {
- firstDeleted = index;
- }
-
- /* lazy compute jump */
- if(jump == 0) {
- jump = (hashCode % (hash->length - 1)) + 1;
- }
-
- index = (index + jump) % hash->length;
- }
-
- /* This never happens -- just make the compiler happy */
- return -1;
- }
-
- /* Predefined hash functions */
-
- U_CAPI int32_t
- uhash_hashUString(const void *parm)
- {
- const UChar *key = (const UChar*) parm;
- int32_t len = u_strlen(key);
- int32_t hash = UHASH_INVALID;
- const UChar *limit = key + len;
- int32_t inc = (len >= 128 ? len/64 : 1);
-
- /*
- We compute the hash by iterating sparsely over 64 (at most) characters
- spaced evenly through the string. For each character, we multiply the
- previous hash value by a prime number and add the new character in,
- in the manner of a additive linear congruential random number generator,
- thus producing a pseudorandom deterministic value which should be well
- distributed over the output range. [LIU]
- */
-
- while(key < limit) {
- hash = (hash * 37) + *key;
- key += inc;
- }
-
- if(hash == UHASH_INVALID)
- hash = UHASH_EMPTY;
-
- return hash & 0x7FFFFFFF;
- }
-
- U_CAPI int32_t
- uhash_hashString(const void *parm)
- {
- const char *key = (const char*) parm;
- int32_t len = strlen(key);
- int32_t hash = UHASH_INVALID;
- const char *limit = key + len;
- int32_t inc = (len >= 128 ? len/64 : 1);
-
- /*
- We compute the hash by iterating sparsely over 64 (at most) characters
- spaced evenly through the string. For each character, we multiply the
- previous hash value by a prime number and add the new character in,
- in the manner of a additive linear congruential random number generator,
- thus producing a pseudorandom deterministic value which should be well
- distributed over the output range. [LIU]
- */
-
- while(key < limit) {
- hash = (hash * 37) + *key;
- key += inc;
- }
-
- if(hash == UHASH_INVALID)
- hash = UHASH_EMPTY;
-
- return hash & 0x7FFFFFFF;
- }
-
- U_CAPI int32_t
- uhash_hashLong(const void *parm)
- {
- int32_t hash = (int32_t) parm;
- return (int32_t) (hash & 0x7FFFFFFF);
- }
-