home *** CD-ROM | disk | FTP | other *** search
- /*
- File: OpenHash.h
-
- Contains: OpenHashTable, a hash table with "open" hashing
-
- Owned by: David McCusker
-
- Copyright: © 1995 by Apple Computer, Inc., all rights reserved.
-
-
- */
-
- #ifndef _OPENHASH_
- #define _OPENHASH_ 1
-
- #ifndef _ODTYPES_
- #include "ODTypes.h"
- #endif
-
- #ifndef _ODMEMORY_
- #include "ODMemory.h"
- #endif
-
- //==============================================================================
- // Constants
- //==============================================================================
-
- //==============================================================================
- // Scalar Types
- //==============================================================================
-
- typedef ODBoolean (*OpenHashKeyEqual)(const void* key, const void* testKey);
- typedef ODULong (*OpenHashKeyHash)(const void* key);
-
- typedef unsigned int OpenHashSize; // must be unsigned, (similar to size_t)
-
- typedef ODBoolean (*OpenHashAction)(void* key, void* value, ODULong slot, void* ref);
- /* OpenHashAction supports walking hash tables. Returning true causes
- * a walk to terminate early (false continues the walk). This allows
- * a walk to be invoked just to see if it finds something interesting.
- */
-
- //==============================================================================
- // Classes defined in this interface
- //==============================================================================
-
- class OpenHashTable;
- class OpenHashTableIterator;
-
- //==============================================================================
- // Classes used by this interface
- //==============================================================================
-
- class OpenHashEntry; // private to the implementation
- class OpenHashVectors; // private to the implementation
-
-
- //==============================================================================
- // OpenHashTable: theory of operation
- //==============================================================================
-
- /*
- OpenHashTable is a hash table that implements "open" hashing (such that
- collisions are handled by chaining together the members of a bucket). This
- permits efficient removal of hash table associations (as opposed to closed
- hashing, which might require a non-trivial amount of entry movement after
- a removal). In particular, this implementation of open hashing supports
- removal during iterations.
-
- A table is logically a set of <key, value> pairs, where no key is equal to
- another. This defines a mapping that associates keys with values.
- OpenHashTable manages the space for both keys and values; keys and values
- are copied into and out of the hash table through the interface. The
- amount of space required for each key and value is specified when the hash
- table is initialized. Values may be specified as zero-sized, causing the
- the table to allocate space for keys only. Users might want to do this
- when a key contains all the information of interest, and a value would
- be redundant. (Note that keys may *not* be zero-sized.) If users want
- to manage the memory for their own keys and values, it is simple to use
- pointers for the keys and values, and specify the size as sizeof(void*).
-
- The only requirements for keys are that they have appropriate
- OpenHashKeyEqual and OpenHashKeyHash functions, which are related to
- each other by the following invariant:
- (*equal)(key1, key2) implies (*hash)(key1) == (*hash)(key2)
-
- Assuming a full table, the storage overhead for this implementation is
- two longwords per <key, value> association. (If OPENHASH_CACHE_HASH is
- defined, another longword of overhead is added to cache the hash of keys,
- and the speed of lookup operations and changes in table size will be
- significantly increased.)
-
- This implementation uses mod (%) to calculate buckets from hashes. If
- masking (&) had been used, the table would have been constrained to sizes
- that are powers of two. Masking might be slightly faster, but causes
- poor distribution if the low bits of hashes are not very pseudo-random.
- */
-
-
- class OpenHashTable {
- public:
-
- //=============================
- // Hashing
-
- static ODULong StdHash(const void* key);
- // A standard hash function useable when keys are pointers or ints.
- // Hash is defined as the pointer cast to an integer. Note that since
- // a pointer to the key is passed, the argument type is actually
- // pointer to a pointer.
- // returns *(ODULong*) key;
-
- static ODULong Random(ODULong seed);
- // Pseudo-random number generator with period 2**31-1 (i.e. all
- // values in 1..2**31-1 are visted before repetition). Useful
- // for hashing one or more longwords. Derived from published
- // literature on good random number generators (the names
- // Park and Miller come to mind).
-
- static ODULong HashString(const ODUByte* key, ODULong keyLength);
- // This is a generic hashing utility that might sometimes be used to
- // implement a hash function passed to the OpenHashTable constructor;
- // the keyLength arg must be curried because only key is passed
- // to a hash function. HashString() is conceptually derived
- // from P.J.Weinberger (from the "dragon" book).
-
- static ODULong RandomOneLong(const void* key);
- // A standard hash function for a 4-byte key, based on Random().
- // returns Random( *(ODULong*) key );
-
- static ODULong RandomTwoLongs(const void* key);
- // A standard hash function for an 8-byte key, based on Random().
- // returns Random( ((ODULong*) key)[0] ^ ((ODULong*) key)[1] );
-
- static ODULong HashTwoLongs(const void* key);
- // A standard hash function for an 8-byte key, where the longwords
- // in the keys are likely already largish values (like pointers).
- // returns ( ((ODULong*) key)[0] ^ ((ODULong*) key)[1] );
-
- //=============================
- // Equality
-
- static ODBoolean StdEqual(const void* key, const void* testKey);
- // A standard equal function useable when keys are pointers or ints.
- // Equality is defined as pointer equality. Note that since
- // a pointer to the key is passed, the argument type is actually
- // pointer to a pointer.
- // returns ( *(void**) key == *(void**) testKey);
-
- static ODBoolean EqualTwoLongs(const void* key, const void* testKey);
- // A standard equal function for an 8-byte key. Compares the
- // first two longwords of each keys as unsigned longs.
-
- //=============================
- // Making and Breaking
-
- OpenHashTable(OpenHashKeyEqual equal = StdEqual,
- OpenHashKeyHash hash = StdHash,
- ODMemoryHeapID heap=kDefaultHeapID);
- // Compare keys with equal, and hash them with hash.
- // All memory is allocated from the specified heap.
-
- OpenHashTable(const OpenHashTable& otherTable);
- // Use the same equal, hash, and heap as otherTable.
- // (Does *not* copy otherTable; use InitializeAndCopy()
- // to actually copy the contents of otherTable.)
-
- void SetHeap(ODMemoryHeapID heap=kDefaultHeapID);
- // All memory is allocated from the specified heap.
- // This method does nothing after Initialize() has been called.
- // Generally this function is only called by hash tables delegating
- // to this one, which specify the heap in Initialize() functions
- // instead of in the constructor.
-
- ODBoolean Initialize(ODULong sizeHint=5,
- OpenHashSize keySize=sizeof(void*),
- OpenHashSize valueSize=sizeof(void*),
- ODBoolean throwErrors=kODTrue);
- // Initialize the table with sizeHint slots. Keys will occupy keySize
- // bytes of space, and this amount of space will copied in/out of
- // functions. Values will occupy valueSize bytes of space, and this
- // amount of space will copied in/out of functions. valueSize can
- // be zero, causing values to be ignored. The value of throwErrors
- // controls whether exceptions will be thrown when errors occur.
- // Note that if either keys or values have alignment requirements,
- // then keySize and valueSize must be such that keyPtr + keySize and
- // valuePtr + valueSize point to appropriately aligned structures.
- // Returns false only if memory could not be allocated. (If
- // throwErrors, throws kODErrOutOfMemory instead of returning false.)
- // You can alternatively use InitAndCopyFrom() to copy another table.
-
- ODBoolean InitAndCopyFrom(const OpenHashTable& otherTable);
- // Initialize this table so that it uses the same parameters
- // as otherTable; sizeHint is taken as otherTable.Count().
- // Then we call this->CopyTable(otherTable) to copy the contents.
- // Returns false only if memory could not be allocated. (If
- // throwErrors, throws kODErrOutOfMemory instead of returning false.)
-
- ODBoolean CopyTable(const OpenHashTable& otherTable);
- // Copy all of otherTable's entries into this table such that the
- // result is the union of the previous contents of this table and
- // otherTable. If this table was empty, the result is a simple copy.
- // If this table contains entries that are also in otherTable, they
- // are replaced with otherTable's entries (beware losing pointers
- // and creating garbage this way). Othertable should have the same
- // equal and hash functions, as well as the same key and value sizes.
- // Returns false only if memory could not be allocated. (If
- // throwErrors, throws kODErrOutOfMemory instead of returning false.)
- // The implementation is just an optimized way of iterating over
- // entries in otherTable and calling this->ReplaceEntry() for each.
-
- ~OpenHashTable();
-
- //=============================
- // Association operations
-
- ODBoolean ReplaceEntry(void* keyPtr, void* valuePtr);
- // Replace and/or add an entry that associates key with value. keyPtr
- // must be a POINTER to a key, and valuePtr must be a POINTER to a value
- // (e.g. if a key is struct Foo, then keyPtr must be type struct Foo*).
- // <key-size> bytes from keyPtr will be copied into the table, and
- // <value-size> bytes from valuePtr will be be copied into the
- // table (if <value-size> is zero, valuePtr is ignored).
- // (Note that if you need a copy of the old contents of the key or
- // value from a previous association, you should copy them first using
- // one of GetKey(), GetValue(), or GetKeyAndValue().)
- // If the addition of this entry makes the table's entry count exceed
- // the current capacity of the table, then the table grows by some
- // percentage (say 50%), leaving some empty slots for future additions.
- // False is returned only if memory could not be allocated. (If
- // throwErrors, throws kODErrOutOfMemory instead of returning false.)
-
- void RemoveEntry(void* keyPtr);
- // Remove the association with the specified key. keyPtr must be
- // a POINTER to a key (e.g. if a key is struct Foo, then keyPtr must
- // be type struct Foo*). Nothing happens if there was no such
- // association. This operation is fast and cheap because an old entry
- // is simply "unlinked" and set aside for reuse. (Consider using
- // ShrinkToFit() to reclaim space after numerous removals.)
- // (Note that if you need a copy of the old contents of the key or
- // value from a previous association, you should copy them first using
- // one of GetKey(), GetValue(), or GetKeyAndValue().)
-
- ODBoolean GetValue(void* keyPtr, void* valuePtr) const;
- // Find and return the value associated with key in (*valuePtr);
- // returns false only if there was no association with the given key,
- // or if <value-size> is zero. keyPtr must be a POINTER to a key, and
- // valuePtr must be a POINTER to a value (e.g. if a key is struct
- // Foo, then keyPtr must be type struct Foo*).
- // If the association is found, and if <value-size> is nonzero, then
- // <value-size> bytes are copied to the address indicated by valuePtr;
- // otherwise valuePtr is ignored.
-
- ODBoolean GetKey(void* keyInPtr, void* keyOutPtr) const;
- // Find the key in the table equal to (*keyInPtr) and return it in
- // (*keyOutPtr); returns false only if there was no association with
- // the given key. keyInPtr and keyOutPtr must be POINTERs to keys
- // (e.g. if a key is struct Foo, then keyPtr must be type struct Foo*).
- // If the association is found, then <key-size> bytes are copied to the
- // address indicated by keyOutPtr; it is safe to use the same pointer
- // for keyInPtr and keyOutPtr because keyInPtr is not read again after
- // keyOutPtr is written.
-
- ODBoolean GetKeyAndValue(void* key, void* keyPtr, void* valuePtr) const;
- // Roughly equivalent to GetKey(key, keyPtr), GetValue(key, valuePtr),
- // but faster. Either keyPtr or valuePtr can be set to zero to
- // suppress copying that part of the association.
-
- ODBoolean Exists(void* keyPtr) const;
- // Return whether there is an association with a key equal to (*keyPtr).
- // keyPtr must be a POINTER to a key (e.g. if a key is struct
- // Foo, then keyPtr must be type struct Foo*).
-
- //=============================
- // Table parameters
-
- ODULong Count() const; // number of associations
- ODULong Capacity() const; // current number of slots
- ODULong Seed() const; // changes whenever table is modified
-
- OpenHashSize KeySize() const; // size of keys: <key-size>
- OpenHashSize ValueSize() const; // size of values: <value-size>
-
- ODBoolean ThrowErrors() const;
- void SetThrowErrors(ODBoolean b);
-
- ODBoolean ShrinkToFit(OpenHashSize extraSlots=3);
- // Change the capacity of the table to Count() + extraSlots. This is
- // the principal means of recovering storage after numerous removals.
- // Presumably an application watches the relationship between Count()
- // and Capacity(), and shrinks the table when count falls below some
- // threshold. Returns false only if memory could not be allocated. (If
- // throwErrors, throws kODErrOutOfMemory instead of returning false.)
-
- //=============================
- // comparing hash tables:
- ODBoolean Intersect(OpenHashTable* ht, OpenHashAction action, void* ref);
- // Walk this table and compare with table ht, calling action (with ref
- // as one argument) once for every association in this table which has
- // a key equal to a key in ht. (Note: the value of slot passed to
- // action is meaningless.) If action ever returns true, Intersect()
- // returns immediately without examining any more associations.
- // Intersect() returns true only if action returns true (thus, for
- // example, if either table is empty then action is never called and
- // Intersect() returns false). If the caller wishes to build a data
- // structure (e.g. a list) that represents the intersection, this can
- // be maintained in ref. Or, if the caller wants a boolean yes or no
- // (is the intersection non-empty?) then action merely needs to always
- // return true (use TrueAction()). Table ht must have the
- // same hash and equal functions as in this table. You cannot add or
- // remove associations in this table during the intersection, but you
- // can in ht.
-
- static ODBoolean TrueAction(void* k, void* v, ODULong s, void* r);
- // This function ignores its arguments and always returns true.
- // It is meant to be passed as an action argument to Intersect().
-
- //=============================
- // testing & debugging:
- ODBoolean Walk(OpenHashAction action, void* ref) const;
- // Call action once for every association in the hash table, passing
- // through ref each time, as well as the slot in the table containing
- // the association. If action ever returns true, the walk is terminated
- // immediately, rather than visiting every association. Walk() only
- // returns true if action returns true (thus, an empty table never calls
- // action and returns false). This function is useful for printing the
- // distribution of keys in a table to evaluate the quality of hashing.
- // (It's not appropriate to expose slots in the table with an iterator.)
- // You cannot add or remove associations during the walk.
-
- protected:
- OpenHashEntry** fTable; // hash buckets (numbering fSlots)
- OpenHashEntry* fEntries; // block of fSlots entries
- char* fKeys; // block of fSlots keys
- char* fValues; // block of fSlots values
- OpenHashEntry* fFreeList; // free list of entries
- OpenHashKeyEqual fEqual; // used to compare keys
- OpenHashKeyHash fHash; // used to hash keys
- ODMemoryHeapID fHeap; // used to allocate memory
- OpenHashSize fSlots; // current size (capacity) of table
- ODULong fCount; // number of entries in table
- ODULong fSeed; // used to validate iterators
- OpenHashSize fKeySize; // size of keys
- OpenHashSize fValSize; // size of values
- ODBoolean fThrowErrors; // throw exception on errors
-
- OpenHashEntry** Find(void* key, ODULong hash) const;
- ODBoolean Grow();
-
- void Clear();
- OpenHashEntry** NewTable(OpenHashSize slots) const;
- OpenHashEntry* NewEntries(OpenHashSize slots) const;
- char* NewKeys(OpenHashSize slots) const;
- char* NewValues(OpenHashSize slots) const;
-
- ODBoolean NewVectors(OpenHashVectors* old, OpenHashSize slots);
-
- OpenHashEntry* PopFreeEntry();
- void PushFreeEntry(OpenHashEntry* e);
-
- void CopyTo(void* key, void* value, ODULong i) const;
- private:
- friend class OpenHashTableIterator;
- friend class OpenHashVectors;
- };
-
- inline ODULong OpenHashTable::Capacity() const
- {
- return fSlots;
- }
-
- inline ODULong OpenHashTable::Count() const
- {
- return fCount;
- }
-
- inline ODULong OpenHashTable::Seed() const
- {
- return fSeed;
- }
-
- inline OpenHashSize OpenHashTable::KeySize() const
- {
- return fKeySize;
- }
-
- inline OpenHashSize OpenHashTable::ValueSize() const
- {
- return fValSize;
- }
-
- inline ODBoolean OpenHashTable::ThrowErrors() const
- {
- return fThrowErrors;
- }
-
- inline void OpenHashTable::SetThrowErrors(ODBoolean b)
- {
- fThrowErrors = b;
- }
-
-
- //==============================================================================
- // OpenHashTableIterator
- //
- // This iterator is only meant to be used in the the context of a for loop,
- // e.g.:
- //
- // struct MyKey key;
- // struct MyValue value;
- // OpenHashTableIterator i(table);
- // for (i.First(&key, &value); i.IsNotComplete(); i.Next(&key, &value))
- // {
- // ...
- // }
- //
- //==============================================================================
-
- class OpenHashTableIterator
- {
- public:
- OpenHashTableIterator(OpenHashTable* table);
- ~OpenHashTableIterator();
-
- void First(void* keyPtr, void* valuePtr);
- // Resets the iteration and copies the "first" <key, value> pair
- // in the hash table. Either keyPtr or valuePtr can be set to
- // zero to suppress copying that part of the assocation. valuePtr
- // will also be ignored if <value-size> is zero. keyPtr must
- // be a POINTER to a key, and valuePtr must be a POINTER to a value
- // (e.g. if a key is struct Foo, then keyPtr must be type struct Foo*).
- // keyPtr or valuePtr are ignored if there are no more associations
- // (use IsNotComplete() to check). The iterator and the table are
- // always in sync after First().
-
- void Next(void* keyPtr, void* valuePtr);
- // Continues the iteration and copies the "next" <key, value>
- // pair in the hash table. Either keyPtr or valuePtr can be set to
- // zero to suppress copying that part of the assocation. valuePtr
- // will also be ignored if <value-size> is zero. keyPtr must
- // be a POINTER to a key, and valuePtr must be a POINTER to a value
- // (e.g. if a key is struct Foo, then keyPtr must be type struct Foo*).
- // keyPtr or valuePtr are ignored if there are no more associations
- // (use IsNotComplete() to check). If the table is out of sync with
- // the iterator (has been modified since First() other than by
- // RemoveCurrent()), then exception kODErrIteratorOutOfSync is thrown.
-
- ODBoolean Current(void* keyPtr, void* valuePtr);
- // Copies the same <key, value> pair as the last call to either
- // First() or Next(). Either keyPtr or valuePtr can be set to
- // zero to suppress copying that part of the assocation. valuePtr
- // will also be ignored if <value-size> is zero. keyPtr must
- // be a POINTER to a key, and valuePtr must be a POINTER to a value
- // (e.g. if a key is struct Foo, then keyPtr must be type struct Foo*).
- // keyPtr or valuePtr are ignored if there are no more associations
- // (use IsNotComplete() to check). If the table is out of sync with
- // the iterator (has been modified since First() other than by
- // RemoveCurrent()), then exception kODErrIteratorOutOfSync is thrown.
-
- void RemoveCurrent();
- // Efficiently removes the current <key, value> pair from the table.
- // This method works the same whether you are iterating with
- // associations or with pointers. If the table is out of sync with
- // the iterator (has been modified since First() other than by
- // RemoveCurrent()), then exception kODErrIteratorOutOfSync is thrown.
-
- ODBoolean IsNotComplete() const;
- // Returns true if the last call to First(), Next() or Current()
- // accessed an association in the table.
-
- ODBoolean IsOutOfSync() const;
- // Returns whether the iterator is out of sync with the table.
- // Users who do not want an kODErrIteratorOutOfSync exception to be
- // thrown from Next(), Current(), or RemoveCurrent() should avoid
- // calling those functions when IsOutOfSync() returns true.
- // (The iterator becomes out of sync when the table is modified other
- // than by RemoveCurrent() since the last call to First().)
-
- private:
- OpenHashTable* fTable; // the table we are iterating over
- ODULong fSeed; // should match table's seed
- OpenHashEntry** fBucket; // current bucket in the table
- OpenHashEntry** fRef; // normally (*fRef == fCurrent), for removal
- OpenHashEntry* fCurrent; // the current entry
- OpenHashEntry* fAfter; // normally fCurrent->fLink
- };
-
- inline ODBoolean OpenHashTableIterator::IsNotComplete() const
- {
- return (fCurrent != 0);
- }
-
- inline ODBoolean OpenHashTableIterator::IsOutOfSync() const
- {
- return (fSeed != fTable->Seed());
- }
-
-
- #endif
- // _OPENHASH_
-