summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJeff Brown <jeffbrown@google.com>2011-11-14 18:29:15 -0800
committerAlex Ray <aray@google.com>2013-07-30 13:56:58 -0700
commite735f23018b398f45bd052b63616d7a45e29515b (patch)
tree49b0eac2a5288a2d3c99fb15cc079f9ff1e0834d /include
parent11189f5f797ef29ee670ab4683f97c2cfc109899 (diff)
downloadcore-e735f23018b398f45bd052b63616d7a45e29515b.tar.gz
core-e735f23018b398f45bd052b63616d7a45e29515b.tar.bz2
core-e735f23018b398f45bd052b63616d7a45e29515b.zip
Add a basic hashtable data structure, with tests!
The basic hashtable is intended to be used to support a variety of different datastructures such as map, set, multimap, multiset, linkedmap, generationcache, etc. Consequently its interface is fairly primitive. The basic hashtable supports copy-on-write style functionality using SharedBuffer. The change introduces a simple generic function in TypeHelpers for specifying hash functions. The idea is to add template specializations of hash_type<T> next to the relevant data structures such as String8, String16, sp<T>, etc. Change-Id: I2c479229e9d4527b4fbfe3b8b04776a2fd32c973
Diffstat (limited to 'include')
-rw-r--r--include/utils/BasicHashtable.h393
-rw-r--r--include/utils/TypeHelpers.h44
2 files changed, 437 insertions, 0 deletions
diff --git a/include/utils/BasicHashtable.h b/include/utils/BasicHashtable.h
new file mode 100644
index 000000000..fdf97385f
--- /dev/null
+++ b/include/utils/BasicHashtable.h
@@ -0,0 +1,393 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_BASIC_HASHTABLE_H
+#define ANDROID_BASIC_HASHTABLE_H
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <utils/SharedBuffer.h>
+#include <utils/TypeHelpers.h>
+
+namespace android {
+
+/* Implementation type. Nothing to see here. */
+class BasicHashtableImpl {
+protected:
+ struct Bucket {
+ // The collision flag indicates that the bucket is part of a collision chain
+ // such that at least two entries both hash to this bucket. When true, we
+ // may need to seek further along the chain to find the entry.
+ static const uint32_t COLLISION = 0x80000000UL;
+
+ // The present flag indicates that the bucket contains an initialized entry value.
+ static const uint32_t PRESENT = 0x40000000UL;
+
+ // Mask for 30 bits worth of the hash code that are stored within the bucket to
+ // speed up lookups and rehashing by eliminating the need to recalculate the
+ // hash code of the entry's key.
+ static const uint32_t HASH_MASK = 0x3fffffffUL;
+
+ // Combined value that stores the collision and present flags as well as
+ // a 30 bit hash code.
+ uint32_t cookie;
+
+ // Storage for the entry begins here.
+ char entry[0];
+ };
+
+ BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
+ size_t minimumInitialCapacity, float loadFactor);
+ BasicHashtableImpl(const BasicHashtableImpl& other);
+
+ void dispose();
+
+ inline void edit() {
+ if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
+ clone();
+ }
+ }
+
+ void setTo(const BasicHashtableImpl& other);
+ void clear();
+
+ ssize_t next(ssize_t index) const;
+ ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
+ size_t add(hash_t hash, const void* __restrict__ entry);
+ void removeAt(size_t index);
+ void rehash(size_t minimumCapacity, float loadFactor);
+
+ const size_t mBucketSize; // number of bytes per bucket including the entry
+ const bool mHasTrivialDestructor; // true if the entry type does not require destruction
+ size_t mCapacity; // number of buckets that can be filled before exceeding load factor
+ float mLoadFactor; // load factor
+ size_t mSize; // number of elements actually in the table
+ size_t mFilledBuckets; // number of buckets for which collision or present is true
+ size_t mBucketCount; // number of slots in the mBuckets array
+ void* mBuckets; // array of buckets, as a SharedBuffer
+
+ inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
+ return *reinterpret_cast<const Bucket*>(
+ static_cast<const uint8_t*>(buckets) + index * mBucketSize);
+ }
+
+ inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
+ return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
+ }
+
+ virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
+ virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
+ virtual void destroyBucketEntry(Bucket& bucket) const = 0;
+
+private:
+ void clone();
+
+ // Allocates a bucket array as a SharedBuffer.
+ void* allocateBuckets(size_t count) const;
+
+ // Releases a bucket array's associated SharedBuffer.
+ void releaseBuckets(void* __restrict__ buckets, size_t count) const;
+
+ // Destroys the contents of buckets (invokes destroyBucketEntry for each
+ // populated bucket if needed).
+ void destroyBuckets(void* __restrict__ buckets, size_t count) const;
+
+ // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
+ // for each populated bucket if needed).
+ void copyBuckets(const void* __restrict__ fromBuckets,
+ void* __restrict__ toBuckets, size_t count) const;
+
+ // Determines the appropriate size of a bucket array to store a certain minimum
+ // number of entries and returns its effective capacity.
+ static void determineCapacity(size_t minimumCapacity, float loadFactor,
+ size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
+
+ // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
+ inline static hash_t trimHash(hash_t hash) {
+ return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
+ }
+
+ // Returns the index of the first bucket that is in the collision chain
+ // for the specified hash code, given the total number of buckets.
+ // (Primary hash)
+ inline static size_t chainStart(hash_t hash, size_t count) {
+ return hash % count;
+ }
+
+ // Returns the increment to add to a bucket index to seek to the next bucket
+ // in the collision chain for the specified hash code, given the total number of buckets.
+ // (Secondary hash)
+ inline static size_t chainIncrement(hash_t hash, size_t count) {
+ return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
+ }
+
+ // Returns the index of the next bucket that is in the collision chain
+ // that is defined by the specified increment, given the total number of buckets.
+ inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
+ return (index + increment) % count;
+ }
+};
+
+/*
+ * A BasicHashtable stores entries that are indexed by hash code in place
+ * within an array. The basic operations are finding entries by key,
+ * adding new entries and removing existing entries.
+ *
+ * This class provides a very limited set of operations with simple semantics.
+ * It is intended to be used as a building block to construct more complex
+ * and interesting data structures such as HashMap. Think very hard before
+ * adding anything extra to BasicHashtable, it probably belongs at a
+ * higher level of abstraction.
+ *
+ * TKey: The key type.
+ * TEntry: The entry type which is what is actually stored in the array.
+ *
+ * TKey must support the following contract:
+ * bool operator==(const TKey& other) const; // return true if equal
+ * bool operator!=(const TKey& other) const; // return true if unequal
+ *
+ * TEntry must support the following contract:
+ * const TKey& getKey() const; // get the key from the entry
+ *
+ * This class supports storing entries with duplicate keys. Of course, it can't
+ * tell them apart during removal so only the first entry will be removed.
+ * We do this because it means that operations like add() can't fail.
+ */
+template <typename TKey, typename TEntry>
+class BasicHashtable : private BasicHashtableImpl {
+public:
+ /* Creates a hashtable with the specified minimum initial capacity.
+ * The underlying array will be created when the first entry is added.
+ *
+ * minimumInitialCapacity: The minimum initial capacity for the hashtable.
+ * Default is 0.
+ * loadFactor: The desired load factor for the hashtable, between 0 and 1.
+ * Default is 0.75.
+ */
+ BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
+
+ /* Copies a hashtable.
+ * The underlying storage is shared copy-on-write.
+ */
+ BasicHashtable(const BasicHashtable& other);
+
+ /* Clears and destroys the hashtable.
+ */
+ virtual ~BasicHashtable();
+
+ /* Making this hashtable a copy of the other hashtable.
+ * The underlying storage is shared copy-on-write.
+ *
+ * other: The hashtable to copy.
+ */
+ inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
+ setTo(other);
+ return *this;
+ }
+
+ /* Returns the number of entries in the hashtable.
+ */
+ inline size_t size() const {
+ return mSize;
+ }
+
+ /* Returns the capacity of the hashtable, which is the number of elements that can
+ * added to the hashtable without requiring it to be grown.
+ */
+ inline size_t capacity() const {
+ return mCapacity;
+ }
+
+ /* Returns the number of buckets that the hashtable has, which is the size of its
+ * underlying array.
+ */
+ inline size_t bucketCount() const {
+ return mBucketCount;
+ }
+
+ /* Returns the load factor of the hashtable. */
+ inline float loadFactor() const {
+ return mLoadFactor;
+ };
+
+ /* Returns a const reference to the entry at the specified index.
+ *
+ * index: The index of the entry to retrieve. Must be a valid index within
+ * the bounds of the hashtable.
+ */
+ inline const TEntry& entryAt(size_t index) const {
+ return entryFor(bucketAt(mBuckets, index));
+ }
+
+ /* Returns a non-const reference to the entry at the specified index.
+ *
+ * index: The index of the entry to edit. Must be a valid index within
+ * the bounds of the hashtable.
+ */
+ inline TEntry& editEntryAt(size_t index) {
+ edit();
+ return entryFor(bucketAt(mBuckets, index));
+ }
+
+ /* Clears the hashtable.
+ * All entries in the hashtable are destroyed immediately.
+ * If you need to do something special with the entries in the hashtable then iterate
+ * over them and do what you need before clearing the hashtable.
+ */
+ inline void clear() {
+ BasicHashtableImpl::clear();
+ }
+
+ /* Returns the index of the next entry in the hashtable given the index of a previous entry.
+ * If the given index is -1, then returns the index of the first entry in the hashtable,
+ * if there is one, or -1 otherwise.
+ * If the given index is not -1, then returns the index of the next entry in the hashtable,
+ * in strictly increasing order, or -1 if there are none left.
+ *
+ * index: The index of the previous entry that was iterated, or -1 to begin
+ * iteration at the beginning of the hashtable.
+ */
+ inline ssize_t next(ssize_t index) const {
+ return BasicHashtableImpl::next(index);
+ }
+
+ /* Finds the index of an entry with the specified key.
+ * If the given index is -1, then returns the index of the first matching entry,
+ * otherwise returns the index of the next matching entry.
+ * If the hashtable contains multiple entries with keys that match the requested
+ * key, then the sequence of entries returned is arbitrary.
+ * Returns -1 if no entry was found.
+ *
+ * index: The index of the previous entry with the specified key, or -1 to
+ * find the first matching entry.
+ * hash: The hashcode of the key.
+ * key: The key.
+ */
+ inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
+ return BasicHashtableImpl::find(index, hash, &key);
+ }
+
+ /* Adds the entry to the hashtable.
+ * Returns the index of the newly added entry.
+ * If an entry with the same key already exists, then a duplicate entry is added.
+ * If the entry will not fit, then the hashtable's capacity is increased and
+ * its contents are rehashed. See rehash().
+ *
+ * hash: The hashcode of the key.
+ * entry: The entry to add.
+ */
+ inline size_t add(hash_t hash, const TEntry& entry) {
+ return BasicHashtableImpl::add(hash, &entry);
+ }
+
+ /* Removes the entry with the specified index from the hashtable.
+ * The entry is destroyed immediately.
+ * The index must be valid.
+ *
+ * The hashtable is not compacted after an item is removed, so it is legal
+ * to continue iterating over the hashtable using next() or find().
+ *
+ * index: The index of the entry to remove. Must be a valid index within the
+ * bounds of the hashtable, and it must refer to an existing entry.
+ */
+ inline void removeAt(size_t index) {
+ BasicHashtableImpl::removeAt(index);
+ }
+
+ /* Rehashes the contents of the hashtable.
+ * Grows the hashtable to at least the specified minimum capacity or the
+ * current number of elements, whichever is larger.
+ *
+ * Rehashing causes all entries to be copied and the entry indices may change.
+ * Although the hash codes are cached by the hashtable, rehashing can be an
+ * expensive operation and should be avoided unless the hashtable's size
+ * needs to be changed.
+ *
+ * Rehashing is the only way to change the capacity or load factor of the
+ * hashtable once it has been created. It can be used to compact the
+ * hashtable by choosing a minimum capacity that is smaller than the current
+ * capacity (such as 0).
+ *
+ * minimumCapacity: The desired minimum capacity after rehashing.
+ * loadFactor: The desired load factor after rehashing.
+ */
+ inline void rehash(size_t minimumCapacity, float loadFactor) {
+ BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
+ }
+
+protected:
+ static inline const TEntry& entryFor(const Bucket& bucket) {
+ return reinterpret_cast<const TEntry&>(bucket.entry);
+ }
+
+ static inline TEntry& entryFor(Bucket& bucket) {
+ return reinterpret_cast<TEntry&>(bucket.entry);
+ }
+
+ virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
+ virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
+ virtual void destroyBucketEntry(Bucket& bucket) const;
+
+private:
+ // For dumping the raw contents of a hashtable during testing.
+ friend class BasicHashtableTest;
+ inline uint32_t cookieAt(size_t index) const {
+ return bucketAt(mBuckets, index).cookie;
+ }
+};
+
+template <typename TKey, typename TEntry>
+BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
+ BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
+ minimumInitialCapacity, loadFactor) {
+}
+
+template <typename TKey, typename TEntry>
+BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
+ BasicHashtableImpl(other) {
+}
+
+template <typename TKey, typename TEntry>
+BasicHashtable<TKey, TEntry>::~BasicHashtable() {
+ dispose();
+}
+
+template <typename TKey, typename TEntry>
+bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
+ const void* __restrict__ key) const {
+ return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
+}
+
+template <typename TKey, typename TEntry>
+void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
+ const void* __restrict__ entry) const {
+ if (!traits<TEntry>::has_trivial_copy) {
+ new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
+ } else {
+ memcpy(&entryFor(bucket), entry, sizeof(TEntry));
+ }
+}
+
+template <typename TKey, typename TEntry>
+void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
+ if (!traits<TEntry>::has_trivial_dtor) {
+ entryFor(bucket).~TEntry();
+ }
+}
+
+}; // namespace android
+
+#endif // ANDROID_BASIC_HASHTABLE_H
diff --git a/include/utils/TypeHelpers.h b/include/utils/TypeHelpers.h
index a1663f30e..75388176d 100644
--- a/include/utils/TypeHelpers.h
+++ b/include/utils/TypeHelpers.h
@@ -213,6 +213,9 @@ void move_backward_type(TYPE* d, const TYPE* s, size_t n = 1) {
template <typename KEY, typename VALUE>
struct key_value_pair_t {
+ typedef KEY key_t;
+ typedef VALUE value_t;
+
KEY key;
VALUE value;
key_value_pair_t() { }
@@ -222,6 +225,12 @@ struct key_value_pair_t {
inline bool operator < (const key_value_pair_t& o) const {
return strictly_order_type(key, o.key);
}
+ inline const KEY& getKey() const {
+ return key;
+ }
+ inline const VALUE& getValue() const {
+ return value;
+ }
};
template<>
@@ -243,6 +252,41 @@ struct trait_trivial_move< key_value_pair_t<K, V> >
// ---------------------------------------------------------------------------
+/*
+ * Hash codes.
+ */
+typedef uint32_t hash_t;
+
+template <typename TKey>
+hash_t hash_type(const TKey& key);
+
+/* Built-in hash code specializations.
+ * Assumes pointers are 32bit. */
+#define ANDROID_INT32_HASH(T) \
+ template <> inline hash_t hash_type(const T& value) { return hash_t(value); }
+#define ANDROID_INT64_HASH(T) \
+ template <> inline hash_t hash_type(const T& value) { \
+ return hash_t((value >> 32) ^ value); }
+#define ANDROID_REINTERPRET_HASH(T, R) \
+ template <> inline hash_t hash_type(const T& value) { \
+ return hash_type(*reinterpret_cast<const R*>(&value)); }
+
+ANDROID_INT32_HASH(bool)
+ANDROID_INT32_HASH(char)
+ANDROID_INT32_HASH(unsigned char)
+ANDROID_INT32_HASH(short)
+ANDROID_INT32_HASH(unsigned short)
+ANDROID_INT32_HASH(int)
+ANDROID_INT32_HASH(unsigned int)
+ANDROID_INT64_HASH(long)
+ANDROID_INT64_HASH(unsigned long)
+ANDROID_REINTERPRET_HASH(float, uint32_t)
+ANDROID_REINTERPRET_HASH(double, uint64_t)
+
+template <typename T> inline hash_t hash_type(const T*& value) {
+ return hash_type(uintptr_t(value));
+}
+
}; // namespace android
// ---------------------------------------------------------------------------