Building Template-Driven Hash Containers in C++: From Core Implementation to Standard-Like Wrappers
Core Bucket Architecture
Hash tables relying on chaining require a lightweight link node structure. Each node stores the payload and a pointer to the subsequent element within the same collision bucket.
template <typename Payload>
struct ChainLink {
Payload data;
ChainLink* next_node;
ChainLink(const Payload& val) : data(val), next_node(nullptr) {}
};
Hash Resolution & Key Projection
Generic container support demands decoupled mechanisms for hash computation and key extraction. Two callable functors handle these responsibilities: one projects the actual key from the stored value, and another converts that key into a valid bucket index.
// Extracts the primary key from a stored payload
template <typename T>
struct KeySelector {
using key_type = T;
T operator()(const T& item) const { return item; }
};
// Computes modulo-safe bucket indices
template <typename K>
struct BucketMapper {
size_t operator()(const K& key) const {
return std::hash<K>{}(key);
}
};
// String specialization for character-weighted distribution
template <>
struct BucketMapper<std::string> {
size_t operator()(const std::string& str) const {
size_t hash_val = 0;
for (char c : str) {
hash_val = hash_val * 33 + static_cast<size_t>(c);
}
return hash_val;
}
};
Dynamic Storage Management
The underlying storage utilizes a dynamic array of bucket pointers alongside a counter tracking active elements. Resize operations trigger when load factor thresholds are breached, reallocating memory and redistributing nodes into new buckets.
template <typename K, typename V, typename SelectKey = KeySelector<V>, typename HashFn = BucketMapper<K>>
class OpenChainContainer {
private:
using Node = ChainLink<V>;
std::vector<Node*> buckets_;
size_t active_count_ = 0;
SelectKey selector_;
HashFn hasher_;
size_t compute_index(const V& val) const {
return hasher_(selector_(val)) % buckets_.size();
}
void rehash(size_t new_capacity) {
std::vector<Node*> new_buckets(new_capacity, nullptr);
for (auto& head : buckets_) {
Node* current = head;
while (current) {
Node* temp = current;
current = current->next_node;
size_t idx = hasher_(selector_(temp->data)) % new_capacity;
temp->next_node = new_buckets[idx];
new_buckets[idx] = temp;
}
}
buckets_ = std::move(new_buckets);
}
public:
bool find(const V& target) const {
if (buckets_.empty()) return false;
size_t idx = compute_index(target);
for (Node* cur = buckets_[idx]; cur; cur = cur->next_node) {
if (cur->data == target) return true;
}
return false;
}
std::pair<bool, V*> insert(const V& value) {
size_t new_cap = buckets_.empty() ? 8 : buckets_.size();
if (active_count_ >= new_cap) {
rehash(new_cap * 2);
}
size_t idx = compute_index(value);
V* existing = nullptr;
for (Node* cur = buckets_[idx]; cur; cur = cur->next_node) {
if (cur->data == value) { existing = &cur->data; break; }
}
if (existing) return {false, existing};
Node* newNode = new Node(value);
newNode->next_node = buckets_[idx];
buckets_[idx] = newNode;
++active_count_;
return {true, &newNode->data};
}
bool remove(const V& target) {
if (buckets_.empty()) return false;
size_t idx = compute_index(target);
Node* prev = nullptr;
for (Node* cur = buckets_[idx]; cur; prev = cur, cur = cur->next_node) {
if (cur->data == target) {
if (prev) prev->next_node = cur->next_node;
else buckets_[idx] = cur->next_node;
delete cur;
--active_count_;
return true;
}
}
return false;
}
};
Iterator Abstraction
Iterators abstract bucket traversal and chain navigation. By storing a reference to the parent container, the current bucket index, and the active node pointer, forward movement seamlessly bridges inter-bucket gaps and intra-chain links.
template <typename Container, typename RefType, typename PtrType>
class ChainIterator {
private:
friend Container;
typename Container::Node* current_node_ = nullptr;
size_t current_bucket_idx_ = 0;
Container* parent_ = nullptr;
ChainIterator(typename Container::Node* node, size_t bucket, Container* host)
: current_node_(node), current_bucket_idx_(bucket), parent_(host) {}
public:
using difference_type = std::ptrdiff_t;
using value_type = typename Container::value_type;
using pointer = PtrType;
using reference = RefType;
reference operator*() const { return current_node_->data; }
pointer operator->() const { return &(current_node_->data); }
auto& operator++() {
if (current_node_ && current_node_->next_node) {
current_node_ = current_node_->next_node;
} else {
++current_bucket_idx_;
while (current_bucket_idx_ < parent_->buckets_.size()) {
if (parent_->buckets_[current_bucket_idx_]) {
current_node_ = parent_->buckets_[current_bucket_idx_];
return *this;
}
++current_bucket_idx_;
}
current_node_ = nullptr;
}
return *this;
}
bool operator!=(const ChainIterator& other) const {
return current_node_ != other.current_node_;
}
};
Standard-Compatible Adapters
High-level collections wrap the generic chain container. Type deduction and helper functions map raw types to appropriate key extractors, yielding interfaces identical to STL standards.
Hash-Backed Associative Array
Stores key-value pairs and exposes bracket notation for insertion or lookup.
template <typename K, typename V>
class CustomUnorderedMap {
struct PairKeyExtractor {
K operator()(const std::pair<K, V>& p) const { return p.first; }
};
using BaseContainer = OpenChainContainer<K, std::pair<K, V>, PairKeyExtractor>;
BaseContainer store_;
using IteratorType = ChainIterator<BaseContainer, std::pair<K, V>&, std::pair<K, V>*>
public:
using iterator = IteratorType;
using key_type = K;
using mapped_type = V;
iterator begin() {
for (size_t i = 0; i < store_.buckets_.size(); ++i) {
if (store_.buckets_[i]) return iterator(store_.buckets_[i], i, &store_);
}
return end();
}
iterator end() { return iterator(nullptr, 0, &store_); }
std::pair<iterator, bool> emplace(const std::pair<K, V>& val) {
auto result = store_.insert(val);
return { iterator(result.second ? &result.second->first : nullptr, 0, &store_), result.first };
}
V& operator[](const K& k) {
auto [it, inserted] = emplace(std::make_pair(k, V()));
return it->second;
}
};
Hash-Backed Unique Collection
Mirrors map architecture but restricts payloads to single-value types with out assignment operators.
template <typename K>
class CustomUnorderedSet {
using BaseContainer = OpenChainContainer<K, K, KeySelector<K>, BucketMapper<K>>;
BaseContainer store_;
using IteratorType = ChainIterator<BaseContainer, K&, K*>;
public:
using iterator = IteratorType;
using key_type = K;
iterator begin() {
for (size_t i = 0; i < store_.buckets_.size(); ++i) {
if (store_.buckets_[i]) return iterator(store_.buckets_[i], i, &store_);
}
return end();
}
iterator end() { return iterator(nullptr, 0, &store_); }
std::pair<iterator, bool> insert(const K& val) {
auto result = store_.insert(val);
return { iterator(result.second ? &result.second : nullptr, 0, &store_), result.first };
}
};