// Open-addressing hash table for unsigned keys (linear probing, tombstones).
//
// Reconstructed from a whitespace-collapsed patch:
//   commit  5a5548208bb363e5bf9cc87ba0f40df246f60408
//   author  David Nieder <post@davidnieder.de>
//   date    Sat, 31 Dec 2022 12:14:21 +0100
//   subject [PATCH] a first draft
// The patch added src/hashtable.h and src/hashtable.cpp. Both files are
// listed below, header first, so the listing forms one translation unit.

// ===================== src/hashtable.h =====================
#ifndef HASHTABLE_H
#define HASHTABLE_H

#include <cstddef>
#include <memory>


/* a bucket of size 1 */
class Slot {
    unsigned key = 0;
    // vacant:  never held a key; a lookup may stop probing here
    // deleted: tombstone left by remove(); reusable by insert(), but
    //          lookups have to probe past it
    // used:    currently stores `key`
    enum: char { vacant, deleted, used } label = vacant;

    friend class Hashtable;
};


class Hashtable {
    public:
        // m:     initial number of slots (0 is allowed; the first insert grows)
        // alpha: maximum load factor, must lie in the open interval (0;1)
        // s:     factor the capacity is multiplied by when growing, must be > 1
        // Throws std::invalid_argument if alpha or s is out of range.
        Hashtable(size_t m=8, float alpha=0.75f, float s=2.0f);

        // Stores key. Returns true if it was added, false if it was already
        // present (in which case the table is left untouched).
        bool insert(unsigned key);
        // Removes key. Returns true if it was present.
        bool remove(unsigned key);
        // Returns true if key is stored in the table.
        bool contains(unsigned key) const;
        // Number of stored keys.
        size_t size() const { return element_count; }
        // Number of slots.
        size_t capacity() const { return slot_count; }
        // size()/capacity(), or 0 for a table without slots.
        float load_factor() const;

        // Diagnostics, written to std::cerr.
        void info() const;
        void print() const;
    private:
        size_t element_count = 0;
        size_t slot_count;
        const float max_loadfactor;
        const float growfactor;
        std::unique_ptr<Slot[]> table;

        unsigned hash(unsigned key) const;
        size_t probe(unsigned key, size_t i) const;
        void grow();
};

#endif

// ===================== src/hashtable.cpp =====================
#include <cassert>
#include <cmath>
#include <iostream>
#include <stdexcept>
#include <string>

// #include "hashtable.h"   (declarations are listed above)


Hashtable::Hashtable(size_t m, float alpha, float s):
    slot_count(m), max_loadfactor(alpha), growfactor(s), table(new Slot[m])
{
    // The negated form also rejects NaN.
    if (!(0 < alpha && alpha < 1))
        throw std::invalid_argument("load factor alpha must be in range (0;1)");
    if (!(s > 1))
        throw std::invalid_argument("grow factor s must be greater 1");
}

float Hashtable::load_factor() const {
    return slot_count == 0 ? 0.0f : static_cast<float>(element_count)/slot_count;
}

bool Hashtable::insert(unsigned key) {
    if (slot_count != 0) {
        // First deleted or vacant slot on the probe path. A tombstone must
        // not end the search: the key may still be stored behind it, and
        // writing it here right away would store it twice.
        Slot *reusable = nullptr;

        const unsigned hk = hash(key);
        for (size_t i=0; i<slot_count; i++) {
            Slot &slot = table[probe(hk, i)];

            if (slot.label == Slot::used) {
                if (slot.key == key)
                    return false;   // the key is already in the table
                continue;
            }

            if (reusable == nullptr)
                reusable = &slot;
            if (slot.label == Slot::vacant)
                break;              // end of the probe chain: key is absent
        }

        // The key is new. Store it in place unless that would exceed the
        // maximum load factor.
        if (reusable != nullptr
                && (element_count+1.0f)/slot_count <= max_loadfactor) {
            reusable->key = key;
            reusable->label = Slot::used;
            element_count++;
            return true;
        }
    }

    // Only reached for a key that is not stored yet, so a rejected
    // duplicate never grows the table.
    grow();

    // The grown table has no tombstones and does not contain the key, so
    // the first slot that is not in use is the right place.
    const unsigned hk = hash(key);
    for (size_t i=0; i<slot_count; i++) {
        Slot &slot = table[probe(hk, i)];

        if (slot.label != Slot::used) {
            slot.key = key;
            slot.label = Slot::used;
            element_count++;
            return true;
        }
    }

    // If the probing strategy permutates {0,...,m-1} the for-loop
    // visits every slot and will find an empty one eventually.
    throw std::logic_error("will we ever reach here?");
}

bool Hashtable::remove(unsigned key) {
    if (slot_count == 0)
        return false; // table is empty

    const unsigned hk = hash(key);
    for (size_t i=0; i<slot_count; i++) {
        Slot &slot = table[probe(hk, i)];

        if (slot.label == Slot::vacant) {
            // found a vacant slot: key is not in table
            break;
        }

        if (slot.label == Slot::used && slot.key == key) {
            // found the key: leave a tombstone so that keys stored further
            // along the same probe chain stay reachable
            element_count--;
            slot.label = Slot::deleted;
            return true;
        }
    }

    return false;
}

bool Hashtable::contains(unsigned key) const {
    if (slot_count == 0)
        return false; // table is empty

    const unsigned hk = hash(key);
    for (size_t i=0; i<slot_count; i++) {
        const Slot &slot = table[probe(hk, i)];

        if (slot.label == Slot::vacant) {
            // found a vacant slot, key is not in table
            break;
        }

        if (slot.label == Slot::used && slot.key == key) {
            return true;
        }
    }

    return false;
}

// Home slot of key: key modulo the current number of slots.
unsigned Hashtable::hash(unsigned key) const {
    assert(slot_count != 0);
    return static_cast<unsigned>(key % slot_count);
}

// Linear probing: the i-th slot to try for a key whose home slot is h.
size_t Hashtable::probe(unsigned h, size_t i) const {
    assert(slot_count != 0);
    return (h+i) % slot_count;
}

// Replaces the slot array with a larger one and re-inserts every stored
// key. Tombstones are not carried over.
void Hashtable::grow() {
    const size_t old_capacity = slot_count;
    const size_t base = old_capacity == 0 ? 1 : old_capacity;

    size_t new_capacity =
        static_cast<size_t>(std::ceil(growfactor * static_cast<float>(base)));
    // The float product can round down to the old size for very large
    // tables; always gain at least one slot.
    if (new_capacity <= old_capacity)
        new_capacity = old_capacity + 1;

    // Allocate before touching any member: if this throws, the table is
    // left exactly as it was.
    std::unique_ptr<Slot[]> fresh(new Slot[new_capacity]);

    // From here on hash() and probe() address the new array.
    slot_count = new_capacity;

    // for all slots in the old table ...
    for (size_t i=0; i<old_capacity; i++) {
        const Slot &os = table[i];

        // that store a key ...
        if (os.label != Slot::used)
            continue;

        // find a slot in the new table
        const unsigned hk = hash(os.key);
        for (size_t j=0; j<slot_count; j++) {
            Slot &ns = fresh[probe(hk, j)];

            if (ns.label == Slot::vacant) {
                ns.key = os.key;
                ns.label = Slot::used;
                break;
            }
        }
    }

    table.swap(fresh);  // `fresh` now owns the old array and frees it
}

// Writes size, capacity, load factor and the array's memory range to std::cerr.
void Hashtable::info() const {
    std::cerr << "size: " << size() << ", capacity: " << capacity()
        << ", alpha (current/max): " << load_factor() << "/" << max_loadfactor << '\n';
    std::cerr << "array size: " << slot_count*sizeof(Slot)/1024 << "kB, start: "
        << table.get() << ", end: " << table.get()+slot_count << '\n';
}

// Dumps the slots to std::cerr, ten per row, each row prefixed with the
// index of its first slot. "F" is a vacant slot, "D" a deleted one; used
// slots show their key.
void Hashtable::print() const {
    for (size_t i=0; i<slot_count; i+=10) {
        std::cerr << i << ": ";
        for (size_t j=i; j<i+10 && j<slot_count; j++) {
            const Slot &s = table[j];
            std::string repr;
            if (s.label == Slot::vacant) repr = "F";
            else if (s.label == Slot::deleted) repr = "D";
            else repr = std::to_string(s.key);
            std::cerr << repr << " ";
        }
        std::cerr << '\n';
    }
}