aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKatharina Fey <kookie@spacekookie.de>2019-07-13 06:40:05 +0100
committerKatharina Fey <kookie@spacekookie.de>2019-07-13 06:47:21 +0100
commit9130e47b171c5182ffe6c14eb710fdcb73943de4 (patch)
treeca76fcc58130b53c794c99adc7658b3b51075639
parentd0cca976be6fb90f3724911c8a124bce56b3c5f9 (diff)
Adding initial support for HASH nodes
This PR adds initial support for HASH data nodes in libbowl. This allows a performant key-value store lookup in a node tree. The hashing code implements the "murmur" hash, which has shown good performance over at [`libcuckoo`]. Currently there is no extended hashing strategy, which should definitely be changed. [`libcuckoo`]: https://github.com/qaul/libcuckoo (currently a collision will cause a recursive re-alloc) Some of the type-level hacks also begs the question if a PAIR data node might be warranted, even though it would break the simple design around bowl->data.
-rw-r--r--CMakeLists.txt9
-rw-r--r--bowl.c19
-rw-r--r--bowl.h19
-rw-r--r--examples/list.c8
-rw-r--r--examples/tree.c10
-rw-r--r--hash.c73
-rw-r--r--hash.h11
-rw-r--r--utils.c70
-rw-r--r--utils.h4
9 files changed, 198 insertions, 25 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ab2f46..abe82d3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,10 +3,11 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c11")
set(CMAKE_BUILD_TYPE Debug)
project(bowl)
-add_library(bowl SHARED bowl.c
+add_library(bowl SHARED array.c
+ bowl.c
data.c
- utils.c
- array.c)
+ hash.c
+ utils.c)
target_include_directories(bowl PUBLIC ".")
@@ -15,4 +16,4 @@ add_executable(ex_tree examples/tree.c)
target_link_libraries(ex_tree bowl)
add_executable(ex_list examples/list.c)
-target_link_libraries(ex_list bowl) \ No newline at end of file
+target_link_libraries(ex_list bowl)
diff --git a/bowl.c b/bowl.c
index 4eeeb59..3cb36b9 100644
--- a/bowl.c
+++ b/bowl.c
@@ -1,5 +1,6 @@
#include "bowl.h"
#include "array.h"
+#include "hash.h"
#include <stdlib.h>
#include <memory.h>
@@ -37,7 +38,7 @@ err_t bowl_malloc(struct bowl **ptr, bowl_t type)
return OK;
}
-err_t bowl_insert(struct bowl *self, struct bowl *new)
+err_t bowl_append(struct bowl *self, struct bowl *new)
{
CHECK(self, INVALID_PARAMS)
CHECK(new, INVALID_PARAMS)
@@ -52,7 +53,19 @@ err_t bowl_insert(struct bowl *self, struct bowl *new)
return OK;
}
-err_t bowl_insert_key(struct bowl *self, size_t idx, struct bowl *child)
+err_t bowl_insert_key(struct bowl *self, char *key, struct bowl *child)
+{
+ CHECK(self, INVALID_PARAMS)
+ CHECK(child, INVALID_PARAMS)
+ CHECK(key, INVALID_PARAMS)
+
+ switch(self->type) {
+ case HASH: return hash_insert_key(self, key, child);
+ default: return INVALID_PARAMS;
+ }
+}
+
+err_t bowl_insert_idx(struct bowl *self, size_t idx, struct bowl *child)
{
CHECK(self, INVALID_PARAMS)
CHECK(child, INVALID_PARAMS)
@@ -66,7 +79,7 @@ err_t bowl_insert_key(struct bowl *self, size_t idx, struct bowl *child)
}
}
-err_t bowl_swap_key(struct bowl *self, size_t idx, struct bowl *child, struct bowl **prev)
+err_t bowl_swap_idx(struct bowl *self, size_t idx, struct bowl *child, struct bowl **prev)
{
CHECK(self, INVALID_PARAMS)
CHECK(child, INVALID_PARAMS)
diff --git a/bowl.h b/bowl.h
index 63149d2..ae7efe3 100644
--- a/bowl.h
+++ b/bowl.h
@@ -12,7 +12,7 @@ extern "C" {
#define CHECK(ptr, err) if(!ptr) return err;
/** Define some generic error codes first that we can propagate **/
-typedef enum err_t {
+typedef enum {
OK = 0,
ERR = -1,
NOT_IMPLEMENTED = 1, // A runtime error for missing features
@@ -22,7 +22,7 @@ typedef enum err_t {
INVALID_STATE = 4, // Calling an operation on an invalid state
} err_t;
-typedef enum data_t {
+typedef enum {
LITERAL, // Any string that is \0 terminated
INTEGER, // Integer value
REAL, // Floating point value
@@ -30,7 +30,7 @@ typedef enum data_t {
RAW // A platform-length pointer
} data_t;
-typedef enum bowl_t {
+typedef enum {
LEAF = 1, ARRAY, HASH, LINKED
} bowl_t;
@@ -67,14 +67,17 @@ struct b_ptr {
/// Allocate memory for an new, empty bowl node
err_t bowl_malloc(struct bowl **, bowl_t);
-/// Insert one node into another
-err_t bowl_insert(struct bowl *, struct bowl *);
+/// Insert one node at the end of another
+err_t bowl_append(struct bowl *, struct bowl *);
+
+/// Insert a node under a specific key (relevant for HASH nodes)
+err_t bowl_insert_key(struct bowl *, char *key, struct bowl *);
/// Insert a node into a specific index of another node
-err_t bowl_insert_key(struct bowl *, size_t idx, struct bowl *);
+err_t bowl_insert_idx(struct bowl *, size_t idx, struct bowl *);
/// Insert and swap a key in place, returning the old one
-err_t bowl_swap_key(struct bowl *, size_t idx, struct bowl *, struct bowl **);
+err_t bowl_swap_idx(struct bowl *, size_t idx, struct bowl *, struct bowl **);
/// Remove a bowl node by it's pointer reference
err_t bowl_remove(struct bowl *, struct bowl *);
@@ -94,4 +97,4 @@ err_t data_free(struct data *);
#ifdef __cplusplus
}
#endif
-#endif // _BOWL_H_ \ No newline at end of file
+#endif // _BOWL_H_
diff --git a/examples/list.c b/examples/list.c
index a09588b..84205e2 100644
--- a/examples/list.c
+++ b/examples/list.c
@@ -9,15 +9,15 @@ int main()
struct bowl *a;
data_malloc(&a, LITERAL, "Destroy capitalism");
- bowl_insert(root, a);
+ bowl_append(root, a);
struct bowl *b;
data_malloc(&b, INTEGER, 1312);
- bowl_insert(root, b);
+ bowl_append(root, b);
struct bowl *c;
data_malloc(&c, LITERAL, "Alerta, Antifascista!");
- bowl_insert(root, c);
+ bowl_append(root, c);
return bowl_free(root);
-} \ No newline at end of file
+}
diff --git a/examples/tree.c b/examples/tree.c
index e55acb9..eb1b33a 100644
--- a/examples/tree.c
+++ b/examples/tree.c
@@ -30,17 +30,17 @@ int main()
if(e) return e;
// Add the d node to c
- e = bowl_insert(c, d);
+ e = bowl_append(c, d);
if(e) e;
// Add other nodes to root
- e = bowl_insert(root, a);
+ e = bowl_append(root, a);
if(e) return e;
- e = bowl_insert(root, b);
+ e = bowl_append(root, b);
if(e) return e;
- e = bowl_insert(root, c);
+ e = bowl_append(root, c);
if(e) return e;
e = bowl_free(root);
return e;
-} \ No newline at end of file
+}
diff --git a/hash.c b/hash.c
new file mode 100644
index 0000000..689bf18
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,73 @@
+#include "bowl.h"
+#include "array.h"
+#include "hash.h"
+#include "utils.h"
+
+#include <malloc.h>
+
+typedef struct {
+ char *key;
+ struct bowl *data;
+} hash_item;
+
+err_t hash_insert_key(struct bowl *self, char *key, struct bowl *child)
+{
+ CHECK(self, INVALID_STATE)
+ CHECK(child, INVALID_STATE)
+ CHECK(key, INVALID_STATE)
+
+ // Even though we're a HASH node, we use an array under the hood
+ struct bowl_arr *arr = self->_pl.array;
+ CHECK(arr, INVALID_STATE)
+
+ size_t idx;
+ err_t e = _hash(key, arr->size, &idx);
+ if(e) return e;
+
+ // If the index is used, rescale and try again
+ if(arr->ptr[idx] != NULL) {
+ struct bowl *prev = self;
+
+ // Allocate new array
+ e = array_malloc(self, arr->size * 2);
+ if(e) return e;
+
+ for(int i = 0; i < arr->size; i++) {
+ if(arr->ptr[i] != NULL) {
+ // FIXME: This is horrible
+ hash_item *item = (hash_item*) arr->ptr[i];
+ e = hash_insert_key(self, item->key, item->data);
+ if(e) return e;
+ }
+ }
+
+ e = array_free(prev);
+ if(e) return e;
+
+ e = hash_insert_key(self, key, child);
+ if(e) return e;
+
+ } else {
+ hash_item *item = malloc(sizeof(hash_item));
+ CHECK(item, MALLOC_FAILED)
+
+ item->key = calloc(sizeof(char), REAL_STRLEN(key));
+ CHECK(item->key, MALLOC_FAILED)
+ strcpy(item->key, key);
+
+ item->data = child;
+
+ arr->ptr[idx] = (struct bowl*) item;
+ }
+
+ return OK;
+}
+
+err_t hash_remove_key(struct bowl *self, char *key, struct bowl **child)
+{
+ CHECK(self, INVALID_STATE)
+ CHECK(key, INVALID_STATE)
+
+ return OK;
+}
+
diff --git a/hash.h b/hash.h
new file mode 100644
index 0000000..a5d763d
--- /dev/null
+++ b/hash.h
@@ -0,0 +1,11 @@
+#ifndef HASH_H_
+#define HASH_H_
+
+#include "bowl.h"
+
+err_t hash_insert_key(struct bowl *, char *key, struct bowl *);
+
+err_t hash_remove_key(struct bowl *, char *key, struct bowl **);
+
+#endif // HASH_H_
+
diff --git a/utils.c b/utils.c
index 9e5a027..f08c670 100644
--- a/utils.c
+++ b/utils.c
@@ -1,4 +1,5 @@
#include <stdlib.h>
+#include <stdint.h>
#include <stdio.h>
#include "utils.h"
@@ -44,3 +45,72 @@ err_t _array_remove(void **ptr, size_t idx, size_t len, void **out)
return OK;
}
+
+err_t _hash(char *str, size_t len, size_t *out)
+{
+ CHECK(str, INVALID_PARAMS)
+ CHECK((len < 0), INVALID_PARAMS)
+
+ // Implements the "murmur" non-cryptographic hash
+
+#define C1 0xcc9e2d51
+#define C2 0x1b873593
+#define N 0xe6546b64
+#define M 5
+#define C3 0x85ebca6b
+#define C4 0xc2b2ae35
+#define R1 15
+#define R2 13
+#define R3 16
+#define R4 13
+#define ROTL32(v, shift) ( ((v) << (shift)) | ( (v) >> (32 - (shift))) )
+
+ size_t key_len = strlen(str);
+ int l = (int) key_len / 4;
+ uint32_t seed = 0xAF172FE;
+ int i = 0;
+
+ uint32_t k = 0;
+ uint32_t h = seed;
+ uint8_t *d = (uint8_t *) str;
+ const uint32_t *chunks = (const uint32_t *)(d + l * 4);
+ const uint8_t *tail = (const uint8_t *)(d + l * 4);
+
+ for (i = -l; i != 0; ++i) {
+ k = chunks[i];
+ k *= C1;
+ k = ROTL32(k, R1);
+ k *= C2;
+ h ^= k;
+ h = ROTL32(k, R2);
+ h = h * M + N;
+ }
+
+ k = 0;
+ switch(key_len & 3) {
+ case 3:
+ k ^= (tail[2] << 16);
+ case 2:
+ k ^= (tail[1] << 8);
+ case 1:
+ k ^= tail[0];
+ k *= C1;
+ k = ROTL32(k, R1);
+ k *= C2;
+ h ^= k;
+ default: break;
+ }
+
+ /* Finalised hash */
+ h ^= key_len;
+ h ^= (h >> R3);
+ h *= C3;
+ h ^= (h >> R4);
+ h *= C4;
+ h ^= (h >> R3);
+
+ /* Finalise ✨ */
+ h %= len;
+ *out = (size_t) h;
+ return OK;
+}
diff --git a/utils.h b/utils.h
index c7c2178..4fa7a90 100644
--- a/utils.h
+++ b/utils.h
@@ -13,7 +13,9 @@ err_t _array_search(void **, size_t, size_t *out, void *in);
err_t _array_remove(void **, size_t idx, size_t len, void **out);
+err_t _hash(char *str, size_t len, size_t *out);
+
#ifdef __cplusplus
}
#endif
-#endif // _UTIL_H_ \ No newline at end of file
+#endif // _UTIL_H_