aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKatharina Fey <kookie@spacekookie.de>2016-12-11 16:39:43 +0100
committerKatharina Fey <kookie@spacekookie.de>2019-06-04 20:21:53 +0200
commite8e503ed6ef043f0ca9d540290adc5ca61b8551e (patch)
treee4dab8d6e5a3cd74cba66a307c467694d0b01200
parent877f10e3f6618fd761b71bbb5e839b4a4f98760f (diff)
Impementing a functional json parser on top of the tokeniser
-rw-r--r--.gitignore1
-rw-r--r--lib/dtree_utils.c264
-rw-r--r--test/main.c341
3 files changed, 361 insertions, 245 deletions
diff --git a/.gitignore b/.gitignore
index 7394053..1212a63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,4 @@
build/
.idea/
+cmake-build-debug
diff --git a/lib/dtree_utils.c b/lib/dtree_utils.c
index 082ece3..3d1c394 100644
--- a/lib/dtree_utils.c
+++ b/lib/dtree_utils.c
@@ -1,129 +1,207 @@
#include <dtree/dtree.h>
#include <stdio.h>
+#include <malloc.h>
#include <stdlib.h>
-void pk_string_trim(char *src, char *dst);
+#include "jsmn.h"
-dt_err dtree_decode_json(dtree *(*data), const char *json_data)
-{
- enum parse_state {
- VALUE, HASH, LIST, WAITING
- };
-
-#define BUF_SIZE 256
-
- /* Save some space for a token */
- const char *delims = ",:";
- char *parse;
- char curr_key[BUF_SIZE];
- char curr_val[BUF_SIZE];
- dtree *root, *curr_root;
- enum parse_state state = WAITING;
+#define DTREE_TOK_BOOLEAN (1 << 1)
+#define DTREE_TOK_NUMERICAL (1 << 2)
+#define DTREE_TOK_LITERAL (1 << 3)
- /* Prepare environment for parsing */
- char json_buf[REAL_STRLEN(json_data)];
- strcpy(json_buf, json_data);
- /* Setup root dtree node */
- dtree_malloc(&root);
- curr_root = root;
+char *tok_to_str(jsmntype_t *tok)
+{
+ switch(*tok) {
+ case JSMN_UNDEFINED: return "UNDEFINED";
+ case JSMN_OBJECT: return "OBJECT";
+ case JSMN_ARRAY: return "ARRAY";
+ case JSMN_STRING: return "STRING";
+ case JSMN_PRIMITIVE: return "PRIMITIVE";
+ default: return "UNKNOWN";
+ }
+}
- /* Read in the first token */
- parse = strtok(json_buf, delims);
- while(parse != NULL) {
+int digest_payload(const char *token)
+{
+ char* end;
+ size_t len = strlen(token);
- char tok[strlen(parse) + 1];
- memset(tok, 0, strlen(parse) + 1);
+ if(len == strlen("true") || len == strlen("false"))
+ if(strcmp(token, "true") == 0 || strcmp(token, "false") == 0)
+ return DTREE_TOK_BOOLEAN;
- pk_string_trim(parse, tok);
+ /* It could still be a number! */
+ strtol(token, &end, 10);
+ if (!*end) return DTREE_TOK_NUMERICAL;
- /* Open a new hash context */
- if(tok[0] == '{') {
+ return DTREE_TOK_LITERAL;
+}
- dtree *new_root;
- dtree_addlist(curr_root, &new_root);
- curr_root = new_root;
+dt_err dtree_decode_json(dtree *(*data), const char *json_data, size_t len)
+{
+ jsmn_parser parse;
+ jsmn_init(&parse);
- printf("Creating new hash context...\n");
- state = HASH;
- }
+ // FIXME: Variable amount of tokens?
+ jsmntok_t *tokens = malloc(sizeof(jsmntok_t) * len);
+ memset(tokens, 0, sizeof(jsmntok_t) * len);
- /* Open a new list context - finishing a PAIR - Back to waiting */
- if(tok[0] == '[') {
- printf("Creating new hash context...\n");
- state = LIST;
- }
+ int ret = jsmn_parse(&parse, json_data, strlen(json_data), tokens, sizeof(tokens) / sizeof(tokens[0]));
- /* If we're in a hash & waiting for a key */
- if(state == HASH) {
- if(tok[0] == '{') strcpy(curr_key, tok + 1);
- else strcpy(curr_key, tok);
+ jsmntok_t tok;
+ unsigned int idx = 0;
- printf("Current Key: %s\n", curr_key);
- state = VALUE;
- goto END;
- }
+ /** Prepare dtree nodes */
+ dtree *root, *curr;
+ dtree_malloc(&root);
+ curr = root;
- /* We already had a key - finishing up the pair */
- if(state == VALUE) {
- strcpy(curr_val, tok);
- printf("Current Val: %s\n", curr_val);
+ struct bounds {
+ int low, high;
+ };
- /* Copy pair into dtree structure */
- dtree *parent, *key, *val;
- dtree_addlist(curr_root, &parent);
+ struct pair {
+ short state;
+#define TOK_PAIR_KEYED 1
+#define TOK_PAIR_VALUED 2
+ char key[1024];
+ union value {
+ char string[1024];
+ unsigned long num;
+ } value;
+ };
- /* Make the "parent" node into the pair parent */
- dtree_addpair(parent, &key, &val);
- dtree_addliteral(key, curr_key);
- dtree_addliteral(val, curr_val);
+ /* Save some space to store token bounds */
+ struct bounds *bounds = malloc(sizeof(struct bounds) * len);
+ memset(bounds, 0, sizeof(struct bounds) * len);
+ int focused = -1;
- /* Add the parent */
+ struct pair c_pair;
+ memset(&c_pair, 0, sizeof(struct pair));
- /* Blank current pair data */
- memset(curr_key, 0, BUF_SIZE);
- memset(curr_val, 0, BUF_SIZE);
+ while(tok = tokens[idx++], tok.type != NULL) {
- state = HASH;
- goto END;
- }
+ size_t tok_len = (size_t) tok.end - tok.start;
+ char token[tok_len];
+ memset(token, 0, tok_len);
+ memcpy(token, json_data + tok.start, tok_len);
- if(state == LIST) {
- dtree *child;
- dtree_addlist(curr_root, &child);
- dtree_addliteral(child, tok);
+ /** Check if we need to move the boundry scope (again) */
+ if(focused > 0 && tok.end >= bounds[focused].high) {
+ focused--;
- size_t chs = strlen(tok);
- dtree *parent;
+ /* Because of how our root node is a VALUE node, we need the parents parent */
+ dtree *parent, *pair_parent;
+ dtree_parent(root, curr, &parent);
+ dtree_parent(root, parent, &pair_parent);
- dtree_parent(root, curr_root, &parent);
- if(tok[chs] == ']') {
- curr_root = parent;
- state = HASH;
- }
+ /* Assign the new root node - old scope restored */
+ curr = pair_parent;
}
- printf(" Recognised token: %s\n", tok);
- END:
- parse = strtok(NULL, delims);
- }
+ switch(tok.type) {
+
+ /**
+ * When we encounter a new json object, shift our "focus" over by one so we can
+ * record in what range this object is going to accumilate tokens.
+ *
+ * We then create a new child node under the current root node and switch the
+ * curr root pointer over to that child.
+ *
+ * When we reach the end of the token scope, we need to re-reference the parent as
+ * current root and switch over our boundry scope as well. This is done before the
+ * parsing switch statement.
+ */
+ case JSMN_OBJECT:
+ {
+ focused++;
+ bounds[focused].low = tok.start;
+ bounds[focused].high = tok.end;
+
+ /**
+ * Most of the time, we will create a new object under the key of
+ * a pair. This is the case, when the c_pair state buffer has been
+ * set to KEYED. In this case we allocate a new pair node for key
+ * and value and set that value to the new root.
+ */
+ if(c_pair.state == TOK_PAIR_KEYED) {
+
+ /* Create pair nodes & new_root which becomes curr */
+ dtree *pair, *key, *val;
+ dtree_addlist(curr, &pair);
+ dtree_addpair(pair, &key, &val);
+
+ /* Assign key and new_root as a value of the pair */
+ dtree_addliteral(key, c_pair.key);
+
+ /* Move curr root pointer */
+ curr = val;
+
+ /* Blank c_pair data for next tokens */
+ memset(&c_pair, 0, sizeof(struct pair));
+
+ }
+
+ /* Skip to next token */
+ continue;
+ }
- return SUCCESS;
-}
+ case JSMN_STRING:
+ {
+ /**
+ * Here we need to check if we are adding a string as a key
+ * or as a value. This is simply done by checking for the existance
+ * of a key in the c_pair (current pair) variable.
+ *
+ * We know the token positions so we can manualy copy from the json stream
+ */
+ if(c_pair.state == 0) {
+ memcpy(c_pair.key, json_data + tok.start, (size_t) tok.end - tok.start);
+ c_pair.state = TOK_PAIR_KEYED;
+
+ } else if(c_pair.state == TOK_PAIR_KEYED){
+
+ /** Create a PAIR node under current root */
+ dtree *pair, *key, *val;
+ dtree_addlist(curr, &pair);
+ dtree_addpair(pair, &key, &val);
+
+ /* Key is always literal */
+ dtree_addliteral(key, c_pair.key);
+
+ /* Parse payload and asign to value node */
+ switch(digest_payload(token)) {
+ case DTREE_TOK_LITERAL:
+ dtree_addliteral(val, token);
+ break;
+
+ case DTREE_TOK_NUMERICAL:
+ dtree_addnumeral(val, atol(token));
+ break;
+
+ default: continue;
+ }
+
+ /* Blank c_pair data for next tokens */
+ memset(&c_pair, 0, sizeof(struct pair));
+ }
+
+ /* Skip to next token */
+ continue;
+ }
-/************************************************************/
-void pk_string_trim(char *src, char *dst)
-{
- int s, d=0;
- for (s=0; src[s] != 0; s++)
- if (src[s] != ' ') {
- dst[d] = src[s];
- d++;
+ default:
+ continue;
}
- dst[d] = 0;
+ }
+
+ /* Switch over data pointer and return */
+ (*data) = root;
+ return SUCCESS;
} \ No newline at end of file
diff --git a/test/main.c b/test/main.c
index 02e871b..63ff521 100644
--- a/test/main.c
+++ b/test/main.c
@@ -3,6 +3,8 @@
#include <dtree/dtree.h>
#include <dtree/eztree.h>
+#include <time.h>
+#include <sys/time.h>
/**
* A small test that creates a tree, splits the nodes
@@ -23,174 +25,209 @@ dt_err test_shortcut_functions();
printf(" %s\n", (err == 0) ? "OK!" : "FAILED!"); \
if(err) goto end;
-int main(void)
+int main(int argn, char **argv)
{
dt_err err = SUCCESS;
printf("=== libdyntree test suite ===\n");
- TEST(split_and_merge())
+// TEST(split_and_merge())
+//
+// TEST(search_for_payload())
+//
+// char json[1024];
+// TEST(json_encode(json))
+//
+// dtree *recover;
+// dtree_decode_json(&recover, json);
+// dtree_free(recover);
- TEST(search_for_payload())
+ struct timeval t1, t2;
+ double elapsedTime;
- char json[1024];
- TEST(json_encode(json))
+ // start timer
+ gettimeofday(&t1, NULL);
- dtree *recover;
- dtree_decode_json(&recover, json);
- dtree_free(recover);
+#define PATH "/home/spacekookie/Downloads/generated.json"
- end:
- printf("==== done ====\n");
- return err;
-}
-
-
-/*************** TEST IMPLEMENTATIONS ****************/
+ /* Open the file and seek through it for length */
+ FILE *f = fopen(PATH, "r");
+ fseek(f, 0, SEEK_END);
+ size_t file_size = (size_t) ftell(f);
+ fseek(f, 0, SEEK_SET);
-dt_err split_and_merge()
-{
- dt_err err;
-
- /* Allocate a node named root */
- dtree *root;
- err = dtree_malloc(&root);
- if(err) goto exit;
-
- /* Add child as a recursive node to root */
- dtree *child;
- err = dtree_addlist(root, &child);
- if(err) goto exit;
-
- /* Make child a literal node containing the works of shakespeare */
- const char *hamlet = "To be, or not to be: that is the question:\n"
- "Whether 'tis nobler in the mind to suffer\n"
- "The slings and arrows of outrageous fortune,\n"
- "Or to take arms against a sea of troubles,\n"
- "And by opposing end them? To die: to sleep;\n"
- "No more; and by a sleep to say we end\n"
- "The heart-ache and the thousand natural shocks\n"
- "That flesh is heir to, 'tis a consummation\n"
- "Devoutly to be wish'd. To die, to sleep;";
-
- err = dtree_addliteral(child, hamlet);
- if(err) goto exit;
-
- /* Split our tree into two single-nodes */
- err = dtree_split_trees(root, child);
- if(err) goto exit;
-
- /* Re-merge because they miss each other */
- err = dtree_merge_trees(root, child);
- if(err) goto exit;
-
- /* Cleanup */
- exit:
- dtree_free(root);
- return err;
-}
+ /* Create a buffer of the correct size */
+ char *json = (char*) malloc(sizeof(char) * (file_size + 1));
+ memset(json, 0, file_size + 1);
+ fread(json, file_size, 1, f);
+ fclose(f);
-dt_err search_for_payload()
-{
- dt_err err;
+ dtree *recov;
+ dtree_decode_json(&recov, json, file_size);
- dtree *root, *a, *b, *found;
- err = dtree_malloc(&root);
- if(err) goto exit;
+ dtree_print(recov);
+ dtree_free(recov);
- const char *string = "This is some data!";
- err = dtree_addlist(root, &a);
- if(err) goto exit;
+ gettimeofday(&t2, NULL);
- err = dtree_addliteral(a, string);
- if(err) goto exit;
+ // compute and print the elapsed time in millisec
+ elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; // sec to ms
+ elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; // us to ms
- err = dtree_addlist(root, &b);
- if(err) goto exit;
+ printf("Program took %fms to run\n", elapsedTime);
- err = dtree_addnumeral(b, 1337);
- if(err) goto exit;
- /* Try to find our data again */
-
- err = dtree_search_payload(root, &found, (void*) string, LITERAL);
- if(err) goto exit;
-
- err = dtree_search_payload(root, &found, (void*) 1337, NUMERIC);
- if(err) goto exit;
-
- exit:
- dtree_free(root);
+end:
+ printf("==== done ====\n");
return err;
}
-dt_err json_encode(char *json) {
- dt_err err;
-
- dtree *root, *a, *b, *c, *found;
- err = dtree_malloc(&root);
- if (err) goto exit;
-
- dtree *key, *val;
- err = dtree_addlist(root, &a);
- if (err) goto exit;
- err = dtree_addlist(root, &b);
- if (err) goto exit;
- err = dtree_addlist(root, &c);
- if (err) goto exit;
-
- err = dtree_addpair(a, &key, &val);
- if (err) goto exit;
- err = dtree_addliteral(key, "Server Address");
- if (err) goto exit;
- err = dtree_addliteral(val, "https://github.com");
- if (err) goto exit;
-
- key = val = NULL;
-
- err = dtree_addpair(b, &key, &val);
- if (err) goto exit;
- err = dtree_addliteral(key, "Server Port");
- if (err) goto exit;
- err = dtree_addnumeral(val, 8080);
- if (err) goto exit;
-
- key = val = NULL;
-
- err = dtree_addpair(c, &key, &val);
- if (err) goto exit;
- err = dtree_addliteral(key, "Users");
- if (err) goto exit;
-
- dtree *sbrec, *sbrec2;
- err = dtree_addlist(val, &sbrec);
- if (err) goto exit;
- err = dtree_addlist(val, &sbrec2);
- if (err) goto exit;
-
- dtree *subkey, *subval;
- err = dtree_addpair(sbrec, &subkey, &subval);
- if (err) goto exit;
- err = dtree_addliteral(subkey, "spacekookie");
- if (err) goto exit;
- err = dtree_addliteral(subval, "Admin");
- if (err) goto exit;
-
- key = val = NULL;
-
- dtree *subkey2, *subval2;
- err = dtree_addpair(sbrec2, &subkey2, &subval2);
- if (err) goto exit;
- err = dtree_addliteral(subkey2, "jane");
- if (err) goto exit;
- err = dtree_addliteral(subval2, "normal");
- if (err) goto exit;
-
- err = dtree_encode_set(root, DYNTREE_JSON_MINIFIED);
- if (err) goto exit;
- err = dtree_encode_json(root, json);
- if (err) goto exit;
-
- exit:
- dtree_free(root);
- return err;
-}
+
+/*************** TEST IMPLEMENTATIONS ****************/
+
+//dt_err split_and_merge()
+//{
+// dt_err err;
+//
+// /* Allocate a node named root */
+// dtree *root;
+// err = dtree_malloc(&root);
+// if(err) goto exit;
+//
+// /* Add child as a recursive node to root */
+// dtree *child;
+// err = dtree_addlist(root, &child);
+// if(err) goto exit;
+//
+// /* Make child a literal node containing the works of shakespeare */
+// const char *hamlet = "To be, or not to be: that is the question:\n"
+// "Whether 'tis nobler in the mind to suffer\n"
+// "The slings and arrows of outrageous fortune,\n"
+// "Or to take arms against a sea of troubles,\n"
+// "And by opposing end them? To die: to sleep;\n"
+// "No more; and by a sleep to say we end\n"
+// "The heart-ache and the thousand natural shocks\n"
+// "That flesh is heir to, 'tis a consummation\n"
+// "Devoutly to be wish'd. To die, to sleep;";
+//
+// err = dtree_addliteral(child, hamlet);
+// if(err) goto exit;
+//
+// /* Split our tree into two single-nodes */
+// err = dtree_split_trees(root, child);
+// if(err) goto exit;
+//
+// /* Re-merge because they miss each other */
+// err = dtree_merge_trees(root, child);
+// if(err) goto exit;
+//
+// /* Cleanup */
+// exit:
+// dtree_free(root);
+// return err;
+//}
+//
+//dt_err search_for_payload()
+//{
+// dt_err err;
+//
+// dtree *root, *a, *b, *found;
+// err = dtree_malloc(&root);
+// if(err) goto exit;
+//
+// const char *string = "This is some data!";
+// err = dtree_addlist(root, &a);
+// if(err) goto exit;
+//
+// err = dtree_addliteral(a, string);
+// if(err) goto exit;
+//
+// err = dtree_addlist(root, &b);
+// if(err) goto exit;
+//
+// err = dtree_addnumeral(b, 1337);
+// if(err) goto exit;
+//
+// /* Try to find our data again */
+//
+// err = dtree_search_payload(root, &found, (void*) string, LITERAL);
+// if(err) goto exit;
+//
+// err = dtree_search_payload(root, &found, (void*) 1337, NUMERIC);
+// if(err) goto exit;
+//
+// exit:
+// dtree_free(root);
+// return err;
+//}
+//
+//dt_err json_encode(char *json) {
+// dt_err err;
+//
+// dtree *root, *a, *b, *c, *found;
+// err = dtree_malloc(&root);
+// if (err) goto exit;
+//
+// dtree *key, *val;
+// err = dtree_addlist(root, &a);
+// if (err) goto exit;
+// err = dtree_addlist(root, &b);
+// if (err) goto exit;
+// err = dtree_addlist(root, &c);
+// if (err) goto exit;
+//
+// err = dtree_addpair(a, &key, &val);
+// if (err) goto exit;
+// err = dtree_addliteral(key, "Server Address");
+// if (err) goto exit;
+// err = dtree_addliteral(val, "https://github.com");
+// if (err) goto exit;
+//
+// key = val = NULL;
+//
+// err = dtree_addpair(b, &key, &val);
+// if (err) goto exit;
+// err = dtree_addliteral(key, "Server Port");
+// if (err) goto exit;
+// err = dtree_addnumeral(val, 8080);
+// if (err) goto exit;
+//
+// key = val = NULL;
+//
+// err = dtree_addpair(c, &key, &val);
+// if (err) goto exit;
+// err = dtree_addliteral(key, "Users");
+// if (err) goto exit;
+//
+// dtree *sbrec, *sbrec2;
+// err = dtree_addlist(val, &sbrec);
+// if (err) goto exit;
+// err = dtree_addlist(val, &sbrec2);
+// if (err) goto exit;
+//
+// dtree *subkey, *subval;
+// err = dtree_addpair(sbrec, &subkey, &subval);
+// if (err) goto exit;
+// err = dtree_addliteral(subkey, "spacekookie");
+// if (err) goto exit;
+// err = dtree_addliteral(subval, "Admin");
+// if (err) goto exit;
+//
+// key = val = NULL;
+//
+// dtree *subkey2, *subval2;
+// err = dtree_addpair(sbrec2, &subkey2, &subval2);
+// if (err) goto exit;
+// err = dtree_addliteral(subkey2, "jane");
+// if (err) goto exit;
+// err = dtree_addliteral(subval2, "normal");
+// if (err) goto exit;
+//
+// err = dtree_encode_set(root, DYNTREE_JSON_MINIFIED);
+// if (err) goto exit;
+// err = dtree_encode_json(root, json);
+// if (err) goto exit;
+//
+// exit:
+// dtree_free(root);
+// return err;
+//}