From e8e503ed6ef043f0ca9d540290adc5ca61b8551e Mon Sep 17 00:00:00 2001 From: Katharina Fey Date: Sun, 11 Dec 2016 16:39:43 +0100 Subject: Impementing a functional json parser on top of the tokeniser --- .gitignore | 1 + lib/dtree_utils.c | 264 +++++++++++++++++++++++++++--------------- test/main.c | 341 ++++++++++++++++++++++++++++++------------------------ 3 files changed, 361 insertions(+), 245 deletions(-) diff --git a/.gitignore b/.gitignore index 7394053..1212a63 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ build/ .idea/ +cmake-build-debug diff --git a/lib/dtree_utils.c b/lib/dtree_utils.c index 082ece3..3d1c394 100644 --- a/lib/dtree_utils.c +++ b/lib/dtree_utils.c @@ -1,129 +1,207 @@ #include #include +#include #include -void pk_string_trim(char *src, char *dst); +#include "jsmn.h" -dt_err dtree_decode_json(dtree *(*data), const char *json_data) -{ - enum parse_state { - VALUE, HASH, LIST, WAITING - }; - -#define BUF_SIZE 256 - - /* Save some space for a token */ - const char *delims = ",:"; - char *parse; - char curr_key[BUF_SIZE]; - char curr_val[BUF_SIZE]; - dtree *root, *curr_root; - enum parse_state state = WAITING; +#define DTREE_TOK_BOOLEAN (1 << 1) +#define DTREE_TOK_NUMERICAL (1 << 2) +#define DTREE_TOK_LITERAL (1 << 3) - /* Prepare environment for parsing */ - char json_buf[REAL_STRLEN(json_data)]; - strcpy(json_buf, json_data); - /* Setup root dtree node */ - dtree_malloc(&root); - curr_root = root; +char *tok_to_str(jsmntype_t *tok) +{ + switch(*tok) { + case JSMN_UNDEFINED: return "UNDEFINED"; + case JSMN_OBJECT: return "OBJECT"; + case JSMN_ARRAY: return "ARRAY"; + case JSMN_STRING: return "STRING"; + case JSMN_PRIMITIVE: return "PRIMITIVE"; + default: return "UNKNOWN"; + } +} - /* Read in the first token */ - parse = strtok(json_buf, delims); - while(parse != NULL) { +int digest_payload(const char *token) +{ + char* end; + size_t len = strlen(token); - char tok[strlen(parse) + 1]; - memset(tok, 0, strlen(parse) + 1); + if(len == strlen("true") || len == strlen("false")) + if(strcmp(token, "true") == 0 || strcmp(token, "false") == 0) + return DTREE_TOK_BOOLEAN; - pk_string_trim(parse, tok); + /* It could still be a number! */ + strtol(token, &end, 10); + if (!*end) return DTREE_TOK_NUMERICAL; - /* Open a new hash context */ - if(tok[0] == '{') { + return DTREE_TOK_LITERAL; +} - dtree *new_root; - dtree_addlist(curr_root, &new_root); - curr_root = new_root; +dt_err dtree_decode_json(dtree *(*data), const char *json_data, size_t len) +{ + jsmn_parser parse; + jsmn_init(&parse); - printf("Creating new hash context...\n"); - state = HASH; - } + // FIXME: Variable amount of tokens? + jsmntok_t *tokens = malloc(sizeof(jsmntok_t) * len); + memset(tokens, 0, sizeof(jsmntok_t) * len); - /* Open a new list context - finishing a PAIR - Back to waiting */ - if(tok[0] == '[') { - printf("Creating new hash context...\n"); - state = LIST; - } + int ret = jsmn_parse(&parse, json_data, strlen(json_data), tokens, sizeof(tokens) / sizeof(tokens[0])); - /* If we're in a hash & waiting for a key */ - if(state == HASH) { - if(tok[0] == '{') strcpy(curr_key, tok + 1); - else strcpy(curr_key, tok); + jsmntok_t tok; + unsigned int idx = 0; - printf("Current Key: %s\n", curr_key); - state = VALUE; - goto END; - } + /** Prepare dtree nodes */ + dtree *root, *curr; + dtree_malloc(&root); + curr = root; - /* We already had a key - finishing up the pair */ - if(state == VALUE) { - strcpy(curr_val, tok); - printf("Current Val: %s\n", curr_val); + struct bounds { + int low, high; + }; - /* Copy pair into dtree structure */ - dtree *parent, *key, *val; - dtree_addlist(curr_root, &parent); + struct pair { + short state; +#define TOK_PAIR_KEYED 1 +#define TOK_PAIR_VALUED 2 + char key[1024]; + union value { + char string[1024]; + unsigned long num; + } value; + }; - /* Make the "parent" node into the pair parent */ - dtree_addpair(parent, &key, &val); - dtree_addliteral(key, curr_key); - dtree_addliteral(val, curr_val); + /* Save some space to store token bounds */ + struct bounds *bounds = malloc(sizeof(struct bounds) * len); + memset(bounds, 0, sizeof(struct bounds) * len); + int focused = -1; - /* Add the parent */ + struct pair c_pair; + memset(&c_pair, 0, sizeof(struct pair)); - /* Blank current pair data */ - memset(curr_key, 0, BUF_SIZE); - memset(curr_val, 0, BUF_SIZE); + while(tok = tokens[idx++], tok.type != NULL) { - state = HASH; - goto END; - } + size_t tok_len = (size_t) tok.end - tok.start; + char token[tok_len]; + memset(token, 0, tok_len); + memcpy(token, json_data + tok.start, tok_len); - if(state == LIST) { - dtree *child; - dtree_addlist(curr_root, &child); - dtree_addliteral(child, tok); + /** Check if we need to move the boundry scope (again) */ + if(focused > 0 && tok.end >= bounds[focused].high) { + focused--; - size_t chs = strlen(tok); - dtree *parent; + /* Because of how our root node is a VALUE node, we need the parents parent */ + dtree *parent, *pair_parent; + dtree_parent(root, curr, &parent); + dtree_parent(root, parent, &pair_parent); - dtree_parent(root, curr_root, &parent); - if(tok[chs] == ']') { - curr_root = parent; - state = HASH; - } + /* Assign the new root node - old scope restored */ + curr = pair_parent; } - printf(" Recognised token: %s\n", tok); - END: - parse = strtok(NULL, delims); - } + switch(tok.type) { + + /** + * When we encounter a new json object, shift our "focus" over by one so we can + * record in what range this object is going to accumilate tokens. + * + * We then create a new child node under the current root node and switch the + * curr root pointer over to that child. + * + * When we reach the end of the token scope, we need to re-reference the parent as + * current root and switch over our boundry scope as well. This is done before the + * parsing switch statement. + */ + case JSMN_OBJECT: + { + focused++; + bounds[focused].low = tok.start; + bounds[focused].high = tok.end; + + /** + * Most of the time, we will create a new object under the key of + * a pair. This is the case, when the c_pair state buffer has been + * set to KEYED. In this case we allocate a new pair node for key + * and value and set that value to the new root. + */ + if(c_pair.state == TOK_PAIR_KEYED) { + + /* Create pair nodes & new_root which becomes curr */ + dtree *pair, *key, *val; + dtree_addlist(curr, &pair); + dtree_addpair(pair, &key, &val); + + /* Assign key and new_root as a value of the pair */ + dtree_addliteral(key, c_pair.key); + + /* Move curr root pointer */ + curr = val; + + /* Blank c_pair data for next tokens */ + memset(&c_pair, 0, sizeof(struct pair)); + + } + + /* Skip to next token */ + continue; + } - return SUCCESS; -} + case JSMN_STRING: + { + /** + * Here we need to check if we are adding a string as a key + * or as a value. This is simply done by checking for the existance + * of a key in the c_pair (current pair) variable. + * + * We know the token positions so we can manualy copy from the json stream + */ + if(c_pair.state == 0) { + memcpy(c_pair.key, json_data + tok.start, (size_t) tok.end - tok.start); + c_pair.state = TOK_PAIR_KEYED; + + } else if(c_pair.state == TOK_PAIR_KEYED){ + + /** Create a PAIR node under current root */ + dtree *pair, *key, *val; + dtree_addlist(curr, &pair); + dtree_addpair(pair, &key, &val); + + /* Key is always literal */ + dtree_addliteral(key, c_pair.key); + + /* Parse payload and asign to value node */ + switch(digest_payload(token)) { + case DTREE_TOK_LITERAL: + dtree_addliteral(val, token); + break; + + case DTREE_TOK_NUMERICAL: + dtree_addnumeral(val, atol(token)); + break; + + default: continue; + } + + /* Blank c_pair data for next tokens */ + memset(&c_pair, 0, sizeof(struct pair)); + } + + /* Skip to next token */ + continue; + } -/************************************************************/ -void pk_string_trim(char *src, char *dst) -{ - int s, d=0; - for (s=0; src[s] != 0; s++) - if (src[s] != ' ') { - dst[d] = src[s]; - d++; + default: + continue; } - dst[d] = 0; + } + + /* Switch over data pointer and return */ + (*data) = root; + return SUCCESS; } \ No newline at end of file diff --git a/test/main.c b/test/main.c index 02e871b..63ff521 100644 --- a/test/main.c +++ b/test/main.c @@ -3,6 +3,8 @@ #include #include +#include +#include /** * A small test that creates a tree, splits the nodes @@ -23,174 +25,209 @@ dt_err test_shortcut_functions(); printf(" %s\n", (err == 0) ? "OK!" : "FAILED!"); \ if(err) goto end; -int main(void) +int main(int argn, char **argv) { dt_err err = SUCCESS; printf("=== libdyntree test suite ===\n"); - TEST(split_and_merge()) +// TEST(split_and_merge()) +// +// TEST(search_for_payload()) +// +// char json[1024]; +// TEST(json_encode(json)) +// +// dtree *recover; +// dtree_decode_json(&recover, json); +// dtree_free(recover); - TEST(search_for_payload()) + struct timeval t1, t2; + double elapsedTime; - char json[1024]; - TEST(json_encode(json)) + // start timer + gettimeofday(&t1, NULL); - dtree *recover; - dtree_decode_json(&recover, json); - dtree_free(recover); +#define PATH "/home/spacekookie/Downloads/generated.json" - end: - printf("==== done ====\n"); - return err; -} - - -/*************** TEST IMPLEMENTATIONS ****************/ + /* Open the file and seek through it for length */ + FILE *f = fopen(PATH, "r"); + fseek(f, 0, SEEK_END); + size_t file_size = (size_t) ftell(f); + fseek(f, 0, SEEK_SET); -dt_err split_and_merge() -{ - dt_err err; - - /* Allocate a node named root */ - dtree *root; - err = dtree_malloc(&root); - if(err) goto exit; - - /* Add child as a recursive node to root */ - dtree *child; - err = dtree_addlist(root, &child); - if(err) goto exit; - - /* Make child a literal node containing the works of shakespeare */ - const char *hamlet = "To be, or not to be: that is the question:\n" - "Whether 'tis nobler in the mind to suffer\n" - "The slings and arrows of outrageous fortune,\n" - "Or to take arms against a sea of troubles,\n" - "And by opposing end them? To die: to sleep;\n" - "No more; and by a sleep to say we end\n" - "The heart-ache and the thousand natural shocks\n" - "That flesh is heir to, 'tis a consummation\n" - "Devoutly to be wish'd. To die, to sleep;"; - - err = dtree_addliteral(child, hamlet); - if(err) goto exit; - - /* Split our tree into two single-nodes */ - err = dtree_split_trees(root, child); - if(err) goto exit; - - /* Re-merge because they miss each other */ - err = dtree_merge_trees(root, child); - if(err) goto exit; - - /* Cleanup */ - exit: - dtree_free(root); - return err; -} + /* Create a buffer of the correct size */ + char *json = (char*) malloc(sizeof(char) * (file_size + 1)); + memset(json, 0, file_size + 1); + fread(json, file_size, 1, f); + fclose(f); -dt_err search_for_payload() -{ - dt_err err; + dtree *recov; + dtree_decode_json(&recov, json, file_size); - dtree *root, *a, *b, *found; - err = dtree_malloc(&root); - if(err) goto exit; + dtree_print(recov); + dtree_free(recov); - const char *string = "This is some data!"; - err = dtree_addlist(root, &a); - if(err) goto exit; + gettimeofday(&t2, NULL); - err = dtree_addliteral(a, string); - if(err) goto exit; + // compute and print the elapsed time in millisec + elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; // sec to ms + elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; // us to ms - err = dtree_addlist(root, &b); - if(err) goto exit; + printf("Program took %fms to run\n", elapsedTime); - err = dtree_addnumeral(b, 1337); - if(err) goto exit; - /* Try to find our data again */ - - err = dtree_search_payload(root, &found, (void*) string, LITERAL); - if(err) goto exit; - - err = dtree_search_payload(root, &found, (void*) 1337, NUMERIC); - if(err) goto exit; - - exit: - dtree_free(root); +end: + printf("==== done ====\n"); return err; } -dt_err json_encode(char *json) { - dt_err err; - - dtree *root, *a, *b, *c, *found; - err = dtree_malloc(&root); - if (err) goto exit; - - dtree *key, *val; - err = dtree_addlist(root, &a); - if (err) goto exit; - err = dtree_addlist(root, &b); - if (err) goto exit; - err = dtree_addlist(root, &c); - if (err) goto exit; - - err = dtree_addpair(a, &key, &val); - if (err) goto exit; - err = dtree_addliteral(key, "Server Address"); - if (err) goto exit; - err = dtree_addliteral(val, "https://github.com"); - if (err) goto exit; - - key = val = NULL; - - err = dtree_addpair(b, &key, &val); - if (err) goto exit; - err = dtree_addliteral(key, "Server Port"); - if (err) goto exit; - err = dtree_addnumeral(val, 8080); - if (err) goto exit; - - key = val = NULL; - - err = dtree_addpair(c, &key, &val); - if (err) goto exit; - err = dtree_addliteral(key, "Users"); - if (err) goto exit; - - dtree *sbrec, *sbrec2; - err = dtree_addlist(val, &sbrec); - if (err) goto exit; - err = dtree_addlist(val, &sbrec2); - if (err) goto exit; - - dtree *subkey, *subval; - err = dtree_addpair(sbrec, &subkey, &subval); - if (err) goto exit; - err = dtree_addliteral(subkey, "spacekookie"); - if (err) goto exit; - err = dtree_addliteral(subval, "Admin"); - if (err) goto exit; - - key = val = NULL; - - dtree *subkey2, *subval2; - err = dtree_addpair(sbrec2, &subkey2, &subval2); - if (err) goto exit; - err = dtree_addliteral(subkey2, "jane"); - if (err) goto exit; - err = dtree_addliteral(subval2, "normal"); - if (err) goto exit; - - err = dtree_encode_set(root, DYNTREE_JSON_MINIFIED); - if (err) goto exit; - err = dtree_encode_json(root, json); - if (err) goto exit; - - exit: - dtree_free(root); - return err; -} + +/*************** TEST IMPLEMENTATIONS ****************/ + +//dt_err split_and_merge() +//{ +// dt_err err; +// +// /* Allocate a node named root */ +// dtree *root; +// err = dtree_malloc(&root); +// if(err) goto exit; +// +// /* Add child as a recursive node to root */ +// dtree *child; +// err = dtree_addlist(root, &child); +// if(err) goto exit; +// +// /* Make child a literal node containing the works of shakespeare */ +// const char *hamlet = "To be, or not to be: that is the question:\n" +// "Whether 'tis nobler in the mind to suffer\n" +// "The slings and arrows of outrageous fortune,\n" +// "Or to take arms against a sea of troubles,\n" +// "And by opposing end them? To die: to sleep;\n" +// "No more; and by a sleep to say we end\n" +// "The heart-ache and the thousand natural shocks\n" +// "That flesh is heir to, 'tis a consummation\n" +// "Devoutly to be wish'd. To die, to sleep;"; +// +// err = dtree_addliteral(child, hamlet); +// if(err) goto exit; +// +// /* Split our tree into two single-nodes */ +// err = dtree_split_trees(root, child); +// if(err) goto exit; +// +// /* Re-merge because they miss each other */ +// err = dtree_merge_trees(root, child); +// if(err) goto exit; +// +// /* Cleanup */ +// exit: +// dtree_free(root); +// return err; +//} +// +//dt_err search_for_payload() +//{ +// dt_err err; +// +// dtree *root, *a, *b, *found; +// err = dtree_malloc(&root); +// if(err) goto exit; +// +// const char *string = "This is some data!"; +// err = dtree_addlist(root, &a); +// if(err) goto exit; +// +// err = dtree_addliteral(a, string); +// if(err) goto exit; +// +// err = dtree_addlist(root, &b); +// if(err) goto exit; +// +// err = dtree_addnumeral(b, 1337); +// if(err) goto exit; +// +// /* Try to find our data again */ +// +// err = dtree_search_payload(root, &found, (void*) string, LITERAL); +// if(err) goto exit; +// +// err = dtree_search_payload(root, &found, (void*) 1337, NUMERIC); +// if(err) goto exit; +// +// exit: +// dtree_free(root); +// return err; +//} +// +//dt_err json_encode(char *json) { +// dt_err err; +// +// dtree *root, *a, *b, *c, *found; +// err = dtree_malloc(&root); +// if (err) goto exit; +// +// dtree *key, *val; +// err = dtree_addlist(root, &a); +// if (err) goto exit; +// err = dtree_addlist(root, &b); +// if (err) goto exit; +// err = dtree_addlist(root, &c); +// if (err) goto exit; +// +// err = dtree_addpair(a, &key, &val); +// if (err) goto exit; +// err = dtree_addliteral(key, "Server Address"); +// if (err) goto exit; +// err = dtree_addliteral(val, "https://github.com"); +// if (err) goto exit; +// +// key = val = NULL; +// +// err = dtree_addpair(b, &key, &val); +// if (err) goto exit; +// err = dtree_addliteral(key, "Server Port"); +// if (err) goto exit; +// err = dtree_addnumeral(val, 8080); +// if (err) goto exit; +// +// key = val = NULL; +// +// err = dtree_addpair(c, &key, &val); +// if (err) goto exit; +// err = dtree_addliteral(key, "Users"); +// if (err) goto exit; +// +// dtree *sbrec, *sbrec2; +// err = dtree_addlist(val, &sbrec); +// if (err) goto exit; +// err = dtree_addlist(val, &sbrec2); +// if (err) goto exit; +// +// dtree *subkey, *subval; +// err = dtree_addpair(sbrec, &subkey, &subval); +// if (err) goto exit; +// err = dtree_addliteral(subkey, "spacekookie"); +// if (err) goto exit; +// err = dtree_addliteral(subval, "Admin"); +// if (err) goto exit; +// +// key = val = NULL; +// +// dtree *subkey2, *subval2; +// err = dtree_addpair(sbrec2, &subkey2, &subval2); +// if (err) goto exit; +// err = dtree_addliteral(subkey2, "jane"); +// if (err) goto exit; +// err = dtree_addliteral(subval2, "normal"); +// if (err) goto exit; +// +// err = dtree_encode_set(root, DYNTREE_JSON_MINIFIED); +// if (err) goto exit; +// err = dtree_encode_json(root, json); +// if (err) goto exit; +// +// exit: +// dtree_free(root); +// return err; +//} -- cgit v1.2.3