From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Subject: [tarantool-patches] [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions Date: Sat, 31 Aug 2019 23:35:53 +0200 [thread overview] Message-ID: <c6712e02d94d15fb8aa415ff1953380de7335d9a.1567287197.git.v.shpilevoy@tarantool.org> (raw) In-Reply-To: <cover.1567287197.git.v.shpilevoy@tarantool.org> They are needed in incoming JSON updates, which are going to solve a task of comparison of two JSON paths, their simultaneous parsing, and digging into a tuple. json_token_cmp() existed before this patch, but it was trying to compare parent pointers too, which is not needed in the JSON updates, since they won't use JSON trees. Needed for #1261 --- src/lib/json/json.c | 37 +++++++++++++------------------------ src/lib/json/json.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1bfef172a..416c7dfda 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -55,7 +55,7 @@ static inline int json_read_symbol(struct json_lexer *lexer, UChar32 *out) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { *out = U_SENTINEL; return lexer->symbol_count + 1; } @@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token) int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { token->type = JSON_TOKEN_END; return 0; } @@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) switch(c) { case (UChar32)'[': /* Error for '[\0'. 
*/ - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count; c = json_current_char(lexer); if (c == '"' || c == '\'') { @@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) * Expression, started from [ must be finished * with ] regardless of its type. */ - if (lexer->offset == lexer->src_len || + if (json_lexer_is_eof(lexer) || json_current_char(lexer) != ']') return lexer->symbol_count + 1; /* Skip ] - one byte char. */ json_skip_char(lexer); return 0; case (UChar32)'.': - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count + 1; return json_parse_identifier(lexer, token); default: @@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) } /** - * Compare JSON token keys. + * Compare JSON tokens as nodes of a JSON tree. That is, including + * parent references. */ static int -json_token_cmp(const struct json_token *a, const struct json_token *b) +json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b) { if (a->parent != b->parent) return a->parent - b->parent; - if (a->type != b->type) - return a->type - b->type; - int ret = 0; - if (a->type == JSON_TOKEN_STR) { - if (a->len != b->len) - return a->len - b->len; - ret = memcmp(a->str, b->str, a->len); - } else if (a->type == JSON_TOKEN_NUM) { - ret = a->num - b->num; - } else { - assert(a->type == JSON_TOKEN_ANY); - } - return ret; + return json_token_cmp(a, b); } int @@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, json_lexer_create(&lexer_a, a, a_len, index_base); json_lexer_create(&lexer_b, b, b_len, index_base); struct json_token token_a, token_b; - /* For the sake of json_token_cmp(). */ + /* For the sake of json_token_cmp_in_tree(). 
*/ token_a.parent = NULL; token_b.parent = NULL; int rc_a, rc_b; @@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 && token_a.type != JSON_TOKEN_END && token_b.type != JSON_TOKEN_END) { - int rc = json_token_cmp(&token_a, &token_b); + int rc = json_token_cmp_in_tree(&token_a, &token_b); if (rc != 0) return rc; } @@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token, #define mh_arg_t void * #define mh_hash(a, arg) ((*(a))->hash) #define mh_hash_key(a, arg) ((a)->hash) -#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b))) -#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b))) +#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b))) +#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b))) #include "salad/mhash.h" static const uint32_t hash_seed = 13U; diff --git a/src/lib/json/json.h b/src/lib/json/json.h index d66a9c7a4..08a0ee96c 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -241,6 +241,13 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); +/** Check if @a lexer has finished parsing. */ +static inline bool +json_lexer_is_eof(const struct json_lexer *lexer) +{ + return lexer->offset == lexer->src_len; +} + /** * Compare two JSON paths using Lexer class. * - in case of paths that have same token-sequence prefix, @@ -279,6 +286,27 @@ json_token_is_leaf(struct json_token *token) return token->max_child_idx < 0; } +/** + * Compare two JSON tokens, not taking into account their tree + * attributes. 
+ */ +static inline int +json_token_cmp(const struct json_token *l, const struct json_token *r) +{ + if (l->type != r->type) + return l->type - r->type; + switch(l->type) { + case JSON_TOKEN_NUM: + return l->num - r->num; + case JSON_TOKEN_STR: + if (l->len != r->len) + return l->len - r->len; + return memcmp(l->str, r->str, l->len); + default: + return 0; + } +} + /** * Test if a given JSON token is multikey. */ -- 2.20.1 (Apple Git-117)
next prev parent reply other threads:[~2019-08-31 21:32 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-08-31 21:35 [tarantool-patches] [PATCH v2 0/8] JSON updates Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 1/8] tuple: expose JSON go_to_key and go_to_index functions Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 2/8] tuple: rework updates to improve code extendibility Vladislav Shpilevoy [not found] ` <20190903192059.GE15611@atlas> [not found] ` <6ee759cf-a975-e6a9-6f52-f855958ffe06@tarantool.org> [not found] ` <20191005132055.GD3913@atlas> [not found] ` <20191005135037.GJ3913@atlas> 2019-10-19 15:11 ` [Tarantool-patches] [tarantool-patches] " Vladislav Shpilevoy 2019-08-31 21:35 ` Vladislav Shpilevoy [this message] [not found] ` <20190903192433.GF15611@atlas> [not found] ` <f5612e04-dc56-f4bd-1298-c5841ac909f5@tarantool.org> [not found] ` <20191005132231.GE3913@atlas> [not found] ` <20191005135014.GI3913@atlas> 2019-10-19 15:08 ` [Tarantool-patches] [tarantool-patches] Re: [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 4/8] tuple: account the whole array in field.data and size Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 5/8] tuple: enable JSON bar updates Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 6/8] tuple: make update operation tokens consumable Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 7/8] tuple: JSON updates support intersection by arrays Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 8/8] tuple: JSON updates support intersection by maps Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=c6712e02d94d15fb8aa415ff1953380de7335d9a.1567287197.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [tarantool-patches] [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.