From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@dev.tarantool.org Subject: [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions Date: Tue, 12 Nov 2019 00:10:47 +0100 [thread overview] Message-ID: <ab3e3676f3b12b692d4bc27ec95c6270696bbcd1.1573513733.git.v.shpilevoy@tarantool.org> (raw) In-Reply-To: <cover.1573513733.git.v.shpilevoy@tarantool.org> They are needed in the upcoming JSON updates, which will have to compare two JSON paths, parse them simultaneously, and dig into a tuple. json_token_cmp() existed before this patch, but it also compared parent pointers, which is not needed in the JSON updates, since they won't use JSON trees. Needed for #1261 --- src/lib/json/json.c | 37 +++++++++++++------------------------ src/lib/json/json.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1bfef172a..416c7dfda 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -55,7 +55,7 @@ static inline int json_read_symbol(struct json_lexer *lexer, UChar32 *out) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { *out = U_SENTINEL; return lexer->symbol_count + 1; } @@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token) int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { token->type = JSON_TOKEN_END; return 0; } @@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) switch(c) { case (UChar32)'[': /* Error for '[\0'. 
*/ - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count; c = json_current_char(lexer); if (c == '"' || c == '\'') { @@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) * Expression, started from [ must be finished * with ] regardless of its type. */ - if (lexer->offset == lexer->src_len || + if (json_lexer_is_eof(lexer) || json_current_char(lexer) != ']') return lexer->symbol_count + 1; /* Skip ] - one byte char. */ json_skip_char(lexer); return 0; case (UChar32)'.': - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count + 1; return json_parse_identifier(lexer, token); default: @@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) } /** - * Compare JSON token keys. + * Compare JSON tokens as nodes of a JSON tree. That is, including + * parent references. */ static int -json_token_cmp(const struct json_token *a, const struct json_token *b) +json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b) { if (a->parent != b->parent) return a->parent - b->parent; - if (a->type != b->type) - return a->type - b->type; - int ret = 0; - if (a->type == JSON_TOKEN_STR) { - if (a->len != b->len) - return a->len - b->len; - ret = memcmp(a->str, b->str, a->len); - } else if (a->type == JSON_TOKEN_NUM) { - ret = a->num - b->num; - } else { - assert(a->type == JSON_TOKEN_ANY); - } - return ret; + return json_token_cmp(a, b); } int @@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, json_lexer_create(&lexer_a, a, a_len, index_base); json_lexer_create(&lexer_b, b, b_len, index_base); struct json_token token_a, token_b; - /* For the sake of json_token_cmp(). */ + /* For the sake of json_token_cmp_in_tree(). 
*/ token_a.parent = NULL; token_b.parent = NULL; int rc_a, rc_b; @@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 && token_a.type != JSON_TOKEN_END && token_b.type != JSON_TOKEN_END) { - int rc = json_token_cmp(&token_a, &token_b); + int rc = json_token_cmp_in_tree(&token_a, &token_b); if (rc != 0) return rc; } @@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token, #define mh_arg_t void * #define mh_hash(a, arg) ((*(a))->hash) #define mh_hash_key(a, arg) ((a)->hash) -#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b))) -#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b))) +#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b))) +#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b))) #include "salad/mhash.h" static const uint32_t hash_seed = 13U; diff --git a/src/lib/json/json.h b/src/lib/json/json.h index d66a9c7a4..3218769a1 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -241,6 +241,13 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); +/** Check if @a lexer has finished parsing. */ +static inline bool +json_lexer_is_eof(const struct json_lexer *lexer) +{ + return lexer->offset == lexer->src_len; +} + /** * Compare two JSON paths using Lexer class. * - in case of paths that have same token-sequence prefix, @@ -279,6 +286,30 @@ json_token_is_leaf(struct json_token *token) return token->max_child_idx < 0; } +/** + * Compare two JSON tokens, not taking into account their tree + * attributes. Only the token values are compared. That might be + * used to compare two JSON paths. String comparison of the paths + * may not work because the same token can be present in different + * forms: ['a'] == .a, for example. 
+ */ +static inline int +json_token_cmp(const struct json_token *l, const struct json_token *r) +{ + if (l->type != r->type) + return l->type - r->type; + switch(l->type) { + case JSON_TOKEN_NUM: + return l->num - r->num; + case JSON_TOKEN_STR: + if (l->len != r->len) + return l->len - r->len; + return memcmp(l->str, r->str, l->len); + default: + return 0; + } +} + /** * Test if a given JSON token is multikey. */ -- 2.21.0 (Apple Git-122.2)
next prev parent reply other threads:[~2019-11-11 23:04 UTC|newest] Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-11-11 23:10 [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Vladislav Shpilevoy 2019-11-11 23:10 ` Vladislav Shpilevoy [this message] 2019-11-11 23:10 ` [Tarantool-patches] [PATCH 2/2] tuple: account the whole array in field.data and size Vladislav Shpilevoy 2019-11-12 10:01 ` [Tarantool-patches] [PATCH 0/2] JSON preparation part 6 Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=ab3e3676f3b12b692d4bc27ec95c6270696bbcd1.1573513733.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.