From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp33.i.mail.ru (smtp33.i.mail.ru [94.100.177.93]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 725B6452566 for ; Tue, 12 Nov 2019 02:04:43 +0300 (MSK) From: Vladislav Shpilevoy Date: Tue, 12 Nov 2019 00:10:47 +0100 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH 1/2] json: lexer_eof and token_cmp helper functions List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org They are needed in the upcoming JSON updates, which will have to compare two JSON paths, parse them simultaneously, and dig into a tuple. json_token_cmp() existed before this patch, but it also compared parent pointers, which is not needed in the JSON updates, since they won't use JSON trees. 
Needed for #1261 --- src/lib/json/json.c | 37 +++++++++++++------------------------ src/lib/json/json.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1bfef172a..416c7dfda 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -55,7 +55,7 @@ static inline int json_read_symbol(struct json_lexer *lexer, UChar32 *out) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { *out = U_SENTINEL; return lexer->symbol_count + 1; } @@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token) int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { token->type = JSON_TOKEN_END; return 0; } @@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) switch(c) { case (UChar32)'[': /* Error for '[\0'. */ - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count; c = json_current_char(lexer); if (c == '"' || c == '\'') { @@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) * Expression, started from [ must be finished * with ] regardless of its type. */ - if (lexer->offset == lexer->src_len || + if (json_lexer_is_eof(lexer) || json_current_char(lexer) != ']') return lexer->symbol_count + 1; /* Skip ] - one byte char. */ json_skip_char(lexer); return 0; case (UChar32)'.': - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count + 1; return json_parse_identifier(lexer, token); default: @@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) } /** - * Compare JSON token keys. + * Compare JSON tokens as nodes of a JSON tree. That is, including + * parent references. 
*/ static int -json_token_cmp(const struct json_token *a, const struct json_token *b) +json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b) { if (a->parent != b->parent) return a->parent - b->parent; - if (a->type != b->type) - return a->type - b->type; - int ret = 0; - if (a->type == JSON_TOKEN_STR) { - if (a->len != b->len) - return a->len - b->len; - ret = memcmp(a->str, b->str, a->len); - } else if (a->type == JSON_TOKEN_NUM) { - ret = a->num - b->num; - } else { - assert(a->type == JSON_TOKEN_ANY); - } - return ret; + return json_token_cmp(a, b); } int @@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, json_lexer_create(&lexer_a, a, a_len, index_base); json_lexer_create(&lexer_b, b, b_len, index_base); struct json_token token_a, token_b; - /* For the sake of json_token_cmp(). */ + /* For the sake of json_token_cmp_in_tree(). */ token_a.parent = NULL; token_b.parent = NULL; int rc_a, rc_b; @@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 && token_a.type != JSON_TOKEN_END && token_b.type != JSON_TOKEN_END) { - int rc = json_token_cmp(&token_a, &token_b); + int rc = json_token_cmp_in_tree(&token_a, &token_b); if (rc != 0) return rc; } @@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token, #define mh_arg_t void * #define mh_hash(a, arg) ((*(a))->hash) #define mh_hash_key(a, arg) ((a)->hash) -#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b))) -#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b))) +#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b))) +#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b))) #include "salad/mhash.h" static const uint32_t hash_seed = 13U; diff --git a/src/lib/json/json.h b/src/lib/json/json.h index d66a9c7a4..3218769a1 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -241,6 +241,13 @@ 
json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); +/** Check if @a lexer has finished parsing. */ +static inline bool +json_lexer_is_eof(const struct json_lexer *lexer) +{ + return lexer->offset == lexer->src_len; +} + /** * Compare two JSON paths using Lexer class. * - in case of paths that have same token-sequence prefix, @@ -279,6 +286,30 @@ json_token_is_leaf(struct json_token *token) return token->max_child_idx < 0; } +/** + * Compare two JSON tokens, not taking into account their tree + * attributes. Only the token values are compared. That might be + * used to compare two JSON paths. String comparison of the paths + * may not work because the same token can be present in different + * forms: ['a'] == .a, for example. + */ +static inline int +json_token_cmp(const struct json_token *l, const struct json_token *r) +{ + if (l->type != r->type) + return l->type - r->type; + switch(l->type) { + case JSON_TOKEN_NUM: + return l->num - r->num; + case JSON_TOKEN_STR: + if (l->len != r->len) + return l->len - r->len; + return memcmp(l->str, r->str, l->len); + default: + return 0; + } +} + /** * Test if a given JSON token is multikey. */ -- 2.21.0 (Apple Git-122.2)