From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 5579125BBE for ; Sat, 31 Aug 2019 17:32:42 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id wE71RprAxa7R for ; Sat, 31 Aug 2019 17:32:42 -0400 (EDT) Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 0AC5525BA5 for ; Sat, 31 Aug 2019 17:32:42 -0400 (EDT) From: Vladislav Shpilevoy Subject: [tarantool-patches] [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions Date: Sat, 31 Aug 2019 23:35:53 +0200 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org Cc: kostja@tarantool.org These helpers are needed by the upcoming JSON updates, which have to compare two JSON paths, parse them simultaneously, and dig into a tuple. json_token_cmp() existed before this patch, but it also compared parent pointers, which is not needed in the JSON updates, since they won't use JSON trees. 
Needed for #1261 --- src/lib/json/json.c | 37 +++++++++++++------------------------ src/lib/json/json.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1bfef172a..416c7dfda 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -55,7 +55,7 @@ static inline int json_read_symbol(struct json_lexer *lexer, UChar32 *out) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { *out = U_SENTINEL; return lexer->symbol_count + 1; } @@ -211,7 +211,7 @@ json_parse_identifier(struct json_lexer *lexer, struct json_token *token) int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) { - if (lexer->offset == lexer->src_len) { + if (json_lexer_is_eof(lexer)) { token->type = JSON_TOKEN_END; return 0; } @@ -223,7 +223,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) switch(c) { case (UChar32)'[': /* Error for '[\0'. */ - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count; c = json_current_char(lexer); if (c == '"' || c == '\'') { @@ -240,14 +240,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) * Expression, started from [ must be finished * with ] regardless of its type. */ - if (lexer->offset == lexer->src_len || + if (json_lexer_is_eof(lexer) || json_current_char(lexer) != ']') return lexer->symbol_count + 1; /* Skip ] - one byte char. */ json_skip_char(lexer); return 0; case (UChar32)'.': - if (lexer->offset == lexer->src_len) + if (json_lexer_is_eof(lexer)) return lexer->symbol_count + 1; return json_parse_identifier(lexer, token); default: @@ -259,26 +259,15 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) } /** - * Compare JSON token keys. + * Compare JSON tokens as nodes of a JSON tree. That is, including + * parent references. 
*/ static int -json_token_cmp(const struct json_token *a, const struct json_token *b) +json_token_cmp_in_tree(const struct json_token *a, const struct json_token *b) { if (a->parent != b->parent) return a->parent - b->parent; - if (a->type != b->type) - return a->type - b->type; - int ret = 0; - if (a->type == JSON_TOKEN_STR) { - if (a->len != b->len) - return a->len - b->len; - ret = memcmp(a->str, b->str, a->len); - } else if (a->type == JSON_TOKEN_NUM) { - ret = a->num - b->num; - } else { - assert(a->type == JSON_TOKEN_ANY); - } - return ret; + return json_token_cmp(a, b); } int @@ -289,7 +278,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, json_lexer_create(&lexer_a, a, a_len, index_base); json_lexer_create(&lexer_b, b, b_len, index_base); struct json_token token_a, token_b; - /* For the sake of json_token_cmp(). */ + /* For the sake of json_token_cmp_in_tree(). */ token_a.parent = NULL; token_b.parent = NULL; int rc_a, rc_b; @@ -297,7 +286,7 @@ json_path_cmp(const char *a, int a_len, const char *b, int b_len, (rc_b = json_lexer_next_token(&lexer_b, &token_b)) == 0 && token_a.type != JSON_TOKEN_END && token_b.type != JSON_TOKEN_END) { - int rc = json_token_cmp(&token_a, &token_b); + int rc = json_token_cmp_in_tree(&token_a, &token_b); if (rc != 0) return rc; } @@ -423,8 +412,8 @@ json_tree_snprint_path(char *buf, int size, const struct json_token *token, #define mh_arg_t void * #define mh_hash(a, arg) ((*(a))->hash) #define mh_hash_key(a, arg) ((a)->hash) -#define mh_cmp(a, b, arg) (json_token_cmp(*(a), *(b))) -#define mh_cmp_key(a, b, arg) (json_token_cmp((a), *(b))) +#define mh_cmp(a, b, arg) (json_token_cmp_in_tree(*(a), *(b))) +#define mh_cmp_key(a, b, arg) (json_token_cmp_in_tree((a), *(b))) #include "salad/mhash.h" static const uint32_t hash_seed = 13U; diff --git a/src/lib/json/json.h b/src/lib/json/json.h index d66a9c7a4..08a0ee96c 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -241,6 +241,13 @@ 
json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); +/** Check if @a lexer has finished parsing. */ +static inline bool +json_lexer_is_eof(const struct json_lexer *lexer) +{ + return lexer->offset == lexer->src_len; +} + /** * Compare two JSON paths using Lexer class. * - in case of paths that have same token-sequence prefix, @@ -279,6 +286,27 @@ json_token_is_leaf(struct json_token *token) return token->max_child_idx < 0; } +/** + * Compare two JSON tokens, not taking into account their tree + * attributes. + */ +static inline int +json_token_cmp(const struct json_token *l, const struct json_token *r) +{ + if (l->type != r->type) + return l->type - r->type; + switch(l->type) { + case JSON_TOKEN_NUM: + return l->num - r->num; + case JSON_TOKEN_STR: + if (l->len != r->len) + return l->len - r->len; + return memcmp(l->str, r->str, l->len); + default: + return 0; + } +} + /** * Test if a given JSON token is multikey. */ -- 2.20.1 (Apple Git-117)