From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v5 2/9] lib: make index_base support for json_lexer References: <02671a3d0a2236ecd6e12c0bc51b7f5e39272a2f.1543229303.git.kshcherbatov@tarantool.org> <20181129173816.kprfjhki5o7ytfbl@esperanza> <3c7bb503-561c-19b0-1197-f714b6f384d4@tarantool.org> <20181204175412.dayx2wplbxi5rrfz@esperanza> <38c62fa1-190d-181f-621b-8185847055f7@tarantool.org> <20181205090740.lyt6ikf7wmivavqb@esperanza> <20181205095214.763qjnpkphv64kqm@esperanza> From: Kirill Shcherbatov Message-ID: Date: Thu, 6 Dec 2018 10:56:59 +0300 MIME-Version: 1.0 In-Reply-To: <20181205095214.763qjnpkphv64kqm@esperanza> Content-Type: text/plain; charset="utf-8" Content-Language: en-US Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, Vladimir Davydov , Kostya Osipov List-ID: Introduce a new index_base field in struct json_lexer: the index base of the numbers written in a path (e.g. 0 for C and 1 for Lua). The lexer subtracts it from every emitted JSON_TOKEN_NUM token, so token numbers are always 0-based, and a number smaller than index_base is reported as an error in the path. Thus, we get rid of the need to manually adjust indexes by the TUPLE_INDEX_BASE constant in the majority of cases. This will also ensure that the extracted tuples are correctly inserted into the numerical level of the JSON tree. Needed for #1012 --- src/box/tuple_format.c | 16 ++++------------ src/lib/json/json.c | 4 +++- src/lib/json/json.h | 11 ++++++++++- test/engine/tuple.result | 4 ++-- test/unit/json_path.c | 24 +++++++++++++++--------- test/unit/json_path.result | 21 +++++++++++---------- 6 files changed, 45 insertions(+), 35 deletions(-) diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 661cfdc94..149248144 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -491,7 +491,7 @@ box_tuple_format_unref(box_tuple_format_t *format) /** * Propagate @a field to MessagePack(field)[index]. * @param[in][out] field Field to propagate. - * @param index 1-based index to propagate to. + * @param index 0-based index to propagate to. * * @retval 0 Success, the index was found. 
* @retval -1 Not found. @@ -501,10 +501,6 @@ tuple_field_go_to_index(const char **field, uint64_t index) { enum mp_type type = mp_typeof(**field); if (type == MP_ARRAY) { - if (index == 0) - return -1; - /* Make index 0-based. */ - index -= TUPLE_INDEX_BASE; uint32_t count = mp_decode_array(field); if (index >= count) return -1; @@ -512,6 +508,7 @@ tuple_field_go_to_index(const char **field, uint64_t index) mp_next(field); return 0; } else if (type == MP_MAP) { + index += TUPLE_INDEX_BASE; uint64_t count = mp_decode_map(field); for (; count > 0; --count) { type = mp_typeof(**field); @@ -582,7 +579,7 @@ tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len) int rc; struct json_lexer lexer; struct json_token token; - json_lexer_create(&lexer, path, path_len); + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { switch (token.type) { case JSON_TOKEN_NUM: @@ -624,18 +621,13 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, } struct json_lexer lexer; struct json_token token; - json_lexer_create(&lexer, path, path_len); + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); int rc = json_lexer_next_token(&lexer, &token); if (rc != 0) goto error; switch(token.type) { case JSON_TOKEN_NUM: { int index = token.num; - if (index == 0) { - *field = NULL; - return 0; - } - index -= TUPLE_INDEX_BASE; *field = tuple_field_raw(format, tuple, field_map, index); if (*field == NULL) return 0; diff --git a/src/lib/json/json.c b/src/lib/json/json.c index eb80e4bbc..81b291127 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -144,10 +144,12 @@ json_parse_integer(struct json_lexer *lexer, struct json_token *token) value = value * 10 + c - (int)'0'; ++len; } while (++pos < end && isdigit((c = *pos))); + if (value < lexer->index_base) + return lexer->symbol_count + 1; lexer->offset += len; lexer->symbol_count += len; token->type = JSON_TOKEN_NUM; - 
token->num = value; + token->num = value - lexer->index_base; return 0; } diff --git a/src/lib/json/json.h b/src/lib/json/json.h index ead446878..5c8d973e5 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -49,6 +49,11 @@ struct json_lexer { int offset; /** Current lexer's offset in symbols. */ int symbol_count; + /** + * Base field offset for emitted JSON_TOKEN_NUM tokens, + * e.g. 0 for C and 1 for Lua. + */ + unsigned index_base; }; enum json_token_type { @@ -82,14 +87,18 @@ struct json_token { * @param[out] lexer Lexer to create. * @param src Source string. * @param src_len Length of @a src. + * @param index_base Base field offset for emitted JSON_TOKEN_NUM + * tokens e.g. 0 for C and 1 for Lua. */ static inline void -json_lexer_create(struct json_lexer *lexer, const char *src, int src_len) +json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, + unsigned index_base) { lexer->src = src; lexer->src_len = src_len; lexer->offset = 0; lexer->symbol_count = 0; + lexer->index_base = index_base; } /** diff --git a/test/engine/tuple.result b/test/engine/tuple.result index 35c700e16..7ca3985c7 100644 --- a/test/engine/tuple.result +++ b/test/engine/tuple.result @@ -823,7 +823,7 @@ t[0] ... t["[0]"] --- -- null +- error: Illegal parameters, error in path on position 2 ... t["[1000]"] --- @@ -847,7 +847,7 @@ t["[2][6].key100"] ... t["[2][0]"] -- 0-based index in array. --- -- null +- error: Illegal parameters, error in path on position 5 ... t["[4][3]"] -- Can not index string. 
--- diff --git a/test/unit/json_path.c b/test/unit/json_path.c index a5f90ad98..1d7707ee6 100644 --- a/test/unit/json_path.c +++ b/test/unit/json_path.c @@ -3,10 +3,12 @@ #include "trivia/util.h" #include +#define TUPLE_INDEX_BASE 1 + #define reset_to_new_path(value) \ path = value; \ len = strlen(value); \ - json_lexer_create(&lexer, path, len); + json_lexer_create(&lexer, path, len, TUPLE_INDEX_BASE); #define is_next_index(value_len, value) \ path = lexer.src + lexer.offset; \ @@ -32,18 +34,18 @@ test_basic() struct json_lexer lexer; struct json_token token; - reset_to_new_path("[0].field1.field2['field3'][5]"); + reset_to_new_path("[1].field1.field2['field3'][5]"); is_next_index(3, 0); is_next_key("field1"); is_next_key("field2"); is_next_key("field3"); - is_next_index(3, 5); + is_next_index(3, 4); reset_to_new_path("[3].field[2].field") - is_next_index(3, 3); - is_next_key("field"); is_next_index(3, 2); is_next_key("field"); + is_next_index(3, 1); + is_next_key("field"); reset_to_new_path("[\"f1\"][\"f2'3'\"]"); is_next_key("f1"); @@ -57,7 +59,7 @@ test_basic() /* Long number. */ reset_to_new_path("[1234]"); - is_next_index(6, 1234); + is_next_index(6, 1233); /* Empty path. */ reset_to_new_path(""); @@ -70,8 +72,8 @@ test_basic() /* Unicode. 
*/ reset_to_new_path("[2][6]['привет中国world']['中国a']"); - is_next_index(3, 2); - is_next_index(3, 6); + is_next_index(3, 1); + is_next_index(3, 5); is_next_key("привет中国world"); is_next_key("中国a"); @@ -94,7 +96,7 @@ void test_errors() { header(); - plan(20); + plan(21); const char *path; int len; struct json_lexer lexer; @@ -155,6 +157,10 @@ test_errors() json_lexer_next_token(&lexer, &token); is(json_lexer_next_token(&lexer, &token), 6, "tab inside identifier"); + reset_to_new_path("[0]"); + is(json_lexer_next_token(&lexer, &token), 2, + "invalid token for index_base %d", TUPLE_INDEX_BASE); + check_plan(); footer(); } diff --git a/test/unit/json_path.result b/test/unit/json_path.result index a2a2f829f..ad6f07e5a 100644 --- a/test/unit/json_path.result +++ b/test/unit/json_path.result @@ -2,9 +2,9 @@ 1..2 *** test_basic *** 1..71 - ok 1 - parse <[0]> - ok 2 - <[0]> is num - ok 3 - <[0]> is 0 + ok 1 - parse <[1]> + ok 2 - <[1]> is num + ok 3 - <[1]> is 0 ok 4 - parse ok 5 - is str ok 6 - len is 6 @@ -19,17 +19,17 @@ ok 15 - str is field3 ok 16 - parse <[5]> ok 17 - <[5]> is num - ok 18 - <[5]> is 5 + ok 18 - <[5]> is 4 ok 19 - parse <[3]> ok 20 - <[3]> is num - ok 21 - <[3]> is 3 + ok 21 - <[3]> is 2 ok 22 - parse ok 23 - is str ok 24 - len is 5 ok 25 - str is field ok 26 - parse <[2]> ok 27 - <[2]> is num - ok 28 - <[2]> is 2 + ok 28 - <[2]> is 1 ok 29 - parse ok 30 - is str ok 31 - len is 5 @@ -52,7 +52,7 @@ ok 48 - str is field1 ok 49 - parse <[1234]> ok 50 - <[1234]> is num - ok 51 - <[1234]> is 1234 + ok 51 - <[1234]> is 1233 ok 52 - parse empty path ok 53 - is str ok 54 - parse @@ -61,10 +61,10 @@ ok 57 - str is field1 ok 58 - parse <[2]> ok 59 - <[2]> is num - ok 60 - <[2]> is 2 + ok 60 - <[2]> is 1 ok 61 - parse <[6]> ok 62 - <[6]> is num - ok 63 - <[6]> is 6 + ok 63 - <[6]> is 5 ok 64 - parse <привет中国world> ok 65 - <привет中国world> is str ok 66 - len is 23 @@ -76,7 +76,7 @@ ok 1 - subtests *** test_basic: done *** *** test_errors *** - 1..20 + 1..21 ok 1 - 
error on position 2 for <[[> ok 2 - error on position 2 for <[field]> ok 3 - error on position 1 for <'field1'.field2> @@ -97,6 +97,7 @@ ok 1 - subtests ok 18 - error in leading <.> ok 19 - space inside identifier ok 20 - tab inside identifier + ok 21 - invalid token for index_base 1 ok 2 - subtests *** test_errors: done *** *** main: done *** -- 2.19.2