From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token References: <51b08c09ebe40d52bc3b6b6b63755930040500f6.1550571905.git.kshcherbatov@tarantool.org> <20190226165832.jvwc55ctxj5zhpgt@esperanza> From: Kirill Shcherbatov Message-ID: <078abd2e-76d9-373e-07db-e70286faef3f@tarantool.org> Date: Wed, 27 Feb 2019 17:07:30 +0300 MIME-Version: 1.0 In-Reply-To: <20190226165832.jvwc55ctxj5zhpgt@esperanza> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Vladimir Davydov List-ID: > If we look up ANY, shouldn't we be given any token rather than NULL? Yep. Let's return if (likely(parent->max_child_idx >= 0)) ret = parent->children[parent->max_child_idx]; ===================================================== Introduced a new JSON_TOKEN_ANY json token that makes possible to perform anonymous lookup in marked tree nodes. This feature is required to implement multikey indexes. Since the token entered into the parser becomes available to user, additional server-side check is introduced so that an error occurs when trying to create a multikey index. Needed for #1257 https://github.com/tarantool/tarantool/tree/kshch/gh-1257-multikey-index-new-field-map-alloc https://github.com/tarantool/tarantool/issues/1257 --- src/box/key_def.c | 27 ++++++++++++++-- src/lib/json/json.c | 25 ++++++++++----- src/lib/json/json.h | 40 +++++++++++++++++++++--- test/engine/json.result | 13 ++++++++ test/engine/json.test.lua | 7 +++++ test/unit/json.c | 65 ++++++++++++++++++++++++++++++++++++--- test/unit/json.result | 12 ++++++-- 7 files changed, 168 insertions(+), 21 deletions(-) diff --git a/src/box/key_def.c b/src/box/key_def.c index 432b72a97..6e818f20a 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -196,12 +196,30 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) if (coll_id == NULL) { diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, i + 1, "collation was not found by ID"); - key_def_delete(def); - return NULL; + goto error; } coll = coll_id->coll; } - uint32_t path_len = part->path != NULL ? strlen(part->path) : 0; + uint32_t path_len = 0; + if (part->path != NULL) { + path_len = strlen(part->path); + int rc; + struct json_lexer lexer; + struct json_token token; + json_lexer_create(&lexer, part->path, path_len, + TUPLE_INDEX_BASE); + while ((rc = json_lexer_next_token(&lexer, + &token)) == 0) { + if (token.type == JSON_TOKEN_ANY) { + diag_set(ClientError, ER_UNSUPPORTED, + "Index", "multikey path yet"); + goto error; + } else if (token.type == JSON_TOKEN_END) { + break; + } + } + assert(rc == 0 && token.type == JSON_TOKEN_END); + } key_def_set_part(def, i, part->fieldno, part->type, part->nullable_action, coll, part->coll_id, part->sort_order, part->path, path_len, @@ -210,6 +228,9 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) assert(path_pool == (char *)def + sz); key_def_set_func(def); return def; +error: + key_def_delete(def); + return NULL; } int diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1b1a3ec2c..2a28d5d8e 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -226,10 +226,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) if (lexer->offset == lexer->src_len) return lexer->symbol_count; c = json_current_char(lexer); - if (c == '"' || c == '\'') + if (c == '"' || c == '\'') { rc = json_parse_string(lexer, token, c); - else + } else if (c == '*') { + json_skip_char(lexer); + token->type = JSON_TOKEN_ANY; + } else { rc = json_parse_integer(lexer, token); + } if (rc != 0) return rc; /* @@ -252,10 +256,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) } } -/** - * Compare JSON token keys. - */ -static int +int json_token_cmp(const struct json_token *a, const struct json_token *b) { if (a->parent != b->parent) @@ -270,7 +271,7 @@ json_token_cmp(const struct json_token *a, const struct json_token *b) } else if (a->type == JSON_TOKEN_NUM) { ret = a->num - b->num; } else { - unreachable(); + assert(a->type == JSON_TOKEN_ANY); } return ret; } @@ -332,6 +333,9 @@ json_token_snprint(char *buf, int size, const struct json_token *token, case JSON_TOKEN_STR: len = snprintf(buf, size, "[\"%.*s\"]", token->len, token->str); break; + case JSON_TOKEN_ANY: + len = snprintf(buf, size, "[*]"); + break; default: unreachable(); } @@ -420,6 +424,9 @@ json_token_hash(struct json_token *token) } else if (token->type == JSON_TOKEN_NUM) { data = &token->num; data_size = sizeof(token->num); + } else if (token->type == JSON_TOKEN_ANY) { + data = "*"; + data_size = 1; } else { unreachable(); } @@ -479,6 +486,8 @@ json_tree_add(struct json_tree *tree, struct json_token *parent, struct json_token *token) { assert(json_tree_lookup(tree, parent, token) == NULL); + assert(token->type != JSON_TOKEN_ANY || + (!json_token_is_multikey(parent) && json_token_is_leaf(parent))); token->parent = parent; token->children = NULL; token->children_capacity = 0; @@ -532,6 +541,8 @@ json_tree_del(struct json_tree *tree, struct json_token *token) assert(token->sibling_idx >= 0); assert(parent->children[token->sibling_idx] == token); assert(json_tree_lookup(tree, parent, token) == token); + assert(token->type != JSON_TOKEN_ANY || + (json_token_is_multikey(parent) && parent->max_child_idx == 0)); /* * Clear the entry corresponding to this token in parent's * children array and update max_child_idx if necessary. diff --git a/src/lib/json/json.h b/src/lib/json/json.h index 66cddd026..c1c61c447 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -66,6 +66,7 @@ struct json_lexer { enum json_token_type { JSON_TOKEN_NUM, JSON_TOKEN_STR, + JSON_TOKEN_ANY, /** Lexer reached end of path. */ JSON_TOKEN_END, }; @@ -98,6 +99,8 @@ struct json_token { * array match [token.num] index for JSON_TOKEN_NUM type * and are allocated sequentially for JSON_TOKEN_STR child * tokens. + * If in case of "multikey entry", children array has the + * only child JSON_TOKEN_ANY with index 0. */ struct json_token **children; /** Allocation size of children array. */ @@ -236,6 +239,12 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); +/** + * Compare JSON token keys. + */ +int +json_token_cmp(const struct json_token *a, const struct json_token *b); + /** * Compare two JSON paths using Lexer class. * - in case of paths that have same token-sequence prefix, @@ -264,6 +273,16 @@ json_token_is_leaf(struct json_token *token) return token->max_child_idx < 0; } +/** + * Test if a given JSON token is multikey. + */ +static inline bool +json_token_is_multikey(struct json_token *token) +{ + return token->max_child_idx == 0 && + token->children[0]->type == JSON_TOKEN_ANY; +} + /** * An snprint-style function to print the path to a token in * a JSON tree. @@ -307,11 +326,24 @@ json_tree_lookup(struct json_tree *tree, struct json_token *parent, const struct json_token *token) { struct json_token *ret = NULL; - if (likely(token->type == JSON_TOKEN_NUM)) { - ret = (int)token->num < parent->children_capacity ? - parent->children[token->num] : NULL; - } else { + if (unlikely(json_token_is_multikey(parent))) { + assert(parent->max_child_idx == 0); + return parent->children[0]; + } + switch (token->type) { + case JSON_TOKEN_NUM: + if (likely(token->num <= parent->max_child_idx)) + ret = parent->children[token->num]; + break; + case JSON_TOKEN_ANY: + if (likely(parent->max_child_idx >= 0)) + ret = parent->children[parent->max_child_idx]; + break; + case JSON_TOKEN_STR: ret = json_tree_lookup_slowpath(tree, parent, token); + break; + default: + unreachable(); } return ret; } diff --git a/test/engine/json.result b/test/engine/json.result index 1bac85edd..c25fe5ce3 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -683,3 +683,16 @@ town:select() s:drop() --- ... +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +- error: Index does not support multikey path yet +... +s:drop() +--- +... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index 9afa3daa2..f9a7180b1 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -192,3 +192,10 @@ town:select() name:drop() town:select() s:drop() + +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata') +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +s:drop() diff --git a/test/unit/json.c b/test/unit/json.c index 6448a3210..2e24d60f1 100644 --- a/test/unit/json.c +++ b/test/unit/json.c @@ -211,7 +211,7 @@ void test_tree() { header(); - plan(58); + plan(65); struct json_tree tree; int rc = json_tree_create(&tree); @@ -411,6 +411,54 @@ test_tree() "last node became interm - it can't be leaf anymore"); is(json_token_is_leaf(&records[3].node), true, "last node is leaf"); + json_tree_foreach_entry_safe(node, &tree.root, struct test_struct, + node, node_tmp) + json_tree_del(&tree, &node->node); + + /* Test multikey tokens. */ + records_idx = 0; + char *path_problem = "base[3]"; + node = test_add_path(&tree, path_problem, strlen(path_problem), + records, &records_idx); + is(node, &records[1], "add path '%s'", path_problem); + token->type = JSON_TOKEN_ANY; + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node->node.num, 2, "lookup any token in non-multikey node"); + + /* Can't attay ANY token to non-leaf parent. Cleanup. */ + json_tree_del(&tree, &records[1].node); + records_idx--; + + char *path_multikey = "base[*][\"data\"]"; + node = test_add_path(&tree, path_multikey, strlen(path_multikey), + records, &records_idx); + is(node, &records[2], "add path '%s'", path_multikey); + + node = json_tree_lookup_path_entry(&tree, &tree.root, path_multikey, + strlen(path_multikey), INDEX_BASE, + struct test_struct, node); + is(node, &records[2], "lookup path '%s'", path_multikey); + + token = &records[records_idx++].node; + token->type = JSON_TOKEN_NUM; + token->num = 3; + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node, &records[1], "lookup numeric token in multikey node"); + + token->type = JSON_TOKEN_ANY; + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node, &records[1], "lookup any token in multikey node"); + + token->type = JSON_TOKEN_STR; + token->str = "str"; + token->len = strlen("str"); + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node, &records[1], "lookup string token in multikey node"); + json_tree_foreach_entry_safe(node, &tree.root, struct test_struct, node, node_tmp) json_tree_del(&tree, &node->node); @@ -433,7 +481,7 @@ test_path_cmp() {"Data[1][\"Info\"].fname[1]", -1}, }; header(); - plan(lengthof(rc) + 2); + plan(lengthof(rc) + 3); for (size_t i = 0; i < lengthof(rc); ++i) { const char *path = rc[i].path; int errpos = rc[i].errpos; @@ -450,6 +498,13 @@ test_path_cmp() ret = json_path_validate(invalid, strlen(invalid), INDEX_BASE); is(ret, 6, "path %s error pos %d expected %d", invalid, ret, 6); + char *multikey_a = "Data[*][\"FIO\"].fname[*]"; + char *multikey_b = "[\"Data\"][*].FIO[\"fname\"][*]"; + ret = json_path_cmp(multikey_a, strlen(multikey_a), multikey_b, + strlen(multikey_b), INDEX_BASE); + is(ret, 0, "path cmp result \"%s\" with \"%s\": have %d, expected %d", + multikey_a, multikey_b, ret, 0); + check_plan(); footer(); } @@ -463,14 +518,14 @@ test_path_snprint() struct json_tree tree; int rc = json_tree_create(&tree); fail_if(rc != 0); - struct test_struct records[5]; - const char *path = "[1][20][\"file\"][8]"; + struct test_struct records[6]; + const char *path = "[1][*][20][\"file\"][8]"; int path_len = strlen(path); int records_idx = 0; struct test_struct *node, *node_tmp; node = test_add_path(&tree, path, path_len, records, &records_idx); - fail_if(&node->node != &records[3].node); + fail_if(&node->node != &records[4].node); char buf[64]; int bufsz = sizeof(buf); diff --git a/test/unit/json.result b/test/unit/json.result index ee54cbe0e..02080f93a 100644 --- a/test/unit/json.result +++ b/test/unit/json.result @@ -101,7 +101,7 @@ ok 1 - subtests ok 2 - subtests *** test_errors: done *** *** test_tree *** - 1..58 + 1..65 ok 1 - add path '[1][10]' ok 2 - add path '[1][20].file' ok 3 - add path '[1][20].file[2]' @@ -160,10 +160,17 @@ ok 2 - subtests ok 56 - last node is leaf ok 57 - last node became interm - it can't be leaf anymore ok 58 - last node is leaf + ok 59 - add path 'base[3]' + ok 60 - lookup any token in non-multikey node + ok 61 - add path 'base[*]["data"]' + ok 62 - lookup path 'base[*]["data"]' + ok 63 - lookup numeric token in multikey node + ok 64 - lookup any token in multikey node + ok 65 - lookup string token in multikey node ok 3 - subtests *** test_tree: done *** *** test_path_cmp *** - 1..7 + 1..8 ok 1 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname": have 0, expected 0 ok 2 - path cmp result "Data[1]["FIO"].fname" with "["Data"][1].FIO["fname"]": have 0, expected 0 ok 3 - path cmp result "Data[1]["FIO"].fname" with "Data[1]": have 1, expected 1 @@ -171,6 +178,7 @@ ok 3 - subtests ok 5 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["Info"].fname[1]": have -1, expected -1 ok 6 - path Data[1]["FIO"].fname is valid ok 7 - path Data[[1]["FIO"].fname error pos 6 expected 6 + ok 8 - path cmp result "Data[*]["FIO"].fname[*]" with "["Data"][*].FIO["fname"][*]": have 0, expected 0 ok 4 - subtests *** test_path_cmp: done *** *** test_path_snprint *** -- 2.20.1