From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token References: <51b08c09ebe40d52bc3b6b6b63755930040500f6.1550571905.git.kshcherbatov@tarantool.org> <20190226165832.jvwc55ctxj5zhpgt@esperanza> <078abd2e-76d9-373e-07db-e70286faef3f@tarantool.org> <20190228171047.hmemgyzmjjmljcos@esperanza> From: Kirill Shcherbatov Message-ID: Date: Fri, 1 Mar 2019 16:50:02 +0300 MIME-Version: 1.0 In-Reply-To: <20190228171047.hmemgyzmjjmljcos@esperanza> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Vladimir Davydov List-ID: >> + char *path_multikey = "base[*][\"data\"]"; > > Again, let's please reuse 'path' and 'path_len' defined in the beginning > of this function rather than adding a new local variable. I really prefer to introduce a new variable here instead of override existent one. =============================================== Introduced a new JSON_TOKEN_ANY json token that makes possible to perform anonymous lookup in marked tree nodes. This feature is required to implement multikey indexes. Since the token entered into the parser becomes available to user, additional server-side check is introduced so that an error occurs when trying to create a multikey index. Needed for #1257 --- src/box/tuple_format.c | 5 +++ src/lib/json/json.c | 16 ++++++++-- src/lib/json/json.h | 36 +++++++++++++++++++--- test/engine/json.result | 13 ++++++++ test/engine/json.test.lua | 7 +++++ test/unit/json.c | 65 +++++++++++++++++++++++++++++++++++---- test/unit/json.result | 16 +++++++--- 7 files changed, 141 insertions(+), 17 deletions(-) diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 1c9b3c20d..4439ce3e0 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -233,6 +233,11 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && field->token.type != JSON_TOKEN_END) { + if (field->token.type == JSON_TOKEN_ANY) { + diag_set(ClientError, ER_UNSUPPORTED, + "Tarantool", "multikey indexes"); + goto fail; + } enum field_type expected_type = field->token.type == JSON_TOKEN_STR ? FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1b1a3ec2c..854158f63 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -226,10 +226,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) if (lexer->offset == lexer->src_len) return lexer->symbol_count; c = json_current_char(lexer); - if (c == '"' || c == '\'') + if (c == '"' || c == '\'') { rc = json_parse_string(lexer, token, c); - else + } else if (c == '*') { + json_skip_char(lexer); + token->type = JSON_TOKEN_ANY; + } else { rc = json_parse_integer(lexer, token); + } if (rc != 0) return rc; /* @@ -270,7 +274,7 @@ json_token_cmp(const struct json_token *a, const struct json_token *b) } else if (a->type == JSON_TOKEN_NUM) { ret = a->num - b->num; } else { - unreachable(); + assert(a->type == JSON_TOKEN_ANY); } return ret; } @@ -332,6 +336,9 @@ json_token_snprint(char *buf, int size, const struct json_token *token, case JSON_TOKEN_STR: len = snprintf(buf, size, "[\"%.*s\"]", token->len, token->str); break; + case JSON_TOKEN_ANY: + len = snprintf(buf, size, "[*]"); + break; default: unreachable(); } @@ -420,6 +427,9 @@ json_token_hash(struct json_token *token) } else if (token->type == JSON_TOKEN_NUM) { data = &token->num; data_size = sizeof(token->num); + } else if (token->type == JSON_TOKEN_ANY) { + data = "*"; + data_size = 1; } else { unreachable(); } diff --git a/src/lib/json/json.h b/src/lib/json/json.h index 6927d2d90..c1de3e579 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -66,6 +66,7 @@ struct json_lexer { enum json_token_type { JSON_TOKEN_NUM, JSON_TOKEN_STR, + JSON_TOKEN_ANY, /** Lexer reached end of path. */ JSON_TOKEN_END, }; @@ -98,6 +99,10 @@ struct json_token { * array match [token.num] index for JSON_TOKEN_NUM type * and are allocated sequentially for JSON_TOKEN_STR child * tokens. + * + * JSON_TOKEN_ANY is exclusive. If it's present, it must + * be the only one and have index 0 in the children array. + * It will be returned by lookup by any key. */ struct json_token **children; /** Allocation size of children array. */ @@ -264,6 +269,16 @@ json_token_is_leaf(struct json_token *token) return token->max_child_idx < 0; } +/** + * Test if a given JSON token is multikey. + */ +static inline bool +json_token_is_multikey(struct json_token *token) +{ + return token->max_child_idx == 0 && + token->children[0]->type == JSON_TOKEN_ANY; +} + /** * An snprint-style function to print the path to a token in * a JSON tree. @@ -307,11 +322,24 @@ json_tree_lookup(struct json_tree *tree, struct json_token *parent, const struct json_token *token) { struct json_token *ret = NULL; - if (likely(token->type == JSON_TOKEN_NUM)) { - ret = (int)token->num < parent->children_capacity ? - parent->children[token->num] : NULL; - } else { + if (unlikely(json_token_is_multikey(parent))) { + assert(parent->max_child_idx == 0); + return parent->children[0]; + } + switch (token->type) { + case JSON_TOKEN_NUM: + if (likely(token->num <= parent->max_child_idx)) + ret = parent->children[token->num]; + break; + case JSON_TOKEN_ANY: + if (likely(parent->max_child_idx >= 0)) + ret = parent->children[parent->max_child_idx]; + break; + case JSON_TOKEN_STR: ret = json_tree_lookup_slowpath(tree, parent, token); + break; + default: + unreachable(); } return ret; } diff --git a/test/engine/json.result b/test/engine/json.result index 1bac85edd..a790cdfbc 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -683,3 +683,16 @@ town:select() s:drop() --- ... +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +- error: Tarantool does not support multikey indexes +... +s:drop() +--- +... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index 9afa3daa2..f9a7180b1 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -192,3 +192,10 @@ town:select() name:drop() town:select() s:drop() + +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata') +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +s:drop() diff --git a/test/unit/json.c b/test/unit/json.c index 6448a3210..fd320c9eb 100644 --- a/test/unit/json.c +++ b/test/unit/json.c @@ -211,7 +211,7 @@ void test_tree() { header(); - plan(58); + plan(65); struct json_tree tree; int rc = json_tree_create(&tree); @@ -411,6 +411,52 @@ test_tree() "last node became interm - it can't be leaf anymore"); is(json_token_is_leaf(&records[3].node), true, "last node is leaf"); + json_tree_foreach_entry_safe(node, &tree.root, struct test_struct, + node, node_tmp) + json_tree_del(&tree, &node->node); + + /* Test multikey tokens. */ + records_idx = 0; + node = test_add_path(&tree, path1, strlen(path1), records, &records_idx); + is(node, &records[1], "add path '%s'", path1); + token->type = JSON_TOKEN_ANY; + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node->node.num, 9, "lookup any token in non-multikey node"); + + /* Can't attach ANY token to non-leaf node. Cleanup. */ + json_tree_del(&tree, &records[1].node); + records_idx--; + + const char *path_multikey = "[1][*][\"data\"]"; + node = test_add_path(&tree, path_multikey, strlen(path_multikey), + records, &records_idx); + is(node, &records[2], "add path '%s'", path_multikey); + + node = json_tree_lookup_path_entry(&tree, &tree.root, path_multikey, + strlen(path_multikey), INDEX_BASE, + struct test_struct, node); + is(node, &records[2], "lookup path '%s'", path_multikey); + + token = &records[records_idx++].node; + token->type = JSON_TOKEN_NUM; + token->num = 3; + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node, &records[1], "lookup numeric token in multikey node"); + + token->type = JSON_TOKEN_ANY; + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node, &records[1], "lookup any token in multikey node"); + + token->type = JSON_TOKEN_STR; + token->str = "str"; + token->len = strlen("str"); + node = json_tree_lookup_entry(&tree, &records[0].node, token, + struct test_struct, node); + is(node, &records[1], "lookup string token in multikey node"); + json_tree_foreach_entry_safe(node, &tree.root, struct test_struct, node, node_tmp) json_tree_del(&tree, &node->node); @@ -433,7 +479,7 @@ test_path_cmp() {"Data[1][\"Info\"].fname[1]", -1}, }; header(); - plan(lengthof(rc) + 2); + plan(lengthof(rc) + 3); for (size_t i = 0; i < lengthof(rc); ++i) { const char *path = rc[i].path; int errpos = rc[i].errpos; @@ -444,8 +490,15 @@ test_path_cmp() is(rc, errpos, "path cmp result \"%s\" with \"%s\": " "have %d, expected %d", a, path, rc, errpos); } + char *multikey_a = "Data[*][\"FIO\"].fname[*]"; + char *multikey_b = "[\"Data\"][*].FIO[\"fname\"][*]"; + int ret = json_path_cmp(multikey_a, strlen(multikey_a), multikey_b, + strlen(multikey_b), INDEX_BASE); + is(ret, 0, "path cmp result \"%s\" with \"%s\": have %d, expected %d", + multikey_a, multikey_b, ret, 0); + const char *invalid = "Data[[1][\"FIO\"].fname"; - int ret = json_path_validate(a, strlen(a), INDEX_BASE); + ret = json_path_validate(a, strlen(a), INDEX_BASE); is(ret, 0, "path %s is valid", a); ret = json_path_validate(invalid, strlen(invalid), INDEX_BASE); is(ret, 6, "path %s error pos %d expected %d", invalid, ret, 6); @@ -463,14 +516,14 @@ test_path_snprint() struct json_tree tree; int rc = json_tree_create(&tree); fail_if(rc != 0); - struct test_struct records[5]; - const char *path = "[1][20][\"file\"][8]"; + struct test_struct records[6]; + const char *path = "[1][*][20][\"file\"][8]"; int path_len = strlen(path); int records_idx = 0; struct test_struct *node, *node_tmp; node = test_add_path(&tree, path, path_len, records, &records_idx); - fail_if(&node->node != &records[3].node); + fail_if(&node->node != &records[4].node); char buf[64]; int bufsz = sizeof(buf); diff --git a/test/unit/json.result b/test/unit/json.result index ee54cbe0e..3a15e84bb 100644 --- a/test/unit/json.result +++ b/test/unit/json.result @@ -101,7 +101,7 @@ ok 1 - subtests ok 2 - subtests *** test_errors: done *** *** test_tree *** - 1..58 + 1..65 ok 1 - add path '[1][10]' ok 2 - add path '[1][20].file' ok 3 - add path '[1][20].file[2]' @@ -160,17 +160,25 @@ ok 2 - subtests ok 56 - last node is leaf ok 57 - last node became interm - it can't be leaf anymore ok 58 - last node is leaf + ok 59 - add path '[1][10]' + ok 60 - lookup any token in non-multikey node + ok 61 - add path '[1][*]["data"]' + ok 62 - lookup path '[1][*]["data"]' + ok 63 - lookup numeric token in multikey node + ok 64 - lookup any token in multikey node + ok 65 - lookup string token in multikey node ok 3 - subtests *** test_tree: done *** *** test_path_cmp *** - 1..7 + 1..8 ok 1 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname": have 0, expected 0 ok 2 - path cmp result "Data[1]["FIO"].fname" with "["Data"][1].FIO["fname"]": have 0, expected 0 ok 3 - path cmp result "Data[1]["FIO"].fname" with "Data[1]": have 1, expected 1 ok 4 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname[1]": have -1, expected -1 ok 5 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["Info"].fname[1]": have -1, expected -1 - ok 6 - path Data[1]["FIO"].fname is valid - ok 7 - path Data[[1]["FIO"].fname error pos 6 expected 6 + ok 6 - path cmp result "Data[*]["FIO"].fname[*]" with "["Data"][*].FIO["fname"][*]": have 0, expected 0 + ok 7 - path Data[1]["FIO"].fname is valid + ok 8 - path Data[[1]["FIO"].fname error pos 6 expected 6 ok 4 - subtests *** test_path_cmp: done *** *** test_path_snprint *** -- 2.21.0