From: Kirill Shcherbatov <kshcherbatov@tarantool.org> To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov <kshcherbatov@tarantool.org> Subject: [PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token Date: Tue, 19 Feb 2019 13:31:43 +0300 [thread overview] Message-ID: <51b08c09ebe40d52bc3b6b6b63755930040500f6.1550571905.git.kshcherbatov@tarantool.org> (raw) Introduced a new JSON_TOKEN_ANY json token that makes possible to perform anonymous lookup in marked tree nodes. This feature is required to implement multikey indexes. Since the token entered into the parser becomes available to user, additional server-side check is introduced so that an error occurs when trying to create a multikey index. Needed for #1260 With the introduction of multikey indexes, the key_parts_are_compatible function will not change significantly: here https://gist.github.com/kshcherbatov/c0e48138fc16678ad9a82646f00c1881 you can see how the check is made that the multikey indexes are compatible with each other. https://github.com/tarantool/tarantool/tree/kshch/gh-1257-multikey-index-json-any-token https://github.com/tarantool/tarantool/issues/1257 --- src/box/index_def.c | 60 +++++++++++++++++++++++++++++++++------ src/lib/json/json.c | 38 +++++++++++++++++++------ src/lib/json/json.h | 21 ++++++++++++-- test/engine/json.result | 21 ++++++++++++++ test/engine/json.test.lua | 8 ++++++ test/unit/json.c | 52 +++++++++++++++++++++++++++++---- test/unit/json.result | 10 +++++-- 7 files changed, 184 insertions(+), 26 deletions(-) diff --git a/src/box/index_def.c b/src/box/index_def.c index 6c37f9f1d..0f99f1759 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -244,6 +244,56 @@ index_def_cmp(const struct index_def *key1, const struct index_def *key2) key2->key_def->parts, key2->key_def->part_count); } +/** + * Test whether key_parts a and b are compatible: + * + field numbers are differ OR + * + json paths are differ + * Also perform integrity check: parts must not define multikey + * index. + */ +static bool +key_parts_are_compatible(struct key_part *a, struct key_part *b, + struct index_def *index_def, const char *space_name) +{ + if (a->fieldno != b->fieldno) + return true; + struct json_lexer lexer_a, lexer_b; + json_lexer_create(&lexer_a, a->path, a->path_len, TUPLE_INDEX_BASE); + json_lexer_create(&lexer_b, b->path, b->path_len, TUPLE_INDEX_BASE); + struct json_token token_a, token_b; + /* For the sake of json_token_cmp(). */ + token_a.parent = NULL; + token_b.parent = NULL; + int a_rc, b_rc; + int token_idx = 0, differ_token_idx = 0; + int a_multikey_rank = 0, b_multikey_rank = 0; + while ((a_rc = json_lexer_next_token(&lexer_a, &token_a)) == 0 && + (b_rc = json_lexer_next_token(&lexer_b, &token_b)) == 0 && + token_a.type != JSON_TOKEN_END && + token_b.type != JSON_TOKEN_END) { + token_idx++; + if (differ_token_idx == 0) { + if (json_token_cmp(&token_a, &token_b) != 0) + differ_token_idx = token_idx; + } + if (token_a.type == JSON_TOKEN_ANY) + a_multikey_rank = token_idx; + if (token_b.type == JSON_TOKEN_ANY) + b_multikey_rank = token_idx; + } + if (a_multikey_rank > 0 || b_multikey_rank > 0) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name, + "multikey index feature is not supported yet"); + return false; + } + if (differ_token_idx > 0 || token_b.type != token_a.type) + return true; + diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name, + "same key part is indexed twice"); + return false; +} + bool index_def_is_valid(struct index_def *index_def, const char *space_name) @@ -282,15 +332,9 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) */ struct key_part *part_a = &index_def->key_def->parts[i]; struct key_part *part_b = &index_def->key_def->parts[j]; - if (part_a->fieldno == part_b->fieldno && - json_path_cmp(part_a->path, part_a->path_len, - part_b->path, part_b->path_len, - TUPLE_INDEX_BASE) == 0) { - diag_set(ClientError, ER_MODIFY_INDEX, - index_def->name, space_name, - "same key part is indexed twice"); + if (!key_parts_are_compatible(part_a, part_b, index_def, + space_name)) return false; - } } } return true; diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 1b1a3ec2c..019917825 100644 --- a/src/lib/json/json.c +++ b/src/lib/json/json.c @@ -146,18 +146,25 @@ json_parse_integer(struct json_lexer *lexer, struct json_token *token) int len = 0; int value = 0; char c = *pos; - if (! isdigit(c)) - return lexer->symbol_count + 1; + if (!isdigit(c)) { + if (c != '*') + return lexer->symbol_count + 1; + token->type = JSON_TOKEN_ANY; + ++len; + ++pos; + goto end; + } do { value = value * 10 + c - (int)'0'; ++len; } while (++pos < end && isdigit((c = *pos))); if (value < lexer->index_base) return lexer->symbol_count + 1; - lexer->offset += len; - lexer->symbol_count += len; token->type = JSON_TOKEN_NUM; token->num = value - lexer->index_base; +end: + lexer->offset += len; + lexer->symbol_count += len; return 0; } @@ -252,10 +259,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) } } -/** - * Compare JSON token keys. - */ -static int +int json_token_cmp(const struct json_token *a, const struct json_token *b) { if (a->parent != b->parent) @@ -270,7 +274,7 @@ json_token_cmp(const struct json_token *a, const struct json_token *b) } else if (a->type == JSON_TOKEN_NUM) { ret = a->num - b->num; } else { - unreachable(); + assert(a->type == JSON_TOKEN_ANY); } return ret; } @@ -332,6 +336,9 @@ json_token_snprint(char *buf, int size, const struct json_token *token, case JSON_TOKEN_STR: len = snprintf(buf, size, "[\"%.*s\"]", token->len, token->str); break; + case JSON_TOKEN_ANY: + len = snprintf(buf, size, "[*]"); + break; default: unreachable(); } @@ -420,6 +427,9 @@ json_token_hash(struct json_token *token) } else if (token->type == JSON_TOKEN_NUM) { data = &token->num; data_size = sizeof(token->num); + } else if (token->type == JSON_TOKEN_ANY) { + data = "*"; + data_size = 1; } else { unreachable(); } @@ -435,6 +445,7 @@ json_tree_create(struct json_tree *tree) tree->root.type = JSON_TOKEN_END; tree->root.max_child_idx = -1; tree->root.sibling_idx = -1; + tree->root.is_multikey = false; tree->hash = mh_json_new(); return tree->hash == NULL ? -1 : 0; } @@ -479,11 +490,14 @@ json_tree_add(struct json_tree *tree, struct json_token *parent, struct json_token *token) { assert(json_tree_lookup(tree, parent, token) == NULL); + assert(token->type != JSON_TOKEN_ANY || + (!parent->is_multikey && json_token_is_leaf(parent))); token->parent = parent; token->children = NULL; token->children_capacity = 0; token->max_child_idx = -1; token->hash = json_token_hash(token); + token->is_multikey = false; int insert_idx = token->type == JSON_TOKEN_NUM ? (int)token->num : parent->max_child_idx + 1; /* @@ -520,6 +534,8 @@ json_tree_add(struct json_tree *tree, struct json_token *parent, assert(parent->children[insert_idx] == NULL); parent->children[insert_idx] = token; parent->max_child_idx = MAX(parent->max_child_idx, insert_idx); + if (token->type == JSON_TOKEN_ANY) + parent->is_multikey = true; token->sibling_idx = insert_idx; assert(json_tree_lookup(tree, parent, token) == token); return 0; @@ -532,11 +548,15 @@ json_tree_del(struct json_tree *tree, struct json_token *token) assert(token->sibling_idx >= 0); assert(parent->children[token->sibling_idx] == token); assert(json_tree_lookup(tree, parent, token) == token); + assert(token->type != JSON_TOKEN_ANY || + (parent->is_multikey && parent->max_child_idx == 0)); /* * Clear the entry corresponding to this token in parent's * children array and update max_child_idx if necessary. */ parent->children[token->sibling_idx] = NULL; + if (token->type == JSON_TOKEN_ANY) + parent->is_multikey = false; token->sibling_idx = -1; while (parent->max_child_idx >= 0 && parent->children[parent->max_child_idx] == NULL) diff --git a/src/lib/json/json.h b/src/lib/json/json.h index 66cddd026..ff4ab7d91 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -66,6 +66,7 @@ struct json_lexer { enum json_token_type { JSON_TOKEN_NUM, JSON_TOKEN_STR, + JSON_TOKEN_ANY, /** Lexer reached end of path. */ JSON_TOKEN_END, }; @@ -113,6 +114,10 @@ struct json_token { * a JSON tree root. */ int sibling_idx; + /** + * True when it has the only child token JSON_TOKEN_ANY. + */ + bool is_multikey; /** * Hash value of the token. Used for lookups in a JSON tree. * For more details, see the comment to json_tree::hash. @@ -236,6 +241,12 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len, int json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); +/** + * Compare JSON token keys. + */ +int +json_token_cmp(const struct json_token *a, const struct json_token *b); + /** * Compare two JSON paths using Lexer class. * - in case of paths that have same token-sequence prefix, @@ -307,10 +318,16 @@ json_tree_lookup(struct json_tree *tree, struct json_token *parent, const struct json_token *token) { struct json_token *ret = NULL; + if (unlikely(parent->is_multikey && + (token->type == JSON_TOKEN_NUM || + token->type == JSON_TOKEN_ANY))) { + assert(parent->max_child_idx == 0); + return parent->children[0]; + } if (likely(token->type == JSON_TOKEN_NUM)) { - ret = (int)token->num < parent->children_capacity ? + ret = token->num <= parent->max_child_idx ? parent->children[token->num] : NULL; - } else { + } else if (token->type == JSON_TOKEN_STR) { ret = json_tree_lookup_slowpath(tree, parent, token); } return ret; diff --git a/test/engine/json.result b/test/engine/json.result index 1bac85edd..e280a770c 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -683,3 +683,24 @@ town:select() s:drop() --- ... +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': multikey index + feature is not supported yet' +... +idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[*].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': multikey index + feature is not supported yet' +... +idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][2].b'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': multikey index + feature is not supported yet' +... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index 9afa3daa2..ef554247e 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -192,3 +192,11 @@ town:select() name:drop() town:select() s:drop() + +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata') +idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}}) +idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[*].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}}) +idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][2].b'}}}) diff --git a/test/unit/json.c b/test/unit/json.c index 6448a3210..8b141f54a 100644 --- a/test/unit/json.c +++ b/test/unit/json.c @@ -211,7 +211,7 @@ void test_tree() { header(); - plan(58); + plan(63); struct json_tree tree; int rc = json_tree_create(&tree); @@ -411,6 +411,41 @@ test_tree() "last node became interm - it can't be leaf anymore"); is(json_token_is_leaf(&records[3].node), true, "last node is leaf"); + json_tree_foreach_entry_safe(node, &tree.root, struct test_struct, + node, node_tmp) + json_tree_del(&tree, &node->node); + + /* Test multikey tokens. */ + records_idx = 0; + char *path_multikey = "[*][\"data\"]"; + node = test_add_path(&tree, path_multikey, strlen(path_multikey), + records, &records_idx); + is(node, &records[1], "add path '%s'", path_multikey); + + node = json_tree_lookup_path_entry(&tree, &tree.root, path_multikey, + strlen(path_multikey), INDEX_BASE, + struct test_struct, node); + is(node, &records[1], "lookup path '%s'", path_multikey); + + token = &records[records_idx++].node; + token->type = JSON_TOKEN_NUM; + token->num = 3; + node = json_tree_lookup_entry(&tree, &tree.root, token, + struct test_struct, node); + is(node, &records[0], "lookup numeric token in multikey node"); + + token->type = JSON_TOKEN_ANY; + node = json_tree_lookup_entry(&tree, &tree.root, token, + struct test_struct, node); + is(node, &records[0], "lookup any token in multikey node"); + + token->type = JSON_TOKEN_STR; + token->str = "invalid"; + token->len = strlen("invalid"); + node = json_tree_lookup_entry(&tree, &tree.root, token, + struct test_struct, node); + is(node, NULL, "lookup string token in multikey node"); + json_tree_foreach_entry_safe(node, &tree.root, struct test_struct, node, node_tmp) json_tree_del(&tree, &node->node); @@ -433,7 +468,7 @@ test_path_cmp() {"Data[1][\"Info\"].fname[1]", -1}, }; header(); - plan(lengthof(rc) + 2); + plan(lengthof(rc) + 3); for (size_t i = 0; i < lengthof(rc); ++i) { const char *path = rc[i].path; int errpos = rc[i].errpos; @@ -450,6 +485,13 @@ test_path_cmp() ret = json_path_validate(invalid, strlen(invalid), INDEX_BASE); is(ret, 6, "path %s error pos %d expected %d", invalid, ret, 6); + char *multikey_a = "Data[*][\"FIO\"].fname[*]"; + char *multikey_b = "[\"Data\"][*].FIO[\"fname\"][*]"; + ret = json_path_cmp(multikey_a, strlen(multikey_a), multikey_b, + strlen(multikey_b), INDEX_BASE); + is(ret, 0, "path cmp result \"%s\" with \"%s\": have %d, expected %d", + multikey_a, multikey_b, ret, 0); + check_plan(); footer(); } @@ -463,14 +505,14 @@ test_path_snprint() struct json_tree tree; int rc = json_tree_create(&tree); fail_if(rc != 0); - struct test_struct records[5]; - const char *path = "[1][20][\"file\"][8]"; + struct test_struct records[6]; + const char *path = "[1][*][20][\"file\"][8]"; int path_len = strlen(path); int records_idx = 0; struct test_struct *node, *node_tmp; node = test_add_path(&tree, path, path_len, records, &records_idx); - fail_if(&node->node != &records[3].node); + fail_if(&node->node != &records[4].node); char buf[64]; int bufsz = sizeof(buf); diff --git a/test/unit/json.result b/test/unit/json.result index ee54cbe0e..8a6c7db73 100644 --- a/test/unit/json.result +++ b/test/unit/json.result @@ -101,7 +101,7 @@ ok 1 - subtests ok 2 - subtests *** test_errors: done *** *** test_tree *** - 1..58 + 1..63 ok 1 - add path '[1][10]' ok 2 - add path '[1][20].file' ok 3 - add path '[1][20].file[2]' @@ -160,10 +160,15 @@ ok 2 - subtests ok 56 - last node is leaf ok 57 - last node became interm - it can't be leaf anymore ok 58 - last node is leaf + ok 59 - add path '[*]["data"]' + ok 60 - lookup path '[*]["data"]' + ok 61 - lookup numeric token in multikey node + ok 62 - lookup any token in multikey node + ok 63 - lookup string token in multikey node ok 3 - subtests *** test_tree: done *** *** test_path_cmp *** - 1..7 + 1..8 ok 1 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname": have 0, expected 0 ok 2 - path cmp result "Data[1]["FIO"].fname" with "["Data"][1].FIO["fname"]": have 0, expected 0 ok 3 - path cmp result "Data[1]["FIO"].fname" with "Data[1]": have 1, expected 1 @@ -171,6 +176,7 @@ ok 3 - subtests ok 5 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["Info"].fname[1]": have -1, expected -1 ok 6 - path Data[1]["FIO"].fname is valid ok 7 - path Data[[1]["FIO"].fname error pos 6 expected 6 + ok 8 - path cmp result "Data[*]["FIO"].fname[*]" with "["Data"][*].FIO["fname"][*]": have 0, expected 0 ok 4 - subtests *** test_path_cmp: done *** *** test_path_snprint *** -- 2.20.1
next reply other threads:[~2019-02-19 10:31 UTC|newest] Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-02-19 10:31 Kirill Shcherbatov [this message] 2019-02-26 16:58 ` Vladimir Davydov 2019-02-27 14:07 ` [tarantool-patches] " Kirill Shcherbatov 2019-02-28 17:10 ` Vladimir Davydov 2019-03-01 13:50 ` Kirill Shcherbatov 2019-03-01 16:06 ` Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=51b08c09ebe40d52bc3b6b6b63755930040500f6.1550571905.git.kshcherbatov@tarantool.org \ --to=kshcherbatov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=vdavydov.dev@gmail.com \ --subject='Re: [PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox