Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org,
	Vladimir Davydov <vdavydov.dev@gmail.com>
Subject: Re: [tarantool-patches] Re: [PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token
Date: Fri, 1 Mar 2019 16:50:02 +0300	[thread overview]
Message-ID: <ab51310a-8429-cbb2-860c-1faa71cda99c@tarantool.org> (raw)
In-Reply-To: <20190228171047.hmemgyzmjjmljcos@esperanza>

>> +	char *path_multikey = "base[*][\"data\"]";
> 
> Again, let's please reuse 'path' and 'path_len' defined in the beginning
> of this function rather than adding a new local variable.
I really prefer to introduce a new variable here instead of override existent one.

===============================================

Introduced a new JSON_TOKEN_ANY json token that makes possible to
perform anonymous lookup in marked tree nodes. This feature is
required to implement multikey indexes.
Since the token entered into the parser becomes available to user,
additional server-side check is introduced so that an error
occurs when trying to create a multikey index.

Needed for #1257
---
 src/box/tuple_format.c    |  5 +++
 src/lib/json/json.c       | 16 ++++++++--
 src/lib/json/json.h       | 36 +++++++++++++++++++---
 test/engine/json.result   | 13 ++++++++
 test/engine/json.test.lua |  7 +++++
 test/unit/json.c          | 65 +++++++++++++++++++++++++++++++++++----
 test/unit/json.result     | 16 +++++++---
 7 files changed, 141 insertions(+), 17 deletions(-)

diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 1c9b3c20d..4439ce3e0 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -233,6 +233,11 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
 	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
 	       field->token.type != JSON_TOKEN_END) {
+		if (field->token.type == JSON_TOKEN_ANY) {
+			diag_set(ClientError, ER_UNSUPPORTED,
+				"Tarantool", "multikey indexes");
+			goto fail;
+		}
 		enum field_type expected_type =
 			field->token.type == JSON_TOKEN_STR ?
 			FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index 1b1a3ec2c..854158f63 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -226,10 +226,14 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 		if (lexer->offset == lexer->src_len)
 			return lexer->symbol_count;
 		c = json_current_char(lexer);
-		if (c == '"' || c == '\'')
+		if (c == '"' || c == '\'') {
 			rc = json_parse_string(lexer, token, c);
-		else
+		} else if (c == '*') {
+			json_skip_char(lexer);
+			token->type = JSON_TOKEN_ANY;
+		} else {
 			rc = json_parse_integer(lexer, token);
+		}
 		if (rc != 0)
 			return rc;
 		/*
@@ -270,7 +274,7 @@ json_token_cmp(const struct json_token *a, const struct json_token *b)
 	} else if (a->type == JSON_TOKEN_NUM) {
 		ret = a->num - b->num;
 	} else {
-		unreachable();
+		assert(a->type == JSON_TOKEN_ANY);
 	}
 	return ret;
 }
@@ -332,6 +336,9 @@ json_token_snprint(char *buf, int size, const struct json_token *token,
 	case JSON_TOKEN_STR:
 		len = snprintf(buf, size, "[\"%.*s\"]", token->len, token->str);
 		break;
+	case JSON_TOKEN_ANY:
+		len = snprintf(buf, size, "[*]");
+		break;
 	default:
 		unreachable();
 	}
@@ -420,6 +427,9 @@ json_token_hash(struct json_token *token)
 	} else if (token->type == JSON_TOKEN_NUM) {
 		data = &token->num;
 		data_size = sizeof(token->num);
+	} else if (token->type == JSON_TOKEN_ANY) {
+		data = "*";
+		data_size = 1;
 	} else {
 		unreachable();
 	}
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index 6927d2d90..c1de3e579 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -66,6 +66,7 @@ struct json_lexer {
 enum json_token_type {
 	JSON_TOKEN_NUM,
 	JSON_TOKEN_STR,
+	JSON_TOKEN_ANY,
 	/** Lexer reached end of path. */
 	JSON_TOKEN_END,
 };
@@ -98,6 +99,10 @@ struct json_token {
 	 * array match [token.num] index for JSON_TOKEN_NUM type
 	 * and are allocated sequentially for JSON_TOKEN_STR child
 	 * tokens.
+	 *
+	 * JSON_TOKEN_ANY is exclusive. If it's present, it must
+	 * be the only one and have index 0 in the children array.
+	 * It will be returned by lookup by any key.
 	 */
 	struct json_token **children;
 	/** Allocation size of children array. */
@@ -264,6 +269,16 @@ json_token_is_leaf(struct json_token *token)
 	return token->max_child_idx < 0;
 }
 
+/**
+ * Test if a given JSON token is multikey.
+ */
+static inline bool
+json_token_is_multikey(struct json_token *token)
+{
+	return token->max_child_idx == 0 &&
+	       token->children[0]->type == JSON_TOKEN_ANY;
+}
+
 /**
  * An snprint-style function to print the path to a token in
  * a JSON tree.
@@ -307,11 +322,24 @@ json_tree_lookup(struct json_tree *tree, struct json_token *parent,
 		 const struct json_token *token)
 {
 	struct json_token *ret = NULL;
-	if (likely(token->type == JSON_TOKEN_NUM)) {
-		ret = (int)token->num < parent->children_capacity ?
-		      parent->children[token->num] : NULL;
-	} else {
+	if (unlikely(json_token_is_multikey(parent))) {
+		assert(parent->max_child_idx == 0);
+		return parent->children[0];
+	}
+	switch (token->type) {
+	case JSON_TOKEN_NUM:
+		if (likely(token->num <= parent->max_child_idx))
+			ret = parent->children[token->num];
+		break;
+	case JSON_TOKEN_ANY:
+		if (likely(parent->max_child_idx >= 0))
+			ret = parent->children[parent->max_child_idx];
+		break;
+	case JSON_TOKEN_STR:
 		ret = json_tree_lookup_slowpath(tree, parent, token);
+		break;
+	default:
+		unreachable();
 	}
 	return ret;
 }
diff --git a/test/engine/json.result b/test/engine/json.result
index 1bac85edd..a790cdfbc 100644
--- a/test/engine/json.result
+++ b/test/engine/json.result
@@ -683,3 +683,16 @@ town:select()
 s:drop()
 ---
 ...
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata')
+---
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+- error: Tarantool does not support multikey indexes
+...
+s:drop()
+---
+...
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
index 9afa3daa2..f9a7180b1 100644
--- a/test/engine/json.test.lua
+++ b/test/engine/json.test.lua
@@ -192,3 +192,10 @@ town:select()
 name:drop()
 town:select()
 s:drop()
+
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata')
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+s:drop()
diff --git a/test/unit/json.c b/test/unit/json.c
index 6448a3210..fd320c9eb 100644
--- a/test/unit/json.c
+++ b/test/unit/json.c
@@ -211,7 +211,7 @@ void
 test_tree()
 {
 	header();
-	plan(58);
+	plan(65);
 
 	struct json_tree tree;
 	int rc = json_tree_create(&tree);
@@ -411,6 +411,52 @@ test_tree()
 	   "last node became interm - it can't be leaf anymore");
 	is(json_token_is_leaf(&records[3].node), true, "last node is leaf");
 
+	json_tree_foreach_entry_safe(node, &tree.root, struct test_struct,
+				     node, node_tmp)
+		json_tree_del(&tree, &node->node);
+
+	/* Test multikey tokens. */
+	records_idx = 0;
+	node = test_add_path(&tree, path1, strlen(path1), records, &records_idx);
+	is(node, &records[1], "add path '%s'", path1);
+	token->type = JSON_TOKEN_ANY;
+	node = json_tree_lookup_entry(&tree, &records[0].node, token,
+				      struct test_struct, node);
+	is(node->node.num, 9, "lookup any token in non-multikey node");
+
+	/* Can't attach ANY token to non-leaf node. Cleanup. */
+	json_tree_del(&tree, &records[1].node);
+	records_idx--;
+
+	const char *path_multikey = "[1][*][\"data\"]";
+	node = test_add_path(&tree, path_multikey, strlen(path_multikey),
+			     records, &records_idx);
+	is(node, &records[2], "add path '%s'", path_multikey);
+
+	node = json_tree_lookup_path_entry(&tree, &tree.root, path_multikey,
+					   strlen(path_multikey), INDEX_BASE,
+					   struct test_struct, node);
+	is(node, &records[2], "lookup path '%s'", path_multikey);
+
+	token = &records[records_idx++].node;
+	token->type = JSON_TOKEN_NUM;
+	token->num = 3;
+	node = json_tree_lookup_entry(&tree, &records[0].node, token,
+				      struct test_struct, node);
+	is(node, &records[1], "lookup numeric token in multikey node");
+
+	token->type = JSON_TOKEN_ANY;
+	node = json_tree_lookup_entry(&tree, &records[0].node, token,
+				      struct test_struct, node);
+	is(node, &records[1], "lookup any token in multikey node");
+
+	token->type = JSON_TOKEN_STR;
+	token->str = "str";
+	token->len = strlen("str");
+	node = json_tree_lookup_entry(&tree, &records[0].node, token,
+				      struct test_struct, node);
+	is(node, &records[1], "lookup string token in multikey node");
+
 	json_tree_foreach_entry_safe(node, &tree.root, struct test_struct,
 				     node, node_tmp)
 		json_tree_del(&tree, &node->node);
@@ -433,7 +479,7 @@ test_path_cmp()
 		{"Data[1][\"Info\"].fname[1]", -1},
 	};
 	header();
-	plan(lengthof(rc) + 2);
+	plan(lengthof(rc) + 3);
 	for (size_t i = 0; i < lengthof(rc); ++i) {
 		const char *path = rc[i].path;
 		int errpos = rc[i].errpos;
@@ -444,8 +490,15 @@ test_path_cmp()
 		is(rc, errpos, "path cmp result \"%s\" with \"%s\": "
 		   "have %d, expected %d", a, path, rc, errpos);
 	}
+	char *multikey_a = "Data[*][\"FIO\"].fname[*]";
+	char *multikey_b = "[\"Data\"][*].FIO[\"fname\"][*]";
+	int ret = json_path_cmp(multikey_a, strlen(multikey_a), multikey_b,
+				strlen(multikey_b), INDEX_BASE);
+	is(ret, 0, "path cmp result \"%s\" with \"%s\": have %d, expected %d",
+	   multikey_a, multikey_b, ret, 0);
+
 	const char *invalid = "Data[[1][\"FIO\"].fname";
-	int ret = json_path_validate(a, strlen(a), INDEX_BASE);
+	ret = json_path_validate(a, strlen(a), INDEX_BASE);
 	is(ret, 0, "path %s is valid", a);
 	ret = json_path_validate(invalid, strlen(invalid), INDEX_BASE);
 	is(ret, 6, "path %s error pos %d expected %d", invalid, ret, 6);
@@ -463,14 +516,14 @@ test_path_snprint()
 	struct json_tree tree;
 	int rc = json_tree_create(&tree);
 	fail_if(rc != 0);
-	struct test_struct records[5];
-	const char *path = "[1][20][\"file\"][8]";
+	struct test_struct records[6];
+	const char *path = "[1][*][20][\"file\"][8]";
 	int path_len = strlen(path);
 
 	int records_idx = 0;
 	struct test_struct *node, *node_tmp;
 	node = test_add_path(&tree, path, path_len, records, &records_idx);
-	fail_if(&node->node != &records[3].node);
+	fail_if(&node->node != &records[4].node);
 
 	char buf[64];
 	int bufsz = sizeof(buf);
diff --git a/test/unit/json.result b/test/unit/json.result
index ee54cbe0e..3a15e84bb 100644
--- a/test/unit/json.result
+++ b/test/unit/json.result
@@ -101,7 +101,7 @@ ok 1 - subtests
 ok 2 - subtests
 	*** test_errors: done ***
 	*** test_tree ***
-    1..58
+    1..65
     ok 1 - add path '[1][10]'
     ok 2 - add path '[1][20].file'
     ok 3 - add path '[1][20].file[2]'
@@ -160,17 +160,25 @@ ok 2 - subtests
     ok 56 - last node is leaf
     ok 57 - last node became interm - it can't be leaf anymore
     ok 58 - last node is leaf
+    ok 59 - add path '[1][10]'
+    ok 60 - lookup any token in non-multikey node
+    ok 61 - add path '[1][*]["data"]'
+    ok 62 - lookup path '[1][*]["data"]'
+    ok 63 - lookup numeric token in multikey node
+    ok 64 - lookup any token in multikey node
+    ok 65 - lookup string token in multikey node
 ok 3 - subtests
 	*** test_tree: done ***
 	*** test_path_cmp ***
-    1..7
+    1..8
     ok 1 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname": have 0, expected 0
     ok 2 - path cmp result "Data[1]["FIO"].fname" with "["Data"][1].FIO["fname"]": have 0, expected 0
     ok 3 - path cmp result "Data[1]["FIO"].fname" with "Data[1]": have 1, expected 1
     ok 4 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname[1]": have -1, expected -1
     ok 5 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["Info"].fname[1]": have -1, expected -1
-    ok 6 - path Data[1]["FIO"].fname is valid
-    ok 7 - path Data[[1]["FIO"].fname error pos 6 expected 6
+    ok 6 - path cmp result "Data[*]["FIO"].fname[*]" with "["Data"][*].FIO["fname"][*]": have 0, expected 0
+    ok 7 - path Data[1]["FIO"].fname is valid
+    ok 8 - path Data[[1]["FIO"].fname error pos 6 expected 6
 ok 4 - subtests
 	*** test_path_cmp: done ***
 	*** test_path_snprint ***
-- 
2.21.0

  reply	other threads:[~2019-03-01 13:50 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-19 10:31 Kirill Shcherbatov
2019-02-26 16:58 ` Vladimir Davydov
2019-02-27 14:07   ` [tarantool-patches] " Kirill Shcherbatov
2019-02-28 17:10     ` Vladimir Davydov
2019-03-01 13:50       ` Kirill Shcherbatov [this message]
2019-03-01 16:06         ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ab51310a-8429-cbb2-860c-1faa71cda99c@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [tarantool-patches] Re: [PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox