[PATCH v1 1/1] lib: introduce a new JSON_TOKEN_ANY json token

Kirill Shcherbatov kshcherbatov at tarantool.org
Tue Feb 19 13:31:43 MSK 2019


Introduced a new JSON_TOKEN_ANY json token that makes possible to
perform anonymous lookup in marked tree nodes. This feature is
required to implement multikey indexes.
Since the token entered into the parser becomes available to user,
additional server-side check is introduced so that an error
occurs when trying to create a multikey index.

Needed for #1260

With the introduction of multikey indexes, the key_parts_are_compatible
function will not change significantly: here
https://gist.github.com/kshcherbatov/c0e48138fc16678ad9a82646f00c1881
you can see how the check is made that the multikey indexes are compatible
with each other.

https://github.com/tarantool/tarantool/tree/kshch/gh-1257-multikey-index-json-any-token
https://github.com/tarantool/tarantool/issues/1257
---
 src/box/index_def.c       | 60 +++++++++++++++++++++++++++++++++------
 src/lib/json/json.c       | 38 +++++++++++++++++++------
 src/lib/json/json.h       | 21 ++++++++++++--
 test/engine/json.result   | 21 ++++++++++++++
 test/engine/json.test.lua |  8 ++++++
 test/unit/json.c          | 52 +++++++++++++++++++++++++++++----
 test/unit/json.result     | 10 +++++--
 7 files changed, 184 insertions(+), 26 deletions(-)

diff --git a/src/box/index_def.c b/src/box/index_def.c
index 6c37f9f1d..0f99f1759 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -244,6 +244,56 @@ index_def_cmp(const struct index_def *key1, const struct index_def *key2)
 			    key2->key_def->parts, key2->key_def->part_count);
 }
 
+/**
+ * Test whether key_parts a and b are compatible:
+ *  + field numbers are differ OR
+ *  + json paths are differ
+ * Also perform integrity check: parts must not define multikey
+ * index.
+ */
+static bool
+key_parts_are_compatible(struct key_part *a, struct key_part *b,
+			 struct index_def *index_def, const char *space_name)
+{
+	if (a->fieldno != b->fieldno)
+		return true;
+	struct json_lexer lexer_a, lexer_b;
+	json_lexer_create(&lexer_a, a->path, a->path_len, TUPLE_INDEX_BASE);
+	json_lexer_create(&lexer_b, b->path, b->path_len, TUPLE_INDEX_BASE);
+	struct json_token token_a, token_b;
+	/* For the sake of json_token_cmp(). */
+	token_a.parent = NULL;
+	token_b.parent = NULL;
+	int a_rc, b_rc;
+	int token_idx = 0,  differ_token_idx = 0;
+	int a_multikey_rank = 0, b_multikey_rank = 0;
+	while ((a_rc = json_lexer_next_token(&lexer_a, &token_a)) == 0 &&
+	       (b_rc = json_lexer_next_token(&lexer_b, &token_b)) == 0 &&
+	       token_a.type != JSON_TOKEN_END &&
+	       token_b.type != JSON_TOKEN_END) {
+		token_idx++;
+		if (differ_token_idx == 0) {
+			if (json_token_cmp(&token_a, &token_b) != 0)
+				differ_token_idx = token_idx;
+		}
+		if (token_a.type == JSON_TOKEN_ANY)
+			a_multikey_rank = token_idx;
+		if (token_b.type == JSON_TOKEN_ANY)
+			b_multikey_rank = token_idx;
+	}
+	if (a_multikey_rank > 0 || b_multikey_rank > 0) {
+		diag_set(ClientError, ER_MODIFY_INDEX,
+			 index_def->name, space_name,
+			 "multikey index feature is not supported yet");
+		return false;
+	}
+	if (differ_token_idx > 0 || token_b.type != token_a.type)
+		return true;
+	diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name,
+		"same key part is indexed twice");
+	return false;
+}
+
 bool
 index_def_is_valid(struct index_def *index_def, const char *space_name)
 
@@ -282,15 +332,9 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
 			 */
 			struct key_part *part_a = &index_def->key_def->parts[i];
 			struct key_part *part_b = &index_def->key_def->parts[j];
-			if (part_a->fieldno == part_b->fieldno &&
-			    json_path_cmp(part_a->path, part_a->path_len,
-					  part_b->path, part_b->path_len,
-					  TUPLE_INDEX_BASE) == 0) {
-				diag_set(ClientError, ER_MODIFY_INDEX,
-					 index_def->name, space_name,
-					 "same key part is indexed twice");
+			if (!key_parts_are_compatible(part_a, part_b, index_def,
+						      space_name))
 				return false;
-			}
 		}
 	}
 	return true;
diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index 1b1a3ec2c..019917825 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -146,18 +146,25 @@ json_parse_integer(struct json_lexer *lexer, struct json_token *token)
 	int len = 0;
 	int value = 0;
 	char c = *pos;
-	if (! isdigit(c))
-		return lexer->symbol_count + 1;
+	if (!isdigit(c)) {
+		if (c != '*')
+			return lexer->symbol_count + 1;
+		token->type = JSON_TOKEN_ANY;
+		++len;
+		++pos;
+		goto end;
+	}
 	do {
 		value = value * 10 + c - (int)'0';
 		++len;
 	} while (++pos < end && isdigit((c = *pos)));
 	if (value < lexer->index_base)
 		return lexer->symbol_count + 1;
-	lexer->offset += len;
-	lexer->symbol_count += len;
 	token->type = JSON_TOKEN_NUM;
 	token->num = value - lexer->index_base;
+end:
+	lexer->offset += len;
+	lexer->symbol_count += len;
 	return 0;
 }
 
@@ -252,10 +259,7 @@ json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 	}
 }
 
-/**
- * Compare JSON token keys.
- */
-static int
+int
 json_token_cmp(const struct json_token *a, const struct json_token *b)
 {
 	if (a->parent != b->parent)
@@ -270,7 +274,7 @@ json_token_cmp(const struct json_token *a, const struct json_token *b)
 	} else if (a->type == JSON_TOKEN_NUM) {
 		ret = a->num - b->num;
 	} else {
-		unreachable();
+		assert(a->type == JSON_TOKEN_ANY);
 	}
 	return ret;
 }
@@ -332,6 +336,9 @@ json_token_snprint(char *buf, int size, const struct json_token *token,
 	case JSON_TOKEN_STR:
 		len = snprintf(buf, size, "[\"%.*s\"]", token->len, token->str);
 		break;
+	case JSON_TOKEN_ANY:
+		len = snprintf(buf, size, "[*]");
+		break;
 	default:
 		unreachable();
 	}
@@ -420,6 +427,9 @@ json_token_hash(struct json_token *token)
 	} else if (token->type == JSON_TOKEN_NUM) {
 		data = &token->num;
 		data_size = sizeof(token->num);
+	} else if (token->type == JSON_TOKEN_ANY) {
+		data = "*";
+		data_size = 1;
 	} else {
 		unreachable();
 	}
@@ -435,6 +445,7 @@ json_tree_create(struct json_tree *tree)
 	tree->root.type = JSON_TOKEN_END;
 	tree->root.max_child_idx = -1;
 	tree->root.sibling_idx = -1;
+	tree->root.is_multikey = false;
 	tree->hash = mh_json_new();
 	return tree->hash == NULL ? -1 : 0;
 }
@@ -479,11 +490,14 @@ json_tree_add(struct json_tree *tree, struct json_token *parent,
 	      struct json_token *token)
 {
 	assert(json_tree_lookup(tree, parent, token) == NULL);
+	assert(token->type != JSON_TOKEN_ANY ||
+	       (!parent->is_multikey && json_token_is_leaf(parent)));
 	token->parent = parent;
 	token->children = NULL;
 	token->children_capacity = 0;
 	token->max_child_idx = -1;
 	token->hash = json_token_hash(token);
+	token->is_multikey = false;
 	int insert_idx = token->type == JSON_TOKEN_NUM ?
 			 (int)token->num : parent->max_child_idx + 1;
 	/*
@@ -520,6 +534,8 @@ json_tree_add(struct json_tree *tree, struct json_token *parent,
 	assert(parent->children[insert_idx] == NULL);
 	parent->children[insert_idx] = token;
 	parent->max_child_idx = MAX(parent->max_child_idx, insert_idx);
+	if (token->type == JSON_TOKEN_ANY)
+		parent->is_multikey = true;
 	token->sibling_idx = insert_idx;
 	assert(json_tree_lookup(tree, parent, token) == token);
 	return 0;
@@ -532,11 +548,15 @@ json_tree_del(struct json_tree *tree, struct json_token *token)
 	assert(token->sibling_idx >= 0);
 	assert(parent->children[token->sibling_idx] == token);
 	assert(json_tree_lookup(tree, parent, token) == token);
+	assert(token->type != JSON_TOKEN_ANY ||
+	       (parent->is_multikey && parent->max_child_idx == 0));
 	/*
 	 * Clear the entry corresponding to this token in parent's
 	 * children array and update max_child_idx if necessary.
 	 */
 	parent->children[token->sibling_idx] = NULL;
+	if (token->type == JSON_TOKEN_ANY)
+		parent->is_multikey = false;
 	token->sibling_idx = -1;
 	while (parent->max_child_idx >= 0 &&
 	       parent->children[parent->max_child_idx] == NULL)
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index 66cddd026..ff4ab7d91 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -66,6 +66,7 @@ struct json_lexer {
 enum json_token_type {
 	JSON_TOKEN_NUM,
 	JSON_TOKEN_STR,
+	JSON_TOKEN_ANY,
 	/** Lexer reached end of path. */
 	JSON_TOKEN_END,
 };
@@ -113,6 +114,10 @@ struct json_token {
 	 * a JSON tree root.
 	 */
 	int sibling_idx;
+	/**
+	 * True when it has the only child token JSON_TOKEN_ANY.
+	 */
+	bool is_multikey;
 	/**
 	 * Hash value of the token. Used for lookups in a JSON tree.
 	 * For more details, see the comment to json_tree::hash.
@@ -236,6 +241,12 @@ json_lexer_create(struct json_lexer *lexer, const char *src, int src_len,
 int
 json_lexer_next_token(struct json_lexer *lexer, struct json_token *token);
 
+/**
+ * Compare JSON token keys.
+ */
+int
+json_token_cmp(const struct json_token *a, const struct json_token *b);
+
 /**
  * Compare two JSON paths using Lexer class.
  * - in case of paths that have same token-sequence prefix,
@@ -307,10 +318,16 @@ json_tree_lookup(struct json_tree *tree, struct json_token *parent,
 		 const struct json_token *token)
 {
 	struct json_token *ret = NULL;
+	if (unlikely(parent->is_multikey &&
+		     (token->type == JSON_TOKEN_NUM ||
+		      token->type == JSON_TOKEN_ANY))) {
+		assert(parent->max_child_idx == 0);
+		return parent->children[0];
+	}
 	if (likely(token->type == JSON_TOKEN_NUM)) {
-		ret = (int)token->num < parent->children_capacity ?
+		ret = token->num <= parent->max_child_idx ?
 		      parent->children[token->num] : NULL;
-	} else {
+	} else if (token->type == JSON_TOKEN_STR) {
 		ret = json_tree_lookup_slowpath(tree, parent, token);
 	}
 	return ret;
diff --git a/test/engine/json.result b/test/engine/json.result
index 1bac85edd..e280a770c 100644
--- a/test/engine/json.result
+++ b/test/engine/json.result
@@ -683,3 +683,24 @@ town:select()
 s:drop()
 ---
 ...
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata')
+---
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': multikey index
+    feature is not supported yet'
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[*].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': multikey index
+    feature is not supported yet'
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][2].b'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': multikey index
+    feature is not supported yet'
+...
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
index 9afa3daa2..ef554247e 100644
--- a/test/engine/json.test.lua
+++ b/test/engine/json.test.lua
@@ -192,3 +192,11 @@ town:select()
 name:drop()
 town:select()
 s:drop()
+
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata')
+idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}})
+idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[*].a'}, {3, 'str', path = '["FIO"][*]["fname"][*].b'}}})
+idx = s:create_index('idx', {parts = {{3, 'str', path = 'FIO[*].fname[1].a'}, {3, 'str', path = '["FIO"][*]["fname"][2].b'}}})
diff --git a/test/unit/json.c b/test/unit/json.c
index 6448a3210..8b141f54a 100644
--- a/test/unit/json.c
+++ b/test/unit/json.c
@@ -211,7 +211,7 @@ void
 test_tree()
 {
 	header();
-	plan(58);
+	plan(63);
 
 	struct json_tree tree;
 	int rc = json_tree_create(&tree);
@@ -411,6 +411,41 @@ test_tree()
 	   "last node became interm - it can't be leaf anymore");
 	is(json_token_is_leaf(&records[3].node), true, "last node is leaf");
 
+	json_tree_foreach_entry_safe(node, &tree.root, struct test_struct,
+				     node, node_tmp)
+		json_tree_del(&tree, &node->node);
+
+	/* Test multikey tokens. */
+	records_idx = 0;
+	char *path_multikey = "[*][\"data\"]";
+	node = test_add_path(&tree, path_multikey, strlen(path_multikey),
+			     records, &records_idx);
+	is(node, &records[1], "add path '%s'", path_multikey);
+
+	node = json_tree_lookup_path_entry(&tree, &tree.root, path_multikey,
+					   strlen(path_multikey), INDEX_BASE,
+					   struct test_struct, node);
+	is(node, &records[1], "lookup path '%s'", path_multikey);
+
+	token = &records[records_idx++].node;
+	token->type = JSON_TOKEN_NUM;
+	token->num = 3;
+	node = json_tree_lookup_entry(&tree, &tree.root, token,
+				      struct test_struct, node);
+	is(node, &records[0], "lookup numeric token in multikey node");
+
+	token->type = JSON_TOKEN_ANY;
+	node = json_tree_lookup_entry(&tree, &tree.root, token,
+				      struct test_struct, node);
+	is(node, &records[0], "lookup any token in multikey node");
+
+	token->type = JSON_TOKEN_STR;
+	token->str = "invalid";
+	token->len = strlen("invalid");
+	node = json_tree_lookup_entry(&tree, &tree.root, token,
+				      struct test_struct, node);
+	is(node, NULL, "lookup string token in multikey node");
+
 	json_tree_foreach_entry_safe(node, &tree.root, struct test_struct,
 				     node, node_tmp)
 		json_tree_del(&tree, &node->node);
@@ -433,7 +468,7 @@ test_path_cmp()
 		{"Data[1][\"Info\"].fname[1]", -1},
 	};
 	header();
-	plan(lengthof(rc) + 2);
+	plan(lengthof(rc) + 3);
 	for (size_t i = 0; i < lengthof(rc); ++i) {
 		const char *path = rc[i].path;
 		int errpos = rc[i].errpos;
@@ -450,6 +485,13 @@ test_path_cmp()
 	ret = json_path_validate(invalid, strlen(invalid), INDEX_BASE);
 	is(ret, 6, "path %s error pos %d expected %d", invalid, ret, 6);
 
+	char *multikey_a = "Data[*][\"FIO\"].fname[*]";
+	char *multikey_b = "[\"Data\"][*].FIO[\"fname\"][*]";
+	ret = json_path_cmp(multikey_a, strlen(multikey_a), multikey_b,
+			    strlen(multikey_b), INDEX_BASE);
+	is(ret, 0, "path cmp result \"%s\" with \"%s\": have %d, expected %d",
+	   multikey_a, multikey_b, ret, 0);
+
 	check_plan();
 	footer();
 }
@@ -463,14 +505,14 @@ test_path_snprint()
 	struct json_tree tree;
 	int rc = json_tree_create(&tree);
 	fail_if(rc != 0);
-	struct test_struct records[5];
-	const char *path = "[1][20][\"file\"][8]";
+	struct test_struct records[6];
+	const char *path = "[1][*][20][\"file\"][8]";
 	int path_len = strlen(path);
 
 	int records_idx = 0;
 	struct test_struct *node, *node_tmp;
 	node = test_add_path(&tree, path, path_len, records, &records_idx);
-	fail_if(&node->node != &records[3].node);
+	fail_if(&node->node != &records[4].node);
 
 	char buf[64];
 	int bufsz = sizeof(buf);
diff --git a/test/unit/json.result b/test/unit/json.result
index ee54cbe0e..8a6c7db73 100644
--- a/test/unit/json.result
+++ b/test/unit/json.result
@@ -101,7 +101,7 @@ ok 1 - subtests
 ok 2 - subtests
 	*** test_errors: done ***
 	*** test_tree ***
-    1..58
+    1..63
     ok 1 - add path '[1][10]'
     ok 2 - add path '[1][20].file'
     ok 3 - add path '[1][20].file[2]'
@@ -160,10 +160,15 @@ ok 2 - subtests
     ok 56 - last node is leaf
     ok 57 - last node became interm - it can't be leaf anymore
     ok 58 - last node is leaf
+    ok 59 - add path '[*]["data"]'
+    ok 60 - lookup path '[*]["data"]'
+    ok 61 - lookup numeric token in multikey node
+    ok 62 - lookup any token in multikey node
+    ok 63 - lookup string token in multikey node
 ok 3 - subtests
 	*** test_tree: done ***
 	*** test_path_cmp ***
-    1..7
+    1..8
     ok 1 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["FIO"].fname": have 0, expected 0
     ok 2 - path cmp result "Data[1]["FIO"].fname" with "["Data"][1].FIO["fname"]": have 0, expected 0
     ok 3 - path cmp result "Data[1]["FIO"].fname" with "Data[1]": have 1, expected 1
@@ -171,6 +176,7 @@ ok 3 - subtests
     ok 5 - path cmp result "Data[1]["FIO"].fname" with "Data[1]["Info"].fname[1]": have -1, expected -1
     ok 6 - path Data[1]["FIO"].fname is valid
     ok 7 - path Data[[1]["FIO"].fname error pos 6 expected 6
+    ok 8 - path cmp result "Data[*]["FIO"].fname[*]" with "["Data"][*].FIO["fname"][*]": have 0, expected 0
 ok 4 - subtests
 	*** test_path_cmp: done ***
 	*** test_path_snprint ***
-- 
2.20.1




More information about the Tarantool-patches mailing list