[tarantool-patches] [PATCH v2 5/5] box: introduce multikey indexes in memtx

Kirill Shcherbatov kshcherbatov at tarantool.org
Thu Mar 21 15:35:35 MSK 2019


I've rebased patchset on master branch with new hints.

=================================================

Part of #1257

@TarantoolBot document
Title: introduce multikey indexes in memtx
Multikey indexes allows you to automatically index set of documents
by JSON paths having array index placeholder "[*]". Multikey index
cannot be primary as it cannot be unique(by definition).
Multikey index parts must be compatible: only one "[*]" placeholder
is allowed in same position(for all JSON paths in index parts).

Example:
s = box.schema.space.create('withdata')
pk = s:create_index('pk')
parts = {
	{2, 'str', path = 'data[*].name'},
        {2, 'str', path = 'data[*].extra.phone'}
}
idx = s:create_index('idx', {parts = parts})
s:insert({1, {data = {{name="A", extra={phone="111"}},
                      {name="B", extra={phone="111"}}},
             garbage = 1}}
idx:get({'A', '111'})
---
 src/box/errcode.h                 |   1 +
 src/box/index_def.c               |  25 +++-
 src/box/key_def.c                 |  10 ++
 src/box/key_def.h                 |  18 +++
 src/box/memtx_space.c             |  18 +++
 src/box/memtx_tree.c              | 185 ++++++++++++++++++++----
 src/box/tuple.c                   |  28 +++-
 src/box/tuple.h                   | 111 +++++++++++++--
 src/box/tuple_compare.cc          |  84 ++++++++---
 src/box/tuple_extract_key.cc      |   2 +-
 src/box/tuple_field_map.c         |  27 ++++
 src/box/tuple_field_map.h         |  87 +++++++++++-
 src/box/tuple_format.c            | 104 +++++++++++---
 src/box/vinyl.c                   |   6 +
 test/box/misc.result              |   1 +
 test/engine/json.result           |  13 --
 test/engine/json.test.lua         |   7 -
 test/engine/multikey_idx.result   | 228 ++++++++++++++++++++++++++++++
 test/engine/multikey_idx.test.lua |  67 +++++++++
 19 files changed, 909 insertions(+), 113 deletions(-)
 create mode 100644 test/engine/multikey_idx.result
 create mode 100644 test/engine/multikey_idx.test.lua

diff --git a/src/box/errcode.h b/src/box/errcode.h
index 7764aa352..c6f42463d 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -240,6 +240,7 @@ struct errcode_record {
 	/*185 */_(ER_SQL_UNKNOWN_TOKEN,		"Syntax error: unrecognized token: '%.*s'") \
 	/*186 */_(ER_SQL_PARSER_GENERIC,	"%s") \
 	/*187 */_(ER_SQL_ANALYZE_ARGUMENT,	"ANALYZE statement argument %s is not a base table") \
+	/*188 */_(ER_INDEX_MULTIKEY_INVALID,	"Multikey index is invalid: %s") \
 
 /*
  * !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/index_def.c b/src/box/index_def.c
index f0c7dc4f6..dfa539f86 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -268,9 +268,15 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
 			 space_name, "too many key parts");
 		return false;
 	}
+	if (index_def->iid == 0 && key_def_is_multikey(index_def->key_def)) {
+		diag_set(ClientError, ER_MODIFY_INDEX, index_def->name,
+			 space_name, "primary key cannot be multikey");
+		return false;
+	}
 	for (uint32_t i = 0; i < index_def->key_def->part_count; i++) {
-		assert(index_def->key_def->parts[i].type < field_type_MAX);
-		if (index_def->key_def->parts[i].fieldno > BOX_INDEX_FIELD_MAX) {
+		struct key_part *parts = index_def->key_def->parts;
+		assert(parts[i].type < field_type_MAX);
+		if (parts[i].fieldno > BOX_INDEX_FIELD_MAX) {
 			diag_set(ClientError, ER_MODIFY_INDEX, index_def->name,
 				 space_name, "field no is too big");
 			return false;
@@ -280,8 +286,8 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
 			 * Courtesy to a user who could have made
 			 * a typo.
 			 */
-			struct key_part *part_a = &index_def->key_def->parts[i];
-			struct key_part *part_b = &index_def->key_def->parts[j];
+			struct key_part *part_a = &parts[i];
+			struct key_part *part_b = &parts[j];
 			if (part_a->fieldno == part_b->fieldno &&
 			    json_path_cmp(part_a->path, part_a->path_len,
 					  part_b->path, part_b->path_len,
@@ -292,6 +298,17 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
 				return false;
 			}
 		}
+		if (key_def_is_multikey(index_def->key_def) &&
+		    json_path_cmp(parts[i].path, parts[i].path_len,
+				  parts[index_def->key_def->
+					multikey_part_idx].path,
+				  index_def->key_def->multikey_path_len,
+				  true, TUPLE_INDEX_BASE) != 0) {
+			diag_set(ClientError, ER_MODIFY_INDEX,
+				 index_def->name, space_name,
+				 "multikey index parts are incompatible");
+			return false;
+		}
 	}
 	return true;
 }
diff --git a/src/box/key_def.c b/src/box/key_def.c
index f9e464402..ffc1e1665 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -164,6 +164,13 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 		*path_pool += path_len;
 		memcpy(def->parts[part_no].path, path, path_len);
 		def->parts[part_no].path_len = path_len;
+		int multikey_path_len;
+		if (json_path_is_multikey(path, path_len, &multikey_path_len,
+					  TUPLE_INDEX_BASE) &&
+		    def->multikey_path_len <= (uint32_t)multikey_path_len) {
+			def->multikey_part_idx = part_no;
+			def->multikey_path_len = (uint32_t)multikey_path_len;
+		}
 	} else {
 		def->parts[part_no].path = NULL;
 		def->parts[part_no].path_len = 0;
@@ -185,6 +192,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
 	}
 
 	def->part_count = part_count;
+	def->multikey_part_idx = part_count;
 	def->unique_part_count = part_count;
 
 	/* A pointer to the JSON paths data in the new key_def. */
@@ -253,6 +261,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 	}
 
 	key_def->part_count = part_count;
+	key_def->multikey_part_idx = part_count;
 	key_def->unique_part_count = part_count;
 
 	for (uint32_t item = 0; item < part_count; ++item) {
@@ -672,6 +681,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 		return NULL;
 	}
 	new_def->part_count = new_part_count;
+	new_def->multikey_part_idx = new_part_count;
 	new_def->unique_part_count = new_part_count;
 	new_def->is_nullable = first->is_nullable || second->is_nullable;
 	new_def->has_optional_parts = first->has_optional_parts ||
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 288cf7270..004794969 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -234,6 +234,18 @@ struct key_def {
 	bool has_optional_parts;
 	/** Key fields mask. @sa column_mask.h for details. */
 	uint64_t column_mask;
+	/**
+	 * In case of multikey index, the index of the key_part
+	 * containing JSON path with array index placeholder "[*]".
+	 * Otherwise multikey_part_idx == part_count.
+	 */
+	uint32_t multikey_part_idx;
+	/**
+	 * In case of multikey index, the length of the
+	 * parts[multikey_part_idx].path substring "...[*]"
+	 * @see json_path_is_multikey().
+	 */
+	uint32_t multikey_path_len;
 	/** The size of the 'parts' array. */
 	uint32_t part_count;
 	/** Description of parts of a multipart index. */
@@ -472,6 +484,12 @@ key_def_is_sequential(const struct key_def *key_def)
 	return true;
 }
 
+static inline bool
+key_def_is_multikey(const struct key_def *key_def)
+{
+	return key_def->multikey_part_idx < key_def->part_count;
+}
+
 /**
  * Return true if @a key_def defines has fields that requires
  * special collation comparison.
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index 92ec0b300..9a5132979 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -638,6 +638,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
 				 "HASH index must be unique");
 			return -1;
 		}
+		if (key_def_is_multikey(index_def->key_def)) {
+			diag_set(ClientError, ER_MODIFY_INDEX,
+				 index_def->name, space_name(space),
+				 "HASH index cannot be multikey");
+			return -1;
+		}
 		break;
 	case TREE:
 		/* TREE index has no limitations. */
@@ -661,6 +667,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
 				 "RTREE index field type must be ARRAY");
 			return -1;
 		}
+		if (key_def_is_multikey(index_def->key_def)) {
+			diag_set(ClientError, ER_MODIFY_INDEX,
+				 index_def->name, space_name(space),
+				 "RTREE index cannot be multikey");
+			return -1;
+		}
 		/* no furter checks of parts needed */
 		return 0;
 	case BITSET:
@@ -683,6 +695,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
 				 "BITSET index field type must be NUM or STR");
 			return -1;
 		}
+		if (key_def_is_multikey(index_def->key_def)) {
+			diag_set(ClientError, ER_MODIFY_INDEX,
+				 index_def->name, space_name(space),
+				 "BITSET index cannot be multikey");
+			return -1;
+		}
 		/* no furter checks of parts needed */
 		return 0;
 	default:
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index fe037c54a..c9d3b2130 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -528,7 +528,8 @@ memtx_tree_index_get(struct index *base, const char *key,
 	struct memtx_tree_key_data key_data;
 	key_data.key = key;
 	key_data.part_count = part_count;
-	key_data.hint = key_hint(key, part_count, cmp_def);
+	if (!key_def_is_multikey(cmp_def))
+		key_data.hint = key_hint(key, part_count, cmp_def);
 	struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
 	*result = res != NULL ? res->tuple : NULL;
 	return 0;
@@ -584,6 +585,79 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
 	return 0;
 }
 
+int
+memtx_tree_index_replace_multikey(struct index *base, struct tuple *old_tuple,
+				  struct tuple *new_tuple,
+				  enum dup_replace_mode mode,
+				  struct tuple **result)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
+	if (new_tuple != NULL) {
+		int errcode = 0, tree_res = 0;
+		struct tuple *dup_tuple = NULL;
+		int multikey_idx = 0;
+		uint32_t items = tuple_field_multikey_items(new_tuple, cmp_def);
+		for (; (uint32_t)multikey_idx < items; multikey_idx++) {
+			struct memtx_tree_data new_data;
+			new_data.tuple = new_tuple;
+			new_data.hint = multikey_idx;
+			struct memtx_tree_data dup_data;
+			dup_data.tuple = NULL;
+			tree_res = memtx_tree_insert(&index->tree, new_data,
+						     &dup_data);
+			if (tree_res != 0) {
+				diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+					 "memtx_tree_index", "replace");
+				break;
+			}
+			dup_tuple = dup_data.tuple;
+			errcode = replace_check_dup(old_tuple, dup_tuple, mode);
+			if (errcode != 0) {
+				memtx_tree_delete(&index->tree, new_data);
+				if (dup_tuple != NULL) {
+					memtx_tree_insert(&index->tree,
+							  dup_data, NULL);
+				}
+				struct space *sp =
+					space_cache_find(base->def->space_id);
+				if (sp != NULL) {
+					diag_set(ClientError, errcode,
+						 base->def->name,
+						 space_name(sp));
+				}
+				break;
+			}
+		}
+		if (tree_res != 0 || errcode != 0) {
+			multikey_idx--;
+			for (; multikey_idx >= 0; multikey_idx--) {
+				struct memtx_tree_data data;
+				data.tuple = new_tuple;
+				data.hint = multikey_idx;
+				memtx_tree_delete(&index->tree, data);
+			}
+			return -1;
+		}
+		if (dup_tuple != NULL) {
+			*result = dup_tuple;
+			return 0;
+		}
+	}
+	if (old_tuple != NULL) {
+		uint32_t items = tuple_field_multikey_items(old_tuple, cmp_def);
+		for (uint32_t multikey_idx = 0; multikey_idx < items;
+		     multikey_idx++) {
+			struct memtx_tree_data old_data;
+			old_data.tuple = old_tuple;
+			old_data.hint = multikey_idx;
+			memtx_tree_delete(&index->tree, old_data);
+		}
+	}
+	*result = old_tuple;
+	return 0;
+}
+
 static struct iterator *
 memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
 				 const char *key, uint32_t part_count)
@@ -621,7 +695,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
 	it->type = type;
 	it->key_data.key = key;
 	it->key_data.part_count = part_count;
-	it->key_data.hint = key_hint(key, part_count, cmp_def);
+	if (!key_def_is_multikey(cmp_def))
+		it->key_data.hint = key_hint(key, part_count, cmp_def);
 	it->index_def = base->def;
 	it->tree = &index->tree;
 	it->tree_iterator = memtx_tree_invalid_iterator();
@@ -656,34 +731,43 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
 }
 
 static int
-memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
+memtx_tree_index_build_array_realloc(struct memtx_tree_index *index,
+				     uint32_t items)
 {
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
+	bool need_realloc = false;
 	if (index->build_array == NULL) {
-		index->build_array = malloc(MEMTX_EXTENT_SIZE);
-		if (index->build_array == NULL) {
-			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
-				 "memtx_tree_index", "build_next");
-			return -1;
-		}
 		index->build_array_alloc_size =
 			MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
+		need_realloc = true;
 	}
-	assert(index->build_array_size <= index->build_array_alloc_size);
-	if (index->build_array_size == index->build_array_alloc_size) {
-		index->build_array_alloc_size = index->build_array_alloc_size +
-					index->build_array_alloc_size / 2;
-		struct memtx_tree_data *tmp =
-			realloc(index->build_array,
-				index->build_array_alloc_size * sizeof(*tmp));
-		if (tmp == NULL) {
-			diag_set(OutOfMemory, index->build_array_alloc_size *
-				 sizeof(*tmp), "memtx_tree_index", "build_next");
-			return -1;
-		}
-		index->build_array = tmp;
+	uint32_t build_array_new_size = index->build_array_size + items;
+	if (build_array_new_size > index->build_array_alloc_size) {
+		index->build_array_alloc_size +=
+			MAX(index->build_array_alloc_size / 2,
+			    build_array_new_size - index->build_array_alloc_size);
+		need_realloc = true;
+	}
+	if (!need_realloc)
+		return 0;
+	struct memtx_tree_data *tmp =
+		realloc(index->build_array,
+			index->build_array_alloc_size * sizeof(*tmp));
+	if (tmp == NULL) {
+		diag_set(OutOfMemory, index->build_array_alloc_size *
+			 sizeof(*tmp), "memtx_tree_index", "build_next");
+		return -1;
 	}
+	index->build_array = tmp;
+	return 0;
+}
+
+static int
+memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
+	if (memtx_tree_index_build_array_realloc(index, 1) != 0)
+		return -1;
 	struct memtx_tree_data *elem =
 		&index->build_array[index->build_array_size++];
 	elem->tuple = tuple;
@@ -691,6 +775,24 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
 	return 0;
 }
 
+static int
+memtx_tree_index_build_next_multikey(struct index *base, struct tuple *tuple)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
+	uint32_t items = tuple_field_multikey_items(tuple, cmp_def);
+	if (memtx_tree_index_build_array_realloc(index, items) != 0)
+		return -1;
+	for (uint32_t multikey_idx = 0; multikey_idx < items; multikey_idx++) {
+		struct memtx_tree_data *elem =
+			&index->build_array[index->build_array_size++];
+		assert(index->build_array_size <= index->build_array_alloc_size);
+		elem->tuple = tuple;
+		elem->hint = multikey_idx;
+	}
+	return 0;
+}
+
 static void
 memtx_tree_index_end_build(struct index *base)
 {
@@ -793,6 +895,36 @@ static const struct index_vtab memtx_tree_index_vtab = {
 	/* .end_build = */ memtx_tree_index_end_build,
 };
 
+static const struct index_vtab memtx_tree_index_multikey_vtab = {
+	/* .destroy = */ memtx_tree_index_destroy,
+	/* .commit_create = */ generic_index_commit_create,
+	/* .abort_create = */ generic_index_abort_create,
+	/* .commit_modify = */ generic_index_commit_modify,
+	/* .commit_drop = */ generic_index_commit_drop,
+	/* .update_def = */ memtx_tree_index_update_def,
+	/* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
+	/* .def_change_requires_rebuild = */
+		memtx_index_def_change_requires_rebuild,
+	/* .size = */ memtx_tree_index_size,
+	/* .bsize = */ memtx_tree_index_bsize,
+	/* .min = */ generic_index_min,
+	/* .max = */ generic_index_max,
+	/* .random = */ memtx_tree_index_random,
+	/* .count = */ memtx_tree_index_count,
+	/* .get = */ memtx_tree_index_get,
+	/* .replace = */ memtx_tree_index_replace_multikey,
+	/* .create_iterator = */ memtx_tree_index_create_iterator,
+	/* .create_snapshot_iterator = */
+		memtx_tree_index_create_snapshot_iterator,
+	/* .stat = */ generic_index_stat,
+	/* .compact = */ generic_index_compact,
+	/* .reset_stat = */ generic_index_reset_stat,
+	/* .begin_build = */ memtx_tree_index_begin_build,
+	/* .reserve = */ memtx_tree_index_reserve,
+	/* .build_next = */ memtx_tree_index_build_next_multikey,
+	/* .end_build = */ memtx_tree_index_end_build,
+};
+
 struct index *
 memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
 {
@@ -803,8 +935,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
 			 "malloc", "struct memtx_tree_index");
 		return NULL;
 	}
+	const struct index_vtab *vtab = key_def_is_multikey(def->key_def) ?
+					&memtx_tree_index_multikey_vtab :
+					&memtx_tree_index_vtab;
 	if (index_create(&index->base, (struct engine *)memtx,
-			 &memtx_tree_index_vtab, def) != 0) {
+			 vtab, def) != 0) {
 		free(index);
 		return NULL;
 	}
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 68f0670f2..985117957 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -129,8 +129,6 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
 	struct field_map_builder builder;
 	int rc = tuple_field_map_create(&builder, format, tuple, true);
 	region_truncate(region, region_svp);
-	assert(rc != 0 ||
-	       field_map_builder_size(&builder) == format->field_map_size);
 	return rc;
 }
 
@@ -455,7 +453,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
 }
 
 int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
+tuple_go_to_path(const char **data, const char *path, uint32_t path_len,
+		 int multikey_idx)
 {
 	int rc;
 	struct json_lexer lexer;
@@ -463,6 +462,10 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
 	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
 		switch (token.type) {
+		case JSON_TOKEN_ANY:
+			assert(multikey_idx >= 0);
+			token.num = multikey_idx;
+			FALLTHROUGH;
 		case JSON_TOKEN_NUM:
 			rc = tuple_field_go_to_index(data, token.num);
 			break;
@@ -532,7 +535,24 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
 	}
 	return tuple_field_raw_by_path(format, tuple, field_map, fieldno,
 				       path + lexer.offset,
-				       path_len - lexer.offset, NULL);
+				       path_len - lexer.offset, NULL, -1);
+}
+
+uint32_t
+tuple_field_raw_multikey_items(struct tuple_format *format, const char *data,
+			       const uint32_t *field_map,
+			       struct key_def *key_def)
+{
+	assert(key_def_is_multikey(key_def));
+	struct key_part *part = &key_def->parts[key_def->multikey_part_idx];
+	const char *array_raw =
+		tuple_field_raw_by_path(format, data, field_map, part->fieldno,
+					part->path, key_def->multikey_path_len,
+					NULL, -1);
+	if (array_raw == NULL)
+		return 0;
+	assert(mp_typeof(*array_raw) == MP_ARRAY);
+	return mp_decode_array(&array_raw);
 }
 
 /* }}} tuple_field_* getters */
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 8b12fd5a8..060e4a076 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -508,14 +508,51 @@ tuple_field_count(const struct tuple *tuple)
  *                      with NULL.
  * @param path The path to process.
  * @param path_len The length of the @path.
+ * @param multikey_idx The multikey index hint - index of
+ *                     document to retrieve when array index
+ *                     placeholder "[*]" is met.
  * @retval 0 On success.
  * @retval -1 In case of error in JSON path.
  */
 int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
+tuple_go_to_path(const char **data, const char *path, uint32_t path_len,
+		 int multikey_idx);
 
 /**
- * Get tuple field by field index and relative JSON path.
+ * Get tuple field by field_map, offset_slot and multikey_idx.
+ * @param tuple MessagePack tuple's body.
+ * @param field_map Tuple field map.
+ * @param offset_slot The field_map slot with field to get offset.
+ * @param multikey_idx The multikey index hint - index of
+ *                     document to retrieve when array index
+ *                     placeholder "[*]" is met.
+ * @retval Not NULL field pointer on success.
+ * @retval NULL otherwise, when field is absents.
+ */
+static const char *
+tuple_field_raw_by_offset_slot(const char *tuple, const uint32_t *field_map,
+			       int32_t offset_slot, int multikey_idx)
+{
+	assert(offset_slot != TUPLE_OFFSET_SLOT_NIL);
+	uint32_t offset;
+	if (multikey_idx >= 0) {
+		assert(field_map[offset_slot] != 0);
+		struct field_map_ext *extent =
+			field_map_ext_get(field_map, offset_slot);
+		if ((uint32_t)multikey_idx >= extent->items)
+			return NULL;
+		offset = extent->offset[multikey_idx];
+	} else {
+		offset = field_map[offset_slot];
+	}
+	if (offset == 0)
+		return NULL;
+	return tuple + offset;
+}
+
+/**
+ * Get tuple field by field index, relative JSON path and
+ * multikey_idx.
  * @param format Tuple format.
  * @param tuple MessagePack tuple's body.
  * @param field_map Tuple field map.
@@ -528,12 +565,15 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
  *                         access data in a single operation.
  *                         Else it is initialized with offset_slot
  *                         of format field by path.
+ * @param multikey_idx The multikey index hint - index of
+ *                     document to retrieve when array index
+ *                     placeholder "[*]" is met.
  */
 static inline const char *
 tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 			const uint32_t *field_map, uint32_t fieldno,
 			const char *path, uint32_t path_len,
-			int32_t *offset_slot_hint)
+			int32_t *offset_slot_hint, int multikey_idx)
 {
 	int32_t offset_slot;
 	if (offset_slot_hint != NULL &&
@@ -558,10 +598,8 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 		if (offset_slot_hint != NULL)
 			*offset_slot_hint = offset_slot;
 offset_slot_access:
-		/* Indexed field */
-		if (field_map[offset_slot] == 0)
-			return NULL;
-		tuple += field_map[offset_slot];
+		tuple = tuple_field_raw_by_offset_slot(tuple, field_map,
+					offset_slot, multikey_idx);
 	} else {
 		uint32_t field_count;
 parse:
@@ -572,8 +610,8 @@ parse:
 		for (uint32_t k = 0; k < fieldno; k++)
 			mp_next(&tuple);
 		if (path != NULL &&
-		    unlikely(tuple_go_to_path(&tuple, path,
-						    path_len) != 0))
+		    unlikely(tuple_go_to_path(&tuple, path, path_len,
+					      multikey_idx) != 0))
 			return NULL;
 	}
 	return tuple;
@@ -595,7 +633,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple,
 		const uint32_t *field_map, uint32_t field_no)
 {
 	return tuple_field_raw_by_path(format, tuple, field_map, field_no,
-				       NULL, 0, NULL);
+				       NULL, 0, NULL, -1);
 }
 
 /**
@@ -634,16 +672,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
 			     uint32_t path_len, uint32_t path_hash);
 
 /**
- * Get a tuple field pointed to by an index part.
+ * Get a tuple field pointed to by an index part and multikey hint.
  * @param format Tuple format.
  * @param tuple A pointer to MessagePack array.
  * @param field_map A pointer to the LAST element of field map.
+ * @param multikey_idx A multikey hint.
  * @param part Index part to use.
  * @retval Field data if the field exists or NULL.
  */
 static inline const char *
-tuple_field_raw_by_part(struct tuple_format *format, const char *data,
-			const uint32_t *field_map, struct key_part *part)
+tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data,
+				 const uint32_t *field_map,
+				 struct key_part *part, int multikey_idx)
 {
 	if (unlikely(part->format_epoch != format->epoch)) {
 		assert(format->epoch != 0);
@@ -656,7 +696,23 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data,
 	}
 	return tuple_field_raw_by_path(format, data, field_map, part->fieldno,
 				       part->path, part->path_len,
-				       &part->offset_slot_cache);
+				       &part->offset_slot_cache, multikey_idx);
+}
+
+/**
+ * Get a tuple field pointed to by an index part.
+ * @param format Tuple format.
+ * @param tuple A pointer to MessagePack array.
+ * @param field_map A pointer to the LAST element of field map.
+ * @param part Index part to use.
+ * @retval Field data if the field exists or NULL.
+ */
+static inline const char *
+tuple_field_raw_by_part(struct tuple_format *format, const char *data,
+			const uint32_t *field_map, struct key_part *part)
+{
+	return tuple_field_raw_by_part_multikey(format, data, field_map,
+						part, -1);
 }
 
 /**
@@ -672,6 +728,33 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part)
 				       tuple_field_map(tuple), part);
 }
 
+/**
+ * Get count of documents in the multikey index.
+ * @param format Tuple format.
+ * @param data A pointer to MessagePack array.
+ * @param field_map A pointer to the LAST element of field map.
+ * @param key_def Index key_definition.
+ * @retval Count of documents in the multikey index.
+ */
+uint32_t
+tuple_field_raw_multikey_items(struct tuple_format *format, const char *data,
+			       const uint32_t *field_map,
+			       struct key_def *key_def);
+
+/**
+ * Get count of documents in the multikey index.
+ * @param tuple Tuple to get the count of documents from.
+ * @param key_def Index key_definition.
+ * @retval Count of documents in the multikey index.
+ */
+static inline uint32_t
+tuple_field_multikey_items(struct tuple *tuple, struct key_def *key_def)
+{
+	return tuple_field_raw_multikey_items(tuple_format(tuple),
+					      tuple_data(tuple),
+					      tuple_field_map(tuple), key_def);
+}
+
 /**
  * @brief Tuple Interator
  */
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 93756365b..14c630af5 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -445,7 +445,8 @@ tuple_compare_field_with_type(const char *field_a, enum mp_type a_type,
 	}
 }
 
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+	 bool is_multikey>
 static inline int
 tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
 			      const struct tuple *tuple_b, hint_t tuple_b_hint,
@@ -455,8 +456,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
 	assert(!has_optional_parts || is_nullable);
 	assert(is_nullable == key_def->is_nullable);
 	assert(has_optional_parts == key_def->has_optional_parts);
-	int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
-	if (rc != 0)
+	assert(key_def_is_multikey(key_def) == is_multikey);
+	assert(!is_multikey || is_multikey == has_json_paths);
+	int rc = 0;
+	if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
 		return rc;
 	struct key_part *part = key_def->parts;
 	const char *tuple_a_raw = tuple_data(tuple_a);
@@ -499,7 +502,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
 		end = part + key_def->part_count;
 
 	for (; part < end; part++) {
-		if (has_json_paths) {
+		if (is_multikey) {
+			field_a = tuple_field_raw_by_part_multikey(format_a,
+					tuple_a_raw, field_map_a, part,
+					(int)tuple_a_hint);
+			field_b = tuple_field_raw_by_part_multikey(format_b,
+					tuple_b_raw, field_map_b, part,
+					(int)tuple_b_hint);
+		} else if (has_json_paths) {
 			field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
 							  field_map_a, part);
 			field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -556,7 +566,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
 	 */
 	end = key_def->parts + key_def->part_count;
 	for (; part < end; ++part) {
-		if (has_json_paths) {
+		if (is_multikey) {
+			field_a = tuple_field_raw_by_part_multikey(format_a,
+					tuple_a_raw, field_map_a, part,
+					(int)tuple_a_hint);
+			field_b = tuple_field_raw_by_part_multikey(format_b,
+					tuple_b_raw, field_map_b, part,
+					(int)tuple_b_hint);
+		} else if (has_json_paths) {
 			field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
 							  field_map_a, part);
 			field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -586,24 +603,28 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
 		       struct key_def *key_def)
 {
 	return tuple_compare_slowpath_hinted
-		<is_nullable, has_optional_parts, has_json_paths>
+		<is_nullable, has_optional_parts, has_json_paths, false>
 		(tuple_a, HINT_NONE, tuple_b, HINT_NONE, key_def);
 }
 
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+	 bool is_multikey>
 static inline int
 tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
 		hint_t tuple_hint, const char *key, uint32_t part_count,
 		hint_t key_hint, struct key_def *key_def)
 {
+	(void)key_hint;
 	assert(has_json_paths == key_def->has_json_paths);
 	assert(!has_optional_parts || is_nullable);
 	assert(is_nullable == key_def->is_nullable);
 	assert(has_optional_parts == key_def->has_optional_parts);
 	assert(key != NULL || part_count == 0);
 	assert(part_count <= key_def->part_count);
-	int rc = hint_cmp(tuple_hint, key_hint);
-	if (rc != 0)
+	assert(key_def_is_multikey(key_def) == is_multikey);
+	assert(!is_multikey || is_multikey == has_json_paths);
+	int rc = 0;
+	if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0)
 		return rc;
 	struct key_part *part = key_def->parts;
 	struct tuple_format *format = tuple_format(tuple);
@@ -612,7 +633,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
 	enum mp_type a_type, b_type;
 	if (likely(part_count == 1)) {
 		const char *field;
-		if (has_json_paths) {
+		if (is_multikey) {
+			field = tuple_field_raw_by_part_multikey(format,
+					tuple_raw, field_map, part,
+					(int)tuple_hint);
+		} else if (has_json_paths) {
 			field = tuple_field_raw_by_part(format, tuple_raw,
 							field_map, part);
 		} else {
@@ -642,7 +667,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
 	struct key_part *end = part + part_count;
 	for (; part < end; ++part, mp_next(&key)) {
 		const char *field;
-		if (has_json_paths) {
+		if (is_multikey) {
+			field = tuple_field_raw_by_part_multikey(format,
+					tuple_raw, field_map, part,
+					(int)tuple_hint);
+		} else if (has_json_paths) {
 			field = tuple_field_raw_by_part(format, tuple_raw,
 							field_map, part);
 		} else {
@@ -684,7 +713,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
 				uint32_t part_count, struct key_def *key_def)
 {
 	return tuple_compare_with_key_slowpath_hinted
-		<is_nullable, has_optional_parts, has_json_paths>
+		<is_nullable, has_optional_parts, has_json_paths, false>
 		(tuple, HINT_NONE, key, part_count, HINT_NONE, key_def);
 }
 
@@ -1710,7 +1739,7 @@ key_def_set_compare_func_fast(struct key_def *def)
 			tuple_compare_slowpath<false, false, false>;
 		cmp_hinted = is_sequential ?
 			tuple_compare_sequential_hinted<false, false> :
-			tuple_compare_slowpath_hinted<false, false, false>;
+			tuple_compare_slowpath_hinted<false, false, false, false>;
 	}
 	if (cmp_wk == NULL) {
 		cmp_wk = is_sequential ?
@@ -1718,7 +1747,8 @@ key_def_set_compare_func_fast(struct key_def *def)
 			tuple_compare_with_key_slowpath<false, false, false>;
 		cmp_wk_hinted = is_sequential ?
 			tuple_compare_with_key_sequential_hinted<false, false> :
-			tuple_compare_with_key_slowpath_hinted<false, false, false>;
+			tuple_compare_with_key_slowpath_hinted<false, false,
+							       false, false>;
 	}
 
 	def->tuple_compare = cmp;
@@ -1746,16 +1776,17 @@ key_def_set_compare_func_plain(struct key_def *def)
 		def->tuple_compare = tuple_compare_slowpath
 				<is_nullable, has_optional_parts, false>;
 		def->tuple_compare_hinted = tuple_compare_slowpath_hinted
-				<is_nullable, has_optional_parts, false>;
+				<is_nullable, has_optional_parts, false, false>;
 		def->tuple_compare_with_key = tuple_compare_with_key_slowpath
 				<is_nullable, has_optional_parts, false>;
 		def->tuple_compare_with_key_hinted =
 					tuple_compare_with_key_slowpath_hinted
-					<is_nullable, has_optional_parts, false>;
+					<is_nullable, has_optional_parts,
+					 false, false>;
 	}
 }
 
-template<bool is_nullable, bool has_optional_parts>
+template<bool is_nullable, bool has_optional_parts, bool is_multikey>
 static void
 key_def_set_compare_func_json(struct key_def *def)
 {
@@ -1763,12 +1794,12 @@ key_def_set_compare_func_json(struct key_def *def)
 	def->tuple_compare = tuple_compare_slowpath
 			<is_nullable, has_optional_parts, true>;
 	def->tuple_compare_hinted = tuple_compare_slowpath_hinted
-			<is_nullable, has_optional_parts, true>;
+			<is_nullable, has_optional_parts, true, is_multikey>;
 	def->tuple_compare_with_key = tuple_compare_with_key_slowpath
 			<is_nullable, has_optional_parts, true>;
 	def->tuple_compare_with_key_hinted =
 			tuple_compare_with_key_slowpath_hinted
-			<is_nullable, has_optional_parts, true>;
+			<is_nullable, has_optional_parts, true, is_multikey>;
 }
 
 void
@@ -1786,14 +1817,23 @@ key_def_set_compare_func(struct key_def *def)
 			assert(!def->is_nullable && !def->has_optional_parts);
 			key_def_set_compare_func_plain<false, false>(def);
 		}
+	} else if (key_def_is_multikey(def)) {
+		if (def->is_nullable && def->has_optional_parts) {
+			key_def_set_compare_func_json<true, true, true>(def);
+		} else if (def->is_nullable && !def->has_optional_parts) {
+			key_def_set_compare_func_json<true, false, true>(def);
+		} else {
+			assert(!def->is_nullable && !def->has_optional_parts);
+			key_def_set_compare_func_json<false, false, true>(def);
+		}
 	} else {
 		if (def->is_nullable && def->has_optional_parts) {
-			key_def_set_compare_func_json<true, true>(def);
+			key_def_set_compare_func_json<true, true, false>(def);
 		} else if (def->is_nullable && !def->has_optional_parts) {
-			key_def_set_compare_func_json<true, false>(def);
+			key_def_set_compare_func_json<true, false, false>(def);
 		} else {
 			assert(!def->is_nullable && !def->has_optional_parts);
-			key_def_set_compare_func_json<false, false>(def);
+			key_def_set_compare_func_json<false, false, false>(def);
 		}
 	}
 	key_def_set_hint_func(def);
diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
index 0a8337102..00bcfacdd 100644
--- a/src/box/tuple_extract_key.cc
+++ b/src/box/tuple_extract_key.cc
@@ -310,7 +310,7 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
 		const char *src_end = field_end;
 		if (has_json_paths && part->path != NULL) {
 			if (tuple_go_to_path(&src, part->path,
-						   part->path_len) != 0) {
+					     part->path_len, -1) != 0) {
 				/*
 				 * The path must be correct as
 				 * it has already been validated
diff --git a/src/box/tuple_field_map.c b/src/box/tuple_field_map.c
index ea77745aa..e76068619 100644
--- a/src/box/tuple_field_map.c
+++ b/src/box/tuple_field_map.c
@@ -36,6 +36,8 @@ int
 field_map_builder_create(struct field_map_builder *builder,
 			 uint32_t field_map_size, struct region *region)
 {
+	builder->region = region;
+	builder->extents_size = 0;
 	builder->item_count = field_map_size / sizeof(uint32_t);
 	if (field_map_size == 0) {
 		builder->items = NULL;
@@ -51,3 +53,28 @@ field_map_builder_create(struct field_map_builder *builder,
 	builder->items = (field_map_builder_item *)((char *)builder->items + sz);
 	return 0;
 }
+
+struct field_map_ext *
+field_map_builder_ext_get(struct field_map_builder *builder, int32_t offset_slot,
+			  uint32_t extent_items)
+{
+	struct field_map_ext *extent;
+	if (builder->items[offset_slot].has_extent) {
+		extent = builder->items[offset_slot].extent;
+		assert(extent != NULL);
+		assert(extent->items == extent_items);
+		return extent;
+	}
+	uint32_t sz = field_map_ext_size(extent_items);
+	extent = region_alloc(builder->region, sz);
+	if (extent == NULL) {
+		diag_set(OutOfMemory, sz, "region", "extent");
+		return NULL;
+	}
+	memset(extent, 0, sz);
+	extent->items = extent_items;
+	builder->items[offset_slot].extent = extent;
+	builder->items[offset_slot].has_extent = true;
+	builder->extents_size += sz;
+	return extent;
+}
diff --git a/src/box/tuple_field_map.h b/src/box/tuple_field_map.h
index 3a9927faa..042209a80 100644
--- a/src/box/tuple_field_map.h
+++ b/src/box/tuple_field_map.h
@@ -31,11 +31,48 @@
  * SUCH DAMAGE.
  */
 #include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
 
 struct region;
 
+/** The field_map extent. */
+struct field_map_ext {
+	/** Count of field_map_ext::offset[] items. */
+	uint32_t items;
+	/** Data offset in tuple array. */
+	uint32_t offset[0];
+};
+
+/** Return field_map extention allocation size. */
+static inline uint32_t
+field_map_ext_size(uint32_t items)
+{
+	return sizeof(struct field_map_ext) + items * sizeof(uint32_t);
+}
+
+/** Return field_map extention for field_map and offset_slot. */
+static inline struct field_map_ext *
+field_map_ext_get(const uint32_t *field_map, int32_t offset_slot)
+{
+	return (struct field_map_ext *)((char *)field_map -
+					field_map[offset_slot]);
+}
+
 /** Preliminary field_map atom. */
-typedef uint32_t field_map_builder_item;
+typedef struct {
+	/**
+	 * True when this slot must be interpret as
+	 * extention pointer.
+	 */
+	bool has_extent;
+	union {
+		/** Data offset in tuple. */
+		uint32_t offset;
+		/** Pointer to field_map_ext extention. */
+		struct field_map_ext *extent;
+	};
+} field_map_builder_item;
 
 /** A class that contains a preliminary view of the field_map. */
 struct field_map_builder {
@@ -49,8 +86,20 @@ struct field_map_builder {
 	 * count of field_map_builder::items.
 	 */
 	uint32_t item_count;
+	/**
+	 * Total size of memory is allocated for field_map
+	 * extentions.
+	 */
+	uint32_t extents_size;
+	/** Region to use for allocations. */
+	struct region *region;
 };
 
+/** Get or allocate field_map_ext for offset_slot. */
+struct field_map_ext *
+field_map_builder_ext_get(struct field_map_builder *builder,
+			  int32_t offset_slot, uint32_t extent_items);
+
 /**
  * Initialize field_map_builder to prepare field_map of size
  * field_map_size. Use region for temporary allocations.
@@ -65,22 +114,50 @@ field_map_builder_slot_set(struct field_map_builder *builder,
 			   int32_t offset_slot, uint32_t offset)
 {
 	assert(offset_slot < 0);
-	builder->items[offset_slot] = offset;
+	builder->items[offset_slot].offset = offset;
+}
+
+/** Initialize corresponding field_map_ext with offset value. */
+static inline int
+field_map_builder_ext_slot_set(struct field_map_builder *builder,
+			       int32_t offset_slot, int32_t extent_slot,
+			       uint32_t extent_items, uint32_t offset)
+{
+	assert(offset_slot < 0 && extent_slot >= 0 && extent_items > 0);
+	struct field_map_ext *extent =
+		field_map_builder_ext_get(builder, offset_slot, extent_items);
+	if (extent == NULL)
+		return -1;
+	assert(extent->items == extent_items);
+	extent->offset[extent_slot] = offset;
+	return 0;
 }
 
 /** Return built field_map size. */
 static inline uint32_t
 field_map_builder_size(struct field_map_builder *builder)
 {
-	return builder->item_count * sizeof(uint32_t);
+	return builder->item_count * sizeof(uint32_t) + builder->extents_size;
 }
 
 /** Write build field_map size in the preallocated buffer wptr. */
 static inline void
 field_map_builder_build(struct field_map_builder *builder, char *wptr)
 {
-	uint32_t field_map_size = field_map_builder_size(builder);
-	memcpy(wptr, (char *)builder->items - field_map_size, field_map_size);
+	uint32_t *field_map =
+		(uint32_t *)(wptr + field_map_builder_size(builder));
+	char *extent_wptr = wptr + builder->extents_size;
+	for (int32_t i = -1; i >= -(int32_t)builder->item_count; i--) {
+		if (!builder->items[i].has_extent) {
+			field_map[i] = builder->items[i].offset;
+			continue;
+		}
+		struct field_map_ext *extent = builder->items[i].extent;
+		uint32_t sz = field_map_ext_size(extent->items);
+		extent_wptr -= sz;
+		field_map[i] = (char *)field_map - (char *)extent_wptr;
+		memcpy(extent_wptr, extent, sz);
+	}
 }
 
 #endif /* TARANTOOL_BOX_FIELD_MAP_H_INCLUDED */
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 1d765c7c7..883656bf9 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -194,6 +194,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
 	return NULL;
 }
 
+/**
+ * Check if child_field may be attached to parent_field,
+ * update the parent_field container type if required.
+ */
+static int
+tuple_format_field_update_type(struct tuple_field *parent_field,
+			       struct tuple_field *child_field)
+{
+	enum field_type expected_type =
+		child_field->token.type == JSON_TOKEN_STR ?
+		FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
+	if (child_field->token.type == JSON_TOKEN_ANY &&
+		!json_token_is_multikey(&parent_field->token) &&
+		!json_token_is_leaf(&parent_field->token)) {
+		assert(expected_type == FIELD_TYPE_ARRAY);
+		diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
+			 tt_sprintf("field %s is already refer to document by "
+				    "identifier and cannot use array index "
+				    "placeholder [*]",
+				    tuple_field_path(parent_field)));
+		return -1;
+	}
+	if (json_token_is_multikey(&parent_field->token) &&
+		child_field->token.type != JSON_TOKEN_ANY) {
+		assert(expected_type == FIELD_TYPE_ARRAY);
+		diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
+			 tt_sprintf("field %s is already used with array index "
+				    "placeholder [*] and cannot refer to "
+				    "document by identifier",
+				    tuple_field_path(parent_field)));
+		return -1;
+	}
+	if (field_type1_contains_type2(parent_field->type, expected_type)) {
+		parent_field->type = expected_type;
+	} else {
+		diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
+			 tuple_field_path(parent_field),
+			 field_type_strs[parent_field->type],
+			 field_type_strs[expected_type]);
+		return -1;
+	}
+	return 0;
+}
+
 /**
  * Given a field number and a path, add the corresponding field
  * to the tuple format, allocating intermediate fields if
@@ -228,29 +272,16 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
 	*path_pool += path_len;
 
 	int rc = 0;
-	uint32_t token_count = 0;
+	uint32_t token_count = 0, multikey_token_count = 0;
 	struct json_tree *tree = &format->fields;
 	struct json_lexer lexer;
 	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
 	       field->token.type != JSON_TOKEN_END) {
-		if (field->token.type == JSON_TOKEN_ANY) {
-			diag_set(ClientError, ER_UNSUPPORTED,
-				"Tarantool", "multikey indexes");
-			goto fail;
-		}
-		enum field_type expected_type =
-			field->token.type == JSON_TOKEN_STR ?
-			FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
-		if (field_type1_contains_type2(parent->type, expected_type)) {
-			parent->type = expected_type;
-		} else {
-			diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
-				 tuple_field_path(parent),
-				 field_type_strs[parent->type],
-				 field_type_strs[expected_type]);
+		if (tuple_format_field_update_type(parent, field) != 0)
 			goto fail;
-		}
+		if (field->token.type == JSON_TOKEN_ANY)
+			multikey_token_count++;
 		struct tuple_field *next =
 			json_tree_lookup_entry(tree, &parent->token,
 					       &field->token,
@@ -268,6 +299,18 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
 			if (field == NULL)
 				goto fail;
 		}
+		if (json_token_is_multikey(&parent->token) &&
+		    parent->offset_slot == TUPLE_OFFSET_SLOT_NIL) {
+			/**
+			 * Allocate offset slot for array is used
+			 * in multikey index. This is required to
+			 * quickly extract its size.
+			 * @see tuple_field_multikey_items()
+			 */
+			assert(parent->type == FIELD_TYPE_ARRAY);
+			*current_slot = *current_slot - 1;
+			parent->offset_slot = *current_slot;
+		}
 		parent->is_key_part = true;
 		parent = next;
 		token_count++;
@@ -280,6 +323,13 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
 	assert(parent != NULL);
 	/* Update tree depth information. */
 	format->fields_depth = MAX(format->fields_depth, token_count + 1);
+	if (multikey_token_count > 1) {
+		assert(path_len > 0);
+		diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
+			 tt_sprintf("no more than one array index placeholder "
+				    "[*] is allowed in JSON path"));
+		goto fail;
+	}
 cleanup:
 	tuple_field_delete(field);
 end:
@@ -782,6 +832,7 @@ tuple_field_map_initialize(struct field_map_builder *builder,
 	 * validations and field map initialization.
 	 */
 	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+	struct mp_frame *mk_parent_frame = NULL;
 	struct mp_frame *frames = region_alloc(region, frames_sz);
 	if (frames == NULL) {
 		diag_set(OutOfMemory, frames_sz, "region", "frames");
@@ -805,6 +856,10 @@ tuple_field_map_initialize(struct field_map_builder *builder,
 			mp_stack_pop(&stack);
 			if (mp_stack_is_empty(&stack))
 				goto end;
+			if (json_token_is_multikey(parent)) {
+				/* Leave multikey index branch. */
+				mk_parent_frame = NULL;
+			}
 			parent = parent->parent;
 		}
 		/*
@@ -855,7 +910,16 @@ tuple_field_map_initialize(struct field_map_builder *builder,
 					 field_type_strs[field->type]);
 				return -1;
 			}
-			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+			    mk_parent_frame != NULL) {
+				int multikey_idx = mk_parent_frame->curr;
+				uint32_t multikey_items = mk_parent_frame->count;
+				if (field_map_builder_ext_slot_set(builder,
+						field->offset_slot, multikey_idx,
+						multikey_items,
+						*pos - tuple) != 0)
+					return -1;
+			} else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
 				field_map_builder_slot_set(builder,
 							   field->offset_slot,
 							   *pos - tuple);
@@ -876,6 +940,10 @@ tuple_field_map_initialize(struct field_map_builder *builder,
 					mp_decode_array(pos) :
 					mp_decode_map(pos);
 			mp_stack_push(&stack, type, size);
+			if (json_token_is_multikey(&field->token)) {
+				assert(type == MP_ARRAY);
+				mk_parent_frame = &frames[stack.used - 1];
+			}
 			parent = &field->token;
 		} else {
 			mp_next(pos);
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 78f29c624..39ba76394 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -699,6 +699,12 @@ vinyl_space_check_index_def(struct space *space, struct index_def *index_def)
 			return -1;
 		}
 	}
+	if (key_def_is_multikey(index_def->key_def)) {
+		diag_set(ClientError, ER_MODIFY_INDEX,
+			 index_def->name, space_name(space),
+			 "vinyl space index cannot be multikey");
+		return -1;
+	}
 	return 0;
 }
 
diff --git a/test/box/misc.result b/test/box/misc.result
index c350bbd73..88cc64daf 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -516,6 +516,7 @@ t;
   185: box.error.SQL_UNKNOWN_TOKEN
   186: box.error.SQL_PARSER_GENERIC
   187: box.error.SQL_ANALYZE_ARGUMENT
+  188: box.error.INDEX_MULTIKEY_INVALID
 ...
 test_run:cmd("setopt delimiter ''");
 ---
diff --git a/test/engine/json.result b/test/engine/json.result
index a790cdfbc..1bac85edd 100644
--- a/test/engine/json.result
+++ b/test/engine/json.result
@@ -683,16 +683,3 @@ town:select()
 s:drop()
 ---
 ...
---
--- gh-1260: Multikey indexes
---
-s = box.schema.space.create('withdata')
----
-...
-idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
----
-- error: Tarantool does not support multikey indexes
-...
-s:drop()
----
-...
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
index f9a7180b1..9afa3daa2 100644
--- a/test/engine/json.test.lua
+++ b/test/engine/json.test.lua
@@ -192,10 +192,3 @@ town:select()
 name:drop()
 town:select()
 s:drop()
-
---
--- gh-1260: Multikey indexes
---
-s = box.schema.space.create('withdata')
-idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
-s:drop()
diff --git a/test/engine/multikey_idx.result b/test/engine/multikey_idx.result
new file mode 100644
index 000000000..f41ddafa8
--- /dev/null
+++ b/test/engine/multikey_idx.result
@@ -0,0 +1,228 @@
+test_run = require('test_run').new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata', {engine = 'vinyl'})
+---
+...
+-- Vinyl's space can't be multikey (yet).
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key
+    cannot be multikey'
+...
+s:drop()
+---
+...
+s = box.schema.space.create('withdata', {engine = 'memtx'})
+---
+...
+-- Primary index must be unique so it can't be multikey.
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key
+    cannot be multikey'
+...
+pk = s:create_index('pk')
+---
+...
+-- Only tree index type may be mutlikey.
+_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': HASH index
+    cannot be multikey'
+...
+_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': BITSET index
+    cannot be multikey'
+...
+_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': RTREE index
+    cannot be multikey'
+...
+-- Test incompatible multikey index parts.
+_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}})
+---
+- error: 'Can''t create or modify index ''idx3'' in space ''withdata'': multikey index
+    parts are incompatible'
+...
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}})
+---
+- error: 'Multikey index is invalid: no more than one array index placeholder [*]
+    is allowed in JSON path'
+...
+idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}})
+---
+...
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+- error: 'Multikey index is invalid: field 3 is already refer to document by identifier
+    and cannot use array index placeholder [*]'
+...
+idx0:drop()
+---
+...
+-- Unique multikey index.
+idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+...
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}})
+---
+- error: 'Multikey index is invalid: field 3 is already used with array index placeholder
+    [*] and cannot refer to document by identifier'
+...
+s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}})
+---
+- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}})
+---
+- error: Duplicate key exists in unique index 'arr_idx' in space 'withdata'
+...
+-- Non-unique multikey index; two multikey indexes per space.
+arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}})
+---
+...
+arr_idx:select()
+---
+- - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:get({'James', 'Bond'})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:get({'Ivan', 'Ivanych'})
+---
+- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:get({'Vasya', 'Pupkin'})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+s:insert({4, {1}, {{fname='James', sname='Bond'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+-- Duplicates in multikey parts.
+s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+idx:get({'A', 'B'})
+---
+...
+idx:get({'C', 'D'})
+---
+...
+idx:delete({'Vasya', 'Pupkin'})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+---
+- [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({7, {1}, {{fname='James', sname='Bond'}}})
+---
+- [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+...
+arr_idx:select({1})
+---
+- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+  - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+-- Snapshot & recovery.
+box.snapshot()
+---
+- ok
+...
+test_run:cmd("restart server default")
+s = box.space["withdata"]
+---
+...
+idx = s.index["idx"]
+---
+...
+arr_idx = s.index["arr_idx"]
+---
+...
+s:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+  - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+arr_idx:select()
+---
+- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+  - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+s:drop()
+---
+...
+-- Assymetric multikey index paths.
+s = box.schema.space.create('withdata')
+---
+...
+pk = s:create_index('pk')
+---
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+---
+...
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
+---
+- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1',
+      'extra': {'sname': 'C2'}}]]
+...
+s:drop()
+---
+...
diff --git a/test/engine/multikey_idx.test.lua b/test/engine/multikey_idx.test.lua
new file mode 100644
index 000000000..2b25063cf
--- /dev/null
+++ b/test/engine/multikey_idx.test.lua
@@ -0,0 +1,67 @@
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata', {engine = 'vinyl'})
+-- Vinyl's space can't be multikey (yet).
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+s:drop()
+
+s = box.schema.space.create('withdata', {engine = 'memtx'})
+-- Primary index must be unique so it can't be multikey.
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}})
+pk = s:create_index('pk')
+-- Only tree index type may be mutlikey.
+_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}})
+_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}})
+_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}})
+-- Test incompatible multikey index parts.
+_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}})
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}})
+idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}})
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+idx0:drop()
+-- Unique multikey index.
+idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}})
+s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}})
+_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}})
+-- Non-unique multikey index; two multikey indexes per space.
+arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}})
+arr_idx:select()
+idx:get({'James', 'Bond'})
+idx:get({'Ivan', 'Ivanych'})
+idx:get({'Vasya', 'Pupkin'})
+idx:select()
+s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({4, {1}, {{fname='James', sname='Bond'}}})
+idx:select()
+-- Duplicates in multikey parts.
+s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}})
+idx:get({'A', 'B'})
+idx:get({'C', 'D'})
+idx:delete({'Vasya', 'Pupkin'})
+s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({7, {1}, {{fname='James', sname='Bond'}}})
+arr_idx:select({1})
+idx:select()
+-- Snapshot & recovery.
+box.snapshot()
+test_run:cmd("restart server default")
+s = box.space["withdata"]
+idx = s.index["idx"]
+arr_idx = s.index["arr_idx"]
+s:select()
+idx:select()
+arr_idx:select()
+s:drop()
+
+-- Assymetric multikey index paths.
+s = box.schema.space.create('withdata')
+pk = s:create_index('pk')
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
+s:drop()
-- 
2.21.0




More information about the Tarantool-patches mailing list