[PATCH v6 3/3] box: introduce multikey indexes

Kirill Shcherbatov kshcherbatov at tarantool.org
Wed Mar 13 15:15:39 MSK 2019


I didn't finished review fixes for multikey indexes, but I've
included this part to show how the modified hints changes affect
multikey indexes.

=========================================================================

With multikey index feature introduction, JSON path may have [*]
placeholder instead of array index: this allows to index
multiple documents by one JSON path automatically.

Example:
s = box.schema.space.create('withdata')
idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'},
                              {3, 'str', path = '[*].sname'}}})
_ = s:insert({1, 1, {{fname='James', sname='Bond'},
             {fname='Austin', sname='Powers'}}})
idx:get({'Austin', 'Powers'})
---
- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Austin',
'sname': 'Powers'}]]
...

Closes #1257
---
 src/box/key_def.c         |  15 ++++
 src/box/key_def.h         |   5 ++
 src/box/memtx_tree.c      | 175 +++++++++++++++++++++++++++++++++++++-
 src/box/tuple.c           |   8 +-
 src/box/tuple.h           | 122 +++++++++++++++++++++++---
 src/box/tuple_compare.cc  | 115 +++++++++++++++++++------
 src/box/tuple_format.c    | 120 +++++++++++++++++++++-----
 test/engine/json.result   |  80 ++++++++++++++++-
 test/engine/json.test.lua |  20 +++++
 9 files changed, 592 insertions(+), 68 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index d4c979aa1..b9cc77699 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -164,9 +164,24 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 		*path_pool += path_len;
 		memcpy(def->parts[part_no].path, path, path_len);
 		def->parts[part_no].path_len = path_len;
+
+		int rc;
+		struct json_lexer lexer;
+		struct json_token token;
+		json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
+		while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
+			if (token.type == JSON_TOKEN_ANY) {
+				def->has_multikey_parts = true;
+				def->parts[part_no].is_multikey = true;
+				break;
+			} else if (token.type == JSON_TOKEN_END) {
+				break;
+			}
+		}
 	} else {
 		def->parts[part_no].path = NULL;
 		def->parts[part_no].path_len = 0;
+		def->parts[part_no].is_multikey = false;
 	}
 	column_mask_set_fieldno(&def->column_mask, fieldno);
 }
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 0d8f3112e..6a914df11 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -97,6 +97,8 @@ struct key_part {
 	char *path;
 	/** The length of JSON path. */
 	uint32_t path_len;
+	/** True if this is multikey key part. */
+	bool is_multikey;
 	/**
 	 * Epoch of the tuple format the offset slot cached in
 	 * this part is valid for, see tuple_format::epoch.
@@ -205,6 +207,8 @@ struct key_def {
 	bool is_nullable;
 	/** True if some key part has JSON path. */
 	bool has_json_paths;
+	/** True if some part has array index placeholder *. */
+	bool has_multikey_parts;
 	/**
 	 * True, if some key parts can be absent in a tuple. These
 	 * fields assumed to be MP_NIL.
@@ -699,6 +703,7 @@ key_hash(const char *key, struct key_def *key_def)
  * Get a comparison hint for a @a tuple.
  * @param tuple - tuple to get uint64_t of.
  * @param key_def - key_def that defines which comparison is used.
+ * @param multikey_idx - index of multikey array item.
  * @return the comparison auxiliary information.
  */
 static inline uint64_t
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index 2c1933ceb..bfc618bdf 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -537,7 +537,8 @@ memtx_tree_index_get(struct index *base, const char *key,
 	struct memtx_tree_key_data key_data;
 	key_data.key = key;
 	key_data.part_count = part_count;
-	key_data.hint = key_hint(key, cmp_def);
+	if (!cmp_def->has_multikey_parts)
+		key_data.hint = key_hint(key, cmp_def);
 	struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
 	*result = res != NULL ? res->tuple : NULL;
 	return 0;
@@ -593,6 +594,98 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
 	return 0;
 }
 
+static int
+multikey_get_array_sz(struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_def->has_multikey_parts);
+	struct key_part *part = key_def->parts;
+	/* FIXME: don't like it... */
+	while (part < key_def->parts + key_def->part_count &&
+	       !part->is_multikey)
+		part++;
+	assert(part->path != NULL && part->is_multikey);
+	struct tuple_field *field =
+		tuple_format_field_by_path(tuple_format(tuple), part->fieldno,
+					   part->path, part->path_len);
+	assert(field != NULL);
+	struct field_map_ext *field_map_ext =
+		tuple_field_map_ext((uint32_t *)tuple_field_map(tuple),
+				    field->offset_slot);
+	return field_map_ext->size;
+}
+
+int
+memtx_multikey_tree_index_replace(struct index *base, struct tuple *old_tuple,
+				  struct tuple *new_tuple,
+				  enum dup_replace_mode mode,
+				  struct tuple **result)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct key_def *key_def = index->tree.arg;
+	if (new_tuple != NULL) {
+		int errcode = 0, tree_res = 0;
+		struct tuple *dup_tuple = NULL;
+		int multikey_idx = 0;
+		int sz = multikey_get_array_sz(new_tuple, key_def);
+		for (; multikey_idx < sz; multikey_idx++) {
+			struct memtx_tree_data new_data;
+			new_data.tuple = new_tuple;
+			new_data.hint = multikey_idx;
+			struct memtx_tree_data dup_data;
+			dup_data.tuple = NULL;
+			tree_res = memtx_tree_insert(&index->tree, new_data,
+						     &dup_data);
+			if (tree_res != 0) {
+				diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+					 "memtx_tree_index", "replace");
+				break;
+			}
+			dup_tuple = dup_data.tuple;
+			errcode = replace_check_dup(old_tuple, dup_tuple, mode);
+			if (errcode != 0) {
+				memtx_tree_delete(&index->tree, new_data);
+				if (dup_tuple != NULL) {
+					memtx_tree_insert(&index->tree,
+							  dup_data, NULL);
+				}
+				struct space *sp =
+					space_cache_find(base->def->space_id);
+				if (sp != NULL) {
+					diag_set(ClientError, errcode,
+						 base->def->name,
+						 space_name(sp));
+				}
+				break;
+			}
+		}
+		if (tree_res != 0 || errcode != 0) {
+			multikey_idx--;
+			for (; multikey_idx >= 0; multikey_idx--) {
+				struct memtx_tree_data data;
+				data.tuple = new_tuple;
+				data.hint = multikey_idx;
+				memtx_tree_delete(&index->tree, data);
+			}
+			return -1;
+		}
+		if (dup_tuple != NULL) {
+			*result = dup_tuple;
+			return 0;
+		}
+	}
+	if (old_tuple != NULL) {
+		int sz = multikey_get_array_sz(old_tuple, key_def);
+		for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) {
+			struct memtx_tree_data old_data;
+			old_data.tuple = old_tuple;
+			old_data.hint = multikey_idx;
+			memtx_tree_delete(&index->tree, old_data);
+		}
+	}
+	*result = old_tuple;
+	return 0;
+}
+
 static struct iterator *
 memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
 				 const char *key, uint32_t part_count)
@@ -630,7 +723,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
 	it->key_data.key = key;
 	it->key_data.part_count = part_count;
 	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
-	it->key_data.hint = key_hint(key, cmp_def);
+	if (!cmp_def->has_multikey_parts)
+		it->key_data.hint = key_hint(key, cmp_def);
 	it->index_def = base->def;
 	it->tree = &index->tree;
 	it->tree_iterator = memtx_tree_invalid_iterator();
@@ -700,6 +794,48 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
 	return 0;
 }
 
+static int
+memtx_multikey_tree_index_build_next(struct index *base, struct tuple *tuple)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct key_def *key_def = index->tree.arg;
+	int sz = multikey_get_array_sz(tuple, key_def);
+
+	if (index->build_array == NULL) {
+		index->build_array =
+			(struct memtx_tree_data *)malloc(MEMTX_EXTENT_SIZE);
+		if (index->build_array == NULL) {
+			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+				 "memtx_tree_index", "build_next");
+			return -1;
+		}
+		index->build_array_alloc_size =
+			MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
+	}
+	assert(index->build_array_size <= index->build_array_alloc_size);
+	if (index->build_array_size == index->build_array_alloc_size) {
+		index->build_array_alloc_size = index->build_array_alloc_size +
+					index->build_array_alloc_size / 2;
+		struct memtx_tree_data *tmp =
+			realloc(index->build_array,
+				index->build_array_alloc_size * sizeof(*tmp));
+		if (tmp == NULL) {
+			diag_set(OutOfMemory, index->build_array_alloc_size *
+				 sizeof(*tmp), "memtx_tree_index", "build_next");
+			return -1;
+		}
+		index->build_array = tmp;
+	}
+	for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) {
+		struct memtx_tree_data *elem =
+			&index->build_array[index->build_array_size++];
+		assert(index->build_array_size < index->build_array_alloc_size);
+		elem->tuple = tuple;
+		elem->hint = multikey_idx;
+	}
+	return 0;
+}
+
 static void
 memtx_tree_index_end_build(struct index *base)
 {
@@ -802,6 +938,36 @@ static const struct index_vtab memtx_tree_index_vtab = {
 	/* .end_build = */ memtx_tree_index_end_build,
 };
 
+static const struct index_vtab memtx_multikey_tree_index_vtab = {
+	/* .destroy = */ memtx_tree_index_destroy,
+	/* .commit_create = */ generic_index_commit_create,
+	/* .abort_create = */ generic_index_abort_create,
+	/* .commit_modify = */ generic_index_commit_modify,
+	/* .commit_drop = */ generic_index_commit_drop,
+	/* .update_def = */ memtx_tree_index_update_def,
+	/* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
+	/* .def_change_requires_rebuild = */
+		memtx_index_def_change_requires_rebuild,
+	/* .size = */ memtx_tree_index_size,
+	/* .bsize = */ memtx_tree_index_bsize,
+	/* .min = */ generic_index_min,
+	/* .max = */ generic_index_max,
+	/* .random = */ memtx_tree_index_random,
+	/* .count = */ memtx_tree_index_count,
+	/* .get = */ memtx_tree_index_get,
+	/* .replace = */ memtx_multikey_tree_index_replace,
+	/* .create_iterator = */ memtx_tree_index_create_iterator,
+	/* .create_snapshot_iterator = */
+		memtx_tree_index_create_snapshot_iterator,
+	/* .stat = */ generic_index_stat,
+	/* .compact = */ generic_index_compact,
+	/* .reset_stat = */ generic_index_reset_stat,
+	/* .begin_build = */ memtx_tree_index_begin_build,
+	/* .reserve = */ memtx_tree_index_reserve,
+	/* .build_next = */ memtx_multikey_tree_index_build_next,
+	/* .end_build = */ memtx_tree_index_end_build,
+};
+
 struct index *
 memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
 {
@@ -812,8 +978,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
 			 "malloc", "struct memtx_tree_index");
 		return NULL;
 	}
+	const struct index_vtab *vtab = def->key_def->has_multikey_parts ?
+					&memtx_multikey_tree_index_vtab :
+					&memtx_tree_index_vtab;
 	if (index_create(&index->base, (struct engine *)memtx,
-			 &memtx_tree_index_vtab, def) != 0) {
+			 vtab, def) != 0) {
 		free(index);
 		return NULL;
 	}
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 7f06d4053..c8a938c4c 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
 }
 
 int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
+tuple_go_to_path_multikey(const char **data, const char *path,
+			  uint32_t path_len, uint64_t multikey_idx)
 {
 	int rc;
 	struct json_lexer lexer;
@@ -463,6 +464,8 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
 	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
 		switch (token.type) {
+		case JSON_TOKEN_ANY:
+			token.num = (int)multikey_idx;
 		case JSON_TOKEN_NUM:
 			rc = tuple_field_go_to_index(data, token.num);
 			break;
@@ -532,7 +535,8 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
 	}
 	return tuple_field_raw_by_path(format, tuple, field_map, fieldno,
 				       path + lexer.offset,
-				       path_len - lexer.offset, NULL);
+				       path_len - lexer.offset, NULL,
+				       INVALID_MULTIKEY_INDEX);
 }
 
 /* }}} tuple_field_* getters */
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 8b12fd5a8..e498e1e41 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -45,6 +45,8 @@ struct slab_arena;
 struct quota;
 struct key_part;
 
+#define INVALID_MULTIKEY_INDEX UINT64_MAX
+
 /**
  * A format for standalone tuples allocated on runtime arena.
  * \sa tuple_new().
@@ -286,6 +288,7 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
 
 /** \endcond public */
 
+
 /**
  * An atom of Tarantool storage. Represents MsgPack Array.
  * Tuple has the following structure:
@@ -296,7 +299,9 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
  * |                                            ^
  * +---------------------------------------data_offset
  *
- * Each 'off_i' is the offset to the i-th indexed field.
+ * Each 'off_i' is the offset to the i-th indexed field or field
+ * map extention (described with struct tuple_field_map_ext)
+ * offset (in sizeof(field_map[0]) units).
  */
 struct PACKED tuple
 {
@@ -328,6 +333,26 @@ struct PACKED tuple
 	 */
 };
 
+/**
+ * Tuple field map extent. Is allocated on tuple_field_map_create
+ * call automatically when necessary, when tuple field map must be
+ * reallocated dynamically.
+ */
+struct field_map_ext {
+	/* Sequence of data offset slots. */
+	uint32_t field_map[1];
+	/* The count of field_map items. */
+	uint32_t size;
+};
+
+static inline struct field_map_ext *
+tuple_field_map_ext(uint32_t *field_map, int32_t root_offset_slot)
+{
+	uint32_t slot_off = field_map[root_offset_slot];
+	return (struct field_map_ext *)((char *)field_map -
+		slot_off * sizeof(uint32_t) - sizeof(struct field_map_ext));
+}
+
 /** Size of the tuple including size of struct tuple. */
 static inline size_t
 tuple_size(const struct tuple *tuple)
@@ -508,11 +533,32 @@ tuple_field_count(const struct tuple *tuple)
  *                      with NULL.
  * @param path The path to process.
  * @param path_len The length of the @path.
+ * @param multikey_index The multikey index hint - index of item
+ *                       for JSON_TOKEN_ANY level.
  * @retval 0 On success.
  * @retval -1 In case of error in JSON path.
  */
 int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
+tuple_go_to_path_multikey(const char **data, const char *path,
+			  uint32_t path_len, uint64_t multikey_idx);
+
+/**
+ * Retrieve msgpack data by JSON path.
+ * @param data[in, out] Pointer to msgpack with data.
+ *                      If the field cannot be retrieved be the
+ *                      specified path @path, it is overwritten
+ *                      with NULL.
+ * @param path The path to process.
+ * @param path_len The length of the @path.
+ * @retval 0 On success.
+ * @retval -1 In case of error in JSON path.
+ */
+static inline int
+tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
+{
+	return tuple_go_to_path_multikey(data, path, path_len,
+					 INVALID_MULTIKEY_INDEX);
+}
 
 /**
  * Get tuple field by field index and relative JSON path.
@@ -533,7 +579,7 @@ static inline const char *
 tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 			const uint32_t *field_map, uint32_t fieldno,
 			const char *path, uint32_t path_len,
-			int32_t *offset_slot_hint)
+			int32_t *offset_slot_hint, uint64_t multikey_idx)
 {
 	int32_t offset_slot;
 	if (offset_slot_hint != NULL &&
@@ -558,10 +604,22 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 		if (offset_slot_hint != NULL)
 			*offset_slot_hint = offset_slot;
 offset_slot_access:
-		/* Indexed field */
-		if (field_map[offset_slot] == 0)
-			return NULL;
-		tuple += field_map[offset_slot];
+		if (multikey_idx != INVALID_MULTIKEY_INDEX) {
+			struct field_map_ext *field_map_ext =
+				tuple_field_map_ext((uint32_t *)field_map,
+						    offset_slot);
+			if (multikey_idx >= field_map_ext->size)
+				return NULL;
+			uint32_t off = field_map_ext->field_map[-multikey_idx];
+			if (off == 0)
+				return NULL;
+			tuple += off;
+		} else {
+			/* Indexed field */
+			if (field_map[offset_slot] == 0)
+				return NULL;
+			tuple += field_map[offset_slot];
+		}
 	} else {
 		uint32_t field_count;
 parse:
@@ -572,8 +630,8 @@ parse:
 		for (uint32_t k = 0; k < fieldno; k++)
 			mp_next(&tuple);
 		if (path != NULL &&
-		    unlikely(tuple_go_to_path(&tuple, path,
-						    path_len) != 0))
+		    unlikely(tuple_go_to_path_multikey(&tuple, path, path_len,
+						       multikey_idx) != 0))
 			return NULL;
 	}
 	return tuple;
@@ -595,7 +653,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple,
 		const uint32_t *field_map, uint32_t field_no)
 {
 	return tuple_field_raw_by_path(format, tuple, field_map, field_no,
-				       NULL, 0, NULL);
+				       NULL, 0, NULL, INVALID_MULTIKEY_INDEX);
 }
 
 /**
@@ -634,16 +692,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
 			     uint32_t path_len, uint32_t path_hash);
 
 /**
- * Get a tuple field pointed to by an index part.
+ * Get a tuple field pointed to by an index part and multikey hint.
  * @param format Tuple format.
  * @param tuple A pointer to MessagePack array.
  * @param field_map A pointer to the LAST element of field map.
+ * @param multikey_idx A multikey hint.
  * @param part Index part to use.
  * @retval Field data if the field exists or NULL.
  */
 static inline const char *
-tuple_field_raw_by_part(struct tuple_format *format, const char *data,
-			const uint32_t *field_map, struct key_part *part)
+tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data,
+				 const uint32_t *field_map,
+				 struct key_part *part, uint64_t multikey_idx)
 {
 	if (unlikely(part->format_epoch != format->epoch)) {
 		assert(format->epoch != 0);
@@ -656,7 +716,41 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data,
 	}
 	return tuple_field_raw_by_path(format, data, field_map, part->fieldno,
 				       part->path, part->path_len,
-				       &part->offset_slot_cache);
+				       &part->offset_slot_cache, multikey_idx);
+}
+
+/**
+ * Get a field refereed by index @part and multikey hint in tuple.
+ * @param tuple Tuple to get the field from.
+ * @param part Index part to use.
+ * @param multikey_idx A multikey hint.
+ * @retval Field data if the field exists or NULL.
+ */
+static inline const char *
+tuple_field_by_part_multikey(const struct tuple *tuple, struct key_part *part,
+			     uint64_t multikey_idx)
+{
+	return tuple_field_raw_by_part_multikey(tuple_format(tuple),
+						tuple_data(tuple),
+						tuple_field_map(tuple), part,
+						multikey_idx);
+}
+
+
+/**
+ * Get a tuple field pointed to by an index part.
+ * @param format Tuple format.
+ * @param tuple A pointer to MessagePack array.
+ * @param field_map A pointer to the LAST element of field map.
+ * @param part Index part to use.
+ * @retval Field data if the field exists or NULL.
+ */
+static inline const char *
+tuple_field_raw_by_part(struct tuple_format *format, const char *data,
+			const uint32_t *field_map, struct key_part *part)
+{
+	return tuple_field_raw_by_part_multikey(format, data, field_map,
+						part, INVALID_MULTIKEY_INDEX);
 }
 
 /**
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 86922c9bb..cdb08f8fe 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -351,6 +351,8 @@ key_def_set_hint_func(struct key_def *def)
 {
 	def->key_hint = NULL;
 	def->tuple_hint = NULL;
+	if (def->has_multikey_parts)
+		return;
 	bool is_nullable = key_part_is_nullable(def->parts);
 	switch (def->parts->type) {
 	case FIELD_TYPE_UNSIGNED: {
@@ -835,7 +837,8 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b,
 	return i;
 }
 
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+	 bool is_multikey>
 static inline int
 tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
 		uint64_t tuple_a_hint, const struct tuple *tuple_b,
@@ -845,8 +848,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
 	assert(!has_optional_parts || is_nullable);
 	assert(is_nullable == key_def->is_nullable);
 	assert(has_optional_parts == key_def->has_optional_parts);
+	assert(is_multikey == key_def->has_multikey_parts);
+	assert(!is_multikey || is_multikey == has_json_paths);
 	int rc = 0;
-	if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
+	if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
 		return rc;
 	struct key_part *part = key_def->parts;
 	const char *tuple_a_raw = tuple_data(tuple_a);
@@ -889,7 +894,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
 		end = part + key_def->part_count;
 
 	for (; part < end; part++) {
-		if (has_json_paths) {
+		if (is_multikey) {
+			field_a = tuple_field_raw_by_part_multikey(format_a,
+					tuple_a_raw, field_map_a, part,
+					tuple_a_hint);
+			field_b = tuple_field_raw_by_part_multikey(format_b,
+					tuple_b_raw, field_map_b, part,
+					tuple_b_hint);
+		} else if (has_json_paths) {
 			field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
 							  field_map_a, part);
 			field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -946,7 +958,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
 	 */
 	end = key_def->parts + key_def->part_count;
 	for (; part < end; ++part) {
-		if (has_json_paths) {
+		if (is_multikey) {
+			field_a = tuple_field_raw_by_part_multikey(format_a,
+					tuple_a_raw, field_map_a, part,
+					tuple_a_hint);
+			field_b = tuple_field_raw_by_part_multikey(format_b,
+					tuple_b_raw, field_map_b, part,
+					tuple_b_hint);
+		} else if (has_json_paths) {
 			field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
 							  field_map_a, part);
 			field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -976,24 +995,28 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
 		       struct key_def *key_def)
 {
 	return tuple_compare_slowpath_hinted<is_nullable, has_optional_parts,
-			has_json_paths>(tuple_a, HINT_INVALID, tuple_b,
+			has_json_paths, false>(tuple_a, HINT_INVALID, tuple_b,
 					HINT_INVALID, key_def);
 }
 
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+	 bool is_multikey>
 static inline int
 tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
 		uint64_t tuple_hint, const char *key, uint32_t part_count,
 		uint64_t key_hint, struct key_def *key_def)
 {
+	(void)key_hint;
 	assert(has_json_paths == key_def->has_json_paths);
 	assert(!has_optional_parts || is_nullable);
 	assert(is_nullable == key_def->is_nullable);
 	assert(has_optional_parts == key_def->has_optional_parts);
 	assert(key != NULL || part_count == 0);
 	assert(part_count <= key_def->part_count);
+	assert(is_multikey == key_def->has_multikey_parts);
+	assert(!is_multikey || is_multikey == has_json_paths);
 	int rc = 0;
-	if ((rc = hint_cmp(tuple_hint, key_hint)) != 0)
+	if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0)
 		return rc;
 	struct key_part *part = key_def->parts;
 	struct tuple_format *format = tuple_format(tuple);
@@ -1002,7 +1025,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
 	enum mp_type a_type, b_type;
 	if (likely(part_count == 1)) {
 		const char *field;
-		if (has_json_paths) {
+		if (is_multikey) {
+			field = tuple_field_raw_by_part_multikey(format,
+					tuple_raw, field_map, part, tuple_hint);
+		} else if (has_json_paths) {
 			field = tuple_field_raw_by_part(format, tuple_raw,
 							field_map, part);
 		} else {
@@ -1032,7 +1058,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
 	struct key_part *end = part + part_count;
 	for (; part < end; ++part, mp_next(&key)) {
 		const char *field;
-		if (has_json_paths) {
+		if (is_multikey) {
+			field = tuple_field_raw_by_part_multikey(format,
+					tuple_raw, field_map, part, tuple_hint);
+		} else if (has_json_paths) {
 			field = tuple_field_raw_by_part(format, tuple_raw,
 							field_map, part);
 		} else {
@@ -1074,7 +1103,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
 				uint32_t part_count, struct key_def *key_def)
 {
 	return tuple_compare_with_key_slowpath_hinted<is_nullable,
-			has_optional_parts, has_json_paths>(tuple,
+			has_optional_parts, has_json_paths, false>(tuple,
 					HINT_INVALID, key, part_count,
 					HINT_INVALID, key_def);
 }
@@ -1508,14 +1537,22 @@ static const tuple_compare_t compare_slowpath_funcs[] = {
 };
 
 static const tuple_compare_hinted_t compare_slowpath_hinted_funcs[] = {
-	tuple_compare_slowpath_hinted<false, false, false>,
-	tuple_compare_slowpath_hinted<true, false, false>,
-	tuple_compare_slowpath_hinted<false, true, false>,
-	tuple_compare_slowpath_hinted<true, true, false>,
-	tuple_compare_slowpath_hinted<false, false, true>,
-	tuple_compare_slowpath_hinted<true, false, true>,
-	tuple_compare_slowpath_hinted<false, true, true>,
-	tuple_compare_slowpath_hinted<true, true, true>
+	tuple_compare_slowpath_hinted<false, false, false, false>,
+	tuple_compare_slowpath_hinted<true, false, false, false>,
+	tuple_compare_slowpath_hinted<false, true, false, false>,
+	tuple_compare_slowpath_hinted<true, true, false, false>,
+	tuple_compare_slowpath_hinted<false, false, true, false>,
+	tuple_compare_slowpath_hinted<true, false, true, false>,
+	tuple_compare_slowpath_hinted<false, true, true, false>,
+	tuple_compare_slowpath_hinted<true, true, true, false>,
+	tuple_compare_slowpath_hinted<false, false, false, true>,
+	tuple_compare_slowpath_hinted<true, false, false, true>,
+	tuple_compare_slowpath_hinted<false, true, false, true>,
+	tuple_compare_slowpath_hinted<true, true, false, true>,
+	tuple_compare_slowpath_hinted<false, false, true, true>,
+	tuple_compare_slowpath_hinted<true, false, true, true>,
+	tuple_compare_slowpath_hinted<false, true, true, true>,
+	tuple_compare_slowpath_hinted<true, true, true, true>,
 };
 
 void
@@ -1523,7 +1560,14 @@ key_def_set_tuple_compare(struct key_def *def)
 {
 	int cmp_func_idx = (def->is_nullable ? 1 : 0) +
 			   2 * (def->has_optional_parts ? 1 : 0) +
-			   4 * (def->has_json_paths ? 1 : 0);
+			   4 * (def->has_json_paths ? 1 : 0) +
+			   8 * (def->has_multikey_parts ? 1 : 0);
+	if (def->has_multikey_parts) {
+		def->tuple_compare = NULL;
+		def->tuple_compare_hinted =
+			compare_slowpath_hinted_funcs[cmp_func_idx];
+		return;
+	}
 	if (def->is_nullable) {
 		if (key_def_is_sequential(def)) {
 			if (def->has_optional_parts) {
@@ -1795,14 +1839,22 @@ static const tuple_compare_with_key_t compare_with_key_slowpath_funcs[] = {
 
 static const tuple_compare_with_key_hinted_t
 			compare_with_key_slowpath_hinted_funcs[] = {
-	tuple_compare_with_key_slowpath_hinted<false, false, false>,
-	tuple_compare_with_key_slowpath_hinted<true, false, false>,
-	tuple_compare_with_key_slowpath_hinted<false, true, false>,
-	tuple_compare_with_key_slowpath_hinted<true, true, false>,
-	tuple_compare_with_key_slowpath_hinted<false, false, true>,
-	tuple_compare_with_key_slowpath_hinted<true, false, true>,
-	tuple_compare_with_key_slowpath_hinted<false, true, true>,
-	tuple_compare_with_key_slowpath_hinted<true, true, true>
+	tuple_compare_with_key_slowpath_hinted<false, false, false, false>,
+	tuple_compare_with_key_slowpath_hinted<true, false, false, false>,
+	tuple_compare_with_key_slowpath_hinted<false, true, false, false>,
+	tuple_compare_with_key_slowpath_hinted<true, true, false, false>,
+	tuple_compare_with_key_slowpath_hinted<false, false, true, false>,
+	tuple_compare_with_key_slowpath_hinted<true, false, true, false>,
+	tuple_compare_with_key_slowpath_hinted<false, true, true, false>,
+	tuple_compare_with_key_slowpath_hinted<true, true, true, false>,
+	tuple_compare_with_key_slowpath_hinted<false, false, false, true>,
+	tuple_compare_with_key_slowpath_hinted<true, false, false, true>,
+	tuple_compare_with_key_slowpath_hinted<false, true, false, true>,
+	tuple_compare_with_key_slowpath_hinted<true, true, false, true>,
+	tuple_compare_with_key_slowpath_hinted<false, false, true, true>,
+	tuple_compare_with_key_slowpath_hinted<true, false, true, true>,
+	tuple_compare_with_key_slowpath_hinted<false, true, true, true>,
+	tuple_compare_with_key_slowpath_hinted<true, true, true, true>
 };
 
 void
@@ -1810,7 +1862,14 @@ key_def_set_tuple_compare_with_key(struct key_def *def)
 {
 	int cmp_func_idx = (def->is_nullable ? 1 : 0) +
 			   2 * (def->has_optional_parts ? 1 : 0) +
-			   4 * (def->has_json_paths ? 1 : 0);
+			   4 * (def->has_json_paths ? 1 : 0) +
+			   8 * (def->has_multikey_parts ? 1 : 0);
+	if (def->has_multikey_parts) {
+		def->tuple_compare_with_key = NULL;
+		def->tuple_compare_with_key_hinted =
+			compare_with_key_slowpath_hinted_funcs[cmp_func_idx];
+		return;
+	}
 	if (def->is_nullable) {
 		if (key_def_is_sequential(def)) {
 			if (def->has_optional_parts) {
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 4439ce3e0..99b602cd5 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -33,6 +33,7 @@
 #include "json/json.h"
 #include "tuple_format.h"
 #include "coll_id_cache.h"
+#include "tuple.h"
 
 #include "third_party/PMurHash.h"
 
@@ -233,14 +234,19 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
 	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
 	       field->token.type != JSON_TOKEN_END) {
-		if (field->token.type == JSON_TOKEN_ANY) {
-			diag_set(ClientError, ER_UNSUPPORTED,
-				"Tarantool", "multikey indexes");
-			goto fail;
-		}
 		enum field_type expected_type =
 			field->token.type == JSON_TOKEN_STR ?
 			FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
+		if (field->token.type == JSON_TOKEN_ANY &&
+		    !json_token_is_multikey(&parent->token) &&
+		    !json_token_is_leaf(&parent->token)) {
+			assert(expected_type == FIELD_TYPE_ARRAY);
+			diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
+				 tuple_field_path(parent),
+				 field_type_strs[parent->type],
+				 "multikey array");
+			goto fail;
+		}
 		if (field_type1_contains_type2(parent->type, expected_type)) {
 			parent->type = expected_type;
 		} else {
@@ -475,9 +481,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 					break;
 				format->min_tuple_size += mp_sizeof_nil();
 			}
-		} else {
+		} else if (field->token.type == JSON_TOKEN_STR) {
 			/* Account a key string for map member. */
-			assert(field->token.type == JSON_TOKEN_STR);
 			format->min_tuple_size +=
 				mp_sizeof_str(field->token.len);
 		}
@@ -805,6 +810,59 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
 	return true;
 }
 
+/**
+ * Grow tuple_field_map allocation left with extent_size
+ * 0 bytes.
+ */
+static int
+tuple_field_map_realloc(uint32_t **field_map, uint32_t *field_map_size,
+			uint32_t extent_size, struct region *region)
+{
+	assert(extent_size % sizeof(uint32_t) == 0);
+	uint32_t new_field_map_size = *field_map_size + extent_size;
+	uint32_t *new_field_map = region_alloc(region, new_field_map_size);
+	if (new_field_map == NULL) {
+		diag_set(OutOfMemory, new_field_map_size, "region_alloc",
+			 "new_field_map");
+		return -1;
+	}
+	memset(new_field_map, 0, extent_size);
+	if (*field_map != NULL) {
+		memcpy((char *)new_field_map + extent_size,
+		       (char *)*field_map - *field_map_size, *field_map_size);
+	}
+	*field_map = (uint32_t *)((char *)new_field_map + new_field_map_size);
+	*field_map_size = new_field_map_size;
+	return 0;
+}
+
+/**
+ * Retrieve tuple field map extent by root_offset_slot, reallocate
+ * memory if required.
+ */
+struct field_map_ext *
+tuple_field_map_ext_retrieve(uint32_t **field_map, uint32_t *field_map_size,
+			     int32_t root_offset_slot, uint32_t extent_items,
+			     struct region *region)
+{
+	uint32_t extent_sz = sizeof(struct field_map_ext) +
+			     extent_items * sizeof(uint32_t);
+	uint32_t slot_off = (*field_map)[root_offset_slot];
+	if (slot_off == 0) {
+		/* Field map extent was not created yet. */
+		slot_off = *field_map_size / sizeof(uint32_t);
+		(*field_map)[root_offset_slot] = slot_off;
+		if (tuple_field_map_realloc(field_map, field_map_size,
+					    extent_sz, region) != 0)
+			return NULL;
+	}
+	struct field_map_ext *field_map_ext =
+		tuple_field_map_ext(*field_map, root_offset_slot);
+	assert(field_map_ext->size == 0 || field_map_ext->size == extent_items);
+	field_map_ext->size = extent_items;
+	return field_map_ext;
+}
+
 /** @sa declaration for details. */
 int
 tuple_field_map_create(struct tuple_format *format, const char *tuple,
@@ -817,14 +875,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 		return 0; /* Nothing to initialize */
 	}
 	struct region *region = &fiber()->gc;
-	*field_map_size = format->field_map_size;
-	*field_map = region_alloc(region, *field_map_size);
-	if (*field_map == NULL) {
-		diag_set(OutOfMemory, *field_map_size, "region_alloc",
-			 "field_map");
+	*field_map = NULL;
+	*field_map_size = 0;
+	if (tuple_field_map_realloc(field_map, field_map_size,
+				    format->field_map_size, region) != 0)
 		return -1;
-	}
-	*field_map = (uint32_t *)((char *)*field_map + *field_map_size);
 
 	const char *pos = tuple;
 	int rc = 0;
@@ -864,11 +919,6 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 	uint32_t defined_field_count = MIN(field_count, validate ?
 					   tuple_format_field_count(format) :
 					   format->index_field_count);
-	/*
-	 * Nullify field map to be able to detect by 0,
-	 * which key fields are absent in tuple_field().
-	 */
-	memset((char *)*field_map - *field_map_size, 0, *field_map_size);
 	/*
 	 * Prepare mp stack of the size equal to the maximum depth
 	 * of the indexed field in the format::fields tree
@@ -885,6 +935,12 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 	struct mp_stack stack;
 	mp_stack_create(&stack, format->fields_depth, frames);
 	mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+	/**
+	 * Pointer to the stack entry that represents the multikey
+	 * index root ARRAY entry.
+	 */
+	struct mp_frame *mk_parent_frame = NULL;
+
 	struct tuple_field *field;
 	struct json_token *parent = &format->fields.root;
 	while (true) {
@@ -901,6 +957,10 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 			mp_stack_pop(&stack);
 			if (mp_stack_is_empty(&stack))
 				goto finish;
+			if (json_token_is_multikey(parent)) {
+				/* Leave multikey index branch. */
+				mk_parent_frame = NULL;
+			}
 			parent = parent->parent;
 		}
 		/*
@@ -950,8 +1010,23 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 					 field_type_strs[field->type]);
 				goto error;
 			}
-			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+			    mk_parent_frame == NULL) {
 				(*field_map)[field->offset_slot] = pos - tuple;
+			} else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+				   mk_parent_frame != NULL) {
+				uint32_t extent_items = mk_parent_frame->count;
+				struct field_map_ext *field_map_ext =
+					tuple_field_map_ext_retrieve(field_map,
+							field_map_size,
+							field->offset_slot,
+							extent_items, region);
+				if (field_map_ext == NULL)
+					goto error;
+				int multikey_idx = mk_parent_frame->curr;
+				field_map_ext->field_map[
+					-multikey_idx] = pos - tuple;
+			}
 			if (required_fields != NULL)
 				bit_clear(required_fields, field->id);
 		}
@@ -968,6 +1043,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 					mp_decode_array(&pos) :
 					mp_decode_map(&pos);
 			mp_stack_push(&stack, type, size);
+			if (json_token_is_multikey(&field->token)) {
+				/* Track multikey entry frame. */
+				assert(type == MP_ARRAY);
+				mk_parent_frame = &frames[stack.used - 1];
+			}
 			parent = &field->token;
 		} else {
 			mp_next(&pos);
diff --git a/test/engine/json.result b/test/engine/json.result
index a790cdfbc..5c7a9b3b3 100644
--- a/test/engine/json.result
+++ b/test/engine/json.result
@@ -691,7 +691,85 @@ s = box.schema.space.create('withdata')
 ...
 idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
 ---
-- error: Tarantool does not support multikey indexes
+...
+s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+---
+- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}})
+---
+- [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:select({'James', 'Bond'})
+---
+- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:select({'Kirill', 'Shcherbatov'})
+---
+- []
+...
+idx:select({'Ivan', 'Ivanych'})
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:select({'Vasya', 'Pupkin'})
+---
+- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:select()
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+idx:select()
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+  - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:delete({'Vasya', 'Pupkin'})
+---
+- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+---
+- [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+---
+- [2, 1, [{'fname': 'James', 'sname': 'Bond'}]]
+...
+idx:select()
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+  - [2, 1, [{'fname': 'James', 'sname': 'Bond'}]]
+  - [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:drop()
+---
+...
+s = box.schema.space.create('withdata')
+---
+...
+pk = s:create_index('pk')
+---
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+---
+...
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
+---
+- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1',
+      'extra': {'sname': 'C2'}}]]
 ...
 s:drop()
 ---
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
index f9a7180b1..dc6016af9 100644
--- a/test/engine/json.test.lua
+++ b/test/engine/json.test.lua
@@ -198,4 +198,24 @@ s:drop()
 --
 s = box.schema.space.create('withdata')
 idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}})
+idx:select({'James', 'Bond'})
+idx:select({'Kirill', 'Shcherbatov'})
+idx:select({'Ivan', 'Ivanych'})
+idx:select({'Vasya', 'Pupkin'})
+idx:select()
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+idx:select()
+idx:delete({'Vasya', 'Pupkin'})
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+idx:select()
+s:drop()
+
+s = box.schema.space.create('withdata')
+pk = s:create_index('pk')
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
 s:drop()
-- 
2.21.0




More information about the Tarantool-patches mailing list