[tarantool-patches] [PATCH v1 4/5] box: introduce path_hash and tuple_field tree

Kirill Shcherbatov kshcherbatov at tarantool.org
Mon Aug 6 15:27:01 MSK 2018


To work with JSON-defined indexes we introduce format JSON
path hashtable data_path and a tree of intermediate path
fields attached to format root fields.

<Hashtable>: format->data_path
[2].FIO.fname -> field "fname" {type=str, off_slot=-1}
[2].FIO.sname -> field "sname" {type=str, off_slot=-2}

<Tree>:      format->field[2] {type = map}
                           |
                   FIO {type = map}
                           |
        "fname"            |             "sname"
{type=str,off_slot=-1} ____|____ {type = str,off_slot=-2}

Leaf fields used in Index have initialized offset_slot.
On new tuple creation we observe fields tree and use leaf
records to init tuple field_map.
At the same time we use data_path hashtable on tuple data
access by index(when cached offset_slot is invalid).

New routine tuple_format_add_json_path is used to construct
all internal structures for JSON path on new format creation
and duplicating.

Part of #1012.
---
 src/box/errcode.h            |   2 +-
 src/box/index_def.c          |   8 +-
 src/box/key_def.c            |   4 +-
 src/box/tuple_extract_key.cc |  23 +-
 src/box/tuple_format.c       | 531 +++++++++++++++++++++++++++++++++++++++++--
 src/box/tuple_format.h       |  20 +-
 test/box/misc.result         |  51 +++--
 test/engine/tuple.result     | 101 ++++++++
 test/engine/tuple.test.lua   |  27 +++
 9 files changed, 704 insertions(+), 63 deletions(-)

diff --git a/src/box/errcode.h b/src/box/errcode.h
index 3d5f66a..8cbd59d 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -107,7 +107,7 @@ struct errcode_record {
 	/* 52 */_(ER_FUNCTION_EXISTS,		"Function '%s' already exists") \
 	/* 53 */_(ER_BEFORE_REPLACE_RET,	"Invalid return value of space:before_replace trigger: expected tuple or nil, got %s") \
 	/* 54 */_(ER_FUNCTION_MAX,		"A limit on the total number of functions has been reached: %u") \
-	/* 55 */_(ER_UNUSED4,			"") \
+	/* 55 */_(ER_DATA_MISMATCH_INDEX_PART,	"Tuple doesn't math document structure defined as index") \
 	/* 56 */_(ER_USER_MAX,			"A limit on the total number of users has been reached: %u") \
 	/* 57 */_(ER_NO_SUCH_ENGINE,		"Space engine '%s' does not exist") \
 	/* 58 */_(ER_RELOAD_CFG,		"Can't set option '%s' dynamically") \
diff --git a/src/box/index_def.c b/src/box/index_def.c
index 9cda63c..a2c5bd4 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -209,8 +209,12 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
 			 * Courtesy to a user who could have made
 			 * a typo.
 			 */
-			if (index_def->key_def->parts[i].fieldno ==
-			    index_def->key_def->parts[j].fieldno) {
+			struct key_part *part_a = &index_def->key_def->parts[i];
+			struct key_part *part_b = &index_def->key_def->parts[j];
+			if ((part_a->fieldno == part_b->fieldno &&
+			    part_a->path == NULL && part_b->path == NULL) ||
+			    (part_a->path != NULL && part_b->path != NULL &&
+			     strcmp(part_a->path, part_b->path) == 0)) {
 				diag_set(ClientError, ER_MODIFY_INDEX,
 					 index_def->name, space_name,
 					 "same key part is indexed twice");
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 79e07f8..c454377 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -583,8 +583,8 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
 			if (node.type != JSON_PATH_NUM) {
 				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
 					part->fieldno,
-					"invalid JSON path: first part should "
-					"be defined as array");
+					"invalid JSON path: first path part "\
+					"should be defined as array");
 				return -1;
 			}
 			if (node.num - TUPLE_INDEX_BASE != part->fieldno) {
diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
index bbd87f5..6a2690b 100644
--- a/src/box/tuple_extract_key.cc
+++ b/src/box/tuple_extract_key.cc
@@ -4,14 +4,22 @@
 
 enum { MSGPACK_NULL = 0xc0 };
 
+static bool
+key_def_parts_sequential(const struct key_def *def, int i)
+{
+	uint32_t fieldno1 = def->parts[i + 1].fieldno;
+	uint32_t fieldno2 = def->parts[i].fieldno + 1;
+	return fieldno1 == fieldno2 && def->parts[i].path != NULL &&
+		def->parts[i + 1].path != NULL;
+}
+
 /** True, if a key con contain two or more parts in sequence. */
 static bool
 key_def_contains_sequential_parts(const struct key_def *def)
 {
-	for (uint32_t i = 0; i < def->part_count - 1; ++i) {
-		if (def->parts[i].fieldno + 1 == def->parts[i + 1].fieldno)
+	for (uint32_t i = 0; i < def->part_count - 1; ++i)
+		if (key_def_parts_sequential(def, i))
 			return true;
-	}
 	return false;
 }
 
@@ -120,8 +128,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
 			 * minimize tuple_field_raw() calls.
 			 */
 			for (; i < part_count - 1; i++) {
-				if (key_def->parts[i].fieldno + 1 !=
-				    key_def->parts[i + 1].fieldno) {
+				if (key_def_parts_sequential(key_def, i)) {
 					/*
 					 * End of sequential part.
 					 */
@@ -160,8 +167,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
 			 * minimize tuple_field_raw() calls.
 			 */
 			for (; i < part_count - 1; i++) {
-				if (key_def->parts[i].fieldno + 1 !=
-				    key_def->parts[i + 1].fieldno) {
+				if (key_def_parts_sequential(key_def, i)) {
 					/*
 					 * End of sequential part.
 					 */
@@ -227,8 +233,7 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
 		uint32_t fieldno = key_def->parts[i].fieldno;
 		uint32_t null_count = 0;
 		for (; i < key_def->part_count - 1; i++) {
-			if (key_def->parts[i].fieldno + 1 !=
-			    key_def->parts[i + 1].fieldno)
+			if (key_def_parts_sequential(key_def, i))
 				break;
 		}
 		uint32_t end_fieldno = key_def->parts[i].fieldno;
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 40cd48a..f03978d 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -32,6 +32,34 @@
 #include "tuple.h"
 #include "tuple_format.h"
 
+path_hash_f path_hash;
+
+#define mh_name _field
+struct mh_field_key_t {
+	const char *path;
+	uint32_t path_len;
+	uint32_t path_hash;
+};
+#define mh_key_t struct mh_field_key_t *
+
+struct mh_field_node_t {
+	const char *path;
+	uint32_t path_len;
+	uint32_t path_hash;
+	struct tuple_field *field;
+};
+#define mh_node_t struct mh_field_node_t
+
+#define mh_arg_t void *
+#define mh_hash(a, arg) ((a)->path_hash)
+#define mh_hash_key(a, arg) mh_hash(a, arg)
+#define mh_cmp(a, b, arg) ((a)->path_len != (b)->path_len || \
+			   memcmp((a)->path, (b)->path, (a)->path_len))
+#define mh_cmp_key(a, b, arg) mh_cmp(a, b, arg)
+#define MH_SOURCE 1
+#include "salad/mhash.h" /* Create mh_field_t hash. */
+
+
 /** Global table of tuple formats */
 struct tuple_format **tuple_formats;
 static intptr_t recycled_format_ids = FORMAT_ID_NIL;
@@ -41,10 +69,347 @@ static uint64_t format_epoch = 1;
 static uint32_t formats_size = 0, formats_capacity = 0;
 
 static const struct tuple_field tuple_field_default = {
-	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false, false,
+	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false, false, {{NULL, 0}, NULL}
 };
 
 /**
+ * Propagate @a field to MessagePack(field)[index].
+ * @param[in][out] field Field to propagate.
+ * @param index 1-based index to propagate to.
+ *
+ * @retval  0 Success, the index was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_index(const char **field, uint64_t index);
+
+/**
+ * Propagate @a field to MessagePack(field)[key].
+ * @param[in][out] field Field to propagate.
+ * @param key Key to propagate to.
+ * @param len Length of @a key.
+ *
+ * @retval  0 Success, the index was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_key(const char **field, const char *key, int len);
+
+/**
+ * Get @hashtable record by key @path, @path_len.
+ * @param hashtable to lookup,
+ * @param path string.
+ * @param path_len length of @path.
+ * @retval NULL on nothing found.
+ * @retval hashtable record pointer.
+ */
+static struct mh_field_node_t *
+json_path_hash_get(struct mh_field_t *hashtable, const char *path,
+		   uint32_t path_len)
+{
+	assert(hashtable != NULL);
+	uint32_t hash = field_name_hash(path, path_len);
+	struct mh_field_key_t key = {path, path_len, hash};
+	mh_int_t rc = mh_field_find(hashtable, &key, NULL);
+	if (rc == mh_end(hashtable))
+		return NULL;
+	return mh_field_node(hashtable, rc);
+}
+
+/**
+ * Create a new hashtable object.
+ * @param[out] hashtable pointer to object to create.
+ * @retval -1 on error.
+ * @retval 0 on success.
+ */
+static int
+json_path_hash_create(struct mh_field_t **hashtable)
+{
+	assert(*hashtable == NULL);
+	*hashtable = mh_field_new();
+	if (*hashtable == NULL) {
+		diag_set(OutOfMemory, sizeof(*hashtable), "mh_field_new",
+			"hashtable");
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Delete @hashtable object.
+ * @param hashtable pointer to object to delete.
+ * @param free_path release key path part.
+ */
+static void
+json_path_hash_delete(struct mh_field_t *hashtable, bool free_path)
+{
+	if (hashtable == NULL)
+		return;
+	while (mh_size(hashtable)) {
+		mh_int_t n = mh_first(hashtable);
+		if (free_path)
+			free((void *)mh_field_node(hashtable, n)->path);
+		mh_field_del(hashtable, n, NULL);
+	}
+	mh_field_delete(hashtable);
+}
+
+/**
+ * Insert a new record to hashtable.
+ * @param hashtable storage to insert new record.
+ * @param path string.
+ * @param path_len length of @path.
+ * @param tuple_field value to store in @hashtable.
+ * @retval -1 on error.
+ * @retval 0 on success.
+ */
+static int
+json_path_hash_insert(struct mh_field_t *hashtable, const char *path,
+		      uint32_t path_len, struct tuple_field *field)
+{
+	assert(hashtable != NULL);
+	/* Test if record already present in hash. */
+	uint32_t path_hash = field_name_hash(path, path_len);
+	struct mh_field_node_t name_node =
+		{path, path_len, path_hash, field};
+	mh_int_t rc = mh_field_put(hashtable, &name_node, NULL, NULL);
+	if (rc == mh_end(hashtable)) {
+		diag_set(OutOfMemory, sizeof(*hashtable), "mh_field_put",
+			"hashtable");
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Construct field tree level for JSON path part.
+ *
+ * @param[in, out] tuple_field pointer to record to start with
+ *                 would be changed to record that math
+ *                 @part lexeme.
+ * @param fieldno number of root space field.
+ * @param part JSON path lexeme to represent in field tree.
+ * @retval -1 on error.
+ * @retval 0 on success.
+ */
+static int
+json_field_tree_append(struct tuple_field **field_subtree, uint32_t fieldno,
+		       struct json_path_node *part)
+{
+	enum field_type type;
+	struct tuple_field *field = *field_subtree;
+	switch (part->type) {
+	case JSON_PATH_NUM: {
+		type = FIELD_TYPE_ARRAY;
+		if (field->type != FIELD_TYPE_ANY && field->type != type)
+			goto error_type_mistmatch;
+		field->type = type;
+		/* Create or resize field array if required. */
+		if (field->array == NULL) {
+			field->array =
+				calloc(part->num, sizeof(struct tuple_field *));
+			if (field->array == NULL) {
+				diag_set(OutOfMemory,
+					part->num * sizeof(struct tuple_field *),
+					"malloc", "field->array");
+				return -1;
+			}
+			field->type = type;
+			field->array_size = part->num;
+		} else if (part->num >= field->array_size) {
+			void *array =
+				realloc(field->array,
+					part->num * sizeof(struct tuple_field *));
+			if (array == NULL) {
+				diag_set(OutOfMemory,
+					sizeof(struct tuple_field *), "realloc",
+					"array");
+				return -1;
+			}
+			memset(field->array[field->array_size], 0,
+				part->num - field->array_size);
+			field->array = array;
+			field->array_size = part->num;
+		}
+		/* Record already exists. No actions required */
+		if (field->array[part->num - TUPLE_INDEX_BASE] != NULL) {
+			*field_subtree =
+				field->array[part->num - TUPLE_INDEX_BASE];
+			return 0;
+		}
+		break;
+	}
+	case JSON_PATH_STR: {
+		type = FIELD_TYPE_MAP;
+		if (field->type != FIELD_TYPE_ANY && field->type != type)
+			goto error_type_mistmatch;
+		field->type = type;
+		if (field->map == NULL &&
+		    json_path_hash_create(&field->map) != 0)
+			return -1;
+		struct mh_field_node_t *node =
+			json_path_hash_get(field->map, part->str, part->len);
+		if (node != NULL) {
+			*field_subtree = node->field;
+			return 0;
+		}
+		break;
+	}
+	default:
+		unreachable();
+	}
+
+	/* Construct and insert a new record. */
+	struct tuple_field *new_field =
+		malloc(sizeof(struct tuple_field));
+	if (new_field == NULL) {
+		diag_set(OutOfMemory, sizeof(struct tuple_field), "malloc",
+			"new_field");
+		return -1;
+	}
+	*new_field = tuple_field_default;
+	if (field->type == FIELD_TYPE_MAP) {
+		if (json_path_hash_insert(field->map, part->str, part->len,
+					  new_field) != 0) {
+			free(new_field);
+			return -1;
+		}
+	} else if (field->type == FIELD_TYPE_ARRAY) {
+		field->array[part->num - TUPLE_INDEX_BASE] = new_field;
+	}
+	*field_subtree = new_field;
+
+	return 0;
+
+error_type_mistmatch:
+	diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
+		tt_sprintf("%d", fieldno + TUPLE_INDEX_BASE),
+		field_type_strs[type], field_type_strs[field->type]);
+	return -1;
+}
+
+/**
+ * Delete @field_subtree object.
+ * @param field_subtree to delete.
+ */
+static void
+json_field_tree_delete(struct tuple_field *field_subtree)
+{
+	if (field_subtree->type == FIELD_TYPE_MAP &&
+	    field_subtree->map != NULL) {
+		mh_int_t i;
+		mh_foreach(field_subtree->map, i) {
+			struct tuple_field *field =
+				mh_field_node(field_subtree->map, i)->field;
+			json_field_tree_delete(field);
+			free(field);
+		}
+		/*
+		 * Field tree map records refer to string in
+		 * format->path_hash.
+		 */
+		json_path_hash_delete(field_subtree->map, false);
+	} else if (field_subtree->type == FIELD_TYPE_ARRAY &&
+		   field_subtree->array != NULL) {
+		for (uint32_t i = 0; i < field_subtree->array_size; i++) {
+			json_field_tree_delete(field_subtree->array[i]);
+			free(field_subtree->array[i]);
+		}
+		free(field_subtree->array);
+	}
+}
+
+/**
+ * Add new JSON @path to @format.
+ * @param format to modify.
+ * @param path string to add.
+ * @param path_len length of @path.
+ * @param field_type type of field by @path.
+ * @param[out] leaf_field pointer to leaf field.
+ */
+static int
+tuple_format_add_json_path(struct tuple_format *format, const char *path,
+			   uint32_t path_len, enum field_type type,
+			   struct tuple_field **leaf_field)
+{
+
+	struct mh_field_node_t *leaf_node = NULL;
+	if (format->path_hash == NULL) {
+		/* Initialize path hashtable. */
+		if (json_path_hash_create(&format->path_hash) != 0)
+			return -1;
+	}
+
+	/*
+	 * Get root field by index.
+	 * Path is specified in canonical form: [i]...
+	 */
+	int rc = 0;
+	struct json_path_parser parser;
+	struct json_path_node node;
+	json_path_parser_create(&parser, path, path_len);
+	rc = json_path_next(&parser, &node);
+	assert(rc == 0 && node.type == JSON_PATH_NUM);
+	assert(node.num < format->field_count + 1);
+	uint32_t root_fieldno = node.num - TUPLE_INDEX_BASE;
+	struct tuple_field *field =  &format->fields[root_fieldno];
+
+	/* Test if path is already registered. */
+	if ((leaf_node = json_path_hash_get(format->path_hash, path,
+					    path_len)) != NULL) {
+		if (leaf_node->field->type != type) {
+			const char *err =
+				tt_sprintf("JSON path '%.*s' has been already "
+					   "constructed for '%s' leaf record",
+					   path_len, path,
+					   field_type_strs[
+						leaf_node->field->type]);
+			diag_set(ClientError,  ER_WRONG_INDEX_OPTIONS,
+				node.num, err);
+			return -1;
+		}
+		*leaf_field = leaf_node->field;
+		/* Path already registered. */
+		return 0;
+	}
+
+	/*
+	 * Hash table would hold string that path tree
+	 * would refer to.
+	 */
+	if ((path = strdup(path)) == NULL) {
+		diag_set(OutOfMemory, path_len + 1, "strdup", "path");
+		return -1;
+	}
+
+	/* Initialize dummy path_hash record. */
+	if (json_path_hash_insert(format->path_hash, path, path_len,
+				  NULL) != 0) {
+		free((void *)path);
+		return -1;
+	}
+
+	/* Build data path tree. */
+	while ((rc = json_path_next(&parser, &node)) == 0 &&
+		node.type != JSON_PATH_END) {
+		if (json_field_tree_append(&field, root_fieldno, &node) != 0)
+			return -1;
+	}
+	/* It should be canonical valid JSON. */
+	assert(rc == 0 && node.type == JSON_PATH_END);
+	/* Leaf record is a new object as JSON path unique. */
+	field->type = type;
+
+	/* Finish hashtable record init. */
+	leaf_node = json_path_hash_get(format->path_hash, path, path_len);
+	assert(leaf_node != NULL && leaf_node->field == NULL);
+	leaf_node->field = field;
+
+	*leaf_field = field;
+	return 0;
+}
+
+/**
  * Extract all available type info from keys and field
  * definitions.
  */
@@ -66,6 +431,9 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 		format->fields[i].type = fields[i].type;
 		format->fields[i].offset_slot = TUPLE_OFFSET_SLOT_NIL;
 		format->fields[i].is_nullable = fields[i].is_nullable;
+		/* Don't need to init format->fields[i].map. */
+		format->fields[i].array = NULL;
+		format->fields[i].array_size = 0;
 	}
 	/* Initialize remaining fields */
 	for (uint32_t i = field_count; i < format->field_count; i++)
@@ -105,10 +473,12 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 			 * used in tuple_format.
 			 */
 			if (field_type1_contains_type2(field->type,
-						       part->type)) {
+						       part->type) &&
+			    part->path == NULL) {
 				field->type = part->type;
 			} else if (! field_type1_contains_type2(part->type,
-								field->type)) {
+								field->type) &&
+				   part->path == NULL) {
 				const char *name;
 				int fieldno = part->fieldno + TUPLE_INDEX_BASE;
 				if (part->fieldno >= field_count) {
@@ -135,11 +505,26 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 			 * First field is always simply accessible,
 			 * so we don't store an offset for it.
 			 */
-			if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
+			if (part->path != NULL) {
+				assert(is_sequential == false);
+				struct tuple_field *leaf_field = NULL;
+				if (tuple_format_add_json_path(format,
+							       part->path,
+							       part->path_len,
+							       part->type,
+							       &leaf_field) != 0)
+					return -1;
+				assert(leaf_field != NULL);
+				if (leaf_field->offset_slot ==
+				    TUPLE_OFFSET_SLOT_NIL)
+					leaf_field->offset_slot = --current_slot;
+				if (part->slot_cache != current_slot)
+					format_epoch_changed = true;
+			} else if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
 			    is_sequential == false && part->fieldno > 0) {
 
 				field->offset_slot = --current_slot;
-				if (part->slot_cache != field->offset_slot)
+				if (part->slot_cache != current_slot)
 					format_epoch_changed = true;
 			}
 		}
@@ -248,6 +633,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
 	format->index_field_count = index_field_count;
 	format->exact_field_count = 0;
 	format->min_field_count = 0;
+	format->path_hash = NULL;
+	format->epoch = format_epoch;
 	return format;
 }
 
@@ -255,6 +642,9 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
 static inline void
 tuple_format_destroy(struct tuple_format *format)
 {
+	for (uint32_t i = 0; i < format->field_count; i++)
+		json_field_tree_delete(&format->fields[i]);
+	json_path_hash_delete(format->path_hash, true);
 	tuple_dictionary_unref(format->dict);
 }
 
@@ -345,6 +735,32 @@ tuple_format_dup(struct tuple_format *src)
 		return NULL;
 	}
 	memcpy(format, src, total);
+
+	/* Fill with NULLs for normal destruction on error. */
+	format->path_hash = NULL;
+	for (uint32_t i = 0; i < format->field_count; i++) {
+		format->fields[i].array = NULL;
+		format->fields[i].array_size = 0;
+	}
+
+	if (src->path_hash != NULL) {
+		mh_int_t i;
+		mh_foreach(src->path_hash, i) {
+			struct mh_field_node_t *node =
+				mh_field_node(src->path_hash, i);
+			struct tuple_field *field = node->field;
+			int32_t offset_slot = field->offset_slot;
+			if (tuple_format_add_json_path(format, node->path,
+						       node->path_len,
+						       field->type,
+						       &field) != 0)
+				goto error;
+			assert(field != NULL);
+			/* Store offset_slot in a new leaf record. */
+			field->offset_slot = offset_slot;
+		}
+	}
+
 	tuple_dictionary_ref(format->dict);
 	format->id = FORMAT_ID_NIL;
 	format->refs = 0;
@@ -354,6 +770,59 @@ tuple_format_dup(struct tuple_format *src)
 		return NULL;
 	}
 	return format;
+
+error:
+	tuple_format_destroy(format);
+	return NULL;
+}
+
+static int
+tuple_init_json_field_map(const struct tuple_field *field, uint32_t idx,
+			  uint32_t *field_map, const char *tuple,
+			  const char *offset)
+{
+	if (field->type == FIELD_TYPE_MAP) {
+		mh_int_t i;
+		mh_foreach(field->map, i) {
+			struct mh_field_node_t *node =
+				mh_field_node(field->map, i);
+			const char *raw = offset;
+			if (tuple_field_go_to_key(&raw, node->path,
+						  node->path_len) != 0) {
+				diag_set(ClientError,
+					ER_DATA_MISMATCH_INDEX_PART);
+				return -1;
+			}
+			if (tuple_init_json_field_map(node->field, idx,
+						      field_map, tuple,
+						      raw) != 0)
+				return -1;
+		}
+	} else if (field->type == FIELD_TYPE_ARRAY) {
+		for (uint32_t i = 0; i < field->array_size; i++) {
+			struct tuple_field *field = field->array[i];
+			if (field == NULL)
+				continue;
+			const char *raw = offset;
+			if (tuple_field_go_to_index(&raw, i) != 0) {
+				diag_set(ClientError,
+					ER_DATA_MISMATCH_INDEX_PART);
+				return -1;
+			}
+			if (tuple_init_json_field_map(field, idx, field_map,
+						      tuple, raw) != 0)
+				return -1;
+		}
+	} else {
+		/* Leaf field node. */
+		assert(field->offset_slot != TUPLE_OFFSET_SLOT_NIL);
+		if (key_mp_type_validate(field->type, mp_typeof(*offset),
+					 ER_KEY_PART_TYPE, idx,
+					 field->is_nullable) != 0)
+			return -1;
+		field_map[field->offset_slot] = (uint32_t)(offset - tuple);
+	}
+	return 0;
 }
 
 /** @sa declaration for details. */
@@ -411,6 +880,12 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
 			field_map[field->offset_slot] =
 				(uint32_t) (pos - tuple);
 		}
+		if (field->map != NULL) {
+			assert(field->array != NULL);
+			if (tuple_init_json_field_map(field, i, field_map,
+						      tuple, pos) != 0)
+				return -1;
+		}
 		mp_next(&pos);
 	}
 	return 0;
@@ -471,14 +946,6 @@ box_tuple_format_unref(box_tuple_format_t *format)
 	tuple_format_unref(format);
 }
 
-/**
- * Propagate @a field to MessagePack(field)[index].
- * @param[in][out] field Field to propagate.
- * @param index 1-based index to propagate to.
- *
- * @retval  0 Success, the index was found.
- * @retval -1 Not found.
- */
 static inline int
 tuple_field_go_to_index(const char **field, uint64_t index)
 {
@@ -517,15 +984,6 @@ tuple_field_go_to_index(const char **field, uint64_t index)
 	return -1;
 }
 
-/**
- * Propagate @a field to MessagePack(field)[key].
- * @param[in][out] field Field to propagate.
- * @param key Key to propagate to.
- * @param len Length of @a key.
- *
- * @retval  0 Success, the index was found.
- * @retval -1 Not found.
- */
 static inline int
 tuple_field_go_to_key(const char **field, const char *key, int len)
 {
@@ -565,17 +1023,33 @@ tuple_field_by_part(const struct tuple *tuple, const struct key_def *def,
 		assert(format->epoch > 0);
 		int32_t offset_slot = TUPLE_OFFSET_SLOT_NIL;
 		if (part->format_epoch != format->epoch) {
-			offset_slot = format->fields[field_no].offset_slot;
+			/* Cache miss. */
+			if (part->path != NULL) {
+				struct mh_field_t *ht = format->path_hash;
+				struct mh_field_node_t *node =
+					json_path_hash_get(ht, part->path,
+							   part->path_len);
+				assert(node != NULL);
+				offset_slot = node->field->offset_slot;
+			} else {
+				offset_slot =
+					format->fields[field_no].offset_slot;
+			}
+			/* Update cache only for greater epoch. */
 			if (format->epoch > part->format_epoch) {
 				part->slot_cache = offset_slot;
 				part->format_epoch = format->epoch;
 			}
+		} else {
+			/* Cache hit. */
+			offset_slot = part->slot_cache;
 		}
 		if (offset_slot != TUPLE_OFFSET_SLOT_NIL) {
 			return field_map[offset_slot] != 0 ?
 			       data + field_map[offset_slot] : NULL;
 		}
 	}
+	assert(part->path == NULL);
 	ERROR_INJECT(ERRINJ_TUPLE_FIELD, return NULL);
 	uint32_t field_count = mp_decode_array(&data);
 	if (unlikely(field_no >= field_count))
@@ -593,6 +1067,17 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 {
 	assert(path_len > 0);
 	uint32_t fieldno;
+	if (format->path_hash != NULL) {
+		struct mh_field_node_t *ht_record =
+			json_path_hash_get(format->path_hash, path, path_len);
+		if (ht_record != NULL) {
+			int32_t offset_slot = ht_record->field->offset_slot;
+			assert(offset_slot != TUPLE_OFFSET_SLOT_NIL);
+			assert(field_map[offset_slot] != 0);
+			*field = tuple + field_map[offset_slot];
+			return 0;
+		}
+	}
 	/*
 	 * It is possible, that a field has a name as
 	 * well-formatted JSON. For example 'a.b.c.d' or '[1]' can
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 3cb1284..1154f74 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -63,6 +63,7 @@ enum { TUPLE_OFFSET_SLOT_NIL = INT32_MAX };
 
 struct tuple;
 struct tuple_format;
+struct tuple_field_map;
 
 /** Engine-specific tuple format methods. */
 struct tuple_format_vtab {
@@ -81,6 +82,11 @@ struct tuple_format_vtab {
 	             const char *end);
 };
 
+struct mh_field_t;
+typedef size_t (*path_hash_f)(const char *str, uint32_t len);
+extern path_hash_f path_hash;
+
+
 /** Tuple field meta information for tuple_format. */
 struct tuple_field {
 	/**
@@ -92,7 +98,7 @@ struct tuple_field {
 	 */
 	enum field_type type;
 	/**
-	 * Offset slot in field map in tuple. Normally tuple
+	 * Offset slot ix`n field map in tuple. Normally tuple
 	 * stores field map - offsets of all fields participating
 	 * in indexes. This allows quick access to most used
 	 * fields without parsing entire mspack. This member
@@ -108,6 +114,16 @@ struct tuple_field {
 	bool is_key_part;
 	/** True, if a field can store NULL. */
 	bool is_nullable;
+	/** Tree child records. */
+	union {
+		/** Array of fields. */
+		struct  {
+			struct tuple_field **array;
+			uint32_t array_size;
+		};
+		/** Hashtable: path -> tuple_field. */
+		struct mh_field_t *map;
+	};
 };
 
 /**
@@ -163,6 +179,8 @@ struct tuple_format {
 	 * Shared names storage used by all formats of a space.
 	 */
 	struct tuple_dictionary *dict;
+	/** JSON path hash table */
+	struct mh_field_t *path_hash;
 	/* Formats of the fields */
 	struct tuple_field fields[0];
 };
diff --git a/test/box/misc.result b/test/box/misc.result
index 4895a78..556f004 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -348,7 +348,7 @@ t;
   - 'box.error.CANT_CREATE_COLLATION : 150'
   - 'box.error.USER_EXISTS : 46'
   - 'box.error.WAL_IO : 40'
-  - 'box.error.PROC_RET : 21'
+  - 'box.error.RTREE_RECT : 101'
   - 'box.error.PRIV_GRANTED : 89'
   - 'box.error.CREATE_SPACE : 9'
   - 'box.error.GRANT : 88'
@@ -359,7 +359,7 @@ t;
   - 'box.error.VINYL_MAX_TUPLE_SIZE : 139'
   - 'box.error.LOAD_FUNCTION : 99'
   - 'box.error.INVALID_XLOG : 74'
-  - 'box.error.READ_VIEW_ABORTED : 130'
+  - 'box.error.PRIV_NOT_GRANTED : 91'
   - 'box.error.TRANSACTION_CONFLICT : 97'
   - 'box.error.GUEST_USER_PASSWORD : 96'
   - 'box.error.PROC_C : 102'
@@ -370,7 +370,7 @@ t;
   - 'box.error.CFG : 59'
   - 'box.error.NO_SUCH_FIELD : 37'
   - 'box.error.CONNECTION_TO_SELF : 117'
-  - 'box.error.FUNCTION_MAX : 54'
+  - 'box.error.PROC_LUA : 32'
   - 'box.error.ILLEGAL_PARAMS : 1'
   - 'box.error.PARTIAL_KEY : 136'
   - 'box.error.SAVEPOINT_NO_TRANSACTION : 114'
@@ -397,36 +397,37 @@ t;
   - 'box.error.FUNCTION_EXISTS : 52'
   - 'box.error.UPDATE_ARG_TYPE : 26'
   - 'box.error.CROSS_ENGINE_TRANSACTION : 81'
-  - 'box.error.FORMAT_MISMATCH_INDEX_PART : 27'
   - 'box.error.injection : table: <address>
+  - 'box.error.FORMAT_MISMATCH_INDEX_PART : 27'
+  - 'box.error.IDENTIFIER : 70'
   - 'box.error.FUNCTION_TX_ACTIVE : 30'
-  - 'box.error.ITERATOR_TYPE : 72'
   - 'box.error.TRANSACTION_YIELD : 154'
+  - 'box.error.NULLABLE_MISMATCH : 153'
   - 'box.error.NO_SUCH_ENGINE : 57'
   - 'box.error.COMMIT_IN_SUB_STMT : 122'
-  - 'box.error.NULLABLE_MISMATCH : 153'
-  - 'box.error.UNSUPPORTED : 5'
-  - 'box.error.LAST_DROP : 15'
+  - 'box.error.RELOAD_CFG : 58'
   - 'box.error.SPACE_FIELD_IS_DUPLICATE : 149'
+  - 'box.error.LAST_DROP : 15'
+  - 'box.error.SEQUENCE_OVERFLOW : 147'
   - 'box.error.DECOMPRESSION : 124'
   - 'box.error.CREATE_SEQUENCE : 142'
   - 'box.error.CREATE_USER : 43'
-  - 'box.error.SEQUENCE_OVERFLOW : 147'
+  - 'box.error.DATA_MISMATCH_INDEX_PART : 55'
   - 'box.error.INSTANCE_UUID_MISMATCH : 66'
-  - 'box.error.RELOAD_CFG : 58'
+  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
   - 'box.error.SYSTEM : 115'
   - 'box.error.KEY_PART_IS_TOO_LONG : 118'
-  - 'box.error.MORE_THAN_ONE_TUPLE : 41'
+  - 'box.error.INJECTION : 8'
   - 'box.error.TRUNCATE_SYSTEM_SPACE : 137'
   - 'box.error.NO_SUCH_SAVEPOINT : 61'
   - 'box.error.VY_QUOTA_TIMEOUT : 135'
-  - 'box.error.PRIV_NOT_GRANTED : 91'
+  - 'box.error.READ_VIEW_ABORTED : 130'
   - 'box.error.WRONG_INDEX_OPTIONS : 108'
   - 'box.error.INVALID_VYLOG_FILE : 133'
   - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127'
-  - 'box.error.BEFORE_REPLACE_RET : 53'
+  - 'box.error.PROTOCOL : 104'
   - 'box.error.USER_MAX : 56'
-  - 'box.error.INVALID_MSGPACK : 20'
+  - 'box.error.BEFORE_REPLACE_RET : 53'
   - 'box.error.TUPLE_NOT_ARRAY : 22'
   - 'box.error.KEY_PART_COUNT : 31'
   - 'box.error.ALTER_SPACE : 12'
@@ -435,7 +436,7 @@ t;
   - 'box.error.DROP_SEQUENCE : 144'
   - 'box.error.INVALID_XLOG_ORDER : 76'
   - 'box.error.UNKNOWN_REQUEST_TYPE : 48'
-  - 'box.error.PROC_LUA : 32'
+  - 'box.error.PROC_RET : 21'
   - 'box.error.SUB_STMT_MAX : 121'
   - 'box.error.ROLE_NOT_GRANTED : 92'
   - 'box.error.SPACE_EXISTS : 10'
@@ -446,36 +447,36 @@ t;
   - 'box.error.REPLICASET_UUID_MISMATCH : 63'
   - 'box.error.UPDATE_FIELD : 29'
   - 'box.error.INDEX_EXISTS : 85'
-  - 'box.error.SPLICE : 25'
+  - 'box.error.DROP_SPACE : 11'
   - 'box.error.COMPRESSION : 119'
   - 'box.error.INVALID_ORDER : 68'
-  - 'box.error.UNKNOWN : 0'
+  - 'box.error.SPLICE : 25'
   - 'box.error.NO_SUCH_GROUP : 155'
-  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
+  - 'box.error.INVALID_MSGPACK : 20'
   - 'box.error.DROP_PRIMARY_KEY : 17'
   - 'box.error.NULLABLE_PRIMARY : 152'
   - 'box.error.NO_SUCH_SEQUENCE : 145'
-  - 'box.error.INJECTION : 8'
+  - 'box.error.FUNCTION_MAX : 54'
   - 'box.error.INVALID_UUID : 64'
-  - 'box.error.IDENTIFIER : 70'
+  - 'box.error.UNSUPPORTED : 5'
   - 'box.error.TIMEOUT : 78'
+  - 'box.error.ITERATOR_TYPE : 72'
   - 'box.error.REPLICA_MAX : 73'
   - 'box.error.NO_SUCH_ROLE : 82'
-  - 'box.error.DROP_SPACE : 11'
   - 'box.error.MISSING_REQUEST_FIELD : 69'
   - 'box.error.MISSING_SNAPSHOT : 93'
   - 'box.error.WRONG_SPACE_OPTIONS : 111'
   - 'box.error.READONLY : 7'
-  - 'box.error.RTREE_RECT : 101'
+  - 'box.error.UNKNOWN : 0'
   - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
   - 'box.error.NO_CONNECTION : 77'
   - 'box.error.UNSUPPORTED_PRIV : 98'
   - 'box.error.WRONG_SCHEMA_VERSION : 109'
   - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
-  - 'box.error.PROTOCOL : 104'
-  - 'box.error.INVALID_XLOG_TYPE : 125'
-  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.MORE_THAN_ONE_TUPLE : 41'
   - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
+  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.INVALID_XLOG_TYPE : 125'
 ...
 test_run:cmd("setopt delimiter ''");
 ---
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 7fb0916..02b92f3 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -891,6 +891,107 @@ t["{"]
 s:drop()
 ---
 ...
+--
+-- gh-1012: Indexes for JSON-defined paths.
+--
+s_withdata = box.schema.space.create('withdata', {engine = engine})
+---
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO["fname"]'}, {3, 'str', path = '[3]["FIO"].fname'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key
+    part is indexed twice'
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '[3]["FIO"]["fname"]'}}})
+---
+- error: 'Wrong index options (field 2): ''path'' must be string'
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'field.FIO.fname'}}})
+---
+- error: 'Wrong index options (field 2): invalid JSON path: first path part should
+    be defined as array'
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = '[3].FIO'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
+    ''map'' is not supported'
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[3][1]'}}})
+---
+- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
+    ''array'' is not supported'
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO'}, {3, 'str', path = '[3]["FIO"].fname'}}})
+---
+- error: Field 3 has type 'map' in one index, but type 'string' in another
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3][1].sname'}, {3, 'str', path = '[3]["FIO"].fname'}}})
+---
+- error: Field 3 has type 'map' in one index, but type 'array' in another
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[2].FIO.fname'}}})
+---
+- error: 'Wrong index options (field 2): invalid JSON path: first part refers to invalid
+    field'
+...
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO....fname'}}})
+---
+- error: 'Wrong index options (field 3): invalid JSON path: path has invalid structure'
+...
+idx = s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3]["FIO"]["fname"]'}, {3, 'str', path = '[3]["FIO"]["sname"]'}}})
+---
+...
+assert(idx ~= nil)
+---
+- true
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = 666}, 4, 5}
+---
+- error: Tuple doesn't math document structure defined as index
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
+---
+- error: 'Supplied key type of part 2 does not match index part type: expected string'
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = "James"}}, 4, 5}
+---
+- error: Tuple doesn't math document structure defined as index
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- [7, 7, {'town': 'NY', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5}
+---
+- error: Duplicate key exists in unique index 'test1' in space 'withdata'
+...
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5}
+---
+- [7, 7, {'town': 'NY', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+  4, 5]
+...
+idx:select()
+---
+- - [7, 7, {'town': 'NY', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+  - [7, 7, {'town': 'NY', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+    4, 5]
+...
+idx:min()
+---
+- [7, 7, {'town': 'NY', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5]
+...
+idx:max()
+---
+- [7, 7, {'town': 'NY', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}},
+  4, 5]
+...
+s_withdata:drop()
+---
+...
 engine = nil
 ---
 ...
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index 30d6f1a..e106dd0 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -289,5 +289,32 @@ t["{"]
 
 s:drop()
 
+--
+-- gh-1012: Indexes for JSON-defined paths.
+--
+s_withdata = box.schema.space.create('withdata', {engine = engine})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO["fname"]'}, {3, 'str', path = '[3]["FIO"].fname'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '[3]["FIO"]["fname"]'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'field.FIO.fname'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = '[3].FIO'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[3][1]'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO'}, {3, 'str', path = '[3]["FIO"].fname'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3][1].sname'}, {3, 'str', path = '[3]["FIO"].fname'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[2].FIO.fname'}}})
+s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3].FIO....fname'}}})
+idx = s_withdata:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[3]["FIO"]["fname"]'}, {3, 'str', path = '[3]["FIO"]["sname"]'}}})
+assert(idx ~= nil)
+s_withdata:insert{7, 7, {town = 'NY', FIO = 666}, 4, 5}
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 666, sname = 'Bond'}}, 4, 5}
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = "James"}}, 4, 5}
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5}
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5}
+s_withdata:insert{7, 7, {town = 'NY', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5}
+idx:select()
+idx:min()
+idx:max()
+s_withdata:drop()
+
 engine = nil
 test_run = nil
-- 
2.7.4







More information about the Tarantool-patches mailing list