[tarantool-patches] Re: [PATCH v2 4/5] box: introduce path_hash and tuple_field tree

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Aug 22 03:26:50 MSK 2018


Thanks for the patch! See 49 comments below.

On 15/08/2018 15:15, Kirill Shcherbatov wrote:
> To work with JSON-defined indexes we introduce format JSON
> path hashtable data_path and a tree of intermediate path
> fields attached to format root fields.
> 
> <Hashtable>: format->data_path
> [2].FIO.fname -> field "fname" {type=str, off_slot=-1}
> [2].FIO.sname -> field "sname" {type=str, off_slot=-2}
> 
> <Tree>:      format->field[2] {type = map}
>                             |
>                     FIO {type = map}
>                             |
>          "fname"            |             "sname"
> {type=str,off_slot=-1} ____|____ {type = str,off_slot=-2}
> 
> Leaf fields used in Index have initialized offset_slot.
> On new tuple creation we observe fields tree and use leaf
> records to init tuple field_map.
> At the same time we use data_path hashtable on tuple data
> access by index(when cached offset_slot is invalid).
> All paths stored at the end of format allocation:
> JSON-tree fields same as format->path_hash point to them.
> 
> +------------+------------+-------+------------+-------+
> |tuple_format|tuple_field1|  ...  |tuple_fieldN| pathK |
> +------------+------------+-------+------------+-------+
> 
> New routine tuple_format_add_json_path is used to construct
> all internal structures for JSON path on new format creation
> and duplicating.
> 
> Part of #1012.
> ---
>   src/box/errcode.h            |   2 +-
>   src/box/key_def.c            |   3 +
>   src/box/key_def.h            |   2 +
>   src/box/tuple.c              |  25 +-
>   src/box/tuple_compare.cc     |  41 ++-
>   src/box/tuple_extract_key.cc | 137 +++++++++--
>   src/box/tuple_format.c       | 575 +++++++++++++++++++++++++++++++++++++++----
>   src/box/tuple_format.h       |  84 ++++++-
>   src/box/tuple_hash.cc        |  17 +-
>   src/box/vy_lsm.c             |  43 ++++
>   src/box/vy_point_lookup.c    |   2 -
>   src/box/vy_stmt.c            | 124 ++++++++--
>   test/box/misc.result         |  51 ++--
>   test/engine/tuple.result     | 271 ++++++++++++++++++++
>   test/engine/tuple.test.lua   |  80 ++++++
>   15 files changed, 1314 insertions(+), 143 deletions(-)
> 
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index 3d5f66a..8cbd59d 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -107,7 +107,7 @@ struct errcode_record {
>   	/* 52 */_(ER_FUNCTION_EXISTS,		"Function '%s' already exists") \
>   	/* 53 */_(ER_BEFORE_REPLACE_RET,	"Invalid return value of space:before_replace trigger: expected tuple or nil, got %s") \
>   	/* 54 */_(ER_FUNCTION_MAX,		"A limit on the total number of functions has been reached: %u") \
> -	/* 55 */_(ER_UNUSED4,			"") \
> +	/* 55 */_(ER_DATA_MISMATCH_INDEX_PART,	"Tuple doesn't math document structure defined as index") \

1. In the future, document structure could be defined not only
via indexes.

>   	/* 56 */_(ER_USER_MAX,			"A limit on the total number of users has been reached: %u") \
>   	/* 57 */_(ER_NO_SUCH_ENGINE,		"Space engine '%s' does not exist") \
>   	/* 58 */_(ER_RELOAD_CFG,		"Can't set option '%s' dynamically") \
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index d7dbad3..130acf7 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -135,6 +135,18 @@ runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple)
>   	smfree(&runtime_alloc, tuple, total);
>   }
>   

2. A comment, please.

> +static int
> +tuple_validate_json_path_data(const struct tuple_field *field, uint32_t idx,
> +			      const char *tuple, const char *offset, void *ctx)

3. Please, add suffix '_cb' which we use for callbacks.

> +{
> +	(void)tuple;
> +	(void)ctx;
> +	if (key_mp_type_validate(field->type, mp_typeof(*offset),
> +				 ER_KEY_PART_TYPE, idx, field->is_nullable) != 0)

4. Why not just 'return key_mp_type_validate(...);'?

> +		return -1;
> +	return 0;
> +}
> +
>   int
>   tuple_validate_raw(struct tuple_format *format, const char *tuple)
>   {
> @@ -159,14 +171,23 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
>   
>   	/* Check field types */
>   	struct tuple_field *field = &format->fields[0];
> +	const char *pos = tuple;
>   	uint32_t i = 0;
>   	uint32_t defined_field_count = MIN(field_count, format->field_count);
>   	for (; i < defined_field_count; ++i, ++field) {
> -		if (key_mp_type_validate(field->type, mp_typeof(*tuple),
> +		if (key_mp_type_validate(field->type, mp_typeof(*pos),
>   					 ER_FIELD_TYPE, i + TUPLE_INDEX_BASE,
>   					 field->is_nullable))
>   			return -1;
> -		mp_next(&tuple);
> +		/* Check all JSON paths. */
> +		if (field->array != NULL) {
> +			json_field_tree_routine func =
> +				tuple_validate_json_path_data;
> +			if (json_field_tree_exec_routine(field, i, tuple, pos,
> +							 func, NULL) != 0)
> +				return -1;
> +		}
> +		mp_next(&pos);

5. The same problem as in the previous version - you decode the
field twice: first inside the tree walker and then again here.

Please, find a way how to avoid it. For help see my comments
for the walker.

>   	}
>   	return 0;
>   }
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index 923e71c..490b528 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	const struct key_part *part = key_def->parts;
>   	const char *tuple_a_raw = tuple_data(tuple_a);
>   	const char *tuple_b_raw = tuple_data(tuple_b);
> -	if (key_def->part_count == 1 && part->fieldno == 0) {
> +	if (key_def->part_count == 1 && part->fieldno == 0 &&
> +	    part->path == NULL) {

6. This should be part of the previous patch, together with all
other changes to this file.

>   		/*
>   		 * First field can not be optional - empty tuples
>   		 * can not exist.
> diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
> index 0301186..4c0e201 100644
> --- a/src/box/tuple_extract_key.cc
> +++ b/src/box/tuple_extract_key.cc
> @@ -1,15 +1,25 @@
>   #include "tuple_extract_key.h"
>   #include "tuple.h"
>   #include "fiber.h"
> +#include "json/path.h"
>   
>   enum { MSGPACK_NULL = 0xc0 };
>   
> +static bool
> +key_def_parts_are_sequential(const struct key_def *def, int i)
> +{
> +	uint32_t fieldno1 = def->parts[i].fieldno + 1;
> +	uint32_t fieldno2 = def->parts[i + 1].fieldno;
> +	return fieldno1 == fieldno2 && def->parts[i].path == NULL &&
> +		def->parts[i + 1].path == NULL;
> +}

7. Make it static inline and add a comment. Also, this function is
called from places that have the 'is_flat' template parameter, which
allows skipping the path NULL checks at compile time, but you do not
use it. Please, make it a template too.
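
Something like this (an untested sketch):

/** True, if key parts i and i + 1 belong to sequential fields. */
template <bool is_flat>
static inline bool
key_def_parts_are_sequential(const struct key_def *def, int i)
{
	uint32_t fieldno1 = def->parts[i].fieldno + 1;
	uint32_t fieldno2 = def->parts[i + 1].fieldno;
	if (is_flat)
		return fieldno1 == fieldno2;
	return fieldno1 == fieldno2 && def->parts[i].path == NULL &&
	       def->parts[i + 1].path == NULL;
}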

> +
>   /** True, if a key con contain two or more parts in sequence. */
>   static bool
>   key_def_contains_sequential_parts(const struct key_def *def)
>   {
>   	for (uint32_t i = 0; i < def->part_count - 1; ++i) {
> -		if (def->parts[i].fieldno + 1 == def->parts[i + 1].fieldno)
> +		if (key_def_parts_are_sequential(def, i))
>   			return true;
>   	}
>   	return false;
> @@ -132,8 +142,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
>   			 * minimize tuple_field_raw() calls.
>   			 */
>   			for (; i < part_count - 1; i++) {
> -				if (key_def->parts[i].fieldno + 1 !=
> -				    key_def->parts[i + 1].fieldno) {
> +				if (!key_def_parts_are_sequential(key_def, i)) {
>   					/*
>   					 * End of sequential part.
>   					 */
> @@ -180,8 +189,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
>   			 * minimize tuple_field_raw() calls.
>   			 */
>   			for (; i < part_count - 1; i++) {
> -				if (key_def->parts[i].fieldno + 1 !=
> -				    key_def->parts[i + 1].fieldno) {
> +				if (!key_def_parts_are_sequential(key_def, i)) {
>   					/*
>   					 * End of sequential part.
>   					 */
> @@ -214,12 +222,13 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
>    * General-purpose version of tuple_extract_key_raw()
>    * @copydoc tuple_extract_key_raw()
>    */
> -template <bool has_optional_parts>
> +template <bool has_optional_parts, bool is_flat>
>   static char *
>   tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
>   			       const struct key_def *key_def,
>   			       uint32_t *key_size)
>   {
> +	assert(!is_flat == key_def->has_json_paths);
>   	assert(!has_optional_parts || key_def->is_nullable);
>   	assert(has_optional_parts == key_def->has_optional_parts);
>   	assert(mp_sizeof_nil() == 1);
> @@ -247,11 +256,11 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
>   		uint32_t fieldno = key_def->parts[i].fieldno;
>   		uint32_t null_count = 0;
>   		for (; i < key_def->part_count - 1; i++) {
> -			if (key_def->parts[i].fieldno + 1 !=
> -			    key_def->parts[i + 1].fieldno)
> +			if (!key_def_parts_are_sequential(key_def, i))
>   				break;
>   		}
> -		uint32_t end_fieldno = key_def->parts[i].fieldno;
> +		const struct key_part *part = &key_def->parts[i];
> +		uint32_t end_fieldno = part->fieldno;
>   
>   		if (fieldno < current_fieldno) {
>   			/* Rewind. */

8. All the changes between this point and point 7 should be
part of the previous patch.

> @@ -293,6 +302,38 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
>   				current_fieldno++;
>   			}
>   		}
> +		const char *field_last, *field_end_last;
> +		if (!is_flat && part->path != NULL) {
> +			field_last = field;
> +			field_end_last = field_end;
> +			struct json_path_parser parser;
> +			struct json_path_node node;
> +			json_path_parser_create(&parser, part->path,
> +						part->path_len);
> +			/* Skip fieldno. */

9. First level fieldno should not be encoded in the path. See
the previous patch for comments.

> +			int rc = json_path_next(&parser, &node);
> +			assert(rc == 0);
> +			while ((rc = json_path_next(&parser, &node)) == 0 &&
> +				node.type != JSON_PATH_END) {
> +				switch(node.type) {
> +				case JSON_PATH_NUM:
> +					rc = tuple_field_go_to_index(&field,
> +								     node.num);
> +					break;
> +				case JSON_PATH_STR:
> +					rc = tuple_field_go_to_key(&field,
> +								   node.str,
> +								   node.len);
> +					break;
> +				default:
> +					unreachable();
> +				}

10. You have exactly the same code in tuple_field_raw_by_path. Please,
extract either the whole while loop or this switch into a function.
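
For example (the helper name is mine):

static inline int
tuple_field_go_to_path_node(const char **field,
			    const struct json_path_node *node)
{
	switch (node->type) {
	case JSON_PATH_NUM:
		return tuple_field_go_to_index(field, node->num);
	case JSON_PATH_STR:
		return tuple_field_go_to_key(field, node->str, node->len);
	default:
		unreachable();
		return -1;
	}
}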

> +				assert(rc == 0);
> +			}
> +			assert(rc == 0 && node.type == JSON_PATH_END);
> +			field_end = field;
> +			mp_next(&field_end);
> +		}
>   		memcpy(key_buf, field, field_end - field);
>   		key_buf += field_end - field;
>   		if (has_optional_parts && null_count != 0) {
> @@ -330,32 +375,74 @@ tuple_extract_key_set(struct key_def *key_def)
>   		if (key_def->has_optional_parts) {
>   			assert(key_def->is_nullable);
>   			if (key_def_contains_sequential_parts(key_def)) {
> -				key_def->tuple_extract_key =
> -					tuple_extract_key_slowpath<true, true,
> -								   true>;
> +				if (key_def->has_json_paths) {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<true,
> +									   true,
> +									   false>;
> +				} else {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<true,
> +									   true,
> +									   true>;
> +				}
>   			} else {
> -				key_def->tuple_extract_key =
> -					tuple_extract_key_slowpath<false, true,
> -								   true>;
> +				if (key_def->has_json_paths) {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<false,
> +									   true,
> +									   false>;
> +				} else {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<false,
> +									   true,
> +									   true>;
> +				}
>   			}
>   		} else {
>   			if (key_def_contains_sequential_parts(key_def)) {
> -				key_def->tuple_extract_key =
> -					tuple_extract_key_slowpath<true, false,
> -								   true>;
> +				if (key_def->has_json_paths) {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<true,
> +									   false,
> +									   false>;
> +				} else {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<true,
> +									   false,
> +									   true>;
> +				}
>   			} else {
> -				key_def->tuple_extract_key =
> -					tuple_extract_key_slowpath<false, false,
> -								   true>;
> +				if (key_def->has_json_paths) {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<false,
> +									   false,
> +									   false>;
> +				} else {
> +					key_def->tuple_extract_key =
> +						tuple_extract_key_slowpath<false,
> +									   false,
> +									   true>;
> +				}
>   			}
>   		}

11. First, this is part of the previous patch. Second, it has gone
too far. It is time to declare a matrix of functions, maybe
two- or three-dimensional, whose indexes are calculated from
contains_sequential_parts, has_optional_parts and is_flat. See
field_type1_contains_type2 for an example.
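
For example, something like this (an untested sketch; it assumes the
tuple_extract_key_t typedef from key_def.h):

static const tuple_extract_key_t extract_key_slowpath_funcs[] = {
	tuple_extract_key_slowpath<false, false, false>,
	tuple_extract_key_slowpath<true, false, false>,
	tuple_extract_key_slowpath<false, true, false>,
	tuple_extract_key_slowpath<true, true, false>,
	tuple_extract_key_slowpath<false, false, true>,
	tuple_extract_key_slowpath<true, false, true>,
	tuple_extract_key_slowpath<false, true, true>,
	tuple_extract_key_slowpath<true, true, true>
};

uint32_t func_idx = key_def_contains_sequential_parts(key_def) +
		    2 * key_def->has_optional_parts +
		    4 * (!key_def->has_json_paths);
key_def->tuple_extract_key = extract_key_slowpath_funcs[func_idx];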

Same for other functions, having >= 3 template parameters.

>   	}
>   	if (key_def->has_optional_parts) {
>   		assert(key_def->is_nullable);
> -		key_def->tuple_extract_key_raw =
> -			tuple_extract_key_slowpath_raw<true>;
> +		if (key_def->has_json_paths) {
> +			key_def->tuple_extract_key_raw =
> +				tuple_extract_key_slowpath_raw<true, false>;
> +		} else {
> +			key_def->tuple_extract_key_raw =
> +				tuple_extract_key_slowpath_raw<true, true>;
> +		}
>   	} else {
> -		key_def->tuple_extract_key_raw =
> -			tuple_extract_key_slowpath_raw<false>;
> +		if (key_def->has_json_paths) {
> +			key_def->tuple_extract_key_raw =
> +				tuple_extract_key_slowpath_raw<false, false>;
> +		} else {
> +			key_def->tuple_extract_key_raw =
> +				tuple_extract_key_slowpath_raw<false, true>;
> +		}
>   	}
>   }
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index a9fddc0..1821950 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -38,9 +39,336 @@ static intptr_t recycled_format_ids = FORMAT_ID_NIL;
>   static uint32_t formats_size = 0, formats_capacity = 0;
>   
>   static const struct tuple_field tuple_field_default = {
> -	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false, false,
> +	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false, false, {{NULL, 0}}
>   };
>   
> +struct mh_strnptr_node_t *
> +json_path_hash_get(struct mh_strnptr_t *hashtable, const char *path,
> +		   uint32_t path_len, uint32_t path_hash)

12. Why do you return node_t if you always use only node->val? Why
not return the val directly?
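
E.g. (a sketch):

struct tuple_field *
json_path_hash_get(struct mh_strnptr_t *hashtable, const char *path,
		   uint32_t path_len, uint32_t path_hash)
{
	assert(hashtable != NULL);
	struct mh_strnptr_key_t key = {path, path_len, path_hash};
	mh_int_t rc = mh_strnptr_find(hashtable, &key, NULL);
	if (rc == mh_end(hashtable))
		return NULL;
	return (struct tuple_field *)mh_strnptr_node(hashtable, rc)->val;
}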

> +{
> +	assert(hashtable != NULL);
> +	struct mh_strnptr_key_t key = {path, path_len, path_hash};
> +	mh_int_t rc = mh_strnptr_find(hashtable, &key, NULL);
> +	if (rc == mh_end(hashtable))
> +		return NULL;
> +	return mh_strnptr_node(hashtable, rc);
> +}
> +
> +/**
> + * Create a new hashtable object.
> + * @param[out] hashtable pointer to object to create.
> + * @param records count of records to reserve.
> + * @retval -1 on error.
> + * @retval 0 on success.
> + */
> +static int
> +json_path_hash_create(struct mh_strnptr_t **hashtable, uint32_t records)

13. Why so complicated? Just rename it to json_path_hash_new and
return the hash pointer instead of an int.
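
I.e. (untested):

static struct mh_strnptr_t *
json_path_hash_new(uint32_t records)
{
	struct mh_strnptr_t *ret = mh_strnptr_new();
	if (ret == NULL) {
		diag_set(OutOfMemory, sizeof(*ret), "mh_strnptr_new",
			 "hashtable");
		return NULL;
	}
	if (mh_strnptr_reserve(ret, records, NULL) != 0) {
		mh_strnptr_delete(ret);
		diag_set(OutOfMemory, records, "mh_strnptr_reserve",
			 "hashtable");
		return NULL;
	}
	return ret;
}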

> +{
> +	struct mh_strnptr_t *ret = mh_strnptr_new();
> +	if (ret == NULL) {
> +		diag_set(OutOfMemory, sizeof(*hashtable), "mh_strnptr_new",
> +			 "hashtable");
> +		return -1;
> +	}
> +	if (mh_strnptr_reserve(ret, records, NULL) != 0) {
> +		mh_strnptr_delete(ret);
> +		diag_set(OutOfMemory, records, "mh_strnptr_reserve",
> +			 "hashtable");
> +		return -1;
> +	}
> +	*hashtable = ret;
> +	return 0;
> +}
> +/**

14. Please, put an empty line between the functions.

> + * Delete @hashtable object.
> + * @param hashtable pointer to object to delete.
> + */
> +static void
> +json_path_hash_delete(struct mh_strnptr_t *hashtable)
> +{
> +	if (hashtable == NULL)
> +		return;

15. Do not use SQLite style of freeing objects, please.

> +	while (mh_size(hashtable)) {
> +		mh_int_t n = mh_first(hashtable);
> +		mh_strnptr_del(hashtable, n, NULL);
> +	}
> +	mh_strnptr_delete(hashtable);
> +}
> +
> +/**
> + * Insert a new record to hashtable.
> + * @param hashtable storage to insert new record.
> + * @param path string.
> + * @param path_len length of @path.
> + * @param tuple_field value to store in @hashtable.
> + * @retval -1 on error.
> + * @retval 0 on success.
> + */
> +static int
> +json_path_hash_insert(struct mh_strnptr_t *hashtable, const char *path,
> +		      uint32_t path_len, struct tuple_field *field)
> +{
> +	assert(hashtable != NULL);
> +	/* Test if record already present in hash. */

16. How is it possible that the record exists already? And
where do you check it here?

> +	uint32_t path_hash = mh_strn_hash(path, path_len);
> +	struct mh_strnptr_node_t name_node =
> +		{path, path_len, path_hash, field};

17. Fits in one line.

> +	mh_int_t rc = mh_strnptr_put(hashtable, &name_node, NULL, NULL);
> +	if (rc == mh_end(hashtable)) {
> +		diag_set(OutOfMemory, sizeof(*hashtable), "mh_strnptr_put",
> +			"hashtable");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Construct field tree level for JSON path part.
> + *
> + * @param[in, out] tuple_field pointer to record to start with
> + *                 would be changed to record that math
> + *                 @part lexeme.
> + * @param fieldno number of root space field.
> + * @param part JSON path lexeme to represent in field tree.
> + * @retval -1 on error.
> + * @retval 0 on success.
> + */
> +static int
> +json_field_tree_append(struct tuple_field **field_subtree, uint32_t fieldno,
> +		       struct json_path_node *part)
> +{
> +	enum field_type type;
> +	struct tuple_field *field = *field_subtree;
> +	switch (part->type) {
> +	case JSON_PATH_NUM: {
> +		type = FIELD_TYPE_ARRAY;
> +		if (field->type != FIELD_TYPE_ANY && field->type != type)
> +			goto error_type_mistmatch;
> +		field->type = type;
> +		/* Create or resize field array if required. */
> +		if (field->array == NULL || part->num >= field->array_size) {
> +			struct tuple_field **array =
> +				realloc(field->array,
> +					part->num * sizeof(struct tuple_field *));

18. Out of 80. Use sizeof(array[0]). The same below.

> +			if (array == NULL) {
> +				diag_set(OutOfMemory,
> +					sizeof(struct tuple_field *), "realloc",
> +					"array");

19. The size here is not correct. You forgot to multiply by part->num.

> +				return -1;
> +			}
> +			if (field->array == NULL) {
> +				memset(array, 0, part->num *
> +					sizeof(struct tuple_field *));
> +			} else {
> +				memset(&array[field->array_size], 0,
> +				       (part->num - field->array_size) *
> +				       sizeof(struct tuple_field *));
> +			}

20. Why are these cases different? If I substitute 0 for
field->array_size (which is its value when field->array == NULL),
the second branch becomes the first one, doesn't it?

> +			field->array = array;
> +			field->array_size = part->num;
> +		}
> +		/* Record already exists. No actions required */

21. I think it is rather an 'else' branch. Evidently, the condition
is unreachable if the field was just created in the code above.

> +		if (field->array[part->num - TUPLE_INDEX_BASE] != NULL) {
> +			*field_subtree =
> +				field->array[part->num - TUPLE_INDEX_BASE];
> +			return 0;
> +		}
> +		break;
> +	}
> +	case JSON_PATH_STR: {
> +		type = FIELD_TYPE_MAP;
> +		if (field->type != FIELD_TYPE_ANY && field->type != type)
> +			goto error_type_mistmatch;
> +		field->type = type;
> +		if (field->map == NULL &&
> +		    json_path_hash_create(&field->map, 1) != 0)
> +			return -1;

22. If field->map == NULL, then surely you do not need to check
for node existence below. Please, split those two cases into
two branches.

> +		struct mh_strnptr_node_t *node =
> +			json_path_hash_get(field->map, part->str, part->len,
> +					   mh_strn_hash(part->str, part->len));
> +		if (node != NULL) {
> +			*field_subtree = node->val;
> +			return 0;
> +		}
> +		break;
> +	}
> +	default:
> +		unreachable();
> +	}
> +
> +	/* Construct and insert a new record. */
> +	struct tuple_field *new_field = malloc(sizeof(struct tuple_field));
> +	if (new_field == NULL) {
> +		diag_set(OutOfMemory, sizeof(struct tuple_field), "malloc",
> +			"new_field");
> +		return -1;
> +	}
> +	*new_field = tuple_field_default;
> +	if (field->type == FIELD_TYPE_MAP) {
> +		if (json_path_hash_insert(field->map, part->str, part->len,
> +					  new_field) != 0) {
> +			free(new_field);
> +			return -1;
> +		}
> +	} else if (field->type == FIELD_TYPE_ARRAY) {
> +		field->array[part->num - TUPLE_INDEX_BASE] = new_field;
> +	}
> +	*field_subtree = new_field;
> +
> +	return 0;
> +
> +error_type_mistmatch:
> +	diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
> +		tt_sprintf("%d", fieldno + TUPLE_INDEX_BASE),
> +		field_type_strs[type], field_type_strs[field->type]);
> +	return -1;
> +}
> +
> +/**
> + * Delete @field_subtree object.
> + * @param field_subtree to delete.
> + */
> +static void
> +json_field_tree_delete(struct tuple_field *field_subtree)
> +{
> +	if (field_subtree->type == FIELD_TYPE_MAP &&
> +	    field_subtree->map != NULL) {
> +		mh_int_t i;
> +		mh_foreach(field_subtree->map, i) {
> +			struct tuple_field *field =
> +				mh_strnptr_node(field_subtree->map, i)->val;
> +			if (field == NULL)
> +				continue;

23. How can it be NULL?

> +			json_field_tree_delete(field);
> +			free(field);
> +		}
> +		json_path_hash_delete(field_subtree->map);
> +	} else if (field_subtree->type == FIELD_TYPE_ARRAY &&
> +		   field_subtree->array != NULL) {
> +		for (uint32_t i = 0; i < field_subtree->array_size; i++) {
> +			struct tuple_field *field = field_subtree->array[i];
> +			if (field == NULL)
> +				continue;
> +			json_field_tree_delete(field_subtree->array[i]);
> +			free(field_subtree->array[i]);
> +		}
> +		free(field_subtree->array);
> +	}
> +}
> +
> +int
> +json_field_tree_exec_routine(const struct tuple_field *field, uint32_t idx,
> +			     const char *tuple, const char *offset,
> +			     json_field_tree_routine routine, void *routine_ctx)

24. I got the idea and I like it - have a walker applicable for
any purpose. But it calls a function by pointer on a hot path -
initialization of a tuple field map + validation.

Let's make it a macro. I know that a macro can not be recursive,
but we can write a generator of such a walker: a macro taking a
function and generating a new json_field_tree_exec_routine.

For example:
#define TUPLE_FIELD_TREE_WALKER(name, function) \
// json_field_tree_exec_routine declaration and code
// under the specified name.
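
I.e. the walker body stays the same, but the function pointer call
is replaced with a direct call the compiler can inline. An untested
sketch:

#define TUPLE_FIELD_TREE_WALKER(name, routine)				\
static int								\
name(const struct tuple_field *field, uint32_t idx,			\
     const char *tuple, const char *offset, void *ctx)			\
{									\
	if (field->type == FIELD_TYPE_MAP) {				\
		mh_int_t i;						\
		mh_foreach(field->map, i) {				\
			struct mh_strnptr_node_t *node =		\
				mh_strnptr_node(field->map, i);		\
			const char *raw = offset;			\
			if (tuple_field_go_to_key(&raw, node->str,	\
						  node->len) != 0) {	\
				diag_set(ClientError,			\
					 ER_DATA_MISMATCH_INDEX_PART);	\
				return -1;				\
			}						\
			if (name(node->val, idx, tuple, raw,		\
				 ctx) != 0)				\
				return -1;				\
		}							\
	} else if (field->type == FIELD_TYPE_ARRAY) {			\
		assert(mp_typeof(*offset) == MP_ARRAY);			\
		uint32_t count = mp_decode_array(&offset);		\
		if (count < field->array_size) {			\
			diag_set(ClientError,				\
				 ER_DATA_MISMATCH_INDEX_PART);		\
			return -1;					\
		}							\
		for (uint32_t i = 0; i < field->array_size;		\
		     i++, mp_next(&offset)) {				\
			if (field->array[i] == NULL)			\
				continue;				\
			if (name(field->array[i], idx, tuple,		\
				 offset, ctx) != 0)			\
				return -1;				\
		}							\
	} else {							\
		return routine(field, idx, tuple, offset, ctx);		\
	}								\
	return 0;							\
}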

> +{
> +	int rc = 0;
> +	if (field->type == FIELD_TYPE_MAP) {
> +		mh_int_t i;
> +		mh_foreach(field->map, i) {
> +			struct mh_strnptr_node_t *node =
> +				mh_strnptr_node(field->map, i);
> +			const char *raw = offset;
> +			if (tuple_field_go_to_key(&raw, node->str,
> +						  node->len) != 0) {
> +				diag_set(ClientError,
> +					 ER_DATA_MISMATCH_INDEX_PART);
> +				return -1;
> +			}
> +			if (json_field_tree_exec_routine(node->val, idx,
> +							 tuple, raw, routine,
> +							 routine_ctx) != 0)
> +				return -1;
> +		}
> +	} else if (field->type == FIELD_TYPE_ARRAY) {
> +		assert(mp_typeof(*offset) == MP_ARRAY);
> +		uint32_t count = mp_decode_array(&offset);
> +		if (count < field->array_size) {
> +			diag_set(ClientError, ER_DATA_MISMATCH_INDEX_PART);
> +			return -1;
> +		}
> +		for (uint32_t i = 0; i < field->array_size;
> +		     i++, mp_next(&offset)) {
> +			if (field->array[i] == NULL)
> +				continue;
> +			if (json_field_tree_exec_routine(field->array[i], idx,
> +							 tuple, offset, routine,
> +							 routine_ctx) != 0)
> +				return -1;
> +		}

25. Talking about one of my first comments (how to iterate over
the field only once when possible): here, for an array, you can
merely continue the iteration, calling mp_next beyond
field->array_size up to count.
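
A sketch of the array branch (untested):

		uint32_t count = mp_decode_array(&offset);
		if (count < field->array_size) {
			diag_set(ClientError, ER_DATA_MISMATCH_INDEX_PART);
			return -1;
		}
		for (uint32_t i = 0; i < count; i++, mp_next(&offset)) {
			if (i >= field->array_size ||
			    field->array[i] == NULL)
				continue;
			if (json_field_tree_exec_routine(field->array[i],
							 idx, tuple, offset,
							 routine,
							 routine_ctx) != 0)
				return -1;
		}
		/*
		 * Here 'offset' points past the whole array, so
		 * the end of the field can be reported to the
		 * caller (e.g. via an out parameter) and the
		 * caller does not need to mp_next() over the
		 * field again.
		 */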

For a map the things are more complex, and you can apply the method
I described in the previous review: you learn the map size, and on
each tuple_field_go_to_key you remember the maximal position of the
decoded key-value pairs. When the iteration is finished, you decode
the tail starting from that maximal decoded key-value pair until
the end. This way the tail is always decoded exactly once.

> +	} else {
> +		rc = routine(field, idx, tuple, offset, routine_ctx);
> +	}
> +	return rc;
> +}
> +
> +/**
> + * Add new JSON @path to @format.
> + * @param format to modify.
> + * @param path string to add.
> + * @param path_len length of @path.
> + * @param field_type type of field by @path.

26. No such parameter 'field_type'. Only 'type'.

> + * @param[out] leaf_field pointer to leaf field.
> + * @retval -1 on error.
> + * @retval 0 on success.
> + */
> +static int
> +tuple_format_add_json_path(struct tuple_format *format, const char *path,
> +			   uint32_t path_len, enum field_type type,
> +			   struct tuple_field **leaf_field)
> +{
> +	assert(format->path_hash != NULL);
> +	/*
> +	 * Get root field by index.
> +	 * Path is specified in canonical form: [i]...
> +	 */
> +	int rc = 0;

27. Do not use the SQLite style of variable declaration. It can be
declared and assigned at the same time below.

> +	struct json_path_parser parser;
> +	struct json_path_node node;
> +	json_path_parser_create(&parser, path, path_len);
> +	rc = json_path_next(&parser, &node);
> +	assert(rc == 0 && node.type == JSON_PATH_NUM);
> +	assert(node.num < format->field_count + 1);
> +
> +	/* Test if path is already registered. */
> +	struct mh_strnptr_node_t *leaf_node = NULL;
> +	uint32_t hash = mh_strn_hash(path, path_len);
> +	if ((leaf_node = json_path_hash_get(format->path_hash, path,
> +					    path_len, hash)) != NULL) {
> +		struct tuple_field *field = leaf_node->val;
> +		if (field->type != type) {
> +			const char *err =
> +				tt_sprintf("JSON path '%.*s' has been already "
> +					   "constructed for '%s' leaf record",
> +					   path_len, path,
> +					   field_type_strs[field->type]);

28. I do not see a test on this. Please, use gcov to be sure that
test coverage of all the new code is 100% (except OOM errors).

29. What about nullability mismatch?

> +			diag_set(ClientError,  ER_WRONG_INDEX_OPTIONS,
> +				node.num, err);
> +			return -1;
> +		}
> +		*leaf_field = field;
> +		return 0;
> +	}
> +
> +	/* Build data path tree. */
> +	uint32_t root_fieldno = node.num - TUPLE_INDEX_BASE;
> +	struct tuple_field *field = &format->fields[root_fieldno];
> +	while ((rc = json_path_next(&parser, &node)) == 0 &&
> +		node.type != JSON_PATH_END) {
> +		if (json_field_tree_append(&field, root_fieldno, &node) != 0)
> +			return -1;
> +	}
> +	assert(rc == 0 && node.type == JSON_PATH_END);
> +
> +	/* Leaf record is a new object as JSON path unique. */
> +	field->type = type;
> +	if (json_path_hash_insert(format->path_hash, path, path_len,
> +				  field) != 0)
> +		return -1;
> +
> +	*leaf_field = field;
> +	return 0;

30. Please, consider this:

  	field->type = type;
-	if (json_path_hash_insert(format->path_hash, path, path_len,
-				  field) != 0)
-		return -1;
-
  	*leaf_field = field;
-	return 0;
+	return json_path_hash_insert(format->path_hash, path, path_len, field);
  }

> +}
> +
>   /**
>    * Extract all available type info from keys and field
>    * definitions.
> @@ -101,10 +434,12 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>   			 * used in tuple_format.
>   			 */
>   			if (field_type1_contains_type2(field->type,
> -						       part->type)) {
> +						       part->type) &&
> +			    part->path == NULL) {
>   				field->type = part->type;
>   			} else if (! field_type1_contains_type2(part->type,
> -								field->type)) {
> +								field->type) &&
> +				   part->path == NULL) {
>   				const char *name;
>   				int fieldno = part->fieldno + TUPLE_INDEX_BASE;
>   				if (part->fieldno >= field_count) {
> @@ -131,9 +466,22 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>   			 * First field is always simply accessible,
>   			 * so we don't store an offset for it.
>   			 */
> -			if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
> +			if (part->path != NULL) {
> +				assert(is_sequential == false);
> +				memcpy(data, part->path, part->path_len);
> +				data[part->path_len] = '\0';
> +				struct tuple_field *leaf = NULL;
> +				if (tuple_format_add_json_path(format, data,
> +							       part->path_len,
> +							       part->type,
> +							       &leaf) != 0)
> +					return -1;
> +				assert(leaf != NULL);
> +				if (leaf->offset_slot == TUPLE_OFFSET_SLOT_NIL)
> +					leaf->offset_slot = --current_slot;
> +				data += part->path_len + 1;
> +			} else if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
>   			    is_sequential == false && part->fieldno > 0) {
> -
>   				field->offset_slot = --current_slot;
>   			}

31. The aforementioned two hunks are a complete mess. Please,
move the 'is_key_part = true' assignment before them. Then, right
after it, check for 'if (part->path != NULL) { ...; continue; }'.
Then remove the checks for 'path == NULL'.
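
I.e. roughly:

			field->is_key_part = true;
			if (part->path != NULL) {
				/* ... add the JSON path ... */
				continue;
			}
			if (field_type1_contains_type2(field->type,
						       part->type)) {
				field->type = part->type;
			} else if (! field_type1_contains_type2(part->type,
								field->type)) {
				/* ... type mismatch error ... */
			}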

>   		}
> @@ -201,20 +549,26 @@ static struct tuple_format *
>   tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>   		   uint32_t space_field_count, struct tuple_dictionary *dict)
>   {
> +	size_t extra_size = 0;
>   	uint32_t index_field_count = 0;
> +	uint32_t json_path_count = 0;
>   	/* find max max field no */
>   	for (uint16_t key_no = 0; key_no < key_count; ++key_no) {
>   		const struct key_def *key_def = keys[key_no];
>   		const struct key_part *part = key_def->parts;
>   		const struct key_part *pend = part + key_def->part_count;
>   		for (; part < pend; part++) {
> +			if (part->path != NULL) {
> +				json_path_count++;
> +				extra_size += part->path_len + 1;

32. When some indexes have common fields, you count
and allocate their paths twice.

> +			}
>   			index_field_count = MAX(index_field_count,
>   						part->fieldno + 1);
>   		}
>   	}
>   	uint32_t field_count = MAX(space_field_count, index_field_count);
>   	uint32_t total = sizeof(struct tuple_format) +
> -			 field_count * sizeof(struct tuple_field);
> +			 field_count * sizeof(struct tuple_field) + extra_size;
>   
>   	struct tuple_format *format = (struct tuple_format *) malloc(total);
>   	if (format == NULL) {
> @@ -335,21 +697,75 @@ tuple_format_dup(struct tuple_format *src)
>   {
>   	uint32_t total = sizeof(struct tuple_format) +
>   			 src->field_count * sizeof(struct tuple_field);
> +	if (src->path_hash != NULL) {
> +		mh_int_t i;
> +		mh_foreach(src->path_hash, i)
> +			total += mh_strnptr_node(src->path_hash, i)->len + 1;
> +	}
>   	struct tuple_format *format = (struct tuple_format *) malloc(total);
>   	if (format == NULL) {
>   		diag_set(OutOfMemory, total, "malloc", "tuple format");
>   		return NULL;
>   	}
>   	memcpy(format, src, total);
> +
> +	/* Fill with NULLs for normal destruction on error. */
> +	format->path_hash = NULL;
> +	for (uint32_t i = 0; i < format->field_count; i++) {
> +		format->fields[i].array = NULL;
> +		format->fields[i].array_size = 0;

33. Just memset them.

> +	}
> +	if (src->path_hash != NULL) {
> +		mh_int_t i;
> +		if (json_path_hash_create(&format->path_hash,
> +					  mh_size(src->path_hash)) != 0)
> +			goto error;
> +		mh_foreach(src->path_hash, i) {
> +			struct mh_strnptr_node_t *node =
> +				mh_strnptr_node(src->path_hash, i);
> +			/* Path data has been already copied. */
> +			char *path = (char *)format + (node->str - (char *)src);
> +			/* Store source leaf field offset_slot.  */
> +			struct tuple_field *leaf_field = node->val;
> +			int32_t offset_slot = leaf_field->offset_slot;
> +			if (tuple_format_add_json_path(format, path, node->len,
> +						       leaf_field->type,
> +						       &leaf_field) != 0)
> +				goto error;
> +			/* Store offset_slot in a new leaf record. */
> +			assert(leaf_field != NULL);
> +			leaf_field->offset_slot = offset_slot;
> +		}
> +	}
>   	tuple_dictionary_ref(format->dict);
>   	format->id = FORMAT_ID_NIL;
>   	format->refs = 0;
> -	if (tuple_format_register(format) != 0) {
> -		tuple_format_destroy(format);
> -		free(format);
> -		return NULL;
> -	}
> +	if (tuple_format_register(format) != 0)
> +		goto error;
>   	return format;

34. Consider this:

         format->id = FORMAT_ID_NIL;
         format->refs = 0;
-       if (tuple_format_register(format) != 0)
-               goto error;
-       return format;
+       if (tuple_format_register(format) == 0)
+               return format;
  error:
         tuple_format_destroy(format);


> +error:
> +	tuple_format_destroy(format);
> +	free(format);
> +	return NULL;
> +}
> @@ -378,18 +794,14 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
>   		return -1;
>   	}
>   
> -	/* first field is simply accessible, so we do not store offset to it */
> -	enum mp_type mp_type = mp_typeof(*pos);
> +	/*
> +	 * First field is simply accessible, store offset to it
> +	 * only for JSON path.
> +	 */
> +	uint32_t i = 0;
> +	enum mp_type mp_type;
>   	const struct tuple_field *field = &format->fields[0];
> -	if (key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
> -				 TUPLE_INDEX_BASE, field->is_nullable))
> -		return -1;
> -	mp_next(&pos);
> -	/* other fields...*/
> -	++field;
> -	uint32_t i = 1;
> -	uint32_t defined_field_count = MIN(field_count, format->field_count);
> -	if (field_count < format->index_field_count) {
> +	if (field_count < format->index_field_count || field->map != NULL) {

35. Why do you check here for map != NULL, while in all other
places you check for array != NULL?

>   		/*
>   		 * Nullify field map to be able to detect by 0,
>   		 * which key fields are absent in tuple_field().
> @@ -397,6 +809,16 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
>   		memset((char *)field_map - format->field_map_size, 0,
>   		       format->field_map_size);
>   	}
> +	if (field->map == NULL) {
> +		mp_type = mp_typeof(*pos);
> +		if (key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
> +					 TUPLE_INDEX_BASE, field->is_nullable))
> +			return -1;
> +		mp_next(&pos);
> +		++field;
> +		++i;
> +	}
> +	uint32_t defined_field_count = MIN(field_count, format->field_count);
>   	for (; i < defined_field_count; ++i, ++field) {
>   		mp_type = mp_typeof(*pos);
>   		if (key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
> @@ -407,6 +829,14 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
>   			field_map[field->offset_slot] =
>   				(uint32_t) (pos - tuple);
>   		}
> +		if (field->map != NULL) {

36. If this is true, then the previous 'if' is unreachable
(field->offset_slot != TUPLE_OFFSET_SLOT_NIL), since a complex
field can not have an offset slot. So this is an 'else' branch,
I guess.

> +			assert(field->array != NULL);
> +			json_field_tree_routine func =
> +				tuple_init_json_field_map_routine;
> +			if (json_field_tree_exec_routine(field, i, tuple, pos,
> +							 func, field_map) != 0)
> +				return -1;
> +		}
>   		mp_next(&pos);

37. After you fix my other comments, this mp_next should
not be called if the JSON tree walker was used.

>   	}
>   	return 0;
> @@ -467,15 +897,60 @@ box_tuple_format_unref(box_tuple_format_t *format)
>   	tuple_format_unref(format);
>   }
>   
> -/**
> - * Propagate @a field to MessagePack(field)[index].
> - * @param[in][out] field Field to propagate.
> - * @param index 1-based index to propagate to.
> - *
> - * @retval  0 Success, the index was found.
> - * @retval -1 Not found.
> - */
> -static inline int
> +const char *
> +tuple_field_by_part(const struct tuple_format *format, const char *data,
> +		    const uint32_t *field_map, struct key_part *part)
> +{
> +	const char *raw = NULL;
> +	uint32_t field_no = part->fieldno;

38. Just inline it.

> +	struct mh_strnptr_node_t *node;
> +	if (unlikely(part->path == NULL)) {

39. On the contrary, it is likely.

> +		raw = tuple_field_raw(format, data, field_map, field_no);

40. Return here and reduce the indentation level below.

> +	} else {
> +		int32_t offset_slot = TUPLE_OFFSET_SLOT_NIL;
> +		if (part->format_epoch == format->epoch &&
> +		    -part->slot_cache * sizeof(uint32_t) <=
> +		    format->field_map_size) {

41. As I understand, when a part is JSON and its epoch matches
the format's, it always has a valid slot_cache, and you do not
need to check for '< field_map_size'.

Moreover, I do not understand why you check this slot for
SLOT_NIL and for a zero value in field_map. It is 100% valid,
and already here you can do
'return data + field_map[part->slot_cache]', can't you?

> +			offset_slot = part->slot_cache;
> +		} else if (format->path_hash != NULL &&
> +			   (node = json_path_hash_get(format->path_hash,
> +						      part->path,
> +						      part->path_len,
> +						      part->path_hash)) !=
> +						      NULL) {
> +			assert(node != NULL);
> +			struct tuple_field *field = node->val;
> +			assert(field != NULL);
> +			offset_slot = field->offset_slot;

42. The same here. When an offset slot is stored in a field
from format->path_hash, it always has a non-zero field_map
value and a non-SLOT_NIL offset. So the branch below is actually
'else', I guess.
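
If those assumptions hold, the whole lookup collapses to something
like this (untested):

	if (likely(part->path == NULL))
		return tuple_field_raw(format, data, field_map,
				       part->fieldno);
	if (part->format_epoch == format->epoch)
		return data + field_map[part->slot_cache];
	struct mh_strnptr_node_t *node;
	if (format->path_hash != NULL &&
	    (node = json_path_hash_get(format->path_hash, part->path,
				       part->path_len,
				       part->path_hash)) != NULL) {
		struct tuple_field *field = node->val;
		part->slot_cache = field->offset_slot;
		part->format_epoch = format->epoch;
		return data + field_map[field->offset_slot];
	}
	/* Legacy tuple with no field map entry for the JSON path. */
	const char *raw;
	uint32_t path_hash = field_name_hash(part->path, part->path_len);
	if (tuple_field_raw_by_path(format, data, field_map, part->path,
				    part->path_len, path_hash, &raw) != 0)
		raw = NULL;
	return raw;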

> +		}
> +		if (unlikely(offset_slot == TUPLE_OFFSET_SLOT_NIL ||
> +			     field_map[offset_slot] == 0)) {
> +			/*
> +			 * Legacy tuple having no field map
> +			 * for JSON index.
> +			 */
> +			uint32_t path_hash =
> +				field_name_hash(part->path, part->path_len);
> +			if (tuple_field_raw_by_path(format, data, field_map,
> +						    part->path, part->path_len,
> +						    path_hash, &raw) != 0)
> +				raw = NULL;
> +		} else {
> +			assert(offset_slot < 0);
> +			assert(-offset_slot * sizeof(uint32_t) <=
> +			       format->field_map_size);
> +			/* Cache offset_slot if required. */
> +			if (part->format_epoch < format->epoch) {
> +				part->slot_cache = offset_slot;
> +				part->format_epoch = format->epoch;
> +			}
> +			raw = data + field_map[offset_slot];
> +		}
> +	}
> +	return raw;
> +}
> +
> +int
>   tuple_field_go_to_index(const char **field, uint64_t index)
>   {
>   	enum mp_type type = mp_typeof(**field);
> @@ -547,21 +1013,32 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
>   	return -1;
>   }
>   
> -const char *
> -tuple_field_by_part(const struct tuple_format *format, const char *data,
> -		    const uint32_t *field_map, struct key_part *part)
> -{
> -	return tuple_field_raw(format, data, field_map, part->fieldno);
> -}

43. Please, move this function above within its original commit.

> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index a989917..1348f0d 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -108,6 +108,18 @@ struct tuple_field {
>   	bool is_key_part;
>   	/** True, if a field can store NULL. */
>   	bool is_nullable;
> +	/** Tree child records. */
> +	union {
> +		/** Array of fields. */
> +		struct  {
> +			struct tuple_field **array;
> +			uint32_t array_size;
> +		};
> +		/** Hashtable: path -> tuple_field. */
> +		struct mh_strnptr_t *map;
> +		/** Leaf argument for tree-walker routine. */
> +		void *arg;

44. Actually, it is not used in the tree-walker.

> +	};

45. I see how you struggle with checking that the field has no
children, so I propose to add a 'void *child' member to the union
and check it for NULL. It is better than explicitly checking array
or map in places where the distinction does not matter.
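
E.g.:

	/** Tree child records. */
	union {
		/** Array of fields. */
		struct {
			struct tuple_field **array;
			uint32_t array_size;
		};
		/** Hashtable: path -> tuple_field. */
		struct mh_strnptr_t *map;
		/** Non-NULL iff the field has any children. */
		void *child;
	};

Then a simple 'field->child != NULL' works for both the array and
the map case.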

>   };
>   
>   /**
> diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
> index ae0bb8e..5f3357a 100644
> --- a/src/box/tuple_hash.cc
> +++ b/src/box/tuple_hash.cc
> @@ -228,7 +228,7 @@ key_hash_slowpath(const char *key, const struct key_def *key_def);
>   
>   void
>   tuple_hash_func_set(struct key_def *key_def) {
> -	if (key_def->is_nullable)
> +	if (key_def->is_nullable || key_def->has_json_paths)
>   		goto slowpath;
>   	/*
>   	 * Check that key_def defines sequential a key without holes
> @@ -262,10 +262,17 @@ tuple_hash_func_set(struct key_def *key_def) {
>   	}
>   
>   slowpath:
> -	if (key_def->has_optional_parts)
> -		key_def->tuple_hash = tuple_hash_slowpath<true, true>;
> -	else
> -		key_def->tuple_hash = tuple_hash_slowpath<false, true>;
> +	if (key_def->has_optional_parts) {
> +		if (key_def->has_json_paths)
> +			key_def->tuple_hash = tuple_hash_slowpath<true, false>;
> +		else
> +			key_def->tuple_hash = tuple_hash_slowpath<true, true>;
> +	} else {
> +		if (key_def->has_json_paths)
> +			key_def->tuple_hash = tuple_hash_slowpath<false, false>;
> +		else
> +			key_def->tuple_hash = tuple_hash_slowpath<false, true>;
> +	}
>   	key_def->key_hash = key_hash_slowpath;
>   }

46. All these changes should be done in the previous commit.

>   
> diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
> index cb3c436..1bb5e22 100644
> --- a/src/box/vy_lsm.c
> +++ b/src/box/vy_lsm.c
> @@ -158,6 +159,48 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env,
>   						    NULL);
>   		if (lsm->disk_format == NULL)
>   			goto fail_format;
> +		/*
> +		 * Tuple formats should be compatible to make
> +		 * epoch-based caching work.
> +		 */
> +		int32_t min_offset_slot = 0;
> +		struct tuple_field *dst_fields = lsm->disk_format->fields;
> +		struct mh_strnptr_t *dst_ht = lsm->disk_format->path_hash;
> +		struct mh_strnptr_t *src_ht = format->path_hash;
> +		struct key_part *part = cmp_def->parts;
> +		struct key_part *part_end = part + cmp_def->part_count;
> +		for (; part < part_end; part++) {
> +			struct tuple_field *dst_field =
> +				&dst_fields[part->fieldno];
> +			struct tuple_field *src_field;
> +			if (dst_field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> +				src_field = &format->fields[part->fieldno];
> +			} else if (dst_fields[part->fieldno].map != NULL) {

47. Why do you check for child != NULL instead of part->path != NULL?

> +				struct mh_strnptr_node_t *node;
> +				node = json_path_hash_get(dst_ht, part->path,
> +							  part->path_len,
> +							  part->path_hash);
> +				assert(node != NULL);
> +				dst_field = node->val;
> +				assert(dst_field != NULL);
> +
> +				node = json_path_hash_get(src_ht, part->path,
> +							  part->path_len,
> +							  part->path_hash);
> +				assert(node != NULL);
> +				src_field = node->val;
> +				assert(dst_field != NULL);
> +			} else {
> +				continue;
> +			}
> +			if (src_field->offset_slot == TUPLE_OFFSET_SLOT_NIL)
> +				continue;
> +			dst_field->offset_slot = src_field->offset_slot;
> +			min_offset_slot =
> +				MIN(src_field->offset_slot, min_offset_slot);
> +		}
> +		lsm->disk_format->field_map_size =
> +			-min_offset_slot * sizeof(uint32_t);
>   	}
>   	tuple_format_ref(lsm->disk_format);
>   
> diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
> index a4b7975..80f08bb 100644
> --- a/src/box/vy_stmt.c
> +++ b/src/box/vy_stmt.c
> @@ -321,6 +322,67 @@ vy_stmt_replace_from_upsert(const struct tuple *upsert)
>   	return replace;
>   }
>   
> +static void
> +vy_stmt_msgpack_build(struct tuple_field *field, char *tuple,
> +		      uint32_t *field_map, char **offset, bool write_data)

48. Why don't you use json_field_tree_exec_routine? Because you need
to do something before and after decoding the field? Then just
implement it as a macro, as I said, and add two parameters:
start_field_cb(field_type, field_size) and end_field_cb(field_type).
If a _cb is not defined, its call is not generated.
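
For instance, the array branch of such a generated walker could
look like this (a fragment only, with the proposed _cb signatures):

	} else if (field->type == FIELD_TYPE_ARRAY) {			\
		start_field_cb(FIELD_TYPE_ARRAY, field->array_size);	\
		for (uint32_t i = 0; i < field->array_size; i++) {	\
			/* ... recursion into name(), as before ... */	\
		}							\
		end_field_cb(FIELD_TYPE_ARRAY);				\
	}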

> +{
> +	if (field->type == FIELD_TYPE_ARRAY) {
> +		if (write_data)
> +			*offset = mp_encode_array(*offset, field->array_size);
> +		else
> +			*offset += mp_sizeof_array(field->array_size);
> +		for (uint32_t i = 0; i < field->array_size; i++) {
> +			if (field->array[i] == NULL) {
> +				if (write_data)
> +					*offset = mp_encode_nil(*offset);
> +				else
> +					*offset += mp_sizeof_nil();
> +				continue;
> +			}
> +			vy_stmt_msgpack_build(field->array[i], tuple, field_map,
> +					      offset, write_data);
> +		}
> +		return;
> +	} else if (field->type == FIELD_TYPE_MAP) {
> +		if (write_data)
> +			*offset = mp_encode_map(*offset, mh_size(field->map));
> +		else
> +			*offset += mp_sizeof_map(mh_size(field->map));
> +		mh_int_t i;
> +		mh_foreach(field->map, i) {
> +			struct mh_strnptr_node_t *node =
> +				mh_strnptr_node(field->map, i);
> +			assert(node);
> +			if (write_data) {
> +				*offset = mp_encode_str(*offset, node->str,
> +							node->len);
> +			} else {
> +				*offset += mp_sizeof_str(node->len);
> +			}
> +			vy_stmt_msgpack_build(node->val, tuple, field_map,
> +					      offset, write_data);
> +		}
> +		return;;

49. Double ';'.

> +	}
> +
> +	struct iovec *iov = field->arg;
> +	if (iov == NULL) {
> +		if (write_data)
> +			*offset = mp_encode_nil(*offset);
> +		else
> +			*offset += mp_sizeof_nil();
> +	} else {
> +		if (write_data) {
> +			uint32_t data_offset = *offset - tuple;
> +			memcpy(*offset, iov->iov_base, iov->iov_len);
> +			field->arg = NULL;
> +			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
> +				field_map[field->offset_slot] = data_offset;
> +		}
> +		*offset += iov->iov_len;
> +	}
> +}
> +
>   static struct tuple *
>   vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type,
>   			       const struct key_def *cmp_def,
