[PATCH v7 1/5] box: introduce JSON Indexes

Vladimir Davydov vdavydov.dev at gmail.com
Thu Jan 10 13:16:16 MSK 2019


On Wed, Jan 09, 2019 at 11:29:36AM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 0589c9678..9656a4189 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -268,7 +268,7 @@ index_def_new_from_tuple(struct tuple *tuple, struct space *space)
>  	});
>  	if (key_def_decode_parts(part_def, part_count, &parts,
>  				 space->def->fields,
> -				 space->def->field_count) != 0)
> +				 space->def->field_count, &fiber()->gc) != 0)
>  		diag_raise();
>  	key_def = key_def_new(part_def, part_count);
>  	if (key_def == NULL)
> diff --git a/src/box/index_def.c b/src/box/index_def.c
> index 2ba57ee9d..58137ed07 100644
> --- a/src/box/index_def.c
> +++ b/src/box/index_def.c
> @@ -31,6 +31,8 @@
>  #include "index_def.h"
>  #include "schema_def.h"
>  #include "identifier.h"
> +#include "tuple_format.h"

Pointless inclusion.

> +#include "json/json.h"
>  
>  const char *index_type_strs[] = { "HASH", "TREE", "BITSET", "RTREE" };
>  
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index dae3580e2..3012b05df 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -71,19 +75,30 @@ const struct opt_def part_def_reg[] = {
>  		     struct key_part_def, nullable_action, NULL),
>  	OPT_DEF_ENUM(PART_OPT_SORT_ORDER, sort_order, struct key_part_def,
>  		     sort_order, NULL),
> +	OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path),
>  	OPT_END,
>  };
>  
>  struct key_def *
>  key_def_dup(const struct key_def *src)
>  {
> -	size_t sz = key_def_sizeof(src->part_count);
> -	struct key_def *res = (struct key_def *)malloc(sz);
> +	size_t sz = 0;
> +	for (uint32_t i = 0; i < src->part_count; i++)
> +		sz += src->parts[i].path_len;
> +	sz = key_def_sizeof(src->part_count, sz);
> +	struct key_def *res = (struct key_def *)calloc(1, sz);

No need to zero 'res' here. Use malloc() pls.

>  	if (res == NULL) {
>  		diag_set(OutOfMemory, sz, "malloc", "res");
>  		return NULL;
>  	}
>  	memcpy(res, src, sz);
> +	/* Update paths to point to the new memory chunk.*/
> +	for (uint32_t i = 0; i < src->part_count; i++) {
> +		if (src->parts[i].path == NULL)
> +			continue;
> +		size_t path_offset = src->parts[i].path - (char *)src;
> +		res->parts[i].path = (char *)res + path_offset;
> +	}
>  	return res;
>  }
>  
> @@ -115,24 +138,39 @@ static void
>  key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
>  		 enum field_type type, enum on_conflict_action nullable_action,
>  		 struct coll *coll, uint32_t coll_id,
> -		 enum sort_order sort_order)
> +		 enum sort_order sort_order, const char *path,
> +		 uint32_t path_len, char **paths)

path, paths - easy to mix up. I think we should rename paths to
path_pool or path_data or something like that here and everywhere
where this function is called.

>  {
>  	assert(part_no < def->part_count);
>  	assert(type < field_type_MAX);
>  	def->is_nullable |= (nullable_action == ON_CONFLICT_ACTION_NONE);
> +	def->has_json_paths |= path != NULL;
>  	def->parts[part_no].nullable_action = nullable_action;
>  	def->parts[part_no].fieldno = fieldno;
>  	def->parts[part_no].type = type;
>  	def->parts[part_no].coll = coll;
>  	def->parts[part_no].coll_id = coll_id;
>  	def->parts[part_no].sort_order = sort_order;
> +	if (path != NULL) {
> +		assert(paths != NULL);
> +		def->parts[part_no].path = *paths;
> +		*paths += path_len;
> +		memcpy(def->parts[part_no].path, path, path_len);
> +		def->parts[part_no].path_len = path_len;
> +	} else {
> +		def->parts[part_no].path = NULL;
> +		def->parts[part_no].path_len = 0;
> +	}
>  	column_mask_set_fieldno(&def->column_mask, fieldno);
>  }
>  
>  struct key_def *
>  key_def_new(const struct key_part_def *parts, uint32_t part_count)
>  {
> -	size_t sz = key_def_sizeof(part_count);
> +	ssize_t sz = 0;

size_t

> +	for (uint32_t i = 0; i < part_count; i++)
> +		sz += parts[i].path != NULL ? strlen(parts[i].path) : 0;
> +	sz = key_def_sizeof(part_count, sz);
>  	struct key_def *def = calloc(1, sz);
>  	if (def == NULL) {
>  		diag_set(OutOfMemory, sz, "malloc", "struct key_def");
> @@ -274,8 +336,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts,
>  	for (uint32_t i = 0; i < part_count; i++) {
>  		const struct key_part_def *part = &parts[i];
>  		assert(part->type < field_type_MAX);
> -		SNPRINT(total, snprintf, buf, size, "%d, '%s'",
> -			(int)part->fieldno, field_type_strs[part->type]);
> +		if (part->path != NULL) {
> +			SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'",
> +				(int)part->fieldno, field_type_strs[part->type],
> +				 part->path);
> +		} else {
> +			SNPRINT(total, snprintf, buf, size, "%d, '%s'",
> +				(int)part->fieldno,
> +				field_type_strs[part->type]);
> +		}

		SNPRINT(total, snprintf, buf, size, "[%d, '%s'",
			(int)part->fieldno, field_type_strs[part->type]);
		if (part->path != NULL)
			SNPRINT(total, snprintf, buf, size, ", path='%s'",
				part->path);
		SNPRINT(total, snprintf, buf, size, "]");

>  		if (i < part_count - 1)
>  			SNPRINT(total, snprintf, buf, size, ", ");
>  	}
> @@ -485,6 +569,27 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
>  				 "index part: unknown sort order");
>  			return -1;
>  		}
> +		if (part->path != NULL) {
> +			uint32_t path_len = strlen(part->path);
> +			if (path_len > BOX_JSON_PATH_MAX) {
> +				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +					 "JSON path is too long");
> +				return -1;
> +			}

Looking at the definition of BOX_JSON_PATH_MAX, I see that it's 512.
This limitation seems to be way too rigorous. Why do you need it,
anyway? To avoid truncating error messages? I don't think that anybody
will give a damn about that, and in case somebody does, we can simply
increase the size of the static buffer or even make it grow dynamically.
No point in limiting functionality because of that.

> +			int rc = json_path_validate(part->path, path_len,
> +						    TUPLE_INDEX_BASE);
> +			if (rc != 0) {
> +				const char *err_msg =
> +					tt_sprintf("invalid JSON path '%s': "
> +						   "error in path on "
> +						   "position %d", part->path,
> +						   rc);

I don't think we need to print the path as we already print the key part
number so the user can find it by himself. I wouldn't even print the
symbol number here - 'invalid path' would be enough.

> +				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +					 part->fieldno + TUPLE_INDEX_BASE,
> +					 err_msg);
> +				return -1;
> +			}
> +		}
>  	}
>  	return 0;
>  }
> diff --git a/src/box/key_def.h b/src/box/key_def.h
> index d1866303b..c6b7a8c74 100644
> --- a/src/box/key_def.h
> +++ b/src/box/key_def.h
> @@ -82,6 +87,15 @@ struct key_part {
>  	enum on_conflict_action nullable_action;
>  	/** Part sort order. */
>  	enum sort_order sort_order;
> +	/**
> +	 * JSON path to indexed data, relative to the field number,
> +	 * or NULL if this key part indexes a top-level field.
> +	 * This sting is not 0-terminated. Memory is allocated

s/sting/string

> +	 * at the end of key_def chunk.
> +	 */
> +	char *path;
> +	/** The length of JSON path. */
> +	uint32_t path_len;
>  };
>  
>  struct key_def;
> @@ -148,6 +162,8 @@ struct key_def {
>  	uint32_t unique_part_count;
>  	/** True, if at least one part can store NULL. */
>  	bool is_nullable;
> +	/** True, if some key part has JSON path. */

Redundant comma before 'if'.

> +	bool has_json_paths;
>  	/**
>  	 * True, if some key parts can be absent in a tuple. These
>  	 * fields assumed to be MP_NIL.
> @@ -255,9 +272,13 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count);
>  
>  /**
>   * Dump part definitions of the given key def.
> + * Region is required to make allocations for JSON paths when some
> + * path present. JSON path strings are 0-terminated.

  The region is used for allocating JSON paths, if any.

No need to say anything about 0-terminated strings - it implicitly
follows from the key_part_def definition.

> + * Return -1 on memory allocation error, 0 on success.
>   */
> -void
> -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
> +int
> +key_def_dump_parts(const struct key_def *def, struct key_part_def *parts,
> +		   struct region *region);
>  
>  /**
>   * Update 'has_optional_parts' of @a key_def with correspondence
> @@ -303,7 +324,7 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
>  int
>  key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
>  		     const char **data, const struct field_def *fields,
> -		     uint32_t field_count);
> +		     uint32_t field_count, struct region *region);

What's the region used for? Update the comment please.

>  
>  /**
>   * Returns the part in index_def->parts for the specified fieldno.
> diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
> index ac8b5a44e..c40d7887d 100644
> --- a/src/box/tuple_extract_key.cc
> +++ b/src/box/tuple_extract_key.cc
> @@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i)
>  {
>  	uint32_t fieldno1 = def->parts[i].fieldno + 1;
>  	uint32_t fieldno2 = def->parts[i + 1].fieldno;
> -	return fieldno1 == fieldno2;
> +	return fieldno1 == fieldno2 && def->parts[i].path == NULL &&
> +	       def->parts[i + 1].path == NULL;

This piece of code looks kinda weird now: we use fieldno1 and fieldno2
auxiliary variables, but we still dereference def->parts to check paths.
Let's rewrite it as

	struct key_part *part1 = &def->parts[i];
	struct key_part *part2 = &def->parts[i + 1];
	return part1->fieldno + 1 == part2->fieldno &&
	       part1->path == NULL && part2->path == NULL;

>  }
>  
>  /** True, if a key con contain two or more parts in sequence. */
> @@ -283,6 +285,22 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
>  				current_fieldno++;
>  			}
>  		}
> +		const char *field_last, *field_end_last;
> +		if (part->path != NULL) {
> +			field_last = field;
> +			field_end_last = field_end;
> +			MAYBE_UNUSED int rc =
> +				tuple_field_go_to_path(&field, part->path,
> +						       part->path_len);

I don't like using MAYBE_UNUSED when it could be easily avoided:

			if (tuple_field_go_to_path(&field, part->path,
						   part->path_len) != 0)
				unreachable();

> +			/*
> +			 * All tuples must be valid as all
> +			 * integrity checks has already been
> +			 * passed.

have already passed.

> +			 */
> +			assert(rc == 0);
> +			field_end = field;
> +			mp_next(&field_end);
> +		}
>  		memcpy(key_buf, field, field_end - field);
>  		key_buf += field_end - field;
>  		if (has_optional_parts && null_count != 0) {
> @@ -291,6 +309,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
>  		} else {
>  			assert(key_buf - key <= data_end - data);
>  		}
> +		if (part->path != NULL) {
> +			field = field_last;
> +			field_end = field_end_last;
> +		}

I don't like how you overwrite and then restore field/field_end here.
Let's rewrite the code without it:

		const char *src = field;
		const char *src_end = field_end
		if (part->path != NULL) {
			tuple_field_go_to_path(&src, ...);
			mp_next(&src_end);
		}
		memcpy(key_buf, src, src_end - src);
		key_buf += src_end - src;

>  	}
>  	if (key_size != NULL)
>  		*key_size = (uint32_t)(key_buf - key);
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index e11b4e6f3..c81c23fd1 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -66,12 +67,88 @@ tuple_field_delete(struct tuple_field *field)
>  
>  /** Return path to a tuple field. Used for error reporting. */
>  static const char *
> -tuple_field_path(const struct tuple_field *field)
> +tuple_field_path(const struct tuple_field *field, bool json_only)

This extra argument looks horrible. Please remove it. This function is
used only for error reporting according to the comment, so you don't
need to format the output as a valid json path in case of a top-level
field. If you need to do that somewhere else, you should use
json_tree_snprint_path directly there.

>  {
>  	assert(field->token.parent != NULL);
> -	assert(field->token.parent->parent == NULL);
> -	assert(field->token.type == JSON_TOKEN_NUM);
> -	return int2str(field->token.num + TUPLE_INDEX_BASE);
> +	char *path;
> +	if (!json_only && field->token.parent->type == JSON_TOKEN_END) {
> +		assert(field->token.type == JSON_TOKEN_NUM);
> +		path = int2str(field->token.num + TUPLE_INDEX_BASE);
> +	} else {
> +		path = tt_static_buf();
> +		MAYBE_UNUSED int rc =
> +			json_tree_snprint_path(path, TT_STATIC_BUF_LEN,
> +					       &field->token, TUPLE_INDEX_BASE);
> +		assert(rc > 0 && rc < TT_STATIC_BUF_LEN);

We don't really care if an error message gets truncated so these checks
are not needed:

	if (field->token.parent->parent == NULL) {
		/* Top-level field, no need to format the path. */
		return int2str(field->token.num + TUPLE_INDEX_BASE);
	}
	char *path = tt_static_buf();
	json_tree_snprint_path(path, TT_STATIC_BUF_LEN,
			       &field->token, TUPLE_INDEX_BASE);
	return path;


> +	}
> +	return path;
> +}
> +
> +/**
> + * Add corresponding format:fields for specified JSON path.

Please use either double colon :: or -> or . when you denote accessing
a struct member in comments, because single colon : is meaningless in
terms of C/C++.

Anyway, I'd rather rewrite the comment in plain English:

  Given a field number and a path, add the corresponding tuple field
  to the tuple format, allocating intermediate fields if necessary.

> + * Return a pointer to the leaf field on success, NULL on memory
> + * allocation error or type/nullability mistmatch error, diag
> + * message is set.
> + */
> +static struct tuple_field *
> +tuple_field_tree_add_path(struct tuple_format *format, const char *path,

The function name is confusing as there's no such thing as
tuple_field_tree.

Should be called tuple_format_add_field, I guess.

> +			  uint32_t path_len, uint32_t fieldno)

Semantically, fieldno should go before path in the argument list.

> +{
> +	int rc = 0;
> +	struct json_tree *tree = &format->fields;
> +	struct tuple_field *parent = tuple_format_field(format, fieldno);
> +	struct tuple_field *field = tuple_field_new();
> +	if (field == NULL)
> +		goto fail;
> +
> +	struct json_lexer lexer;
> +	uint32_t token_count = 0;
> +	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
> +	while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
> +	       field->token.type != JSON_TOKEN_END) {
> +		enum field_type expected_type =
> +			field->token.type == JSON_TOKEN_STR ?
> +			FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
> +		if (field_type1_contains_type2(parent->type, expected_type)) {
> +			parent->type = expected_type;
> +		} else if (!field_type1_contains_type2(expected_type,
> +						       parent->type)) {

Why the second if()?

> +			diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
> +				 tuple_field_path(parent, false),
> +				 field_type_strs[parent->type],
> +				 field_type_strs[expected_type]);
> +			goto fail;
> +		}
> +		struct tuple_field *next =
> +			json_tree_lookup_entry(tree, &parent->token,
> +					       &field->token,
> +					       struct tuple_field, token);
> +		if (next == NULL) {
> +			rc = json_tree_add(tree, &parent->token, &field->token);
> +			if (rc != 0) {
> +				diag_set(OutOfMemory, sizeof(struct json_token),
> +					 "json_tree_add", "tree");
> +				goto fail;
> +			}
> +			next = field;
> +			field = tuple_field_new();
> +			if (field == NULL)
> +				goto fail;
> +		}
> +		parent = next;
> +		token_count++;
> +	}
> +	/* Path has been verified  key_def_decode_parts. */

by key_def_decode_parts

> +	assert(rc == 0 && field->token.type == JSON_TOKEN_END);
> +	assert(parent != NULL);
> +	/* Update tree depth information. */
> +	format->max_path_tokens = MAX(format->max_path_tokens, token_count + 1);

This is confusing - the variable name implies that it denotes the
maximal number of tokens among JSON paths, but actually it also
includes the top level field. We need to either remove +1 or rename
the variable (tuple_format::fields_depth or something like that).

> +end:
> +	tuple_field_delete(field);
> +	return parent;
> +fail:
> +	parent = NULL;
> +	goto end;
>  }
>  
>  /**
> @@ -95,10 +172,25 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
>  static int
>  tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count,
>  			  const struct key_part *part, bool is_sequential,
> -			  int *current_slot)
> +			  int *current_slot, char **paths)
>  {
>  	assert(part->fieldno < tuple_format_field_count(format));
> -	struct tuple_field *field = tuple_format_field(format, part->fieldno);
> +	struct tuple_field *field;
> +	if (part->path == NULL) {
> +		field = tuple_format_field(format, part->fieldno);
> +	} else {
> +		assert(!is_sequential);
> +		/**
> +		 * Copy JSON path data to reserved area at the
> +		 * end of format allocation.
> +		 */
> +		memcpy(*paths, part->path, part->path_len);
> +		field = tuple_field_tree_add_path(format, *paths, part->path_len,
> +						  part->fieldno);
> +		if (field == NULL)
> +			return -1;
> +		*paths += part->path_len;
> +	}

I don't like this branching. AFAIU the code for path != NULL would work
even if path == NULL.

>  	/*
>  		* If a field is not present in the space format,
>  		* inherit nullable action of the first key part

While you are at it, please fix indentation here. No need to submit such
a small collateral change separately, do it in this patch please.

> @@ -158,13 +250,93 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count,
>  	 * simply accessible, so we don't store an offset for it.
>  	 */
>  	if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL &&
> -	    is_sequential == false && part->fieldno > 0) {
> +	    is_sequential == false &&
> +	    (part->fieldno > 0 || part->path != NULL)) {
>  		*current_slot = *current_slot - 1;
>  		field->offset_slot = *current_slot;
>  	}
>  	return 0;
>  }
>  
> +/**
> + * Get format:field parent field_type.
> + * This routine is required as first-level fields has no parent
> + * field so it could not be retrieved with json_tree_entry.
> + */
> +static enum field_type
> +tuple_format_field_parent_type(struct tuple_format *format,
> +			       struct tuple_field *field)

I don't like this auxiliary function, because its applicability is
quite limited IMO. Since it's used in only one place, why don't you
simply inline it there?

> +{
> +	struct json_token *parent = field->token.parent;
> +	if (parent == &format->fields.root)
> +		return FIELD_TYPE_ARRAY;
> +	return json_tree_entry(parent, struct tuple_field, token)->type;
> +}
> +
> +uint32_t
> +tuple_format_stmt_encode(struct tuple_format *format, char **offset,
> +			 char *tuple_raw, uint32_t *field_map,
> +			 struct iovec *iov)

You either pass all NULLs to this function so as to estimate the
resulting msgpack size or don't use the return value at all. This
doesn't look good. Semantically, there are two functions. Please
split.

The part that encodes a tuple should stay in vy_stmt.c IMO. In fact,
I don't think there's any point in factoring this code out of
vy_stmt_new_surrogate_from_key(), because without the rest of it it's
quite difficult to understand why it exists.

The part that calculates the size shouldn't have stmt in its name,
because what it actually does is calculate the size of a tuple matching
the format and filled with nulls.

> +{
> +	bool write = offset != NULL;
> +	uint32_t size = 0;
> +	struct tuple_field *field;
> +	json_tree_foreach_entry_preorder(field, &format->fields.root,
> +					 struct tuple_field, token) {
> +		enum field_type parent_type =
> +			tuple_format_field_parent_type(format, field);
> +		if (parent_type == FIELD_TYPE_ARRAY &&

Do you really need to know parent_type here? I think that instead you
could simply check field->token.type: NUM means array, STR means map.

> +		    field->token.sibling_idx > 0) {
> +			/*
> +			 * Write nil istead of omitted array
> +			 * members.
> +			 */
> +			struct json_token **neighbors =
> +				field->token.parent->children;
> +			for (uint32_t i = field->token.sibling_idx - 1;

I'd use token.num here instead if possible - would be consistent with
the fact that you use token.str below.

> +			     neighbors[i] == NULL && i > 0; i--) {
> +				if (write)
> +					*offset = mp_encode_nil(*offset);
> +				size += mp_sizeof_nil();
> +			}

Could be rewritten without the extra check for (sibling_idx > 0) above,
making the code a little bit more straightforward:

		if (field->token.type == JSON_TOKEN_NUM) {
			/* Fill missing array slots with nils. */
			for (int i = field->token.num - 1;
			     i > 0 && neighbors[i] == NULL; i--) {
				...
			}
		}

> +		} else if (parent_type == FIELD_TYPE_MAP) {
> +			/* Write map key string. */
> +			const char *str = field->token.str;
> +			uint32_t len = field->token.len;
> +			if (write)
> +				*offset = mp_encode_str(*offset, str, len);
> +			size += mp_sizeof_str(len);
> +		}
> +		/* Fill data. */
> +		uint32_t children_cnt = field->token.max_child_idx + 1;

It's not child count - the number of children can be less. Let's please
avoid introducing this extra variable and use max_child_idx+1 directly -
if you split this function the code will look even better without it.

> +		if (json_token_is_leaf(&field->token)) {
> +			if (!write || iov[field->id].iov_len == 0) {
> +				if (write)
> +					*offset = mp_encode_nil(*offset);
> +				size += mp_sizeof_nil();
> +			} else {
> +				memcpy(*offset, iov[field->id].iov_base,
> +				       iov[field->id].iov_len);
> +				uint32_t data_offset = *offset - tuple_raw;
> +				int32_t slot = field->offset_slot;
> +				if (slot != TUPLE_OFFSET_SLOT_NIL)
> +					field_map[slot] = data_offset;
> +				*offset += iov[field->id].iov_len;
> +				size += iov[field->id].iov_len;
> +			}
> +		} else if (field->type == FIELD_TYPE_ARRAY) {
> +			if (write)
> +				*offset = mp_encode_array(*offset, children_cnt);
> +			size += mp_sizeof_array(children_cnt);
> +		} else if (field->type == FIELD_TYPE_MAP) {
> +			if (write)
> +				*offset = mp_encode_map(*offset, children_cnt);
> +			size += mp_sizeof_map(children_cnt);
> +		}
> +	}
> +	return size;
> +}
> +
>  /**
>   * Extract all available type info from keys and field
>   * definitions.
> @@ -236,9 +414,12 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>  			 "malloc", "required field bitmap");
>  		return -1;
>  	}
> +	uint32_t id = 0;
>  	struct tuple_field *field;
>  	json_tree_foreach_entry_preorder(field, &format->fields.root,
>  					 struct tuple_field, token) {
> +		/* Set the unique field identifier. */
> +		field->id = id++;

This should be done in tuple_format_add_field. You can post-inc
total_field_count there to allocate a new id.

And as I've already told you, it's no good moving pieces of code around
in patches of the same series. If you intended to allocate an id here in
the first place you should have added this code in the patch that
introduced field->id. Now, you should follow the pattern you laid in
that patch and allocate field->id when a field is added.

>  		/*
>  		 * Mark all leaf non-nullable fields as required
>  		 * by setting the corresponding bit in the bitmap
> @@ -317,6 +502,8 @@ static struct tuple_format *
>  tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>  		   uint32_t space_field_count, struct tuple_dictionary *dict)
>  {
> +	/* Size of area to store paths. */
> +	uint32_t paths_size = 0;

Just like I don't like 'paths', because it can be confused with 'path',
I don't like paths_size. I can't come up with a better name right now,
though. Please think of one and give me some options.

>  	uint32_t index_field_count = 0;
>  	/* find max max field no */
>  	for (uint16_t key_no = 0; key_no < key_count; ++key_no) {
> @@ -368,6 +556,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>  	}
>  	format->total_field_count = field_count;
>  	format->required_fields = NULL;
> +	format->max_path_tokens = 1;
> +	format->vy_stmt_size = UINT32_MAX;

Why? Should be 0, I think.

>  	format->refs = 0;
>  	format->id = FORMAT_ID_NIL;
>  	format->index_field_count = index_field_count;
> @@ -428,15 +618,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
>  {
>  	if (format1->exact_field_count != format2->exact_field_count)
>  		return false;
> -	uint32_t format1_field_count = tuple_format_field_count(format1);
> -	uint32_t format2_field_count = tuple_format_field_count(format2);
> -	for (uint32_t i = 0; i < format1_field_count; ++i) {
> -		struct tuple_field *field1 = tuple_format_field(format1, i);
> +	struct tuple_field *field1;
> +	json_tree_foreach_entry_preorder(field1, &format1->fields.root,
> +					 struct tuple_field, token) {
> +next:;

Please, don't use :; - it looks weird.

> +		const char *path = tuple_field_path(field1, true);
> +		struct tuple_field *field2 =
> +			json_tree_lookup_path_entry(&format2->fields,
> +						    &format2->fields.root,
> +						    path, strlen(path),
> +						    TUPLE_INDEX_BASE,
> +						    struct tuple_field, token);

Use json_tree_snprint_path directly here. Allocating a buffer on the
region for it is OK. Panicking on memory allocation error is OK as well.

>  		/*
>  		 * The field has a data type in format1, but has
>  		 * no data type in format2.
>  		 */
> -		if (i >= format2_field_count) {
> +		if (field2 == NULL) {
>  			/*
>  			 * The field can get a name added
>  			 * for it, and this doesn't require a data
> @@ -447,12 +644,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
>  			 * NULLs or miss the subject field.
>  			 */
>  			if (field1->type == FIELD_TYPE_ANY &&
> -			    tuple_field_is_nullable(field1))
> -				continue;
> -			else
> +			    tuple_field_is_nullable(field1)) {
> +				/* Skip subtree. */
> +				struct json_token *token = &field1->token;
> +				struct json_token *parent = token->parent;
> +				field1 = json_tree_child_next_entry(parent,

Skipping some entries while iterating over a tree looks precarious.
It feels like it may break the iterator assumption, even if it actually
doesn't.

I don't think it's worth it - AFAIU you could simply continue the loop
instead. That would be suboptimal, but then again this is a cold path.

> +								    token,
> +								    struct
> +								    tuple_field,
> +								    token);

BTW there's no strong requirement to align function arguments by
parenthesis. You could write

				field1 = json_tree_child_next_entry(parent,
					token, struct tuple_field, token);

and that would be OK.

> +				if (field1 == NULL)
> +					break;
> +				goto next;
> +			} else {
>  				return false;
> +			}
>  		}
> -		struct tuple_field *field2 = tuple_format_field(format2, i);
>  		if (! field_type1_contains_type2(field1->type, field2->type))
>  			return false;
>  		/*
> @@ -466,6 +673,90 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
>  	return true;
>  }
>  
> +/**
> + * Descriptor of the parsed msgpack frame.
> + * Due to the fact that the msgpack has nested structures whose
> + * length is stored in the frame header at the blob beginning, we

What's a blob?

> + * need to be able to determine that we have finished parsing the

s/that/if

> + * current component and should move on to the next one.
> + * For this purpose a stack of disassembled levels is organized,

Disassembled levels?

> + * where the type of the level, the total number of elements,
> + * and the number of elements that have already been parsed are
> + * stored.
> + */
> +struct mp_frame {
> +	/** JSON token type representing frame data structure. */
> +	enum json_token_type child_type;
> +	/** Total count of MP members to process. */
> +	uint32_t total;
> +	/** Count of MP elements that already have parseed. */
> +	uint32_t curr;
> +};

It feels like this thing should be independent of json/tuple stuff and
be a part of the msgpuck library, because what it actually does is
decode mp data providing the caller with array index / map key of each
decoded entry. Please think whether we can make it so.

> +
> +/**
> + * Emit token to analyze and do msgpack pointer shift using top
> + * mp_stack frame. Return 0 on success, -1 when analyse step must
> + * be skipped (on usuported term detection).
> + */
> +static int
> +mp_frame_parse(struct mp_frame *mp_stack, uint32_t mp_stack_idx,
> +	       const char **pos, struct json_token *token)
> +{
> +	token->type = mp_stack[mp_stack_idx].child_type;
> +	++mp_stack[mp_stack_idx].curr;
> +	if (token->type == JSON_TOKEN_NUM) {
> +		token->num = mp_stack[mp_stack_idx].curr - TUPLE_INDEX_BASE;
> +	} else if (token->type == JSON_TOKEN_STR) {
> +		if (mp_typeof(**pos) != MP_STR) {
> +			/* Skip key. */
> +			mp_next(pos);
> +			return -1;
> +		}
> +		token->str = mp_decode_str(pos, (uint32_t *)&token->len);
> +	} else {
> +		unreachable();
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Prepare mp_frame for futher iterations. Store container length
> + * and child_type. Update parent token pointer and shift msgpack
> + * pointer.
> + */
> +static int
> +mp_frame_prepare(struct mp_frame *mp_stack, uint32_t *mp_stack_idx,
> +		 uint32_t mp_stack_total, struct json_token *token,
> +		 const char **pos, struct json_token **parent)

I don't quite like the names, nor the API. Please put more thought into
them as part of moving this thing to the msgpuck library. Give me some
options, please.

> +{
> +	enum mp_type type = mp_typeof(**pos);
> +	if (token != NULL && *mp_stack_idx + 1 < mp_stack_total &&
> +	    (type == MP_MAP || type == MP_ARRAY)) {
> +		uint32_t size = type == MP_ARRAY ? mp_decode_array(pos) :
> +				mp_decode_map(pos);
> +		if (size == 0)
> +			return 0;
> +		*parent = token;
> +		enum json_token_type child_type =
> +			type == MP_ARRAY ? JSON_TOKEN_NUM : JSON_TOKEN_STR;
> +		*mp_stack_idx = *mp_stack_idx + 1;
> +		mp_stack[*mp_stack_idx].child_type = child_type;
> +		mp_stack[*mp_stack_idx].total = size;
> +		mp_stack[*mp_stack_idx].curr = 0;
> +	} else {
> +		mp_next(pos);
> +		while (mp_stack[*mp_stack_idx].curr >=
> +			mp_stack[*mp_stack_idx].total) {

Bad indentation.

> +			assert(*parent != NULL);
> +			*parent = (*parent)->parent;
> +			if (*mp_stack_idx == 0)
> +				return -1;
> +			*mp_stack_idx = *mp_stack_idx - 1;
> +		}
> +	}
> +	return 0;
> +}
> +
>  /** @sa declaration for details. */
>  int
>  tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
> @@ -512,49 +803,64 @@ tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
>  		/* Empty tuple, nothing to do. */
>  		goto skip;
>  	}
> -	/* first field is simply accessible, so we do not store offset to it */
> -	struct tuple_field *field = tuple_format_field(format, 0);
> -	if (validate &&
> -	    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
> -					 tuple_field_is_nullable(field))) {
> -		diag_set(ClientError, ER_FIELD_TYPE, tuple_field_path(field),
> -			 field_type_strs[field->type]);
> -		goto error;
> -	}
> -	if (required_fields != NULL)
> -		bit_clear(required_fields, field->id);
> -	mp_next(&pos);
> -	/* other fields...*/
> -	uint32_t i = 1;
>  	uint32_t defined_field_count = MIN(field_count, validate ?
>  					   tuple_format_field_count(format) :
>  					   format->index_field_count);

I think that decoding the top level should be done with the same mp
helpers that are used for decoding nested levels. Please think whether
we can achieve that.

> -	if (field_count < format->index_field_count) {
> -		/*
> -		 * Nullify field map to be able to detect by 0,
> -		 * which key fields are absent in tuple_field().
> -		 */
> -		memset((char *)field_map - format->field_map_size, 0,
> -		       format->field_map_size);
> +	/*
> +	 * Nullify field map to be able to detect by 0,
> +	 * which key fields are absent in tuple_field().
> +	 */
> +	memset((char *)field_map - format->field_map_size, 0,
> +		format->field_map_size);
> +	uint32_t mp_stack_size =
> +		format->max_path_tokens * sizeof(struct mp_frame);
> +	struct mp_frame *mp_stack = region_alloc(region, mp_stack_size);
> +	if (mp_stack == NULL) {
> +		diag_set(OutOfMemory, mp_stack_size, "region_alloc",
> +			 "mp_stack");
> +		goto error;
>  	}
> -	for (; i < defined_field_count; ++i) {
> -		field = tuple_format_field(format, i);
> -		if (validate &&
> -		    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
> -						 tuple_field_is_nullable(field))) {
> -			diag_set(ClientError, ER_FIELD_TYPE,
> -				 tuple_field_path(field),
> -				 field_type_strs[field->type]);
> -			goto error;
> +	struct tuple_field *field;
> +	mp_stack[0].child_type = JSON_TOKEN_NUM;
> +	mp_stack[0].total = defined_field_count;
> +	mp_stack[0].curr = 0;
> +	uint32_t mp_stack_idx = 0;
> +	struct json_tree *tree = (struct json_tree *)&format->fields;

As I've already asked you, please remove all no-op type conversions left
from the time when tuple_format was const.

> +	struct json_token *parent = &tree->root;
> +	while (mp_stack[0].curr <= mp_stack[0].total) {
> +		struct json_token token;
> +		if (mp_frame_parse(mp_stack, mp_stack_idx, &pos, &token) != 0) {
> +			/* Unsupported token. */
> +			goto finish_frame;
>  		}
> -		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> -			field_map[field->offset_slot] =
> -				(uint32_t) (pos - tuple);

Pointless type conversion?

> +		field = json_tree_lookup_entry(tree, parent, &token,
> +					       struct tuple_field, token);
> +		if (field != NULL) {
> +			bool is_nullable = tuple_field_is_nullable(field);
> +			if (validate &&
> +			    !field_mp_type_is_compatible(field->type,
> +							 mp_typeof(*pos),
> +							 is_nullable) != 0) {
> +				diag_set(ClientError, ER_FIELD_TYPE,
> +					 tuple_field_path(field, false),
> +					 field_type_strs[field->type]);
> +				goto error;
> +			}
> +			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> +				field_map[field->offset_slot] =
> +					(uint32_t)(pos - tuple);

Ditto.

> +			}
> +			if (required_fields != NULL)
> +				bit_clear(required_fields, field->id);
>  		}
> -		if (required_fields != NULL)
> -			bit_clear(required_fields, field->id);
> -		mp_next(&pos);
> -	}
> +finish_frame:
> +		/* Prepare stack info for next iteration. */
> +		if (mp_frame_prepare(mp_stack, &mp_stack_idx,
> +				     format->max_path_tokens,
> +				     field != NULL ? &field->token : NULL,
> +				     &pos, &parent) != 0)
> +			break;
> +	};
>  skip:
>  	/*
>  	 * Check the required field bitmap for missing fields.
> @@ -820,3 +1118,30 @@ error:
>  		 tt_sprintf("error in path on position %d", rc));
>  	return -1;
>  }
> +
> +int
> +tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data,
> +				 const uint32_t *field_map,
> +				 struct key_part *part, const char **raw)

You never use the return value, so why don't you return void * instead,
like tuple_field_raw() does?

> +{
> +	assert(part->path != NULL);
> +	struct tuple_field *field =
> +		tuple_format_field_by_path(format, part->fieldno, part->path,
> +					   part->path_len);
> +	if (field != NULL) {
> +		int32_t offset_slot = field->offset_slot;
> +		assert(-offset_slot * sizeof(uint32_t) <=
> +		       format->field_map_size);
> +		*raw = field_map[offset_slot] == 0 ?
> +		       NULL : data + field_map[offset_slot];
> +		return 0;
> +	}
> +	/*
> +	 * Format doesn't have field representing specified part.
> +	 * Make slow tuple parsing.
> +	 */
> +	*raw = tuple_field_raw(format, data, field_map, part->fieldno);
> +	if (*raw == NULL)
> +		return 0;
> +	return tuple_field_go_to_path(raw, part->path, part->path_len);
> +}
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index 30b93b610..3b630c3bb 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -185,6 +186,15 @@ struct tuple_format {
>  	 * Shared names storage used by all formats of a space.
>  	 */
>  	struct tuple_dictionary *dict;
> +	/**
> +	 * A maximum depth of format:fields subtree.
> +	 */
> +	uint32_t max_path_tokens;
> +	/**
> +	 * The size of the secondary key built for format:fields
> +	 * with all leaf records set to nil.
> +	 */
> +	uint32_t vy_stmt_size;

Bad name - it shouldn't refer to vinyl or secondary indexes. It's just
the size of a minimal tuple conforming to the format and filled with
nils. Please think of a better name.

>  	/**
>  	 * Fields comprising the format, organized in a tree.
>  	 * First level nodes correspond to tuple fields.
> @@ -451,7 +510,16 @@ static inline const char *
>  tuple_field_by_part_raw(struct tuple_format *format, const char *data,
>  			const uint32_t *field_map, struct key_part *part)
>  {
> -	return tuple_field_raw(format, data, field_map, part->fieldno);
> +	if (likely(part->path == NULL)) {
> +		return tuple_field_raw(format, data, field_map, part->fieldno);
> +	} else {
> +		const char *raw;
> +		MAYBE_UNUSED int rc =
> +			tuple_field_by_part_raw_slowpath(format, data,
> +							 field_map, part, &raw);

So now we have

  - tuple_field_raw(fieldno) - which looks up tuple_field in
    tuple_format by fieldno and uses field map for a quick lookup;
    if not found decodes msgpack.

  - tuple_field_raw_by_path(path) - which looks up a field by full path
    (including fieldno or field name). Doesn't use field map.

  - tuple_field_by_part_raw(part) - which is equivalent to
    tuple_field_raw(part->fieldno) if part->path is NULL, otherwise it
    looks up tuple_field in tuple_format by fieldno and path. If found,
    it uses field map for a quick lookup, otherwise it decodes msgpack
    with the aid of tuple_field_go_to_path().

In the following patches you make tuple_field_raw_by_path() use
tuple_field_by_part_raw() passing a fake key_part, which looks kinda
ugly.

I expect tuple_field_by_part() to use tuple_field_by_path(), not vice
versa. Please refactor the code. May be, you'll need a separate patch.

> +		assert(rc == 0);
> +		return raw;
> +	}
>  }
>  
>  #if defined(__cplusplus)
> diff --git a/src/box/vy_log.c b/src/box/vy_log.c
> index c9e0713c8..6fc051648 100644
> --- a/src/box/vy_log.c
> +++ b/src/box/vy_log.c
> @@ -581,9 +581,11 @@ vy_log_record_decode(struct vy_log_record *record,
>  			record->group_id = mp_decode_uint(&pos);
>  			break;
>  		case VY_LOG_KEY_DEF: {
> +			struct region *region = &fiber()->gc;
>  			uint32_t part_count = mp_decode_array(&pos);
> -			struct key_part_def *parts = region_alloc(&fiber()->gc,
> -						sizeof(*parts) * part_count);
> +			struct key_part_def *parts =
> +				region_alloc(region,
> +					     sizeof(*parts) * part_count);

TBH, I hate it when one carries the rvalue to another line in an
assignment, like this:

		int very_long_name =
			very_long_initializer;

Looks ugly and complicates grepping.

In this particular case there's no point in doing that, but you seem to be
quite obsessed with aligning function arguments on the opening parenthesis
for some reason :-)

>  			if (parts == NULL) {
>  				diag_set(OutOfMemory,
>  					 sizeof(*parts) * part_count,
> diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
> index ddbc2d46f..14e0c0c93 100644
> --- a/src/box/vy_point_lookup.c
> +++ b/src/box/vy_point_lookup.c
> @@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
>  		const struct vy_read_view **rv,
>  		struct tuple *key, struct tuple **ret)
>  {
> -	assert(tuple_field_count(key) >= lsm->cmp_def->part_count);
> -

Quoting myself from the previous review round:

} I don't like that you simply throw away this assertion. Please think how
} we can keep it.

( see https://www.freelists.org/post/tarantool-patches/PATCH-v5-59-box-introduce-JSON-indexes,1 )

You didn't answer me back then, but in the new version you are
still trying to remove this assertion...

>  	*ret = NULL;
>  	double start_time = ev_monotonic_now(loop());
>  	int rc = 0;
> diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
> index 47f135c65..7a302e6f3 100644
> --- a/src/box/vy_stmt.c
> +++ b/src/box/vy_stmt.c
> @@ -385,26 +385,43 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type,
>  	struct region *region = &fiber()->gc;
>  
>  	uint32_t field_count = format->index_field_count;
> -	struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count);
> +	uint32_t iov_sz =
> +		sizeof(struct iovec) * format->total_field_count;

Why carry it to the next line? It fits.

> +	struct iovec *iov = region_alloc(region, iov_sz);
>  	if (iov == NULL) {
> -		diag_set(OutOfMemory, sizeof(*iov) * field_count,
> -			 "region", "iov for surrogate key");
> +		diag_set(OutOfMemory, iov_sz, "region_alloc",
> +			 "iov for surrogate key");

We usually pass "region", not "region_alloc" to OutOfMemory.
Why change it here?

>  		return NULL;
>  	}
> -	memset(iov, 0, sizeof(*iov) * field_count);
> +	memset(iov, 0, iov_sz);
>  	uint32_t part_count = mp_decode_array(&key);
>  	assert(part_count == cmp_def->part_count);
> -	assert(part_count <= field_count);
> -	uint32_t nulls_count = field_count - cmp_def->part_count;
> +	assert(part_count <= format->total_field_count);
> +	/**
> +	 * The format:vy_stmt_size contains a size of

the size

> +	 * stmt tuple having all leaf fields set to null.
> +	 * Calculate bsize as vy_stmt_size where parts_count
> +	 * nulls replaced with extracted keys.
> +	 */
>  	uint32_t bsize = mp_sizeof_array(field_count) +

Isn't mp_sizeof_array(field_count) included into vy_stmt_size?
It should be.

> -			 mp_sizeof_nil() * nulls_count;
> +			 format->vy_stmt_size - mp_sizeof_nil() * part_count;
>  	for (uint32_t i = 0; i < part_count; ++i) {
>  		const struct key_part *part = &cmp_def->parts[i];
>  		assert(part->fieldno < field_count);
> +		struct tuple_field *field;
> +		if (part->path != NULL) {
> +			field = tuple_format_field_by_path(format,
> +							   part->fieldno,
> +							   part->path,
> +							   part->path_len);
> +		} else {
> +			field = tuple_format_field(format, part->fieldno);
> +		}

tuple_format_field_by_part() helper would be appreciated.

> +		assert(field != NULL);
>  		const char *svp = key;
> -		iov[part->fieldno].iov_base = (char *) key;
> +		iov[field->id].iov_base = (char *) key;
>  		mp_next(&key);
> -		iov[part->fieldno].iov_len = key - svp;
> +		iov[field->id].iov_len = key - svp;
>  		bsize += key - svp;
>  	}
>  
> diff --git a/test/engine/json.result b/test/engine/json.result
> new file mode 100644
> index 000000000..711f7f256
> --- /dev/null
> +++ b/test/engine/json.result
> @@ -0,0 +1,448 @@
> +test_run = require('test_run').new()
> +---
> +...
> +engine = test_run:get_cfg('engine')
> +---
> +...
> +--
> +-- gh-1012: Indexes for JSON-defined paths.
> +--
> +s = box.schema.space.create('withdata', {engine = engine})
> +---
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}})
> +---
> +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key
> +    part is indexed twice'
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}})
> +---
> +- error: 'Wrong index options (field 2): ''path'' must be string'
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}})
> +---
> +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
> +    ''map'' is not supported'
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}})
> +---
> +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type
> +    ''array'' is not supported'
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = 'FIO.fname'}}})
> +---
> +- error: Field [3]["FIO"] has type 'string' in one index, but type 'map' in another
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}})
> +---
> +- error: Field 3 has type 'array' in one index, but type 'map' in another
> +...
> +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}})
> +---
> +- error: 'Wrong index options (field 3): invalid JSON path ''FIO....fname'': error
> +    in path on position 5'
> +...
> +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}})
> +---
> +...
> +assert(idx ~= nil)

No point in using assert() in a test.

I'll look at the test later, when you fix the code according to my
comments.



More information about the Tarantool-patches mailing list