[PATCH v3 7/7] box: introduce multikey indexes in memtx

Thu Apr 4 17:20:28 MSK 2019

On Tue, Apr 02, 2019 at 06:49:38PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index 3f8cb8e0e..ea992a6b2 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -246,6 +246,7 @@ struct errcode_record {
>  	/*191 */_(ER_SQL_PARSER_LIMIT,		"%s %d exceeds the limit (%d)") \
>  	/*192 */_(ER_INDEX_DEF_UNSUPPORTED,	"%s are prohibited in an index definition") \
>  	/*193 */_(ER_CK_DEF_UNSUPPORTED,	"%s are prohibited in a CHECK constraint definition") \
> +	/*194 */_(ER_INDEX_MULTIKEY_INVALID,	"Multikey index is invalid: %s") \

Other similar errors take a path to a field as a separate argument, e.g.
ER_INDEX_PART_TYPE_MISMATCH. I think this one should work the same way.

>  
>  /*
>   * !IMPORTANT! Please follow instructions at start of the file
> diff --git a/src/box/field_map.c b/src/box/field_map.c
> index 690aa461d..ae6c5be5f 100644
> --- a/src/box/field_map.c
> +++ b/src/box/field_map.c
> @@ -37,6 +37,8 @@ field_map_builder_create(struct field_map_builder *builder,
>  			 uint32_t minimal_field_map_size,
>  			 struct region *region)
>  {
> +	builder->region = region;

Region shouldn't be a property of a builder. Pass it in function
arguments where appropriate instead.

> +	builder->extents_size = 0;
>  	builder->slot_count = minimal_field_map_size / sizeof(uint32_t);
>  	if (minimal_field_map_size == 0) {
>  		builder->slots = NULL;
> @@ -53,9 +55,71 @@ field_map_builder_create(struct field_map_builder *builder,
>  	return 0;
>  }
>  
> +/**
> + * Get size of extention (in bytes) by count of items it
> + * must contain.
> + */
> +static uint32_t
> +field_map_ext_size(uint32_t items)

IMO helpers like this just obfuscate the code.

> +{
> +	return sizeof(struct field_map_ext) + items * sizeof(uint32_t);
> +}
> +
> +struct field_map_ext *
> +field_map_builder_ext_get(struct field_map_builder *builder,
> +			  int32_t offset_slot, uint32_t extent_items)
> +{
> +	struct field_map_ext *extent;
> +	if (builder->slots[offset_slot].has_extent) {
> +		extent = builder->slots[offset_slot].extent;
> +		assert(extent != NULL);
> +		assert(extent->items == extent_items);
> +		return extent;
> +	}
> +	uint32_t sz = field_map_ext_size(extent_items);
> +	extent = region_alloc(builder->region, sz);
> +	if (extent == NULL) {
> +		diag_set(OutOfMemory, sz, "region", "extent");
> +		return NULL;
> +	}
> +	memset(extent, 0, sz);
> +	extent->items = extent_items;
> +	builder->slots[offset_slot].extent = extent;
> +	builder->slots[offset_slot].has_extent = true;
> +	builder->extents_size += sz;
> +	return extent;
> +}
> +
>  void
>  field_map_build(struct field_map_builder *builder, char *buffer)
>  {
> -	uint32_t field_map_size = field_map_build_size(builder);
> -	memcpy(buffer, (char *)builder->slots - field_map_size, field_map_size);
> +	/*
> +	 * To initialize the field map and its extents, prepare
> +	 * the following memory layout with pointers:
> +	 *
> +	 *                      offset
> +	 *            +---------------------+
> +	 *            |                     |
> +	 * [extentK]..[extent1][[slotN]..[slot2][slot1]]
> +	 *            |        |extent_wptr            |field_map
> +	 *            <-       <-                     <-
> +	 *

Why allocate extents starting from the end? There's no requirement for
this while IMO this makes the code more complicated. Or am I missing
something?

> +	 * The buffer size is assumed to be sufficient to write
> +	 * field_map_build_size(builder) bytes there.
> +	 */
> +	uint32_t *field_map =
> +		(uint32_t *)(buffer + field_map_build_size(builder));
> +	char *extent_wptr = buffer + builder->extents_size;
> +	for (int32_t i = -1; i >= -(int32_t)builder->slot_count; i--) {
> +		if (!builder->slots[i].has_extent) {
> +			field_map[i] = builder->slots[i].offset;
> +			continue;
> +		}
> +		struct field_map_ext *extent = builder->slots[i].extent;
> +		/** Retrive memory for the extent. */
> +		uint32_t sz = field_map_ext_size(extent->items);
> +		extent_wptr -= sz;
> +		field_map[i] = (char *)field_map - (char *)extent_wptr;
> +		memcpy(extent_wptr, extent, sz);

Please add an assertion checking the extents size.

> +	}
>  }
> diff --git a/src/box/field_map.h b/src/box/field_map.h
> index 47ecbd009..af578ec6b 100644
> --- a/src/box/field_map.h
> +++ b/src/box/field_map.h
> @@ -34,6 +34,7 @@
>  #include <stdint.h>
>  
>  struct region;
> +struct field_map_builder_slot;
>  
>  /**
>   * A field map is a special area is reserved before tuple's
> @@ -46,13 +47,15 @@ struct region;
>   * offset_slot(s) is performed on tuple_format creation on index
>   * create or alter (see tuple_format_create()).
>   *
> - *              4b          4b       MessagePack data.
> - *           +------+----+------+---------------------------+
> - *    tuple: | offN | .. | off1 | header ..|key1|..|keyN|.. |
> - *           +--+---+----+--+---+---------------------------+
> - *           |     ...    |                 ^       ^
> - *           |            +-----------------+       |
> - *           +--------------------------------------+
> + *        4b   4b      4b          4b       MessagePack data.
> + *       +-----------+------+----+------+------------------------+
> + *tuple: |cnt|off1|..| offN | .. | off1 | header ..|key1|..|keyN||
> + *       +-----+-----+--+---+----+--+---+------------------------+
> + * ext1  ^     |        |   ...     |                 ^       ^
> + *       +-----|--------+           |                 |       |
> + * indirection |                    +-----------------+       |
> + *             +----------------------------------------------+
> + *             (offset_slot = N, extent_slot = 1) --> offset
>   *
>   * This field_map_builder class is used for tuple field_map
>   * construction. It encapsulates field_map build logic and size
> @@ -60,19 +63,88 @@ struct region;
>   *
>   * Each field offset is a positive number, except the case when
>   * a field is not in the tuple. In this case offset is 0.
> + *
> + * Some slots may store an offset of the field_map_ext structure,

"Some slots may store" - sounds ambiguous. Let's rephrase the comment to
emphasize that it's only used for multikey indexes?

> + * which contains an additional sequence of offsets of size
> + * defined above(see field_map_ext layout). The caller needs to
> + * be aware of when the slot is an offset of the data and when
> + * it is the offset of the field_map_ext.

I don't like that it's impossible to say which slots are multikey and
which are not by looking at a field map. It may result in funny memory
corruption bugs. I think we should mark multikey slots somehow and add
appropriate assertions to field_map_get_offset. E.g. write negative
offsets there. Would it make sense or I'm over-engineering?

> + *
> + * Now these extents are used to organize a multikey index.
> + * The count of keys in the multikey index imposes the count of

"imposes"?

> + * items in extent while the i-th extent's slot contains the
> + * offset of the i-th key field.
>   */
>  struct field_map_builder {
>  	/**
> -	 * The pointer to the end of filed_map allocation.

Typo: filed -> field. Please fix the previous commit.

> +	 * The pointer to the end of field_map_builder_slot(s)
> +	 * allocation.

Why change this comment at all? It's still correct.

>  	 * Its elements are accessible by negative indexes
>  	 * that coinciding with offset_slot(s).
>  	 */
> -	uint32_t *slots;
> +	struct field_map_builder_slot *slots;
>  	/**
>  	 * The count of slots in field_map_builder::slots
>  	 * allocation.
>  	 */
>  	uint32_t slot_count;
> +	/**
> +	 * Total size of memory is allocated for field_map
> +	 * extentions.

There's no word "extention".

Let's use "extent" everywhere, including field_map_ext.

> +	 */
> +	uint32_t extents_size;
> +	/**
> +	 * Region to use to perform memory allocations.
> +	 */
> +	struct region *region;
> +};
> +
> +/**
> + * Internal stucture representing field_map extent.
> + * (see field_map_builder description).
> + */
> +struct field_map_ext {
> +	/** Count of field_map_ext::offset[] elements. */
> +	uint32_t items;
> +	/** Data offset in tuple array. */
> +	uint32_t offset[0];
> +};
> +
> +/**
> + * Internal function to get or allocate field map extent
> + * by offset_slot, and count of items.
> + */
> +struct field_map_ext *
> +field_map_builder_ext_get(struct field_map_builder *builder,
> +			  int32_t offset_slot, uint32_t extent_items);
> +
> +/**
> + * Instead of using uint32_t offset slots directly the
> + * field_map_builder uses this structure as a storage atom.
> + * When there is a need to initialize an extent, the
> + * field_map_builder allocates a new memory chunk and sets
> + * field_map_builder_slot::pointer (instead of real field_map
> + * reallocation).
> + *
> + * On field_map_build, all of the extents are dumped to the same
> + * memory chunk that the regular field_map slots and corresponding
> + * slots represent relative field_map_ext offset instead of
> + * field data offset.
> + *
> + * The allocated memory is accounted for in extents_size.
> + */
> +struct field_map_builder_slot {
> +	/**
> +	 * True when this slot must be interpret as
> +	 * extention pointer.
> +	 */
> +	bool has_extent;
> +	union {
> +		/** Data offset in tuple. */
> +		uint32_t offset;
> +		/** Pointer to field_map_ext extention. */
> +		struct field_map_ext *extent;
> +	};
>  };
>  
>  /**
> @@ -82,9 +154,22 @@ struct field_map_builder {
>   * When a field is not in the data tuple, its offset is 0.
>   */
>  static inline uint32_t
> -field_map_get_offset(const uint32_t *field_map, int32_t offset_slot)
> +field_map_get_offset(const uint32_t *field_map, int32_t offset_slot,
> +		     int multikey_idx)
>  {
> -	return field_map[offset_slot];
> +	uint32_t offset;
> +	if (multikey_idx >= 0) {
> +		assert(field_map[offset_slot] != 0);

What if a multikey field is absent (e.g. nullable)? Can it actually
happen?

> +		struct field_map_ext *extent =
> +			(struct field_map_ext *)((char *)field_map -
> +						 field_map[offset_slot]);
> +		if ((uint32_t)multikey_idx >= extent->items)
> +			return 0;

I'm not sure that using field_map_ext is a good idea. See, we don't have
a separate data structure for field map, still we have one for an
extent. What is more confusing, we use the same data structure both in
builder and here. Since this code wouldn't look more complex without
field_map_ext, I'd rename field_map_ext to field_map_builder_slot_extent
and use it only in the builder code while accessing the field map
directly here.

> +		offset = extent->offset[multikey_idx];
> +	} else {
> +		offset = field_map[offset_slot];
> +	}
> +	return offset;
>  }
>  
>  /**
> @@ -116,7 +201,32 @@ field_map_builder_set_slot(struct field_map_builder *builder,
>  {
>  	assert(offset_slot < 0);
>  	assert(offset > 0);
> -	builder->slots[offset_slot] = offset;
> +	builder->slots[offset_slot].offset = offset;
> +}
> +
> +/**
> + * Set data offset in field map extent (by given offset_slot,
> + * extent_slot and extent_items) for a field identified by
> + * unique offset_slot.
> + *
> + * The offset_slot argument must be negative and offset must be
> + * positive (by definition).
> + */
> +static inline int
> +field_map_builder_set_extent_slot(struct field_map_builder *builder,

IMO better call it field_map_builder_set_multikey to emphasize that it
only makes sense for multikey indexes.

> +				  int32_t offset_slot, int32_t extent_slot,

extent_slot is called multikey_idx everywhere else and has type int.
I'd use the same name here.

> +				  uint32_t extent_items, uint32_t offset)

Don't quite like 'items' - sounds like it's an array or a list, not a
number. What about multikey_count:

field_map_builder_set_slot_multikey(builder,
		int32_t offset_slot, uint32_t offset,
		int multikey_idx, int multikey_count);

> +{
> +	assert(offset_slot < 0);
> +	assert(offset > 0);
> +	assert(extent_slot >= 0 && extent_items > 0);

Would be more useful to ensure that 'multikey_idx' is less than
'multikey_count'.

> +	struct field_map_ext *extent =
> +		field_map_builder_ext_get(builder, offset_slot, extent_items);

This is an unconditional function call on a hot path.
I'd create a function to allocate a multikey slot instead
and call it only if the slot is empty.

> +	if (extent == NULL)
> +		return -1;
> +	assert(extent->items == extent_items);
> +	extent->offset[extent_slot] = offset;
> +	return 0;
>  }
>  
>  /**
> @@ -125,7 +235,8 @@ field_map_builder_set_slot(struct field_map_builder *builder,
>  static inline uint32_t
>  field_map_build_size(struct field_map_builder *builder)
>  {
> -	return builder->slot_count * sizeof(uint32_t);
> +	return builder->slot_count * sizeof(uint32_t) +
> +	       builder->extents_size;
>  }
>  
>  /**
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 55dcf1eb5..4d36560e0 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -138,7 +138,57 @@ key_def_set_func(struct key_def *def)
>  	key_def_set_extract_func(def);
>  }
>  
> -static void
> +static int
> +key_def_set_part_path(struct key_def *def, uint32_t part_no, const char *path,
> +		      uint32_t path_len, char **path_pool)
> +{
> +	struct key_part *part = &def->parts[part_no];
> +	if (path == NULL) {
> +		part->path = NULL;
> +		part->path_len = 0;
> +		return 0;
> +	}
> +	assert(path_pool != NULL);
> +	part->path = *path_pool;
> +	*path_pool += path_len;
> +	memcpy(part->path, path, path_len);
> +	part->path_len = path_len;
> +
> +	/*
> +	 * Test whether this key_part has array index
> +	 * placeholder [*] (i.e. is a part of multikey index
> +	 * definition).
> +	 */
> +	int multikey_path_len =
> +		json_path_multikey_offset(path, path_len, TUPLE_INDEX_BASE);
> +	if (multikey_path_len < 0)
> +		return 0;

Please move the check that no part contains two [*] tokens here so that
one can't create an invalid key_def, even temporarily.

The check that we don't index a multikey path with a normal index is OK
to live in tuple format creation code, because it's in fact a limitation
of the tuple field map format.

> +
> +	/*
> +	 * In case of multikey index key_parts must have the
> +	 * same JSON prefix.
> +	 */
> +	if (!key_def_is_multikey(def)) {

Please check def->multikey_path here directly, without the aid of this
helper, since you set it directly below.

> +		/*
> +		 * Keep the index of the first multikey key_part
> +		 * and the length of JSON path substring to the
> +		 * array index placeholder sign [*].
> +		 */
> +		def->multikey_path = part->path;
> +		def->multikey_fieldno = part->fieldno;
> +		def->multikey_path_len = (uint32_t)multikey_path_len;
> +	} else if (json_path_cmp(path, multikey_path_len, def->multikey_path,
> +				 def->multikey_path_len,
> +				 TUPLE_INDEX_BASE) != 0) {
> +		diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +			 part_no + TUPLE_INDEX_BASE,
> +			 "incompatable multikey index path");
> +		return -1;
> +	}
> +	return 0;
> +}
> @@ -256,11 +301,12 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
>  	key_def->unique_part_count = part_count;
>  
>  	for (uint32_t item = 0; item < part_count; ++item) {
> -		key_def_set_part(key_def, item, fields[item],
> -				 (enum field_type)types[item],
> -				 ON_CONFLICT_ACTION_DEFAULT,
> -				 NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0,
> -				 NULL, TUPLE_OFFSET_SLOT_NIL, 0);
> +		if (key_def_set_part(key_def, item, fields[item],
> +				     (enum field_type)types[item],
> +				     ON_CONFLICT_ACTION_DEFAULT, NULL,
> +				     COLL_NONE, SORT_ORDER_ASC, NULL, 0, NULL,
> +				     TUPLE_OFFSET_SLOT_NIL, 0) != 0)
> +			unreachable();

Please don't use unreachable(). Handle errors instead so that if we add
some more checks to key_def_set_part we don't have patch these other
places.

> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index fe037c54a..fa8a04664 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -656,34 +727,42 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
>  }
>  
>  static int
> -memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> +memtx_tree_index_build_array_realloc(struct memtx_tree_index *index,
> +				     uint32_t items)

Please refactor it in a slightly different manner: instead of this
cumbersome realloc helper, simply add a function that sets a particular
slot, reallocating the array if necessary:

	memtx_tree_index_build_array_append(index, tuple, hint)

> diff --git a/src/box/sql.c b/src/box/sql.c
> index 7d5c3a8e0..be2212ef1 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c
> @@ -788,7 +788,8 @@ tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
>  			} else {
>  				uint32_t field_offset =
>  					field_map_get_offset(field_map,
> -							     field->offset_slot);
> +							     field->offset_slot,
> +							     -1);
>  				p = base + field_offset;
>  			}
>  		}
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 570e4c192..8c2cd7611 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -130,7 +130,7 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
>  	int rc = tuple_field_map_create(&builder, format, tuple, true);
>  	region_truncate(region, region_svp);
>  	assert(rc != 0 ||
> -	       field_map_build_size(&builder) == format->field_map_size);
> +	       field_map_build_size(&builder) >= format->field_map_size);

This assertion is pointless. Please remove it (in the previous patch).

>  	return rc;
>  }
>  
> @@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
>  }
>  
>  int
> -tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
> +tuple_go_to_path(const char **data, const char *path, uint32_t path_len,
> +		 int multikey_idx)
>  {
>  	int rc;
>  	struct json_lexer lexer;
> @@ -463,6 +464,10 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
>  	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
>  	while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
>  		switch (token.type) {
> +		case JSON_TOKEN_ANY:
> +			assert(multikey_idx >= 0);
> +			token.num = multikey_idx;
> +			FALLTHROUGH;

The function must not crash if there's no mutlikey_idx - it should
return nil:

t = box.tuple.new{1, {a = 1, b = 2}, 3}
t[2][*] // crash!

Please fix and add a test case.

> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index da4085bcf..99753c8d5 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -674,6 +702,35 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part)
>  				       tuple_field_map(tuple), part);
>  }
>  
> +/**
> + * Get count of documents in tuple by given multikey index
> + * definition.
> + * @param format Tuple format.
> + * @param data A pointer to MessagePack array.
> + * @param field_map A pointer to the LAST element of field map.
> + * @param key_def Index key_definition.
> + * @retval Count of documents in the multikey index.
> + */
> +uint32_t
> +tuple_field_raw_multikey_items(struct tuple_format *format, const char *data,
> +			       const uint32_t *field_map,
> +			       struct key_def *key_def);
> +
> +/**
> + * Get count of documents in tuple by given multikey index
> + * definition.
> + * @param tuple Tuple to get the count of documents from.
> + * @param key_def Index key_definition.
> + * @retval Count of documents in the multikey index.

What's a 'document'? I wouldn't introduce a new term.

> + */
> +static inline uint32_t
> +tuple_field_multikey_items(struct tuple *tuple, struct key_def *key_def)

Don't like the name. This function doesn't return a tuple field so using
tuple_field_ prefix is wrong IMO. And again 'items' sounds like it
returns a set of items, not a count. What about tuple_multikey_count?
Do you have better alternatives in mind?

> +{
> +	return tuple_field_raw_multikey_items(tuple_format(tuple),
> +					      tuple_data(tuple),
> +					      tuple_field_map(tuple), key_def);
> +}
> +
>  /**
>   * @brief Tuple Interator
>   */
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index 93756365b..bf3a23097 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -455,8 +479,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
>  	assert(!has_optional_parts || is_nullable);
>  	assert(is_nullable == key_def->is_nullable);
>  	assert(has_optional_parts == key_def->has_optional_parts);
> -	int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
> -	if (rc != 0)
> +	assert(key_def_is_multikey(key_def) == is_multikey);
> +	assert(!is_multikey || is_multikey == has_json_paths);

Please add an assertion checking that a hint cannot be HINT_NONE for
a multikey index. Here and everywhere else where applicable.

> @@ -1314,6 +1365,16 @@ static const comparator_with_key_signature cmp_wk_arr[] = {
>  /* {{{ tuple_hint */
>  
>  /**
> + * Hints are now used for two purposes - passing the index of the
> + * key used in the case of multikey index and to speed up the
> + * comparators.
> + *
> + * Scenario I. Pass the multikey index of the key to comparator.
> + * In the case of multikey index arises an ambiguity: which key
> + * should be used in the comparison. Hints act as an non-negative
> + * numeric index of key to use.
> + *
> + * Scenario II. Speed up comparators.

Please move this comment to hint_t definition. Here just say that hint
comparison only makes sense for non-multikey indexes.

>   * A comparison hint is an unsigned integer number that has
>   * the following layout:
>   *
> @@ -1611,12 +1672,35 @@ tuple_hint(const struct tuple *tuple, struct key_def *key_def)
>  	return field_hint<type, is_nullable>(field, key_def->parts->coll);
>  }
>  
> +static hint_t
> +key_hint_stub(const char *key, uint32_t part_count, struct key_def *key_def)

key_hint_multikey?

> +{
> +	(void) key;
> +	(void) part_count;
> +	(void) key_def;
> +	return HINT_NONE;

Please add a comment explaining why this function can actually be called
while tuple_hint_multikey cannot.

> +}
> +
> +static hint_t
> +tuple_hint_stub(const struct tuple *tuple, struct key_def *key_def)
> +{
> +	(void) tuple;
> +	(void) key_def;
> +	unreachable();
> +	return HINT_NONE;
> +}
> +
>  template<enum field_type type, bool is_nullable>
>  static void
>  key_def_set_hint_func(struct key_def *def)
>  {
> -	def->key_hint = key_hint<type, is_nullable>;
> -	def->tuple_hint = tuple_hint<type, is_nullable>;
> +	if (key_def_is_multikey(def)) {
> +		def->key_hint = key_hint_stub;
> +		def->tuple_hint = tuple_hint_stub;

This should be moved upper level, to key_def_set_hint_func - no need in
all these branching to just set it to the stub.

> +	} else {
> +		def->key_hint = key_hint<type, is_nullable>;
> +		def->tuple_hint = tuple_hint<type, is_nullable>;
> +	}
>  }
>  
>  template<enum field_type type>
> diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
> index 836d4e565..081677ff6 100644
> --- a/src/box/tuple_extract_key.cc
> +++ b/src/box/tuple_extract_key.cc
> @@ -369,11 +393,16 @@ static void
>  key_def_set_extract_func_json(struct key_def *def)
>  {
>  	assert(def->has_json_paths);
> -	def->tuple_extract_key = tuple_extract_key_slowpath
> -					<contains_sequential_parts,
> -					 has_optional_parts, true>;
> -	def->tuple_extract_key_raw = tuple_extract_key_slowpath_raw
> -					<has_optional_parts, true>;
> +	if (!key_def_is_multikey(def)) {
> +		def->tuple_extract_key = tuple_extract_key_slowpath
> +						<contains_sequential_parts,
> +						has_optional_parts, true>;
> +		def->tuple_extract_key_raw = tuple_extract_key_slowpath_raw
> +						<has_optional_parts, true>;
> +	} else {
> +		def->tuple_extract_key = tuple_extract_key_stub;
> +		def->tuple_extract_key_raw = tuple_extract_key_raw_stub;
> +	}

Come to think of it, let's add

	assert(!key_def_is_multikey(key_def));

to original extractors rather than adding those stubs.

>  }
>  
>  void
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 9a643b700..33d6db420 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -194,6 +194,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
>  	return NULL;
>  }
>  
> +/**
> + * Check if child_field may be attached to parent_field,
> + * update the parent_field container type if required.
> + */
> +static int
> +tuple_format_field_update_type(struct tuple_field *parent_field,
> +			       struct tuple_field *child_field)

The name's kinda confusing - this function doesn't necessarily update
a field type - it rather checks field compatibility. Please come up with
a better name.

> +{
> +	enum field_type expected_type =
> +		child_field->token.type == JSON_TOKEN_STR ?
> +		FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
> +	if (child_field->token.type == JSON_TOKEN_ANY &&
> +	    !json_token_is_multikey(&parent_field->token) &&
> +	    !json_token_is_leaf(&parent_field->token)) {
> +		assert(expected_type == FIELD_TYPE_ARRAY);
> +		diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
> +			 tt_sprintf("field %s is already refer to document by "
> +				    "identifier and cannot use array index "
> +				    "placeholder [*]",
> +				    tuple_field_path(parent_field)));
> +		return -1;
> +	}
> +	if (json_token_is_multikey(&parent_field->token) &&
> +		child_field->token.type != JSON_TOKEN_ANY) {

Bad indentation.

> +		assert(expected_type == FIELD_TYPE_ARRAY);
> +		diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
> +			 tt_sprintf("field %s is already used with array index "
> +				    "placeholder [*] and cannot refer to "
> +				    "document by identifier",
> +				    tuple_field_path(parent_field)));

These error messages look bad to me. And the code is difficult for
understanding as well and needs a comment explaining what exactly we
prohibit here and why.

What about the following error message (the same for both cases)?

	ER_MULTIKEY_INDEX_MISMATCH
	Field %s is used as multikey in one index and as single key in another.

Please think about it.

> +		return -1;
> +	}
> +	if (field_type1_contains_type2(parent_field->type, expected_type)) {
> +		parent_field->type = expected_type;
> +	} else {
> +		diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
> +			 tuple_field_path(parent_field),
> +			 field_type_strs[parent_field->type],
> +			 field_type_strs[expected_type]);
> +		return -1;
> +	}
> +	return 0;
> +}
> +
>  /**
>   * Given a field number and a path, add the corresponding field
>   * to the tuple format, allocating intermediate fields if
> @@ -280,6 +323,13 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
>  	assert(parent != NULL);
>  	/* Update tree depth information. */
>  	format->fields_depth = MAX(format->fields_depth, token_count + 1);
> +	if (json_token_any_count > 1) {
> +		assert(path_len > 0);
> +		diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
> +			 "no more than one array index placeholder [*] is "
> +			 "allowed in JSON path");
> +		goto fail;
> +	}

As I wrote above, this check should be done in key_def_new.

> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index 88f03d5eb..d07ca91eb 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -437,6 +437,12 @@ struct tuple_parse_iterator {
>  	 * pointer accordingly.
>  	 */
>  	struct mp_stack stack;
> +	/**
> +	 * The pointer to the stack frame representing an array
> +	 * filed that has JSON_TOKEN_ANY child, i.e. the root
> +	 * of the multikey index.
> +	 */
> +	struct mp_frame *multikey_frame;
>  	/** The current read position in msgpack. */
>  	const char *pos;
>  };
> @@ -479,6 +485,32 @@ tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
>  			    struct tuple_field **field, const char **data,
>  			    const char **data_end);
>  
> +/**
> + * Returns non-negative multikey index comparison hint when
> + * the iterator's position in the format::fields subtree
> + * corresponds to the multikey subtree and -1 otherwise.
> + */
> +static inline int
> +tuple_parse_iterator_multikey_idx(struct tuple_parse_iterator *it)
> +{
> +	if (it->multikey_frame == NULL)
> +		return -1;
> +	return it->multikey_frame->curr;
> +}

I don't like that some variables are passed as iteration function
arguments, some are returned by separate functions. Why not access them
all directly, for instance?

But then again, the iterator API is flawed and we need to put more
thoughts in how to improve it.

> diff --git a/test/engine/multikey.test.lua b/test/engine/multikey.test.lua
> new file mode 100644
> index 000000000..71fc82a5f
> --- /dev/null
> +++ b/test/engine/multikey.test.lua
> @@ -0,0 +1,88 @@
> +test_run = require('test_run').new()
> +engine = test_run:get_cfg('engine')
> +
> +--
> +-- gh-1260: Multikey indexes
> +--
> +s = box.schema.space.create('withdata', {engine = 'vinyl'})
> +pk = s:create_index('pk')
> +-- Vinyl's space can't be multikey (yet).
> +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
> +s:drop()
> +
> +s = box.schema.space.create('withdata', {engine = 'memtx'})
> +-- Primary index must be unique so it can't be multikey.
> +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}})
> +pk = s:create_index('pk')
> +-- Only tree index type may be mutlikey.
> +_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}})
> +_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}})
> +_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}})
> +-- Test incompatible multikey index parts.
> +_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}})
> +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}})
> +idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}})
> +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
> +idx0:drop()
> +-- Unique multikey index.
> +idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
> +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}})
> +s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
> +s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}})
> +_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}})
> +-- Non-unique multikey index; two multikey indexes per space.
> +arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}})
> +arr_idx:select()
> +idx:get({'James', 'Bond'})
> +idx:get({'Ivan', 'Ivanych'})
> +idx:get({'Vasya', 'Pupkin'})
> +idx:select()
> +s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
> +s:insert({4, {1}, {{fname='James', sname='Bond'}}})
> +idx:select()
> +-- Duplicates in multikey parts.
> +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}})
> +arr_idx:select({1})
> +s:delete(5)
> +-- Check that there is no garbage in index
> +arr_idx:select({1})
> +idx:get({'A', 'B'})
> +idx:get({'C', 'D'})
> +idx:delete({'Vasya', 'Pupkin'})
> +s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
> +s:insert({7, {1}, {{fname='James', sname='Bond'}}})
> +arr_idx:select({1})
> +idx:select()
> +-- Snapshot & recovery.
> +box.snapshot()
> +test_run:cmd("restart server default")
> +s = box.space["withdata"]
> +idx = s.index["idx"]
> +arr_idx = s.index["arr_idx"]
> +s:select()
> +idx:select()
> +arr_idx:select()
> +s:drop()
> +
> +-- Assymetric multikey index paths.
> +s = box.schema.space.create('withdata')
> +pk = s:create_index('pk')
> +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
> +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
> +s:drop()
> +
> +-- Unique multikey index peculiar properties
> +s = box.schema.space.create('withdata')

Use proper test_run.engine for creating all spaces. Simply disable vinyl
in suite.ini for now.

> +pk = s:create_index('pk')
> +idx0 = s:create_index('idx0', {parts = {{2, 'int', path = '[*]'}}})
> +s:insert({1, {1, 1, 1}})
> +s:insert({2, {2, 2}})
> +s:insert({3, {3, 3, 2, 2, 1, 1}})
> +idx0:get(2)
> +idx0:get(1)
> +idx0:get(3)
> +idx0:select()
> +idx0:delete(2)
> +idx0:get(2)
> +idx0:select()
> +s:drop()

Add many more tests. Check that update/upsert/replace all work
(currently you only check insert/delete). Check reverse iterators.
Check space.format() and index.alter(). Check nullable with an absent
field. Check everything else you can think of to make sure there's no
bugs.

Check creation of an index after some tuples have been inserted - it
crashes BTW:

	tarantool> s = box.schema.space.create('test')
	---
	...

	tarantool> i = s:create_index('pk')
	---
	...

	tarantool> s:replace{1, {{1, 1}}
		 > }
	---
	- [1, [[1, 1]]]
	...

	tarantool> s:replace{2, {{2, 3}}}
	---
	- [2, [[2, 3]]]
	...

	tarantool> i2 = s:create_index('sk', {parts = {{2, 'unsigned', path = '[*]'}}})
	---
	- error: 'Tuple field [2][*] type does not match one required by operation: expected
	    unsigned'
	...

	tarantool> i2 = s:create_index('sk', {parts = {{2, 'unsigned', path = '[1][*]'}}})
	tarantool: /home/vlad/src/tarantool/src/box/memtx_space.c:928: memtx_space_build_index: Assertion `old_tuple == NULL' failed.
	Aborted