From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Tue, 12 Mar 2019 16:24:49 +0300 From: Vladimir Davydov Subject: Re: [tarantool-patches] [PATCH v5 4/4] box: introduce multikey indexes Message-ID: <20190312132449.3twey6ingganyuqy@esperanza> References: <126ecffeb103797f6a466387cd780587d0270275.1551951540.git.kshcherbatov@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: To: Kirill Shcherbatov Cc: tarantool-patches@freelists.org List-ID: On Thu, Mar 07, 2019 at 06:55:09PM +0300, Kirill Shcherbatov wrote: > diff --git a/src/box/key_def.c b/src/box/key_def.c > index 771c30172..f6ca461c4 100644 > --- a/src/box/key_def.c > +++ b/src/box/key_def.c > @@ -165,9 +165,24 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, > *path_pool += path_len; > memcpy(def->parts[part_no].path, path, path_len); > def->parts[part_no].path_len = path_len; > + > + int rc; > + struct json_lexer lexer; > + struct json_token token; > + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); > + while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { > + if (token.type == JSON_TOKEN_ANY) { > + def->has_multikey_parts = true; > + def->parts[part_no].is_multikey = true; > + break; > + } else if (token.type == JSON_TOKEN_END) { > + break; > + } > + } Better wrap this in a helper in json lib. > } else { > def->parts[part_no].path = NULL; > def->parts[part_no].path_len = 0; > + def->parts[part_no].is_multikey = false; > } > column_mask_set_fieldno(&def->column_mask, fieldno); > } > diff --git a/src/box/key_def.h b/src/box/key_def.h > index c630e6b43..37fae6b52 100644 > --- a/src/box/key_def.h > +++ b/src/box/key_def.h > @@ -610,13 +611,106 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, > } > if (old_tuple) { > struct memtx_tree_data old_data; > - memtx_tree_data_set(&old_data, old_tuple, key_def); > + memtx_tree_data_set(&old_data, old_tuple, key_def, > + INVALID_MULTIKEY_INDEX); > memtx_tree_delete(&index->tree, old_data); > } > *result = old_tuple; > return 0; > } > > +static int > +multikey_get_array_sz(struct tuple *tuple, struct key_def *key_def) This function is engine independent. Moreover, we will need it in Vinyl as well. Please move it to key_def.c. > +{ > + assert(key_def->has_multikey_parts); > + struct key_part *part = key_def->parts; > + /* FIXME: don't like it... */ Me neither. What about storing the path to the first '*' in key_def instead? You wouldn't need to allocate it - you could simply set it to point to key_part->path of the first multikey part and store the length in key_def. Then you wouldn't need to iterate over key parts here - instead you would simply use the path stored in key_def. You wouldn't need to add is_multikey and has_multikey_parts flags either. Instead you could check if the path stored in key_def is not NULL. This would also allow you to check if multikey parts are compatible while building a key_def, in key_def_set_part: upon encountering the first multikey part, you would initialize multikey path in key_def; then you would check if subsequent multikey paths have the same prefix (you would need to introduce json_path_ncmp helper for this, I guess). > + while (part < key_def->parts + key_def->part_count && > + !part->is_multikey) > + part++; > + assert(part->path != NULL && part->is_multikey); > + struct tuple_field *field = > + tuple_format_field_by_path(tuple_format(tuple), part->fieldno, > + part->path, part->path_len); > + assert(field != NULL); This isn't necessarily true. Just like tuple_field_by_path, this function must fall back on decoding the msgpack in case the tuple field isn't indexed in the format. > + struct field_map_ext *field_map_ext = > + tuple_field_map_ext((uint32_t *)tuple_field_map(tuple), > + field->offset_slot); > + return field_map_ext->size; > +} > + > +int > +memtx_multikey_tree_index_replace(struct index *base, struct tuple *old_tuple, Nit: memtx_tree_index_ is kinda namespace here so better call it memtx_tree_index_replace_multikey IMO. The same's fair for other multikey functions and constants. > + struct tuple *new_tuple, > + enum dup_replace_mode mode, > + struct tuple **result) > +{ > + struct memtx_tree_index *index = (struct memtx_tree_index *)base; > + struct key_def *key_def = index->tree.arg; > + if (new_tuple != NULL) { > + int errcode = 0, tree_res = 0; > + struct tuple *dup_tuple = NULL; > + int multikey_idx = 0; > + int sz = multikey_get_array_sz(new_tuple, key_def); > + for (; multikey_idx < sz; multikey_idx++) { > + struct memtx_tree_data new_data; > + memtx_tree_data_set(&new_data, new_tuple, key_def, > + multikey_idx); > + struct memtx_tree_data dup_data; > + memtx_tree_data_clear(&dup_data); > + tree_res = memtx_tree_insert(&index->tree, new_data, > + &dup_data); > + if (tree_res != 0) { > + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, > + "memtx_tree_index", "replace"); > + break; > + } > + dup_tuple = dup_data.tuple; > + errcode = replace_check_dup(old_tuple, dup_tuple, mode); > + if (errcode != 0) { > + memtx_tree_delete(&index->tree, new_data); > + if (dup_tuple != NULL) { > + memtx_tree_insert(&index->tree, > + dup_data, NULL); > + } > + struct space *sp = > + space_cache_find(base->def->space_id); > + if (sp != NULL) { > + diag_set(ClientError, errcode, > + base->def->name, > + space_name(sp)); > + } > + break; > + } > + } > + if (tree_res != 0 || errcode != 0) { > + multikey_idx--; > + for (; multikey_idx >= 0; multikey_idx--) { > + struct memtx_tree_data data; > + memtx_tree_data_set(&data, new_tuple, key_def, > + multikey_idx); > + memtx_tree_delete(&index->tree, data); > + } > + return -1; > + } > + if (dup_tuple != NULL) { > + *result = dup_tuple; > + return 0; > + } > + } > + if (old_tuple != NULL) { > + int sz = multikey_get_array_sz(old_tuple, key_def); > + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) { > + struct memtx_tree_data old_data; > + memtx_tree_data_set(&old_data, old_tuple, key_def, > + multikey_idx); > + memtx_tree_delete(&index->tree, old_data); > + } > + } > + *result = old_tuple; > + return 0; > +} > + > static struct iterator * > memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, > const char *key, uint32_t part_count) > @@ -718,7 +812,48 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) > } > struct memtx_tree_data *elem = > &index->build_array[index->build_array_size++]; > - memtx_tree_data_set(elem, tuple, key_def); > + memtx_tree_data_set(elem, tuple, key_def, INVALID_MULTIKEY_INDEX); > + return 0; > +} > + > +static int > +memtx_multikey_tree_index_build_next(struct index *base, struct tuple *tuple) > +{ > + struct memtx_tree_index *index = (struct memtx_tree_index *)base; > + struct key_def *key_def = index->tree.arg; > + int sz = multikey_get_array_sz(tuple, key_def); > + > + if (index->build_array == NULL) { > + index->build_array = > + (struct memtx_tree_data *)malloc(MEMTX_EXTENT_SIZE); > + if (index->build_array == NULL) { > + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, > + "memtx_tree_index", "build_next"); > + return -1; > + } > + index->build_array_alloc_size = > + MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]); > + } > + assert(index->build_array_size <= index->build_array_alloc_size); > + if (index->build_array_size == index->build_array_alloc_size) { > + index->build_array_alloc_size = index->build_array_alloc_size + > + index->build_array_alloc_size / 2; > + struct memtx_tree_data *tmp = > + realloc(index->build_array, > + index->build_array_alloc_size * sizeof(*tmp)); > + if (tmp == NULL) { > + diag_set(OutOfMemory, index->build_array_alloc_size * > + sizeof(*tmp), "memtx_tree_index", "build_next"); > + return -1; > + } > + index->build_array = tmp; > + } This was copied-n-pasted from memtx_tree_index_build_next. Better introduce a helper function for that. > + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) { > + struct memtx_tree_data *elem = > + &index->build_array[index->build_array_size++]; > + assert(index->build_array_size < index->build_array_alloc_size); > + memtx_tree_data_set(elem, tuple, key_def, multikey_idx); > + } > return 0; > } > > diff --git a/src/box/tuple.c b/src/box/tuple.c > index 7f06d4053..c8a938c4c 100644 > --- a/src/box/tuple.c > +++ b/src/box/tuple.c > @@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len) > } > > int > -tuple_go_to_path(const char **data, const char *path, uint32_t path_len) > +tuple_go_to_path_multikey(const char **data, const char *path, > + uint32_t path_len, uint64_t multikey_idx) > { > int rc; > struct json_lexer lexer; > @@ -463,6 +464,8 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len) > json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); > while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { > switch (token.type) { > + case JSON_TOKEN_ANY: > + token.num = (int)multikey_idx; FALLTHROUGH is missing - this code may not compile on certain platforms. Also, let's use int instead of uint64_t for multikey_idx. And rather than adding INVALID_MULTIKEY_INDEX contant, simply check if multikey_idx is non-negative. Also, an assertion ensuring that if '*' is encountered mutlikey_idx is set would be welcome here. > case JSON_TOKEN_NUM: > rc = tuple_field_go_to_index(data, token.num); > break; > @@ -532,7 +535,8 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, > } > return tuple_field_raw_by_path(format, tuple, field_map, fieldno, > path + lexer.offset, > - path_len - lexer.offset, NULL); > + path_len - lexer.offset, NULL, > + INVALID_MULTIKEY_INDEX); > } > > /* }}} tuple_field_* getters */ > diff --git a/src/box/tuple.h b/src/box/tuple.h > index 8b12fd5a8..e498e1e41 100644 > --- a/src/box/tuple.h > +++ b/src/box/tuple.h > @@ -45,6 +45,8 @@ struct slab_arena; > struct quota; > struct key_part; > > +#define INVALID_MULTIKEY_INDEX UINT64_MAX > + > /** > * A format for standalone tuples allocated on runtime arena. > * \sa tuple_new(). > @@ -286,6 +288,7 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const > > /** \endcond public */ > > + Nit: extra change. > /** > * An atom of Tarantool storage. Represents MsgPack Array. > * Tuple has the following structure: > @@ -296,7 +299,9 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const > * | ^ > * +---------------------------------------data_offset > * > - * Each 'off_i' is the offset to the i-th indexed field. > + * Each 'off_i' is the offset to the i-th indexed field or field > + * map extention (described with struct tuple_field_map_ext) > + * offset (in sizeof(field_map[0]) units). The comment is bad as it doesn't really explain what a field map extension looks like, nor in what cases is it created. A diagram would be helpful here. > */ > struct PACKED tuple > { > @@ -328,6 +333,26 @@ struct PACKED tuple > */ > }; > > +/** > + * Tuple field map extent. Is allocated on tuple_field_map_create > + * call automatically when necessary, when tuple field map must be > + * reallocated dynamically. > + */ > +struct field_map_ext { > + /* Sequence of data offset slots. */ > + uint32_t field_map[1]; This looks weird. > + /* The count of field_map items. */ > + uint32_t size; > +}; I don't think that this deserves a separate structure. I'd rather you access the offset map array directly. > + > +static inline struct field_map_ext * > +tuple_field_map_ext(uint32_t *field_map, int32_t root_offset_slot) > +{ > + uint32_t slot_off = field_map[root_offset_slot]; > + return (struct field_map_ext *)((char *)field_map - > + slot_off * sizeof(uint32_t) - sizeof(struct field_map_ext)); > +} > + > /** Size of the tuple including size of struct tuple. */ > static inline size_t > tuple_size(const struct tuple *tuple) > @@ -508,11 +533,32 @@ tuple_field_count(const struct tuple *tuple) > * with NULL. > * @param path The path to process. > * @param path_len The length of the @path. > + * @param multikey_index The multikey index hint - index of item It's multikey_idx. > + * for JSON_TOKEN_ANY level. > * @retval 0 On success. > * @retval -1 In case of error in JSON path. > */ > int > -tuple_go_to_path(const char **data, const char *path, uint32_t path_len); > +tuple_go_to_path_multikey(const char **data, const char *path, > + uint32_t path_len, uint64_t multikey_idx); > + > +/** > + * Retrieve msgpack data by JSON path. > + * @param data[in, out] Pointer to msgpack with data. > + * If the field cannot be retrieved be the > + * specified path @path, it is overwritten > + * with NULL. > + * @param path The path to process. > + * @param path_len The length of the @path. > + * @retval 0 On success. > + * @retval -1 In case of error in JSON path. > + */ Copying-and-pasting is bad, even in case of comments. Please instead of writing a doxygen-style comment, just say a few words about this function: what it does, what it expects (the path must not have '*', I assume). > +static inline int > +tuple_go_to_path(const char **data, const char *path, uint32_t path_len) > +{ > + return tuple_go_to_path_multikey(data, path, path_len, > + INVALID_MULTIKEY_INDEX); > +} > > /** > * Get tuple field by field index and relative JSON path. > @@ -533,7 +579,7 @@ static inline const char * > tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > const uint32_t *field_map, uint32_t fieldno, > const char *path, uint32_t path_len, > - int32_t *offset_slot_hint) > + int32_t *offset_slot_hint, uint64_t multikey_idx) Update the comment please. > { > int32_t offset_slot; > if (offset_slot_hint != NULL && > @@ -558,10 +604,22 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > if (offset_slot_hint != NULL) > *offset_slot_hint = offset_slot; > offset_slot_access: > - /* Indexed field */ > - if (field_map[offset_slot] == 0) > - return NULL; > - tuple += field_map[offset_slot]; > + if (multikey_idx != INVALID_MULTIKEY_INDEX) { > + struct field_map_ext *field_map_ext = > + tuple_field_map_ext((uint32_t *)field_map, > + offset_slot); > + if (multikey_idx >= field_map_ext->size) > + return NULL; > + uint32_t off = field_map_ext->field_map[-multikey_idx]; > + if (off == 0) > + return NULL; > + tuple += off; > + } else { > + /* Indexed field */ Confused. Both branches are for indexed fields, aren't they? Anyway, I'd move this whole thing (retrieving offset slot) to a separate helper function. > + if (field_map[offset_slot] == 0) > + return NULL; > + tuple += field_map[offset_slot]; > + } > } else { > uint32_t field_count; > parse: > @@ -634,16 +692,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, > uint32_t path_len, uint32_t path_hash); > > /** > - * Get a tuple field pointed to by an index part. > + * Get a tuple field pointed to by an index part and multikey hint. > * @param format Tuple format. > * @param tuple A pointer to MessagePack array. > * @param field_map A pointer to the LAST element of field map. > + * @param multikey_idx A multikey hint. What's a multikey hint? Please mention it in the comment. > * @param part Index part to use. > * @retval Field data if the field exists or NULL. > */ > static inline const char * > -tuple_field_raw_by_part(struct tuple_format *format, const char *data, > - const uint32_t *field_map, struct key_part *part) > +tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data, > + const uint32_t *field_map, > + struct key_part *part, uint64_t multikey_idx) > { > if (unlikely(part->format_epoch != format->epoch)) { > assert(format->epoch != 0); > diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc > index 5b06e06b3..dbf7bb4f6 100644 > --- a/src/box/tuple_compare.cc > +++ b/src/box/tuple_compare.cc > @@ -458,10 +458,12 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b, > return i; > } > > -template > +template + bool is_multikey> > static inline int > -tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, > - struct key_def *key_def) > +tuple_compare_slowpath_multikey(const struct tuple *tuple_a, The function has suffix _multikey, but it may be defined with is_multikey = false template parameter. This is confusing. > + cmp_aux_t tuple_a_cmp_aux, const struct tuple *tuple_b, > + cmp_aux_t tuple_b_cmp_aux, struct key_def *key_def) > { > assert(has_json_paths == key_def->has_json_paths); > assert(!has_optional_parts || is_nullable); Please add an assertion for is_multikey. > @@ -471,7 +473,7 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, > const char *tuple_a_raw = tuple_data(tuple_a); > const char *tuple_b_raw = tuple_data(tuple_b); > if (key_def->part_count == 1 && part->fieldno == 0 && > - (!has_json_paths || part->path == NULL)) { > + (!has_json_paths || part->path == NULL) && !is_multikey) { I don't understand why you need this: is_multikey can't be set if there's no json path. > /* > * First field can not be optional - empty tuples > * can not exist. > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c > index 4439ce3e0..99b602cd5 100644 > --- a/src/box/tuple_format.c > +++ b/src/box/tuple_format.c > @@ -33,6 +33,7 @@ > #include "json/json.h" > #include "tuple_format.h" > #include "coll_id_cache.h" > +#include "tuple.h" > > #include "third_party/PMurHash.h" > > @@ -233,14 +234,19 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, > json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); > while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && > field->token.type != JSON_TOKEN_END) { > - if (field->token.type == JSON_TOKEN_ANY) { > - diag_set(ClientError, ER_UNSUPPORTED, > - "Tarantool", "multikey indexes"); > - goto fail; > - } > enum field_type expected_type = > field->token.type == JSON_TOKEN_STR ? > FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; > + if (field->token.type == JSON_TOKEN_ANY && > + !json_token_is_multikey(&parent->token) && > + !json_token_is_leaf(&parent->token)) { > + assert(expected_type == FIELD_TYPE_ARRAY); > + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, > + tuple_field_path(parent), > + field_type_strs[parent->type], > + "multikey array"); > + goto fail; > + } "Field [1].a has type 'array' in one index, but type 'multikey array' in another" Sounds confusing. IMO better introduce a new error code for this. BTW, you should also check that there's no multikey parts in Vinyl indexes. And that there's no two incompatible/unsupported multikey parts (e.g. a.b[*] and a.c[*], or a.b[*] and a.b[*][*]). > if (field_type1_contains_type2(parent->type, expected_type)) { > parent->type = expected_type; > } else { > @@ -475,9 +481,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, > break; > format->min_tuple_size += mp_sizeof_nil(); > } > - } else { > + } else if (field->token.type == JSON_TOKEN_STR) { > /* Account a key string for map member. */ > - assert(field->token.type == JSON_TOKEN_STR); > format->min_tuple_size += > mp_sizeof_str(field->token.len); Shouldn't we account multikey indexes in min_tuple_size as well? > } > @@ -805,6 +810,59 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, > return true; > } > > +/** > + * Grow tuple_field_map allocation left with extent_size > + * 0 bytes. > + */ > +static int > +tuple_field_map_realloc(uint32_t **field_map, uint32_t *field_map_size, > + uint32_t extent_size, struct region *region) > +{ > + assert(extent_size % sizeof(uint32_t) == 0); > + uint32_t new_field_map_size = *field_map_size + extent_size; > + uint32_t *new_field_map = region_alloc(region, new_field_map_size); > + if (new_field_map == NULL) { > + diag_set(OutOfMemory, new_field_map_size, "region_alloc", > + "new_field_map"); > + return -1; > + } > + memset(new_field_map, 0, extent_size); > + if (*field_map != NULL) { > + memcpy((char *)new_field_map + extent_size, > + (char *)*field_map - *field_map_size, *field_map_size); > + } > + *field_map = (uint32_t *)((char *)new_field_map + new_field_map_size); > + *field_map_size = new_field_map_size; > + return 0; > +} > + > +/** > + * Retrieve tuple field map extent by root_offset_slot, reallocate > + * memory if required. > + */ > +struct field_map_ext * > +tuple_field_map_ext_retrieve(uint32_t **field_map, uint32_t *field_map_size, > + int32_t root_offset_slot, uint32_t extent_items, > + struct region *region) > +{ > + uint32_t extent_sz = sizeof(struct field_map_ext) + > + extent_items * sizeof(uint32_t); > + uint32_t slot_off = (*field_map)[root_offset_slot]; > + if (slot_off == 0) { > + /* Field map extent was not created yet. */ > + slot_off = *field_map_size / sizeof(uint32_t); > + (*field_map)[root_offset_slot] = slot_off; > + if (tuple_field_map_realloc(field_map, field_map_size, > + extent_sz, region) != 0) > + return NULL; > + } I don't like that potentially we have to reallocate the field map several times if multikey indexes are present. What about introducing a temporary field map storage which would operate with raw pointers rather than offsets? We would fill this temporary object rather than a real field map in tuple_field_map_create, without any relocations. Once we were done, we would call a special helper method that would turn this object to a real field map. > + struct field_map_ext *field_map_ext = > + tuple_field_map_ext(*field_map, root_offset_slot); > + assert(field_map_ext->size == 0 || field_map_ext->size == extent_items); > + field_map_ext->size = extent_items; > + return field_map_ext; > +} > + > /** @sa declaration for details. */ > int > tuple_field_map_create(struct tuple_format *format, const char *tuple, > @@ -817,14 +875,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, > return 0; /* Nothing to initialize */ > } > struct region *region = &fiber()->gc; > - *field_map_size = format->field_map_size; > - *field_map = region_alloc(region, *field_map_size); > - if (*field_map == NULL) { > - diag_set(OutOfMemory, *field_map_size, "region_alloc", > - "field_map"); > + *field_map = NULL; > + *field_map_size = 0; > + if (tuple_field_map_realloc(field_map, field_map_size, > + format->field_map_size, region) != 0) > return -1; > - } > - *field_map = (uint32_t *)((char *)*field_map + *field_map_size); > > const char *pos = tuple; > int rc = 0; > @@ -864,11 +919,6 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, > uint32_t defined_field_count = MIN(field_count, validate ? > tuple_format_field_count(format) : > format->index_field_count); > - /* > - * Nullify field map to be able to detect by 0, > - * which key fields are absent in tuple_field(). > - */ > - memset((char *)*field_map - *field_map_size, 0, *field_map_size); > /* > * Prepare mp stack of the size equal to the maximum depth > * of the indexed field in the format::fields tree > @@ -885,6 +935,12 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, > struct mp_stack stack; > mp_stack_create(&stack, format->fields_depth, frames); > mp_stack_push(&stack, MP_ARRAY, defined_field_count); > + /** > + * Pointer to the stack entry that represents the multikey > + * index root ARRAY entry. > + */ > + struct mp_frame *mk_parent_frame = NULL; > + > struct tuple_field *field; > struct json_token *parent = &format->fields.root; > while (true) { > @@ -901,6 +957,10 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, > mp_stack_pop(&stack); > if (mp_stack_is_empty(&stack)) > goto finish; > + if (json_token_is_multikey(parent)) { > + /* Leave multikey index branch. */ > + mk_parent_frame = NULL; > + } > parent = parent->parent; > } > /* > @@ -950,8 +1010,23 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, > field_type_strs[field->type]); > goto error; > } > - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) > + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && > + mk_parent_frame == NULL) { > (*field_map)[field->offset_slot] = pos - tuple; > + } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && > + mk_parent_frame != NULL) { > + uint32_t extent_items = mk_parent_frame->count; > + struct field_map_ext *field_map_ext = > + tuple_field_map_ext_retrieve(field_map, > + field_map_size, > + field->offset_slot, > + extent_items, region); > + if (field_map_ext == NULL) > + goto error; > + int multikey_idx = mk_parent_frame->curr; > + field_map_ext->field_map[ > + -multikey_idx] = pos - tuple; > + } This function has grown too big for easy reading. It definitely needs to be split. > if (required_fields != NULL) > bit_clear(required_fields, field->id); > } > @@ -968,6 +1043,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, > mp_decode_array(&pos) : > mp_decode_map(&pos); > mp_stack_push(&stack, type, size); > + if (json_token_is_multikey(&field->token)) { > + /* Track multikey entry frame. */ > + assert(type == MP_ARRAY); > + mk_parent_frame = &frames[stack.used - 1]; > + } > parent = &field->token; > } else { > mp_next(&pos); > diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua > index f9a7180b1..dc6016af9 100644 > --- a/test/engine/json.test.lua > +++ b/test/engine/json.test.lua > @@ -198,4 +198,24 @@ s:drop() > -- > s = box.schema.space.create('withdata') > idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) > +s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) > +s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}}) > +idx:select({'James', 'Bond'}) > +idx:select({'Kirill', 'Shcherbatov'}) > +idx:select({'Ivan', 'Ivanych'}) > +idx:select({'Vasya', 'Pupkin'}) > +idx:select() > +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) > +s:insert({2, 1, {{fname='James', sname='Bond'}}}) > +idx:select() > +idx:delete({'Vasya', 'Pupkin'}) > +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) > +s:insert({2, 1, {{fname='James', sname='Bond'}}}) > +idx:select() > +s:drop() > + > +s = box.schema.space.create('withdata') > +pk = s:create_index('pk') > +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) > +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) > s:drop() Please write many more tests to cover all the following cases: - snapshot - recovery - invalid/incompatible paths - unique/non-unique indexes - duplicates in multikey parts I guess, it's worth moving this to a separate test file.