[tarantool-patches] [PATCH v5 4/4] box: introduce multikey indexes
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Mar 12 16:24:49 MSK 2019
On Thu, Mar 07, 2019 at 06:55:09PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 771c30172..f6ca461c4 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -165,9 +165,24 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
> *path_pool += path_len;
> memcpy(def->parts[part_no].path, path, path_len);
> def->parts[part_no].path_len = path_len;
> +
> + int rc;
> + struct json_lexer lexer;
> + struct json_token token;
> + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
> + while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
> + if (token.type == JSON_TOKEN_ANY) {
> + def->has_multikey_parts = true;
> + def->parts[part_no].is_multikey = true;
> + break;
> + } else if (token.type == JSON_TOKEN_END) {
> + break;
> + }
> + }
Better wrap this in a helper in json lib.
> } else {
> def->parts[part_no].path = NULL;
> def->parts[part_no].path_len = 0;
> + def->parts[part_no].is_multikey = false;
> }
> column_mask_set_fieldno(&def->column_mask, fieldno);
> }
> diff --git a/src/box/key_def.h b/src/box/key_def.h
> index c630e6b43..37fae6b52 100644
> --- a/src/box/key_def.h
> +++ b/src/box/key_def.h
> @@ -610,13 +611,106 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
> }
> if (old_tuple) {
> struct memtx_tree_data old_data;
> - memtx_tree_data_set(&old_data, old_tuple, key_def);
> + memtx_tree_data_set(&old_data, old_tuple, key_def,
> + INVALID_MULTIKEY_INDEX);
> memtx_tree_delete(&index->tree, old_data);
> }
> *result = old_tuple;
> return 0;
> }
>
> +static int
> +multikey_get_array_sz(struct tuple *tuple, struct key_def *key_def)
This function is engine independent. Moreover, we will need it in
Vinyl as well. Please move it to key_def.c.
> +{
> + assert(key_def->has_multikey_parts);
> + struct key_part *part = key_def->parts;
> + /* FIXME: don't like it... */
Me neither. What about storing the path to the first '*' in key_def
instead? You wouldn't need to allocate it - you could simply set it
to point to key_part->path of the first multikey part and store the
length in key_def. Then you wouldn't need to iterate over key parts
here - instead you would simply use the path stored in key_def. You
wouldn't need to add is_multikey and has_multikey_parts flags either.
Instead you could check if the path stored in key_def is not NULL.
This would also allow you to check if multikey parts are compatible
while building a key_def, in key_def_set_part: upon encountering the
first multikey part, you would initialize multikey path in key_def;
then you would check if subsequent multikey paths have the same prefix
(you would need to introduce json_path_ncmp helper for this, I guess).
> + while (part < key_def->parts + key_def->part_count &&
> + !part->is_multikey)
> + part++;
> + assert(part->path != NULL && part->is_multikey);
> + struct tuple_field *field =
> + tuple_format_field_by_path(tuple_format(tuple), part->fieldno,
> + part->path, part->path_len);
> + assert(field != NULL);
This isn't necessarily true. Just like tuple_field_by_path, this
function must fall back on decoding the msgpack in case the tuple
field isn't indexed in the format.
> + struct field_map_ext *field_map_ext =
> + tuple_field_map_ext((uint32_t *)tuple_field_map(tuple),
> + field->offset_slot);
> + return field_map_ext->size;
> +}
> +
> +int
> +memtx_multikey_tree_index_replace(struct index *base, struct tuple *old_tuple,
Nit: memtx_tree_index_ is kinda namespace here so better call it
memtx_tree_index_replace_multikey IMO. The same's fair for other
multikey functions and constants.
> + struct tuple *new_tuple,
> + enum dup_replace_mode mode,
> + struct tuple **result)
> +{
> + struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *key_def = index->tree.arg;
> + if (new_tuple != NULL) {
> + int errcode = 0, tree_res = 0;
> + struct tuple *dup_tuple = NULL;
> + int multikey_idx = 0;
> + int sz = multikey_get_array_sz(new_tuple, key_def);
> + for (; multikey_idx < sz; multikey_idx++) {
> + struct memtx_tree_data new_data;
> + memtx_tree_data_set(&new_data, new_tuple, key_def,
> + multikey_idx);
> + struct memtx_tree_data dup_data;
> + memtx_tree_data_clear(&dup_data);
> + tree_res = memtx_tree_insert(&index->tree, new_data,
> + &dup_data);
> + if (tree_res != 0) {
> + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
> + "memtx_tree_index", "replace");
> + break;
> + }
> + dup_tuple = dup_data.tuple;
> + errcode = replace_check_dup(old_tuple, dup_tuple, mode);
> + if (errcode != 0) {
> + memtx_tree_delete(&index->tree, new_data);
> + if (dup_tuple != NULL) {
> + memtx_tree_insert(&index->tree,
> + dup_data, NULL);
> + }
> + struct space *sp =
> + space_cache_find(base->def->space_id);
> + if (sp != NULL) {
> + diag_set(ClientError, errcode,
> + base->def->name,
> + space_name(sp));
> + }
> + break;
> + }
> + }
> + if (tree_res != 0 || errcode != 0) {
> + multikey_idx--;
> + for (; multikey_idx >= 0; multikey_idx--) {
> + struct memtx_tree_data data;
> + memtx_tree_data_set(&data, new_tuple, key_def,
> + multikey_idx);
> + memtx_tree_delete(&index->tree, data);
> + }
> + return -1;
> + }
> + if (dup_tuple != NULL) {
> + *result = dup_tuple;
> + return 0;
> + }
> + }
> + if (old_tuple != NULL) {
> + int sz = multikey_get_array_sz(old_tuple, key_def);
> + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) {
> + struct memtx_tree_data old_data;
> + memtx_tree_data_set(&old_data, old_tuple, key_def,
> + multikey_idx);
> + memtx_tree_delete(&index->tree, old_data);
> + }
> + }
> + *result = old_tuple;
> + return 0;
> +}
> +
> static struct iterator *
> memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
> const char *key, uint32_t part_count)
> @@ -718,7 +812,48 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> }
> struct memtx_tree_data *elem =
> &index->build_array[index->build_array_size++];
> - memtx_tree_data_set(elem, tuple, key_def);
> + memtx_tree_data_set(elem, tuple, key_def, INVALID_MULTIKEY_INDEX);
> + return 0;
> +}
> +
> +static int
> +memtx_multikey_tree_index_build_next(struct index *base, struct tuple *tuple)
> +{
> + struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *key_def = index->tree.arg;
> + int sz = multikey_get_array_sz(tuple, key_def);
> +
> + if (index->build_array == NULL) {
> + index->build_array =
> + (struct memtx_tree_data *)malloc(MEMTX_EXTENT_SIZE);
> + if (index->build_array == NULL) {
> + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
> + "memtx_tree_index", "build_next");
> + return -1;
> + }
> + index->build_array_alloc_size =
> + MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
> + }
> + assert(index->build_array_size <= index->build_array_alloc_size);
> + if (index->build_array_size == index->build_array_alloc_size) {
> + index->build_array_alloc_size = index->build_array_alloc_size +
> + index->build_array_alloc_size / 2;
> + struct memtx_tree_data *tmp =
> + realloc(index->build_array,
> + index->build_array_alloc_size * sizeof(*tmp));
> + if (tmp == NULL) {
> + diag_set(OutOfMemory, index->build_array_alloc_size *
> + sizeof(*tmp), "memtx_tree_index", "build_next");
> + return -1;
> + }
> + index->build_array = tmp;
> + }
This was copied-n-pasted from memtx_tree_index_build_next.
Better introduce a helper function for that.
> + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) {
> + struct memtx_tree_data *elem =
> + &index->build_array[index->build_array_size++];
> + assert(index->build_array_size < index->build_array_alloc_size);
> + memtx_tree_data_set(elem, tuple, key_def, multikey_idx);
> + }
> return 0;
> }
>
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 7f06d4053..c8a938c4c 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
> }
>
> int
> -tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
> +tuple_go_to_path_multikey(const char **data, const char *path,
> + uint32_t path_len, uint64_t multikey_idx)
> {
> int rc;
> struct json_lexer lexer;
> @@ -463,6 +464,8 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
> json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
> while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
> switch (token.type) {
> + case JSON_TOKEN_ANY:
> + token.num = (int)multikey_idx;
FALLTHROUGH is missing - this code may not compile on certain platforms.
Also, let's use int instead of uint64_t for multikey_idx. And rather
than adding INVALID_MULTIKEY_INDEX contant, simply check if multikey_idx
is non-negative.
Also, an assertion ensuring that if '*' is encountered mutlikey_idx
is set would be welcome here.
> case JSON_TOKEN_NUM:
> rc = tuple_field_go_to_index(data, token.num);
> break;
> @@ -532,7 +535,8 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
> }
> return tuple_field_raw_by_path(format, tuple, field_map, fieldno,
> path + lexer.offset,
> - path_len - lexer.offset, NULL);
> + path_len - lexer.offset, NULL,
> + INVALID_MULTIKEY_INDEX);
> }
>
> /* }}} tuple_field_* getters */
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index 8b12fd5a8..e498e1e41 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -45,6 +45,8 @@ struct slab_arena;
> struct quota;
> struct key_part;
>
> +#define INVALID_MULTIKEY_INDEX UINT64_MAX
> +
> /**
> * A format for standalone tuples allocated on runtime arena.
> * \sa tuple_new().
> @@ -286,6 +288,7 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
>
> /** \endcond public */
>
> +
Nit: extra change.
> /**
> * An atom of Tarantool storage. Represents MsgPack Array.
> * Tuple has the following structure:
> @@ -296,7 +299,9 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
> * | ^
> * +---------------------------------------data_offset
> *
> - * Each 'off_i' is the offset to the i-th indexed field.
> + * Each 'off_i' is the offset to the i-th indexed field or field
> + * map extention (described with struct tuple_field_map_ext)
> + * offset (in sizeof(field_map[0]) units).
The comment is bad as it doesn't really explain what a field map
extension looks like, nor in what cases is it created. A diagram
would be helpful here.
> */
> struct PACKED tuple
> {
> @@ -328,6 +333,26 @@ struct PACKED tuple
> */
> };
>
> +/**
> + * Tuple field map extent. Is allocated on tuple_field_map_create
> + * call automatically when necessary, when tuple field map must be
> + * reallocated dynamically.
> + */
> +struct field_map_ext {
> + /* Sequence of data offset slots. */
> + uint32_t field_map[1];
This looks weird.
> + /* The count of field_map items. */
> + uint32_t size;
> +};
I don't think that this deserves a separate structure. I'd rather
you access the offset map array directly.
> +
> +static inline struct field_map_ext *
> +tuple_field_map_ext(uint32_t *field_map, int32_t root_offset_slot)
> +{
> + uint32_t slot_off = field_map[root_offset_slot];
> + return (struct field_map_ext *)((char *)field_map -
> + slot_off * sizeof(uint32_t) - sizeof(struct field_map_ext));
> +}
> +
> /** Size of the tuple including size of struct tuple. */
> static inline size_t
> tuple_size(const struct tuple *tuple)
> @@ -508,11 +533,32 @@ tuple_field_count(const struct tuple *tuple)
> * with NULL.
> * @param path The path to process.
> * @param path_len The length of the @path.
> + * @param multikey_index The multikey index hint - index of item
It's multikey_idx.
> + * for JSON_TOKEN_ANY level.
> * @retval 0 On success.
> * @retval -1 In case of error in JSON path.
> */
> int
> -tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
> +tuple_go_to_path_multikey(const char **data, const char *path,
> + uint32_t path_len, uint64_t multikey_idx);
> +
> +/**
> + * Retrieve msgpack data by JSON path.
> + * @param data[in, out] Pointer to msgpack with data.
> + * If the field cannot be retrieved be the
> + * specified path @path, it is overwritten
> + * with NULL.
> + * @param path The path to process.
> + * @param path_len The length of the @path.
> + * @retval 0 On success.
> + * @retval -1 In case of error in JSON path.
> + */
Copying-and-pasting is bad, even in case of comments. Please instead
of writing a doxygen-style comment, just say a few words about this
function: what it does, what it expects (the path must not have '*',
I assume).
> +static inline int
> +tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
> +{
> + return tuple_go_to_path_multikey(data, path, path_len,
> + INVALID_MULTIKEY_INDEX);
> +}
>
> /**
> * Get tuple field by field index and relative JSON path.
> @@ -533,7 +579,7 @@ static inline const char *
> tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> const uint32_t *field_map, uint32_t fieldno,
> const char *path, uint32_t path_len,
> - int32_t *offset_slot_hint)
> + int32_t *offset_slot_hint, uint64_t multikey_idx)
Update the comment please.
> {
> int32_t offset_slot;
> if (offset_slot_hint != NULL &&
> @@ -558,10 +604,22 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> if (offset_slot_hint != NULL)
> *offset_slot_hint = offset_slot;
> offset_slot_access:
> - /* Indexed field */
> - if (field_map[offset_slot] == 0)
> - return NULL;
> - tuple += field_map[offset_slot];
> + if (multikey_idx != INVALID_MULTIKEY_INDEX) {
> + struct field_map_ext *field_map_ext =
> + tuple_field_map_ext((uint32_t *)field_map,
> + offset_slot);
> + if (multikey_idx >= field_map_ext->size)
> + return NULL;
> + uint32_t off = field_map_ext->field_map[-multikey_idx];
> + if (off == 0)
> + return NULL;
> + tuple += off;
> + } else {
> + /* Indexed field */
Confused. Both branches are for indexed fields, aren't they?
Anyway, I'd move this whole thing (retrieving offset slot) to a separate
helper function.
> + if (field_map[offset_slot] == 0)
> + return NULL;
> + tuple += field_map[offset_slot];
> + }
> } else {
> uint32_t field_count;
> parse:
> @@ -634,16 +692,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
> uint32_t path_len, uint32_t path_hash);
>
> /**
> - * Get a tuple field pointed to by an index part.
> + * Get a tuple field pointed to by an index part and multikey hint.
> * @param format Tuple format.
> * @param tuple A pointer to MessagePack array.
> * @param field_map A pointer to the LAST element of field map.
> + * @param multikey_idx A multikey hint.
What's a multikey hint? Please mention it in the comment.
> * @param part Index part to use.
> * @retval Field data if the field exists or NULL.
> */
> static inline const char *
> -tuple_field_raw_by_part(struct tuple_format *format, const char *data,
> - const uint32_t *field_map, struct key_part *part)
> +tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data,
> + const uint32_t *field_map,
> + struct key_part *part, uint64_t multikey_idx)
> {
> if (unlikely(part->format_epoch != format->epoch)) {
> assert(format->epoch != 0);
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index 5b06e06b3..dbf7bb4f6 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -458,10 +458,12 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b,
> return i;
> }
>
> -template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
> +template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
> + bool is_multikey>
> static inline int
> -tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
> - struct key_def *key_def)
> +tuple_compare_slowpath_multikey(const struct tuple *tuple_a,
The function has suffix _multikey, but it may be defined with
is_multikey = false template parameter. This is confusing.
> + cmp_aux_t tuple_a_cmp_aux, const struct tuple *tuple_b,
> + cmp_aux_t tuple_b_cmp_aux, struct key_def *key_def)
> {
> assert(has_json_paths == key_def->has_json_paths);
> assert(!has_optional_parts || is_nullable);
Please add an assertion for is_multikey.
> @@ -471,7 +473,7 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
> const char *tuple_a_raw = tuple_data(tuple_a);
> const char *tuple_b_raw = tuple_data(tuple_b);
> if (key_def->part_count == 1 && part->fieldno == 0 &&
> - (!has_json_paths || part->path == NULL)) {
> + (!has_json_paths || part->path == NULL) && !is_multikey) {
I don't understand why you need this: is_multikey can't be set if
there's no json path.
> /*
> * First field can not be optional - empty tuples
> * can not exist.
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 4439ce3e0..99b602cd5 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -33,6 +33,7 @@
> #include "json/json.h"
> #include "tuple_format.h"
> #include "coll_id_cache.h"
> +#include "tuple.h"
>
> #include "third_party/PMurHash.h"
>
> @@ -233,14 +234,19 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
> json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
> while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
> field->token.type != JSON_TOKEN_END) {
> - if (field->token.type == JSON_TOKEN_ANY) {
> - diag_set(ClientError, ER_UNSUPPORTED,
> - "Tarantool", "multikey indexes");
> - goto fail;
> - }
> enum field_type expected_type =
> field->token.type == JSON_TOKEN_STR ?
> FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
> + if (field->token.type == JSON_TOKEN_ANY &&
> + !json_token_is_multikey(&parent->token) &&
> + !json_token_is_leaf(&parent->token)) {
> + assert(expected_type == FIELD_TYPE_ARRAY);
> + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
> + tuple_field_path(parent),
> + field_type_strs[parent->type],
> + "multikey array");
> + goto fail;
> + }
"Field [1].a has type 'array' in one index, but type 'multikey array' in another"
Sounds confusing. IMO better introduce a new error code for this.
BTW, you should also check that there's no multikey parts in Vinyl
indexes. And that there's no two incompatible/unsupported multikey
parts (e.g. a.b[*] and a.c[*], or a.b[*] and a.b[*][*]).
> if (field_type1_contains_type2(parent->type, expected_type)) {
> parent->type = expected_type;
> } else {
> @@ -475,9 +481,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
> break;
> format->min_tuple_size += mp_sizeof_nil();
> }
> - } else {
> + } else if (field->token.type == JSON_TOKEN_STR) {
> /* Account a key string for map member. */
> - assert(field->token.type == JSON_TOKEN_STR);
> format->min_tuple_size +=
> mp_sizeof_str(field->token.len);
Shouldn't we account multikey indexes in min_tuple_size as well?
> }
> @@ -805,6 +810,59 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
> return true;
> }
>
> +/**
> + * Grow tuple_field_map allocation left with extent_size
> + * 0 bytes.
> + */
> +static int
> +tuple_field_map_realloc(uint32_t **field_map, uint32_t *field_map_size,
> + uint32_t extent_size, struct region *region)
> +{
> + assert(extent_size % sizeof(uint32_t) == 0);
> + uint32_t new_field_map_size = *field_map_size + extent_size;
> + uint32_t *new_field_map = region_alloc(region, new_field_map_size);
> + if (new_field_map == NULL) {
> + diag_set(OutOfMemory, new_field_map_size, "region_alloc",
> + "new_field_map");
> + return -1;
> + }
> + memset(new_field_map, 0, extent_size);
> + if (*field_map != NULL) {
> + memcpy((char *)new_field_map + extent_size,
> + (char *)*field_map - *field_map_size, *field_map_size);
> + }
> + *field_map = (uint32_t *)((char *)new_field_map + new_field_map_size);
> + *field_map_size = new_field_map_size;
> + return 0;
> +}
> +
> +/**
> + * Retrieve tuple field map extent by root_offset_slot, reallocate
> + * memory if required.
> + */
> +struct field_map_ext *
> +tuple_field_map_ext_retrieve(uint32_t **field_map, uint32_t *field_map_size,
> + int32_t root_offset_slot, uint32_t extent_items,
> + struct region *region)
> +{
> + uint32_t extent_sz = sizeof(struct field_map_ext) +
> + extent_items * sizeof(uint32_t);
> + uint32_t slot_off = (*field_map)[root_offset_slot];
> + if (slot_off == 0) {
> + /* Field map extent was not created yet. */
> + slot_off = *field_map_size / sizeof(uint32_t);
> + (*field_map)[root_offset_slot] = slot_off;
> + if (tuple_field_map_realloc(field_map, field_map_size,
> + extent_sz, region) != 0)
> + return NULL;
> + }
I don't like that potentially we have to reallocate the field map
several times if multikey indexes are present. What about introducing
a temporary field map storage which would operate with raw pointers
rather than offsets? We would fill this temporary object rather than
a real field map in tuple_field_map_create, without any relocations.
Once we were done, we would call a special helper method that would
turn this object to a real field map.
> + struct field_map_ext *field_map_ext =
> + tuple_field_map_ext(*field_map, root_offset_slot);
> + assert(field_map_ext->size == 0 || field_map_ext->size == extent_items);
> + field_map_ext->size = extent_items;
> + return field_map_ext;
> +}
> +
> /** @sa declaration for details. */
> int
> tuple_field_map_create(struct tuple_format *format, const char *tuple,
> @@ -817,14 +875,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
> return 0; /* Nothing to initialize */
> }
> struct region *region = &fiber()->gc;
> - *field_map_size = format->field_map_size;
> - *field_map = region_alloc(region, *field_map_size);
> - if (*field_map == NULL) {
> - diag_set(OutOfMemory, *field_map_size, "region_alloc",
> - "field_map");
> + *field_map = NULL;
> + *field_map_size = 0;
> + if (tuple_field_map_realloc(field_map, field_map_size,
> + format->field_map_size, region) != 0)
> return -1;
> - }
> - *field_map = (uint32_t *)((char *)*field_map + *field_map_size);
>
> const char *pos = tuple;
> int rc = 0;
> @@ -864,11 +919,6 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
> uint32_t defined_field_count = MIN(field_count, validate ?
> tuple_format_field_count(format) :
> format->index_field_count);
> - /*
> - * Nullify field map to be able to detect by 0,
> - * which key fields are absent in tuple_field().
> - */
> - memset((char *)*field_map - *field_map_size, 0, *field_map_size);
> /*
> * Prepare mp stack of the size equal to the maximum depth
> * of the indexed field in the format::fields tree
> @@ -885,6 +935,12 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
> struct mp_stack stack;
> mp_stack_create(&stack, format->fields_depth, frames);
> mp_stack_push(&stack, MP_ARRAY, defined_field_count);
> + /**
> + * Pointer to the stack entry that represents the multikey
> + * index root ARRAY entry.
> + */
> + struct mp_frame *mk_parent_frame = NULL;
> +
> struct tuple_field *field;
> struct json_token *parent = &format->fields.root;
> while (true) {
> @@ -901,6 +957,10 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
> mp_stack_pop(&stack);
> if (mp_stack_is_empty(&stack))
> goto finish;
> + if (json_token_is_multikey(parent)) {
> + /* Leave multikey index branch. */
> + mk_parent_frame = NULL;
> + }
> parent = parent->parent;
> }
> /*
> @@ -950,8 +1010,23 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
> field_type_strs[field->type]);
> goto error;
> }
> - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
> + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
> + mk_parent_frame == NULL) {
> (*field_map)[field->offset_slot] = pos - tuple;
> + } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
> + mk_parent_frame != NULL) {
> + uint32_t extent_items = mk_parent_frame->count;
> + struct field_map_ext *field_map_ext =
> + tuple_field_map_ext_retrieve(field_map,
> + field_map_size,
> + field->offset_slot,
> + extent_items, region);
> + if (field_map_ext == NULL)
> + goto error;
> + int multikey_idx = mk_parent_frame->curr;
> + field_map_ext->field_map[
> + -multikey_idx] = pos - tuple;
> + }
This function has grown too big for easy reading. It definitely needs to
be split.
> if (required_fields != NULL)
> bit_clear(required_fields, field->id);
> }
> @@ -968,6 +1043,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
> mp_decode_array(&pos) :
> mp_decode_map(&pos);
> mp_stack_push(&stack, type, size);
> + if (json_token_is_multikey(&field->token)) {
> + /* Track multikey entry frame. */
> + assert(type == MP_ARRAY);
> + mk_parent_frame = &frames[stack.used - 1];
> + }
> parent = &field->token;
> } else {
> mp_next(&pos);
> diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
> index f9a7180b1..dc6016af9 100644
> --- a/test/engine/json.test.lua
> +++ b/test/engine/json.test.lua
> @@ -198,4 +198,24 @@ s:drop()
> --
> s = box.schema.space.create('withdata')
> idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
> +s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
> +s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}})
> +idx:select({'James', 'Bond'})
> +idx:select({'Kirill', 'Shcherbatov'})
> +idx:select({'Ivan', 'Ivanych'})
> +idx:select({'Vasya', 'Pupkin'})
> +idx:select()
> +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
> +s:insert({2, 1, {{fname='James', sname='Bond'}}})
> +idx:select()
> +idx:delete({'Vasya', 'Pupkin'})
> +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
> +s:insert({2, 1, {{fname='James', sname='Bond'}}})
> +idx:select()
> +s:drop()
> +
> +s = box.schema.space.create('withdata')
> +pk = s:create_index('pk')
> +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
> +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
> s:drop()
Please write many more tests to cover all the following cases:
- snapshot
- recovery
- invalid/incompatible paths
- unique/non-unique indexes
- duplicates in multikey parts
I guess, it's worth moving this to a separate test file.
More information about the Tarantool-patches
mailing list