From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
From: Kirill Shcherbatov 
Subject: [PATCH v3 7/7] box: introduce multikey indexes in memtx
Date: Tue, 2 Apr 2019 18:49:38 +0300
Message-Id: <52eb9dab6c999445ad376c52bb3c5bf716130ff8.1554218695.git.kshcherbatov@tarantool.org>
In-Reply-To: 
References: 
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: Kirill Shcherbatov 
List-ID: 

- In the case of a multikey index an ambiguity arises: which key
  should be used in a comparison. The previously introduced
  comparison hints act as a non-negative numeric index of the key
  to use,
- Memtx B+ tree replace and build_next methods have been patched
  to insert the same tuple multiple times, once per logical index
  of the key in the array,
- Map fields have been extended with "extent" service areas that
  contain the offsets of multikey index keys, addressed by an
  additional logical index.

Part of #1257

@TarantoolBot document
Title: introduce multikey indexes in memtx
Any JSON index in which at least one part contains the array
index placeholder sign "[*]" is called "multikey". Such indexes
allow you to automatically index a set of documents that share
the same document structure.

The multikey index design has a number of restrictions that must
be taken into account:
- it cannot be primary because of the ambiguity arising from its
  definition (a primary index requires a single unique key that
  identifies a tuple),
- if some node in the JSON tree of all defined indexes contains
  an array index placeholder [*], no other JSON path can use an
  explicit JSON index on its nested field,
- it supports "unique" semantics, but its uniqueness is a little
  different from conventional indexes: you may insert a tuple in
  which the same key occurs multiple times into a unique multikey
  index, but you cannot insert a tuple when any of its keys is
  already present in some other tuple stored in the space (see
  the illustrative snippet below),
- a "duplicate" conflict in a unique multikey index occurs when
  the sets of extracted keys have a non-empty intersection,
- to identify the different keys by which a given data tuple is
  indexed, each key is assigned a logical sequence number in the
  array defined with the array index placeholder [*] in the index
  (such an array is called the multikey index root),
- no index part can contain more than one array index placeholder
  sign [*] in its JSON path,
- all parts containing JSON paths with the array index
  placeholder [*] must have the same (in terms of JSON tokens)
  prefix before this placeholder sign.
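A minimal sketch of the unique-multikey semantics described
above (illustrative console session, not part of the patch;
space and index names are arbitrary, behaviour follows the tests
added in test/engine/multikey.test.lua):
s = box.schema.space.create('test')
pk = s:create_index('pk')
-- One array index placeholder [*] per part, unique TREE index.
idx = s:create_index('idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}})
-- Duplicate keys inside a single tuple are allowed:
s:insert({1, {10, 10, 20}})
-- A key already indexed for another tuple is rejected:
s:insert({2, {20, 30}})
-- error: Duplicate key exists in unique index 'idx' in space 'test'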
Example: s = box.schema.space.create('withdata') pk = s:create_index('pk') parts = { {2, 'str', path = 'data[*].name'}, {2, 'str', path = 'data[*].extra.phone'} } idx = s:create_index('idx', {parts = parts}) s:insert({1, {data = {{name="A", extra={phone="111"}}, {name="B", extra={phone="111"}}}, garbage = 1}} idx:get({'A', '111'}) --- src/box/errcode.h | 1 + src/box/field_map.c | 68 +++++++- src/box/field_map.h | 137 ++++++++++++++-- src/box/index_def.c | 5 + src/box/key_def.c | 112 +++++++++---- src/box/key_def.h | 34 ++++ src/box/memtx_space.c | 18 ++ src/box/memtx_tree.c | 180 +++++++++++++++++--- src/box/sql.c | 3 +- src/box/tuple.c | 28 +++- src/box/tuple.h | 81 +++++++-- src/box/tuple_compare.cc | 150 ++++++++++++++--- src/box/tuple_extract_key.cc | 41 ++++- src/box/tuple_format.c | 117 +++++++++++-- src/box/tuple_format.h | 32 ++++ src/box/vinyl.c | 5 + test/box/misc.result | 1 + test/engine/json.result | 13 -- test/engine/json.test.lua | 7 - test/engine/multikey.result | 298 ++++++++++++++++++++++++++++++++++ test/engine/multikey.test.lua | 88 ++++++++++ 21 files changed, 1262 insertions(+), 157 deletions(-) create mode 100644 test/engine/multikey.result create mode 100644 test/engine/multikey.test.lua diff --git a/src/box/errcode.h b/src/box/errcode.h index 3f8cb8e0e..ea992a6b2 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -246,6 +246,7 @@ struct errcode_record { /*191 */_(ER_SQL_PARSER_LIMIT, "%s %d exceeds the limit (%d)") \ /*192 */_(ER_INDEX_DEF_UNSUPPORTED, "%s are prohibited in an index definition") \ /*193 */_(ER_CK_DEF_UNSUPPORTED, "%s are prohibited in a CHECK constraint definition") \ + /*194 */_(ER_INDEX_MULTIKEY_INVALID, "Multikey index is invalid: %s") \ /* * !IMPORTANT! Please follow instructions at start of the file diff --git a/src/box/field_map.c b/src/box/field_map.c index 690aa461d..ae6c5be5f 100644 --- a/src/box/field_map.c +++ b/src/box/field_map.c @@ -37,6 +37,8 @@ field_map_builder_create(struct field_map_builder *builder, uint32_t minimal_field_map_size, struct region *region) { + builder->region = region; + builder->extents_size = 0; builder->slot_count = minimal_field_map_size / sizeof(uint32_t); if (minimal_field_map_size == 0) { builder->slots = NULL; @@ -53,9 +55,71 @@ field_map_builder_create(struct field_map_builder *builder, return 0; } +/** + * Get size of extention (in bytes) by count of items it + * must contain. 
+ */ +static uint32_t +field_map_ext_size(uint32_t items) +{ + return sizeof(struct field_map_ext) + items * sizeof(uint32_t); +} + +struct field_map_ext * +field_map_builder_ext_get(struct field_map_builder *builder, + int32_t offset_slot, uint32_t extent_items) +{ + struct field_map_ext *extent; + if (builder->slots[offset_slot].has_extent) { + extent = builder->slots[offset_slot].extent; + assert(extent != NULL); + assert(extent->items == extent_items); + return extent; + } + uint32_t sz = field_map_ext_size(extent_items); + extent = region_alloc(builder->region, sz); + if (extent == NULL) { + diag_set(OutOfMemory, sz, "region", "extent"); + return NULL; + } + memset(extent, 0, sz); + extent->items = extent_items; + builder->slots[offset_slot].extent = extent; + builder->slots[offset_slot].has_extent = true; + builder->extents_size += sz; + return extent; +} + void field_map_build(struct field_map_builder *builder, char *buffer) { - uint32_t field_map_size = field_map_build_size(builder); - memcpy(buffer, (char *)builder->slots - field_map_size, field_map_size); + /* + * To initialize the field map and its extents, prepare + * the following memory layout with pointers: + * + * offset + * +---------------------+ + * | | + * [extentK]..[extent1][[slotN]..[slot2][slot1]] + * | |extent_wptr |field_map + * <- <- <- + * + * The buffer size is assumed to be sufficient to write + * field_map_build_size(builder) bytes there. + */ + uint32_t *field_map = + (uint32_t *)(buffer + field_map_build_size(builder)); + char *extent_wptr = buffer + builder->extents_size; + for (int32_t i = -1; i >= -(int32_t)builder->slot_count; i--) { + if (!builder->slots[i].has_extent) { + field_map[i] = builder->slots[i].offset; + continue; + } + struct field_map_ext *extent = builder->slots[i].extent; + /** Retrive memory for the extent. */ + uint32_t sz = field_map_ext_size(extent->items); + extent_wptr -= sz; + field_map[i] = (char *)field_map - (char *)extent_wptr; + memcpy(extent_wptr, extent, sz); + } } diff --git a/src/box/field_map.h b/src/box/field_map.h index 47ecbd009..af578ec6b 100644 --- a/src/box/field_map.h +++ b/src/box/field_map.h @@ -34,6 +34,7 @@ #include struct region; +struct field_map_builder_slot; /** * A field map is a special area is reserved before tuple's @@ -46,13 +47,15 @@ struct region; * offset_slot(s) is performed on tuple_format creation on index * create or alter (see tuple_format_create()). * - * 4b 4b MessagePack data. - * +------+----+------+---------------------------+ - * tuple: | offN | .. | off1 | header ..|key1|..|keyN|.. | - * +--+---+----+--+---+---------------------------+ - * | ... | ^ ^ - * | +-----------------+ | - * +--------------------------------------+ + * 4b 4b 4b 4b MessagePack data. + * +-----------+------+----+------+------------------------+ + *tuple: |cnt|off1|..| offN | .. | off1 | header ..|key1|..|keyN|| + * +-----+-----+--+---+----+--+---+------------------------+ + * ext1 ^ | | ... | ^ ^ + * +-----|--------+ | | | + * indirection | +-----------------+ | + * +----------------------------------------------+ + * (offset_slot = N, extent_slot = 1) --> offset * * This field_map_builder class is used for tuple field_map * construction. It encapsulates field_map build logic and size @@ -60,19 +63,88 @@ struct region; * * Each field offset is a positive number, except the case when * a field is not in the tuple. In this case offset is 0. 
+ * + * Some slots may store an offset of the field_map_ext structure, + * which contains an additional sequence of offsets of size + * defined above(see field_map_ext layout). The caller needs to + * be aware of when the slot is an offset of the data and when + * it is the offset of the field_map_ext. + * + * Now these extents are used to organize a multikey index. + * The count of keys in the multikey index imposes the count of + * items in extent while the i-th extent's slot contains the + * offset of the i-th key field. */ struct field_map_builder { /** - * The pointer to the end of filed_map allocation. + * The pointer to the end of field_map_builder_slot(s) + * allocation. * Its elements are accessible by negative indexes * that coinciding with offset_slot(s). */ - uint32_t *slots; + struct field_map_builder_slot *slots; /** * The count of slots in field_map_builder::slots * allocation. */ uint32_t slot_count; + /** + * Total size of memory is allocated for field_map + * extentions. + */ + uint32_t extents_size; + /** + * Region to use to perform memory allocations. + */ + struct region *region; +}; + +/** + * Internal stucture representing field_map extent. + * (see field_map_builder description). + */ +struct field_map_ext { + /** Count of field_map_ext::offset[] elements. */ + uint32_t items; + /** Data offset in tuple array. */ + uint32_t offset[0]; +}; + +/** + * Internal function to get or allocate field map extent + * by offset_slot, and count of items. + */ +struct field_map_ext * +field_map_builder_ext_get(struct field_map_builder *builder, + int32_t offset_slot, uint32_t extent_items); + +/** + * Instead of using uint32_t offset slots directly the + * field_map_builder uses this structure as a storage atom. + * When there is a need to initialize an extent, the + * field_map_builder allocates a new memory chunk and sets + * field_map_builder_slot::pointer (instead of real field_map + * reallocation). + * + * On field_map_build, all of the extents are dumped to the same + * memory chunk that the regular field_map slots and corresponding + * slots represent relative field_map_ext offset instead of + * field data offset. + * + * The allocated memory is accounted for in extents_size. + */ +struct field_map_builder_slot { + /** + * True when this slot must be interpret as + * extention pointer. + */ + bool has_extent; + union { + /** Data offset in tuple. */ + uint32_t offset; + /** Pointer to field_map_ext extention. */ + struct field_map_ext *extent; + }; }; /** @@ -82,9 +154,22 @@ struct field_map_builder { * When a field is not in the data tuple, its offset is 0. 
*/ static inline uint32_t -field_map_get_offset(const uint32_t *field_map, int32_t offset_slot) +field_map_get_offset(const uint32_t *field_map, int32_t offset_slot, + int multikey_idx) { - return field_map[offset_slot]; + uint32_t offset; + if (multikey_idx >= 0) { + assert(field_map[offset_slot] != 0); + struct field_map_ext *extent = + (struct field_map_ext *)((char *)field_map - + field_map[offset_slot]); + if ((uint32_t)multikey_idx >= extent->items) + return 0; + offset = extent->offset[multikey_idx]; + } else { + offset = field_map[offset_slot]; + } + return offset; } /** @@ -116,7 +201,32 @@ field_map_builder_set_slot(struct field_map_builder *builder, { assert(offset_slot < 0); assert(offset > 0); - builder->slots[offset_slot] = offset; + builder->slots[offset_slot].offset = offset; +} + +/** + * Set data offset in field map extent (by given offset_slot, + * extent_slot and extent_items) for a field identified by + * unique offset_slot. + * + * The offset_slot argument must be negative and offset must be + * positive (by definition). + */ +static inline int +field_map_builder_set_extent_slot(struct field_map_builder *builder, + int32_t offset_slot, int32_t extent_slot, + uint32_t extent_items, uint32_t offset) +{ + assert(offset_slot < 0); + assert(offset > 0); + assert(extent_slot >= 0 && extent_items > 0); + struct field_map_ext *extent = + field_map_builder_ext_get(builder, offset_slot, extent_items); + if (extent == NULL) + return -1; + assert(extent->items == extent_items); + extent->offset[extent_slot] = offset; + return 0; } /** @@ -125,7 +235,8 @@ field_map_builder_set_slot(struct field_map_builder *builder, static inline uint32_t field_map_build_size(struct field_map_builder *builder) { - return builder->slot_count * sizeof(uint32_t); + return builder->slot_count * sizeof(uint32_t) + + builder->extents_size; } /** diff --git a/src/box/index_def.c b/src/box/index_def.c index c743d12ce..8c9a99551 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -269,6 +269,11 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) space_name, "too many key parts"); return false; } + if (index_def->iid == 0 && key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, + space_name, "primary key cannot be multikey"); + return false; + } for (uint32_t i = 0; i < index_def->key_def->part_count; i++) { assert(index_def->key_def->parts[i].type < field_type_MAX); if (index_def->key_def->parts[i].fieldno > BOX_INDEX_FIELD_MAX) { diff --git a/src/box/key_def.c b/src/box/key_def.c index 55dcf1eb5..4d36560e0 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -138,7 +138,57 @@ key_def_set_func(struct key_def *def) key_def_set_extract_func(def); } -static void +static int +key_def_set_part_path(struct key_def *def, uint32_t part_no, const char *path, + uint32_t path_len, char **path_pool) +{ + struct key_part *part = &def->parts[part_no]; + if (path == NULL) { + part->path = NULL; + part->path_len = 0; + return 0; + } + assert(path_pool != NULL); + part->path = *path_pool; + *path_pool += path_len; + memcpy(part->path, path, path_len); + part->path_len = path_len; + + /* + * Test whether this key_part has array index + * placeholder [*] (i.e. is a part of multikey index + * definition). + */ + int multikey_path_len = + json_path_multikey_offset(path, path_len, TUPLE_INDEX_BASE); + if (multikey_path_len < 0) + return 0; + + /* + * In case of multikey index key_parts must have the + * same JSON prefix. 
+ */ + if (!key_def_is_multikey(def)) { + /* + * Keep the index of the first multikey key_part + * and the length of JSON path substring to the + * array index placeholder sign [*]. + */ + def->multikey_path = part->path; + def->multikey_fieldno = part->fieldno; + def->multikey_path_len = (uint32_t)multikey_path_len; + } else if (json_path_cmp(path, multikey_path_len, def->multikey_path, + def->multikey_path_len, + TUPLE_INDEX_BASE) != 0) { + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part_no + TUPLE_INDEX_BASE, + "incompatable multikey index path"); + return -1; + } + return 0; +} + +static int key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, enum on_conflict_action nullable_action, struct coll *coll, uint32_t coll_id, @@ -158,17 +208,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, def->parts[part_no].sort_order = sort_order; def->parts[part_no].offset_slot_cache = offset_slot; def->parts[part_no].format_epoch = format_epoch; - if (path != NULL) { - assert(path_pool != NULL); - def->parts[part_no].path = *path_pool; - *path_pool += path_len; - memcpy(def->parts[part_no].path, path, path_len); - def->parts[part_no].path_len = path_len; - } else { - def->parts[part_no].path = NULL; - def->parts[part_no].path_len = 0; - } column_mask_set_fieldno(&def->column_mask, fieldno); + return key_def_set_part_path(def, part_no, path, path_len, path_pool); } struct key_def * @@ -203,10 +244,14 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) coll = coll_id->coll; } uint32_t path_len = part->path != NULL ? strlen(part->path) : 0; - key_def_set_part(def, i, part->fieldno, part->type, - part->nullable_action, coll, part->coll_id, - part->sort_order, part->path, path_len, - &path_pool, TUPLE_OFFSET_SLOT_NIL, 0); + if (key_def_set_part(def, i, part->fieldno, part->type, + part->nullable_action, coll, part->coll_id, + part->sort_order, part->path, path_len, + &path_pool, TUPLE_OFFSET_SLOT_NIL, + 0) != 0) { + key_def_delete(def); + return NULL; + } } assert(path_pool == (char *)def + sz); key_def_set_func(def); @@ -256,11 +301,12 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) key_def->unique_part_count = part_count; for (uint32_t item = 0; item < part_count; ++item) { - key_def_set_part(key_def, item, fields[item], - (enum field_type)types[item], - ON_CONFLICT_ACTION_DEFAULT, - NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0, - NULL, TUPLE_OFFSET_SLOT_NIL, 0); + if (key_def_set_part(key_def, item, fields[item], + (enum field_type)types[item], + ON_CONFLICT_ACTION_DEFAULT, NULL, + COLL_NONE, SORT_ORDER_ASC, NULL, 0, NULL, + TUPLE_OFFSET_SLOT_NIL, 0) != 0) + unreachable(); } key_def_set_func(key_def); return key_def; @@ -685,11 +731,13 @@ key_def_merge(const struct key_def *first, const struct key_def *second) part = first->parts; end = part + first->part_count; for (; part != end; part++) { - key_def_set_part(new_def, pos++, part->fieldno, part->type, - part->nullable_action, part->coll, - part->coll_id, part->sort_order, part->path, - part->path_len, &path_pool, - part->offset_slot_cache, part->format_epoch); + if (key_def_set_part(new_def, pos++, part->fieldno, part->type, + part->nullable_action, part->coll, + part->coll_id, part->sort_order, + part->path, part->path_len, &path_pool, + part->offset_slot_cache, + part->format_epoch) != 0) + unreachable(); } /* Set-append second key def's part to the new key def. 
*/ @@ -698,11 +746,15 @@ key_def_merge(const struct key_def *first, const struct key_def *second) for (; part != end; part++) { if (!key_def_can_merge(first, part)) continue; - key_def_set_part(new_def, pos++, part->fieldno, part->type, - part->nullable_action, part->coll, - part->coll_id, part->sort_order, part->path, - part->path_len, &path_pool, - part->offset_slot_cache, part->format_epoch); + if (key_def_set_part(new_def, pos++, part->fieldno, part->type, + part->nullable_action, part->coll, + part->coll_id, part->sort_order, + part->path, part->path_len, &path_pool, + part->offset_slot_cache, + part->format_epoch) != 0) { + key_def_delete(new_def); + return NULL; + } } assert(path_pool == (char *)new_def + sz); key_def_set_func(new_def); diff --git a/src/box/key_def.h b/src/box/key_def.h index 288cf7270..3adf20453 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -234,6 +234,34 @@ struct key_def { bool has_optional_parts; /** Key fields mask. @sa column_mask.h for details. */ uint64_t column_mask; + /** + * In case of the multikey index, a pointer to the + * JSON path string, the path to the root node of + * multikey index that contains the array having + * index placeholder sign [*]. + * + * This pointer duplicates the JSON path of some key_part. + * This path is not 0-terminated. Moreover, it is only + * JSON path subpath so key_def::multikey_path_len must be + * directly used in all cases. + * + * This field is not NULL iff this is multikey index + * key definition. + */ + const char *multikey_path; + /** + * The length of the key_def::multikey_path. + * Valid when key_def_is_multikey(key_def) is true, + * undefined otherwise. + */ + uint32_t multikey_path_len; + /** + * The index of the root field of the multikey JSON + * path index key_def::multikey_path. + * Valid when key_def_is_multikey(key_def) is true, + * undefined otherwise. + */ + uint32_t multikey_fieldno; /** The size of the 'parts' array. */ uint32_t part_count; /** Description of parts of a multipart index. */ @@ -472,6 +500,12 @@ key_def_is_sequential(const struct key_def *key_def) return true; } +static inline bool +key_def_is_multikey(const struct key_def *key_def) +{ + return key_def->multikey_path != NULL; +} + /** * Return true if @a key_def defines has fields that requires * special collation comparison. diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index d8529fe0e..ca5f7426d 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -638,6 +638,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def) "HASH index must be unique"); return -1; } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "HASH index cannot be multikey"); + return -1; + } break; case TREE: /* TREE index has no limitations. 
*/ @@ -661,6 +667,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def) "RTREE index field type must be ARRAY"); return -1; } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "RTREE index cannot be multikey"); + return -1; + } /* no furter checks of parts needed */ return 0; case BITSET: @@ -683,6 +695,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def) "BITSET index field type must be NUM or STR"); return -1; } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "BITSET index cannot be multikey"); + return -1; + } /* no furter checks of parts needed */ return 0; default: diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index fe037c54a..fa8a04664 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -584,6 +584,77 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, return 0; } +static int +memtx_tree_index_replace_multikey(struct index *base, struct tuple *old_tuple, + struct tuple *new_tuple, + enum dup_replace_mode mode, + struct tuple **result) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + if (new_tuple != NULL) { + int errcode = 0, tree_res = 0; + struct memtx_tree_data new_data, dup_data; + new_data.tuple = new_tuple; + int multikey_idx = 0; + uint32_t items = tuple_field_multikey_items(new_tuple, cmp_def); + for (; (uint32_t)multikey_idx < items; multikey_idx++) { + new_data.hint = multikey_idx; + dup_data.tuple = NULL; + tree_res = memtx_tree_insert(&index->tree, new_data, + &dup_data); + if (tree_res != 0) { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, + "memtx_tree_index", "replace"); + break; + } + errcode = new_tuple == dup_data.tuple ? 0 : + replace_check_dup(old_tuple, dup_data.tuple, + mode); + if (errcode != 0) { + memtx_tree_delete(&index->tree, new_data); + if (dup_data.tuple != NULL) { + /* Rollback replace. 
*/ + memtx_tree_insert(&index->tree, + dup_data, NULL); + } + struct space *sp = + space_cache_find(base->def->space_id); + if (sp != NULL) { + diag_set(ClientError, errcode, + base->def->name, + space_name(sp)); + } + break; + } + } + if (tree_res != 0 || errcode != 0) { + multikey_idx--; + for (; multikey_idx >= 0; multikey_idx--) { + new_data.hint = multikey_idx; + memtx_tree_delete(&index->tree, new_data); + } + return -1; + } + if (dup_data.tuple != NULL) { + *result = dup_data.tuple; + return 0; + } + } + if (old_tuple != NULL) { + struct memtx_tree_data old_data; + old_data.tuple = old_tuple; + uint32_t items = tuple_field_multikey_items(old_tuple, cmp_def); + for (uint32_t multikey_idx = 0; multikey_idx < items; + multikey_idx++) { + old_data.hint = multikey_idx; + memtx_tree_delete(&index->tree, old_data); + } + } + *result = old_tuple; + return 0; +} + static struct iterator * memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, const char *key, uint32_t part_count) @@ -656,34 +727,42 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint) } static int -memtx_tree_index_build_next(struct index *base, struct tuple *tuple) +memtx_tree_index_build_array_realloc(struct memtx_tree_index *index, + uint32_t items) { - struct memtx_tree_index *index = (struct memtx_tree_index *)base; - struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + bool need_realloc = false; if (index->build_array == NULL) { - index->build_array = malloc(MEMTX_EXTENT_SIZE); - if (index->build_array == NULL) { - diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, - "memtx_tree_index", "build_next"); - return -1; - } - index->build_array_alloc_size = - MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]); + index->build_array_alloc_size = MEMTX_EXTENT_SIZE / + sizeof(index->build_array[0]); + need_realloc = true; } - assert(index->build_array_size <= index->build_array_alloc_size); - if (index->build_array_size == index->build_array_alloc_size) { - index->build_array_alloc_size = index->build_array_alloc_size + - index->build_array_alloc_size / 2; - struct memtx_tree_data *tmp = - realloc(index->build_array, - index->build_array_alloc_size * sizeof(*tmp)); - if (tmp == NULL) { - diag_set(OutOfMemory, index->build_array_alloc_size * - sizeof(*tmp), "memtx_tree_index", "build_next"); - return -1; - } - index->build_array = tmp; + uint32_t build_array_new_size = index->build_array_size + items; + if (build_array_new_size > index->build_array_alloc_size) { + index->build_array_alloc_size += + MAX(index->build_array_alloc_size / 2, + build_array_new_size - index->build_array_alloc_size); + need_realloc = true; + } + if (!need_realloc) + return 0; + struct memtx_tree_data *tmp = realloc(index->build_array, + index->build_array_alloc_size * sizeof(*tmp)); + if (tmp == NULL) { + diag_set(OutOfMemory, index->build_array_alloc_size * + sizeof(*tmp), "memtx_tree_index", "build_next"); + return -1; } + index->build_array = tmp; + return 0; +} + +static int +memtx_tree_index_build_next(struct index *base, struct tuple *tuple) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + if (memtx_tree_index_build_array_realloc(index, 1) != 0) + return -1; struct memtx_tree_data *elem = &index->build_array[index->build_array_size++]; elem->tuple = tuple; @@ -691,6 +770,24 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) return 0; } +static int +memtx_tree_index_build_next_multikey(struct index *base, 
struct tuple *tuple) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + uint32_t items = tuple_field_multikey_items(tuple, cmp_def); + if (memtx_tree_index_build_array_realloc(index, items) != 0) + return -1; + for (uint32_t multikey_idx = 0; multikey_idx < items; multikey_idx++) { + struct memtx_tree_data *elem = + &index->build_array[index->build_array_size++]; + assert(index->build_array_size <= index->build_array_alloc_size); + elem->tuple = tuple; + elem->hint = multikey_idx; + } + return 0; +} + static void memtx_tree_index_end_build(struct index *base) { @@ -793,6 +890,36 @@ static const struct index_vtab memtx_tree_index_vtab = { /* .end_build = */ memtx_tree_index_end_build, }; +static const struct index_vtab memtx_tree_index_multikey_vtab = { + /* .destroy = */ memtx_tree_index_destroy, + /* .commit_create = */ generic_index_commit_create, + /* .abort_create = */ generic_index_abort_create, + /* .commit_modify = */ generic_index_commit_modify, + /* .commit_drop = */ generic_index_commit_drop, + /* .update_def = */ memtx_tree_index_update_def, + /* .depends_on_pk = */ memtx_tree_index_depends_on_pk, + /* .def_change_requires_rebuild = */ + memtx_index_def_change_requires_rebuild, + /* .size = */ memtx_tree_index_size, + /* .bsize = */ memtx_tree_index_bsize, + /* .min = */ generic_index_min, + /* .max = */ generic_index_max, + /* .random = */ memtx_tree_index_random, + /* .count = */ memtx_tree_index_count, + /* .get = */ memtx_tree_index_get, + /* .replace = */ memtx_tree_index_replace_multikey, + /* .create_iterator = */ memtx_tree_index_create_iterator, + /* .create_snapshot_iterator = */ + memtx_tree_index_create_snapshot_iterator, + /* .stat = */ generic_index_stat, + /* .compact = */ generic_index_compact, + /* .reset_stat = */ generic_index_reset_stat, + /* .begin_build = */ memtx_tree_index_begin_build, + /* .reserve = */ memtx_tree_index_reserve, + /* .build_next = */ memtx_tree_index_build_next_multikey, + /* .end_build = */ memtx_tree_index_end_build, +}; + struct index * memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) { @@ -803,8 +930,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) "malloc", "struct memtx_tree_index"); return NULL; } + const struct index_vtab *vtab = key_def_is_multikey(def->key_def) ? 
+ &memtx_tree_index_multikey_vtab : + &memtx_tree_index_vtab; if (index_create(&index->base, (struct engine *)memtx, - &memtx_tree_index_vtab, def) != 0) { + vtab, def) != 0) { free(index); return NULL; } diff --git a/src/box/sql.c b/src/box/sql.c index 7d5c3a8e0..be2212ef1 100644 --- a/src/box/sql.c +++ b/src/box/sql.c @@ -788,7 +788,8 @@ tarantoolsqlIdxKeyCompare(struct BtCursor *cursor, } else { uint32_t field_offset = field_map_get_offset(field_map, - field->offset_slot); + field->offset_slot, + -1); p = base + field_offset; } } diff --git a/src/box/tuple.c b/src/box/tuple.c index 570e4c192..8c2cd7611 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -130,7 +130,7 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple) int rc = tuple_field_map_create(&builder, format, tuple, true); region_truncate(region, region_svp); assert(rc != 0 || - field_map_build_size(&builder) == format->field_map_size); + field_map_build_size(&builder) >= format->field_map_size); return rc; } @@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len) } int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len) +tuple_go_to_path(const char **data, const char *path, uint32_t path_len, + int multikey_idx) { int rc; struct json_lexer lexer; @@ -463,6 +464,10 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len) json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { switch (token.type) { + case JSON_TOKEN_ANY: + assert(multikey_idx >= 0); + token.num = multikey_idx; + FALLTHROUGH; case JSON_TOKEN_NUM: rc = tuple_field_go_to_index(data, token.num); break; @@ -532,7 +537,24 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, } return tuple_field_raw_by_path(format, tuple, field_map, fieldno, path + lexer.offset, - path_len - lexer.offset, NULL); + path_len - lexer.offset, NULL, -1); +} + +uint32_t +tuple_field_raw_multikey_items(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_def *key_def) +{ + assert(key_def_is_multikey(key_def)); + const char *array_raw = + tuple_field_raw_by_path(format, data, field_map, + key_def->multikey_fieldno, + key_def->multikey_path, + key_def->multikey_path_len, NULL, -1); + if (array_raw == NULL) + return 0; + assert(mp_typeof(*array_raw) == MP_ARRAY); + return mp_decode_array(&array_raw); } /* }}} tuple_field_* getters */ diff --git a/src/box/tuple.h b/src/box/tuple.h index da4085bcf..99753c8d5 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -508,14 +508,19 @@ tuple_field_count(const struct tuple *tuple) * with NULL. * @param path The path to process. * @param path_len The length of the @path. + * @param multikey_idx The multikey index hint - index of + * document to retrieve when array index + * placeholder "[*]" is met. * @retval 0 On success. * @retval -1 In case of error in JSON path. */ int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len); +tuple_go_to_path(const char **data, const char *path, uint32_t path_len, + int multikey_idx); /** - * Get tuple field by field index and relative JSON path. + * Get tuple field by field index, relative JSON path and + * multikey_idx. * @param format Tuple format. * @param tuple MessagePack tuple's body. * @param field_map Tuple field map. @@ -528,12 +533,15 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len); * access data in a single operation. 
* Else it is initialized with offset_slot * of format field by path. + * @param multikey_idx The multikey index hint - index of + * document to retrieve when array index + * placeholder "[*]" is met. */ static inline const char * tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t fieldno, const char *path, uint32_t path_len, - int32_t *offset_slot_hint) + int32_t *offset_slot_hint, int multikey_idx) { int32_t offset_slot; if (offset_slot_hint != NULL && @@ -560,7 +568,8 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, *offset_slot_hint = offset_slot; offset_slot_access: /* Indexed field */ - offset = field_map_get_offset(field_map, offset_slot); + offset = field_map_get_offset(field_map, offset_slot, + multikey_idx); if (offset == 0) return NULL; tuple += offset; @@ -574,8 +583,8 @@ parse: for (uint32_t k = 0; k < fieldno; k++) mp_next(&tuple); if (path != NULL && - unlikely(tuple_go_to_path(&tuple, path, - path_len) != 0)) + unlikely(tuple_go_to_path(&tuple, path, path_len, + multikey_idx) != 0)) return NULL; } return tuple; @@ -597,7 +606,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t field_no) { return tuple_field_raw_by_path(format, tuple, field_map, field_no, - NULL, 0, NULL); + NULL, 0, NULL, -1); } /** @@ -636,16 +645,19 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, uint32_t path_len, uint32_t path_hash); /** - * Get a tuple field pointed to by an index part. + * Get a tuple field pointed to by an index part and multikey + * index hint. * @param format Tuple format. - * @param tuple A pointer to MessagePack array. + * @param data A pointer to MessagePack array. * @param field_map A pointer to the LAST element of field map. * @param part Index part to use. + * @param multikey_idx A multikey index hint. * @retval Field data if the field exists or NULL. */ static inline const char * -tuple_field_raw_by_part(struct tuple_format *format, const char *data, - const uint32_t *field_map, struct key_part *part) +tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, int multikey_idx) { if (unlikely(part->format_epoch != format->epoch)) { assert(format->epoch != 0); @@ -658,7 +670,23 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data, } return tuple_field_raw_by_path(format, data, field_map, part->fieldno, part->path, part->path_len, - &part->offset_slot_cache); + &part->offset_slot_cache, multikey_idx); +} + +/** + * Get a tuple field pointed to by an index part. + * @param format Tuple format. + * @param data A pointer to MessagePack array. + * @param field_map A pointer to the LAST element of field map. + * @param part Index part to use. + * @retval Field data if the field exists or NULL. + */ +static inline const char * +tuple_field_raw_by_part(struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part) +{ + return tuple_field_raw_by_part_multikey(format, data, field_map, + part, -1); } /** @@ -674,6 +702,35 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part) tuple_field_map(tuple), part); } +/** + * Get count of documents in tuple by given multikey index + * definition. + * @param format Tuple format. + * @param data A pointer to MessagePack array. + * @param field_map A pointer to the LAST element of field map. + * @param key_def Index key_definition. 
+ * @retval Count of documents in the multikey index. + */ +uint32_t +tuple_field_raw_multikey_items(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_def *key_def); + +/** + * Get count of documents in tuple by given multikey index + * definition. + * @param tuple Tuple to get the count of documents from. + * @param key_def Index key_definition. + * @retval Count of documents in the multikey index. + */ +static inline uint32_t +tuple_field_multikey_items(struct tuple *tuple, struct key_def *key_def) +{ + return tuple_field_raw_multikey_items(tuple_format(tuple), + tuple_data(tuple), + tuple_field_map(tuple), key_def); +} + /** * @brief Tuple Interator */ diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 93756365b..bf3a23097 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -375,6 +375,29 @@ mp_compare_scalar_coll(const char *field_a, const char *field_b, return mp_compare_scalar_with_type(field_a, type_a, field_b, type_b); } +static inline int +tuple_compare_stub(const struct tuple *tuple_a, const struct tuple *tuple_b, + struct key_def *key_def) +{ + (void)tuple_a; + (void)tuple_b; + (void)key_def; + unreachable(); + return 0; +} + +static inline int +tuple_compare_with_key_stub(const struct tuple *tuple, const char *key, + uint32_t part_count, struct key_def *key_def) +{ + (void) tuple; + (void) key; + (void) part_count; + (void) key_def; + unreachable(); + return 0; +} + /** * @brief Compare two fields parts using a type definition * @param field_a field @@ -445,7 +468,8 @@ tuple_compare_field_with_type(const char *field_a, enum mp_type a_type, } } -template +template static inline int tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, const struct tuple *tuple_b, hint_t tuple_b_hint, @@ -455,8 +479,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); - int rc = hint_cmp(tuple_a_hint, tuple_b_hint); - if (rc != 0) + assert(key_def_is_multikey(key_def) == is_multikey); + assert(!is_multikey || is_multikey == has_json_paths); + int rc = 0; + if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0) return rc; struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); @@ -499,7 +525,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, end = part + key_def->part_count; for (; part < end; part++) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + (int)tuple_a_hint); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + (int)tuple_b_hint); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -556,7 +589,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, */ end = key_def->parts + key_def->part_count; for (; part < end; ++part) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + (int)tuple_a_hint); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + (int)tuple_b_hint); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, 
tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -586,11 +626,12 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_def *key_def) { return tuple_compare_slowpath_hinted - + (tuple_a, HINT_NONE, tuple_b, HINT_NONE, key_def); } -template +template static inline int tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, hint_t tuple_hint, const char *key, uint32_t part_count, @@ -602,8 +643,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, assert(has_optional_parts == key_def->has_optional_parts); assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); - int rc = hint_cmp(tuple_hint, key_hint); - if (rc != 0) + assert(key_def_is_multikey(key_def) == is_multikey); + assert(!is_multikey || is_multikey == has_json_paths); + int rc = 0; + if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0) return rc; struct key_part *part = key_def->parts; struct tuple_format *format = tuple_format(tuple); @@ -612,7 +655,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, enum mp_type a_type, b_type; if (likely(part_count == 1)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, + (int)tuple_hint); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -642,7 +689,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, struct key_part *end = part + part_count; for (; part < end; ++part, mp_next(&key)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, + (int)tuple_hint); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -684,7 +735,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, uint32_t part_count, struct key_def *key_def) { return tuple_compare_with_key_slowpath_hinted - + (tuple, HINT_NONE, key, part_count, HINT_NONE, key_def); } @@ -1314,6 +1365,16 @@ static const comparator_with_key_signature cmp_wk_arr[] = { /* {{{ tuple_hint */ /** + * Hints are now used for two purposes - passing the index of the + * key used in the case of multikey index and to speed up the + * comparators. + * + * Scenario I. Pass the multikey index of the key to comparator. + * In the case of multikey index arises an ambiguity: which key + * should be used in the comparison. Hints act as an non-negative + * numeric index of key to use. + * + * Scenario II. Speed up comparators. 
* A comparison hint is an unsigned integer number that has * the following layout: * @@ -1611,12 +1672,35 @@ tuple_hint(const struct tuple *tuple, struct key_def *key_def) return field_hint(field, key_def->parts->coll); } +static hint_t +key_hint_stub(const char *key, uint32_t part_count, struct key_def *key_def) +{ + (void) key; + (void) part_count; + (void) key_def; + return HINT_NONE; +} + +static hint_t +tuple_hint_stub(const struct tuple *tuple, struct key_def *key_def) +{ + (void) tuple; + (void) key_def; + unreachable(); + return HINT_NONE; +} + template static void key_def_set_hint_func(struct key_def *def) { - def->key_hint = key_hint; - def->tuple_hint = tuple_hint; + if (key_def_is_multikey(def)) { + def->key_hint = key_hint_stub; + def->tuple_hint = tuple_hint_stub; + } else { + def->key_hint = key_hint; + def->tuple_hint = tuple_hint; + } } template @@ -1710,7 +1794,7 @@ key_def_set_compare_func_fast(struct key_def *def) tuple_compare_slowpath; cmp_hinted = is_sequential ? tuple_compare_sequential_hinted : - tuple_compare_slowpath_hinted; + tuple_compare_slowpath_hinted; } if (cmp_wk == NULL) { cmp_wk = is_sequential ? @@ -1718,7 +1802,8 @@ key_def_set_compare_func_fast(struct key_def *def) tuple_compare_with_key_slowpath; cmp_wk_hinted = is_sequential ? tuple_compare_with_key_sequential_hinted : - tuple_compare_with_key_slowpath_hinted; + tuple_compare_with_key_slowpath_hinted; } def->tuple_compare = cmp; @@ -1746,12 +1831,13 @@ key_def_set_compare_func_plain(struct key_def *def) def->tuple_compare = tuple_compare_slowpath ; def->tuple_compare_hinted = tuple_compare_slowpath_hinted - ; + ; def->tuple_compare_with_key = tuple_compare_with_key_slowpath ; def->tuple_compare_with_key_hinted = tuple_compare_with_key_slowpath_hinted - ; + ; } } @@ -1760,15 +1846,25 @@ static void key_def_set_compare_func_json(struct key_def *def) { assert(def->has_json_paths); - def->tuple_compare = tuple_compare_slowpath - ; - def->tuple_compare_hinted = tuple_compare_slowpath_hinted - ; - def->tuple_compare_with_key = tuple_compare_with_key_slowpath - ; - def->tuple_compare_with_key_hinted = - tuple_compare_with_key_slowpath_hinted - ; + if (key_def_is_multikey(def)) { + def->tuple_compare = tuple_compare_stub; + def->tuple_compare_hinted = tuple_compare_slowpath_hinted + ; + def->tuple_compare_with_key = tuple_compare_with_key_stub; + def->tuple_compare_with_key_hinted = + tuple_compare_with_key_slowpath_hinted + ; + } else { + def->tuple_compare = tuple_compare_slowpath + ; + def->tuple_compare_hinted = tuple_compare_slowpath_hinted + ; + def->tuple_compare_with_key = tuple_compare_with_key_slowpath + ; + def->tuple_compare_with_key_hinted = + tuple_compare_with_key_slowpath_hinted + ; + } } void diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc index 836d4e565..081677ff6 100644 --- a/src/box/tuple_extract_key.cc +++ b/src/box/tuple_extract_key.cc @@ -30,6 +30,29 @@ key_def_contains_sequential_parts(const struct key_def *def) return false; } +static inline char * +tuple_extract_key_stub(const struct tuple *tuple, struct key_def *key_def, + uint32_t *key_size) +{ + (void) tuple; + (void) key_def; + (void) key_size; + unreachable(); + return NULL; +} + +static char * +tuple_extract_key_raw_stub(const char *data, const char *data_end, + struct key_def *key_def, uint32_t *key_size) +{ + (void) data; + (void) data_end; + (void) key_def; + (void) key_size; + unreachable(); + return NULL; +} + /** * Optimized version of tuple_extract_key_raw() for sequential key defs * @copydoc 
tuple_extract_key_raw() @@ -310,7 +333,7 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, const char *src_end = field_end; if (has_json_paths && part->path != NULL) { if (tuple_go_to_path(&src, part->path, - part->path_len) != 0) { + part->path_len, -1) != 0) { /* * The path must be correct as * it has already been validated @@ -349,6 +372,7 @@ static void key_def_set_extract_func_plain(struct key_def *def) { assert(!def->has_json_paths); + assert(!key_def_is_multikey(def)); if (key_def_is_sequential(def)) { assert(contains_sequential_parts || def->part_count == 1); def->tuple_extract_key = tuple_extract_key_sequential @@ -369,11 +393,16 @@ static void key_def_set_extract_func_json(struct key_def *def) { assert(def->has_json_paths); - def->tuple_extract_key = tuple_extract_key_slowpath - ; - def->tuple_extract_key_raw = tuple_extract_key_slowpath_raw - ; + if (!key_def_is_multikey(def)) { + def->tuple_extract_key = tuple_extract_key_slowpath + ; + def->tuple_extract_key_raw = tuple_extract_key_slowpath_raw + ; + } else { + def->tuple_extract_key = tuple_extract_key_stub; + def->tuple_extract_key_raw = tuple_extract_key_raw_stub; + } } void diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 9a643b700..33d6db420 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -194,6 +194,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id) return NULL; } +/** + * Check if child_field may be attached to parent_field, + * update the parent_field container type if required. + */ +static int +tuple_format_field_update_type(struct tuple_field *parent_field, + struct tuple_field *child_field) +{ + enum field_type expected_type = + child_field->token.type == JSON_TOKEN_STR ? + FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (child_field->token.type == JSON_TOKEN_ANY && + !json_token_is_multikey(&parent_field->token) && + !json_token_is_leaf(&parent_field->token)) { + assert(expected_type == FIELD_TYPE_ARRAY); + diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID, + tt_sprintf("field %s is already refer to document by " + "identifier and cannot use array index " + "placeholder [*]", + tuple_field_path(parent_field))); + return -1; + } + if (json_token_is_multikey(&parent_field->token) && + child_field->token.type != JSON_TOKEN_ANY) { + assert(expected_type == FIELD_TYPE_ARRAY); + diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID, + tt_sprintf("field %s is already used with array index " + "placeholder [*] and cannot refer to " + "document by identifier", + tuple_field_path(parent_field))); + return -1; + } + if (field_type1_contains_type2(parent_field->type, expected_type)) { + parent_field->type = expected_type; + } else { + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, + tuple_field_path(parent_field), + field_type_strs[parent_field->type], + field_type_strs[expected_type]); + return -1; + } + return 0; +} + /** * Given a field number and a path, add the corresponding field * to the tuple format, allocating intermediate fields if @@ -228,29 +272,16 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, *path_pool += path_len; int rc = 0; - uint32_t token_count = 0; + uint32_t token_count = 0, json_token_any_count = 0; struct json_tree *tree = &format->fields; struct json_lexer lexer; json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && field->token.type != JSON_TOKEN_END) { - if (field->token.type == JSON_TOKEN_ANY) { - diag_set(ClientError, 
ER_UNSUPPORTED, - "Tarantool", "multikey indexes"); + if (tuple_format_field_update_type(parent, field) != 0) goto fail; - } - enum field_type expected_type = - field->token.type == JSON_TOKEN_STR ? - FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; - if (field_type1_contains_type2(parent->type, expected_type)) { - parent->type = expected_type; - } else { - diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, - tuple_field_path(parent), - field_type_strs[parent->type], - field_type_strs[expected_type]); - goto fail; - } + if (field->token.type == JSON_TOKEN_ANY) + json_token_any_count++; struct tuple_field *next = json_tree_lookup_entry(tree, &parent->token, &field->token, @@ -268,6 +299,18 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, if (field == NULL) goto fail; } + if (json_token_is_multikey(&parent->token) && + parent->offset_slot == TUPLE_OFFSET_SLOT_NIL) { + /** + * Allocate offset slot for array is used + * in multikey index. This is required to + * quickly extract its size. + * @see tuple_field_multikey_items() + */ + assert(parent->type == FIELD_TYPE_ARRAY); + *current_slot = *current_slot - 1; + parent->offset_slot = *current_slot; + } parent->is_key_part = true; parent = next; token_count++; @@ -280,6 +323,13 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, assert(parent != NULL); /* Update tree depth information. */ format->fields_depth = MAX(format->fields_depth, token_count + 1); + if (json_token_any_count > 1) { + assert(path_len > 0); + diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID, + "no more than one array index placeholder [*] is " + "allowed in JSON path"); + goto fail; + } cleanup: tuple_field_delete(field); end: @@ -847,7 +897,16 @@ tuple_field_map_create(struct field_map_builder *builder, return -1; } /* Initialize field_map with data offset. */ - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + int multikey_idx = tuple_parse_iterator_multikey_idx(&it); + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + multikey_idx >= 0) { + int multikey_items = + tuple_parse_iterator_multikey_items(&it); + if (field_map_builder_set_extent_slot(builder, + field->offset_slot, multikey_idx, + multikey_items, pos - tuple) != 0) + return -1; + } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { field_map_builder_set_slot(builder, field->offset_slot, pos - tuple); } @@ -966,6 +1025,7 @@ tuple_parse_iterator_create(struct tuple_parse_iterator *it, it->parent = &format->fields.root; it->format = format; it->pos = data; + it->multikey_frame = NULL; return 0; } @@ -986,6 +1046,14 @@ tuple_parse_iterator_advice(struct tuple_parse_iterator *it, mp_stack_pop(&it->stack); if (mp_stack_is_empty(&it->stack)) return rc; + if (json_token_is_multikey(it->parent)) { + /* + * As we leave the multikey branch, + * we need to reset the pointer to + * multikey_frame. + */ + it->multikey_frame = NULL; + } it->parent = it->parent->parent; } /* @@ -1038,6 +1106,19 @@ tuple_parse_iterator_advice(struct tuple_parse_iterator *it, mp_decode_array(&it->pos) : mp_decode_map(&it->pos); mp_stack_push(&it->stack, type, size); + if (json_token_is_multikey(&(*field)->token)) { + /** + * Keep a pointer to the frame that + * describes an array with index + * placeholder [*]. The "current" item + * of this frame matches the logical + * index of document in multikey index + * and is equal to multikey index + * comparison hint. 
+ */ + assert(type == MP_ARRAY); + it->multikey_frame = mp_stack_top(&it->stack); + } it->parent = &(*field)->token; } else { mp_next(&it->pos); diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 88f03d5eb..d07ca91eb 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -437,6 +437,12 @@ struct tuple_parse_iterator { * pointer accordingly. */ struct mp_stack stack; + /** + * The pointer to the stack frame representing an array + * filed that has JSON_TOKEN_ANY child, i.e. the root + * of the multikey index. + */ + struct mp_frame *multikey_frame; /** The current read position in msgpack. */ const char *pos; }; @@ -479,6 +485,32 @@ tuple_parse_iterator_advice(struct tuple_parse_iterator *it, struct tuple_field **field, const char **data, const char **data_end); +/** + * Returns non-negative multikey index comparison hint when + * the iterator's position in the format::fields subtree + * corresponds to the multikey subtree and -1 otherwise. + */ +static inline int +tuple_parse_iterator_multikey_idx(struct tuple_parse_iterator *it) +{ + if (it->multikey_frame == NULL) + return -1; + return it->multikey_frame->curr; +} + +/** + * Returns positive number - a count of document's in multikey + * index for when the iterator's position in the format::fields + * subtree corresponds to the multikey subtree and -1 otherwise. + */ +static inline int +tuple_parse_iterator_multikey_items(struct tuple_parse_iterator *it) +{ + if (it->multikey_frame == NULL) + return -1; + return it->multikey_frame->count; +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vinyl.c b/src/box/vinyl.c index da8910255..c2860098a 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -699,6 +699,11 @@ vinyl_space_check_index_def(struct space *space, struct index_def *index_def) return -1; } } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_UNSUPPORTED, + "Vinyl", "multikey indexes"); + return -1; + } return 0; } diff --git a/test/box/misc.result b/test/box/misc.result index e2b618c9c..b89a7ed2f 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -522,6 +522,7 @@ t; 191: box.error.SQL_PARSER_LIMIT 192: box.error.INDEX_DEF_UNSUPPORTED 193: box.error.CK_DEF_UNSUPPORTED + 194: box.error.INDEX_MULTIKEY_INVALID ... test_run:cmd("setopt delimiter ''"); --- diff --git a/test/engine/json.result b/test/engine/json.result index 09c704963..6ccfe92b7 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -705,16 +705,3 @@ s:replace({4, {"d1", name='D1'}, "test"}) s:drop() --- ... --- --- gh-1260: Multikey indexes --- -s = box.schema.space.create('withdata') ---- -... -idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) ---- -- error: Tarantool does not support multikey indexes -... -s:drop() ---- -... 
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index 5c235e1ba..05c64ea93 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -201,10 +201,3 @@ idx0 = s:create_index('idx0', {parts = {{2, 'str', path = 'name'}, {3, "str"}}}) s:insert({4, {"d", name='D'}, "test"}) s:replace({4, {"d1", name='D1'}, "test"}) s:drop() - --- --- gh-1260: Multikey indexes --- -s = box.schema.space.create('withdata') -idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) -s:drop() diff --git a/test/engine/multikey.result b/test/engine/multikey.result new file mode 100644 index 000000000..dd6487383 --- /dev/null +++ b/test/engine/multikey.result @@ -0,0 +1,298 @@ +test_run = require('test_run').new() +--- +... +engine = test_run:get_cfg('engine') +--- +... +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata', {engine = 'vinyl'}) +--- +... +pk = s:create_index('pk') +--- +... +-- Vinyl's space can't be multikey (yet). +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +- error: Vinyl does not support multikey indexes +... +s:drop() +--- +... +s = box.schema.space.create('withdata', {engine = 'memtx'}) +--- +... +-- Primary index must be unique so it can't be multikey. +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key + cannot be multikey' +... +pk = s:create_index('pk') +--- +... +-- Only tree index type may be mutlikey. +_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': HASH index + cannot be multikey' +... +_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': BITSET index + cannot be multikey' +... +_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': RTREE index + cannot be multikey' +... +-- Test incompatible multikey index parts. +_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}}) +--- +- error: 'Wrong index options (field 2): incompatable multikey index path' +... +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}}) +--- +- error: 'Multikey index is invalid: no more than one array index placeholder [*] + is allowed in JSON path' +... +idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}}) +--- +... +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +- error: 'Multikey index is invalid: field 3 is already refer to document by identifier + and cannot use array index placeholder [*]' +... +idx0:drop() +--- +... +-- Unique multikey index. +idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +... +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +--- +- error: 'Multikey index is invalid: field 3 is already used with array index placeholder + [*] and cannot refer to document by identifier' +... 
+s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}}) +--- +- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}}) +--- +- error: Duplicate key exists in unique index 'arr_idx' in space 'withdata' +... +-- Non-unique multikey index; two multikey indexes per space. +arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}}) +--- +... +arr_idx:select() +--- +- - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:get({'James', 'Bond'}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:get({'Ivan', 'Ivanych'}) +--- +- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:get({'Vasya', 'Pupkin'}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +s:insert({4, {1}, {{fname='James', sname='Bond'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +-- Duplicates in multikey parts. +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +--- +- [5, [1, 1, 1], [{'fname': 'A', 'sname': 'B'}, {'fname': 'C', 'sname': 'D'}, {'fname': 'A', + 'sname': 'B'}]] +... +arr_idx:select({1}) +--- +- - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [5, [1, 1, 1], [{'fname': 'A', 'sname': 'B'}, {'fname': 'C', 'sname': 'D'}, { + 'fname': 'A', 'sname': 'B'}]] +... +s:delete(5) +--- +- [5, [1, 1, 1], [{'fname': 'A', 'sname': 'B'}, {'fname': 'C', 'sname': 'D'}, {'fname': 'A', + 'sname': 'B'}]] +... +-- Check that there is no garbage in index +arr_idx:select({1}) +--- +- - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:get({'A', 'B'}) +--- +... +idx:get({'C', 'D'}) +--- +... +idx:delete({'Vasya', 'Pupkin'}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +--- +- [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... 
+s:insert({7, {1}, {{fname='James', sname='Bond'}}}) +--- +- [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] +... +arr_idx:select({1}) +--- +- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +-- Snapshot & recovery. +box.snapshot() +--- +- ok +... +test_run:cmd("restart server default") +s = box.space["withdata"] +--- +... +idx = s.index["idx"] +--- +... +arr_idx = s.index["arr_idx"] +--- +... +s:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +arr_idx:select() +--- +- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +s:drop() +--- +... +-- Asymmetric multikey index paths. +s = box.schema.space.create('withdata') +--- +... +pk = s:create_index('pk') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +--- +... +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) +--- +- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1', + 'extra': {'sname': 'C2'}}]] +... +s:drop() +--- +... +-- Unique multikey index peculiar properties +s = box.schema.space.create('withdata') +--- +... +pk = s:create_index('pk') +--- +... +idx0 = s:create_index('idx0', {parts = {{2, 'int', path = '[*]'}}}) +--- +... +s:insert({1, {1, 1, 1}}) +--- +- [1, [1, 1, 1]] +... +s:insert({2, {2, 2}}) +--- +- [2, [2, 2]] +... +s:insert({3, {3, 3, 2, 2, 1, 1}}) +--- +- error: Duplicate key exists in unique index 'idx0' in space 'withdata' +... +idx0:get(2) +--- +- [2, [2, 2]] +... +idx0:get(1) +--- +- [1, [1, 1, 1]] +... +idx0:get(3) +--- +... +idx0:select() +--- +- - [1, [1, 1, 1]] + - [2, [2, 2]] +... +idx0:delete(2) +--- +- [2, [2, 2]] +... +idx0:get(2) +--- +... +idx0:select() +--- +- - [1, [1, 1, 1]] +... +s:drop() +--- +... diff --git a/test/engine/multikey.test.lua b/test/engine/multikey.test.lua new file mode 100644 index 000000000..71fc82a5f --- /dev/null +++ b/test/engine/multikey.test.lua @@ -0,0 +1,88 @@ +test_run = require('test_run').new() +engine = test_run:get_cfg('engine') + +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata', {engine = 'vinyl'}) +pk = s:create_index('pk') +-- Vinyl's space can't be multikey (yet). +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +s:drop() + +s = box.schema.space.create('withdata', {engine = 'memtx'}) +-- Primary index must be unique so it can't be multikey. +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}}) +pk = s:create_index('pk') +-- Only tree index type may be multikey. 
+_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}}) +_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}}) +_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}}) +-- Test incompatible multikey index parts. +_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}}) +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}}) +idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}}) +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +idx0:drop() +-- Unique multikey index. +idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}}) +_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}}) +-- Non-unique multikey index; two multikey indexes per space. +arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}}) +arr_idx:select() +idx:get({'James', 'Bond'}) +idx:get({'Ivan', 'Ivanych'}) +idx:get({'Vasya', 'Pupkin'}) +idx:select() +s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({4, {1}, {{fname='James', sname='Bond'}}}) +idx:select() +-- Duplicates in multikey parts. +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +arr_idx:select({1}) +s:delete(5) +-- Check that there is no garbage in index +arr_idx:select({1}) +idx:get({'A', 'B'}) +idx:get({'C', 'D'}) +idx:delete({'Vasya', 'Pupkin'}) +s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({7, {1}, {{fname='James', sname='Bond'}}}) +arr_idx:select({1}) +idx:select() +-- Snapshot & recovery. +box.snapshot() +test_run:cmd("restart server default") +s = box.space["withdata"] +idx = s.index["idx"] +arr_idx = s.index["arr_idx"] +s:select() +idx:select() +arr_idx:select() +s:drop() + +-- Asymmetric multikey index paths. +s = box.schema.space.create('withdata') +pk = s:create_index('pk') +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) +s:drop() + +-- Unique multikey index peculiar properties
s = box.schema.space.create('withdata') +pk = s:create_index('pk') +idx0 = s:create_index('idx0', {parts = {{2, 'int', path = '[*]'}}}) +s:insert({1, {1, 1, 1}}) +s:insert({2, {2, 2}}) +s:insert({3, {3, 3, 2, 2, 1, 1}}) +idx0:get(2) +idx0:get(1) +idx0:get(3) +idx0:select() +idx0:delete(2) +idx0:get(2) +idx0:select() +s:drop() -- 2.21.0