From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] [PATCH v2 5/5] box: introduce multikey indexes in memtx References: <5cc1d6c549a630d4925475de757baed71d5b5b1d.1552998554.git.kshcherbatov@tarantool.org> From: Kirill Shcherbatov Message-ID: <6d75d5fa-9564-cda6-52f3-5935ae2e3b73@tarantool.org> Date: Thu, 21 Mar 2019 15:35:35 +0300 MIME-Version: 1.0 In-Reply-To: <5cc1d6c549a630d4925475de757baed71d5b5b1d.1552998554.git.kshcherbatov@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com List-ID: I've rebased patchset on master branch with new hints. ================================================= Part of #1257 @TarantoolBot document Title: introduce multikey indexes in memtx Multikey indexes allows you to automatically index set of documents by JSON paths having array index placeholder "[*]". Multikey index cannot be primary as it cannot be unique(by definition). Multikey index parts must be compatible: only one "[*]" placeholder is allowed in same position(for all JSON paths in index parts). Example: s = box.schema.space.create('withdata') pk = s:create_index('pk') parts = { {2, 'str', path = 'data[*].name'}, {2, 'str', path = 'data[*].extra.phone'} } idx = s:create_index('idx', {parts = parts}) s:insert({1, {data = {{name="A", extra={phone="111"}}, {name="B", extra={phone="111"}}}, garbage = 1}} idx:get({'A', '111'}) --- src/box/errcode.h | 1 + src/box/index_def.c | 25 +++- src/box/key_def.c | 10 ++ src/box/key_def.h | 18 +++ src/box/memtx_space.c | 18 +++ src/box/memtx_tree.c | 185 ++++++++++++++++++++---- src/box/tuple.c | 28 +++- src/box/tuple.h | 111 +++++++++++++-- src/box/tuple_compare.cc | 84 ++++++++--- src/box/tuple_extract_key.cc | 2 +- src/box/tuple_field_map.c | 27 ++++ src/box/tuple_field_map.h | 87 +++++++++++- src/box/tuple_format.c | 104 +++++++++++--- src/box/vinyl.c | 6 + test/box/misc.result | 1 + test/engine/json.result | 13 -- test/engine/json.test.lua | 7 - test/engine/multikey_idx.result | 228 ++++++++++++++++++++++++++++++ test/engine/multikey_idx.test.lua | 67 +++++++++ 19 files changed, 909 insertions(+), 113 deletions(-) create mode 100644 test/engine/multikey_idx.result create mode 100644 test/engine/multikey_idx.test.lua diff --git a/src/box/errcode.h b/src/box/errcode.h index 7764aa352..c6f42463d 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -240,6 +240,7 @@ struct errcode_record { /*185 */_(ER_SQL_UNKNOWN_TOKEN, "Syntax error: unrecognized token: '%.*s'") \ /*186 */_(ER_SQL_PARSER_GENERIC, "%s") \ /*187 */_(ER_SQL_ANALYZE_ARGUMENT, "ANALYZE statement argument %s is not a base table") \ + /*188 */_(ER_INDEX_MULTIKEY_INVALID, "Multikey index is invalid: %s") \ /* * !IMPORTANT! Please follow instructions at start of the file diff --git a/src/box/index_def.c b/src/box/index_def.c index f0c7dc4f6..dfa539f86 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -268,9 +268,15 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) space_name, "too many key parts"); return false; } + if (index_def->iid == 0 && key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, + space_name, "primary key cannot be multikey"); + return false; + } for (uint32_t i = 0; i < index_def->key_def->part_count; i++) { - assert(index_def->key_def->parts[i].type < field_type_MAX); - if (index_def->key_def->parts[i].fieldno > BOX_INDEX_FIELD_MAX) { + struct key_part *parts = index_def->key_def->parts; + assert(parts[i].type < field_type_MAX); + if (parts[i].fieldno > BOX_INDEX_FIELD_MAX) { diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name, "field no is too big"); return false; @@ -280,8 +286,8 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) * Courtesy to a user who could have made * a typo. */ - struct key_part *part_a = &index_def->key_def->parts[i]; - struct key_part *part_b = &index_def->key_def->parts[j]; + struct key_part *part_a = &parts[i]; + struct key_part *part_b = &parts[j]; if (part_a->fieldno == part_b->fieldno && json_path_cmp(part_a->path, part_a->path_len, part_b->path, part_b->path_len, @@ -292,6 +298,17 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) return false; } } + if (key_def_is_multikey(index_def->key_def) && + json_path_cmp(parts[i].path, parts[i].path_len, + parts[index_def->key_def-> + multikey_part_idx].path, + index_def->key_def->multikey_path_len, + true, TUPLE_INDEX_BASE) != 0) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name, + "multikey index parts are incompatible"); + return false; + } } return true; } diff --git a/src/box/key_def.c b/src/box/key_def.c index f9e464402..ffc1e1665 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -164,6 +164,13 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, *path_pool += path_len; memcpy(def->parts[part_no].path, path, path_len); def->parts[part_no].path_len = path_len; + int multikey_path_len; + if (json_path_is_multikey(path, path_len, &multikey_path_len, + TUPLE_INDEX_BASE) && + def->multikey_path_len <= (uint32_t)multikey_path_len) { + def->multikey_part_idx = part_no; + def->multikey_path_len = (uint32_t)multikey_path_len; + } } else { def->parts[part_no].path = NULL; def->parts[part_no].path_len = 0; @@ -185,6 +192,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) } def->part_count = part_count; + def->multikey_part_idx = part_count; def->unique_part_count = part_count; /* A pointer to the JSON paths data in the new key_def. */ @@ -253,6 +261,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) } key_def->part_count = part_count; + key_def->multikey_part_idx = part_count; key_def->unique_part_count = part_count; for (uint32_t item = 0; item < part_count; ++item) { @@ -672,6 +681,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second) return NULL; } new_def->part_count = new_part_count; + new_def->multikey_part_idx = new_part_count; new_def->unique_part_count = new_part_count; new_def->is_nullable = first->is_nullable || second->is_nullable; new_def->has_optional_parts = first->has_optional_parts || diff --git a/src/box/key_def.h b/src/box/key_def.h index 288cf7270..004794969 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -234,6 +234,18 @@ struct key_def { bool has_optional_parts; /** Key fields mask. @sa column_mask.h for details. */ uint64_t column_mask; + /** + * In case of multikey index, the index of the key_part + * containing JSON path with array index placeholder "[*]". + * Otherwise multikey_part_idx == part_count. + */ + uint32_t multikey_part_idx; + /** + * In case of multikey index, the length of the + * parts[multikey_part_idx].path substring "...[*]" + * @see json_path_is_multikey(). + */ + uint32_t multikey_path_len; /** The size of the 'parts' array. */ uint32_t part_count; /** Description of parts of a multipart index. */ @@ -472,6 +484,12 @@ key_def_is_sequential(const struct key_def *key_def) return true; } +static inline bool +key_def_is_multikey(const struct key_def *key_def) +{ + return key_def->multikey_part_idx < key_def->part_count; +} + /** * Return true if @a key_def defines has fields that requires * special collation comparison. diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index 92ec0b300..9a5132979 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -638,6 +638,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def) "HASH index must be unique"); return -1; } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "HASH index cannot be multikey"); + return -1; + } break; case TREE: /* TREE index has no limitations. */ @@ -661,6 +667,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def) "RTREE index field type must be ARRAY"); return -1; } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "RTREE index cannot be multikey"); + return -1; + } /* no furter checks of parts needed */ return 0; case BITSET: @@ -683,6 +695,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def) "BITSET index field type must be NUM or STR"); return -1; } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "BITSET index cannot be multikey"); + return -1; + } /* no furter checks of parts needed */ return 0; default: diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index fe037c54a..c9d3b2130 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -528,7 +528,8 @@ memtx_tree_index_get(struct index *base, const char *key, struct memtx_tree_key_data key_data; key_data.key = key; key_data.part_count = part_count; - key_data.hint = key_hint(key, part_count, cmp_def); + if (!key_def_is_multikey(cmp_def)) + key_data.hint = key_hint(key, part_count, cmp_def); struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data); *result = res != NULL ? res->tuple : NULL; return 0; @@ -584,6 +585,79 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, return 0; } +int +memtx_tree_index_replace_multikey(struct index *base, struct tuple *old_tuple, + struct tuple *new_tuple, + enum dup_replace_mode mode, + struct tuple **result) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + if (new_tuple != NULL) { + int errcode = 0, tree_res = 0; + struct tuple *dup_tuple = NULL; + int multikey_idx = 0; + uint32_t items = tuple_field_multikey_items(new_tuple, cmp_def); + for (; (uint32_t)multikey_idx < items; multikey_idx++) { + struct memtx_tree_data new_data; + new_data.tuple = new_tuple; + new_data.hint = multikey_idx; + struct memtx_tree_data dup_data; + dup_data.tuple = NULL; + tree_res = memtx_tree_insert(&index->tree, new_data, + &dup_data); + if (tree_res != 0) { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, + "memtx_tree_index", "replace"); + break; + } + dup_tuple = dup_data.tuple; + errcode = replace_check_dup(old_tuple, dup_tuple, mode); + if (errcode != 0) { + memtx_tree_delete(&index->tree, new_data); + if (dup_tuple != NULL) { + memtx_tree_insert(&index->tree, + dup_data, NULL); + } + struct space *sp = + space_cache_find(base->def->space_id); + if (sp != NULL) { + diag_set(ClientError, errcode, + base->def->name, + space_name(sp)); + } + break; + } + } + if (tree_res != 0 || errcode != 0) { + multikey_idx--; + for (; multikey_idx >= 0; multikey_idx--) { + struct memtx_tree_data data; + data.tuple = new_tuple; + data.hint = multikey_idx; + memtx_tree_delete(&index->tree, data); + } + return -1; + } + if (dup_tuple != NULL) { + *result = dup_tuple; + return 0; + } + } + if (old_tuple != NULL) { + uint32_t items = tuple_field_multikey_items(old_tuple, cmp_def); + for (uint32_t multikey_idx = 0; multikey_idx < items; + multikey_idx++) { + struct memtx_tree_data old_data; + old_data.tuple = old_tuple; + old_data.hint = multikey_idx; + memtx_tree_delete(&index->tree, old_data); + } + } + *result = old_tuple; + return 0; +} + static struct iterator * memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, const char *key, uint32_t part_count) @@ -621,7 +695,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, it->type = type; it->key_data.key = key; it->key_data.part_count = part_count; - it->key_data.hint = key_hint(key, part_count, cmp_def); + if (!key_def_is_multikey(cmp_def)) + it->key_data.hint = key_hint(key, part_count, cmp_def); it->index_def = base->def; it->tree = &index->tree; it->tree_iterator = memtx_tree_invalid_iterator(); @@ -656,34 +731,43 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint) } static int -memtx_tree_index_build_next(struct index *base, struct tuple *tuple) +memtx_tree_index_build_array_realloc(struct memtx_tree_index *index, + uint32_t items) { - struct memtx_tree_index *index = (struct memtx_tree_index *)base; - struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + bool need_realloc = false; if (index->build_array == NULL) { - index->build_array = malloc(MEMTX_EXTENT_SIZE); - if (index->build_array == NULL) { - diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, - "memtx_tree_index", "build_next"); - return -1; - } index->build_array_alloc_size = MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]); + need_realloc = true; } - assert(index->build_array_size <= index->build_array_alloc_size); - if (index->build_array_size == index->build_array_alloc_size) { - index->build_array_alloc_size = index->build_array_alloc_size + - index->build_array_alloc_size / 2; - struct memtx_tree_data *tmp = - realloc(index->build_array, - index->build_array_alloc_size * sizeof(*tmp)); - if (tmp == NULL) { - diag_set(OutOfMemory, index->build_array_alloc_size * - sizeof(*tmp), "memtx_tree_index", "build_next"); - return -1; - } - index->build_array = tmp; + uint32_t build_array_new_size = index->build_array_size + items; + if (build_array_new_size > index->build_array_alloc_size) { + index->build_array_alloc_size += + MAX(index->build_array_alloc_size / 2, + build_array_new_size - index->build_array_alloc_size); + need_realloc = true; + } + if (!need_realloc) + return 0; + struct memtx_tree_data *tmp = + realloc(index->build_array, + index->build_array_alloc_size * sizeof(*tmp)); + if (tmp == NULL) { + diag_set(OutOfMemory, index->build_array_alloc_size * + sizeof(*tmp), "memtx_tree_index", "build_next"); + return -1; } + index->build_array = tmp; + return 0; +} + +static int +memtx_tree_index_build_next(struct index *base, struct tuple *tuple) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + if (memtx_tree_index_build_array_realloc(index, 1) != 0) + return -1; struct memtx_tree_data *elem = &index->build_array[index->build_array_size++]; elem->tuple = tuple; @@ -691,6 +775,24 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) return 0; } +static int +memtx_tree_index_build_next_multikey(struct index *base, struct tuple *tuple) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); + uint32_t items = tuple_field_multikey_items(tuple, cmp_def); + if (memtx_tree_index_build_array_realloc(index, items) != 0) + return -1; + for (uint32_t multikey_idx = 0; multikey_idx < items; multikey_idx++) { + struct memtx_tree_data *elem = + &index->build_array[index->build_array_size++]; + assert(index->build_array_size <= index->build_array_alloc_size); + elem->tuple = tuple; + elem->hint = multikey_idx; + } + return 0; +} + static void memtx_tree_index_end_build(struct index *base) { @@ -793,6 +895,36 @@ static const struct index_vtab memtx_tree_index_vtab = { /* .end_build = */ memtx_tree_index_end_build, }; +static const struct index_vtab memtx_tree_index_multikey_vtab = { + /* .destroy = */ memtx_tree_index_destroy, + /* .commit_create = */ generic_index_commit_create, + /* .abort_create = */ generic_index_abort_create, + /* .commit_modify = */ generic_index_commit_modify, + /* .commit_drop = */ generic_index_commit_drop, + /* .update_def = */ memtx_tree_index_update_def, + /* .depends_on_pk = */ memtx_tree_index_depends_on_pk, + /* .def_change_requires_rebuild = */ + memtx_index_def_change_requires_rebuild, + /* .size = */ memtx_tree_index_size, + /* .bsize = */ memtx_tree_index_bsize, + /* .min = */ generic_index_min, + /* .max = */ generic_index_max, + /* .random = */ memtx_tree_index_random, + /* .count = */ memtx_tree_index_count, + /* .get = */ memtx_tree_index_get, + /* .replace = */ memtx_tree_index_replace_multikey, + /* .create_iterator = */ memtx_tree_index_create_iterator, + /* .create_snapshot_iterator = */ + memtx_tree_index_create_snapshot_iterator, + /* .stat = */ generic_index_stat, + /* .compact = */ generic_index_compact, + /* .reset_stat = */ generic_index_reset_stat, + /* .begin_build = */ memtx_tree_index_begin_build, + /* .reserve = */ memtx_tree_index_reserve, + /* .build_next = */ memtx_tree_index_build_next_multikey, + /* .end_build = */ memtx_tree_index_end_build, +}; + struct index * memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) { @@ -803,8 +935,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) "malloc", "struct memtx_tree_index"); return NULL; } + const struct index_vtab *vtab = key_def_is_multikey(def->key_def) ? + &memtx_tree_index_multikey_vtab : + &memtx_tree_index_vtab; if (index_create(&index->base, (struct engine *)memtx, - &memtx_tree_index_vtab, def) != 0) { + vtab, def) != 0) { free(index); return NULL; } diff --git a/src/box/tuple.c b/src/box/tuple.c index 68f0670f2..985117957 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -129,8 +129,6 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple) struct field_map_builder builder; int rc = tuple_field_map_create(&builder, format, tuple, true); region_truncate(region, region_svp); - assert(rc != 0 || - field_map_builder_size(&builder) == format->field_map_size); return rc; } @@ -455,7 +453,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len) } int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len) +tuple_go_to_path(const char **data, const char *path, uint32_t path_len, + int multikey_idx) { int rc; struct json_lexer lexer; @@ -463,6 +462,10 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len) json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { switch (token.type) { + case JSON_TOKEN_ANY: + assert(multikey_idx >= 0); + token.num = multikey_idx; + FALLTHROUGH; case JSON_TOKEN_NUM: rc = tuple_field_go_to_index(data, token.num); break; @@ -532,7 +535,24 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, } return tuple_field_raw_by_path(format, tuple, field_map, fieldno, path + lexer.offset, - path_len - lexer.offset, NULL); + path_len - lexer.offset, NULL, -1); +} + +uint32_t +tuple_field_raw_multikey_items(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_def *key_def) +{ + assert(key_def_is_multikey(key_def)); + struct key_part *part = &key_def->parts[key_def->multikey_part_idx]; + const char *array_raw = + tuple_field_raw_by_path(format, data, field_map, part->fieldno, + part->path, key_def->multikey_path_len, + NULL, -1); + if (array_raw == NULL) + return 0; + assert(mp_typeof(*array_raw) == MP_ARRAY); + return mp_decode_array(&array_raw); } /* }}} tuple_field_* getters */ diff --git a/src/box/tuple.h b/src/box/tuple.h index 8b12fd5a8..060e4a076 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -508,14 +508,51 @@ tuple_field_count(const struct tuple *tuple) * with NULL. * @param path The path to process. * @param path_len The length of the @path. + * @param multikey_idx The multikey index hint - index of + * document to retrieve when array index + * placeholder "[*]" is met. * @retval 0 On success. * @retval -1 In case of error in JSON path. */ int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len); +tuple_go_to_path(const char **data, const char *path, uint32_t path_len, + int multikey_idx); /** - * Get tuple field by field index and relative JSON path. + * Get tuple field by field_map, offset_slot and multikey_idx. + * @param tuple MessagePack tuple's body. + * @param field_map Tuple field map. + * @param offset_slot The field_map slot with field to get offset. + * @param multikey_idx The multikey index hint - index of + * document to retrieve when array index + * placeholder "[*]" is met. + * @retval Not NULL field pointer on success. + * @retval NULL otherwise, when field is absents. + */ +static const char * +tuple_field_raw_by_offset_slot(const char *tuple, const uint32_t *field_map, + int32_t offset_slot, int multikey_idx) +{ + assert(offset_slot != TUPLE_OFFSET_SLOT_NIL); + uint32_t offset; + if (multikey_idx >= 0) { + assert(field_map[offset_slot] != 0); + struct field_map_ext *extent = + field_map_ext_get(field_map, offset_slot); + if ((uint32_t)multikey_idx >= extent->items) + return NULL; + offset = extent->offset[multikey_idx]; + } else { + offset = field_map[offset_slot]; + } + if (offset == 0) + return NULL; + return tuple + offset; +} + +/** + * Get tuple field by field index, relative JSON path and + * multikey_idx. * @param format Tuple format. * @param tuple MessagePack tuple's body. * @param field_map Tuple field map. @@ -528,12 +565,15 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len); * access data in a single operation. * Else it is initialized with offset_slot * of format field by path. + * @param multikey_idx The multikey index hint - index of + * document to retrieve when array index + * placeholder "[*]" is met. */ static inline const char * tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t fieldno, const char *path, uint32_t path_len, - int32_t *offset_slot_hint) + int32_t *offset_slot_hint, int multikey_idx) { int32_t offset_slot; if (offset_slot_hint != NULL && @@ -558,10 +598,8 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, if (offset_slot_hint != NULL) *offset_slot_hint = offset_slot; offset_slot_access: - /* Indexed field */ - if (field_map[offset_slot] == 0) - return NULL; - tuple += field_map[offset_slot]; + tuple = tuple_field_raw_by_offset_slot(tuple, field_map, + offset_slot, multikey_idx); } else { uint32_t field_count; parse: @@ -572,8 +610,8 @@ parse: for (uint32_t k = 0; k < fieldno; k++) mp_next(&tuple); if (path != NULL && - unlikely(tuple_go_to_path(&tuple, path, - path_len) != 0)) + unlikely(tuple_go_to_path(&tuple, path, path_len, + multikey_idx) != 0)) return NULL; } return tuple; @@ -595,7 +633,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t field_no) { return tuple_field_raw_by_path(format, tuple, field_map, field_no, - NULL, 0, NULL); + NULL, 0, NULL, -1); } /** @@ -634,16 +672,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, uint32_t path_len, uint32_t path_hash); /** - * Get a tuple field pointed to by an index part. + * Get a tuple field pointed to by an index part and multikey hint. * @param format Tuple format. * @param tuple A pointer to MessagePack array. * @param field_map A pointer to the LAST element of field map. + * @param multikey_idx A multikey hint. * @param part Index part to use. * @retval Field data if the field exists or NULL. */ static inline const char * -tuple_field_raw_by_part(struct tuple_format *format, const char *data, - const uint32_t *field_map, struct key_part *part) +tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, int multikey_idx) { if (unlikely(part->format_epoch != format->epoch)) { assert(format->epoch != 0); @@ -656,7 +696,23 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data, } return tuple_field_raw_by_path(format, data, field_map, part->fieldno, part->path, part->path_len, - &part->offset_slot_cache); + &part->offset_slot_cache, multikey_idx); +} + +/** + * Get a tuple field pointed to by an index part. + * @param format Tuple format. + * @param tuple A pointer to MessagePack array. + * @param field_map A pointer to the LAST element of field map. + * @param part Index part to use. + * @retval Field data if the field exists or NULL. + */ +static inline const char * +tuple_field_raw_by_part(struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part) +{ + return tuple_field_raw_by_part_multikey(format, data, field_map, + part, -1); } /** @@ -672,6 +728,33 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part) tuple_field_map(tuple), part); } +/** + * Get count of documents in the multikey index. + * @param format Tuple format. + * @param data A pointer to MessagePack array. + * @param field_map A pointer to the LAST element of field map. + * @param key_def Index key_definition. + * @retval Count of documents in the multikey index. + */ +uint32_t +tuple_field_raw_multikey_items(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_def *key_def); + +/** + * Get count of documents in the multikey index. + * @param tuple Tuple to get the count of documents from. + * @param key_def Index key_definition. + * @retval Count of documents in the multikey index. + */ +static inline uint32_t +tuple_field_multikey_items(struct tuple *tuple, struct key_def *key_def) +{ + return tuple_field_raw_multikey_items(tuple_format(tuple), + tuple_data(tuple), + tuple_field_map(tuple), key_def); +} + /** * @brief Tuple Interator */ diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 93756365b..14c630af5 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -445,7 +445,8 @@ tuple_compare_field_with_type(const char *field_a, enum mp_type a_type, } } -template +template static inline int tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, const struct tuple *tuple_b, hint_t tuple_b_hint, @@ -455,8 +456,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); - int rc = hint_cmp(tuple_a_hint, tuple_b_hint); - if (rc != 0) + assert(key_def_is_multikey(key_def) == is_multikey); + assert(!is_multikey || is_multikey == has_json_paths); + int rc = 0; + if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0) return rc; struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); @@ -499,7 +502,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, end = part + key_def->part_count; for (; part < end; part++) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + (int)tuple_a_hint); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + (int)tuple_b_hint); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -556,7 +566,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, */ end = key_def->parts + key_def->part_count; for (; part < end; ++part) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + (int)tuple_a_hint); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + (int)tuple_b_hint); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -586,24 +603,28 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_def *key_def) { return tuple_compare_slowpath_hinted - + (tuple_a, HINT_NONE, tuple_b, HINT_NONE, key_def); } -template +template static inline int tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, hint_t tuple_hint, const char *key, uint32_t part_count, hint_t key_hint, struct key_def *key_def) { + (void)key_hint; assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); - int rc = hint_cmp(tuple_hint, key_hint); - if (rc != 0) + assert(key_def_is_multikey(key_def) == is_multikey); + assert(!is_multikey || is_multikey == has_json_paths); + int rc = 0; + if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0) return rc; struct key_part *part = key_def->parts; struct tuple_format *format = tuple_format(tuple); @@ -612,7 +633,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, enum mp_type a_type, b_type; if (likely(part_count == 1)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, + (int)tuple_hint); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -642,7 +667,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, struct key_part *end = part + part_count; for (; part < end; ++part, mp_next(&key)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, + (int)tuple_hint); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -684,7 +713,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, uint32_t part_count, struct key_def *key_def) { return tuple_compare_with_key_slowpath_hinted - + (tuple, HINT_NONE, key, part_count, HINT_NONE, key_def); } @@ -1710,7 +1739,7 @@ key_def_set_compare_func_fast(struct key_def *def) tuple_compare_slowpath; cmp_hinted = is_sequential ? tuple_compare_sequential_hinted : - tuple_compare_slowpath_hinted; + tuple_compare_slowpath_hinted; } if (cmp_wk == NULL) { cmp_wk = is_sequential ? @@ -1718,7 +1747,8 @@ key_def_set_compare_func_fast(struct key_def *def) tuple_compare_with_key_slowpath; cmp_wk_hinted = is_sequential ? tuple_compare_with_key_sequential_hinted : - tuple_compare_with_key_slowpath_hinted; + tuple_compare_with_key_slowpath_hinted; } def->tuple_compare = cmp; @@ -1746,16 +1776,17 @@ key_def_set_compare_func_plain(struct key_def *def) def->tuple_compare = tuple_compare_slowpath ; def->tuple_compare_hinted = tuple_compare_slowpath_hinted - ; + ; def->tuple_compare_with_key = tuple_compare_with_key_slowpath ; def->tuple_compare_with_key_hinted = tuple_compare_with_key_slowpath_hinted - ; + ; } } -template +template static void key_def_set_compare_func_json(struct key_def *def) { @@ -1763,12 +1794,12 @@ key_def_set_compare_func_json(struct key_def *def) def->tuple_compare = tuple_compare_slowpath ; def->tuple_compare_hinted = tuple_compare_slowpath_hinted - ; + ; def->tuple_compare_with_key = tuple_compare_with_key_slowpath ; def->tuple_compare_with_key_hinted = tuple_compare_with_key_slowpath_hinted - ; + ; } void @@ -1786,14 +1817,23 @@ key_def_set_compare_func(struct key_def *def) assert(!def->is_nullable && !def->has_optional_parts); key_def_set_compare_func_plain(def); } + } else if (key_def_is_multikey(def)) { + if (def->is_nullable && def->has_optional_parts) { + key_def_set_compare_func_json(def); + } else if (def->is_nullable && !def->has_optional_parts) { + key_def_set_compare_func_json(def); + } else { + assert(!def->is_nullable && !def->has_optional_parts); + key_def_set_compare_func_json(def); + } } else { if (def->is_nullable && def->has_optional_parts) { - key_def_set_compare_func_json(def); + key_def_set_compare_func_json(def); } else if (def->is_nullable && !def->has_optional_parts) { - key_def_set_compare_func_json(def); + key_def_set_compare_func_json(def); } else { assert(!def->is_nullable && !def->has_optional_parts); - key_def_set_compare_func_json(def); + key_def_set_compare_func_json(def); } } key_def_set_hint_func(def); diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc index 0a8337102..00bcfacdd 100644 --- a/src/box/tuple_extract_key.cc +++ b/src/box/tuple_extract_key.cc @@ -310,7 +310,7 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, const char *src_end = field_end; if (has_json_paths && part->path != NULL) { if (tuple_go_to_path(&src, part->path, - part->path_len) != 0) { + part->path_len, -1) != 0) { /* * The path must be correct as * it has already been validated diff --git a/src/box/tuple_field_map.c b/src/box/tuple_field_map.c index ea77745aa..e76068619 100644 --- a/src/box/tuple_field_map.c +++ b/src/box/tuple_field_map.c @@ -36,6 +36,8 @@ int field_map_builder_create(struct field_map_builder *builder, uint32_t field_map_size, struct region *region) { + builder->region = region; + builder->extents_size = 0; builder->item_count = field_map_size / sizeof(uint32_t); if (field_map_size == 0) { builder->items = NULL; @@ -51,3 +53,28 @@ field_map_builder_create(struct field_map_builder *builder, builder->items = (field_map_builder_item *)((char *)builder->items + sz); return 0; } + +struct field_map_ext * +field_map_builder_ext_get(struct field_map_builder *builder, int32_t offset_slot, + uint32_t extent_items) +{ + struct field_map_ext *extent; + if (builder->items[offset_slot].has_extent) { + extent = builder->items[offset_slot].extent; + assert(extent != NULL); + assert(extent->items == extent_items); + return extent; + } + uint32_t sz = field_map_ext_size(extent_items); + extent = region_alloc(builder->region, sz); + if (extent == NULL) { + diag_set(OutOfMemory, sz, "region", "extent"); + return NULL; + } + memset(extent, 0, sz); + extent->items = extent_items; + builder->items[offset_slot].extent = extent; + builder->items[offset_slot].has_extent = true; + builder->extents_size += sz; + return extent; +} diff --git a/src/box/tuple_field_map.h b/src/box/tuple_field_map.h index 3a9927faa..042209a80 100644 --- a/src/box/tuple_field_map.h +++ b/src/box/tuple_field_map.h @@ -31,11 +31,48 @@ * SUCH DAMAGE. */ #include +#include +#include struct region; +/** The field_map extent. */ +struct field_map_ext { + /** Count of field_map_ext::offset[] items. */ + uint32_t items; + /** Data offset in tuple array. */ + uint32_t offset[0]; +}; + +/** Return field_map extention allocation size. */ +static inline uint32_t +field_map_ext_size(uint32_t items) +{ + return sizeof(struct field_map_ext) + items * sizeof(uint32_t); +} + +/** Return field_map extention for field_map and offset_slot. */ +static inline struct field_map_ext * +field_map_ext_get(const uint32_t *field_map, int32_t offset_slot) +{ + return (struct field_map_ext *)((char *)field_map - + field_map[offset_slot]); +} + /** Preliminary field_map atom. */ -typedef uint32_t field_map_builder_item; +typedef struct { + /** + * True when this slot must be interpret as + * extention pointer. + */ + bool has_extent; + union { + /** Data offset in tuple. */ + uint32_t offset; + /** Pointer to field_map_ext extention. */ + struct field_map_ext *extent; + }; +} field_map_builder_item; /** A class that contains a preliminary view of the field_map. */ struct field_map_builder { @@ -49,8 +86,20 @@ struct field_map_builder { * count of field_map_builder::items. */ uint32_t item_count; + /** + * Total size of memory is allocated for field_map + * extentions. + */ + uint32_t extents_size; + /** Region to use for allocations. */ + struct region *region; }; +/** Get or allocate field_map_ext for offset_slot. */ +struct field_map_ext * +field_map_builder_ext_get(struct field_map_builder *builder, + int32_t offset_slot, uint32_t extent_items); + /** * Initialize field_map_builder to prepare field_map of size * field_map_size. Use region for temporary allocations. @@ -65,22 +114,50 @@ field_map_builder_slot_set(struct field_map_builder *builder, int32_t offset_slot, uint32_t offset) { assert(offset_slot < 0); - builder->items[offset_slot] = offset; + builder->items[offset_slot].offset = offset; +} + +/** Initialize corresponding field_map_ext with offset value. */ +static inline int +field_map_builder_ext_slot_set(struct field_map_builder *builder, + int32_t offset_slot, int32_t extent_slot, + uint32_t extent_items, uint32_t offset) +{ + assert(offset_slot < 0 && extent_slot >= 0 && extent_items > 0); + struct field_map_ext *extent = + field_map_builder_ext_get(builder, offset_slot, extent_items); + if (extent == NULL) + return -1; + assert(extent->items == extent_items); + extent->offset[extent_slot] = offset; + return 0; } /** Return built field_map size. */ static inline uint32_t field_map_builder_size(struct field_map_builder *builder) { - return builder->item_count * sizeof(uint32_t); + return builder->item_count * sizeof(uint32_t) + builder->extents_size; } /** Write build field_map size in the preallocated buffer wptr. */ static inline void field_map_builder_build(struct field_map_builder *builder, char *wptr) { - uint32_t field_map_size = field_map_builder_size(builder); - memcpy(wptr, (char *)builder->items - field_map_size, field_map_size); + uint32_t *field_map = + (uint32_t *)(wptr + field_map_builder_size(builder)); + char *extent_wptr = wptr + builder->extents_size; + for (int32_t i = -1; i >= -(int32_t)builder->item_count; i--) { + if (!builder->items[i].has_extent) { + field_map[i] = builder->items[i].offset; + continue; + } + struct field_map_ext *extent = builder->items[i].extent; + uint32_t sz = field_map_ext_size(extent->items); + extent_wptr -= sz; + field_map[i] = (char *)field_map - (char *)extent_wptr; + memcpy(extent_wptr, extent, sz); + } } #endif /* TARANTOOL_BOX_FIELD_MAP_H_INCLUDED */ diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 1d765c7c7..883656bf9 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -194,6 +194,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id) return NULL; } +/** + * Check if child_field may be attached to parent_field, + * update the parent_field container type if required. + */ +static int +tuple_format_field_update_type(struct tuple_field *parent_field, + struct tuple_field *child_field) +{ + enum field_type expected_type = + child_field->token.type == JSON_TOKEN_STR ? + FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (child_field->token.type == JSON_TOKEN_ANY && + !json_token_is_multikey(&parent_field->token) && + !json_token_is_leaf(&parent_field->token)) { + assert(expected_type == FIELD_TYPE_ARRAY); + diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID, + tt_sprintf("field %s is already refer to document by " + "identifier and cannot use array index " + "placeholder [*]", + tuple_field_path(parent_field))); + return -1; + } + if (json_token_is_multikey(&parent_field->token) && + child_field->token.type != JSON_TOKEN_ANY) { + assert(expected_type == FIELD_TYPE_ARRAY); + diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID, + tt_sprintf("field %s is already used with array index " + "placeholder [*] and cannot refer to " + "document by identifier", + tuple_field_path(parent_field))); + return -1; + } + if (field_type1_contains_type2(parent_field->type, expected_type)) { + parent_field->type = expected_type; + } else { + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, + tuple_field_path(parent_field), + field_type_strs[parent_field->type], + field_type_strs[expected_type]); + return -1; + } + return 0; +} + /** * Given a field number and a path, add the corresponding field * to the tuple format, allocating intermediate fields if @@ -228,29 +272,16 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, *path_pool += path_len; int rc = 0; - uint32_t token_count = 0; + uint32_t token_count = 0, multikey_token_count = 0; struct json_tree *tree = &format->fields; struct json_lexer lexer; json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && field->token.type != JSON_TOKEN_END) { - if (field->token.type == JSON_TOKEN_ANY) { - diag_set(ClientError, ER_UNSUPPORTED, - "Tarantool", "multikey indexes"); - goto fail; - } - enum field_type expected_type = - field->token.type == JSON_TOKEN_STR ? - FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; - if (field_type1_contains_type2(parent->type, expected_type)) { - parent->type = expected_type; - } else { - diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, - tuple_field_path(parent), - field_type_strs[parent->type], - field_type_strs[expected_type]); + if (tuple_format_field_update_type(parent, field) != 0) goto fail; - } + if (field->token.type == JSON_TOKEN_ANY) + multikey_token_count++; struct tuple_field *next = json_tree_lookup_entry(tree, &parent->token, &field->token, @@ -268,6 +299,18 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, if (field == NULL) goto fail; } + if (json_token_is_multikey(&parent->token) && + parent->offset_slot == TUPLE_OFFSET_SLOT_NIL) { + /** + * Allocate offset slot for array is used + * in multikey index. This is required to + * quickly extract its size. + * @see tuple_field_multikey_items() + */ + assert(parent->type == FIELD_TYPE_ARRAY); + *current_slot = *current_slot - 1; + parent->offset_slot = *current_slot; + } parent->is_key_part = true; parent = next; token_count++; @@ -280,6 +323,13 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, assert(parent != NULL); /* Update tree depth information. */ format->fields_depth = MAX(format->fields_depth, token_count + 1); + if (multikey_token_count > 1) { + assert(path_len > 0); + diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID, + tt_sprintf("no more than one array index placeholder " + "[*] is allowed in JSON path")); + goto fail; + } cleanup: tuple_field_delete(field); end: @@ -782,6 +832,7 @@ tuple_field_map_initialize(struct field_map_builder *builder, * validations and field map initialization. */ uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); + struct mp_frame *mk_parent_frame = NULL; struct mp_frame *frames = region_alloc(region, frames_sz); if (frames == NULL) { diag_set(OutOfMemory, frames_sz, "region", "frames"); @@ -805,6 +856,10 @@ tuple_field_map_initialize(struct field_map_builder *builder, mp_stack_pop(&stack); if (mp_stack_is_empty(&stack)) goto end; + if (json_token_is_multikey(parent)) { + /* Leave multikey index branch. */ + mk_parent_frame = NULL; + } parent = parent->parent; } /* @@ -855,7 +910,16 @@ tuple_field_map_initialize(struct field_map_builder *builder, field_type_strs[field->type]); return -1; } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + mk_parent_frame != NULL) { + int multikey_idx = mk_parent_frame->curr; + uint32_t multikey_items = mk_parent_frame->count; + if (field_map_builder_ext_slot_set(builder, + field->offset_slot, multikey_idx, + multikey_items, + *pos - tuple) != 0) + return -1; + } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { field_map_builder_slot_set(builder, field->offset_slot, *pos - tuple); @@ -876,6 +940,10 @@ tuple_field_map_initialize(struct field_map_builder *builder, mp_decode_array(pos) : mp_decode_map(pos); mp_stack_push(&stack, type, size); + if (json_token_is_multikey(&field->token)) { + assert(type == MP_ARRAY); + mk_parent_frame = &frames[stack.used - 1]; + } parent = &field->token; } else { mp_next(pos); diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 78f29c624..39ba76394 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -699,6 +699,12 @@ vinyl_space_check_index_def(struct space *space, struct index_def *index_def) return -1; } } + if (key_def_is_multikey(index_def->key_def)) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + "vinyl space index cannot be multikey"); + return -1; + } return 0; } diff --git a/test/box/misc.result b/test/box/misc.result index c350bbd73..88cc64daf 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -516,6 +516,7 @@ t; 185: box.error.SQL_UNKNOWN_TOKEN 186: box.error.SQL_PARSER_GENERIC 187: box.error.SQL_ANALYZE_ARGUMENT + 188: box.error.INDEX_MULTIKEY_INVALID ... test_run:cmd("setopt delimiter ''"); --- diff --git a/test/engine/json.result b/test/engine/json.result index a790cdfbc..1bac85edd 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -683,16 +683,3 @@ town:select() s:drop() --- ... --- --- gh-1260: Multikey indexes --- -s = box.schema.space.create('withdata') ---- -... -idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) ---- -- error: Tarantool does not support multikey indexes -... -s:drop() ---- -... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index f9a7180b1..9afa3daa2 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -192,10 +192,3 @@ town:select() name:drop() town:select() s:drop() - --- --- gh-1260: Multikey indexes --- -s = box.schema.space.create('withdata') -idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) -s:drop() diff --git a/test/engine/multikey_idx.result b/test/engine/multikey_idx.result new file mode 100644 index 000000000..f41ddafa8 --- /dev/null +++ b/test/engine/multikey_idx.result @@ -0,0 +1,228 @@ +test_run = require('test_run').new() +--- +... +engine = test_run:get_cfg('engine') +--- +... +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata', {engine = 'vinyl'}) +--- +... +-- Vinyl's space can't be multikey (yet). +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key + cannot be multikey' +... +s:drop() +--- +... +s = box.schema.space.create('withdata', {engine = 'memtx'}) +--- +... +-- Primary index must be unique so it can't be multikey. +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key + cannot be multikey' +... +pk = s:create_index('pk') +--- +... +-- Only tree index type may be mutlikey. +_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': HASH index + cannot be multikey' +... +_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': BITSET index + cannot be multikey' +... +_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}}) +--- +- error: 'Can''t create or modify index ''idx'' in space ''withdata'': RTREE index + cannot be multikey' +... +-- Test incompatible multikey index parts. +_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}}) +--- +- error: 'Can''t create or modify index ''idx3'' in space ''withdata'': multikey index + parts are incompatible' +... +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}}) +--- +- error: 'Multikey index is invalid: no more than one array index placeholder [*] + is allowed in JSON path' +... +idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}}) +--- +... +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +- error: 'Multikey index is invalid: field 3 is already refer to document by identifier + and cannot use array index placeholder [*]' +... +idx0:drop() +--- +... +-- Unique multikey index. +idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +--- +... +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +--- +- error: 'Multikey index is invalid: field 3 is already used with array index placeholder + [*] and cannot refer to document by identifier' +... +s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}}) +--- +- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}}) +--- +- error: Duplicate key exists in unique index 'arr_idx' in space 'withdata' +... +-- Non-unique multikey index; two multikey indexes per space. +arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}}) +--- +... +arr_idx:select() +--- +- - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:get({'James', 'Bond'}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:get({'Ivan', 'Ivanych'}) +--- +- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:get({'Vasya', 'Pupkin'}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +s:insert({4, {1}, {{fname='James', sname='Bond'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +-- Duplicates in multikey parts. +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +idx:get({'A', 'B'}) +--- +... +idx:get({'C', 'D'}) +--- +... +idx:delete({'Vasya', 'Pupkin'}) +--- +- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +--- +- [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({7, {1}, {{fname='James', sname='Bond'}}}) +--- +- [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] +... +arr_idx:select({1}) +--- +- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +-- Snapshot & recovery. +box.snapshot() +--- +- ok +... +test_run:cmd("restart server default") +s = box.space["withdata"] +--- +... +idx = s.index["idx"] +--- +... +arr_idx = s.index["arr_idx"] +--- +... +s:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] +... +idx:select() +--- +- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +arr_idx:select() +--- +- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]] + - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +s:drop() +--- +... +-- Assymetric multikey index paths. +s = box.schema.space.create('withdata') +--- +... +pk = s:create_index('pk') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +--- +... +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) +--- +- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1', + 'extra': {'sname': 'C2'}}]] +... +s:drop() +--- +... diff --git a/test/engine/multikey_idx.test.lua b/test/engine/multikey_idx.test.lua new file mode 100644 index 000000000..2b25063cf --- /dev/null +++ b/test/engine/multikey_idx.test.lua @@ -0,0 +1,67 @@ +test_run = require('test_run').new() +engine = test_run:get_cfg('engine') + +-- +-- gh-1260: Multikey indexes +-- +s = box.schema.space.create('withdata', {engine = 'vinyl'}) +-- Vinyl's space can't be multikey (yet). +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +s:drop() + +s = box.schema.space.create('withdata', {engine = 'memtx'}) +-- Primary index must be unique so it can't be multikey. +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}}) +pk = s:create_index('pk') +-- Only tree index type may be mutlikey. +_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}}) +_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}}) +_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}}) +-- Test incompatible multikey index parts. +_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}}) +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}}) +idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}}) +_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +idx0:drop() +-- Unique multikey index. +idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}}) +_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}}) +-- Non-unique multikey index; two multikey indexes per space. +arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}}) +arr_idx:select() +idx:get({'James', 'Bond'}) +idx:get({'Ivan', 'Ivanych'}) +idx:get({'Vasya', 'Pupkin'}) +idx:select() +s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({4, {1}, {{fname='James', sname='Bond'}}}) +idx:select() +-- Duplicates in multikey parts. +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +idx:get({'A', 'B'}) +idx:get({'C', 'D'}) +idx:delete({'Vasya', 'Pupkin'}) +s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({7, {1}, {{fname='James', sname='Bond'}}}) +arr_idx:select({1}) +idx:select() +-- Snapshot & recovery. +box.snapshot() +test_run:cmd("restart server default") +s = box.space["withdata"] +idx = s.index["idx"] +arr_idx = s.index["arr_idx"] +s:select() +idx:select() +arr_idx:select() +s:drop() + +-- Assymetric multikey index paths. +s = box.schema.space.create('withdata') +pk = s:create_index('pk') +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) +s:drop() -- 2.21.0