From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v6 3/3] box: introduce multikey indexes Date: Wed, 13 Mar 2019 15:15:39 +0300 Message-Id: <02bb2d4f30b4766d8fc4ce2f1b7ee94226ff043f.1552478226.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov List-ID: I didn't finished review fixes for multikey indexes, but I've included this part to show how the modified hints changes affect multikey indexes. ========================================================================= With multikey index feature introduction, JSON path may have [*] placeholder instead of array index: this allows to index multiple documents by one JSON path automatically. Example: s = box.schema.space.create('withdata') idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) _ = s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Austin', sname='Powers'}}}) idx:get({'Austin', 'Powers'}) --- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Austin', 'sname': 'Powers'}]] ... Closes #1257 --- src/box/key_def.c | 15 ++++ src/box/key_def.h | 5 ++ src/box/memtx_tree.c | 175 +++++++++++++++++++++++++++++++++++++- src/box/tuple.c | 8 +- src/box/tuple.h | 122 +++++++++++++++++++++++--- src/box/tuple_compare.cc | 115 +++++++++++++++++++------ src/box/tuple_format.c | 120 +++++++++++++++++++++----- test/engine/json.result | 80 ++++++++++++++++- test/engine/json.test.lua | 20 +++++ 9 files changed, 592 insertions(+), 68 deletions(-) diff --git a/src/box/key_def.c b/src/box/key_def.c index d4c979aa1..b9cc77699 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -164,9 +164,24 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, *path_pool += path_len; memcpy(def->parts[part_no].path, path, path_len); def->parts[part_no].path_len = path_len; + + int rc; + struct json_lexer lexer; + struct json_token token; + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); + while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { + if (token.type == JSON_TOKEN_ANY) { + def->has_multikey_parts = true; + def->parts[part_no].is_multikey = true; + break; + } else if (token.type == JSON_TOKEN_END) { + break; + } + } } else { def->parts[part_no].path = NULL; def->parts[part_no].path_len = 0; + def->parts[part_no].is_multikey = false; } column_mask_set_fieldno(&def->column_mask, fieldno); } diff --git a/src/box/key_def.h b/src/box/key_def.h index 0d8f3112e..6a914df11 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -97,6 +97,8 @@ struct key_part { char *path; /** The length of JSON path. */ uint32_t path_len; + /** True if this is multikey key part. */ + bool is_multikey; /** * Epoch of the tuple format the offset slot cached in * this part is valid for, see tuple_format::epoch. @@ -205,6 +207,8 @@ struct key_def { bool is_nullable; /** True if some key part has JSON path. */ bool has_json_paths; + /** True if some part has array index placeholder *. */ + bool has_multikey_parts; /** * True, if some key parts can be absent in a tuple. These * fields assumed to be MP_NIL. @@ -699,6 +703,7 @@ key_hash(const char *key, struct key_def *key_def) * Get a comparison hint for a @a tuple. * @param tuple - tuple to get uint64_t of. * @param key_def - key_def that defines which comparison is used. + * @param multikey_idx - index of multikey array item. * @return the comparison auxiliary information. */ static inline uint64_t diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index 2c1933ceb..bfc618bdf 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -537,7 +537,8 @@ memtx_tree_index_get(struct index *base, const char *key, struct memtx_tree_key_data key_data; key_data.key = key; key_data.part_count = part_count; - key_data.hint = key_hint(key, cmp_def); + if (!cmp_def->has_multikey_parts) + key_data.hint = key_hint(key, cmp_def); struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data); *result = res != NULL ? res->tuple : NULL; return 0; @@ -593,6 +594,98 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, return 0; } +static int +multikey_get_array_sz(struct tuple *tuple, struct key_def *key_def) +{ + assert(key_def->has_multikey_parts); + struct key_part *part = key_def->parts; + /* FIXME: don't like it... */ + while (part < key_def->parts + key_def->part_count && + !part->is_multikey) + part++; + assert(part->path != NULL && part->is_multikey); + struct tuple_field *field = + tuple_format_field_by_path(tuple_format(tuple), part->fieldno, + part->path, part->path_len); + assert(field != NULL); + struct field_map_ext *field_map_ext = + tuple_field_map_ext((uint32_t *)tuple_field_map(tuple), + field->offset_slot); + return field_map_ext->size; +} + +int +memtx_multikey_tree_index_replace(struct index *base, struct tuple *old_tuple, + struct tuple *new_tuple, + enum dup_replace_mode mode, + struct tuple **result) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *key_def = index->tree.arg; + if (new_tuple != NULL) { + int errcode = 0, tree_res = 0; + struct tuple *dup_tuple = NULL; + int multikey_idx = 0; + int sz = multikey_get_array_sz(new_tuple, key_def); + for (; multikey_idx < sz; multikey_idx++) { + struct memtx_tree_data new_data; + new_data.tuple = new_tuple; + new_data.hint = multikey_idx; + struct memtx_tree_data dup_data; + dup_data.tuple = NULL; + tree_res = memtx_tree_insert(&index->tree, new_data, + &dup_data); + if (tree_res != 0) { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, + "memtx_tree_index", "replace"); + break; + } + dup_tuple = dup_data.tuple; + errcode = replace_check_dup(old_tuple, dup_tuple, mode); + if (errcode != 0) { + memtx_tree_delete(&index->tree, new_data); + if (dup_tuple != NULL) { + memtx_tree_insert(&index->tree, + dup_data, NULL); + } + struct space *sp = + space_cache_find(base->def->space_id); + if (sp != NULL) { + diag_set(ClientError, errcode, + base->def->name, + space_name(sp)); + } + break; + } + } + if (tree_res != 0 || errcode != 0) { + multikey_idx--; + for (; multikey_idx >= 0; multikey_idx--) { + struct memtx_tree_data data; + data.tuple = new_tuple; + data.hint = multikey_idx; + memtx_tree_delete(&index->tree, data); + } + return -1; + } + if (dup_tuple != NULL) { + *result = dup_tuple; + return 0; + } + } + if (old_tuple != NULL) { + int sz = multikey_get_array_sz(old_tuple, key_def); + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) { + struct memtx_tree_data old_data; + old_data.tuple = old_tuple; + old_data.hint = multikey_idx; + memtx_tree_delete(&index->tree, old_data); + } + } + *result = old_tuple; + return 0; +} + static struct iterator * memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, const char *key, uint32_t part_count) @@ -630,7 +723,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, it->key_data.key = key; it->key_data.part_count = part_count; struct key_def *cmp_def = memtx_tree_index_cmp_def(index); - it->key_data.hint = key_hint(key, cmp_def); + if (!cmp_def->has_multikey_parts) + it->key_data.hint = key_hint(key, cmp_def); it->index_def = base->def; it->tree = &index->tree; it->tree_iterator = memtx_tree_invalid_iterator(); @@ -700,6 +794,48 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) return 0; } +static int +memtx_multikey_tree_index_build_next(struct index *base, struct tuple *tuple) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *key_def = index->tree.arg; + int sz = multikey_get_array_sz(tuple, key_def); + + if (index->build_array == NULL) { + index->build_array = + (struct memtx_tree_data *)malloc(MEMTX_EXTENT_SIZE); + if (index->build_array == NULL) { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, + "memtx_tree_index", "build_next"); + return -1; + } + index->build_array_alloc_size = + MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]); + } + assert(index->build_array_size <= index->build_array_alloc_size); + if (index->build_array_size == index->build_array_alloc_size) { + index->build_array_alloc_size = index->build_array_alloc_size + + index->build_array_alloc_size / 2; + struct memtx_tree_data *tmp = + realloc(index->build_array, + index->build_array_alloc_size * sizeof(*tmp)); + if (tmp == NULL) { + diag_set(OutOfMemory, index->build_array_alloc_size * + sizeof(*tmp), "memtx_tree_index", "build_next"); + return -1; + } + index->build_array = tmp; + } + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) { + struct memtx_tree_data *elem = + &index->build_array[index->build_array_size++]; + assert(index->build_array_size < index->build_array_alloc_size); + elem->tuple = tuple; + elem->hint = multikey_idx; + } + return 0; +} + static void memtx_tree_index_end_build(struct index *base) { @@ -802,6 +938,36 @@ static const struct index_vtab memtx_tree_index_vtab = { /* .end_build = */ memtx_tree_index_end_build, }; +static const struct index_vtab memtx_multikey_tree_index_vtab = { + /* .destroy = */ memtx_tree_index_destroy, + /* .commit_create = */ generic_index_commit_create, + /* .abort_create = */ generic_index_abort_create, + /* .commit_modify = */ generic_index_commit_modify, + /* .commit_drop = */ generic_index_commit_drop, + /* .update_def = */ memtx_tree_index_update_def, + /* .depends_on_pk = */ memtx_tree_index_depends_on_pk, + /* .def_change_requires_rebuild = */ + memtx_index_def_change_requires_rebuild, + /* .size = */ memtx_tree_index_size, + /* .bsize = */ memtx_tree_index_bsize, + /* .min = */ generic_index_min, + /* .max = */ generic_index_max, + /* .random = */ memtx_tree_index_random, + /* .count = */ memtx_tree_index_count, + /* .get = */ memtx_tree_index_get, + /* .replace = */ memtx_multikey_tree_index_replace, + /* .create_iterator = */ memtx_tree_index_create_iterator, + /* .create_snapshot_iterator = */ + memtx_tree_index_create_snapshot_iterator, + /* .stat = */ generic_index_stat, + /* .compact = */ generic_index_compact, + /* .reset_stat = */ generic_index_reset_stat, + /* .begin_build = */ memtx_tree_index_begin_build, + /* .reserve = */ memtx_tree_index_reserve, + /* .build_next = */ memtx_multikey_tree_index_build_next, + /* .end_build = */ memtx_tree_index_end_build, +}; + struct index * memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) { @@ -812,8 +978,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) "malloc", "struct memtx_tree_index"); return NULL; } + const struct index_vtab *vtab = def->key_def->has_multikey_parts ? + &memtx_multikey_tree_index_vtab : + &memtx_tree_index_vtab; if (index_create(&index->base, (struct engine *)memtx, - &memtx_tree_index_vtab, def) != 0) { + vtab, def) != 0) { free(index); return NULL; } diff --git a/src/box/tuple.c b/src/box/tuple.c index 7f06d4053..c8a938c4c 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len) } int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len) +tuple_go_to_path_multikey(const char **data, const char *path, + uint32_t path_len, uint64_t multikey_idx) { int rc; struct json_lexer lexer; @@ -463,6 +464,8 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len) json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { switch (token.type) { + case JSON_TOKEN_ANY: + token.num = (int)multikey_idx; case JSON_TOKEN_NUM: rc = tuple_field_go_to_index(data, token.num); break; @@ -532,7 +535,8 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, } return tuple_field_raw_by_path(format, tuple, field_map, fieldno, path + lexer.offset, - path_len - lexer.offset, NULL); + path_len - lexer.offset, NULL, + INVALID_MULTIKEY_INDEX); } /* }}} tuple_field_* getters */ diff --git a/src/box/tuple.h b/src/box/tuple.h index 8b12fd5a8..e498e1e41 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -45,6 +45,8 @@ struct slab_arena; struct quota; struct key_part; +#define INVALID_MULTIKEY_INDEX UINT64_MAX + /** * A format for standalone tuples allocated on runtime arena. * \sa tuple_new(). @@ -286,6 +288,7 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const /** \endcond public */ + /** * An atom of Tarantool storage. Represents MsgPack Array. * Tuple has the following structure: @@ -296,7 +299,9 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const * | ^ * +---------------------------------------data_offset * - * Each 'off_i' is the offset to the i-th indexed field. + * Each 'off_i' is the offset to the i-th indexed field or field + * map extention (described with struct tuple_field_map_ext) + * offset (in sizeof(field_map[0]) units). */ struct PACKED tuple { @@ -328,6 +333,26 @@ struct PACKED tuple */ }; +/** + * Tuple field map extent. Is allocated on tuple_field_map_create + * call automatically when necessary, when tuple field map must be + * reallocated dynamically. + */ +struct field_map_ext { + /* Sequence of data offset slots. */ + uint32_t field_map[1]; + /* The count of field_map items. */ + uint32_t size; +}; + +static inline struct field_map_ext * +tuple_field_map_ext(uint32_t *field_map, int32_t root_offset_slot) +{ + uint32_t slot_off = field_map[root_offset_slot]; + return (struct field_map_ext *)((char *)field_map - + slot_off * sizeof(uint32_t) - sizeof(struct field_map_ext)); +} + /** Size of the tuple including size of struct tuple. */ static inline size_t tuple_size(const struct tuple *tuple) @@ -508,11 +533,32 @@ tuple_field_count(const struct tuple *tuple) * with NULL. * @param path The path to process. * @param path_len The length of the @path. + * @param multikey_index The multikey index hint - index of item + * for JSON_TOKEN_ANY level. * @retval 0 On success. * @retval -1 In case of error in JSON path. */ int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len); +tuple_go_to_path_multikey(const char **data, const char *path, + uint32_t path_len, uint64_t multikey_idx); + +/** + * Retrieve msgpack data by JSON path. + * @param data[in, out] Pointer to msgpack with data. + * If the field cannot be retrieved be the + * specified path @path, it is overwritten + * with NULL. + * @param path The path to process. + * @param path_len The length of the @path. + * @retval 0 On success. + * @retval -1 In case of error in JSON path. + */ +static inline int +tuple_go_to_path(const char **data, const char *path, uint32_t path_len) +{ + return tuple_go_to_path_multikey(data, path, path_len, + INVALID_MULTIKEY_INDEX); +} /** * Get tuple field by field index and relative JSON path. @@ -533,7 +579,7 @@ static inline const char * tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t fieldno, const char *path, uint32_t path_len, - int32_t *offset_slot_hint) + int32_t *offset_slot_hint, uint64_t multikey_idx) { int32_t offset_slot; if (offset_slot_hint != NULL && @@ -558,10 +604,22 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, if (offset_slot_hint != NULL) *offset_slot_hint = offset_slot; offset_slot_access: - /* Indexed field */ - if (field_map[offset_slot] == 0) - return NULL; - tuple += field_map[offset_slot]; + if (multikey_idx != INVALID_MULTIKEY_INDEX) { + struct field_map_ext *field_map_ext = + tuple_field_map_ext((uint32_t *)field_map, + offset_slot); + if (multikey_idx >= field_map_ext->size) + return NULL; + uint32_t off = field_map_ext->field_map[-multikey_idx]; + if (off == 0) + return NULL; + tuple += off; + } else { + /* Indexed field */ + if (field_map[offset_slot] == 0) + return NULL; + tuple += field_map[offset_slot]; + } } else { uint32_t field_count; parse: @@ -572,8 +630,8 @@ parse: for (uint32_t k = 0; k < fieldno; k++) mp_next(&tuple); if (path != NULL && - unlikely(tuple_go_to_path(&tuple, path, - path_len) != 0)) + unlikely(tuple_go_to_path_multikey(&tuple, path, path_len, + multikey_idx) != 0)) return NULL; } return tuple; @@ -595,7 +653,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t field_no) { return tuple_field_raw_by_path(format, tuple, field_map, field_no, - NULL, 0, NULL); + NULL, 0, NULL, INVALID_MULTIKEY_INDEX); } /** @@ -634,16 +692,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, uint32_t path_len, uint32_t path_hash); /** - * Get a tuple field pointed to by an index part. + * Get a tuple field pointed to by an index part and multikey hint. * @param format Tuple format. * @param tuple A pointer to MessagePack array. * @param field_map A pointer to the LAST element of field map. + * @param multikey_idx A multikey hint. * @param part Index part to use. * @retval Field data if the field exists or NULL. */ static inline const char * -tuple_field_raw_by_part(struct tuple_format *format, const char *data, - const uint32_t *field_map, struct key_part *part) +tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, uint64_t multikey_idx) { if (unlikely(part->format_epoch != format->epoch)) { assert(format->epoch != 0); @@ -656,7 +716,41 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data, } return tuple_field_raw_by_path(format, data, field_map, part->fieldno, part->path, part->path_len, - &part->offset_slot_cache); + &part->offset_slot_cache, multikey_idx); +} + +/** + * Get a field refereed by index @part and multikey hint in tuple. + * @param tuple Tuple to get the field from. + * @param part Index part to use. + * @param multikey_idx A multikey hint. + * @retval Field data if the field exists or NULL. + */ +static inline const char * +tuple_field_by_part_multikey(const struct tuple *tuple, struct key_part *part, + uint64_t multikey_idx) +{ + return tuple_field_raw_by_part_multikey(tuple_format(tuple), + tuple_data(tuple), + tuple_field_map(tuple), part, + multikey_idx); +} + + +/** + * Get a tuple field pointed to by an index part. + * @param format Tuple format. + * @param tuple A pointer to MessagePack array. + * @param field_map A pointer to the LAST element of field map. + * @param part Index part to use. + * @retval Field data if the field exists or NULL. + */ +static inline const char * +tuple_field_raw_by_part(struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part) +{ + return tuple_field_raw_by_part_multikey(format, data, field_map, + part, INVALID_MULTIKEY_INDEX); } /** diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 86922c9bb..cdb08f8fe 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -351,6 +351,8 @@ key_def_set_hint_func(struct key_def *def) { def->key_hint = NULL; def->tuple_hint = NULL; + if (def->has_multikey_parts) + return; bool is_nullable = key_part_is_nullable(def->parts); switch (def->parts->type) { case FIELD_TYPE_UNSIGNED: { @@ -835,7 +837,8 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b, return i; } -template +template static inline int tuple_compare_slowpath_hinted(const struct tuple *tuple_a, uint64_t tuple_a_hint, const struct tuple *tuple_b, @@ -845,8 +848,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); + assert(is_multikey == key_def->has_multikey_parts); + assert(!is_multikey || is_multikey == has_json_paths); int rc = 0; - if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0) + if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0) return rc; struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); @@ -889,7 +894,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, end = part + key_def->part_count; for (; part < end; part++) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + tuple_a_hint); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + tuple_b_hint); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -946,7 +958,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a, */ end = key_def->parts + key_def->part_count; for (; part < end; ++part) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + tuple_a_hint); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + tuple_b_hint); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -976,24 +995,28 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_def *key_def) { return tuple_compare_slowpath_hinted(tuple_a, HINT_INVALID, tuple_b, + has_json_paths, false>(tuple_a, HINT_INVALID, tuple_b, HINT_INVALID, key_def); } -template +template static inline int tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, uint64_t tuple_hint, const char *key, uint32_t part_count, uint64_t key_hint, struct key_def *key_def) { + (void)key_hint; assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); + assert(is_multikey == key_def->has_multikey_parts); + assert(!is_multikey || is_multikey == has_json_paths); int rc = 0; - if ((rc = hint_cmp(tuple_hint, key_hint)) != 0) + if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0) return rc; struct key_part *part = key_def->parts; struct tuple_format *format = tuple_format(tuple); @@ -1002,7 +1025,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, enum mp_type a_type, b_type; if (likely(part_count == 1)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, tuple_hint); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -1032,7 +1058,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, struct key_part *end = part + part_count; for (; part < end; ++part, mp_next(&key)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, tuple_hint); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -1074,7 +1103,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, uint32_t part_count, struct key_def *key_def) { return tuple_compare_with_key_slowpath_hinted(tuple, + has_optional_parts, has_json_paths, false>(tuple, HINT_INVALID, key, part_count, HINT_INVALID, key_def); } @@ -1508,14 +1537,22 @@ static const tuple_compare_t compare_slowpath_funcs[] = { }; static const tuple_compare_hinted_t compare_slowpath_hinted_funcs[] = { - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted, - tuple_compare_slowpath_hinted + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, }; void @@ -1523,7 +1560,14 @@ key_def_set_tuple_compare(struct key_def *def) { int cmp_func_idx = (def->is_nullable ? 1 : 0) + 2 * (def->has_optional_parts ? 1 : 0) + - 4 * (def->has_json_paths ? 1 : 0); + 4 * (def->has_json_paths ? 1 : 0) + + 8 * (def->has_multikey_parts ? 1 : 0); + if (def->has_multikey_parts) { + def->tuple_compare = NULL; + def->tuple_compare_hinted = + compare_slowpath_hinted_funcs[cmp_func_idx]; + return; + } if (def->is_nullable) { if (key_def_is_sequential(def)) { if (def->has_optional_parts) { @@ -1795,14 +1839,22 @@ static const tuple_compare_with_key_t compare_with_key_slowpath_funcs[] = { static const tuple_compare_with_key_hinted_t compare_with_key_slowpath_hinted_funcs[] = { - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted, - tuple_compare_with_key_slowpath_hinted + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted }; void @@ -1810,7 +1862,14 @@ key_def_set_tuple_compare_with_key(struct key_def *def) { int cmp_func_idx = (def->is_nullable ? 1 : 0) + 2 * (def->has_optional_parts ? 1 : 0) + - 4 * (def->has_json_paths ? 1 : 0); + 4 * (def->has_json_paths ? 1 : 0) + + 8 * (def->has_multikey_parts ? 1 : 0); + if (def->has_multikey_parts) { + def->tuple_compare_with_key = NULL; + def->tuple_compare_with_key_hinted = + compare_with_key_slowpath_hinted_funcs[cmp_func_idx]; + return; + } if (def->is_nullable) { if (key_def_is_sequential(def)) { if (def->has_optional_parts) { diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 4439ce3e0..99b602cd5 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -33,6 +33,7 @@ #include "json/json.h" #include "tuple_format.h" #include "coll_id_cache.h" +#include "tuple.h" #include "third_party/PMurHash.h" @@ -233,14 +234,19 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && field->token.type != JSON_TOKEN_END) { - if (field->token.type == JSON_TOKEN_ANY) { - diag_set(ClientError, ER_UNSUPPORTED, - "Tarantool", "multikey indexes"); - goto fail; - } enum field_type expected_type = field->token.type == JSON_TOKEN_STR ? FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (field->token.type == JSON_TOKEN_ANY && + !json_token_is_multikey(&parent->token) && + !json_token_is_leaf(&parent->token)) { + assert(expected_type == FIELD_TYPE_ARRAY); + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, + tuple_field_path(parent), + field_type_strs[parent->type], + "multikey array"); + goto fail; + } if (field_type1_contains_type2(parent->type, expected_type)) { parent->type = expected_type; } else { @@ -475,9 +481,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, break; format->min_tuple_size += mp_sizeof_nil(); } - } else { + } else if (field->token.type == JSON_TOKEN_STR) { /* Account a key string for map member. */ - assert(field->token.type == JSON_TOKEN_STR); format->min_tuple_size += mp_sizeof_str(field->token.len); } @@ -805,6 +810,59 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, return true; } +/** + * Grow tuple_field_map allocation left with extent_size + * 0 bytes. + */ +static int +tuple_field_map_realloc(uint32_t **field_map, uint32_t *field_map_size, + uint32_t extent_size, struct region *region) +{ + assert(extent_size % sizeof(uint32_t) == 0); + uint32_t new_field_map_size = *field_map_size + extent_size; + uint32_t *new_field_map = region_alloc(region, new_field_map_size); + if (new_field_map == NULL) { + diag_set(OutOfMemory, new_field_map_size, "region_alloc", + "new_field_map"); + return -1; + } + memset(new_field_map, 0, extent_size); + if (*field_map != NULL) { + memcpy((char *)new_field_map + extent_size, + (char *)*field_map - *field_map_size, *field_map_size); + } + *field_map = (uint32_t *)((char *)new_field_map + new_field_map_size); + *field_map_size = new_field_map_size; + return 0; +} + +/** + * Retrieve tuple field map extent by root_offset_slot, reallocate + * memory if required. + */ +struct field_map_ext * +tuple_field_map_ext_retrieve(uint32_t **field_map, uint32_t *field_map_size, + int32_t root_offset_slot, uint32_t extent_items, + struct region *region) +{ + uint32_t extent_sz = sizeof(struct field_map_ext) + + extent_items * sizeof(uint32_t); + uint32_t slot_off = (*field_map)[root_offset_slot]; + if (slot_off == 0) { + /* Field map extent was not created yet. */ + slot_off = *field_map_size / sizeof(uint32_t); + (*field_map)[root_offset_slot] = slot_off; + if (tuple_field_map_realloc(field_map, field_map_size, + extent_sz, region) != 0) + return NULL; + } + struct field_map_ext *field_map_ext = + tuple_field_map_ext(*field_map, root_offset_slot); + assert(field_map_ext->size == 0 || field_map_ext->size == extent_items); + field_map_ext->size = extent_items; + return field_map_ext; +} + /** @sa declaration for details. */ int tuple_field_map_create(struct tuple_format *format, const char *tuple, @@ -817,14 +875,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, return 0; /* Nothing to initialize */ } struct region *region = &fiber()->gc; - *field_map_size = format->field_map_size; - *field_map = region_alloc(region, *field_map_size); - if (*field_map == NULL) { - diag_set(OutOfMemory, *field_map_size, "region_alloc", - "field_map"); + *field_map = NULL; + *field_map_size = 0; + if (tuple_field_map_realloc(field_map, field_map_size, + format->field_map_size, region) != 0) return -1; - } - *field_map = (uint32_t *)((char *)*field_map + *field_map_size); const char *pos = tuple; int rc = 0; @@ -864,11 +919,6 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, uint32_t defined_field_count = MIN(field_count, validate ? tuple_format_field_count(format) : format->index_field_count); - /* - * Nullify field map to be able to detect by 0, - * which key fields are absent in tuple_field(). - */ - memset((char *)*field_map - *field_map_size, 0, *field_map_size); /* * Prepare mp stack of the size equal to the maximum depth * of the indexed field in the format::fields tree @@ -885,6 +935,12 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, struct mp_stack stack; mp_stack_create(&stack, format->fields_depth, frames); mp_stack_push(&stack, MP_ARRAY, defined_field_count); + /** + * Pointer to the stack entry that represents the multikey + * index root ARRAY entry. + */ + struct mp_frame *mk_parent_frame = NULL; + struct tuple_field *field; struct json_token *parent = &format->fields.root; while (true) { @@ -901,6 +957,10 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, mp_stack_pop(&stack); if (mp_stack_is_empty(&stack)) goto finish; + if (json_token_is_multikey(parent)) { + /* Leave multikey index branch. */ + mk_parent_frame = NULL; + } parent = parent->parent; } /* @@ -950,8 +1010,23 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, field_type_strs[field->type]); goto error; } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + mk_parent_frame == NULL) { (*field_map)[field->offset_slot] = pos - tuple; + } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + mk_parent_frame != NULL) { + uint32_t extent_items = mk_parent_frame->count; + struct field_map_ext *field_map_ext = + tuple_field_map_ext_retrieve(field_map, + field_map_size, + field->offset_slot, + extent_items, region); + if (field_map_ext == NULL) + goto error; + int multikey_idx = mk_parent_frame->curr; + field_map_ext->field_map[ + -multikey_idx] = pos - tuple; + } if (required_fields != NULL) bit_clear(required_fields, field->id); } @@ -968,6 +1043,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, mp_decode_array(&pos) : mp_decode_map(&pos); mp_stack_push(&stack, type, size); + if (json_token_is_multikey(&field->token)) { + /* Track multikey entry frame. */ + assert(type == MP_ARRAY); + mk_parent_frame = &frames[stack.used - 1]; + } parent = &field->token; } else { mp_next(&pos); diff --git a/test/engine/json.result b/test/engine/json.result index a790cdfbc..5c7a9b3b3 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -691,7 +691,85 @@ s = box.schema.space.create('withdata') ... idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) --- -- error: Tarantool does not support multikey indexes +... +s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +--- +- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}}) +--- +- [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:select({'James', 'Bond'}) +--- +- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:select({'Kirill', 'Shcherbatov'}) +--- +- [] +... +idx:select({'Ivan', 'Ivanych'}) +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:select({'Vasya', 'Pupkin'}) +--- +- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:select() +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +idx:select() +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:delete({'Vasya', 'Pupkin'}) +--- +- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +--- +- [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +--- +- [2, 1, [{'fname': 'James', 'sname': 'Bond'}]] +... +idx:select() +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, 1, [{'fname': 'James', 'sname': 'Bond'}]] + - [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:drop() +--- +... +s = box.schema.space.create('withdata') +--- +... +pk = s:create_index('pk') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +--- +... +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) +--- +- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1', + 'extra': {'sname': 'C2'}}]] ... s:drop() --- diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index f9a7180b1..dc6016af9 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -198,4 +198,24 @@ s:drop() -- s = box.schema.space.create('withdata') idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}}) +idx:select({'James', 'Bond'}) +idx:select({'Kirill', 'Shcherbatov'}) +idx:select({'Ivan', 'Ivanych'}) +idx:select({'Vasya', 'Pupkin'}) +idx:select() +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +idx:select() +idx:delete({'Vasya', 'Pupkin'}) +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +idx:select() +s:drop() + +s = box.schema.space.create('withdata') +pk = s:create_index('pk') +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) s:drop() -- 2.21.0