From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] [PATCH v5 4/4] box: introduce multikey indexes References: <126ecffeb103797f6a466387cd780587d0270275.1551951540.git.kshcherbatov@tarantool.org> From: Kirill Shcherbatov Message-ID: Date: Thu, 7 Mar 2019 18:55:09 +0300 MIME-Version: 1.0 In-Reply-To: <126ecffeb103797f6a466387cd780587d0270275.1551951540.git.kshcherbatov@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com List-ID: I've implemented a better version that is easier to understand and works better. I am going to think, how do not reallocate field map when multikey index was met. Maybe I'll resend this patch again later. ======================================================= With multikey index feature introduction, JSON path may have [*] placeholder instead of array index: this allows to index multiple documents by one JSON path automatically. Example: s = box.schema.space.create('withdata') idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) _ = s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Austin', sname='Powers'}}}) idx:get({'Austin', 'Powers'}) --- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Austin', 'sname': 'Powers'}]] ... Closes #1257 --- src/box/key_def.c | 15 ++++ src/box/key_def.h | 15 +++- src/box/memtx_tree.c | 180 ++++++++++++++++++++++++++++++++++++-- src/box/tuple.c | 8 +- src/box/tuple.h | 122 +++++++++++++++++++++++--- src/box/tuple_compare.cc | 155 +++++++++++++++++++++++++++----- src/box/tuple_format.c | 120 ++++++++++++++++++++----- test/engine/json.result | 80 ++++++++++++++++- test/engine/json.test.lua | 20 +++++ 9 files changed, 648 insertions(+), 67 deletions(-) diff --git a/src/box/key_def.c b/src/box/key_def.c index 771c30172..f6ca461c4 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -165,9 +165,24 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, *path_pool += path_len; memcpy(def->parts[part_no].path, path, path_len); def->parts[part_no].path_len = path_len; + + int rc; + struct json_lexer lexer; + struct json_token token; + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); + while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { + if (token.type == JSON_TOKEN_ANY) { + def->has_multikey_parts = true; + def->parts[part_no].is_multikey = true; + break; + } else if (token.type == JSON_TOKEN_END) { + break; + } + } } else { def->parts[part_no].path = NULL; def->parts[part_no].path_len = 0; + def->parts[part_no].is_multikey = false; } column_mask_set_fieldno(&def->column_mask, fieldno); } diff --git a/src/box/key_def.h b/src/box/key_def.h index c630e6b43..37fae6b52 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -97,6 +97,8 @@ struct key_part { char *path; /** The length of JSON path. */ uint32_t path_len; + /** True if this is multikey key part. */ + bool is_multikey; /** * Epoch of the tuple format the offset slot cached in * this part is valid for, see tuple_format::epoch. @@ -129,6 +131,8 @@ typedef union { * the tuples themselves. */ uint64_t hint; + /** Index of item in the array used in multikey index. */ + uint64_t multikey_idx; } cmp_aux_t; /** Test if cmp_aux_t a and b are equal. */ @@ -189,7 +193,8 @@ typedef uint32_t (*key_hash_t)(const char *key, /** @copydoc tuple_cmp_aux() */ typedef cmp_aux_t (*tuple_cmp_aux_t)(const struct tuple *tuple, - struct key_def *key_def); + struct key_def *key_def, + uint64_t multikey_idx); /** @copydoc key_cmp_aux() */ typedef cmp_aux_t (*key_cmp_aux_t)(const char *key, struct key_def *key_def); @@ -228,6 +233,8 @@ struct key_def { bool is_nullable; /** True if some key part has JSON path. */ bool has_json_paths; + /** True if some part has array index placeholder *. */ + bool has_multikey_parts; /** * True, if some key parts can be absent in a tuple. These * fields assumed to be MP_NIL. @@ -723,12 +730,14 @@ key_hash(const char *key, struct key_def *key_def) * Get a comparison auxiliary information for a tuple. * @param tuple - tuple to get cmp_aux_t of. * @param key_def - key_def that defines which comparison is used. + * @param multikey_idx - index of multikey array item. * @return the comparison auxiliary information. */ static inline cmp_aux_t -tuple_cmp_aux(const struct tuple *tuple, struct key_def *key_def) +tuple_cmp_aux(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { - return key_def->tuple_cmp_aux(tuple, key_def); + return key_def->tuple_cmp_aux(tuple, key_def, multikey_idx); } /** diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index 4b0886bef..d7f639ca4 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -91,11 +91,11 @@ memtx_tree_data_clear(struct memtx_tree_data *data) */ static void memtx_tree_data_set(struct memtx_tree_data *data, struct tuple *tuple, - struct key_def *key_def) + struct key_def *key_def, uint64_t multikey_idx) { (void)key_def; data->tuple = tuple; - data->aux_info = tuple_cmp_aux(tuple, key_def); + data->aux_info = tuple_cmp_aux(tuple, key_def, multikey_idx); } /** @@ -578,7 +578,8 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, struct key_def *key_def = index->tree.arg; if (new_tuple) { struct memtx_tree_data new_data; - memtx_tree_data_set(&new_data, new_tuple, key_def); + memtx_tree_data_set(&new_data, new_tuple, key_def, + INVALID_MULTIKEY_INDEX); struct memtx_tree_data dup_data; memtx_tree_data_clear(&dup_data); @@ -610,13 +611,106 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, } if (old_tuple) { struct memtx_tree_data old_data; - memtx_tree_data_set(&old_data, old_tuple, key_def); + memtx_tree_data_set(&old_data, old_tuple, key_def, + INVALID_MULTIKEY_INDEX); memtx_tree_delete(&index->tree, old_data); } *result = old_tuple; return 0; } +static int +multikey_get_array_sz(struct tuple *tuple, struct key_def *key_def) +{ + assert(key_def->has_multikey_parts); + struct key_part *part = key_def->parts; + /* FIXME: don't like it... */ + while (part < key_def->parts + key_def->part_count && + !part->is_multikey) + part++; + assert(part->path != NULL && part->is_multikey); + struct tuple_field *field = + tuple_format_field_by_path(tuple_format(tuple), part->fieldno, + part->path, part->path_len); + assert(field != NULL); + struct field_map_ext *field_map_ext = + tuple_field_map_ext((uint32_t *)tuple_field_map(tuple), + field->offset_slot); + return field_map_ext->size; +} + +int +memtx_multikey_tree_index_replace(struct index *base, struct tuple *old_tuple, + struct tuple *new_tuple, + enum dup_replace_mode mode, + struct tuple **result) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *key_def = index->tree.arg; + if (new_tuple != NULL) { + int errcode = 0, tree_res = 0; + struct tuple *dup_tuple = NULL; + int multikey_idx = 0; + int sz = multikey_get_array_sz(new_tuple, key_def); + for (; multikey_idx < sz; multikey_idx++) { + struct memtx_tree_data new_data; + memtx_tree_data_set(&new_data, new_tuple, key_def, + multikey_idx); + struct memtx_tree_data dup_data; + memtx_tree_data_clear(&dup_data); + tree_res = memtx_tree_insert(&index->tree, new_data, + &dup_data); + if (tree_res != 0) { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, + "memtx_tree_index", "replace"); + break; + } + dup_tuple = dup_data.tuple; + errcode = replace_check_dup(old_tuple, dup_tuple, mode); + if (errcode != 0) { + memtx_tree_delete(&index->tree, new_data); + if (dup_tuple != NULL) { + memtx_tree_insert(&index->tree, + dup_data, NULL); + } + struct space *sp = + space_cache_find(base->def->space_id); + if (sp != NULL) { + diag_set(ClientError, errcode, + base->def->name, + space_name(sp)); + } + break; + } + } + if (tree_res != 0 || errcode != 0) { + multikey_idx--; + for (; multikey_idx >= 0; multikey_idx--) { + struct memtx_tree_data data; + memtx_tree_data_set(&data, new_tuple, key_def, + multikey_idx); + memtx_tree_delete(&index->tree, data); + } + return -1; + } + if (dup_tuple != NULL) { + *result = dup_tuple; + return 0; + } + } + if (old_tuple != NULL) { + int sz = multikey_get_array_sz(old_tuple, key_def); + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) { + struct memtx_tree_data old_data; + memtx_tree_data_set(&old_data, old_tuple, key_def, + multikey_idx); + memtx_tree_delete(&index->tree, old_data); + } + } + *result = old_tuple; + return 0; +} + static struct iterator * memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, const char *key, uint32_t part_count) @@ -718,7 +812,48 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) } struct memtx_tree_data *elem = &index->build_array[index->build_array_size++]; - memtx_tree_data_set(elem, tuple, key_def); + memtx_tree_data_set(elem, tuple, key_def, INVALID_MULTIKEY_INDEX); + return 0; +} + +static int +memtx_multikey_tree_index_build_next(struct index *base, struct tuple *tuple) +{ + struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *key_def = index->tree.arg; + int sz = multikey_get_array_sz(tuple, key_def); + + if (index->build_array == NULL) { + index->build_array = + (struct memtx_tree_data *)malloc(MEMTX_EXTENT_SIZE); + if (index->build_array == NULL) { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, + "memtx_tree_index", "build_next"); + return -1; + } + index->build_array_alloc_size = + MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]); + } + assert(index->build_array_size <= index->build_array_alloc_size); + if (index->build_array_size == index->build_array_alloc_size) { + index->build_array_alloc_size = index->build_array_alloc_size + + index->build_array_alloc_size / 2; + struct memtx_tree_data *tmp = + realloc(index->build_array, + index->build_array_alloc_size * sizeof(*tmp)); + if (tmp == NULL) { + diag_set(OutOfMemory, index->build_array_alloc_size * + sizeof(*tmp), "memtx_tree_index", "build_next"); + return -1; + } + index->build_array = tmp; + } + for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) { + struct memtx_tree_data *elem = + &index->build_array[index->build_array_size++]; + assert(index->build_array_size < index->build_array_alloc_size); + memtx_tree_data_set(elem, tuple, key_def, multikey_idx); + } return 0; } @@ -824,6 +959,36 @@ static const struct index_vtab memtx_tree_index_vtab = { /* .end_build = */ memtx_tree_index_end_build, }; +static const struct index_vtab memtx_multikey_tree_index_vtab = { + /* .destroy = */ memtx_tree_index_destroy, + /* .commit_create = */ generic_index_commit_create, + /* .abort_create = */ generic_index_abort_create, + /* .commit_modify = */ generic_index_commit_modify, + /* .commit_drop = */ generic_index_commit_drop, + /* .update_def = */ memtx_tree_index_update_def, + /* .depends_on_pk = */ memtx_tree_index_depends_on_pk, + /* .def_change_requires_rebuild = */ + memtx_index_def_change_requires_rebuild, + /* .size = */ memtx_tree_index_size, + /* .bsize = */ memtx_tree_index_bsize, + /* .min = */ generic_index_min, + /* .max = */ generic_index_max, + /* .random = */ memtx_tree_index_random, + /* .count = */ memtx_tree_index_count, + /* .get = */ memtx_tree_index_get, + /* .replace = */ memtx_multikey_tree_index_replace, + /* .create_iterator = */ memtx_tree_index_create_iterator, + /* .create_snapshot_iterator = */ + memtx_tree_index_create_snapshot_iterator, + /* .stat = */ generic_index_stat, + /* .compact = */ generic_index_compact, + /* .reset_stat = */ generic_index_reset_stat, + /* .begin_build = */ memtx_tree_index_begin_build, + /* .reserve = */ memtx_tree_index_reserve, + /* .build_next = */ memtx_multikey_tree_index_build_next, + /* .end_build = */ memtx_tree_index_end_build, +}; + struct index * memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) { @@ -834,8 +999,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) "malloc", "struct memtx_tree_index"); return NULL; } + const struct index_vtab *vtab = def->key_def->has_multikey_parts ? + &memtx_multikey_tree_index_vtab : + &memtx_tree_index_vtab; if (index_create(&index->base, (struct engine *)memtx, - &memtx_tree_index_vtab, def) != 0) { + vtab, def) != 0) { free(index); return NULL; } diff --git a/src/box/tuple.c b/src/box/tuple.c index 7f06d4053..c8a938c4c 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len) } int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len) +tuple_go_to_path_multikey(const char **data, const char *path, + uint32_t path_len, uint64_t multikey_idx) { int rc; struct json_lexer lexer; @@ -463,6 +464,8 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len) json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { switch (token.type) { + case JSON_TOKEN_ANY: + token.num = (int)multikey_idx; case JSON_TOKEN_NUM: rc = tuple_field_go_to_index(data, token.num); break; @@ -532,7 +535,8 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, } return tuple_field_raw_by_path(format, tuple, field_map, fieldno, path + lexer.offset, - path_len - lexer.offset, NULL); + path_len - lexer.offset, NULL, + INVALID_MULTIKEY_INDEX); } /* }}} tuple_field_* getters */ diff --git a/src/box/tuple.h b/src/box/tuple.h index 8b12fd5a8..e498e1e41 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -45,6 +45,8 @@ struct slab_arena; struct quota; struct key_part; +#define INVALID_MULTIKEY_INDEX UINT64_MAX + /** * A format for standalone tuples allocated on runtime arena. * \sa tuple_new(). @@ -286,6 +288,7 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const /** \endcond public */ + /** * An atom of Tarantool storage. Represents MsgPack Array. * Tuple has the following structure: @@ -296,7 +299,9 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const * | ^ * +---------------------------------------data_offset * - * Each 'off_i' is the offset to the i-th indexed field. + * Each 'off_i' is the offset to the i-th indexed field or field + * map extention (described with struct tuple_field_map_ext) + * offset (in sizeof(field_map[0]) units). */ struct PACKED tuple { @@ -328,6 +333,26 @@ struct PACKED tuple */ }; +/** + * Tuple field map extent. Is allocated on tuple_field_map_create + * call automatically when necessary, when tuple field map must be + * reallocated dynamically. + */ +struct field_map_ext { + /* Sequence of data offset slots. */ + uint32_t field_map[1]; + /* The count of field_map items. */ + uint32_t size; +}; + +static inline struct field_map_ext * +tuple_field_map_ext(uint32_t *field_map, int32_t root_offset_slot) +{ + uint32_t slot_off = field_map[root_offset_slot]; + return (struct field_map_ext *)((char *)field_map - + slot_off * sizeof(uint32_t) - sizeof(struct field_map_ext)); +} + /** Size of the tuple including size of struct tuple. */ static inline size_t tuple_size(const struct tuple *tuple) @@ -508,11 +533,32 @@ tuple_field_count(const struct tuple *tuple) * with NULL. * @param path The path to process. * @param path_len The length of the @path. + * @param multikey_index The multikey index hint - index of item + * for JSON_TOKEN_ANY level. * @retval 0 On success. * @retval -1 In case of error in JSON path. */ int -tuple_go_to_path(const char **data, const char *path, uint32_t path_len); +tuple_go_to_path_multikey(const char **data, const char *path, + uint32_t path_len, uint64_t multikey_idx); + +/** + * Retrieve msgpack data by JSON path. + * @param data[in, out] Pointer to msgpack with data. + * If the field cannot be retrieved be the + * specified path @path, it is overwritten + * with NULL. + * @param path The path to process. + * @param path_len The length of the @path. + * @retval 0 On success. + * @retval -1 In case of error in JSON path. + */ +static inline int +tuple_go_to_path(const char **data, const char *path, uint32_t path_len) +{ + return tuple_go_to_path_multikey(data, path, path_len, + INVALID_MULTIKEY_INDEX); +} /** * Get tuple field by field index and relative JSON path. @@ -533,7 +579,7 @@ static inline const char * tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t fieldno, const char *path, uint32_t path_len, - int32_t *offset_slot_hint) + int32_t *offset_slot_hint, uint64_t multikey_idx) { int32_t offset_slot; if (offset_slot_hint != NULL && @@ -558,10 +604,22 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, if (offset_slot_hint != NULL) *offset_slot_hint = offset_slot; offset_slot_access: - /* Indexed field */ - if (field_map[offset_slot] == 0) - return NULL; - tuple += field_map[offset_slot]; + if (multikey_idx != INVALID_MULTIKEY_INDEX) { + struct field_map_ext *field_map_ext = + tuple_field_map_ext((uint32_t *)field_map, + offset_slot); + if (multikey_idx >= field_map_ext->size) + return NULL; + uint32_t off = field_map_ext->field_map[-multikey_idx]; + if (off == 0) + return NULL; + tuple += off; + } else { + /* Indexed field */ + if (field_map[offset_slot] == 0) + return NULL; + tuple += field_map[offset_slot]; + } } else { uint32_t field_count; parse: @@ -572,8 +630,8 @@ parse: for (uint32_t k = 0; k < fieldno; k++) mp_next(&tuple); if (path != NULL && - unlikely(tuple_go_to_path(&tuple, path, - path_len) != 0)) + unlikely(tuple_go_to_path_multikey(&tuple, path, path_len, + multikey_idx) != 0)) return NULL; } return tuple; @@ -595,7 +653,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple, const uint32_t *field_map, uint32_t field_no) { return tuple_field_raw_by_path(format, tuple, field_map, field_no, - NULL, 0, NULL); + NULL, 0, NULL, INVALID_MULTIKEY_INDEX); } /** @@ -634,16 +692,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple, uint32_t path_len, uint32_t path_hash); /** - * Get a tuple field pointed to by an index part. + * Get a tuple field pointed to by an index part and multikey hint. * @param format Tuple format. * @param tuple A pointer to MessagePack array. * @param field_map A pointer to the LAST element of field map. + * @param multikey_idx A multikey hint. * @param part Index part to use. * @retval Field data if the field exists or NULL. */ static inline const char * -tuple_field_raw_by_part(struct tuple_format *format, const char *data, - const uint32_t *field_map, struct key_part *part) +tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, uint64_t multikey_idx) { if (unlikely(part->format_epoch != format->epoch)) { assert(format->epoch != 0); @@ -656,7 +716,41 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data, } return tuple_field_raw_by_path(format, data, field_map, part->fieldno, part->path, part->path_len, - &part->offset_slot_cache); + &part->offset_slot_cache, multikey_idx); +} + +/** + * Get a field refereed by index @part and multikey hint in tuple. + * @param tuple Tuple to get the field from. + * @param part Index part to use. + * @param multikey_idx A multikey hint. + * @retval Field data if the field exists or NULL. + */ +static inline const char * +tuple_field_by_part_multikey(const struct tuple *tuple, struct key_part *part, + uint64_t multikey_idx) +{ + return tuple_field_raw_by_part_multikey(tuple_format(tuple), + tuple_data(tuple), + tuple_field_map(tuple), part, + multikey_idx); +} + + +/** + * Get a tuple field pointed to by an index part. + * @param format Tuple format. + * @param tuple A pointer to MessagePack array. + * @param field_map A pointer to the LAST element of field map. + * @param part Index part to use. + * @retval Field data if the field exists or NULL. + */ +static inline const char * +tuple_field_raw_by_part(struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part) +{ + return tuple_field_raw_by_part_multikey(format, data, field_map, + part, INVALID_MULTIKEY_INDEX); } /** diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 5b06e06b3..dbf7bb4f6 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -458,10 +458,12 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b, return i; } -template +template static inline int -tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, - struct key_def *key_def) +tuple_compare_slowpath_multikey(const struct tuple *tuple_a, + cmp_aux_t tuple_a_cmp_aux, const struct tuple *tuple_b, + cmp_aux_t tuple_b_cmp_aux, struct key_def *key_def) { assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); @@ -471,7 +473,7 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = tuple_data(tuple_b); if (key_def->part_count == 1 && part->fieldno == 0 && - (!has_json_paths || part->path == NULL)) { + (!has_json_paths || part->path == NULL) && !is_multikey) { /* * First field can not be optional - empty tuples * can not exist. @@ -509,7 +511,14 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, end = part + key_def->part_count; for (; part < end; part++) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + tuple_a_cmp_aux.multikey_idx); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + tuple_b_cmp_aux.multikey_idx); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -566,7 +575,14 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, */ end = key_def->parts + key_def->part_count; for (; part < end; ++part) { - if (has_json_paths) { + if (is_multikey) { + field_a = tuple_field_raw_by_part_multikey(format_a, + tuple_a_raw, field_map_a, part, + tuple_a_cmp_aux.multikey_idx); + field_b = tuple_field_raw_by_part_multikey(format_b, + tuple_b_raw, field_map_b, part, + tuple_b_cmp_aux.multikey_idx); + } else if (has_json_paths) { field_a = tuple_field_raw_by_part(format_a, tuple_a_raw, field_map_a, part); field_b = tuple_field_raw_by_part(format_b, tuple_b_raw, @@ -592,9 +608,23 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, template static inline int -tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, - uint32_t part_count, struct key_def *key_def) +tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, + struct key_def *key_def) +{ + cmp_aux_t dummy; + return tuple_compare_slowpath_multikey + ( + tuple_a, dummy, tuple_b, dummy, key_def); +} + +template +static inline int +tuple_compare_with_key_slowpath_multikey(const struct tuple *tuple, + cmp_aux_t tuple_cmp_aux, const char *key, uint32_t part_count, + cmp_aux_t key_cmp_aux, struct key_def *key_def) { + (void)key_cmp_aux; assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); @@ -608,7 +638,11 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, enum mp_type a_type, b_type; if (likely(part_count == 1)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, + tuple_cmp_aux.multikey_idx); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -639,7 +673,11 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, int rc; for (; part < end; ++part, mp_next(&key)) { const char *field; - if (has_json_paths) { + if (is_multikey) { + field = tuple_field_raw_by_part_multikey(format, + tuple_raw, field_map, part, + tuple_cmp_aux.multikey_idx); + } else if (has_json_paths) { field = tuple_field_raw_by_part(format, tuple_raw, field_map, part); } else { @@ -675,6 +713,17 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, return 0; } +template +static inline int +tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, + uint32_t part_count, struct key_def *key_def) +{ + cmp_aux_t dummy; + return tuple_compare_with_key_slowpath_multikey + ( + tuple, dummy, key, part_count, dummy, key_def); +} + template static inline int key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count, @@ -1342,11 +1391,28 @@ tuple_hinted_compare(const struct tuple *tuple_a, cmp_aux_t tuple_a_cmp_aux, return tuple_compare(tuple_a, tuple_b, key_def); } +static const tuple_aux_compare_t compare_slowpath_multikey_funcs[] = { + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey, + tuple_compare_slowpath_multikey +}; + static tuple_aux_compare_t tuple_aux_compare_create(const struct key_def *def) { - (void)def; - return tuple_hinted_compare; + if (def->has_multikey_parts) { + int cmp_func_idx = (def->is_nullable ? 1 : 0) + + 2 * (def->has_optional_parts ? 1 : 0) + + 4 * (def->has_json_paths ? 1 : 0); + return compare_slowpath_multikey_funcs[cmp_func_idx]; + } else { + return tuple_hinted_compare; + } } /* }}} tuple_aux_compare */ @@ -1367,11 +1433,29 @@ tuple_hinted_compare_with_key(const struct tuple *tuple, cmp_aux_t tuple_cmp_aux return tuple_compare_with_key(tuple, key, part_count, key_def); } +static const tuple_aux_compare_with_key_t + compare_with_key_slowpath_multikey_funcs[] = { + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey, + tuple_compare_with_key_slowpath_multikey +}; + static tuple_aux_compare_with_key_t tuple_aux_compare_with_key_create(const struct key_def *def) { - (void)def; - return tuple_hinted_compare_with_key; + if (def->has_multikey_parts) { + int cmp_func_idx = (def->is_nullable ? 1 : 0) + + 2 * (def->has_optional_parts ? 1 : 0) + + 4 * (def->has_json_paths ? 1 : 0); + return compare_with_key_slowpath_multikey_funcs[cmp_func_idx]; + } else { + return tuple_hinted_compare_with_key; + } } /* }}} tuple_aux_compare_with_key */ @@ -1399,10 +1483,12 @@ key_hint_default(const char *key, struct key_def *key_def) } static cmp_aux_t -tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { (void)tuple; (void)key_def; + (void)multikey_idx; cmp_aux_t ret; ret.hint = CMP_HINT_INVALID; return ret; @@ -1429,8 +1515,10 @@ key_hint_uint(const char *key, struct key_def *key_def) template static cmp_aux_t -tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { + (void)multikey_idx; assert(key_part_is_nullable(key_def->parts) == is_nullable); assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); cmp_aux_t ret; @@ -1468,8 +1556,10 @@ key_hint_int(const char *key, struct key_def *key_def) template static cmp_aux_t -tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { + (void)multikey_idx; assert(key_part_is_nullable(key_def->parts) == is_nullable); assert(key_def->parts->type == FIELD_TYPE_INTEGER); cmp_aux_t ret; @@ -1537,8 +1627,10 @@ key_hint_number(const char *key, struct key_def *key_def) template static cmp_aux_t -tuple_hint_number(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_number(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { + (void)multikey_idx; assert(key_part_is_nullable(key_def->parts) == is_nullable); assert(key_def->parts->type == FIELD_TYPE_NUMBER); cmp_aux_t ret; @@ -1569,8 +1661,10 @@ key_hint_boolean(const char *key, struct key_def *key_def) template static cmp_aux_t -tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { + (void)multikey_idx; assert(key_part_is_nullable(key_def->parts) == is_nullable); assert(key_def->parts->type == FIELD_TYPE_BOOLEAN); const char *field = tuple_field_by_part(tuple, key_def->parts); @@ -1612,8 +1706,10 @@ key_hint_string(const char *key, struct key_def *key_def) template static cmp_aux_t -tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { + (void)multikey_idx; assert(key_part_is_nullable(key_def->parts) == is_nullable); assert(key_def->parts->coll == NULL); assert(key_def->parts->type == FIELD_TYPE_STRING); @@ -1647,8 +1743,10 @@ key_hint_string_coll(const char *key, struct key_def *key_def) template static cmp_aux_t -tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) { + (void)multikey_idx; assert(key_part_is_nullable(key_def->parts) == is_nullable); assert(key_def->parts->type == FIELD_TYPE_STRING && key_def->parts->coll != NULL); @@ -1661,11 +1759,26 @@ tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) return key_hint_string_coll(field, key_def); } +static cmp_aux_t +tuple_multikey_idx_hint(const struct tuple *tuple, struct key_def *key_def, + uint64_t multikey_idx) +{ + (void)tuple; + (void)key_def; + cmp_aux_t ret; + ret.multikey_idx = multikey_idx; + return ret; +} + void key_def_set_cmp_aux_func(struct key_def *def) { def->key_cmp_aux = key_hint_default; def->tuple_cmp_aux = tuple_hint_default; + if (def->has_multikey_parts) { + def->tuple_cmp_aux = tuple_multikey_idx_hint; + return; + } bool is_nullable = key_part_is_nullable(def->parts); if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) { def->key_cmp_aux = is_nullable ? key_hint_string_coll : diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 4439ce3e0..99b602cd5 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -33,6 +33,7 @@ #include "json/json.h" #include "tuple_format.h" #include "coll_id_cache.h" +#include "tuple.h" #include "third_party/PMurHash.h" @@ -233,14 +234,19 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno, json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && field->token.type != JSON_TOKEN_END) { - if (field->token.type == JSON_TOKEN_ANY) { - diag_set(ClientError, ER_UNSUPPORTED, - "Tarantool", "multikey indexes"); - goto fail; - } enum field_type expected_type = field->token.type == JSON_TOKEN_STR ? FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (field->token.type == JSON_TOKEN_ANY && + !json_token_is_multikey(&parent->token) && + !json_token_is_leaf(&parent->token)) { + assert(expected_type == FIELD_TYPE_ARRAY); + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, + tuple_field_path(parent), + field_type_strs[parent->type], + "multikey array"); + goto fail; + } if (field_type1_contains_type2(parent->type, expected_type)) { parent->type = expected_type; } else { @@ -475,9 +481,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, break; format->min_tuple_size += mp_sizeof_nil(); } - } else { + } else if (field->token.type == JSON_TOKEN_STR) { /* Account a key string for map member. */ - assert(field->token.type == JSON_TOKEN_STR); format->min_tuple_size += mp_sizeof_str(field->token.len); } @@ -805,6 +810,59 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, return true; } +/** + * Grow tuple_field_map allocation left with extent_size + * 0 bytes. + */ +static int +tuple_field_map_realloc(uint32_t **field_map, uint32_t *field_map_size, + uint32_t extent_size, struct region *region) +{ + assert(extent_size % sizeof(uint32_t) == 0); + uint32_t new_field_map_size = *field_map_size + extent_size; + uint32_t *new_field_map = region_alloc(region, new_field_map_size); + if (new_field_map == NULL) { + diag_set(OutOfMemory, new_field_map_size, "region_alloc", + "new_field_map"); + return -1; + } + memset(new_field_map, 0, extent_size); + if (*field_map != NULL) { + memcpy((char *)new_field_map + extent_size, + (char *)*field_map - *field_map_size, *field_map_size); + } + *field_map = (uint32_t *)((char *)new_field_map + new_field_map_size); + *field_map_size = new_field_map_size; + return 0; +} + +/** + * Retrieve tuple field map extent by root_offset_slot, reallocate + * memory if required. + */ +struct field_map_ext * +tuple_field_map_ext_retrieve(uint32_t **field_map, uint32_t *field_map_size, + int32_t root_offset_slot, uint32_t extent_items, + struct region *region) +{ + uint32_t extent_sz = sizeof(struct field_map_ext) + + extent_items * sizeof(uint32_t); + uint32_t slot_off = (*field_map)[root_offset_slot]; + if (slot_off == 0) { + /* Field map extent was not created yet. */ + slot_off = *field_map_size / sizeof(uint32_t); + (*field_map)[root_offset_slot] = slot_off; + if (tuple_field_map_realloc(field_map, field_map_size, + extent_sz, region) != 0) + return NULL; + } + struct field_map_ext *field_map_ext = + tuple_field_map_ext(*field_map, root_offset_slot); + assert(field_map_ext->size == 0 || field_map_ext->size == extent_items); + field_map_ext->size = extent_items; + return field_map_ext; +} + /** @sa declaration for details. */ int tuple_field_map_create(struct tuple_format *format, const char *tuple, @@ -817,14 +875,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, return 0; /* Nothing to initialize */ } struct region *region = &fiber()->gc; - *field_map_size = format->field_map_size; - *field_map = region_alloc(region, *field_map_size); - if (*field_map == NULL) { - diag_set(OutOfMemory, *field_map_size, "region_alloc", - "field_map"); + *field_map = NULL; + *field_map_size = 0; + if (tuple_field_map_realloc(field_map, field_map_size, + format->field_map_size, region) != 0) return -1; - } - *field_map = (uint32_t *)((char *)*field_map + *field_map_size); const char *pos = tuple; int rc = 0; @@ -864,11 +919,6 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, uint32_t defined_field_count = MIN(field_count, validate ? tuple_format_field_count(format) : format->index_field_count); - /* - * Nullify field map to be able to detect by 0, - * which key fields are absent in tuple_field(). - */ - memset((char *)*field_map - *field_map_size, 0, *field_map_size); /* * Prepare mp stack of the size equal to the maximum depth * of the indexed field in the format::fields tree @@ -885,6 +935,12 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, struct mp_stack stack; mp_stack_create(&stack, format->fields_depth, frames); mp_stack_push(&stack, MP_ARRAY, defined_field_count); + /** + * Pointer to the stack entry that represents the multikey + * index root ARRAY entry. + */ + struct mp_frame *mk_parent_frame = NULL; + struct tuple_field *field; struct json_token *parent = &format->fields.root; while (true) { @@ -901,6 +957,10 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, mp_stack_pop(&stack); if (mp_stack_is_empty(&stack)) goto finish; + if (json_token_is_multikey(parent)) { + /* Leave multikey index branch. */ + mk_parent_frame = NULL; + } parent = parent->parent; } /* @@ -950,8 +1010,23 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, field_type_strs[field->type]); goto error; } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + mk_parent_frame == NULL) { (*field_map)[field->offset_slot] = pos - tuple; + } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + mk_parent_frame != NULL) { + uint32_t extent_items = mk_parent_frame->count; + struct field_map_ext *field_map_ext = + tuple_field_map_ext_retrieve(field_map, + field_map_size, + field->offset_slot, + extent_items, region); + if (field_map_ext == NULL) + goto error; + int multikey_idx = mk_parent_frame->curr; + field_map_ext->field_map[ + -multikey_idx] = pos - tuple; + } if (required_fields != NULL) bit_clear(required_fields, field->id); } @@ -968,6 +1043,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, mp_decode_array(&pos) : mp_decode_map(&pos); mp_stack_push(&stack, type, size); + if (json_token_is_multikey(&field->token)) { + /* Track multikey entry frame. */ + assert(type == MP_ARRAY); + mk_parent_frame = &frames[stack.used - 1]; + } parent = &field->token; } else { mp_next(&pos); diff --git a/test/engine/json.result b/test/engine/json.result index a790cdfbc..5c7a9b3b3 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -691,7 +691,85 @@ s = box.schema.space.create('withdata') ... idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) --- -- error: Tarantool does not support multikey indexes +... +s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +--- +- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}}) +--- +- [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:select({'James', 'Bond'}) +--- +- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:select({'Kirill', 'Shcherbatov'}) +--- +- [] +... +idx:select({'Ivan', 'Ivanych'}) +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] +... +idx:select({'Vasya', 'Pupkin'}) +--- +- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:select() +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +--- +- error: Duplicate key exists in unique index 'idx' in space 'withdata' +... +idx:select() +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] + - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +idx:delete({'Vasya', 'Pupkin'}) +--- +- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +--- +- [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +--- +- [2, 1, [{'fname': 'James', 'sname': 'Bond'}]] +... +idx:select() +--- +- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]] + - [2, 1, [{'fname': 'James', 'sname': 'Bond'}]] + - [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]] +... +s:drop() +--- +... +s = box.schema.space.create('withdata') +--- +... +pk = s:create_index('pk') +--- +... +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +--- +... +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) +--- +- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1', + 'extra': {'sname': 'C2'}}]] ... s:drop() --- diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index f9a7180b1..dc6016af9 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -198,4 +198,24 @@ s:drop() -- s = box.schema.space.create('withdata') idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}}) +s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}}) +s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}}) +idx:select({'James', 'Bond'}) +idx:select({'Kirill', 'Shcherbatov'}) +idx:select({'Ivan', 'Ivanych'}) +idx:select({'Vasya', 'Pupkin'}) +idx:select() +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +idx:select() +idx:delete({'Vasya', 'Pupkin'}) +s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}}) +s:insert({2, 1, {{fname='James', sname='Bond'}}}) +idx:select() +s:drop() + +s = box.schema.space.create('withdata') +pk = s:create_index('pk') +idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}}) +s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}}) s:drop() -- 2.21.0