[PATCH v4 3/3] box: introduce multikey indexes in memtx
Vladimir Davydov
vdavydov.dev at gmail.com
Mon Apr 29 19:06:15 MSK 2019
On Fri, Apr 19, 2019 at 05:14:25PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/field_map.h b/src/box/field_map.h
> index f6c653030..11733baa4 100644
> --- a/src/box/field_map.h
> +++ b/src/box/field_map.h
> @@ -32,8 +32,10 @@
> */
> #include <assert.h>
> #include <stdint.h>
> +#include <stdlib.h>
Unnecessary inclusion?
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index d07bbe8bc..1366c46b8 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -104,6 +104,10 @@ key_def_dup(const struct key_def *src)
> size_t path_offset = src->parts[i].path - (char *)src;
> res->parts[i].path = (char *)res + path_offset;
> }
> + if (src->multikey_path != NULL) {
> + size_t path_offset = src->multikey_path - (char *)src;
> + res->multikey_path = (char *)res + path_offset;
> + }
> return res;
> }
>
> @@ -138,7 +142,69 @@ key_def_set_func(struct key_def *def)
> key_def_set_extract_func(def);
> }
>
> -static void
> +static int
> +key_def_set_part_path(struct key_def *def, uint32_t part_no, const char *path,
> + uint32_t path_len, char **path_pool)
> +{
> + struct key_part *part = &def->parts[part_no];
> + if (path == NULL) {
> + part->path = NULL;
> + part->path_len = 0;
> + return 0;
> + }
> + assert(path_pool != NULL);
> + part->path = *path_pool;
> + *path_pool += path_len;
> + memcpy(part->path, path, path_len);
> + part->path_len = path_len;
> +
> + /*
> + * Test whether this key_part has array index
> + * placeholder [*] (i.e. is a part of multikey index
> + * definition).
> + */
> + int multikey_path_len =
> + json_path_multikey_offset(path, path_len, TUPLE_INDEX_BASE);
> + if ((uint32_t) multikey_path_len == path_len)
> + return 0;
> +
> + /*
> + * In case of multikey index key_parts must have the
> + * same JSON prefix.
> + */
> + if (def->multikey_path == NULL) {
> + /*
> + * Keep the index of the first multikey key_part
> + * and the length of JSON path substring to the
> + * array index placeholder sign [*].
> + */
> + def->multikey_path = part->path;
> + def->multikey_fieldno = part->fieldno;
> + def->multikey_path_len = (uint32_t) multikey_path_len;
> + } else if (json_path_cmp(path, multikey_path_len, def->multikey_path,
> + def->multikey_path_len,
> + TUPLE_INDEX_BASE) != 0) {
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part_no + TUPLE_INDEX_BASE,
> + "incompatable multikey index path");
> + return -1;
> + }
> + int multikey_path_suffix_len =
> + path_len - multikey_path_len - strlen("[*]");
> + if (json_path_multikey_offset(path + multikey_path_len + strlen("[*]"),
> + multikey_path_suffix_len,
> + TUPLE_INDEX_BASE) !=
Please don't assume that JSON_TOKEN_ANY is represented by "[*]" outside
the json library: suppose we later allow the user to add spaces ("[ * ]")
— this code would break then.
Either use json_lexer to skip the multikey token or add a helper
function to the json lib.
> + multikey_path_suffix_len) {
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part_no + TUPLE_INDEX_BASE,
> + "no more than one array index placeholder [*] is "
> + "allowed in JSON path");
> + return -1;
> + }
> + return 0;
> +}
> +
> +static int
> key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
> enum field_type type, enum on_conflict_action nullable_action,
> struct coll *coll, uint32_t coll_id,
> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index fe037c54a..9a0834994 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -584,6 +584,146 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
> return 0;
> }
>
> +/**
> + * Perform tuple insertion by given multikey index.
> + * In case of replacement, all old tuple entries are deleted
> + * by all it's multikey indexes.
> + */
> +static int
> +memtx_tree_index_insert_multikey(struct memtx_tree_index *index,
Bad name: we have replace_multikey and insert_multikey — one would think
the only difference between them is the operation type (REPLACE/INSERT),
but that would be wrong: one is actually a helper for the other.
Please come up with a better name. What about
memtx_tree_index_replace_multikey_one
?
That would emphasize that it inserts a single multikey entry.
> + struct tuple *old_tuple, struct tuple *new_tuple,
> + enum dup_replace_mode mode, int multikey_idx,
> + struct tuple **replaced_tuple)
> +{
> + struct memtx_tree_data new_data, dup_data;
> + new_data.tuple = new_tuple;
> + new_data.hint = multikey_idx;
> + dup_data.tuple = NULL;
> + if (memtx_tree_insert(&index->tree, new_data, &dup_data) != 0) {
> + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, "memtx_tree_index",
> + "replace");
> + return -1;
> + }
> + int errcode = 0;
> + if (dup_data.tuple == new_tuple) {
> + /*
> + * When tuple contains the same key multiple
> + * times, the previous key occurrence is pushed
> + * out of the index.
> + */
> + dup_data.tuple = NULL;
> + } else if ((errcode = replace_check_dup(old_tuple, dup_data.tuple,
> + mode)) != 0) {
> + /* Rollback replace. */
> + memtx_tree_delete(&index->tree, new_data);
> + if (dup_data.tuple != NULL)
> + memtx_tree_insert(&index->tree, dup_data, NULL);
> + struct space *sp = space_cache_find(index->base.def->space_id);
> + if (sp != NULL) {
> + diag_set(ClientError, errcode, index->base.def->name,
> + space_name(sp));
> + }
> + return -1;
> + }
> + *replaced_tuple = dup_data.tuple;
> + if (dup_data.tuple == NULL)
> + return 0;
> +
> + /*
> + * Propagate dup_data.tuple deletion for all multikey
> + * index keys extracted by dup_data.tuple.
> + */
> + int conflict_idx = (int)dup_data.hint;
> + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> + uint32_t multikey_count = tuple_multikey_count(dup_data.tuple, cmp_def);
> + for (int i = 0; (uint32_t) i < multikey_count; i++) {
> + if (conflict_idx == i)
> + continue;
> + dup_data.hint = i;
> + memtx_tree_delete(&index->tree, dup_data);
> + }
> + /*
> + * The new_tuple multikey index key (by conflict_idx) may
> + * be deleted from index when old_tuple has another key by
> + * some other multikey index != conflict_idx. Restore it
> + * as a precaution.
> + */
> + memtx_tree_insert(&index->tree, new_data, NULL);
It would be better if we didn't delete the tuple in this case. Or at
least, memtx_tree_delete could return the deleted tuple so that we
wouldn't call memtx_tree_insert when we don't need to. Not a
show-stopper though.
> + return 0;
> +}
> +
> +static int
> +memtx_tree_index_replace_multikey(struct index *base, struct tuple *old_tuple,
> + struct tuple *new_tuple, enum dup_replace_mode mode,
> + struct tuple **result)
> +{
> + struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> + *result = NULL;
> + if (new_tuple != NULL) {
> + int multikey_idx = 0, err;
> + uint32_t multikey_count =
> + tuple_multikey_count(new_tuple, cmp_def);
> + for (; (uint32_t) multikey_idx < multikey_count;
> + multikey_idx++) {
> + struct tuple *replaced_tuple;
> + err = memtx_tree_index_insert_multikey(index, old_tuple,
> + new_tuple, mode, multikey_idx,
> + &replaced_tuple);
> + if (err != 0)
> + break;
> + if (replaced_tuple != NULL) {
> + assert(*result == NULL ||
> + *result == replaced_tuple);
> + *result = replaced_tuple;
> + }
> + }
> + if (err != 0) {
> + /*
> + * In case of error, rollback new_tuple
> + * insertion by multikey index [0, multikey_idx).
> + */
> + struct memtx_tree_data data;
> + data.tuple = new_tuple;
> + for (int i = 0; i < multikey_idx; i++) {
> + data.hint = i;
> + memtx_tree_delete(&index->tree, data);
> + }
> + if (*result != NULL) {
> + /*
> + * Restore replaced tuple index
> + * occurrences.
> + */
> + data.tuple = *result;
> + multikey_count =
> + tuple_multikey_count(*result, cmp_def);
> + for (int i = 0;
> + (uint32_t) i < multikey_count; i++) {
> + data.hint = i;
> + memtx_tree_insert(&index->tree, data,
> + NULL);
> + }
> + }
> + return -1;
> + }
> + if (*result != NULL)
> + return 0;
> + }
> + if (old_tuple != NULL) {
> + *result = old_tuple;
> + struct memtx_tree_data data;
> + data.tuple = old_tuple;
> + uint32_t multikey_count =
> + tuple_multikey_count(old_tuple, cmp_def);
> + for (int multikey_idx = 0;
> + (uint32_t) multikey_idx < multikey_count; multikey_idx++) {
> + data.hint = multikey_idx;
> + memtx_tree_delete(&index->tree, data);
> + }
> + }
> + return 0;
> +}
> +
> static struct iterator *
> memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
> const char *key, uint32_t part_count)
> @@ -655,31 +795,32 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
> return 0;
> }
>
> +/* Initialize the next element of the index build_array. */
> static int
> -memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> +memtx_tree_index_build_array_append(struct memtx_tree_index *index,
> + struct tuple *tuple, hint_t hint)
> {
> - struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> - struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> + bool need_realloc = false;
> if (index->build_array == NULL) {
> - index->build_array = malloc(MEMTX_EXTENT_SIZE);
> - if (index->build_array == NULL) {
> - diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
> - "memtx_tree_index", "build_next");
> - return -1;
> - }
> index->build_array_alloc_size =
> MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
> + need_realloc = true;
> + }
> + if (index->build_array_size >= index->build_array_alloc_size) {
> + index->build_array_alloc_size +=
> + MAX(index->build_array_alloc_size / 2,
> + index->build_array_size + 1 -
> + index->build_array_alloc_size);
> + need_realloc = true;
Please avoid unnecessary refactoring - it complicates review.
The function looks fine to me as it is - no need to merge malloc
and realloc paths IMO.
> }
> - assert(index->build_array_size <= index->build_array_alloc_size);
> - if (index->build_array_size == index->build_array_alloc_size) {
> - index->build_array_alloc_size = index->build_array_alloc_size +
> - index->build_array_alloc_size / 2;
> + if (need_realloc) {
> struct memtx_tree_data *tmp =
> realloc(index->build_array,
> index->build_array_alloc_size * sizeof(*tmp));
> if (tmp == NULL) {
> diag_set(OutOfMemory, index->build_array_alloc_size *
> - sizeof(*tmp), "memtx_tree_index", "build_next");
> + sizeof(*tmp), "memtx_tree_index",
> + "build_array");
> return -1;
> }
> index->build_array = tmp;
> @@ -687,10 +828,66 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> struct memtx_tree_data *elem =
> &index->build_array[index->build_array_size++];
> elem->tuple = tuple;
> - elem->hint = tuple_hint(tuple, cmp_def);
> + elem->hint = hint;
> + return 0;
> +}
> +
> +static int
> +memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> +{
> + struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> + return memtx_tree_index_build_array_append(index, tuple,
> + tuple_hint(tuple, cmp_def));
> +}
> +
> +static int
> +memtx_tree_index_build_next_multikey(struct index *base, struct tuple *tuple)
> +{
> + struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> + uint32_t multikey_count = tuple_multikey_count(tuple, cmp_def);
> + for (uint32_t multikey_idx = 0; multikey_idx < multikey_count;
> + multikey_idx++) {
> + if (memtx_tree_index_build_array_append(index, tuple,
> + multikey_idx) != 0)
> + return -1;
> + }
> return 0;
> }
>
> +/**
> + * Process build_array of specified index and remove duplicates
> + * of equal tuples (in terms of index's cmp_def and have same
> + * tuple pointer). The build_array is expected to be sorted.
> + */
> +static void
> +memtx_tree_index_build_array_deduplicate(struct memtx_tree_index *index)
> +{
> + struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> + size_t i = 0;
> + while (i < index->build_array_size) {
> + size_t next_key = ++i;
> + while (next_key < index->build_array_size &&
> + index->build_array[next_key].tuple ==
> + index->build_array[next_key - 1].tuple &&
> + tuple_compare_hinted(index->build_array[next_key].tuple,
> + index->build_array[next_key].hint,
> + index->build_array[next_key - 1].tuple,
> + index->build_array[next_key - 1].hint,
> + cmp_def) == 0)
> + next_key++;
> + size_t equal_keys = next_key - i;
> + if (equal_keys == 0)
> + continue;
> + memmove(&index->build_array[i - 1],
> + &index->build_array[next_key - 1],
> + (index->build_array_size - next_key + 1) *
> + sizeof(index->build_array[0]));
This smells like O(N^2): the memmove inside the loop makes the removal
quadratic in the worst case. One can remove duplicates from a sorted
array in O(N) with a single read/write pointer pass. Please fix.
> + index->build_array_size -= equal_keys;
> + }
> +}
> +
> static void
> memtx_tree_index_end_build(struct index *base)
> {
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index a7c8e872f..585f2680f 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -194,6 +196,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
> return NULL;
> }
>
> +/**
> + * Check if child_field may be attached to parent_field,
> + * update the parent_field container type if required.
> + */
> +static int
> +tuple_field_ensure_child_compatibility(struct tuple_field *parent,
> + struct tuple_field *child)
Wouldn't tuple_format_check_field be a better name?
> +{
> + enum field_type expected_type =
> + child->token.type == JSON_TOKEN_STR ?
> + FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
> + if (field_type1_contains_type2(parent->type, expected_type)) {
> + parent->type = expected_type;
> + } else {
> + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
> + tuple_field_path(parent),
> + field_type_strs[parent->type],
> + field_type_strs[expected_type]);
> + return -1;
> + }
> + /*
> + * Attempt to append JSON_TOKEN_ANY leaf to parent that
> + * has other child records already i.e. is a intermediate
> + * field of non-multikey JSON index.
> + */
> + bool is_not_multikey_parent_conflict =
> + child->token.type == JSON_TOKEN_ANY &&
> + !json_token_is_multikey(&parent->token) &&
> + !json_token_is_leaf(&parent->token);
> + /*
> + * Attempt to append not JSON_TOKEN_ANY child record to
> + * the parent defined as multikey index root.
> + */
> + bool is_multikey_parent_conflict =
> + json_token_is_multikey(&parent->token) &&
> + child->token.type != JSON_TOKEN_ANY;
> + if (is_not_multikey_parent_conflict || is_multikey_parent_conflict) {
With these flag names, the condition reads like "if (!A || A)" — a
logical tautology at first glance — which is confusing.
Please rewrite without these confusing helper flags. You could simply
set diag twice - it'd be okay:
if (child->token.type == JSON_TOKEN_ANY ...) {
diag_set(...)
return -1;
}
if (json_token_is_multikey(&parent->token) ...) {
diag_set(...)
return -1;
}
> + diag_set(ClientError, ER_MULTIKEY_INDEX_MISMATCH,
> + tuple_field_path(parent));
> + return -1;
> + }
> + return 0;
> +}
> +
> /**
> * Given a field number and a path, add the corresponding field
> * to the tuple format, allocating intermediate fields if
> @@ -454,6 +497,34 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
> if (json_token_is_leaf(&field->token) &&
> !tuple_field_is_nullable(field))
> bit_set(format->required_fields, field->id);
> + /*
> + * When the field represents array index
> + * placeholder [*] (JSON_TOKEN_ANY), it requires
> + * own multikey_required_fields allocation, that
> + * must be initialized by bypassing the JSON
> + * subtree of this field.
> + */
> + if (field->token.type == JSON_TOKEN_ANY) {
> + assert(field->multikey_required_fields == NULL);
> + void *multikey_required_fields =
> + calloc(1, required_fields_sz);
> + if (multikey_required_fields == NULL) {
> + diag_set(OutOfMemory, required_fields_sz,
> + "malloc", "required field bitmap");
> + return -1;
> + }
> + struct tuple_field *child;
> + json_tree_foreach_entry_preorder(child, &field->token,
> + struct tuple_field, token) {
> + if (json_token_is_leaf(&child->token) &&
> + !tuple_field_is_nullable(child)) {
> + bit_set(multikey_required_fields,
> + child->id);
> + }
A json_tree_foreach_entry_preorder loop nested inside another one —
O(N^2) again. AFAIU you could fill the bitmap in the outer loop without
hurting readability.
Also, I don't quite like that you use two bitmaps simultaneously here
and in tuple_field_map_create. I think we could make the root bitmap
only store fields not present in any multikey bitmap (those are checked
anyway). The code would look cleaner IMHO.
> + }
> + field->multikey_required_fields =
> + multikey_required_fields;
> + }
> }
> format->hash = tuple_format_hash(format);
> return 0;
More information about the Tarantool-patches
mailing list