Date: Mon, 29 Apr 2019 19:06:15 +0300
From: Vladimir Davydov
Subject: Re: [PATCH v4 3/3] box: introduce multikey indexes in memtx
Message-ID: <20190429160615.5a2uedzpzj6rw76k@esperanza>
To: Kirill Shcherbatov
Cc: tarantool-patches@freelists.org, kostja@tarantool.org

On Fri, Apr 19, 2019 at 05:14:25PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/field_map.h b/src/box/field_map.h
> index f6c653030..11733baa4 100644
> --- a/src/box/field_map.h
> +++ b/src/box/field_map.h
> @@ -32,8 +32,10 @@
>   */
>  #include
>  #include
> +#include

Unnecessary inclusion?

> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index d07bbe8bc..1366c46b8 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -104,6 +104,10 @@ key_def_dup(const struct key_def *src)
>  		size_t path_offset = src->parts[i].path - (char *)src;
>  		res->parts[i].path = (char *)res + path_offset;
>  	}
> +	if (src->multikey_path != NULL) {
> +		size_t path_offset = src->multikey_path - (char *)src;
> +		res->multikey_path = (char *)res + path_offset;
> +	}
>  	return res;
>  }
>
> @@ -138,7 +142,69 @@ key_def_set_func(struct key_def *def)
>  	key_def_set_extract_func(def);
>  }
>
> -static void
> +static int
> +key_def_set_part_path(struct key_def *def, uint32_t part_no, const char *path,
> +		      uint32_t path_len, char **path_pool)
> +{
> +	struct key_part *part = &def->parts[part_no];
> +	if (path == NULL) {
> +		part->path = NULL;
> +		part->path_len = 0;
> +		return 0;
> +	}
> +	assert(path_pool != NULL);
> +	part->path = *path_pool;
> +	*path_pool += path_len;
> +	memcpy(part->path, path, path_len);
> +	part->path_len = path_len;
> +
> +	/*
> +	 * Test whether this key_part has an array index
> +	 * placeholder [*] (i.e. is a part of a multikey index
> +	 * definition).
> +	 */
> +	int multikey_path_len =
> +		json_path_multikey_offset(path, path_len, TUPLE_INDEX_BASE);
> +	if ((uint32_t) multikey_path_len == path_len)
> +		return 0;
> +
> +	/*
> +	 * In case of a multikey index, all key_parts must have
> +	 * the same JSON prefix.
> +	 */
> +	if (def->multikey_path == NULL) {
> +		/*
> +		 * Keep the index of the first multikey key_part
> +		 * and the length of the JSON path substring up
> +		 * to the array index placeholder [*].
> +		 */
> +		def->multikey_path = part->path;
> +		def->multikey_fieldno = part->fieldno;
> +		def->multikey_path_len = (uint32_t) multikey_path_len;
> +	} else if (json_path_cmp(path, multikey_path_len, def->multikey_path,
> +				 def->multikey_path_len,
> +				 TUPLE_INDEX_BASE) != 0) {
> +		diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +			 part_no + TUPLE_INDEX_BASE,
> +			 "incompatible multikey index path");
> +		return -1;
> +	}
> +	int multikey_path_suffix_len =
> +		path_len - multikey_path_len - strlen("[*]");
> +	if (json_path_multikey_offset(path + multikey_path_len + strlen("[*]"),
> +				      multikey_path_suffix_len,
> +				      TUPLE_INDEX_BASE) !=

Please don't assume that JSON_TOKEN_ANY is represented by "[*]"
outside the json library: suppose we allow the user to add spaces
("[ * ]") one day - this code will break. Either use json_lexer to
skip the multikey token or add a helper function to the json lib.
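Something like this json lib helper is what I have in mind (an
untested sketch; the name and exact contract are just a suggestion):

	/**
	 * Return the length of the multikey token [*] at the
	 * given position of a JSON path, or -1 if the path
	 * doesn't start with such a token (however it is
	 * spelled).
	 */
	static int
	json_path_multikey_token_len(const char *path, int path_len,
				     int index_base)
	{
		struct json_lexer lexer;
		struct json_token token;
		json_lexer_create(&lexer, path, path_len, index_base);
		if (json_lexer_next_token(&lexer, &token) != 0 ||
		    token.type != JSON_TOKEN_ANY)
			return -1;
		/* lexer.offset points right past the parsed token. */
		return lexer.offset;
	}

key_def_set_part_path would then use the returned length instead of
the hard-coded strlen("[*]").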
> +	    multikey_path_suffix_len) {
> +		diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +			 part_no + TUPLE_INDEX_BASE,
> +			 "no more than one array index placeholder [*] is "
> +			 "allowed in JSON path");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static int
>  key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
>  		 enum field_type type, enum on_conflict_action nullable_action,
>  		 struct coll *coll, uint32_t coll_id,
> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index fe037c54a..9a0834994 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -584,6 +584,146 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
>  	return 0;
>  }
>
> +/**
> + * Perform tuple insertion by the given multikey index.
> + * In case of replacement, all old tuple entries are deleted
> + * by all its multikey indexes.
> + */
> +static int
> +memtx_tree_index_insert_multikey(struct memtx_tree_index *index,

Bad name: we have replace_multikey and insert_multikey - one would
think the only difference between them is the operation type
(REPLACE/INSERT), but that would be totally wrong, as one is used as
a helper for the other. Please come up with a better name. What about
memtx_tree_index_replace_multikey_one? That would emphasize that it
inserts a single multikey entry.

> +				 struct tuple *old_tuple,
> +				 struct tuple *new_tuple,
> +				 enum dup_replace_mode mode, int multikey_idx,
> +				 struct tuple **replaced_tuple)
> +{
> +	struct memtx_tree_data new_data, dup_data;
> +	new_data.tuple = new_tuple;
> +	new_data.hint = multikey_idx;
> +	dup_data.tuple = NULL;
> +	if (memtx_tree_insert(&index->tree, new_data, &dup_data) != 0) {
> +		diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, "memtx_tree_index",
> +			 "replace");
> +		return -1;
> +	}
> +	int errcode = 0;
> +	if (dup_data.tuple == new_tuple) {
> +		/*
> +		 * When a tuple contains the same key multiple
> +		 * times, the previous key occurrence is pushed
> +		 * out of the index.
> +		 */
> +		dup_data.tuple = NULL;
> +	} else if ((errcode = replace_check_dup(old_tuple, dup_data.tuple,
> +						mode)) != 0) {
> +		/* Rollback replace. */
> +		memtx_tree_delete(&index->tree, new_data);
> +		if (dup_data.tuple != NULL)
> +			memtx_tree_insert(&index->tree, dup_data, NULL);
> +		struct space *sp = space_cache_find(index->base.def->space_id);
> +		if (sp != NULL) {
> +			diag_set(ClientError, errcode, index->base.def->name,
> +				 space_name(sp));
> +		}
> +		return -1;
> +	}
> +	*replaced_tuple = dup_data.tuple;
> +	if (dup_data.tuple == NULL)
> +		return 0;
> +
> +	/*
> +	 * Propagate dup_data.tuple deletion for all multikey
> +	 * index keys extracted from dup_data.tuple.
> +	 */
> +	int conflict_idx = (int)dup_data.hint;
> +	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> +	uint32_t multikey_count = tuple_multikey_count(dup_data.tuple, cmp_def);
> +	for (int i = 0; (uint32_t) i < multikey_count; i++) {
> +		if (conflict_idx == i)
> +			continue;
> +		dup_data.hint = i;
> +		memtx_tree_delete(&index->tree, dup_data);
> +	}
> +	/*
> +	 * The new_tuple multikey index key (at conflict_idx) may
> +	 * be deleted from the index when old_tuple has another key
> +	 * at some other multikey index != conflict_idx. Restore it
> +	 * as a precaution.
> +	 */
> +	memtx_tree_insert(&index->tree, new_data, NULL);

It would be better if we didn't delete the tuple in this case. Or at
least, memtx_tree_delete could return the deleted tuple so that we
wouldn't call memtx_tree_insert when we don't need to. Not a show
stopper though.
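For the record, here's roughly what I mean, assuming we teach the BPS
tree a delete-by-value method (untested sketch; memtx_tree_delete_value
is hypothetical and would remove an entry only if it is identical -
same tuple pointer and hint - to the given one):

	for (int i = 0; (uint32_t) i < multikey_count; i++) {
		if (conflict_idx == i)
			continue;
		dup_data.hint = i;
		/*
		 * Removes the entry only if it is exactly
		 * {dup_data.tuple, i}; an equal entry that
		 * belongs to new_tuple is left intact, so the
		 * precautionary re-insert of new_data becomes
		 * unnecessary.
		 */
		memtx_tree_delete_value(&index->tree, dup_data, NULL);
	}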
> +	return 0;
> +}
> +
> +static int
> +memtx_tree_index_replace_multikey(struct index *base, struct tuple *old_tuple,
> +				  struct tuple *new_tuple,
> +				  enum dup_replace_mode mode,
> +				  struct tuple **result)
> +{
> +	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> +	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> +	*result = NULL;
> +	if (new_tuple != NULL) {
> +		int multikey_idx = 0, err;
> +		uint32_t multikey_count =
> +			tuple_multikey_count(new_tuple, cmp_def);
> +		for (; (uint32_t) multikey_idx < multikey_count;
> +		     multikey_idx++) {
> +			struct tuple *replaced_tuple;
> +			err = memtx_tree_index_insert_multikey(index,
> +					old_tuple, new_tuple, mode,
> +					multikey_idx, &replaced_tuple);
> +			if (err != 0)
> +				break;
> +			if (replaced_tuple != NULL) {
> +				assert(*result == NULL ||
> +				       *result == replaced_tuple);
> +				*result = replaced_tuple;
> +			}
> +		}
> +		if (err != 0) {
> +			/*
> +			 * In case of error, rollback new_tuple
> +			 * insertion by multikey index
> +			 * [0, multikey_idx).
> +			 */
> +			struct memtx_tree_data data;
> +			data.tuple = new_tuple;
> +			for (int i = 0; i < multikey_idx; i++) {
> +				data.hint = i;
> +				memtx_tree_delete(&index->tree, data);
> +			}
> +			if (*result != NULL) {
> +				/*
> +				 * Restore replaced tuple index
> +				 * occurrences.
> +				 */
> +				data.tuple = *result;
> +				multikey_count =
> +					tuple_multikey_count(*result,
> +							     cmp_def);
> +				for (int i = 0;
> +				     (uint32_t) i < multikey_count; i++) {
> +					data.hint = i;
> +					memtx_tree_insert(&index->tree,
> +							  data, NULL);
> +				}
> +			}
> +			return -1;
> +		}
> +		if (*result != NULL)
> +			return 0;
> +	}
> +	if (old_tuple != NULL) {
> +		*result = old_tuple;
> +		struct memtx_tree_data data;
> +		data.tuple = old_tuple;
> +		uint32_t multikey_count =
> +			tuple_multikey_count(old_tuple, cmp_def);
> +		for (int multikey_idx = 0;
> +		     (uint32_t) multikey_idx < multikey_count;
> +		     multikey_idx++) {
> +			data.hint = multikey_idx;
> +			memtx_tree_delete(&index->tree, data);
> +		}
> +	}
> +	return 0;
> +}
> +
>  static struct iterator *
>  memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
>  				 const char *key, uint32_t part_count)
> @@ -655,31 +795,32 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
>  	return 0;
>  }
>
> +/* Initialize the next element of the index build_array. */
>  static int
> -memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> +memtx_tree_index_build_array_append(struct memtx_tree_index *index,
> +				    struct tuple *tuple, hint_t hint)
>  {
> -	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> -	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> +	bool need_realloc = false;
>  	if (index->build_array == NULL) {
> -		index->build_array = malloc(MEMTX_EXTENT_SIZE);
> -		if (index->build_array == NULL) {
> -			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
> -				 "memtx_tree_index", "build_next");
> -			return -1;
> -		}
>  		index->build_array_alloc_size =
>  			MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
> +		need_realloc = true;
> +	}
> +	if (index->build_array_size >= index->build_array_alloc_size) {
> +		index->build_array_alloc_size +=
> +			MAX(index->build_array_alloc_size / 2,
> +			    index->build_array_size + 1 -
> +			    index->build_array_alloc_size);
> +		need_realloc = true;

Please avoid unnecessary refactoring - it complicates review. The
function looks fine to me as it is - no need to merge the malloc and
realloc paths IMO.
>  	}
> -	assert(index->build_array_size <= index->build_array_alloc_size);
> -	if (index->build_array_size == index->build_array_alloc_size) {
> -		index->build_array_alloc_size = index->build_array_alloc_size +
> -			index->build_array_alloc_size / 2;
> +	if (need_realloc) {
>  		struct memtx_tree_data *tmp =
>  			realloc(index->build_array,
>  				index->build_array_alloc_size * sizeof(*tmp));
>  		if (tmp == NULL) {
>  			diag_set(OutOfMemory, index->build_array_alloc_size *
> -				 sizeof(*tmp), "memtx_tree_index", "build_next");
> +				 sizeof(*tmp), "memtx_tree_index",
> +				 "build_array");
>  			return -1;
>  		}
>  		index->build_array = tmp;
> @@ -687,10 +828,66 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
>  	struct memtx_tree_data *elem =
>  		&index->build_array[index->build_array_size++];
>  	elem->tuple = tuple;
> -	elem->hint = tuple_hint(tuple, cmp_def);
> +	elem->hint = hint;
> +	return 0;
> +}
> +
> +static int
> +memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
> +{
> +	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> +	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> +	return memtx_tree_index_build_array_append(index, tuple,
> +						   tuple_hint(tuple, cmp_def));
> +}
> +
> +static int
> +memtx_tree_index_build_next_multikey(struct index *base, struct tuple *tuple)
> +{
> +	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> +	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> +	uint32_t multikey_count = tuple_multikey_count(tuple, cmp_def);
> +	for (uint32_t multikey_idx = 0; multikey_idx < multikey_count;
> +	     multikey_idx++) {
> +		if (memtx_tree_index_build_array_append(index, tuple,
> +							multikey_idx) != 0)
> +			return -1;
> +	}
>  	return 0;
>  }
>
> +/**
> + * Process the build_array of the specified index and remove
> + * duplicate entries, i.e. entries that are equal in terms of
> + * the index's cmp_def and point to the same tuple. The
> + * build_array is expected to be sorted.
> + */
> +static void
> +memtx_tree_index_build_array_deduplicate(struct memtx_tree_index *index)
> +{
> +	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
> +	size_t i = 0;
> +	while (i < index->build_array_size) {
> +		size_t next_key = ++i;
> +		while (next_key < index->build_array_size &&
> +		       index->build_array[next_key].tuple ==
> +		       index->build_array[next_key - 1].tuple &&
> +		       tuple_compare_hinted(
> +				index->build_array[next_key].tuple,
> +				index->build_array[next_key].hint,
> +				index->build_array[next_key - 1].tuple,
> +				index->build_array[next_key - 1].hint,
> +				cmp_def) == 0)
> +			next_key++;
> +		size_t equal_keys = next_key - i;
> +		if (equal_keys == 0)
> +			continue;
> +		memmove(&index->build_array[i - 1],
> +			&index->build_array[next_key - 1],
> +			(index->build_array_size - next_key + 1) *
> +			sizeof(index->build_array[0]));

This smells like O(N^2). Apparently, one can remove duplicates from a
sorted array in O(N). Please fix.
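The classic read/write pointer pass does it in one scan, something
like this (untested sketch):

	static void
	memtx_tree_index_build_array_deduplicate(struct memtx_tree_index *index)
	{
		if (index->build_array_size == 0)
			return;
		struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
		/* w points at the last element kept so far. */
		size_t w = 0;
		for (size_t r = 1; r < index->build_array_size; r++) {
			if (index->build_array[r].tuple ==
			    index->build_array[w].tuple &&
			    tuple_compare_hinted(index->build_array[r].tuple,
						 index->build_array[r].hint,
						 index->build_array[w].tuple,
						 index->build_array[w].hint,
						 cmp_def) == 0)
				continue;
			index->build_array[++w] = index->build_array[r];
		}
		index->build_array_size = w + 1;
	}

Each element is copied at most once, so the pass is O(N) regardless
of how the duplicates are distributed.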
> +		index->build_array_size -= equal_keys;
> +	}
> +}
> +
>  static void
>  memtx_tree_index_end_build(struct index *base)
>  {
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index a7c8e872f..585f2680f 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -194,6 +196,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
>  	return NULL;
>  }
>
> +/**
> + * Check if child_field may be attached to parent_field and
> + * update the parent_field container type if required.
> + */
> +static int
> +tuple_field_ensure_child_compatibility(struct tuple_field *parent,
> +				       struct tuple_field *child)

Wouldn't tuple_format_check_field be a better name?

> +{
> +	enum field_type expected_type =
> +		child->token.type == JSON_TOKEN_STR ?
> +		FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
> +	if (field_type1_contains_type2(parent->type, expected_type)) {
> +		parent->type = expected_type;
> +	} else {
> +		diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
> +			 tuple_field_path(parent),
> +			 field_type_strs[parent->type],
> +			 field_type_strs[expected_type]);
> +		return -1;
> +	}
> +	/*
> +	 * An attempt to append a JSON_TOKEN_ANY leaf to a parent
> +	 * that already has other child records, i.e. is an
> +	 * intermediate field of a non-multikey JSON index.
> +	 */
> +	bool is_not_multikey_parent_conflict =
> +		child->token.type == JSON_TOKEN_ANY &&
> +		!json_token_is_multikey(&parent->token) &&
> +		!json_token_is_leaf(&parent->token);
> +	/*
> +	 * An attempt to append a non-JSON_TOKEN_ANY child record
> +	 * to a parent defined as a multikey index root.
> +	 */
> +	bool is_multikey_parent_conflict =
> +		json_token_is_multikey(&parent->token) &&
> +		child->token.type != JSON_TOKEN_ANY;
> +	if (is_not_multikey_parent_conflict || is_multikey_parent_conflict) {

if (!is not A || is A) sounds like a logical tautology to me. Please
rewrite without these confusing helper flags. You could simply set
the diag twice - it'd be okay:

	if (child->token.type == JSON_TOKEN_ANY ...) {
		diag_set(...);
		return -1;
	}
	if (json_token_is_multikey(&parent->token) ...) {
		diag_set(...);
		return -1;
	}

> +		diag_set(ClientError, ER_MULTIKEY_INDEX_MISMATCH,
> +			 tuple_field_path(parent));
> +		return -1;
> +	}
> +	return 0;
> +}
> +
>  /**
>   * Given a field number and a path, add the corresponding field
>   * to the tuple format, allocating intermediate fields if
> @@ -454,6 +497,34 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>  		if (json_token_is_leaf(&field->token) &&
>  		    !tuple_field_is_nullable(field))
>  			bit_set(format->required_fields, field->id);
> +		/*
> +		 * When the field represents an array index
> +		 * placeholder [*] (JSON_TOKEN_ANY), it requires
> +		 * its own multikey_required_fields allocation,
> +		 * which must be initialized by traversing the
> +		 * JSON subtree of this field.
> +		 */
> +		if (field->token.type == JSON_TOKEN_ANY) {
> +			assert(field->multikey_required_fields == NULL);
> +			void *multikey_required_fields =
> +				calloc(1, required_fields_sz);
> +			if (multikey_required_fields == NULL) {
> +				diag_set(OutOfMemory, required_fields_sz,
> +					 "malloc", "required field bitmap");
> +				return -1;
> +			}
> +			struct tuple_field *child;
> +			json_tree_foreach_entry_preorder(child, &field->token,
> +							 struct tuple_field,
> +							 token) {
> +				if (json_token_is_leaf(&child->token) &&
> +				    !tuple_field_is_nullable(child)) {
> +					bit_set(multikey_required_fields,
> +						child->id);
> +				}

json_tree_foreach_entry_preorder inside another one - O(N^2) again.
AFAIU you could fill the map in the outer loop without hurting
readability.

Also, I don't quite like that you use two bitmaps simultaneously here
and in tuple_field_map_create. I think we could make the root bitmap
store only the fields not present in any multikey bitmap (those are
checked anyway). The code would look cleaner IMHO.

> +			}
> +			field->multikey_required_fields =
> +				multikey_required_fields;
> +		}
>  	}
>  	format->hash = tuple_format_hash(format);
>  	return 0;
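For the record, here's roughly what I mean by filling the bitmaps in
the outer loop of tuple_format_create (untested sketch;
json_token_is_inside is a hypothetical helper walking the parent
chain). Note that it also makes the root bitmap store only the fields
that don't belong to any multikey subtree:

	/* Hypothetical helper: is tok inside the subtree of root? */
	static bool
	json_token_is_inside(struct json_token *tok, struct json_token *root)
	{
		for (struct json_token *t = tok->parent; t != NULL;
		     t = t->parent) {
			if (t == root)
				return true;
		}
		return false;
	}

	struct tuple_field *multikey = NULL;
	struct tuple_field *field;
	json_tree_foreach_entry_preorder(field, &format->fields.root,
					 struct tuple_field, token) {
		/* Left the subtree of the current [*] field? */
		if (multikey != NULL &&
		    !json_token_is_inside(&field->token, &multikey->token))
			multikey = NULL;
		if (field->token.type == JSON_TOKEN_ANY) {
			/* Allocate multikey_required_fields here. */
			multikey = field;
		}
		if (json_token_is_leaf(&field->token) &&
		    !tuple_field_is_nullable(field)) {
			if (multikey != NULL)
				bit_set(multikey->multikey_required_fields,
					field->id);
			else
				bit_set(format->required_fields, field->id);
		}
	}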