[PATCH v6 3/3] box: introduce multikey indexes
Kirill Shcherbatov
kshcherbatov at tarantool.org
Wed Mar 13 15:15:39 MSK 2019
I didn't finished review fixes for multikey indexes, but I've
included this part to show how the modified hints changes affect
multikey indexes.
=========================================================================
With multikey index feature introduction, JSON path may have [*]
placeholder instead of array index: this allows to index
multiple documents by one JSON path automatically.
Example:
s = box.schema.space.create('withdata')
idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'},
{3, 'str', path = '[*].sname'}}})
_ = s:insert({1, 1, {{fname='James', sname='Bond'},
{fname='Austin', sname='Powers'}}})
idx:get({'Austin', 'Powers'})
---
- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Austin',
'sname': 'Powers'}]]
...
Closes #1257
---
src/box/key_def.c | 15 ++++
src/box/key_def.h | 5 ++
src/box/memtx_tree.c | 175 +++++++++++++++++++++++++++++++++++++-
src/box/tuple.c | 8 +-
src/box/tuple.h | 122 +++++++++++++++++++++++---
src/box/tuple_compare.cc | 115 +++++++++++++++++++------
src/box/tuple_format.c | 120 +++++++++++++++++++++-----
test/engine/json.result | 80 ++++++++++++++++-
test/engine/json.test.lua | 20 +++++
9 files changed, 592 insertions(+), 68 deletions(-)
diff --git a/src/box/key_def.c b/src/box/key_def.c
index d4c979aa1..b9cc77699 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -164,9 +164,24 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
*path_pool += path_len;
memcpy(def->parts[part_no].path, path, path_len);
def->parts[part_no].path_len = path_len;
+
+ int rc;
+ struct json_lexer lexer;
+ struct json_token token;
+ json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
+ while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
+ if (token.type == JSON_TOKEN_ANY) {
+ def->has_multikey_parts = true;
+ def->parts[part_no].is_multikey = true;
+ break;
+ } else if (token.type == JSON_TOKEN_END) {
+ break;
+ }
+ }
} else {
def->parts[part_no].path = NULL;
def->parts[part_no].path_len = 0;
+ def->parts[part_no].is_multikey = false;
}
column_mask_set_fieldno(&def->column_mask, fieldno);
}
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 0d8f3112e..6a914df11 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -97,6 +97,8 @@ struct key_part {
char *path;
/** The length of JSON path. */
uint32_t path_len;
+ /** True if this is multikey key part. */
+ bool is_multikey;
/**
* Epoch of the tuple format the offset slot cached in
* this part is valid for, see tuple_format::epoch.
@@ -205,6 +207,8 @@ struct key_def {
bool is_nullable;
/** True if some key part has JSON path. */
bool has_json_paths;
+ /** True if some part has array index placeholder *. */
+ bool has_multikey_parts;
/**
* True, if some key parts can be absent in a tuple. These
* fields assumed to be MP_NIL.
@@ -699,6 +703,7 @@ key_hash(const char *key, struct key_def *key_def)
* Get a comparison hint for a @a tuple.
* @param tuple - tuple to get uint64_t of.
* @param key_def - key_def that defines which comparison is used.
+ * @param multikey_idx - index of multikey array item.
* @return the comparison auxiliary information.
*/
static inline uint64_t
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index 2c1933ceb..bfc618bdf 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -537,7 +537,8 @@ memtx_tree_index_get(struct index *base, const char *key,
struct memtx_tree_key_data key_data;
key_data.key = key;
key_data.part_count = part_count;
- key_data.hint = key_hint(key, cmp_def);
+ if (!cmp_def->has_multikey_parts)
+ key_data.hint = key_hint(key, cmp_def);
struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
*result = res != NULL ? res->tuple : NULL;
return 0;
@@ -593,6 +594,98 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
return 0;
}
+static int
+multikey_get_array_sz(struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_def->has_multikey_parts);
+ struct key_part *part = key_def->parts;
+ /* FIXME: don't like it... */
+ while (part < key_def->parts + key_def->part_count &&
+ !part->is_multikey)
+ part++;
+ assert(part->path != NULL && part->is_multikey);
+ struct tuple_field *field =
+ tuple_format_field_by_path(tuple_format(tuple), part->fieldno,
+ part->path, part->path_len);
+ assert(field != NULL);
+ struct field_map_ext *field_map_ext =
+ tuple_field_map_ext((uint32_t *)tuple_field_map(tuple),
+ field->offset_slot);
+ return field_map_ext->size;
+}
+
+int
+memtx_multikey_tree_index_replace(struct index *base, struct tuple *old_tuple,
+ struct tuple *new_tuple,
+ enum dup_replace_mode mode,
+ struct tuple **result)
+{
+ struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *key_def = index->tree.arg;
+ if (new_tuple != NULL) {
+ int errcode = 0, tree_res = 0;
+ struct tuple *dup_tuple = NULL;
+ int multikey_idx = 0;
+ int sz = multikey_get_array_sz(new_tuple, key_def);
+ for (; multikey_idx < sz; multikey_idx++) {
+ struct memtx_tree_data new_data;
+ new_data.tuple = new_tuple;
+ new_data.hint = multikey_idx;
+ struct memtx_tree_data dup_data;
+ dup_data.tuple = NULL;
+ tree_res = memtx_tree_insert(&index->tree, new_data,
+ &dup_data);
+ if (tree_res != 0) {
+ diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+ "memtx_tree_index", "replace");
+ break;
+ }
+ dup_tuple = dup_data.tuple;
+ errcode = replace_check_dup(old_tuple, dup_tuple, mode);
+ if (errcode != 0) {
+ memtx_tree_delete(&index->tree, new_data);
+ if (dup_tuple != NULL) {
+ memtx_tree_insert(&index->tree,
+ dup_data, NULL);
+ }
+ struct space *sp =
+ space_cache_find(base->def->space_id);
+ if (sp != NULL) {
+ diag_set(ClientError, errcode,
+ base->def->name,
+ space_name(sp));
+ }
+ break;
+ }
+ }
+ if (tree_res != 0 || errcode != 0) {
+ multikey_idx--;
+ for (; multikey_idx >= 0; multikey_idx--) {
+ struct memtx_tree_data data;
+ data.tuple = new_tuple;
+ data.hint = multikey_idx;
+ memtx_tree_delete(&index->tree, data);
+ }
+ return -1;
+ }
+ if (dup_tuple != NULL) {
+ *result = dup_tuple;
+ return 0;
+ }
+ }
+ if (old_tuple != NULL) {
+ int sz = multikey_get_array_sz(old_tuple, key_def);
+ for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) {
+ struct memtx_tree_data old_data;
+ old_data.tuple = old_tuple;
+ old_data.hint = multikey_idx;
+ memtx_tree_delete(&index->tree, old_data);
+ }
+ }
+ *result = old_tuple;
+ return 0;
+}
+
static struct iterator *
memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
const char *key, uint32_t part_count)
@@ -630,7 +723,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
it->key_data.key = key;
it->key_data.part_count = part_count;
struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
- it->key_data.hint = key_hint(key, cmp_def);
+ if (!cmp_def->has_multikey_parts)
+ it->key_data.hint = key_hint(key, cmp_def);
it->index_def = base->def;
it->tree = &index->tree;
it->tree_iterator = memtx_tree_invalid_iterator();
@@ -700,6 +794,48 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
return 0;
}
+static int
+memtx_multikey_tree_index_build_next(struct index *base, struct tuple *tuple)
+{
+ struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *key_def = index->tree.arg;
+ int sz = multikey_get_array_sz(tuple, key_def);
+
+ if (index->build_array == NULL) {
+ index->build_array =
+ (struct memtx_tree_data *)malloc(MEMTX_EXTENT_SIZE);
+ if (index->build_array == NULL) {
+ diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+ "memtx_tree_index", "build_next");
+ return -1;
+ }
+ index->build_array_alloc_size =
+ MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
+ }
+ assert(index->build_array_size <= index->build_array_alloc_size);
+ if (index->build_array_size == index->build_array_alloc_size) {
+ index->build_array_alloc_size = index->build_array_alloc_size +
+ index->build_array_alloc_size / 2;
+ struct memtx_tree_data *tmp =
+ realloc(index->build_array,
+ index->build_array_alloc_size * sizeof(*tmp));
+ if (tmp == NULL) {
+ diag_set(OutOfMemory, index->build_array_alloc_size *
+ sizeof(*tmp), "memtx_tree_index", "build_next");
+ return -1;
+ }
+ index->build_array = tmp;
+ }
+ for (int multikey_idx = 0; multikey_idx < sz; multikey_idx++) {
+ struct memtx_tree_data *elem =
+ &index->build_array[index->build_array_size++];
+ assert(index->build_array_size < index->build_array_alloc_size);
+ elem->tuple = tuple;
+ elem->hint = multikey_idx;
+ }
+ return 0;
+}
+
static void
memtx_tree_index_end_build(struct index *base)
{
@@ -802,6 +938,36 @@ static const struct index_vtab memtx_tree_index_vtab = {
/* .end_build = */ memtx_tree_index_end_build,
};
+static const struct index_vtab memtx_multikey_tree_index_vtab = {
+ /* .destroy = */ memtx_tree_index_destroy,
+ /* .commit_create = */ generic_index_commit_create,
+ /* .abort_create = */ generic_index_abort_create,
+ /* .commit_modify = */ generic_index_commit_modify,
+ /* .commit_drop = */ generic_index_commit_drop,
+ /* .update_def = */ memtx_tree_index_update_def,
+ /* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
+ /* .def_change_requires_rebuild = */
+ memtx_index_def_change_requires_rebuild,
+ /* .size = */ memtx_tree_index_size,
+ /* .bsize = */ memtx_tree_index_bsize,
+ /* .min = */ generic_index_min,
+ /* .max = */ generic_index_max,
+ /* .random = */ memtx_tree_index_random,
+ /* .count = */ memtx_tree_index_count,
+ /* .get = */ memtx_tree_index_get,
+ /* .replace = */ memtx_multikey_tree_index_replace,
+ /* .create_iterator = */ memtx_tree_index_create_iterator,
+ /* .create_snapshot_iterator = */
+ memtx_tree_index_create_snapshot_iterator,
+ /* .stat = */ generic_index_stat,
+ /* .compact = */ generic_index_compact,
+ /* .reset_stat = */ generic_index_reset_stat,
+ /* .begin_build = */ memtx_tree_index_begin_build,
+ /* .reserve = */ memtx_tree_index_reserve,
+ /* .build_next = */ memtx_multikey_tree_index_build_next,
+ /* .end_build = */ memtx_tree_index_end_build,
+};
+
struct index *
memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
{
@@ -812,8 +978,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
"malloc", "struct memtx_tree_index");
return NULL;
}
+ const struct index_vtab *vtab = def->key_def->has_multikey_parts ?
+ &memtx_multikey_tree_index_vtab :
+ &memtx_tree_index_vtab;
if (index_create(&index->base, (struct engine *)memtx,
- &memtx_tree_index_vtab, def) != 0) {
+ vtab, def) != 0) {
free(index);
return NULL;
}
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 7f06d4053..c8a938c4c 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -455,7 +455,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
}
int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
+tuple_go_to_path_multikey(const char **data, const char *path,
+ uint32_t path_len, uint64_t multikey_idx)
{
int rc;
struct json_lexer lexer;
@@ -463,6 +464,8 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
switch (token.type) {
+ case JSON_TOKEN_ANY:
+ token.num = (int)multikey_idx;
case JSON_TOKEN_NUM:
rc = tuple_field_go_to_index(data, token.num);
break;
@@ -532,7 +535,8 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
}
return tuple_field_raw_by_path(format, tuple, field_map, fieldno,
path + lexer.offset,
- path_len - lexer.offset, NULL);
+ path_len - lexer.offset, NULL,
+ INVALID_MULTIKEY_INDEX);
}
/* }}} tuple_field_* getters */
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 8b12fd5a8..e498e1e41 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -45,6 +45,8 @@ struct slab_arena;
struct quota;
struct key_part;
+#define INVALID_MULTIKEY_INDEX UINT64_MAX
+
/**
* A format for standalone tuples allocated on runtime arena.
* \sa tuple_new().
@@ -286,6 +288,7 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
/** \endcond public */
+
/**
* An atom of Tarantool storage. Represents MsgPack Array.
* Tuple has the following structure:
@@ -296,7 +299,9 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
* | ^
* +---------------------------------------data_offset
*
- * Each 'off_i' is the offset to the i-th indexed field.
+ * Each 'off_i' is the offset to the i-th indexed field or field
+ * map extention (described with struct tuple_field_map_ext)
+ * offset (in sizeof(field_map[0]) units).
*/
struct PACKED tuple
{
@@ -328,6 +333,26 @@ struct PACKED tuple
*/
};
+/**
+ * Tuple field map extent. Is allocated on tuple_field_map_create
+ * call automatically when necessary, when tuple field map must be
+ * reallocated dynamically.
+ */
+struct field_map_ext {
+ /* Sequence of data offset slots. */
+ uint32_t field_map[1];
+ /* The count of field_map items. */
+ uint32_t size;
+};
+
+static inline struct field_map_ext *
+tuple_field_map_ext(uint32_t *field_map, int32_t root_offset_slot)
+{
+ uint32_t slot_off = field_map[root_offset_slot];
+ return (struct field_map_ext *)((char *)field_map -
+ slot_off * sizeof(uint32_t) - sizeof(struct field_map_ext));
+}
+
/** Size of the tuple including size of struct tuple. */
static inline size_t
tuple_size(const struct tuple *tuple)
@@ -508,11 +533,32 @@ tuple_field_count(const struct tuple *tuple)
* with NULL.
* @param path The path to process.
* @param path_len The length of the @path.
+ * @param multikey_index The multikey index hint - index of item
+ * for JSON_TOKEN_ANY level.
* @retval 0 On success.
* @retval -1 In case of error in JSON path.
*/
int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
+tuple_go_to_path_multikey(const char **data, const char *path,
+ uint32_t path_len, uint64_t multikey_idx);
+
+/**
+ * Retrieve msgpack data by JSON path.
+ * @param data[in, out] Pointer to msgpack with data.
+ * If the field cannot be retrieved be the
+ * specified path @path, it is overwritten
+ * with NULL.
+ * @param path The path to process.
+ * @param path_len The length of the @path.
+ * @retval 0 On success.
+ * @retval -1 In case of error in JSON path.
+ */
+static inline int
+tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
+{
+ return tuple_go_to_path_multikey(data, path, path_len,
+ INVALID_MULTIKEY_INDEX);
+}
/**
* Get tuple field by field index and relative JSON path.
@@ -533,7 +579,7 @@ static inline const char *
tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
const uint32_t *field_map, uint32_t fieldno,
const char *path, uint32_t path_len,
- int32_t *offset_slot_hint)
+ int32_t *offset_slot_hint, uint64_t multikey_idx)
{
int32_t offset_slot;
if (offset_slot_hint != NULL &&
@@ -558,10 +604,22 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
if (offset_slot_hint != NULL)
*offset_slot_hint = offset_slot;
offset_slot_access:
- /* Indexed field */
- if (field_map[offset_slot] == 0)
- return NULL;
- tuple += field_map[offset_slot];
+ if (multikey_idx != INVALID_MULTIKEY_INDEX) {
+ struct field_map_ext *field_map_ext =
+ tuple_field_map_ext((uint32_t *)field_map,
+ offset_slot);
+ if (multikey_idx >= field_map_ext->size)
+ return NULL;
+ uint32_t off = field_map_ext->field_map[-multikey_idx];
+ if (off == 0)
+ return NULL;
+ tuple += off;
+ } else {
+ /* Indexed field */
+ if (field_map[offset_slot] == 0)
+ return NULL;
+ tuple += field_map[offset_slot];
+ }
} else {
uint32_t field_count;
parse:
@@ -572,8 +630,8 @@ parse:
for (uint32_t k = 0; k < fieldno; k++)
mp_next(&tuple);
if (path != NULL &&
- unlikely(tuple_go_to_path(&tuple, path,
- path_len) != 0))
+ unlikely(tuple_go_to_path_multikey(&tuple, path, path_len,
+ multikey_idx) != 0))
return NULL;
}
return tuple;
@@ -595,7 +653,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple,
const uint32_t *field_map, uint32_t field_no)
{
return tuple_field_raw_by_path(format, tuple, field_map, field_no,
- NULL, 0, NULL);
+ NULL, 0, NULL, INVALID_MULTIKEY_INDEX);
}
/**
@@ -634,16 +692,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
uint32_t path_len, uint32_t path_hash);
/**
- * Get a tuple field pointed to by an index part.
+ * Get a tuple field pointed to by an index part and multikey hint.
* @param format Tuple format.
* @param tuple A pointer to MessagePack array.
* @param field_map A pointer to the LAST element of field map.
+ * @param multikey_idx A multikey hint.
* @param part Index part to use.
* @retval Field data if the field exists or NULL.
*/
static inline const char *
-tuple_field_raw_by_part(struct tuple_format *format, const char *data,
- const uint32_t *field_map, struct key_part *part)
+tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data,
+ const uint32_t *field_map,
+ struct key_part *part, uint64_t multikey_idx)
{
if (unlikely(part->format_epoch != format->epoch)) {
assert(format->epoch != 0);
@@ -656,7 +716,41 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data,
}
return tuple_field_raw_by_path(format, data, field_map, part->fieldno,
part->path, part->path_len,
- &part->offset_slot_cache);
+ &part->offset_slot_cache, multikey_idx);
+}
+
+/**
+ * Get a field refereed by index @part and multikey hint in tuple.
+ * @param tuple Tuple to get the field from.
+ * @param part Index part to use.
+ * @param multikey_idx A multikey hint.
+ * @retval Field data if the field exists or NULL.
+ */
+static inline const char *
+tuple_field_by_part_multikey(const struct tuple *tuple, struct key_part *part,
+ uint64_t multikey_idx)
+{
+ return tuple_field_raw_by_part_multikey(tuple_format(tuple),
+ tuple_data(tuple),
+ tuple_field_map(tuple), part,
+ multikey_idx);
+}
+
+
+/**
+ * Get a tuple field pointed to by an index part.
+ * @param format Tuple format.
+ * @param tuple A pointer to MessagePack array.
+ * @param field_map A pointer to the LAST element of field map.
+ * @param part Index part to use.
+ * @retval Field data if the field exists or NULL.
+ */
+static inline const char *
+tuple_field_raw_by_part(struct tuple_format *format, const char *data,
+ const uint32_t *field_map, struct key_part *part)
+{
+ return tuple_field_raw_by_part_multikey(format, data, field_map,
+ part, INVALID_MULTIKEY_INDEX);
}
/**
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 86922c9bb..cdb08f8fe 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -351,6 +351,8 @@ key_def_set_hint_func(struct key_def *def)
{
def->key_hint = NULL;
def->tuple_hint = NULL;
+ if (def->has_multikey_parts)
+ return;
bool is_nullable = key_part_is_nullable(def->parts);
switch (def->parts->type) {
case FIELD_TYPE_UNSIGNED: {
@@ -835,7 +837,8 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b,
return i;
}
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+ bool is_multikey>
static inline int
tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
uint64_t tuple_a_hint, const struct tuple *tuple_b,
@@ -845,8 +848,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
assert(!has_optional_parts || is_nullable);
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
+ assert(is_multikey == key_def->has_multikey_parts);
+ assert(!is_multikey || is_multikey == has_json_paths);
int rc = 0;
- if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
+ if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
return rc;
struct key_part *part = key_def->parts;
const char *tuple_a_raw = tuple_data(tuple_a);
@@ -889,7 +894,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
end = part + key_def->part_count;
for (; part < end; part++) {
- if (has_json_paths) {
+ if (is_multikey) {
+ field_a = tuple_field_raw_by_part_multikey(format_a,
+ tuple_a_raw, field_map_a, part,
+ tuple_a_hint);
+ field_b = tuple_field_raw_by_part_multikey(format_b,
+ tuple_b_raw, field_map_b, part,
+ tuple_b_hint);
+ } else if (has_json_paths) {
field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
field_map_a, part);
field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -946,7 +958,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
*/
end = key_def->parts + key_def->part_count;
for (; part < end; ++part) {
- if (has_json_paths) {
+ if (is_multikey) {
+ field_a = tuple_field_raw_by_part_multikey(format_a,
+ tuple_a_raw, field_map_a, part,
+ tuple_a_hint);
+ field_b = tuple_field_raw_by_part_multikey(format_b,
+ tuple_b_raw, field_map_b, part,
+ tuple_b_hint);
+ } else if (has_json_paths) {
field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
field_map_a, part);
field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -976,24 +995,28 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
struct key_def *key_def)
{
return tuple_compare_slowpath_hinted<is_nullable, has_optional_parts,
- has_json_paths>(tuple_a, HINT_INVALID, tuple_b,
+ has_json_paths, false>(tuple_a, HINT_INVALID, tuple_b,
HINT_INVALID, key_def);
}
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+ bool is_multikey>
static inline int
tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
uint64_t tuple_hint, const char *key, uint32_t part_count,
uint64_t key_hint, struct key_def *key_def)
{
+ (void)key_hint;
assert(has_json_paths == key_def->has_json_paths);
assert(!has_optional_parts || is_nullable);
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
assert(key != NULL || part_count == 0);
assert(part_count <= key_def->part_count);
+ assert(is_multikey == key_def->has_multikey_parts);
+ assert(!is_multikey || is_multikey == has_json_paths);
int rc = 0;
- if ((rc = hint_cmp(tuple_hint, key_hint)) != 0)
+ if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0)
return rc;
struct key_part *part = key_def->parts;
struct tuple_format *format = tuple_format(tuple);
@@ -1002,7 +1025,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
enum mp_type a_type, b_type;
if (likely(part_count == 1)) {
const char *field;
- if (has_json_paths) {
+ if (is_multikey) {
+ field = tuple_field_raw_by_part_multikey(format,
+ tuple_raw, field_map, part, tuple_hint);
+ } else if (has_json_paths) {
field = tuple_field_raw_by_part(format, tuple_raw,
field_map, part);
} else {
@@ -1032,7 +1058,10 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
struct key_part *end = part + part_count;
for (; part < end; ++part, mp_next(&key)) {
const char *field;
- if (has_json_paths) {
+ if (is_multikey) {
+ field = tuple_field_raw_by_part_multikey(format,
+ tuple_raw, field_map, part, tuple_hint);
+ } else if (has_json_paths) {
field = tuple_field_raw_by_part(format, tuple_raw,
field_map, part);
} else {
@@ -1074,7 +1103,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
uint32_t part_count, struct key_def *key_def)
{
return tuple_compare_with_key_slowpath_hinted<is_nullable,
- has_optional_parts, has_json_paths>(tuple,
+ has_optional_parts, has_json_paths, false>(tuple,
HINT_INVALID, key, part_count,
HINT_INVALID, key_def);
}
@@ -1508,14 +1537,22 @@ static const tuple_compare_t compare_slowpath_funcs[] = {
};
static const tuple_compare_hinted_t compare_slowpath_hinted_funcs[] = {
- tuple_compare_slowpath_hinted<false, false, false>,
- tuple_compare_slowpath_hinted<true, false, false>,
- tuple_compare_slowpath_hinted<false, true, false>,
- tuple_compare_slowpath_hinted<true, true, false>,
- tuple_compare_slowpath_hinted<false, false, true>,
- tuple_compare_slowpath_hinted<true, false, true>,
- tuple_compare_slowpath_hinted<false, true, true>,
- tuple_compare_slowpath_hinted<true, true, true>
+ tuple_compare_slowpath_hinted<false, false, false, false>,
+ tuple_compare_slowpath_hinted<true, false, false, false>,
+ tuple_compare_slowpath_hinted<false, true, false, false>,
+ tuple_compare_slowpath_hinted<true, true, false, false>,
+ tuple_compare_slowpath_hinted<false, false, true, false>,
+ tuple_compare_slowpath_hinted<true, false, true, false>,
+ tuple_compare_slowpath_hinted<false, true, true, false>,
+ tuple_compare_slowpath_hinted<true, true, true, false>,
+ tuple_compare_slowpath_hinted<false, false, false, true>,
+ tuple_compare_slowpath_hinted<true, false, false, true>,
+ tuple_compare_slowpath_hinted<false, true, false, true>,
+ tuple_compare_slowpath_hinted<true, true, false, true>,
+ tuple_compare_slowpath_hinted<false, false, true, true>,
+ tuple_compare_slowpath_hinted<true, false, true, true>,
+ tuple_compare_slowpath_hinted<false, true, true, true>,
+ tuple_compare_slowpath_hinted<true, true, true, true>,
};
void
@@ -1523,7 +1560,14 @@ key_def_set_tuple_compare(struct key_def *def)
{
int cmp_func_idx = (def->is_nullable ? 1 : 0) +
2 * (def->has_optional_parts ? 1 : 0) +
- 4 * (def->has_json_paths ? 1 : 0);
+ 4 * (def->has_json_paths ? 1 : 0) +
+ 8 * (def->has_multikey_parts ? 1 : 0);
+ if (def->has_multikey_parts) {
+ def->tuple_compare = NULL;
+ def->tuple_compare_hinted =
+ compare_slowpath_hinted_funcs[cmp_func_idx];
+ return;
+ }
if (def->is_nullable) {
if (key_def_is_sequential(def)) {
if (def->has_optional_parts) {
@@ -1795,14 +1839,22 @@ static const tuple_compare_with_key_t compare_with_key_slowpath_funcs[] = {
static const tuple_compare_with_key_hinted_t
compare_with_key_slowpath_hinted_funcs[] = {
- tuple_compare_with_key_slowpath_hinted<false, false, false>,
- tuple_compare_with_key_slowpath_hinted<true, false, false>,
- tuple_compare_with_key_slowpath_hinted<false, true, false>,
- tuple_compare_with_key_slowpath_hinted<true, true, false>,
- tuple_compare_with_key_slowpath_hinted<false, false, true>,
- tuple_compare_with_key_slowpath_hinted<true, false, true>,
- tuple_compare_with_key_slowpath_hinted<false, true, true>,
- tuple_compare_with_key_slowpath_hinted<true, true, true>
+ tuple_compare_with_key_slowpath_hinted<false, false, false, false>,
+ tuple_compare_with_key_slowpath_hinted<true, false, false, false>,
+ tuple_compare_with_key_slowpath_hinted<false, true, false, false>,
+ tuple_compare_with_key_slowpath_hinted<true, true, false, false>,
+ tuple_compare_with_key_slowpath_hinted<false, false, true, false>,
+ tuple_compare_with_key_slowpath_hinted<true, false, true, false>,
+ tuple_compare_with_key_slowpath_hinted<false, true, true, false>,
+ tuple_compare_with_key_slowpath_hinted<true, true, true, false>,
+ tuple_compare_with_key_slowpath_hinted<false, false, false, true>,
+ tuple_compare_with_key_slowpath_hinted<true, false, false, true>,
+ tuple_compare_with_key_slowpath_hinted<false, true, false, true>,
+ tuple_compare_with_key_slowpath_hinted<true, true, false, true>,
+ tuple_compare_with_key_slowpath_hinted<false, false, true, true>,
+ tuple_compare_with_key_slowpath_hinted<true, false, true, true>,
+ tuple_compare_with_key_slowpath_hinted<false, true, true, true>,
+ tuple_compare_with_key_slowpath_hinted<true, true, true, true>
};
void
@@ -1810,7 +1862,14 @@ key_def_set_tuple_compare_with_key(struct key_def *def)
{
int cmp_func_idx = (def->is_nullable ? 1 : 0) +
2 * (def->has_optional_parts ? 1 : 0) +
- 4 * (def->has_json_paths ? 1 : 0);
+ 4 * (def->has_json_paths ? 1 : 0) +
+ 8 * (def->has_multikey_parts ? 1 : 0);
+ if (def->has_multikey_parts) {
+ def->tuple_compare_with_key = NULL;
+ def->tuple_compare_with_key_hinted =
+ compare_with_key_slowpath_hinted_funcs[cmp_func_idx];
+ return;
+ }
if (def->is_nullable) {
if (key_def_is_sequential(def)) {
if (def->has_optional_parts) {
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 4439ce3e0..99b602cd5 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -33,6 +33,7 @@
#include "json/json.h"
#include "tuple_format.h"
#include "coll_id_cache.h"
+#include "tuple.h"
#include "third_party/PMurHash.h"
@@ -233,14 +234,19 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
field->token.type != JSON_TOKEN_END) {
- if (field->token.type == JSON_TOKEN_ANY) {
- diag_set(ClientError, ER_UNSUPPORTED,
- "Tarantool", "multikey indexes");
- goto fail;
- }
enum field_type expected_type =
field->token.type == JSON_TOKEN_STR ?
FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
+ if (field->token.type == JSON_TOKEN_ANY &&
+ !json_token_is_multikey(&parent->token) &&
+ !json_token_is_leaf(&parent->token)) {
+ assert(expected_type == FIELD_TYPE_ARRAY);
+ diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
+ tuple_field_path(parent),
+ field_type_strs[parent->type],
+ "multikey array");
+ goto fail;
+ }
if (field_type1_contains_type2(parent->type, expected_type)) {
parent->type = expected_type;
} else {
@@ -475,9 +481,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
break;
format->min_tuple_size += mp_sizeof_nil();
}
- } else {
+ } else if (field->token.type == JSON_TOKEN_STR) {
/* Account a key string for map member. */
- assert(field->token.type == JSON_TOKEN_STR);
format->min_tuple_size +=
mp_sizeof_str(field->token.len);
}
@@ -805,6 +810,59 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
return true;
}
+/**
+ * Grow tuple_field_map allocation left with extent_size
+ * 0 bytes.
+ */
+static int
+tuple_field_map_realloc(uint32_t **field_map, uint32_t *field_map_size,
+ uint32_t extent_size, struct region *region)
+{
+ assert(extent_size % sizeof(uint32_t) == 0);
+ uint32_t new_field_map_size = *field_map_size + extent_size;
+ uint32_t *new_field_map = region_alloc(region, new_field_map_size);
+ if (new_field_map == NULL) {
+ diag_set(OutOfMemory, new_field_map_size, "region_alloc",
+ "new_field_map");
+ return -1;
+ }
+ memset(new_field_map, 0, extent_size);
+ if (*field_map != NULL) {
+ memcpy((char *)new_field_map + extent_size,
+ (char *)*field_map - *field_map_size, *field_map_size);
+ }
+ *field_map = (uint32_t *)((char *)new_field_map + new_field_map_size);
+ *field_map_size = new_field_map_size;
+ return 0;
+}
+
+/**
+ * Retrieve tuple field map extent by root_offset_slot, reallocate
+ * memory if required.
+ */
+struct field_map_ext *
+tuple_field_map_ext_retrieve(uint32_t **field_map, uint32_t *field_map_size,
+ int32_t root_offset_slot, uint32_t extent_items,
+ struct region *region)
+{
+ uint32_t extent_sz = sizeof(struct field_map_ext) +
+ extent_items * sizeof(uint32_t);
+ uint32_t slot_off = (*field_map)[root_offset_slot];
+ if (slot_off == 0) {
+ /* Field map extent was not created yet. */
+ slot_off = *field_map_size / sizeof(uint32_t);
+ (*field_map)[root_offset_slot] = slot_off;
+ if (tuple_field_map_realloc(field_map, field_map_size,
+ extent_sz, region) != 0)
+ return NULL;
+ }
+ struct field_map_ext *field_map_ext =
+ tuple_field_map_ext(*field_map, root_offset_slot);
+ assert(field_map_ext->size == 0 || field_map_ext->size == extent_items);
+ field_map_ext->size = extent_items;
+ return field_map_ext;
+}
+
/** @sa declaration for details. */
int
tuple_field_map_create(struct tuple_format *format, const char *tuple,
@@ -817,14 +875,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
return 0; /* Nothing to initialize */
}
struct region *region = &fiber()->gc;
- *field_map_size = format->field_map_size;
- *field_map = region_alloc(region, *field_map_size);
- if (*field_map == NULL) {
- diag_set(OutOfMemory, *field_map_size, "region_alloc",
- "field_map");
+ *field_map = NULL;
+ *field_map_size = 0;
+ if (tuple_field_map_realloc(field_map, field_map_size,
+ format->field_map_size, region) != 0)
return -1;
- }
- *field_map = (uint32_t *)((char *)*field_map + *field_map_size);
const char *pos = tuple;
int rc = 0;
@@ -864,11 +919,6 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
uint32_t defined_field_count = MIN(field_count, validate ?
tuple_format_field_count(format) :
format->index_field_count);
- /*
- * Nullify field map to be able to detect by 0,
- * which key fields are absent in tuple_field().
- */
- memset((char *)*field_map - *field_map_size, 0, *field_map_size);
/*
* Prepare mp stack of the size equal to the maximum depth
* of the indexed field in the format::fields tree
@@ -885,6 +935,12 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
struct mp_stack stack;
mp_stack_create(&stack, format->fields_depth, frames);
mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+ /**
+ * Pointer to the stack entry that represents the multikey
+ * index root ARRAY entry.
+ */
+ struct mp_frame *mk_parent_frame = NULL;
+
struct tuple_field *field;
struct json_token *parent = &format->fields.root;
while (true) {
@@ -901,6 +957,10 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
mp_stack_pop(&stack);
if (mp_stack_is_empty(&stack))
goto finish;
+ if (json_token_is_multikey(parent)) {
+ /* Leave multikey index branch. */
+ mk_parent_frame = NULL;
+ }
parent = parent->parent;
}
/*
@@ -950,8 +1010,23 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
field_type_strs[field->type]);
goto error;
}
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+ mk_parent_frame == NULL) {
(*field_map)[field->offset_slot] = pos - tuple;
+ } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+ mk_parent_frame != NULL) {
+ uint32_t extent_items = mk_parent_frame->count;
+ struct field_map_ext *field_map_ext =
+ tuple_field_map_ext_retrieve(field_map,
+ field_map_size,
+ field->offset_slot,
+ extent_items, region);
+ if (field_map_ext == NULL)
+ goto error;
+ int multikey_idx = mk_parent_frame->curr;
+ field_map_ext->field_map[
+ -multikey_idx] = pos - tuple;
+ }
if (required_fields != NULL)
bit_clear(required_fields, field->id);
}
@@ -968,6 +1043,11 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
mp_decode_array(&pos) :
mp_decode_map(&pos);
mp_stack_push(&stack, type, size);
+ if (json_token_is_multikey(&field->token)) {
+ /* Track multikey entry frame. */
+ assert(type == MP_ARRAY);
+ mk_parent_frame = &frames[stack.used - 1];
+ }
parent = &field->token;
} else {
mp_next(&pos);
diff --git a/test/engine/json.result b/test/engine/json.result
index a790cdfbc..5c7a9b3b3 100644
--- a/test/engine/json.result
+++ b/test/engine/json.result
@@ -691,7 +691,85 @@ s = box.schema.space.create('withdata')
...
idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
---
-- error: Tarantool does not support multikey indexes
+...
+s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+---
+- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}})
+---
+- [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:select({'James', 'Bond'})
+---
+- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:select({'Kirill', 'Shcherbatov'})
+---
+- []
+...
+idx:select({'Ivan', 'Ivanych'})
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:select({'Vasya', 'Pupkin'})
+---
+- - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:select()
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+idx:select()
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:delete({'Vasya', 'Pupkin'})
+---
+- [1, 1, [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+---
+- [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+---
+- [2, 1, [{'fname': 'James', 'sname': 'Bond'}]]
+...
+idx:select()
+---
+- - [1, 1, [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [2, 1, [{'fname': 'James', 'sname': 'Bond'}]]
+ - [2, 1, [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:drop()
+---
+...
+s = box.schema.space.create('withdata')
+---
+...
+pk = s:create_index('pk')
+---
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+---
+...
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
+---
+- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1',
+ 'extra': {'sname': 'C2'}}]]
...
s:drop()
---
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
index f9a7180b1..dc6016af9 100644
--- a/test/engine/json.test.lua
+++ b/test/engine/json.test.lua
@@ -198,4 +198,24 @@ s:drop()
--
s = box.schema.space.create('withdata')
idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+s:insert({1, 1, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+s:insert({1, 1, {{fname='Ivan', sname='Ivanych'}}})
+idx:select({'James', 'Bond'})
+idx:select({'Kirill', 'Shcherbatov'})
+idx:select({'Ivan', 'Ivanych'})
+idx:select({'Vasya', 'Pupkin'})
+idx:select()
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+idx:select()
+idx:delete({'Vasya', 'Pupkin'})
+s:insert({2, 1, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({2, 1, {{fname='James', sname='Bond'}}})
+idx:select()
+s:drop()
+
+s = box.schema.space.create('withdata')
+pk = s:create_index('pk')
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
s:drop()
--
2.21.0
More information about the Tarantool-patches
mailing list