From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [PATCH v2 5/5] box: introduce multikey indexes in memtx
Date: Tue, 19 Mar 2019 15:32:10 +0300 [thread overview]
Message-ID: <5cc1d6c549a630d4925475de757baed71d5b5b1d.1552998554.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1552998554.git.kshcherbatov@tarantool.org>
Part of #1257
@TarantoolBot document
Title: introduce multikey indexes in memtx
Multikey indexes allows you to automatically index set of documents
by JSON paths having array index placeholder "[*]". Multikey index
cannot be primary as it cannot be unique(by definition).
Multikey index parts must be compatible: only one "[*]" placeholder
is allowed in same position(for all JSON paths in index parts).
Example:
s = box.schema.space.create('withdata')
pk = s:create_index('pk')
parts = {
{2, 'str', path = 'data[*].name'},
{2, 'str', path = 'data[*].extra.phone'}
}
idx = s:create_index('idx', {parts = parts})
s:insert({1, {data = {{name="A", extra={phone="111"}},
{name="B", extra={phone="111"}}},
garbage = 1}}
idx:get({'A', '111'})
---
src/box/errcode.h | 1 +
src/box/index_def.c | 25 +++-
src/box/key_def.c | 10 ++
src/box/key_def.h | 18 +++
src/box/memtx_space.c | 18 +++
src/box/memtx_tree.c | 183 ++++++++++++++++++++----
src/box/tuple.c | 28 +++-
src/box/tuple.h | 111 +++++++++++++--
src/box/tuple_compare.cc | 87 +++++++++---
| 2 +-
src/box/tuple_field_map.c | 27 ++++
src/box/tuple_field_map.h | 87 +++++++++++-
src/box/tuple_format.c | 105 +++++++++++---
src/box/vinyl.c | 6 +
test/box/misc.result | 1 +
test/engine/json.result | 13 --
test/engine/json.test.lua | 7 -
test/engine/multikey_idx.result | 228 ++++++++++++++++++++++++++++++
test/engine/multikey_idx.test.lua | 67 +++++++++
19 files changed, 915 insertions(+), 109 deletions(-)
create mode 100644 test/engine/multikey_idx.result
create mode 100644 test/engine/multikey_idx.test.lua
diff --git a/src/box/errcode.h b/src/box/errcode.h
index d234d26c8..16cec30c0 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -239,6 +239,7 @@ struct errcode_record {
/*184 */_(ER_SQL_UNRECOGNIZED_SYNTAX, "Syntax error near '%.*s'") \
/*185 */_(ER_SQL_UNKNOWN_TOKEN, "Syntax error: unrecognized token: '%.*s'") \
/*186 */_(ER_SQL_PARSER_GENERIC, "%s") \
+ /*187 */_(ER_INDEX_MULTIKEY_INVALID, "Multikey index is invalid: %s") \
/*
* !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/index_def.c b/src/box/index_def.c
index f0c7dc4f6..dfa539f86 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -268,9 +268,15 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
space_name, "too many key parts");
return false;
}
+ if (index_def->iid == 0 && key_def_is_multikey(index_def->key_def)) {
+ diag_set(ClientError, ER_MODIFY_INDEX, index_def->name,
+ space_name, "primary key cannot be multikey");
+ return false;
+ }
for (uint32_t i = 0; i < index_def->key_def->part_count; i++) {
- assert(index_def->key_def->parts[i].type < field_type_MAX);
- if (index_def->key_def->parts[i].fieldno > BOX_INDEX_FIELD_MAX) {
+ struct key_part *parts = index_def->key_def->parts;
+ assert(parts[i].type < field_type_MAX);
+ if (parts[i].fieldno > BOX_INDEX_FIELD_MAX) {
diag_set(ClientError, ER_MODIFY_INDEX, index_def->name,
space_name, "field no is too big");
return false;
@@ -280,8 +286,8 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
* Courtesy to a user who could have made
* a typo.
*/
- struct key_part *part_a = &index_def->key_def->parts[i];
- struct key_part *part_b = &index_def->key_def->parts[j];
+ struct key_part *part_a = &parts[i];
+ struct key_part *part_b = &parts[j];
if (part_a->fieldno == part_b->fieldno &&
json_path_cmp(part_a->path, part_a->path_len,
part_b->path, part_b->path_len,
@@ -292,6 +298,17 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
return false;
}
}
+ if (key_def_is_multikey(index_def->key_def) &&
+ json_path_cmp(parts[i].path, parts[i].path_len,
+ parts[index_def->key_def->
+ multikey_part_idx].path,
+ index_def->key_def->multikey_path_len,
+ true, TUPLE_INDEX_BASE) != 0) {
+ diag_set(ClientError, ER_MODIFY_INDEX,
+ index_def->name, space_name,
+ "multikey index parts are incompatible");
+ return false;
+ }
}
return true;
}
diff --git a/src/box/key_def.c b/src/box/key_def.c
index f9e464402..ffc1e1665 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -164,6 +164,13 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
*path_pool += path_len;
memcpy(def->parts[part_no].path, path, path_len);
def->parts[part_no].path_len = path_len;
+ int multikey_path_len;
+ if (json_path_is_multikey(path, path_len, &multikey_path_len,
+ TUPLE_INDEX_BASE) &&
+ def->multikey_path_len <= (uint32_t)multikey_path_len) {
+ def->multikey_part_idx = part_no;
+ def->multikey_path_len = (uint32_t)multikey_path_len;
+ }
} else {
def->parts[part_no].path = NULL;
def->parts[part_no].path_len = 0;
@@ -185,6 +192,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
}
def->part_count = part_count;
+ def->multikey_part_idx = part_count;
def->unique_part_count = part_count;
/* A pointer to the JSON paths data in the new key_def. */
@@ -253,6 +261,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
}
key_def->part_count = part_count;
+ key_def->multikey_part_idx = part_count;
key_def->unique_part_count = part_count;
for (uint32_t item = 0; item < part_count; ++item) {
@@ -672,6 +681,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
return NULL;
}
new_def->part_count = new_part_count;
+ new_def->multikey_part_idx = new_part_count;
new_def->unique_part_count = new_part_count;
new_def->is_nullable = first->is_nullable || second->is_nullable;
new_def->has_optional_parts = first->has_optional_parts ||
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 174790f72..690954f2d 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -247,6 +247,18 @@ struct key_def {
bool has_optional_parts;
/** Key fields mask. @sa column_mask.h for details. */
uint64_t column_mask;
+ /**
+ * In case of multikey index, the index of the key_part
+ * containing JSON path with array index placeholder "[*]".
+ * Otherwise multikey_part_idx == part_count.
+ */
+ uint32_t multikey_part_idx;
+ /**
+ * In case of multikey index, the length of the
+ * parts[multikey_part_idx].path substring "...[*]"
+ * @see json_path_is_multikey().
+ */
+ uint32_t multikey_path_len;
/** The size of the 'parts' array. */
uint32_t part_count;
/** Description of parts of a multipart index. */
@@ -485,6 +497,12 @@ key_def_is_sequential(const struct key_def *key_def)
return true;
}
+static inline bool
+key_def_is_multikey(const struct key_def *key_def)
+{
+ return key_def->multikey_part_idx < key_def->part_count;
+}
+
/**
* Return true if @a key_def defines has fields that requires
* special collation comparison.
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index 92ec0b300..9a5132979 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -638,6 +638,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
"HASH index must be unique");
return -1;
}
+ if (key_def_is_multikey(index_def->key_def)) {
+ diag_set(ClientError, ER_MODIFY_INDEX,
+ index_def->name, space_name(space),
+ "HASH index cannot be multikey");
+ return -1;
+ }
break;
case TREE:
/* TREE index has no limitations. */
@@ -661,6 +667,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
"RTREE index field type must be ARRAY");
return -1;
}
+ if (key_def_is_multikey(index_def->key_def)) {
+ diag_set(ClientError, ER_MODIFY_INDEX,
+ index_def->name, space_name(space),
+ "RTREE index cannot be multikey");
+ return -1;
+ }
/* no furter checks of parts needed */
return 0;
case BITSET:
@@ -683,6 +695,12 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
"BITSET index field type must be NUM or STR");
return -1;
}
+ if (key_def_is_multikey(index_def->key_def)) {
+ diag_set(ClientError, ER_MODIFY_INDEX,
+ index_def->name, space_name(space),
+ "BITSET index cannot be multikey");
+ return -1;
+ }
/* no furter checks of parts needed */
return 0;
default:
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index c2c8a19e7..38a6992ef 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -531,7 +531,8 @@ memtx_tree_index_get(struct index *base, const char *key,
struct memtx_tree_key_data key_data;
key_data.key = key;
key_data.part_count = part_count;
- key_data.hint = key_hint(key, cmp_def);
+ if (!key_def_is_multikey(cmp_def))
+ key_data.hint = key_hint(key, cmp_def);
struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
*result = res != NULL ? res->tuple : NULL;
return 0;
@@ -587,6 +588,79 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
return 0;
}
+int
+memtx_tree_index_replace_multikey(struct index *base, struct tuple *old_tuple,
+ struct tuple *new_tuple,
+ enum dup_replace_mode mode,
+ struct tuple **result)
+{
+ struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *key_def = memtx_tree_index_cmp_def(index);
+ if (new_tuple != NULL) {
+ int errcode = 0, tree_res = 0;
+ struct tuple *dup_tuple = NULL;
+ int multikey_idx = 0;
+ uint32_t items = tuple_field_multikey_items(new_tuple, key_def);
+ for (; (uint32_t)multikey_idx < items; multikey_idx++) {
+ struct memtx_tree_data new_data;
+ new_data.tuple = new_tuple;
+ new_data.hint = multikey_idx;
+ struct memtx_tree_data dup_data;
+ dup_data.tuple = NULL;
+ tree_res = memtx_tree_insert(&index->tree, new_data,
+ &dup_data);
+ if (tree_res != 0) {
+ diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+ "memtx_tree_index", "replace");
+ break;
+ }
+ dup_tuple = dup_data.tuple;
+ errcode = replace_check_dup(old_tuple, dup_tuple, mode);
+ if (errcode != 0) {
+ memtx_tree_delete(&index->tree, new_data);
+ if (dup_tuple != NULL) {
+ memtx_tree_insert(&index->tree,
+ dup_data, NULL);
+ }
+ struct space *sp =
+ space_cache_find(base->def->space_id);
+ if (sp != NULL) {
+ diag_set(ClientError, errcode,
+ base->def->name,
+ space_name(sp));
+ }
+ break;
+ }
+ }
+ if (tree_res != 0 || errcode != 0) {
+ multikey_idx--;
+ for (; multikey_idx >= 0; multikey_idx--) {
+ struct memtx_tree_data data;
+ data.tuple = new_tuple;
+ data.hint = multikey_idx;
+ memtx_tree_delete(&index->tree, data);
+ }
+ return -1;
+ }
+ if (dup_tuple != NULL) {
+ *result = dup_tuple;
+ return 0;
+ }
+ }
+ if (old_tuple != NULL) {
+ uint32_t items = tuple_field_multikey_items(old_tuple, key_def);
+ for (uint32_t multikey_idx = 0; multikey_idx < items;
+ multikey_idx++) {
+ struct memtx_tree_data old_data;
+ old_data.tuple = old_tuple;
+ old_data.hint = multikey_idx;
+ memtx_tree_delete(&index->tree, old_data);
+ }
+ }
+ *result = old_tuple;
+ return 0;
+}
+
static struct iterator *
memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
const char *key, uint32_t part_count)
@@ -624,7 +698,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
it->key_data.key = key;
it->key_data.part_count = part_count;
struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
- it->key_data.hint = key_hint(key, cmp_def);
+ if (!key_def_is_multikey(cmp_def))
+ it->key_data.hint = key_hint(key, cmp_def);
it->index_def = base->def;
it->tree = &index->tree;
it->tree_iterator = memtx_tree_invalid_iterator();
@@ -659,33 +734,42 @@ memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
}
static int
-memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
+memtx_tree_index_build_array_realloc(struct memtx_tree_index *index,
+ uint32_t items)
{
- struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ bool need_realloc = false;
if (index->build_array == NULL) {
- index->build_array = malloc(MEMTX_EXTENT_SIZE);
- if (index->build_array == NULL) {
- diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
- "memtx_tree_index", "build_next");
- return -1;
- }
index->build_array_alloc_size =
MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
+ need_realloc = true;
}
- assert(index->build_array_size <= index->build_array_alloc_size);
- if (index->build_array_size == index->build_array_alloc_size) {
- index->build_array_alloc_size = index->build_array_alloc_size +
- index->build_array_alloc_size / 2;
- struct memtx_tree_data *tmp =
- realloc(index->build_array,
- index->build_array_alloc_size * sizeof(*tmp));
- if (tmp == NULL) {
- diag_set(OutOfMemory, index->build_array_alloc_size *
- sizeof(*tmp), "memtx_tree_index", "build_next");
- return -1;
- }
- index->build_array = tmp;
+ uint32_t build_array_new_size = index->build_array_size + items;
+ if (build_array_new_size > index->build_array_alloc_size) {
+ index->build_array_alloc_size +=
+ MAX(index->build_array_alloc_size / 2,
+ build_array_new_size - index->build_array_alloc_size);
+ need_realloc = true;
+ }
+ if (!need_realloc)
+ return 0;
+ struct memtx_tree_data *tmp =
+ realloc(index->build_array,
+ index->build_array_alloc_size * sizeof(*tmp));
+ if (tmp == NULL) {
+ diag_set(OutOfMemory, index->build_array_alloc_size *
+ sizeof(*tmp), "memtx_tree_index", "build_next");
+ return -1;
}
+ index->build_array = tmp;
+ return 0;
+}
+
+static int
+memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
+{
+ struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ if (memtx_tree_index_build_array_realloc(index, 1) != 0)
+ return -1;
struct memtx_tree_data *elem =
&index->build_array[index->build_array_size++];
elem->tuple = tuple;
@@ -694,6 +778,24 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
return 0;
}
+static int
+memtx_tree_index_build_next_multikey(struct index *base, struct tuple *tuple)
+{
+ struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *key_def = memtx_tree_index_cmp_def(index);
+ uint32_t items = tuple_field_multikey_items(tuple, key_def);
+ if (memtx_tree_index_build_array_realloc(index, items) != 0)
+ return -1;
+ for (uint32_t multikey_idx = 0; multikey_idx < items; multikey_idx++) {
+ struct memtx_tree_data *elem =
+ &index->build_array[index->build_array_size++];
+ assert(index->build_array_size <= index->build_array_alloc_size);
+ elem->tuple = tuple;
+ elem->hint = multikey_idx;
+ }
+ return 0;
+}
+
static void
memtx_tree_index_end_build(struct index *base)
{
@@ -796,6 +898,36 @@ static const struct index_vtab memtx_tree_index_vtab = {
/* .end_build = */ memtx_tree_index_end_build,
};
+static const struct index_vtab memtx_tree_index_multikey_vtab = {
+ /* .destroy = */ memtx_tree_index_destroy,
+ /* .commit_create = */ generic_index_commit_create,
+ /* .abort_create = */ generic_index_abort_create,
+ /* .commit_modify = */ generic_index_commit_modify,
+ /* .commit_drop = */ generic_index_commit_drop,
+ /* .update_def = */ memtx_tree_index_update_def,
+ /* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
+ /* .def_change_requires_rebuild = */
+ memtx_index_def_change_requires_rebuild,
+ /* .size = */ memtx_tree_index_size,
+ /* .bsize = */ memtx_tree_index_bsize,
+ /* .min = */ generic_index_min,
+ /* .max = */ generic_index_max,
+ /* .random = */ memtx_tree_index_random,
+ /* .count = */ memtx_tree_index_count,
+ /* .get = */ memtx_tree_index_get,
+ /* .replace = */ memtx_tree_index_replace_multikey,
+ /* .create_iterator = */ memtx_tree_index_create_iterator,
+ /* .create_snapshot_iterator = */
+ memtx_tree_index_create_snapshot_iterator,
+ /* .stat = */ generic_index_stat,
+ /* .compact = */ generic_index_compact,
+ /* .reset_stat = */ generic_index_reset_stat,
+ /* .begin_build = */ memtx_tree_index_begin_build,
+ /* .reserve = */ memtx_tree_index_reserve,
+ /* .build_next = */ memtx_tree_index_build_next_multikey,
+ /* .end_build = */ memtx_tree_index_end_build,
+};
+
struct index *
memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
{
@@ -806,8 +938,11 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
"malloc", "struct memtx_tree_index");
return NULL;
}
+ const struct index_vtab *vtab = key_def_is_multikey(def->key_def) ?
+ &memtx_tree_index_multikey_vtab :
+ &memtx_tree_index_vtab;
if (index_create(&index->base, (struct engine *)memtx,
- &memtx_tree_index_vtab, def) != 0) {
+ vtab, def) != 0) {
free(index);
return NULL;
}
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 68f0670f2..985117957 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -129,8 +129,6 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
struct field_map_builder builder;
int rc = tuple_field_map_create(&builder, format, tuple, true);
region_truncate(region, region_svp);
- assert(rc != 0 ||
- field_map_builder_size(&builder) == format->field_map_size);
return rc;
}
@@ -455,7 +453,8 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
}
int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
+tuple_go_to_path(const char **data, const char *path, uint32_t path_len,
+ int multikey_idx)
{
int rc;
struct json_lexer lexer;
@@ -463,6 +462,10 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len)
json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
switch (token.type) {
+ case JSON_TOKEN_ANY:
+ assert(multikey_idx >= 0);
+ token.num = multikey_idx;
+ FALLTHROUGH;
case JSON_TOKEN_NUM:
rc = tuple_field_go_to_index(data, token.num);
break;
@@ -532,7 +535,24 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
}
return tuple_field_raw_by_path(format, tuple, field_map, fieldno,
path + lexer.offset,
- path_len - lexer.offset, NULL);
+ path_len - lexer.offset, NULL, -1);
+}
+
+uint32_t
+tuple_field_raw_multikey_items(struct tuple_format *format, const char *data,
+ const uint32_t *field_map,
+ struct key_def *key_def)
+{
+ assert(key_def_is_multikey(key_def));
+ struct key_part *part = &key_def->parts[key_def->multikey_part_idx];
+ const char *array_raw =
+ tuple_field_raw_by_path(format, data, field_map, part->fieldno,
+ part->path, key_def->multikey_path_len,
+ NULL, -1);
+ if (array_raw == NULL)
+ return 0;
+ assert(mp_typeof(*array_raw) == MP_ARRAY);
+ return mp_decode_array(&array_raw);
}
/* }}} tuple_field_* getters */
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 8b12fd5a8..060e4a076 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -508,14 +508,51 @@ tuple_field_count(const struct tuple *tuple)
* with NULL.
* @param path The path to process.
* @param path_len The length of the @path.
+ * @param multikey_idx The multikey index hint - index of
+ * document to retrieve when array index
+ * placeholder "[*]" is met.
* @retval 0 On success.
* @retval -1 In case of error in JSON path.
*/
int
-tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
+tuple_go_to_path(const char **data, const char *path, uint32_t path_len,
+ int multikey_idx);
/**
- * Get tuple field by field index and relative JSON path.
+ * Get tuple field by field_map, offset_slot and multikey_idx.
+ * @param tuple MessagePack tuple's body.
+ * @param field_map Tuple field map.
+ * @param offset_slot The field_map slot with field to get offset.
+ * @param multikey_idx The multikey index hint - index of
+ * document to retrieve when array index
+ * placeholder "[*]" is met.
+ * @retval Not NULL field pointer on success.
+ * @retval NULL otherwise, when field is absents.
+ */
+static const char *
+tuple_field_raw_by_offset_slot(const char *tuple, const uint32_t *field_map,
+ int32_t offset_slot, int multikey_idx)
+{
+ assert(offset_slot != TUPLE_OFFSET_SLOT_NIL);
+ uint32_t offset;
+ if (multikey_idx >= 0) {
+ assert(field_map[offset_slot] != 0);
+ struct field_map_ext *extent =
+ field_map_ext_get(field_map, offset_slot);
+ if ((uint32_t)multikey_idx >= extent->items)
+ return NULL;
+ offset = extent->offset[multikey_idx];
+ } else {
+ offset = field_map[offset_slot];
+ }
+ if (offset == 0)
+ return NULL;
+ return tuple + offset;
+}
+
+/**
+ * Get tuple field by field index, relative JSON path and
+ * multikey_idx.
* @param format Tuple format.
* @param tuple MessagePack tuple's body.
* @param field_map Tuple field map.
@@ -528,12 +565,15 @@ tuple_go_to_path(const char **data, const char *path, uint32_t path_len);
* access data in a single operation.
* Else it is initialized with offset_slot
* of format field by path.
+ * @param multikey_idx The multikey index hint - index of
+ * document to retrieve when array index
+ * placeholder "[*]" is met.
*/
static inline const char *
tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
const uint32_t *field_map, uint32_t fieldno,
const char *path, uint32_t path_len,
- int32_t *offset_slot_hint)
+ int32_t *offset_slot_hint, int multikey_idx)
{
int32_t offset_slot;
if (offset_slot_hint != NULL &&
@@ -558,10 +598,8 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
if (offset_slot_hint != NULL)
*offset_slot_hint = offset_slot;
offset_slot_access:
- /* Indexed field */
- if (field_map[offset_slot] == 0)
- return NULL;
- tuple += field_map[offset_slot];
+ tuple = tuple_field_raw_by_offset_slot(tuple, field_map,
+ offset_slot, multikey_idx);
} else {
uint32_t field_count;
parse:
@@ -572,8 +610,8 @@ parse:
for (uint32_t k = 0; k < fieldno; k++)
mp_next(&tuple);
if (path != NULL &&
- unlikely(tuple_go_to_path(&tuple, path,
- path_len) != 0))
+ unlikely(tuple_go_to_path(&tuple, path, path_len,
+ multikey_idx) != 0))
return NULL;
}
return tuple;
@@ -595,7 +633,7 @@ tuple_field_raw(struct tuple_format *format, const char *tuple,
const uint32_t *field_map, uint32_t field_no)
{
return tuple_field_raw_by_path(format, tuple, field_map, field_no,
- NULL, 0, NULL);
+ NULL, 0, NULL, -1);
}
/**
@@ -634,16 +672,18 @@ tuple_field_raw_by_full_path(struct tuple_format *format, const char *tuple,
uint32_t path_len, uint32_t path_hash);
/**
- * Get a tuple field pointed to by an index part.
+ * Get a tuple field pointed to by an index part and multikey hint.
* @param format Tuple format.
* @param tuple A pointer to MessagePack array.
* @param field_map A pointer to the LAST element of field map.
+ * @param multikey_idx A multikey hint.
* @param part Index part to use.
* @retval Field data if the field exists or NULL.
*/
static inline const char *
-tuple_field_raw_by_part(struct tuple_format *format, const char *data,
- const uint32_t *field_map, struct key_part *part)
+tuple_field_raw_by_part_multikey(struct tuple_format *format, const char *data,
+ const uint32_t *field_map,
+ struct key_part *part, int multikey_idx)
{
if (unlikely(part->format_epoch != format->epoch)) {
assert(format->epoch != 0);
@@ -656,7 +696,23 @@ tuple_field_raw_by_part(struct tuple_format *format, const char *data,
}
return tuple_field_raw_by_path(format, data, field_map, part->fieldno,
part->path, part->path_len,
- &part->offset_slot_cache);
+ &part->offset_slot_cache, multikey_idx);
+}
+
+/**
+ * Get a tuple field pointed to by an index part.
+ * @param format Tuple format.
+ * @param tuple A pointer to MessagePack array.
+ * @param field_map A pointer to the LAST element of field map.
+ * @param part Index part to use.
+ * @retval Field data if the field exists or NULL.
+ */
+static inline const char *
+tuple_field_raw_by_part(struct tuple_format *format, const char *data,
+ const uint32_t *field_map, struct key_part *part)
+{
+ return tuple_field_raw_by_part_multikey(format, data, field_map,
+ part, -1);
}
/**
@@ -672,6 +728,33 @@ tuple_field_by_part(const struct tuple *tuple, struct key_part *part)
tuple_field_map(tuple), part);
}
+/**
+ * Get count of documents in the multikey index.
+ * @param format Tuple format.
+ * @param data A pointer to MessagePack array.
+ * @param field_map A pointer to the LAST element of field map.
+ * @param key_def Index key_definition.
+ * @retval Count of documents in the multikey index.
+ */
+uint32_t
+tuple_field_raw_multikey_items(struct tuple_format *format, const char *data,
+ const uint32_t *field_map,
+ struct key_def *key_def);
+
+/**
+ * Get count of documents in the multikey index.
+ * @param tuple Tuple to get the count of documents from.
+ * @param key_def Index key_definition.
+ * @retval Count of documents in the multikey index.
+ */
+static inline uint32_t
+tuple_field_multikey_items(struct tuple *tuple, struct key_def *key_def)
+{
+ return tuple_field_raw_multikey_items(tuple_format(tuple),
+ tuple_data(tuple),
+ tuple_field_map(tuple), key_def);
+}
+
/**
* @brief Tuple Interator
*/
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 508aff04c..8896390e3 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -433,7 +433,8 @@ tuple_compare_field_with_hint(const char *field_a, enum mp_type a_type,
}
}
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+ bool is_multikey>
static inline int
tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
hint_t tuple_a_hint, const struct tuple *tuple_b,
@@ -443,8 +444,10 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
assert(!has_optional_parts || is_nullable);
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
+ assert(key_def_is_multikey(key_def) == is_multikey);
+ assert(!is_multikey || is_multikey == has_json_paths);
int rc = 0;
- if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
+ if (!is_multikey && (rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
return rc;
struct key_part *part = key_def->parts;
const char *tuple_a_raw = tuple_data(tuple_a);
@@ -487,7 +490,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
end = part + key_def->part_count;
for (; part < end; part++) {
- if (has_json_paths) {
+ if (is_multikey) {
+ field_a = tuple_field_raw_by_part_multikey(format_a,
+ tuple_a_raw, field_map_a, part,
+ (int)tuple_a_hint);
+ field_b = tuple_field_raw_by_part_multikey(format_b,
+ tuple_b_raw, field_map_b, part,
+ (int)tuple_b_hint);
+ } else if (has_json_paths) {
field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
field_map_a, part);
field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -544,7 +554,14 @@ tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
*/
end = key_def->parts + key_def->part_count;
for (; part < end; ++part) {
- if (has_json_paths) {
+ if (is_multikey) {
+ field_a = tuple_field_raw_by_part_multikey(format_a,
+ tuple_a_raw, field_map_a, part,
+ (int)tuple_a_hint);
+ field_b = tuple_field_raw_by_part_multikey(format_b,
+ tuple_b_raw, field_map_b, part,
+ (int)tuple_b_hint);
+ } else if (has_json_paths) {
field_a = tuple_field_raw_by_part(format_a, tuple_a_raw,
field_map_a, part);
field_b = tuple_field_raw_by_part(format_b, tuple_b_raw,
@@ -574,24 +591,28 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
struct key_def *key_def)
{
return tuple_compare_slowpath_hinted<is_nullable, has_optional_parts,
- has_json_paths>(tuple_a, HINT_NONE, tuple_b, HINT_NONE,
- key_def);
+ has_json_paths, false>(tuple_a, HINT_NONE, tuple_b,
+ HINT_NONE, key_def);
}
-template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths,
+ bool is_multikey>
static inline int
tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
hint_t tuple_hint, const char *key, uint32_t part_count,
hint_t key_hint, struct key_def *key_def)
{
+ (void)key_hint;
assert(has_json_paths == key_def->has_json_paths);
assert(!has_optional_parts || is_nullable);
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
assert(key != NULL || part_count == 0);
assert(part_count <= key_def->part_count);
+ assert(key_def_is_multikey(key_def) == is_multikey);
+ assert(!is_multikey || is_multikey == has_json_paths);
int rc = 0;
- if ((rc = hint_cmp(tuple_hint, key_hint)) != 0)
+ if (!is_multikey && (rc = hint_cmp(tuple_hint, key_hint)) != 0)
return rc;
struct key_part *part = key_def->parts;
struct tuple_format *format = tuple_format(tuple);
@@ -600,7 +621,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
enum mp_type a_type, b_type;
if (likely(part_count == 1)) {
const char *field;
- if (has_json_paths) {
+ if (is_multikey) {
+ field = tuple_field_raw_by_part_multikey(format,
+ tuple_raw, field_map, part,
+ (int)tuple_hint);
+ } else if (has_json_paths) {
field = tuple_field_raw_by_part(format, tuple_raw,
field_map, part);
} else {
@@ -630,7 +655,11 @@ tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
struct key_part *end = part + part_count;
for (; part < end; ++part, mp_next(&key)) {
const char *field;
- if (has_json_paths) {
+ if (is_multikey) {
+ field = tuple_field_raw_by_part_multikey(format,
+ tuple_raw, field_map, part,
+ (int)tuple_hint);
+ } else if (has_json_paths) {
field = tuple_field_raw_by_part(format, tuple_raw,
field_map, part);
} else {
@@ -672,7 +701,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
uint32_t part_count, struct key_def *key_def)
{
return tuple_compare_with_key_slowpath_hinted<is_nullable,
- has_optional_parts, has_json_paths>(tuple,
+ has_optional_parts, has_json_paths, false>(tuple,
HINT_NONE, key, part_count,
HINT_NONE, key_def);
}
@@ -1301,16 +1330,22 @@ static const comparator_with_key_signature precompiled_cmp_wk_arr[] = {
#define COMPARE_SLOWPATH(...) \
{tuple_compare_slowpath<__VA_ARGS__>, \
tuple_compare_with_key_slowpath<__VA_ARGS__>, \
- tuple_compare_slowpath_hinted<__VA_ARGS__>, \
- tuple_compare_with_key_slowpath_hinted<__VA_ARGS__>, \
- __VA_ARGS__, false}
+ tuple_compare_slowpath_hinted<__VA_ARGS__, false>, \
+ tuple_compare_with_key_slowpath_hinted<__VA_ARGS__, false>, \
+ __VA_ARGS__, false, false}
+
+#define COMPARE_SLOWPATH_MULTIKEY(...) \
+ {NULL, NULL, \
+ tuple_compare_slowpath_hinted<__VA_ARGS__, true>, \
+ tuple_compare_with_key_slowpath_hinted<__VA_ARGS__, true>, \
+ __VA_ARGS__, false, true}
#define COMPARE_SEQUENTIAL(...) \
{tuple_compare_sequential<__VA_ARGS__>, \
tuple_compare_with_key_sequential<__VA_ARGS__>, \
tuple_compare_sequential_hinted<__VA_ARGS__>, \
tuple_compare_with_key_sequential_hinted<__VA_ARGS__>, \
- __VA_ARGS__, false, true}
+ __VA_ARGS__, false, true, false}
static struct {
tuple_compare_t tuple_compare;
@@ -1321,6 +1356,7 @@ static struct {
bool has_optional_parts;
bool has_json_paths;
bool is_sequential;
+ bool is_multikey;
} cmp_arr[] = {
COMPARE_SLOWPATH(false, false, false),
COMPARE_SLOWPATH(true, false, false),
@@ -1334,10 +1370,19 @@ static struct {
COMPARE_SEQUENTIAL(true, false),
COMPARE_SEQUENTIAL(false, true),
COMPARE_SEQUENTIAL(true, true),
+ COMPARE_SLOWPATH_MULTIKEY(false, false, false),
+ COMPARE_SLOWPATH_MULTIKEY(true, false, false),
+ COMPARE_SLOWPATH_MULTIKEY(false, true, false),
+ COMPARE_SLOWPATH_MULTIKEY(true, true, false),
+ COMPARE_SLOWPATH_MULTIKEY(false, false, true),
+ COMPARE_SLOWPATH_MULTIKEY(true, false, true),
+ COMPARE_SLOWPATH_MULTIKEY(false, true, true),
+ COMPARE_SLOWPATH_MULTIKEY(true, true, true),
};
#undef COMPARE_SLOWPATH
#undef COMPARE_SEQUENTIAL
+#undef COMPARE_SLOWPATH_MULTIKEY
/* }}} tuple_compare_with_key */
@@ -1571,8 +1616,10 @@ key_def_set_compare_func(struct key_def *def)
def->tuple_compare_with_key_hinted = NULL;
def->key_hint = NULL;
def->tuple_hint = NULL;
+ bool is_multikey = key_def_is_multikey(def);
if (!def->is_nullable && !key_def_has_collation(def) &&
!def->has_json_paths) {
+ assert(!is_multikey);
/* Precalculated comparators don't use collation */
for (uint32_t k = 0; k < sizeof(precompiled_cmp_arr) /
sizeof(precompiled_cmp_arr[0]); k++) {
@@ -1611,7 +1658,8 @@ key_def_set_compare_func(struct key_def *def)
}
}
}
- if (def->tuple_compare == NULL || def->tuple_compare_with_key == NULL) {
+ if (def->tuple_compare_hinted == NULL ||
+ def->tuple_compare_with_key_hinted == NULL) {
for (uint32_t k = 0; k < sizeof(cmp_arr) /
sizeof(cmp_arr[0]); k++) {
if (def->is_nullable == cmp_arr[k].is_nullable &&
@@ -1619,7 +1667,8 @@ key_def_set_compare_func(struct key_def *def)
cmp_arr[k].has_optional_parts &&
def->has_json_paths == cmp_arr[k].has_json_paths &&
key_def_is_sequential(def) ==
- cmp_arr[k].is_sequential) {
+ cmp_arr[k].is_sequential &&
+ is_multikey == cmp_arr[k].is_multikey) {
if (def->tuple_compare == NULL) {
def->tuple_compare =
cmp_arr[k].tuple_compare;
@@ -1638,8 +1687,8 @@ key_def_set_compare_func(struct key_def *def)
}
}
}
- assert(def->tuple_compare != NULL &&
- def->tuple_compare_with_key != NULL);
+ assert(def->tuple_compare_hinted != NULL &&
+ def->tuple_compare_with_key_hinted != NULL);
for (uint32_t k = 0; k < sizeof(hint_arr) / sizeof(hint_arr[0]); k++) {
if (def->parts->type == hint_arr[k].type &&
key_part_is_nullable(def->parts) ==
--git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
index 0a8337102..00bcfacdd 100644
--- a/src/box/tuple_extract_key.cc
+++ b/src/box/tuple_extract_key.cc
@@ -310,7 +310,7 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end,
const char *src_end = field_end;
if (has_json_paths && part->path != NULL) {
if (tuple_go_to_path(&src, part->path,
- part->path_len) != 0) {
+ part->path_len, -1) != 0) {
/*
* The path must be correct as
* it has already been validated
diff --git a/src/box/tuple_field_map.c b/src/box/tuple_field_map.c
index ea77745aa..e76068619 100644
--- a/src/box/tuple_field_map.c
+++ b/src/box/tuple_field_map.c
@@ -36,6 +36,8 @@ int
field_map_builder_create(struct field_map_builder *builder,
uint32_t field_map_size, struct region *region)
{
+ builder->region = region;
+ builder->extents_size = 0;
builder->item_count = field_map_size / sizeof(uint32_t);
if (field_map_size == 0) {
builder->items = NULL;
@@ -51,3 +53,28 @@ field_map_builder_create(struct field_map_builder *builder,
builder->items = (field_map_builder_item *)((char *)builder->items + sz);
return 0;
}
+
+struct field_map_ext *
+field_map_builder_ext_get(struct field_map_builder *builder, int32_t offset_slot,
+ uint32_t extent_items)
+{
+ struct field_map_ext *extent;
+ if (builder->items[offset_slot].has_extent) {
+ extent = builder->items[offset_slot].extent;
+ assert(extent != NULL);
+ assert(extent->items == extent_items);
+ return extent;
+ }
+ uint32_t sz = field_map_ext_size(extent_items);
+ extent = region_alloc(builder->region, sz);
+ if (extent == NULL) {
+ diag_set(OutOfMemory, sz, "region", "extent");
+ return NULL;
+ }
+ memset(extent, 0, sz);
+ extent->items = extent_items;
+ builder->items[offset_slot].extent = extent;
+ builder->items[offset_slot].has_extent = true;
+ builder->extents_size += sz;
+ return extent;
+}
diff --git a/src/box/tuple_field_map.h b/src/box/tuple_field_map.h
index 3a9927faa..042209a80 100644
--- a/src/box/tuple_field_map.h
+++ b/src/box/tuple_field_map.h
@@ -31,11 +31,48 @@
* SUCH DAMAGE.
*/
#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
struct region;
+/** The field_map extent. */
+struct field_map_ext {
+ /** Count of field_map_ext::offset[] items. */
+ uint32_t items;
+ /** Data offset in tuple array. */
+ uint32_t offset[0];
+};
+
+/** Return field_map extention allocation size. */
+static inline uint32_t
+field_map_ext_size(uint32_t items)
+{
+ return sizeof(struct field_map_ext) + items * sizeof(uint32_t);
+}
+
+/** Return field_map extention for field_map and offset_slot. */
+static inline struct field_map_ext *
+field_map_ext_get(const uint32_t *field_map, int32_t offset_slot)
+{
+ return (struct field_map_ext *)((char *)field_map -
+ field_map[offset_slot]);
+}
+
/** Preliminary field_map atom. */
-typedef uint32_t field_map_builder_item;
+typedef struct {
+ /**
+ * True when this slot must be interpret as
+ * extention pointer.
+ */
+ bool has_extent;
+ union {
+ /** Data offset in tuple. */
+ uint32_t offset;
+ /** Pointer to field_map_ext extention. */
+ struct field_map_ext *extent;
+ };
+} field_map_builder_item;
/** A class that contains a preliminary view of the field_map. */
struct field_map_builder {
@@ -49,8 +86,20 @@ struct field_map_builder {
* count of field_map_builder::items.
*/
uint32_t item_count;
+ /**
+ * Total size of memory is allocated for field_map
+ * extentions.
+ */
+ uint32_t extents_size;
+ /** Region to use for allocations. */
+ struct region *region;
};
+/** Get or allocate field_map_ext for offset_slot. */
+struct field_map_ext *
+field_map_builder_ext_get(struct field_map_builder *builder,
+ int32_t offset_slot, uint32_t extent_items);
+
/**
* Initialize field_map_builder to prepare field_map of size
* field_map_size. Use region for temporary allocations.
@@ -65,22 +114,50 @@ field_map_builder_slot_set(struct field_map_builder *builder,
int32_t offset_slot, uint32_t offset)
{
assert(offset_slot < 0);
- builder->items[offset_slot] = offset;
+ builder->items[offset_slot].offset = offset;
+}
+
+/** Initialize corresponding field_map_ext with offset value. */
+static inline int
+field_map_builder_ext_slot_set(struct field_map_builder *builder,
+ int32_t offset_slot, int32_t extent_slot,
+ uint32_t extent_items, uint32_t offset)
+{
+ assert(offset_slot < 0 && extent_slot >= 0 && extent_items > 0);
+ struct field_map_ext *extent =
+ field_map_builder_ext_get(builder, offset_slot, extent_items);
+ if (extent == NULL)
+ return -1;
+ assert(extent->items == extent_items);
+ extent->offset[extent_slot] = offset;
+ return 0;
}
/** Return built field_map size. */
static inline uint32_t
field_map_builder_size(struct field_map_builder *builder)
{
- return builder->item_count * sizeof(uint32_t);
+ return builder->item_count * sizeof(uint32_t) + builder->extents_size;
}
/** Write build field_map size in the preallocated buffer wptr. */
static inline void
field_map_builder_build(struct field_map_builder *builder, char *wptr)
{
- uint32_t field_map_size = field_map_builder_size(builder);
- memcpy(wptr, (char *)builder->items - field_map_size, field_map_size);
+ uint32_t *field_map =
+ (uint32_t *)(wptr + field_map_builder_size(builder));
+ char *extent_wptr = wptr + builder->extents_size;
+ for (int32_t i = -1; i >= -(int32_t)builder->item_count; i--) {
+ if (!builder->items[i].has_extent) {
+ field_map[i] = builder->items[i].offset;
+ continue;
+ }
+ struct field_map_ext *extent = builder->items[i].extent;
+ uint32_t sz = field_map_ext_size(extent->items);
+ extent_wptr -= sz;
+ field_map[i] = (char *)field_map - (char *)extent_wptr;
+ memcpy(extent_wptr, extent, sz);
+ }
}
#endif /* TARANTOOL_BOX_FIELD_MAP_H_INCLUDED */
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 1d765c7c7..60cca677b 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -30,6 +30,7 @@
*/
#include "bit/bit.h"
#include "fiber.h"
+#include "tuple.h"
#include "json/json.h"
#include "tuple_format.h"
#include "coll_id_cache.h"
@@ -194,6 +195,50 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
return NULL;
}
+/**
+ * Check if child_field may be attached to parent_field,
+ * update the parent_field container type if required.
+ */
+static int
+tuple_format_field_update_type(struct tuple_field *parent_field,
+ struct tuple_field *child_field)
+{
+ enum field_type expected_type =
+ child_field->token.type == JSON_TOKEN_STR ?
+ FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
+ if (child_field->token.type == JSON_TOKEN_ANY &&
+ !json_token_is_multikey(&parent_field->token) &&
+ !json_token_is_leaf(&parent_field->token)) {
+ assert(expected_type == FIELD_TYPE_ARRAY);
+ diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
+ tt_sprintf("field %s is already refer to document by "
+ "identifier and cannot use array index "
+ "placeholder [*]",
+ tuple_field_path(parent_field)));
+ return -1;
+ }
+ if (json_token_is_multikey(&parent_field->token) &&
+ child_field->token.type != JSON_TOKEN_ANY) {
+ assert(expected_type == FIELD_TYPE_ARRAY);
+ diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
+ tt_sprintf("field %s is already used with array index "
+ "placeholder [*] and cannot refer to "
+ "document by identifier",
+ tuple_field_path(parent_field)));
+ return -1;
+ }
+ if (field_type1_contains_type2(parent_field->type, expected_type)) {
+ parent_field->type = expected_type;
+ } else {
+ diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
+ tuple_field_path(parent_field),
+ field_type_strs[parent_field->type],
+ field_type_strs[expected_type]);
+ return -1;
+ }
+ return 0;
+}
+
/**
* Given a field number and a path, add the corresponding field
* to the tuple format, allocating intermediate fields if
@@ -228,29 +273,16 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
*path_pool += path_len;
int rc = 0;
- uint32_t token_count = 0;
+ uint32_t token_count = 0, multikey_token_count = 0;
struct json_tree *tree = &format->fields;
struct json_lexer lexer;
json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 &&
field->token.type != JSON_TOKEN_END) {
- if (field->token.type == JSON_TOKEN_ANY) {
- diag_set(ClientError, ER_UNSUPPORTED,
- "Tarantool", "multikey indexes");
- goto fail;
- }
- enum field_type expected_type =
- field->token.type == JSON_TOKEN_STR ?
- FIELD_TYPE_MAP : FIELD_TYPE_ARRAY;
- if (field_type1_contains_type2(parent->type, expected_type)) {
- parent->type = expected_type;
- } else {
- diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH,
- tuple_field_path(parent),
- field_type_strs[parent->type],
- field_type_strs[expected_type]);
+ if (tuple_format_field_update_type(parent, field) != 0)
goto fail;
- }
+ if (field->token.type == JSON_TOKEN_ANY)
+ multikey_token_count++;
struct tuple_field *next =
json_tree_lookup_entry(tree, &parent->token,
&field->token,
@@ -268,6 +300,18 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
if (field == NULL)
goto fail;
}
+ if (json_token_is_multikey(&parent->token) &&
+ parent->offset_slot == TUPLE_OFFSET_SLOT_NIL) {
+ /**
+ * Allocate offset slot for array is used
+ * in multikey index. This is required to
+ * quickly extract its size.
+ * @see tuple_field_multikey_items()
+ */
+ assert(parent->type == FIELD_TYPE_ARRAY);
+ *current_slot = *current_slot - 1;
+ parent->offset_slot = *current_slot;
+ }
parent->is_key_part = true;
parent = next;
token_count++;
@@ -280,6 +324,13 @@ tuple_format_add_field(struct tuple_format *format, uint32_t fieldno,
assert(parent != NULL);
/* Update tree depth information. */
format->fields_depth = MAX(format->fields_depth, token_count + 1);
+ if (multikey_token_count > 1) {
+ assert(path_len > 0);
+ diag_set(ClientError, ER_INDEX_MULTIKEY_INVALID,
+ tt_sprintf("no more than one array index placeholder "
+ "[*] is allowed in JSON path"));
+ goto fail;
+ }
cleanup:
tuple_field_delete(field);
end:
@@ -782,6 +833,7 @@ tuple_field_map_initialize(struct field_map_builder *builder,
* validations and field map initialization.
*/
uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+ struct mp_frame *mk_parent_frame = NULL;
struct mp_frame *frames = region_alloc(region, frames_sz);
if (frames == NULL) {
diag_set(OutOfMemory, frames_sz, "region", "frames");
@@ -805,6 +857,10 @@ tuple_field_map_initialize(struct field_map_builder *builder,
mp_stack_pop(&stack);
if (mp_stack_is_empty(&stack))
goto end;
+ if (json_token_is_multikey(parent)) {
+ /* Leave multikey index branch. */
+ mk_parent_frame = NULL;
+ }
parent = parent->parent;
}
/*
@@ -855,7 +911,16 @@ tuple_field_map_initialize(struct field_map_builder *builder,
field_type_strs[field->type]);
return -1;
}
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL &&
+ mk_parent_frame != NULL) {
+ int multikey_idx = mk_parent_frame->curr;
+ uint32_t multikey_items = mk_parent_frame->count;
+ if (field_map_builder_ext_slot_set(builder,
+ field->offset_slot, multikey_idx,
+ multikey_items,
+ *pos - tuple) != 0)
+ return -1;
+ } else if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
field_map_builder_slot_set(builder,
field->offset_slot,
*pos - tuple);
@@ -876,6 +941,10 @@ tuple_field_map_initialize(struct field_map_builder *builder,
mp_decode_array(pos) :
mp_decode_map(pos);
mp_stack_push(&stack, type, size);
+ if (json_token_is_multikey(&field->token)) {
+ assert(type == MP_ARRAY);
+ mk_parent_frame = &frames[stack.used - 1];
+ }
parent = &field->token;
} else {
mp_next(pos);
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 78f29c624..39ba76394 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -699,6 +699,12 @@ vinyl_space_check_index_def(struct space *space, struct index_def *index_def)
return -1;
}
}
+ if (key_def_is_multikey(index_def->key_def)) {
+ diag_set(ClientError, ER_MODIFY_INDEX,
+ index_def->name, space_name(space),
+ "vinyl space index cannot be multikey");
+ return -1;
+ }
return 0;
}
diff --git a/test/box/misc.result b/test/box/misc.result
index 9f0b2c73a..c0afc7e64 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -515,6 +515,7 @@ t;
184: box.error.SQL_UNRECOGNIZED_SYNTAX
185: box.error.SQL_UNKNOWN_TOKEN
186: box.error.SQL_PARSER_GENERIC
+ 187: box.error.INDEX_MULTIKEY_INVALID
...
test_run:cmd("setopt delimiter ''");
---
diff --git a/test/engine/json.result b/test/engine/json.result
index a790cdfbc..1bac85edd 100644
--- a/test/engine/json.result
+++ b/test/engine/json.result
@@ -683,16 +683,3 @@ town:select()
s:drop()
---
...
---
--- gh-1260: Multikey indexes
---
-s = box.schema.space.create('withdata')
----
-...
-idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
----
-- error: Tarantool does not support multikey indexes
-...
-s:drop()
----
-...
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua
index f9a7180b1..9afa3daa2 100644
--- a/test/engine/json.test.lua
+++ b/test/engine/json.test.lua
@@ -192,10 +192,3 @@ town:select()
name:drop()
town:select()
s:drop()
-
---
--- gh-1260: Multikey indexes
---
-s = box.schema.space.create('withdata')
-idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
-s:drop()
diff --git a/test/engine/multikey_idx.result b/test/engine/multikey_idx.result
new file mode 100644
index 000000000..f41ddafa8
--- /dev/null
+++ b/test/engine/multikey_idx.result
@@ -0,0 +1,228 @@
+test_run = require('test_run').new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata', {engine = 'vinyl'})
+---
+...
+-- Vinyl's space can't be multikey (yet).
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key
+ cannot be multikey'
+...
+s:drop()
+---
+...
+s = box.schema.space.create('withdata', {engine = 'memtx'})
+---
+...
+-- Primary index must be unique so it can't be multikey.
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': primary key
+ cannot be multikey'
+...
+pk = s:create_index('pk')
+---
+...
+-- Only tree index type may be mutlikey.
+_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': HASH index
+ cannot be multikey'
+...
+_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': BITSET index
+ cannot be multikey'
+...
+_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}})
+---
+- error: 'Can''t create or modify index ''idx'' in space ''withdata'': RTREE index
+ cannot be multikey'
+...
+-- Test incompatible multikey index parts.
+_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}})
+---
+- error: 'Can''t create or modify index ''idx3'' in space ''withdata'': multikey index
+ parts are incompatible'
+...
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}})
+---
+- error: 'Multikey index is invalid: no more than one array index placeholder [*]
+ is allowed in JSON path'
+...
+idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}})
+---
+...
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+- error: 'Multikey index is invalid: field 3 is already refer to document by identifier
+ and cannot use array index placeholder [*]'
+...
+idx0:drop()
+---
+...
+-- Unique multikey index.
+idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+---
+...
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}})
+---
+- error: 'Multikey index is invalid: field 3 is already used with array index placeholder
+ [*] and cannot refer to document by identifier'
+...
+s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}})
+---
+- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}})
+---
+- error: Duplicate key exists in unique index 'arr_idx' in space 'withdata'
+...
+-- Non-unique multikey index; two multikey indexes per space.
+arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}})
+---
+...
+arr_idx:select()
+---
+- - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:get({'James', 'Bond'})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:get({'Ivan', 'Ivanych'})
+---
+- [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+idx:get({'Vasya', 'Pupkin'})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+s:insert({4, {1}, {{fname='James', sname='Bond'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+-- Duplicates in multikey parts.
+s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}})
+---
+- error: Duplicate key exists in unique index 'idx' in space 'withdata'
+...
+idx:get({'A', 'B'})
+---
+...
+idx:get({'C', 'D'})
+---
+...
+idx:delete({'Vasya', 'Pupkin'})
+---
+- [1, [1, 2, 3], [{'fname': 'James', 'sname': 'Bond'}, {'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+---
+- [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+s:insert({7, {1}, {{fname='James', sname='Bond'}}})
+---
+- [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+...
+arr_idx:select({1})
+---
+- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+ - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+-- Snapshot & recovery.
+box.snapshot()
+---
+- ok
+...
+test_run:cmd("restart server default")
+s = box.space["withdata"]
+---
+...
+idx = s.index["idx"]
+---
+...
+arr_idx = s.index["arr_idx"]
+---
+...
+s:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+...
+idx:select()
+---
+- - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+ - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+...
+arr_idx:select()
+---
+- - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [7, [1], [{'fname': 'James', 'sname': 'Bond'}]]
+ - [6, [1, 2], [{'fname': 'Vasya', 'sname': 'Pupkin'}]]
+ - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+ - [2, [3, 4, 5], [{'fname': 'Ivan', 'sname': 'Ivanych'}]]
+...
+s:drop()
+---
+...
+-- Assymetric multikey index paths.
+s = box.schema.space.create('withdata')
+---
+...
+pk = s:create_index('pk')
+---
+...
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+---
+...
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
+---
+- [1, 1, [{'fname': 'A1', 'extra': {'sname': 'A2'}}, {'fname': 'B1'}, {'fname': 'C1',
+ 'extra': {'sname': 'C2'}}]]
+...
+s:drop()
+---
+...
diff --git a/test/engine/multikey_idx.test.lua b/test/engine/multikey_idx.test.lua
new file mode 100644
index 000000000..2b25063cf
--- /dev/null
+++ b/test/engine/multikey_idx.test.lua
@@ -0,0 +1,67 @@
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+
+--
+-- gh-1260: Multikey indexes
+--
+s = box.schema.space.create('withdata', {engine = 'vinyl'})
+-- Vinyl's space can't be multikey (yet).
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+s:drop()
+
+s = box.schema.space.create('withdata', {engine = 'memtx'})
+-- Primary index must be unique so it can't be multikey.
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}}})
+pk = s:create_index('pk')
+-- Only tree index type may be mutlikey.
+_ = s:create_index('idx', {type = 'hash', unique = true, parts = {{3, 'str', path = '[*].fname'}}})
+_ = s:create_index('idx', {type = 'bitset', unique = false, parts = {{3, 'str', path = '[*].fname'}}})
+_ = s:create_index('idx', {type = 'rtree', unique = false, parts = {{3, 'array', path = '[*].fname'}}})
+-- Test incompatible multikey index parts.
+_ = s:create_index('idx3', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '["data"][*].sname'}}})
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname[*].a'}}})
+idx0 = s:create_index('idx0', {parts = {{3, 'str', path = '[1].fname'}}})
+_ = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+idx0:drop()
+-- Unique multikey index.
+idx = s:create_index('idx', {unique = true, parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].sname'}}})
+_ = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}})
+s:insert({1, {1, 2, 3}, {{fname='James', sname='Bond'}, {fname='Vasya', sname='Pupkin'}}})
+s:insert({2, {3, 4, 5}, {{fname='Ivan', sname='Ivanych'}}})
+_ = s:create_index('arr_idx', {unique = true, parts = {{2, 'unsigned', path = '[*]'}}})
+-- Non-unique multikey index; two multikey indexes per space.
+arr_idx = s:create_index('arr_idx', {unique = false, parts = {{2, 'unsigned', path = '[*]'}}})
+arr_idx:select()
+idx:get({'James', 'Bond'})
+idx:get({'Ivan', 'Ivanych'})
+idx:get({'Vasya', 'Pupkin'})
+idx:select()
+s:insert({3, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({4, {1}, {{fname='James', sname='Bond'}}})
+idx:select()
+-- Duplicates in multikey parts.
+s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}})
+idx:get({'A', 'B'})
+idx:get({'C', 'D'})
+idx:delete({'Vasya', 'Pupkin'})
+s:insert({6, {1, 2}, {{fname='Vasya', sname='Pupkin'}}})
+s:insert({7, {1}, {{fname='James', sname='Bond'}}})
+arr_idx:select({1})
+idx:select()
+-- Snapshot & recovery.
+box.snapshot()
+test_run:cmd("restart server default")
+s = box.space["withdata"]
+idx = s.index["idx"]
+arr_idx = s.index["arr_idx"]
+s:select()
+idx:select()
+arr_idx:select()
+s:drop()
+
+-- Assymetric multikey index paths.
+s = box.schema.space.create('withdata')
+pk = s:create_index('pk')
+idx = s:create_index('idx', {parts = {{3, 'str', path = '[*].fname'}, {3, 'str', path = '[*].extra.sname', is_nullable = true}}})
+s:insert({1, 1, {{fname='A1', extra={sname='A2'}}, {fname='B1'}, {fname='C1', extra={sname='C2'}}}})
+s:drop()
--
2.21.0
next prev parent reply other threads:[~2019-03-19 12:32 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-19 12:32 [PATCH v2 0/5] " Kirill Shcherbatov
2019-03-19 12:32 ` [PATCH v2 1/5] lib: introduce json_path_is_multikey helper Kirill Shcherbatov
2019-03-19 15:43 ` [tarantool-patches] " Konstantin Osipov
2019-04-02 15:49 ` [tarantool-patches] " Kirill Shcherbatov
2019-03-19 12:32 ` [PATCH v2 2/5] lib: introduce is_weak_cmp flag for json_path_cmp Kirill Shcherbatov
2019-03-19 15:47 ` [tarantool-patches] " Konstantin Osipov
2019-03-19 12:32 ` [PATCH v2 3/5] box: move offset_slot init to tuple_format_add_field Kirill Shcherbatov
2019-03-19 12:32 ` [PATCH v2 4/5] box: introduce field_map_builder for field_map init Kirill Shcherbatov
2019-03-19 12:32 ` Kirill Shcherbatov [this message]
2019-03-19 16:05 ` [tarantool-patches] [PATCH v2 5/5] box: introduce multikey indexes in memtx Kirill Shcherbatov
2019-03-21 12:35 ` Kirill Shcherbatov
2019-03-28 10:21 ` [PATCH v2 0/5] " Vladimir Davydov
2019-04-02 15:49 ` [tarantool-patches] " Kirill Shcherbatov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5cc1d6c549a630d4925475de757baed71d5b5b1d.1552998554.git.kshcherbatov@tarantool.org \
--to=kshcherbatov@tarantool.org \
--cc=tarantool-patches@freelists.org \
--cc=vdavydov.dev@gmail.com \
--subject='Re: [PATCH v2 5/5] box: introduce multikey indexes in memtx' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox