From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id A03232EDC1 for ; Mon, 26 Nov 2018 05:49:58 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Vd8RfRfaPQGE for ; Mon, 26 Nov 2018 05:49:58 -0500 (EST) Received: from smtp35.i.mail.ru (smtp35.i.mail.ru [94.100.177.95]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id D09A52EDAD for ; Mon, 26 Nov 2018 05:49:56 -0500 (EST) From: Kirill Shcherbatov Subject: [tarantool-patches] [PATCH v5 5/9] box: introduce JSON indexes Date: Mon, 26 Nov 2018 13:49:39 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Kirill Shcherbatov New JSON-path-based indexes allow indexing the content of documents. As we need to store a user-defined JSON path in key_part and key_part_def, we have introduced the path and path_len fields. The JSON path is verified and transformed to canonical form when the index msgpack is unpacked. The path string is stored as a part of the key_def allocation: +-------+---------+-------+---------+-------+-------+-------+ |key_def|key_part1| ... 
|key_partN| path1 | pathK | pathN | +-------+---------+-------+---------+-------+-------+-------+ | ^ |-> path _________________| With format creation JSON paths are stored at the end of format allocation: +------------+------------+-------+------------+-------+ |tuple_format|tuple_field1| ... |tuple_fieldN| pathK | +------------+------------+-------+------------+-------+ Part of #1012 --- src/box/errcode.h | 2 +- src/box/index_def.c | 8 +- src/box/key_def.c | 164 +++++++++++++--- src/box/key_def.h | 23 ++- src/box/lua/space.cc | 5 + src/box/memtx_engine.c | 3 + src/box/sql.c | 1 + src/box/sql/build.c | 1 + src/box/sql/select.c | 6 +- src/box/sql/where.c | 1 + src/box/tuple.c | 38 +--- src/box/tuple_compare.cc | 13 +- src/box/tuple_extract_key.cc | 21 ++- src/box/tuple_format.c | 439 ++++++++++++++++++++++++++++++++++++------- src/box/tuple_format.h | 38 +++- src/box/tuple_hash.cc | 2 +- src/box/vinyl.c | 3 + src/box/vy_log.c | 3 +- src/box/vy_point_lookup.c | 2 - src/box/vy_stmt.c | 166 +++++++++++++--- test/box/misc.result | 1 + test/engine/tuple.result | 416 ++++++++++++++++++++++++++++++++++++++++ test/engine/tuple.test.lua | 121 ++++++++++++ 23 files changed, 1306 insertions(+), 171 deletions(-) diff --git a/src/box/errcode.h b/src/box/errcode.h index 73359eb..2f979ab 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -138,7 +138,7 @@ struct errcode_record { /* 83 */_(ER_ROLE_EXISTS, "Role '%s' already exists") \ /* 84 */_(ER_CREATE_ROLE, "Failed to create role '%s': %s") \ /* 85 */_(ER_INDEX_EXISTS, "Index '%s' already exists") \ - /* 86 */_(ER_UNUSED6, "") \ + /* 86 */_(ER_DATA_STRUCTURE_MISMATCH, "Tuple doesn't math document structure: %s") \ /* 87 */_(ER_ROLE_LOOP, "Granting role '%s' to role '%s' would create a loop") \ /* 88 */_(ER_GRANT, "Incorrect grant arguments: %s") \ /* 89 */_(ER_PRIV_GRANTED, "User '%s' already has %s access on %s '%s'") \ diff --git a/src/box/index_def.c b/src/box/index_def.c index 45c74d9..de4ea85 100644 --- 
a/src/box/index_def.c +++ b/src/box/index_def.c @@ -31,6 +31,7 @@ #include "index_def.h" #include "schema_def.h" #include "identifier.h" +#include "json/json.h" const char *index_type_strs[] = { "HASH", "TREE", "BITSET", "RTREE" }; @@ -298,8 +299,11 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) * Courtesy to a user who could have made * a typo. */ - if (index_def->key_def->parts[i].fieldno == - index_def->key_def->parts[j].fieldno) { + struct key_part *part_a = &index_def->key_def->parts[i]; + struct key_part *part_b = &index_def->key_def->parts[j]; + if (part_a->fieldno == part_b->fieldno && + json_path_cmp(part_a->path, part_a->path_len, + part_b->path, part_b->path_len) == 0){ diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name, "same key part is indexed twice"); diff --git a/src/box/key_def.c b/src/box/key_def.c index 2119ca3..bc6cecd 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -28,6 +28,8 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ +#include "fiber.h" +#include "json/json.h" #include "key_def.h" #include "tuple_compare.h" #include "tuple_extract_key.h" @@ -44,7 +46,8 @@ const struct key_part_def key_part_def_default = { COLL_NONE, false, ON_CONFLICT_ACTION_DEFAULT, - SORT_ORDER_ASC + SORT_ORDER_ASC, + NULL }; static int64_t @@ -59,6 +62,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len) #define PART_OPT_NULLABILITY "is_nullable" #define PART_OPT_NULLABLE_ACTION "nullable_action" #define PART_OPT_SORT_ORDER "sort_order" +#define PART_OPT_PATH "path" const struct opt_def part_def_reg[] = { OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type, @@ -71,6 +75,7 @@ const struct opt_def part_def_reg[] = { struct key_part_def, nullable_action, NULL), OPT_DEF_ENUM(PART_OPT_SORT_ORDER, sort_order, struct key_part_def, sort_order, NULL), + OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path), OPT_END, }; @@ -106,13 +111,25 @@ const uint32_t key_mp_type[] = { struct key_def * key_def_dup(const struct key_def *src) { - size_t sz = key_def_sizeof(src->part_count); - struct key_def *res = (struct key_def *)malloc(sz); + const struct key_part *parts = src->parts; + const struct key_part *parts_end = parts + src->part_count; + size_t sz = 0; + for (; parts < parts_end; parts++) + sz += parts->path != NULL ? 
parts->path_len + 1 : 0; + sz = key_def_sizeof(src->part_count, sz); + struct key_def *res = (struct key_def *)calloc(1, sz); if (res == NULL) { diag_set(OutOfMemory, sz, "malloc", "res"); return NULL; } memcpy(res, src, sz); + /* Update paths to point to the new memory chunk.*/ + for (uint32_t i = 0; i < src->part_count; i++) { + if (src->parts[i].path == NULL) + continue; + size_t path_offset = src->parts[i].path - (char *)src; + res->parts[i].path = (char *)res + path_offset; + } return res; } @@ -120,8 +137,23 @@ void key_def_swap(struct key_def *old_def, struct key_def *new_def) { assert(old_def->part_count == new_def->part_count); - for (uint32_t i = 0; i < new_def->part_count; i++) - SWAP(old_def->parts[i], new_def->parts[i]); + for (uint32_t i = 0; i < new_def->part_count; i++) { + if (old_def->parts[i].path == NULL) { + SWAP(old_def->parts[i], new_def->parts[i]); + } else { + /* + * Since the data is located in memory + * in the same order (otherwise rebuild + * would be called), just update the + * pointers. 
+ */ + size_t path_offset = + old_def->parts[i].path - (char *)old_def; + SWAP(old_def->parts[i], new_def->parts[i]); + old_def->parts[i].path = (char *)old_def + path_offset; + new_def->parts[i].path = (char *)new_def + path_offset; + } + } SWAP(*old_def, *new_def); } @@ -144,24 +176,38 @@ static void key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, enum on_conflict_action nullable_action, struct coll *coll, uint32_t coll_id, - enum sort_order sort_order) + enum sort_order sort_order, const char *path, + uint32_t path_len) { assert(part_no < def->part_count); assert(type < field_type_MAX); def->is_nullable |= (nullable_action == ON_CONFLICT_ACTION_NONE); + def->has_json_paths |= path != NULL; def->parts[part_no].nullable_action = nullable_action; def->parts[part_no].fieldno = fieldno; def->parts[part_no].type = type; def->parts[part_no].coll = coll; def->parts[part_no].coll_id = coll_id; def->parts[part_no].sort_order = sort_order; + if (path != NULL) { + def->parts[part_no].path_len = path_len; + assert(def->parts[part_no].path != NULL); + memcpy(def->parts[part_no].path, path, path_len); + def->parts[part_no].path[path_len] = '\0'; + } else { + def->parts[part_no].path_len = 0; + def->parts[part_no].path = NULL; + } column_mask_set_fieldno(&def->column_mask, fieldno); } struct key_def * key_def_new(const struct key_part_def *parts, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + ssize_t sz = 0; + for (uint32_t i = 0; i < part_count; i++) + sz += parts[i].path != NULL ? 
strlen(parts[i].path) + 1 : 0; + sz = key_def_sizeof(part_count, sz); struct key_def *def = calloc(1, sz); if (def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -171,6 +217,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) def->part_count = part_count; def->unique_part_count = part_count; + char *data = (char *)def + key_def_sizeof(part_count, 0); for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; struct coll *coll = NULL; @@ -184,16 +231,23 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) } coll = coll_id->coll; } + uint32_t path_len = 0; + if (part->path != NULL) { + path_len = strlen(part->path); + def->parts[i].path = data; + data += path_len + 1; + } key_def_set_part(def, i, part->fieldno, part->type, part->nullable_action, coll, part->coll_id, - part->sort_order); + part->sort_order, part->path, path_len); } key_def_set_cmp(def); return def; } -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) +int +key_def_dump_parts(struct region *pool, const struct key_def *def, + struct key_part_def *parts) { for (uint32_t i = 0; i < def->part_count; i++) { const struct key_part *part = &def->parts[i]; @@ -203,13 +257,27 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) part_def->is_nullable = key_part_is_nullable(part); part_def->nullable_action = part->nullable_action; part_def->coll_id = part->coll_id; + if (part->path != NULL) { + char *path = region_alloc(pool, part->path_len + 1); + if (path == NULL) { + diag_set(OutOfMemory, part->path_len + 1, + "region_alloc", "part_def->path"); + return -1; + } + memcpy(path, part->path, part->path_len); + path[part->path_len] = '\0'; + part_def->path = path; + } else { + part_def->path = NULL; +} } + return 0; } box_key_def_t * box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + size_t sz = 
key_def_sizeof(part_count, 0); struct key_def *key_def = calloc(1, sz); if (key_def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -223,7 +291,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) key_def_set_part(key_def, item, fields[item], (enum field_type)types[item], ON_CONFLICT_ACTION_DEFAULT, - NULL, COLL_NONE, SORT_ORDER_ASC); + NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0); } key_def_set_cmp(key_def); return key_def; @@ -272,6 +340,10 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1, if (key_part_is_nullable(part1) != key_part_is_nullable(part2)) return key_part_is_nullable(part1) < key_part_is_nullable(part2) ? -1 : 1; + int rc; + if ((rc = json_path_cmp(part1->path, part1->path_len, + part2->path, part2->path_len)) != 0) + return rc; } return part_count1 < part_count2 ? -1 : part_count1 > part_count2; } @@ -303,8 +375,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts, for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; assert(part->type < field_type_MAX); - SNPRINT(total, snprintf, buf, size, "%d, '%s'", - (int)part->fieldno, field_type_strs[part->type]); + if (part->path != NULL) { + SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'", + (int)part->fieldno, part->path, + field_type_strs[part->type]); + } else { + SNPRINT(total, snprintf, buf, size, "%d, '%s'", + (int)part->fieldno, + field_type_strs[part->type]); + } if (i < part_count - 1) SNPRINT(total, snprintf, buf, size, ", "); } @@ -323,6 +402,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; size += mp_sizeof_map(count); size += mp_sizeof_str(strlen(PART_OPT_FIELD)); size += mp_sizeof_uint(part->fieldno); @@ -337,6 +418,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY)); size += 
mp_sizeof_bool(part->is_nullable); } + if (part->path != NULL) { + size += mp_sizeof_str(strlen(PART_OPT_PATH)); + size += mp_sizeof_str(strlen(part->path)); + } } return size; } @@ -352,6 +437,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; data = mp_encode_map(data, count); data = mp_encode_str(data, PART_OPT_FIELD, strlen(PART_OPT_FIELD)); @@ -371,6 +458,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, strlen(PART_OPT_NULLABILITY)); data = mp_encode_bool(data, part->is_nullable); } + if (part->path != NULL) { + data = mp_encode_str(data, PART_OPT_PATH, + strlen(PART_OPT_PATH)); + data = mp_encode_str(data, part->path, + strlen(part->path)); + } } return data; } @@ -432,6 +525,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -445,6 +539,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, return key_def_decode_parts_166(parts, part_count, data, fields, field_count); } + struct region *region = &fiber()->gc; for (uint32_t i = 0; i < part_count; i++) { struct key_part_def *part = &parts[i]; if (mp_typeof(**data) != MP_MAP) { @@ -468,7 +563,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char *key = mp_decode_str(data, &key_len); if (opts_parse_key(part, part_def_reg, key, key_len, data, ER_WRONG_INDEX_OPTIONS, - i + TUPLE_INDEX_BASE, NULL, + i + TUPLE_INDEX_BASE, region, false) != 0) return -1; if (is_action_missing && @@ -533,7 +628,9 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find) const struct key_part *part = key_def->parts; const struct key_part *end = part + key_def->part_count; for (; part != end; part++) { - if (part->fieldno == to_find->fieldno) + if (part->fieldno == to_find->fieldno 
&& + json_path_cmp(part->path, part->path_len, + to_find->path, to_find->path_len) == 0) return part; } return NULL; @@ -559,18 +656,27 @@ key_def_merge(const struct key_def *first, const struct key_def *second) * Find and remove part duplicates, i.e. parts counted * twice since they are present in both key defs. */ - const struct key_part *part = second->parts; - const struct key_part *end = part + second->part_count; + size_t sz = 0; + const struct key_part *part = first->parts; + const struct key_part *end = part + first->part_count; + for (; part != end; part++) { + if (part->path != NULL) + sz += part->path_len + 1; + } + part = second->parts; + end = part + second->part_count; for (; part != end; part++) { if (key_def_find(first, part) != NULL) --new_part_count; + else if (part->path != NULL) + sz += part->path_len + 1; } + sz = key_def_sizeof(new_part_count, sz); struct key_def *new_def; - new_def = (struct key_def *)calloc(1, key_def_sizeof(new_part_count)); + new_def = (struct key_def *)calloc(1, sz); if (new_def == NULL) { - diag_set(OutOfMemory, key_def_sizeof(new_part_count), "malloc", - "new_def"); + diag_set(OutOfMemory, sz, "malloc", "new_def"); return NULL; } new_def->part_count = new_part_count; @@ -578,15 +684,22 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->is_nullable = first->is_nullable || second->is_nullable; new_def->has_optional_parts = first->has_optional_parts || second->has_optional_parts; + /* Path data write position in the new key_def. */ + char *data = (char *)new_def + key_def_sizeof(new_part_count, 0); /* Write position in the new key def. */ uint32_t pos = 0; /* Append first key def's parts to the new index_def. 
*/ part = first->parts; end = part + first->part_count; for (; part != end; part++) { + if (part->path != NULL) { + new_def->parts[pos].path = data; + data += part->path_len + 1; + } key_def_set_part(new_def, pos++, part->fieldno, part->type, part->nullable_action, part->coll, - part->coll_id, part->sort_order); + part->coll_id, part->sort_order, part->path, + part->path_len); } /* Set-append second key def's part to the new key def. */ @@ -595,9 +708,14 @@ key_def_merge(const struct key_def *first, const struct key_def *second) for (; part != end; part++) { if (key_def_find(first, part) != NULL) continue; + if (part->path != NULL) { + new_def->parts[pos].path = data; + data += part->path_len + 1; + } key_def_set_part(new_def, pos++, part->fieldno, part->type, part->nullable_action, part->coll, - part->coll_id, part->sort_order); + part->coll_id, part->sort_order, part->path, + part->path_len); } key_def_set_cmp(new_def); return new_def; diff --git a/src/box/key_def.h b/src/box/key_def.h index d4da6c5..7731e48 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -68,6 +68,8 @@ struct key_part_def { enum on_conflict_action nullable_action; /** Part sort order. */ enum sort_order sort_order; + /** JSON path to data. */ + const char *path; }; extern const struct key_part_def key_part_def_default; @@ -86,6 +88,13 @@ struct key_part { enum on_conflict_action nullable_action; /** Part sort order. */ enum sort_order sort_order; + /** + * JSON path to data in 'canonical' form. + * Read json_path_normalize to get more details. + */ + char *path; + /** The length of JSON path. */ + uint32_t path_len; }; struct key_def; @@ -152,6 +161,8 @@ struct key_def { uint32_t unique_part_count; /** True, if at least one part can store NULL. */ bool is_nullable; + /** True, if some key part has JSON path. */ + bool has_json_paths; /** * True, if some key parts can be absent in a tuple. These * fields assumed to be MP_NIL. 
@@ -245,9 +256,10 @@ box_tuple_compare_with_key(const box_tuple_t *tuple_a, const char *key_b, /** \endcond public */ static inline size_t -key_def_sizeof(uint32_t part_count) +key_def_sizeof(uint32_t part_count, uint32_t paths_size) { - return sizeof(struct key_def) + sizeof(struct key_part) * part_count; + return sizeof(struct key_def) + sizeof(struct key_part) * part_count + + paths_size; } /** @@ -260,8 +272,9 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count); /** * Dump part definitions of the given key def. */ -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts); +int +key_def_dump_parts(struct region *pool, const struct key_def *def, + struct key_part_def *parts); /** * Update 'has_optional_parts' of @a key_def with correspondence @@ -368,6 +381,8 @@ key_validate_parts(const struct key_def *key_def, const char *key, static inline bool key_def_is_sequential(const struct key_def *key_def) { + if (key_def->has_json_paths) + return false; for (uint32_t part_id = 0; part_id < key_def->part_count; part_id++) { if (key_def->parts[part_id].fieldno != part_id) return false; diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc index 7cae436..a882a9d 100644 --- a/src/box/lua/space.cc +++ b/src/box/lua/space.cc @@ -296,6 +296,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i) lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE); lua_setfield(L, -2, "fieldno"); + if (part->path != NULL) { + lua_pushstring(L, part->path); + lua_setfield(L, -2, "path"); + } + lua_pushboolean(L, key_part_is_nullable(part)); lua_setfield(L, -2, "is_nullable"); diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 28afb32..1bc46c6 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -1316,6 +1316,9 @@ memtx_index_def_change_requires_rebuild(struct index *index, return true; if (old_part->coll != new_part->coll) return true; + if (json_path_cmp(old_part->path, old_part->path_len, + new_part->path, 
new_part->path_len) != 0) + return true; } return false; } diff --git a/src/box/sql.c b/src/box/sql.c index 0e4e0f4..d199171 100644 --- a/src/box/sql.c +++ b/src/box/sql.c @@ -378,6 +378,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info) part->nullable_action = ON_CONFLICT_ACTION_NONE; part->is_nullable = true; part->sort_order = SORT_ORDER_ASC; + part->path = NULL; if (def != NULL && i < def->part_count) part->coll_id = def->parts[i].coll_id; else diff --git a/src/box/sql/build.c b/src/box/sql/build.c index b5abaee..9f5d5aa 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -2423,6 +2423,7 @@ index_fill_def(struct Parse *parse, struct index *index, part->is_nullable = part->nullable_action == ON_CONFLICT_ACTION_NONE; part->sort_order = SORT_ORDER_ASC; part->coll_id = coll_id; + part->path = NULL; } key_def = key_def_new(key_parts, expr_list->nExpr); if (key_def == NULL) diff --git a/src/box/sql/select.c b/src/box/sql/select.c index ca709b4..0734712 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -1349,6 +1349,7 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count) part->is_nullable = false; part->nullable_action = ON_CONFLICT_ACTION_ABORT; part->sort_order = SORT_ORDER_ASC; + part->path = NULL; } return key_info; } @@ -1356,6 +1357,9 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count) struct sql_key_info * sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def) { + /** SQL key_parts could not have JSON paths. 
*/ + for (uint32_t i = 0; i < key_def->part_count; i++) + assert(key_def->parts[i].path == NULL); struct sql_key_info *key_info = sqlite3DbMallocRawNN(db, sql_key_info_sizeof(key_def->part_count)); if (key_info == NULL) { @@ -1366,7 +1370,7 @@ sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def) key_info->key_def = NULL; key_info->refs = 1; key_info->part_count = key_def->part_count; - key_def_dump_parts(key_def, key_info->parts); + key_def_dump_parts(&fiber()->gc, key_def, key_info->parts); return key_info; } diff --git a/src/box/sql/where.c b/src/box/sql/where.c index 9c3462b..78f70f4 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -2807,6 +2807,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */ part.is_nullable = false; part.sort_order = SORT_ORDER_ASC; part.coll_id = COLL_NONE; + part.path = NULL; struct key_def *key_def = key_def_new(&part, 1); if (key_def == NULL) { diff --git a/src/box/tuple.c b/src/box/tuple.c index aae1c3c..62e06e7 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -138,38 +138,18 @@ runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple) int tuple_validate_raw(struct tuple_format *format, const char *tuple) { - if (tuple_format_field_count(format) == 0) - return 0; /* Nothing to check */ - - /* Check to see if the tuple has a sufficient number of fields. 
*/ - uint32_t field_count = mp_decode_array(&tuple); - if (format->exact_field_count > 0 && - format->exact_field_count != field_count) { - diag_set(ClientError, ER_EXACT_FIELD_COUNT, - (unsigned) field_count, - (unsigned) format->exact_field_count); + struct region *region = &fiber()->gc; + uint32_t used = region_used(region); + uint32_t *field_map = region_alloc(region, format->field_map_size); + if (field_map == NULL) { + diag_set(OutOfMemory, format->field_map_size, "region_alloc", + "field_map"); return -1; } - if (unlikely(field_count < format->min_field_count)) { - diag_set(ClientError, ER_MIN_FIELD_COUNT, - (unsigned) field_count, - (unsigned) format->min_field_count); + field_map = (uint32_t *)((char *)field_map + format->field_map_size); + if (tuple_init_field_map(format, field_map, tuple, true) != 0) return -1; - } - - /* Check field types */ - struct tuple_field *field = tuple_format_field(format, 0); - uint32_t i = 0; - uint32_t defined_field_count = - MIN(field_count, tuple_format_field_count(format)); - for (; i < defined_field_count; ++i) { - field = tuple_format_field(format, i); - if (key_mp_type_validate(field->type, mp_typeof(*tuple), - ER_FIELD_TYPE, i + TUPLE_INDEX_BASE, - tuple_field_is_nullable(field))) - return -1; - mp_next(&tuple); - } + region_truncate(region, used); return 0; } diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index e21b009..554c29f 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = tuple_data(tuple_b); - if (key_def->part_count == 1 && part->fieldno == 0) { + if (key_def->part_count == 1 && part->fieldno == 0 && + part->path == NULL) { /* * First field can not be optional - empty tuples * can not exist. 
@@ -493,8 +494,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, } bool was_null_met = false; - const struct tuple_format *format_a = tuple_format(tuple_a); - const struct tuple_format *format_b = tuple_format(tuple_b); + struct tuple_format *format_a = tuple_format(tuple_a); + struct tuple_format *format_b = tuple_format(tuple_b); const uint32_t *field_map_a = tuple_field_map(tuple_a); const uint32_t *field_map_b = tuple_field_map(tuple_b); struct key_part *end; @@ -585,7 +586,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); struct key_part *part = key_def->parts; - const struct tuple_format *format = tuple_format(tuple); + struct tuple_format *format = tuple_format(tuple); const char *tuple_raw = tuple_data(tuple); const uint32_t *field_map = tuple_field_map(tuple); enum mp_type a_type, b_type; @@ -1027,7 +1028,7 @@ tuple_compare_create(const struct key_def *def) } } assert(! def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_arr) / sizeof(cmp_arr[0]); k++) { @@ -1247,7 +1248,7 @@ tuple_compare_with_key_create(const struct key_def *def) } } assert(! 
def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_wk_arr) / sizeof(cmp_wk_arr[0]); diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc index e9d7cac..04c5463 100644 --- a/src/box/tuple_extract_key.cc +++ b/src/box/tuple_extract_key.cc @@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i) { uint32_t fieldno1 = def->parts[i].fieldno + 1; uint32_t fieldno2 = def->parts[i + 1].fieldno; - return fieldno1 == fieldno2; + return fieldno1 == fieldno2 && def->parts[i].path == NULL && + def->parts[i + 1].path == NULL; } /** True, if a key con contain two or more parts in sequence. */ @@ -111,7 +112,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple, const char *data = tuple_data(tuple); uint32_t part_count = key_def->part_count; uint32_t bsize = mp_sizeof_array(part_count); - const struct tuple_format *format = tuple_format(tuple); + struct tuple_format *format = tuple_format(tuple); const uint32_t *field_map = tuple_field_map(tuple); const char *tuple_end = data + tuple->bsize; @@ -241,7 +242,8 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, if (!key_def_parts_are_sequential(key_def, i)) break; } - uint32_t end_fieldno = key_def->parts[i].fieldno; + const struct key_part *part = &key_def->parts[i]; + uint32_t end_fieldno = part->fieldno; if (fieldno < current_fieldno) { /* Rewind. 
*/ @@ -283,6 +285,15 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, current_fieldno++; } } + const char *field_last, *field_end_last; + if (part->path != NULL) { + field_last = field; + field_end_last = field_end; + (void)tuple_field_go_to_path(&field, part->path, + part->path_len); + field_end = field; + mp_next(&field_end); + } memcpy(key_buf, field, field_end - field); key_buf += field_end - field; if (has_optional_parts && null_count != 0) { @@ -291,6 +302,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, } else { assert(key_buf - key <= data_end - data); } + if (part->path != NULL) { + field = field_last; + field_end = field_end_last; + } } if (key_size != NULL) *key_size = (uint32_t)(key_buf - key); diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 92028c5..193d0d8 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "fiber.h" #include "json/json.h" #include "tuple_format.h" #include "coll_id_cache.h" @@ -51,7 +52,8 @@ tuple_field_create(struct json_token *token) ret->offset_slot = TUPLE_OFFSET_SLOT_NIL; ret->coll_id = COLL_NONE; ret->nullable_action = ON_CONFLICT_ACTION_NONE; - ret->token = *token; + if (token != NULL) + ret->token = *token; return ret; } @@ -61,14 +63,114 @@ tuple_field_destroy(struct tuple_field *field) free(field); } +/** Build a JSON tree path for specified path. 
*/ +static struct tuple_field * +tuple_field_tree_add_path(struct tuple_format *format, const char *path, + uint32_t path_len, uint32_t fieldno) +{ + int rc = 0; + struct json_tree *tree = &format->tree; + struct tuple_field *parent = tuple_format_field(format, fieldno); + struct tuple_field *field = tuple_field_create(NULL); + if (unlikely(field == NULL)) + goto end; + + struct json_lexer lexer; + bool is_last_new = false; + json_lexer_create(&lexer, path, path_len); + while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && + field->token.key.type != JSON_TOKEN_END) { + enum field_type iterm_node_type = + field->token.key.type == JSON_TOKEN_STR ? + FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (parent->type != FIELD_TYPE_ANY && + parent->type != iterm_node_type) { + const char *name = + tt_sprintf("[%d]%.*s", fieldno, path_len, path); + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, name, + field_type_strs[parent->type], + field_type_strs[iterm_node_type]); + parent = NULL; + goto end; + } + struct tuple_field *next = + json_tree_lookup_entry(tree, &parent->token, + &field->token, + struct tuple_field, token); + if (next == NULL) { + rc = json_tree_add(tree, &parent->token, &field->token); + if (unlikely(rc != 0)) { + diag_set(OutOfMemory, sizeof(struct json_token), + "json_tree_add", "tree"); + parent = NULL; + goto end; + } + next = field; + is_last_new = true; + field = tuple_field_create(NULL); + if (unlikely(next == NULL)) + goto end; + } else { + is_last_new = false; + } + parent->type = iterm_node_type; + parent = next; + } + if (rc != 0 || field->token.key.type != JSON_TOKEN_END) { + const char *err_msg = + tt_sprintf("invalid JSON path '%s': path has invalid " + "structure (error at position %d)", path, + rc); + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + fieldno + TUPLE_INDEX_BASE, err_msg); + parent = NULL; + goto end; + } + assert(parent != NULL); + /* Update tree depth information. 
*/ + if (is_last_new) { + uint32_t depth = 1; + for (struct json_token *iter = parent->token.parent; + iter != &format->tree.root; iter = iter->parent, ++depth) { + struct tuple_field *record = + json_tree_entry(iter, struct tuple_field, + token); + record->subtree_depth = + MAX(record->subtree_depth, depth); + } + } +end: + tuple_field_destroy(field); + return parent; +} + static int tuple_format_use_key_part(struct tuple_format *format, const struct field_def *fields, uint32_t field_count, const struct key_part *part, bool is_sequential, - int *current_slot) + int *current_slot, char **path_data) { assert(part->fieldno < tuple_format_field_count(format)); struct tuple_field *field = tuple_format_field(format, part->fieldno); + if (unlikely(part->path != NULL)) { + assert(!is_sequential); + /** + * Copy JSON path data to reserved area at the + * end of format allocation. + */ + memcpy(*path_data, part->path, part->path_len); + (*path_data)[part->path_len] = '\0'; + struct tuple_field *root = field; + field = tuple_field_tree_add_path(format, *path_data, + part->path_len, + part->fieldno); + if (field == NULL) + return -1; + format->subtree_depth = + MAX(format->subtree_depth, root->subtree_depth + 1); + field->is_key_part = true; + *path_data += part->path_len + 1; + } /* * If a field is not present in the space format, * inherit nullable action of the first key part @@ -113,7 +215,10 @@ tuple_format_use_key_part(struct tuple_format *format, field->type)) { const char *name; int fieldno = part->fieldno + TUPLE_INDEX_BASE; - if (part->fieldno >= field_count) { + if (unlikely(part->path != NULL)) { + name = tt_sprintf("[%d]%.*s", fieldno, part->path_len, + part->path); + } else if (part->fieldno >= field_count) { name = tt_sprintf("%d", fieldno); } else { const struct field_def *def = @@ -137,10 +242,9 @@ tuple_format_use_key_part(struct tuple_format *format, * simply accessible, so we don't store an offset for it. 
*/ if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL && - is_sequential == false && part->fieldno > 0) { - *current_slot = *current_slot - 1; - field->offset_slot = *current_slot; - } + is_sequential == false && + (part->fieldno > 0 || part->path != NULL)) + field->offset_slot = (*current_slot = *current_slot - 1); return 0; } @@ -181,7 +285,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, } int current_slot = 0; - + char *paths_data = (char *)format + sizeof(struct tuple_format); /* extract field type info */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { const struct key_def *key_def = keys[key_no]; @@ -193,7 +297,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, if (tuple_format_use_key_part(format, fields, field_count, part, is_sequential, - &current_slot) != 0) + &current_slot, + &paths_data) != 0) return -1; } } @@ -261,6 +366,8 @@ static struct tuple_format * tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, uint32_t space_field_count, struct tuple_dictionary *dict) { + /* Size of area to store paths.
*/ + uint32_t paths_size = 0; uint32_t index_field_count = 0; /* find max max field no */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { @@ -270,13 +377,16 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, for (; part < pend; part++) { index_field_count = MAX(index_field_count, part->fieldno + 1); + if (part->path != NULL) + paths_size += part->path_len + 1; } } uint32_t field_count = MAX(space_field_count, index_field_count); - struct tuple_format *format = malloc(sizeof(struct tuple_format)); + uint32_t allocation_size = sizeof(struct tuple_format) + paths_size; + struct tuple_format *format = malloc(allocation_size); if (format == NULL) { - diag_set(OutOfMemory, sizeof(struct tuple_format), "malloc", + diag_set(OutOfMemory, allocation_size, "malloc", "tuple format"); return NULL; } @@ -284,6 +394,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, free(format); return NULL; } + format->subtree_depth = 1; struct json_token token; memset(&token, 0, sizeof(token)); token.key.type = JSON_TOKEN_NUM; @@ -306,6 +417,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, format->dict = dict; tuple_dictionary_ref(dict); } + format->allocation_size = allocation_size; format->refs = 0; format->id = FORMAT_ID_NIL; format->index_field_count = index_field_count; @@ -377,16 +489,37 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, { if (format1->exact_field_count != format2->exact_field_count) return false; - uint32_t format1_field_count = tuple_format_field_count(format1); - uint32_t format2_field_count = tuple_format_field_count(format2); - for (uint32_t i = 0; i < format1_field_count; ++i) { - const struct tuple_field *field1 = - tuple_format_field(format1, i); + struct tuple_field *field1; + struct json_token *field2_prev_token = NULL; + struct json_token *skip_root_token = NULL; + struct json_token *field1_prev_token = &format1->tree.root; + 
json_tree_foreach_entry_preorder(field1, &format1->tree.root, + struct tuple_field, token) { + /* Test if subtree skip is required. */ + if (skip_root_token != NULL) { + struct json_token *tmp = &field1->token; + while (tmp->parent != NULL && + tmp->parent != skip_root_token) + tmp = tmp->parent; + if (tmp->parent == skip_root_token) + continue; + } + skip_root_token = NULL; + /* Lookup for a valid parent node in new tree. */ + while (field1_prev_token != field1->token.parent) { + field1_prev_token = field1_prev_token->parent; + field2_prev_token = field2_prev_token->parent; + assert(field1_prev_token != NULL); + } + struct tuple_field *field2 = + json_tree_lookup_entry(&format2->tree, field2_prev_token, + &field1->token, + struct tuple_field, token); /* * The field has a data type in format1, but has * no data type in format2. */ - if (i >= format2_field_count) { + if (field2 == NULL) { /* * The field can get a name added * for it, and this doesn't require a data @@ -397,13 +530,13 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, * NULLs or miss the subject field. */ if (field1->type == FIELD_TYPE_ANY && - tuple_field_is_nullable(field1)) + tuple_field_is_nullable(field1)) { + skip_root_token = &field1->token; continue; - else + } else { return false; + } } - const struct tuple_field *field2 = - tuple_format_field(format2, i); if (! field_type1_contains_type2(field1->type, field2->type)) return false; /* @@ -413,10 +546,82 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, if (tuple_field_is_nullable(field2) && !tuple_field_is_nullable(field1)) return false; + + field2_prev_token = &field2->token; + field1_prev_token = &field1->token; } return true; } +/** Find a field in format by offset slot. 
*/ +static struct tuple_field * +tuple_field_by_offset_slot(const struct tuple_format *format, + int32_t offset_slot) +{ + struct tuple_field *field; + struct json_token *root = (struct json_token *)&format->tree.root; + json_tree_foreach_entry_preorder(field, root, struct tuple_field, + token) { + if (field->offset_slot == offset_slot) + return field; + } + return NULL; +} + +/** + * Verify field_map and raise error on some indexed field has + * not been initialized. Routine rely on field_map has been + * initialized with UINT32_MAX marker before field_map + * initialization. + */ +static int +tuple_field_map_validate(const struct tuple_format *format, uint32_t *field_map) +{ + struct json_token *tree_node = (struct json_token *)&format->tree.root; + /* Lookup for absent not-nullable fields. */ + int32_t field_map_items = + (int32_t)(format->field_map_size/sizeof(field_map[0])); + for (int32_t i = -1; i >= -field_map_items; i--) { + if (field_map[i] != UINT32_MAX) + continue; + + struct tuple_field *field = + tuple_field_by_offset_slot(format, i); + assert(field != NULL); + /* Lookup for field number in tree. 
*/ + struct json_token *parent = &field->token; + while (parent->parent != &format->tree.root) + parent = parent->parent; + assert(parent->key.type == JSON_TOKEN_NUM); + uint32_t fieldno = parent->key.num; + + tree_node = &field->token; + const char *err_msg; + if (field->token.key.type == JSON_TOKEN_STR) { + err_msg = tt_sprintf("invalid field %d document " + "content: map doesn't contain a " + "key '%.*s' defined in index", + fieldno, tree_node->key.len, + tree_node->key.str); + } else if (field->token.key.type == JSON_TOKEN_NUM) { + err_msg = tt_sprintf("invalid field %d document " + "content: array size %d is less " + "than size %d defined in index", + fieldno, tree_node->key.num, + tree_node->parent->child_count); + } + diag_set(ClientError, ER_DATA_STRUCTURE_MISMATCH, err_msg); + return -1; + } + return 0; +} + +struct parse_ctx { + enum json_token_type child_type; + uint32_t items; + uint32_t curr; +}; + /** @sa declaration for details. */ int tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, @@ -442,44 +647,123 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, (unsigned) format->min_field_count); return -1; } - - /* first field is simply accessible, so we do not store offset to it */ - enum mp_type mp_type = mp_typeof(*pos); - const struct tuple_field *field = - tuple_format_field((struct tuple_format *)format, 0); - if (validate && - key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE, - TUPLE_INDEX_BASE, tuple_field_is_nullable(field))) - return -1; - mp_next(&pos); - /* other fields...*/ - uint32_t i = 1; uint32_t defined_field_count = MIN(field_count, validate ? tuple_format_field_count(format) : format->index_field_count); - if (field_count < format->index_field_count) { - /* - * Nullify field map to be able to detect by 0, - * which key fields are absent in tuple_field(). 
- */ - memset((char *)field_map - format->field_map_size, 0, - format->field_map_size); - } - for (; i < defined_field_count; ++i) { - field = tuple_format_field((struct tuple_format *)format, i); - mp_type = mp_typeof(*pos); - if (validate && - key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE, - i + TUPLE_INDEX_BASE, - tuple_field_is_nullable(field))) - return -1; - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { - field_map[field->offset_slot] = - (uint32_t) (pos - tuple); + /* + * Fill field_map with marker for routine + * tuple_field_map_validate to detect absent fields. + */ + memset((char *)field_map - format->field_map_size, + validate ? UINT32_MAX : 0, format->field_map_size); + + struct region *region = &fiber()->gc; + uint32_t mp_stack_items = format->subtree_depth + 1; + uint32_t mp_stack_size = mp_stack_items * sizeof(struct parse_ctx); + struct parse_ctx *mp_stack = region_alloc(region, mp_stack_size); + if (unlikely(mp_stack == NULL)) { + diag_set(OutOfMemory, mp_stack_size, "region_alloc", + "mp_stack"); + return -1; + } + mp_stack[0] = (struct parse_ctx){ + .child_type = JSON_TOKEN_NUM, + .items = defined_field_count, + .curr = 0, + }; + uint32_t mp_stack_idx = 0; + struct json_tree *tree = (struct json_tree *)&format->tree; + struct json_token *parent = &tree->root; + while (mp_stack[0].curr <= mp_stack[0].items) { + /* Prepare key for tree lookup. */ + struct json_token token; + token.key.type = mp_stack[mp_stack_idx].child_type; + ++mp_stack[mp_stack_idx].curr; + if (token.key.type == JSON_TOKEN_NUM) { + token.key.num = mp_stack[mp_stack_idx].curr; + } else if (token.key.type == JSON_TOKEN_STR) { + if (mp_typeof(*pos) != MP_STR) { + /* + * We do not support non-string + * keys in maps.
+ */ + mp_next(&pos); + mp_next(&pos); + continue; + } + token.key.str = + mp_decode_str(&pos, (uint32_t *)&token.key.len); + } else { + unreachable(); + } + struct tuple_field *field = + json_tree_lookup_entry(tree, parent, &token, + struct tuple_field, token); + enum mp_type type = mp_typeof(*pos); + if (field != NULL) { + bool is_nullable = tuple_field_is_nullable(field); + if (validate && + key_mp_type_validate(field->type, type, + ER_FIELD_TYPE, + mp_stack[0].curr, + is_nullable) != 0) + return -1; + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + field_map[field->offset_slot] = + (uint32_t)(pos - tuple); + } + } + /* Prepare stack info for next iteration. */ + if (field != NULL && type == MP_ARRAY && + mp_stack_idx + 1 < format->subtree_depth) { + uint32_t size = mp_decode_array(&pos); + if (unlikely(size == 0)) + continue; + parent = &field->token; + mp_stack[++mp_stack_idx] = (struct parse_ctx){ + .child_type = JSON_TOKEN_NUM, + .items = size, + .curr = 0, + }; + } else if (field != NULL && type == MP_MAP && + mp_stack_idx + 1 < format->subtree_depth) { + uint32_t size = mp_decode_map(&pos); + if (unlikely(size == 0)) + continue; + parent = &field->token; + mp_stack[++mp_stack_idx] = (struct parse_ctx){ + .child_type = JSON_TOKEN_STR, + .items = size, + .curr = 0, + }; + } else { + mp_next(&pos); + while (mp_stack[mp_stack_idx].curr >= + mp_stack[mp_stack_idx].items) { + assert(parent != NULL); + parent = parent->parent; + if (mp_stack_idx-- == 0) + goto end; + } } - mp_next(&pos); + }; +end:; + /* + * Field map has already been initialized with zeros when + * no validation is required. 
+ */ + if (!validate) + return 0; + struct tuple_field *field; + struct json_token *root = (struct json_token *)&format->tree.root; + json_tree_foreach_entry_preorder(field, root, struct tuple_field, + token) { + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && + tuple_field_is_nullable(field) && + field_map[field->offset_slot] == UINT32_MAX) + field_map[field->offset_slot] = 0; } - return 0; + return tuple_field_map_validate(format, field_map); } uint32_t @@ -617,15 +901,7 @@ tuple_field_go_to_key(const char **field, const char *key, int len) return -1; } -/** - * Retrieve msgpack data by JSON path. - * @param data Pointer to msgpack with data. - * @param path The path to process. - * @param path_len The length of the @path. - * @retval 0 On success. - * @retval >0 On path parsing error, invalid character position. - */ -static int +int tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len) { int rc; @@ -731,3 +1007,40 @@ error: tt_sprintf("error in path on position %d", rc)); return -1; } + +const char * +tuple_field_by_part_raw(struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part) +{ + if (likely(part->path == NULL)) + return tuple_field_raw(format, data, field_map, part->fieldno); + + uint32_t field_count = tuple_format_field_count(format); + struct tuple_field *root_field = + likely(part->fieldno < field_count) ? + tuple_format_field(format, part->fieldno) : NULL; + struct tuple_field *field = + unlikely(root_field == NULL) ? NULL: + tuple_format_field_by_path(format, root_field, part->path, + part->path_len); + if (unlikely(field == NULL)) { + /* + * Legacy tuple having no field map for JSON + * index require full path parse. 
+ */ + const char *field_raw = + tuple_field_raw(format, data, field_map, part->fieldno); + if (unlikely(field_raw == NULL)) + return NULL; + if (tuple_field_go_to_path(&field_raw, part->path, + part->path_len) != 0) + return NULL; + return field_raw; + } + int32_t offset_slot = field->offset_slot; + assert(offset_slot < 0); + assert(-offset_slot * sizeof(uint32_t) <= format->field_map_size); + if (unlikely(field_map[offset_slot] == 0)) + return NULL; + return data + field_map[offset_slot]; +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 2da773b..860f052 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -116,6 +116,8 @@ struct tuple_field { uint32_t coll_id; /** An JSON entry to organize tree. */ struct json_token token; + /** A maximum depth of field subtree. */ + uint32_t subtree_depth; }; /** @@ -169,12 +171,16 @@ struct tuple_format { * index_field_count <= min_field_count <= field_count. */ uint32_t min_field_count; + /** Size of format allocation. */ + uint32_t allocation_size; /** * Shared names storage used by all formats of a space. */ struct tuple_dictionary *dict; /** JSON tree of fields. */ struct json_tree tree; + /** A maximum depth of fields subtree. */ + uint32_t subtree_depth; }; @@ -196,6 +202,17 @@ tuple_format_field(struct tuple_format *format, uint32_t fieldno) struct tuple_field, token); } +static inline struct tuple_field * +tuple_format_field_by_path(struct tuple_format *format, + struct tuple_field *root, const char *path, + uint32_t path_len) +{ + return json_tree_lookup_path_entry(&format->tree, &root->token, + path, path_len, struct tuple_field, + token); +} + + extern struct tuple_format **tuple_formats; static inline uint32_t @@ -397,6 +414,18 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, } /** + * Retrieve msgpack data by JSON path. + * @param data Pointer to msgpack with data. + * @param path The path to process. + * @param path_len The length of the @path. 
+ * @retval 0 On success. + * @retval >0 On path parsing error, invalid character position. + */ +int +tuple_field_go_to_path(const char **data, const char *path, + uint32_t path_len); + +/** * Get tuple field by its path. * @param format Tuple format. * @param tuple MessagePack tuple's body. @@ -423,12 +452,9 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, * @param part Index part to use. * @retval Field data if the field exists or NULL. */ -static inline const char * -tuple_field_by_part_raw(const struct tuple_format *format, const char *data, - const uint32_t *field_map, struct key_part *part) -{ - return tuple_field_raw(format, data, field_map, part->fieldno); -} +const char * +tuple_field_by_part_raw(struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc index b394804..3486ce1 100644 --- a/src/box/tuple_hash.cc +++ b/src/box/tuple_hash.cc @@ -222,7 +222,7 @@ key_hash_slowpath(const char *key, struct key_def *key_def); void tuple_hash_func_set(struct key_def *key_def) { - if (key_def->is_nullable) + if (key_def->is_nullable || key_def->has_json_paths) goto slowpath; /* * Check that key_def defines sequential a key without holes diff --git a/src/box/vinyl.c b/src/box/vinyl.c index ce81c6a..3c9fbf8 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -982,6 +982,9 @@ vinyl_index_def_change_requires_rebuild(struct index *index, return true; if (!field_type1_contains_type2(new_part->type, old_part->type)) return true; + if (json_path_cmp(old_part->path, old_part->path_len, + new_part->path, new_part->path_len) != 0) + return true; } return false; } diff --git a/src/box/vy_log.c b/src/box/vy_log.c index 8a8f9d7..0550144 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -711,7 +711,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src) "struct key_part_def"); goto 
err; } - key_def_dump_parts(src->key_def, dst->key_parts); + if (key_def_dump_parts(pool, src->key_def, dst->key_parts) != 0) + goto err; dst->key_part_count = src->key_def->part_count; dst->key_def = NULL; } diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index 7b704b8..9d5e220 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, struct tuple *key, struct tuple **ret) { - assert(tuple_field_count(key) >= lsm->cmp_def->part_count); - *ret = NULL; double start_time = ev_monotonic_now(loop()); int rc = 0; diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index 3e60fec..2f35284 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -29,6 +29,7 @@ * SUCH DAMAGE. */ +#include "assoc.h" #include "vy_stmt.h" #include @@ -370,6 +371,85 @@ vy_stmt_replace_from_upsert(const struct tuple *upsert) return replace; } +/** + * Construct tuple or calculate it's size. The fields_iov_ht + * is a hashtable that links leaf field records of field path + * tree and iovs that contain raw data. Function also fills the + * tuple field_map when write_data flag is set true. + */ +static void +vy_stmt_tuple_restore_raw(struct tuple_format *format, char *tuple_raw, + uint32_t *field_map, char **offset, + struct mh_i64ptr_t *fields_iov_ht, bool write_data) +{ + struct tuple_field *prev = NULL; + struct tuple_field *curr; + json_tree_foreach_entry_preorder(curr, &format->tree.root, + struct tuple_field, token) { + struct json_token *curr_node = &curr->token; + struct tuple_field *parent = + curr_node->parent == NULL ? NULL : + json_tree_entry(curr_node->parent, struct tuple_field, + token); + if (parent != NULL && parent->type == FIELD_TYPE_ARRAY && + curr_node->sibling_idx > 0) { + /* + * Fill unindexed array items with nulls. + * Gaps size calculated as a difference + * between sibling nodes. 
+ */ + for (uint32_t i = curr_node->sibling_idx - 1; + curr_node->parent->children[i] == NULL && + i > 0; i--) { + *offset = !write_data ? + (*offset += mp_sizeof_nil()) : + mp_encode_nil(*offset); + } + } else if (parent != NULL && parent->type == FIELD_TYPE_MAP) { + /* Set map key. */ + const char *str = curr_node->key.str; + uint32_t len = curr_node->key.len; + *offset = !write_data ? + (*offset += mp_sizeof_str(len)) : + mp_encode_str(*offset, str, len); + } + /* Fill data. */ + uint32_t children_count = curr_node->child_count; + if (curr->type == FIELD_TYPE_ARRAY) { + *offset = !write_data ? + (*offset += mp_sizeof_array(children_count)) : + mp_encode_array(*offset, children_count); + } else if (curr->type == FIELD_TYPE_MAP) { + *offset = !write_data ? + (*offset += mp_sizeof_map(children_count)) : + mp_encode_map(*offset, children_count); + } else { + /* Leaf record. */ + mh_int_t k = mh_i64ptr_find(fields_iov_ht, + (uint64_t)curr, NULL); + struct iovec *iov = + k != mh_end(fields_iov_ht) ? + mh_i64ptr_node(fields_iov_ht, k)->val : NULL; + if (iov == NULL) { + *offset = !write_data ? + (*offset += mp_sizeof_nil()) : + mp_encode_nil(*offset); + } else { + uint32_t data_offset = *offset - tuple_raw; + int32_t slot = curr->offset_slot; + if (write_data) { + memcpy(*offset, iov->iov_base, + iov->iov_len); + if (slot != TUPLE_OFFSET_SLOT_NIL) + field_map[slot] = data_offset; + } + *offset += iov->iov_len; + } + } + prev = curr; + } +} + static struct tuple * vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, const struct key_def *cmp_def, @@ -378,51 +458,79 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, /* UPSERT can't be surrogate. 
*/ assert(type != IPROTO_UPSERT); struct region *region = &fiber()->gc; + struct tuple *stmt = NULL; uint32_t field_count = format->index_field_count; - struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count); + uint32_t part_count = mp_decode_array(&key); + assert(part_count == cmp_def->part_count); + struct iovec *iov = region_alloc(region, sizeof(*iov) * part_count); if (iov == NULL) { - diag_set(OutOfMemory, sizeof(*iov) * field_count, - "region", "iov for surrogate key"); + diag_set(OutOfMemory, sizeof(*iov) * part_count, "region", + "iov for surrogate key"); return NULL; } - memset(iov, 0, sizeof(*iov) * field_count); - uint32_t part_count = mp_decode_array(&key); - assert(part_count == cmp_def->part_count); - assert(part_count <= field_count); - uint32_t nulls_count = field_count - cmp_def->part_count; - uint32_t bsize = mp_sizeof_array(field_count) + - mp_sizeof_nil() * nulls_count; - for (uint32_t i = 0; i < part_count; ++i) { - const struct key_part *part = &cmp_def->parts[i]; + /* Hashtable linking leaf field and corresponding iov.
*/ + struct mh_i64ptr_t *fields_iov_ht = mh_i64ptr_new(); + if (fields_iov_ht == NULL) { + diag_set(OutOfMemory, sizeof(struct mh_i64ptr_t), + "mh_i64ptr_new", "fields_iov_ht"); + return NULL; + } + if (mh_i64ptr_reserve(fields_iov_ht, part_count, NULL) != 0) { + diag_set(OutOfMemory, part_count, "mh_i64ptr_reserve", + "fields_iov_ht"); + goto end; + } + memset(iov, 0, sizeof(*iov) * part_count); + const struct key_part *part = cmp_def->parts; + for (uint32_t i = 0; i < part_count; ++i, ++part) { assert(part->fieldno < field_count); const char *svp = key; - iov[part->fieldno].iov_base = (char *) key; + iov[i].iov_base = (char *) key; mp_next(&key); - iov[part->fieldno].iov_len = key - svp; - bsize += key - svp; + iov[i].iov_len = key - svp; + struct tuple_field *field; + field = tuple_format_field(format, part->fieldno); + assert(field != NULL); + if (unlikely(part->path != NULL)) { + field = tuple_format_field_by_path(format, field, + part->path, + part->path_len); + } + assert(field != NULL); + struct mh_i64ptr_node_t node = {(uint64_t)field, &iov[i]}; + mh_int_t k = mh_i64ptr_put(fields_iov_ht, &node, NULL, NULL); + if (unlikely(k == mh_end(fields_iov_ht))) { + diag_set(OutOfMemory, part_count, "mh_i64ptr_put", + "fields_iov_ht"); + goto end; + } + k = mh_i64ptr_find(fields_iov_ht, (uint64_t)field, NULL); + assert(k != mh_end(fields_iov_ht)); } + /* Calculate tuple size to make allocation. */ + char *data = NULL; + vy_stmt_tuple_restore_raw(format, NULL, NULL, &data, fields_iov_ht, + false); + uint32_t bsize = mp_sizeof_array(field_count) + data - (char *)NULL; - struct tuple *stmt = vy_stmt_alloc(format, bsize); + stmt = vy_stmt_alloc(format, bsize); if (stmt == NULL) - return NULL; + goto end; + /* Construct tuple. 
*/ char *raw = (char *) tuple_data(stmt); uint32_t *field_map = (uint32_t *) raw; + memset((char *)field_map - format->field_map_size, 0, + format->field_map_size); char *wpos = mp_encode_array(raw, field_count); - for (uint32_t i = 0; i < field_count; ++i) { - const struct tuple_field *field = tuple_format_field(format, i); - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - field_map[field->offset_slot] = wpos - raw; - if (iov[i].iov_base == NULL) { - wpos = mp_encode_nil(wpos); - } else { - memcpy(wpos, iov[i].iov_base, iov[i].iov_len); - wpos += iov[i].iov_len; - } - } - assert(wpos == raw + bsize); + vy_stmt_tuple_restore_raw(format, raw, field_map, &wpos, fields_iov_ht, + true); + + assert(wpos <= raw + bsize); vy_stmt_set_type(stmt, type); +end: + mh_i64ptr_delete(fields_iov_ht); return stmt; } diff --git a/test/box/misc.result b/test/box/misc.result index 9f863d9..97070f3 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -415,6 +415,7 @@ t; 83: box.error.ROLE_EXISTS 84: box.error.CREATE_ROLE 85: box.error.INDEX_EXISTS + 86: box.error.DATA_STRUCTURE_MISMATCH 87: box.error.ROLE_LOOP 88: box.error.GRANT 89: box.error.PRIV_GRANTED diff --git a/test/engine/tuple.result b/test/engine/tuple.result index 35c700e..322821e 100644 --- a/test/engine/tuple.result +++ b/test/engine/tuple.result @@ -954,6 +954,422 @@ type(tuple:tomap().fourth) s:drop() --- ... +-- +-- gh-1012: Indexes for JSON-defined paths. +-- +box.cfg() +--- +... +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key + part is indexed twice' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +- error: 'Wrong index options (field 2): ''path'' must be string' +... 
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''map'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''array'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: Field [2]["FIO"].fname has type 'string' in one index, but type 'map' in + another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: Field [2]["FIO"].fname has type 'array' in one index, but type 'map' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}}) +--- +- error: 'Wrong index options (field 3): invalid JSON path ''FIO....fname'': path + has invalid structure (error at position 5)' +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +assert(idx.parts[2].path == "FIO.fname") +--- +- true +... +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +--- +- error: 'Tuple field 3 type does not match one required by operation: expected map' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +--- +- error: 'Tuple field 3 type does not match one required by operation: expected string' +... +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +--- +- error: 'Tuple doesn''t math document structure: invalid field 3 document content: + map doesn''t contain a key ''sname'' defined in index' +... 
+s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +idx:select() +--- +- - [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] + - [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +idx:min() +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:max() +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +s:drop() +--- +... +s = box.schema.create_space('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[2]'} +--- +... +pk = s:create_index('pk', {parts = parts}) +--- +... +s:insert{{1, 2}, 3} +--- +- [[1, 2], 3] +... +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +--- +... +s:get(2) +--- +- [[1, 2], 8] +... +s:drop() +--- +... +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk = s:create_index('primary', { type = 'tree' }) +--- +... +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +--- +- [1, 7, {'town': 'London', 'FIO': 1234}, 4, 5] +... +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [2, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... 
+s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +--- +- [4, 7, {'town': 'London', 'FIO': [1, 2, 3]}, 4, 5] +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field 3 type does not match one required by operation: expected map' +... +_ = s:delete(1) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +_ = s:delete(2) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field 3 type does not match one required by operation: expected map' +... +_ = s:delete(4) +--- +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +--- +- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number' + in another +... +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +... +assert(idx2 ~= nil) +--- +- true +... +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +--- +... +idx:select() +--- +- - [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] + - [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... 
+idx:min() +--- +- [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] +... +idx:max() +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:drop() +--- +... +s:drop() +--- +... +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'str', path='[3][2].a'} +--- +... +parts[2] = {1, 'unsigned', path = '[3][1]'} +--- +... +parts[3] = {2, 'str', path = '[2].d[1]'} +--- +... +pk = s:create_index('primary', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +--- +- [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, + [1, 2, 3]] +... +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +--- +- error: Duplicate key exists in unique index 'primary' in space 'withdata' +... +parts = {} +--- +... +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +--- +... +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +--- +... +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +--- +... +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +--- +- error: 'Tuple doesn''t math document structure: invalid field 4 document content: + array size 1 is less than size 4 defined in index' +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b' } +--- +... +parts[2] = {3, 'unsigned'} +--- +... +crosspart_idx = s:create_index('crosspart', { parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b'} +--- +... 
+num_idx = s:create_index('numeric', {parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:get(2) +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +num_idx:select() +--- +- - [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 9, 2, 3]] + - [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], + 6, [1, 2, 3]] + - [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 0]] +... +num_idx:max() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:min() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +assert(crosspart_idx:max() == num_idx:max()) +--- +- true +... +assert(crosspart_idx:min() == num_idx:min()) +--- +- true +... +trap_idx:max() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +trap_idx:min() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +s:drop() +--- +... +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +--- +... +assert(pk_simplified.path == box.NULL) +--- +- true +... +idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}}) +--- +... +s:insert{31, {a = 1, aa = -1}} +--- +- [31, {'a': 1, 'aa': -1}] +... +s:insert{22, {a = 2, aa = -2}} +--- +- [22, {'a': 2, 'aa': -2}] +... +s:insert{13, {a = 3, aa = -3}} +--- +- [13, {'a': 3, 'aa': -3}] +... +idx:select() +--- +- - [31, {'a': 1, 'aa': -1}] + - [22, {'a': 2, 'aa': -2}] + - [13, {'a': 3, 'aa': -3}] +... +idx:alter({parts = {{2, 'integer', path = 'aa'}}}) +--- +... 
+idx:select() +--- +- - [13, {'a': 3, 'aa': -3}] + - [22, {'a': 2, 'aa': -2}] + - [31, {'a': 1, 'aa': -1}] +... +s:drop() +--- +... +-- incompatible format change +s = box.schema.space.create('test') +--- +... +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}}) +--- +... +s:insert{{-1}} +--- +- [[-1]] +... +i:alter{parts = {{1, 'string', path = '[1]'}}} +--- +- error: 'Tuple field 1 type does not match one required by operation: expected string' +... +s:insert{{'a'}} +--- +- error: 'Tuple field 1 type does not match one required by operation: expected integer' +... +i:drop() +--- +... +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}}) +--- +... +s:insert{{{FIO=-1}}} +--- +- [[{'FIO': -1}]] +... +i:alter{parts = {{1, 'integer', path = '[1][1]'}}} +--- +- error: 'Tuple field 1 type does not match one required by operation: expected array' +... +i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}} +--- +- error: 'Tuple field 1 type does not match one required by operation: expected array' +... +s:drop() +--- +... engine = nil --- ... diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua index edc3dab..d53ab42 100644 --- a/test/engine/tuple.test.lua +++ b/test/engine/tuple.test.lua @@ -312,5 +312,126 @@ tuple:tomap().fourth type(tuple:tomap().fourth) s:drop() +-- +-- gh-1012: Indexes for JSON-defined paths. 
+-- +box.cfg() +s = box.schema.space.create('withdata', {engine = engine}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}}) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +assert(idx ~= nil) +assert(idx.parts[2].path == "FIO.fname") +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +idx:select() +idx:min() +idx:max() +s:drop() + +s = box.schema.create_space('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'unsigned', path='[2]'} +pk = s:create_index('pk', {parts = parts}) +s:insert{{1, 2}, 3} +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +s:get(2) +s:drop() + +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +pk = s:create_index('primary', { type = 'tree' }) +s:insert{1, 7, {town = 
'London', FIO = 1234}, 4, 5} +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(1) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(2) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(4) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +assert(idx ~= nil) +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +assert(idx2 ~= nil) +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +idx:select() +idx:min() +idx:max() +idx:drop() +s:drop() + +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'str', path='[3][2].a'} +parts[2] = {1, 'unsigned', path = '[3][1]'} +parts[3] = {2, 'str', path = '[2].d[1]'} +pk = s:create_index('primary', { type = 'tree', parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +parts = {} +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +trap_idx = s:create_index('trap', { type = 'tree', 
parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b' } +parts[2] = {3, 'unsigned'} +crosspart_idx = s:create_index('crosspart', { parts = parts}) +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b'} +num_idx = s:create_index('numeric', {parts = parts}) +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +num_idx:get(2) +num_idx:select() +num_idx:max() +num_idx:min() +assert(crosspart_idx:max() == num_idx:max()) +assert(crosspart_idx:min() == num_idx:min()) +trap_idx:max() +trap_idx:min() +s:drop() + +s = box.schema.space.create('withdata', {engine = engine}) +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +assert(pk_simplified.path == box.NULL) +idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}}) +s:insert{31, {a = 1, aa = -1}} +s:insert{22, {a = 2, aa = -2}} +s:insert{13, {a = 3, aa = -3}} +idx:select() +idx:alter({parts = {{2, 'integer', path = 'aa'}}}) +idx:select() +s:drop() + +-- incompatible format change +s = box.schema.space.create('test') +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}}) +s:insert{{-1}} +i:alter{parts = {{1, 'string', path = '[1]'}}} +s:insert{{'a'}} +i:drop() +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}}) +s:insert{{{FIO=-1}}} +i:alter{parts = {{1, 'integer', path = '[1][1]'}}} +i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}} +s:drop() + + engine = nil test_run = nil -- 2.7.4