From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v6 4/8] box: introduce JSON Indexes Date: Mon, 17 Dec 2018 09:52:48 +0300 Message-Id: <969089641de9324af05250ab6a1bc75f55115439.1544995259.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Kirill Shcherbatov List-ID: New JSON indexes allow indexing document content. At first, introduced new key_part fields path and path_len representing the JSON path string specified by the user. The modified tuple_format_use_key_part routine constructs the corresponding tuple_fields chain in the tuple_format:fields tree leading to the indexed data. The resulting tree is used for type checking and for allocating offset slots for indexed fields. The refined tuple_init_field_map routine parses the tuple msgpack in depth using a stack allocated on the region and initializes the field map with the corresponding tuple_format:field, if any. This stack is necessary as the mp-container (map or array) length is specified at the frame beginning, but this information is also required to determine the mp-container end. Then tuple_field_map_validate checks the initialized field_map and raises an ER_DATA_STRUCTURE_MISMATCH error when the magic UINT32_MAX value is found. This typically means that indexed fields present in the tuple_format:fields tree were not looked up during the preceding tuple parsing step and are not nullable (otherwise there would be 0, not the UINT32_MAX magic). The offset slot allows to uniquely identify the indexed field in the tree to prepare a descriptive message. The other essential feature is vinyl's secondary key restored by key_part (stmt) extracted keys loaded from disk. The introduced tuple_format:total_field_count and tuple_field:id allow allocating an iov array of size sizeof(iov[0])*tuple_format:total_field_count on the region and linking extracted key blobs with the corresponding fields by tuple_field:id. 
New vy_stmt_tuple_restore_raw traverses the tuple_format:fields tree and constructs vy_stmt data using the iov array to place data blobs for indexed leaves. Introduced vy_stmt_meta_size - precalculated stmt size as if all leaf fields were nil. It allows allocating the stmt chunk without an extra tree traversal. Example: To create a new JSON index specify path to document data as a part of key_part: parts = {{3, 'str', path = '.FIO.fname', is_nullable = false}} idx = s:create_index('json_idx', {parts = parts}) idx:select("Ivanov") Part of #1012 --- src/box/alter.cc | 2 +- src/box/errcode.h | 2 +- src/box/index_def.c | 10 +- src/box/key_def.c | 166 +++++++++-- src/box/key_def.h | 33 ++- src/box/lua/space.cc | 5 + src/box/memtx_engine.c | 4 + src/box/sql.c | 1 + src/box/sql/build.c | 1 + src/box/sql/select.c | 3 +- src/box/sql/where.c | 1 + src/box/tuple_compare.cc | 13 +- src/box/tuple_extract_key.cc | 28 +- src/box/tuple_format.c | 518 ++++++++++++++++++++++++++++++----- src/box/tuple_format.h | 73 ++++- src/box/tuple_hash.cc | 2 +- src/box/vinyl.c | 4 + src/box/vy_log.c | 11 +- src/box/vy_point_lookup.c | 2 - src/box/vy_stmt.c | 105 +++++-- test/box/misc.result | 1 + test/engine/json.result | 450 ++++++++++++++++++++++++++++++ test/engine/json.test.lua | 129 +++++++++ 23 files changed, 1427 insertions(+), 137 deletions(-) create mode 100644 test/engine/json.result create mode 100644 test/engine/json.test.lua diff --git a/src/box/alter.cc b/src/box/alter.cc index 03ba68adc..d6d37d60e 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -277,7 +277,7 @@ index_def_new_from_tuple(struct tuple *tuple, struct space *space) }); if (key_def_decode_parts(part_def, part_count, &parts, space->def->fields, - space->def->field_count) != 0) + space->def->field_count, &fiber()->gc) != 0) diag_raise(); key_def = key_def_new(part_def, part_count); if (key_def == NULL) diff --git a/src/box/errcode.h b/src/box/errcode.h index 7d1f8ddc7..e097607e3 100644 --- a/src/box/errcode.h +++ 
b/src/box/errcode.h @@ -138,7 +138,7 @@ struct errcode_record { /* 83 */_(ER_ROLE_EXISTS, "Role '%s' already exists") \ /* 84 */_(ER_CREATE_ROLE, "Failed to create role '%s': %s") \ /* 85 */_(ER_INDEX_EXISTS, "Index '%s' already exists") \ - /* 86 */_(ER_UNUSED6, "") \ + /* 86 */_(ER_DATA_STRUCTURE_MISMATCH, "Tuple doesn't math document structure: %s") \ /* 87 */_(ER_ROLE_LOOP, "Granting role '%s' to role '%s' would create a loop") \ /* 88 */_(ER_GRANT, "Incorrect grant arguments: %s") \ /* 89 */_(ER_PRIV_GRANTED, "User '%s' already has %s access on %s '%s'") \ diff --git a/src/box/index_def.c b/src/box/index_def.c index 45c74d9ec..e430a70ba 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -31,6 +31,8 @@ #include "index_def.h" #include "schema_def.h" #include "identifier.h" +#include "tuple_format.h" +#include "json/json.h" const char *index_type_strs[] = { "HASH", "TREE", "BITSET", "RTREE" }; @@ -298,8 +300,12 @@ index_def_is_valid(struct index_def *index_def, const char *space_name) * Courtesy to a user who could have made * a typo. */ - if (index_def->key_def->parts[i].fieldno == - index_def->key_def->parts[j].fieldno) { + struct key_part *part_a = &index_def->key_def->parts[i]; + struct key_part *part_b = &index_def->key_def->parts[j]; + if (part_a->fieldno == part_b->fieldno && + json_path_cmp(part_a->path, part_a->path_len, + part_b->path, part_b->path_len, + TUPLE_INDEX_BASE) == 0){ diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name, "same key part is indexed twice"); diff --git a/src/box/key_def.c b/src/box/key_def.c index 2119ca389..a6bb89f5b 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ +#include "json/json.h" #include "key_def.h" #include "tuple_compare.h" #include "tuple_extract_key.h" @@ -35,6 +36,7 @@ #include "column_mask.h" #include "schema_def.h" #include "coll_id_cache.h" +#include "small/region.h" const char *sort_order_strs[] = { "asc", "desc", "undef" }; @@ -44,7 +46,8 @@ const struct key_part_def key_part_def_default = { COLL_NONE, false, ON_CONFLICT_ACTION_DEFAULT, - SORT_ORDER_ASC + SORT_ORDER_ASC, + NULL }; static int64_t @@ -59,6 +62,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len) #define PART_OPT_NULLABILITY "is_nullable" #define PART_OPT_NULLABLE_ACTION "nullable_action" #define PART_OPT_SORT_ORDER "sort_order" +#define PART_OPT_PATH "path" const struct opt_def part_def_reg[] = { OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type, @@ -71,6 +75,7 @@ const struct opt_def part_def_reg[] = { struct key_part_def, nullable_action, NULL), OPT_DEF_ENUM(PART_OPT_SORT_ORDER, sort_order, struct key_part_def, sort_order, NULL), + OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path), OPT_END, }; @@ -106,13 +111,23 @@ const uint32_t key_mp_type[] = { struct key_def * key_def_dup(const struct key_def *src) { - size_t sz = key_def_sizeof(src->part_count); - struct key_def *res = (struct key_def *)malloc(sz); + size_t sz = 0; + for (uint32_t i = 0; i < src->part_count; i++) + sz += src->parts[i].path_len; + sz = key_def_sizeof(src->part_count, sz); + struct key_def *res = (struct key_def *)calloc(1, sz); if (res == NULL) { diag_set(OutOfMemory, sz, "malloc", "res"); return NULL; } memcpy(res, src, sz); + /* Update paths to point to the new memory chunk.*/ + for (uint32_t i = 0; i < src->part_count; i++) { + if (src->parts[i].path == NULL) + continue; + size_t path_offset = src->parts[i].path - (char *)src; + res->parts[i].path = (char *)res + path_offset; + } return res; } @@ -120,8 +135,16 @@ void key_def_swap(struct key_def *old_def, struct key_def *new_def) { assert(old_def->part_count == 
new_def->part_count); - for (uint32_t i = 0; i < new_def->part_count; i++) + for (uint32_t i = 0; i < new_def->part_count; i++) { SWAP(old_def->parts[i], new_def->parts[i]); + /* + * Paths are allocated as a part of key_def so + * we need to swap path pointers back - it's OK + * as paths aren't supposed to change. + */ + assert(old_def->parts[i].path_len == new_def->parts[i].path_len); + SWAP(old_def->parts[i].path, new_def->parts[i].path); + } SWAP(*old_def, *new_def); } @@ -144,24 +167,39 @@ static void key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, enum on_conflict_action nullable_action, struct coll *coll, uint32_t coll_id, - enum sort_order sort_order) + enum sort_order sort_order, const char *path, + uint32_t path_len, char **static_pool) { assert(part_no < def->part_count); assert(type < field_type_MAX); def->is_nullable |= (nullable_action == ON_CONFLICT_ACTION_NONE); + def->has_json_paths |= path != NULL; def->parts[part_no].nullable_action = nullable_action; def->parts[part_no].fieldno = fieldno; def->parts[part_no].type = type; def->parts[part_no].coll = coll; def->parts[part_no].coll_id = coll_id; def->parts[part_no].sort_order = sort_order; + if (path != NULL) { + assert(static_pool != NULL); + def->parts[part_no].path = *static_pool; + *static_pool += path_len; + memcpy(def->parts[part_no].path, path, path_len); + def->parts[part_no].path_len = path_len; + } else { + def->parts[part_no].path = NULL; + def->parts[part_no].path_len = 0; + } column_mask_set_fieldno(&def->column_mask, fieldno); } struct key_def * key_def_new(const struct key_part_def *parts, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + ssize_t sz = 0; + for (uint32_t i = 0; i < part_count; i++) + sz += parts[i].path != NULL ? 
strlen(parts[i].path) : 0; + sz = key_def_sizeof(part_count, sz); struct key_def *def = calloc(1, sz); if (def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -171,6 +209,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) def->part_count = part_count; def->unique_part_count = part_count; + char *data = (char *)def + key_def_sizeof(part_count, 0); for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; struct coll *coll = NULL; @@ -184,16 +223,18 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) } coll = coll_id->coll; } + uint32_t path_len = part->path != NULL ? strlen(part->path) : 0; key_def_set_part(def, i, part->fieldno, part->type, part->nullable_action, coll, part->coll_id, - part->sort_order); + part->sort_order, part->path, path_len, &data); } key_def_set_cmp(def); return def; } -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) +int +key_def_dump_parts(const struct key_def *def, struct key_part_def *parts, + struct region *pool) { for (uint32_t i = 0; i < def->part_count; i++) { const struct key_part *part = &def->parts[i]; @@ -203,13 +244,28 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) part_def->is_nullable = key_part_is_nullable(part); part_def->nullable_action = part->nullable_action; part_def->coll_id = part->coll_id; + if (part->path != NULL) { + assert(pool != NULL); + char *path = region_alloc(pool, part->path_len + 1); + if (path == NULL) { + diag_set(OutOfMemory, part->path_len + 1, + "region_alloc", "part_def->path"); + return -1; + } + memcpy(path, part->path, part->path_len); + path[part->path_len] = '\0'; + part_def->path = path; + } else { + part_def->path = NULL; + } } + return 0; } box_key_def_t * box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + size_t sz = key_def_sizeof(part_count, 0); struct key_def *key_def = 
calloc(1, sz); if (key_def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -223,7 +279,8 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) key_def_set_part(key_def, item, fields[item], (enum field_type)types[item], ON_CONFLICT_ACTION_DEFAULT, - NULL, COLL_NONE, SORT_ORDER_ASC); + NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0, + NULL); } key_def_set_cmp(key_def); return key_def; @@ -272,6 +329,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1, if (key_part_is_nullable(part1) != key_part_is_nullable(part2)) return key_part_is_nullable(part1) < key_part_is_nullable(part2) ? -1 : 1; + int rc; + if ((rc = json_path_cmp(part1->path, part1->path_len, + part2->path, part2->path_len, + TUPLE_INDEX_BASE)) != 0) + return rc; } return part_count1 < part_count2 ? -1 : part_count1 > part_count2; } @@ -303,8 +365,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts, for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; assert(part->type < field_type_MAX); - SNPRINT(total, snprintf, buf, size, "%d, '%s'", - (int)part->fieldno, field_type_strs[part->type]); + if (part->path != NULL) { + SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'", + (int)part->fieldno, field_type_strs[part->type], + part->path); + } else { + SNPRINT(total, snprintf, buf, size, "%d, '%s'", + (int)part->fieldno, + field_type_strs[part->type]); + } if (i < part_count - 1) SNPRINT(total, snprintf, buf, size, ", "); } @@ -323,6 +392,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; size += mp_sizeof_map(count); size += mp_sizeof_str(strlen(PART_OPT_FIELD)); size += mp_sizeof_uint(part->fieldno); @@ -337,6 +408,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY)); size += 
mp_sizeof_bool(part->is_nullable); } + if (part->path != NULL) { + size += mp_sizeof_str(strlen(PART_OPT_PATH)); + size += mp_sizeof_str(strlen(part->path)); + } } return size; } @@ -352,6 +427,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; data = mp_encode_map(data, count); data = mp_encode_str(data, PART_OPT_FIELD, strlen(PART_OPT_FIELD)); @@ -371,6 +448,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, strlen(PART_OPT_NULLABILITY)); data = mp_encode_bool(data, part->is_nullable); } + if (part->path != NULL) { + data = mp_encode_str(data, PART_OPT_PATH, + strlen(PART_OPT_PATH)); + data = mp_encode_str(data, part->path, + strlen(part->path)); + } } return data; } @@ -432,6 +515,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -439,7 +523,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, int key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char **data, const struct field_def *fields, - uint32_t field_count) + uint32_t field_count, struct region *pool) { if (mp_typeof(**data) == MP_ARRAY) { return key_def_decode_parts_166(parts, part_count, data, @@ -468,7 +552,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char *key = mp_decode_str(data, &key_len); if (opts_parse_key(part, part_def_reg, key, key_len, data, ER_WRONG_INDEX_OPTIONS, - i + TUPLE_INDEX_BASE, NULL, + i + TUPLE_INDEX_BASE, pool, false) != 0) return -1; if (is_action_missing && @@ -514,6 +598,28 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, "index part: unknown sort order"); return -1; } + if (part->path != NULL) { + uint32_t path_len = strlen(part->path); + int rc = 0; + if (*part->path != '.' 
&& *part->path != '[') { + /* JSON path should be relative. */ + rc = 1; + } else { + rc = json_path_validate(part->path, path_len, + TUPLE_INDEX_BASE); + } + if (rc != 0) { + const char *err_msg = + tt_sprintf("invalid JSON path '%s': " + "path has invalid structure " + "(error at position %d)", + part->path, rc); + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part->fieldno + TUPLE_INDEX_BASE, + err_msg); + return -1; + } + } } return 0; } @@ -533,7 +639,10 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find) const struct key_part *part = key_def->parts; const struct key_part *end = part + key_def->part_count; for (; part != end; part++) { - if (part->fieldno == to_find->fieldno) + if (part->fieldno == to_find->fieldno && + json_path_cmp(part->path, part->path_len, + to_find->path, to_find->path_len, + TUPLE_INDEX_BASE) == 0) return part; } return NULL; @@ -559,18 +668,25 @@ key_def_merge(const struct key_def *first, const struct key_def *second) * Find and remove part duplicates, i.e. parts counted * twice since they are present in both key defs. 
*/ - const struct key_part *part = second->parts; - const struct key_part *end = part + second->part_count; + size_t sz = 0; + const struct key_part *part = first->parts; + const struct key_part *end = part + first->part_count; + for (; part != end; part++) + sz += part->path_len; + part = second->parts; + end = part + second->part_count; for (; part != end; part++) { if (key_def_find(first, part) != NULL) --new_part_count; + else + sz += part->path_len; } + sz = key_def_sizeof(new_part_count, sz); struct key_def *new_def; - new_def = (struct key_def *)calloc(1, key_def_sizeof(new_part_count)); + new_def = (struct key_def *)calloc(1, sz); if (new_def == NULL) { - diag_set(OutOfMemory, key_def_sizeof(new_part_count), "malloc", - "new_def"); + diag_set(OutOfMemory, sz, "malloc", "new_def"); return NULL; } new_def->part_count = new_part_count; @@ -578,6 +694,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->is_nullable = first->is_nullable || second->is_nullable; new_def->has_optional_parts = first->has_optional_parts || second->has_optional_parts; + /* Path data write position in the new key_def. */ + char *data = (char *)new_def + key_def_sizeof(new_part_count, 0); /* Write position in the new key def. */ uint32_t pos = 0; /* Append first key def's parts to the new index_def. */ @@ -586,7 +704,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second) for (; part != end; part++) { key_def_set_part(new_def, pos++, part->fieldno, part->type, part->nullable_action, part->coll, - part->coll_id, part->sort_order); + part->coll_id, part->sort_order, part->path, + part->path_len, &data); } /* Set-append second key def's part to the new key def. 
*/ @@ -597,7 +716,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second) continue; key_def_set_part(new_def, pos++, part->fieldno, part->type, part->nullable_action, part->coll, - part->coll_id, part->sort_order); + part->coll_id, part->sort_order, part->path, + part->path_len, &data); } key_def_set_cmp(new_def); return new_def; diff --git a/src/box/key_def.h b/src/box/key_def.h index d4da6c5a1..df498964c 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -68,6 +68,11 @@ struct key_part_def { enum on_conflict_action nullable_action; /** Part sort order. */ enum sort_order sort_order; + /** + * JSON path to indexed data, relative to the field number, + * or NULL if this key part indexes a top-level field. + */ + const char *path; }; extern const struct key_part_def key_part_def_default; @@ -86,6 +91,15 @@ struct key_part { enum on_conflict_action nullable_action; /** Part sort order. */ enum sort_order sort_order; + /** + * JSON path to indexed data, relative to the field number, + * or NULL if this key part indexes a top-level field. + * This sting is not 0-terminated. Memory is allocated + * at the end of key_def region. + */ + char *path; + /** The length of JSON path. */ + uint32_t path_len; }; struct key_def; @@ -152,6 +166,8 @@ struct key_def { uint32_t unique_part_count; /** True, if at least one part can store NULL. */ bool is_nullable; + /** True, if some key part has JSON path. */ + bool has_json_paths; /** * True, if some key parts can be absent in a tuple. These * fields assumed to be MP_NIL. 
@@ -245,9 +261,10 @@ box_tuple_compare_with_key(const box_tuple_t *tuple_a, const char *key_b, /** \endcond public */ static inline size_t -key_def_sizeof(uint32_t part_count) +key_def_sizeof(uint32_t part_count, uint32_t paths_size) { - return sizeof(struct key_def) + sizeof(struct key_part) * part_count; + return sizeof(struct key_def) + sizeof(struct key_part) * part_count + + paths_size; } /** @@ -259,9 +276,13 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count); /** * Dump part definitions of the given key def. + * Region is required to make allocation for JSON path if some + * such path present. JSON path strings are 0-terminated. + * Return -1 on memory allocation error, 0 on success. */ -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts); +int +key_def_dump_parts(const struct key_def *def, struct key_part_def *parts, + struct region *pool); /** * Update 'has_optional_parts' of @a key_def with correspondence @@ -307,7 +328,7 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, int key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char **data, const struct field_def *fields, - uint32_t field_count); + uint32_t field_count, struct region *pool); /** * Returns the part in index_def->parts for the specified fieldno. 
@@ -368,6 +389,8 @@ key_validate_parts(const struct key_def *key_def, const char *key, static inline bool key_def_is_sequential(const struct key_def *key_def) { + if (key_def->has_json_paths) + return false; for (uint32_t part_id = 0; part_id < key_def->part_count; part_id++) { if (key_def->parts[part_id].fieldno != part_id) return false; diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc index 7cae436f1..1f152917e 100644 --- a/src/box/lua/space.cc +++ b/src/box/lua/space.cc @@ -296,6 +296,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i) lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE); lua_setfield(L, -2, "fieldno"); + if (part->path != NULL) { + lua_pushlstring(L, part->path, part->path_len); + lua_setfield(L, -2, "path"); + } + lua_pushboolean(L, key_part_is_nullable(part)); lua_setfield(L, -2, "is_nullable"); diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 5cf70ab94..2cae791e1 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -1317,6 +1317,10 @@ memtx_index_def_change_requires_rebuild(struct index *index, return true; if (old_part->coll != new_part->coll) return true; + if (json_path_cmp(old_part->path, old_part->path_len, + new_part->path, new_part->path_len, + TUPLE_INDEX_BASE) != 0) + return true; } return false; } diff --git a/src/box/sql.c b/src/box/sql.c index 2cb0edbff..b5d0eefd4 100644 --- a/src/box/sql.c +++ b/src/box/sql.c @@ -380,6 +380,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info) part->nullable_action = ON_CONFLICT_ACTION_NONE; part->is_nullable = true; part->sort_order = SORT_ORDER_ASC; + part->path = NULL; if (def != NULL && i < def->part_count) part->coll_id = def->parts[i].coll_id; else diff --git a/src/box/sql/build.c b/src/box/sql/build.c index ab42694f4..68bc77f10 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -2423,6 +2423,7 @@ index_fill_def(struct Parse *parse, struct index *index, part->is_nullable = 
part->nullable_action == ON_CONFLICT_ACTION_NONE; part->sort_order = SORT_ORDER_ASC; part->coll_id = coll_id; + part->path = NULL; } key_def = key_def_new(key_parts, expr_list->nExpr); if (key_def == NULL) diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 02ee225f1..3f136a342 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -1360,6 +1360,7 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count) part->is_nullable = false; part->nullable_action = ON_CONFLICT_ACTION_ABORT; part->sort_order = SORT_ORDER_ASC; + part->path = NULL; } return key_info; } @@ -1377,7 +1378,7 @@ sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def) key_info->key_def = NULL; key_info->refs = 1; key_info->part_count = key_def->part_count; - key_def_dump_parts(key_def, key_info->parts); + key_def_dump_parts(key_def, key_info->parts, NULL); return key_info; } diff --git a/src/box/sql/where.c b/src/box/sql/where.c index 9c3462bc0..78f70f4b5 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -2807,6 +2807,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */ part.is_nullable = false; part.sort_order = SORT_ORDER_ASC; part.coll_id = COLL_NONE; + part.path = NULL; struct key_def *key_def = key_def_new(&part, 1); if (key_def == NULL) { diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index e21b0096c..554c29f83 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = tuple_data(tuple_b); - if (key_def->part_count == 1 && part->fieldno == 0) { + if (key_def->part_count == 1 && part->fieldno == 0 && + part->path == NULL) { /* * First field can not be optional - empty tuples * can not exist. 
@@ -493,8 +494,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, } bool was_null_met = false; - const struct tuple_format *format_a = tuple_format(tuple_a); - const struct tuple_format *format_b = tuple_format(tuple_b); + struct tuple_format *format_a = tuple_format(tuple_a); + struct tuple_format *format_b = tuple_format(tuple_b); const uint32_t *field_map_a = tuple_field_map(tuple_a); const uint32_t *field_map_b = tuple_field_map(tuple_b); struct key_part *end; @@ -585,7 +586,7 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); struct key_part *part = key_def->parts; - const struct tuple_format *format = tuple_format(tuple); + struct tuple_format *format = tuple_format(tuple); const char *tuple_raw = tuple_data(tuple); const uint32_t *field_map = tuple_field_map(tuple); enum mp_type a_type, b_type; @@ -1027,7 +1028,7 @@ tuple_compare_create(const struct key_def *def) } } assert(! def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_arr) / sizeof(cmp_arr[0]); k++) { @@ -1247,7 +1248,7 @@ tuple_compare_with_key_create(const struct key_def *def) } } assert(! 
def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_wk_arr) / sizeof(cmp_wk_arr[0]); diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc index e9d7cac3e..c40d7887d 100644 --- a/src/box/tuple_extract_key.cc +++ b/src/box/tuple_extract_key.cc @@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i) { uint32_t fieldno1 = def->parts[i].fieldno + 1; uint32_t fieldno2 = def->parts[i + 1].fieldno; - return fieldno1 == fieldno2; + return fieldno1 == fieldno2 && def->parts[i].path == NULL && + def->parts[i + 1].path == NULL; } /** True, if a key con contain two or more parts in sequence. */ @@ -111,7 +112,7 @@ tuple_extract_key_slowpath(const struct tuple *tuple, const char *data = tuple_data(tuple); uint32_t part_count = key_def->part_count; uint32_t bsize = mp_sizeof_array(part_count); - const struct tuple_format *format = tuple_format(tuple); + struct tuple_format *format = tuple_format(tuple); const uint32_t *field_map = tuple_field_map(tuple); const char *tuple_end = data + tuple->bsize; @@ -241,7 +242,8 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, if (!key_def_parts_are_sequential(key_def, i)) break; } - uint32_t end_fieldno = key_def->parts[i].fieldno; + const struct key_part *part = &key_def->parts[i]; + uint32_t end_fieldno = part->fieldno; if (fieldno < current_fieldno) { /* Rewind. */ @@ -283,6 +285,22 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, current_fieldno++; } } + const char *field_last, *field_end_last; + if (part->path != NULL) { + field_last = field; + field_end_last = field_end; + MAYBE_UNUSED int rc = + tuple_field_go_to_path(&field, part->path, + part->path_len); + /* + * All tuples must be valid as all + * integrity checks has already been + * passed. 
+ */ + assert(rc == 0); + field_end = field; + mp_next(&field_end); + } memcpy(key_buf, field, field_end - field); key_buf += field_end - field; if (has_optional_parts && null_count != 0) { @@ -291,6 +309,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, } else { assert(key_buf - key <= data_end - data); } + if (part->path != NULL) { + field = field_last; + field_end = field_end_last; + } } if (key_size != NULL) *key_size = (uint32_t)(key_buf - key); diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 8338bba44..4314d3b1d 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "fiber.h" #include "json/json.h" #include "tuple_format.h" #include "coll_id_cache.h" @@ -64,30 +65,160 @@ tuple_field_delete(struct tuple_field *field) /** Build the JSON path by field specified. */ static const char * tuple_field_json_path(const struct tuple_format *format, - struct tuple_field *field) + struct tuple_field *field, + struct region *region) { + /* Don't put brackets for first-level fields. 
*/ + bool brackets = true; + if (field->token.parent == &format->fields.root) + brackets = false; + + uint32_t token_path_sz = sizeof(void *)*format->max_path_tokens; + uint32_t token_path_len = 0; + struct json_token **token_path = region_alloc(region, token_path_sz); + if (token_path == NULL) { + diag_set(OutOfMemory, token_path_sz, "region_alloc", + "token_path"); + return NULL; + } + uint32_t path_size = 1; struct json_token *token = &field->token; - const char *path; - if (token->parent == &format->fields.root && - token->num < (int)format->dict->name_count) { - const char *field_name = - format->dict->names[token->num]; - path = tt_sprintf("\"%s\"", field_name); - } else if (token->type == JSON_TOKEN_NUM) { - path = tt_sprintf("%u", token->num + TUPLE_INDEX_BASE); - } else { - unreachable(); + while (token != &format->fields.root) { + token_path[token_path_len++] = token; + if (token->parent == &format->fields.root && + token->num < (int)format->dict->name_count) { + const char *field_name = + format->dict->names[token->num]; + path_size += 4 + strlen(field_name); + } else if (token->type == JSON_TOKEN_NUM) { + uint32_t digits = 0; + for (int num = token->num + TUPLE_INDEX_BASE; num > 0; + num /= 10) + digits++; + path_size += 2 + digits; + } else if (token->type == JSON_TOKEN_STR) { + path_size += 4 + token->len; + } else { + unreachable(); + } + token = token->parent; } + char *path = region_alloc(region, path_size); + if (path == NULL) { + diag_set(OutOfMemory, path_size, "region_alloc", "path"); + return NULL; + } + char *wptr = path; + for (int i = token_path_len - 1; i >= 0; i--) { + token = token_path[i]; + if (token->parent == &format->fields.root && + token->num < (int)format->dict->name_count) { + const char *field_name = + format->dict->names[token->num]; + wptr += sprintf(wptr, brackets ? "[\"%s\"]" : "\"%s\"", + field_name); + } else if (token->type == JSON_TOKEN_NUM) { + wptr += sprintf(wptr, brackets ? 
"[%u]" : "%u", + token->num + TUPLE_INDEX_BASE); + } else if (token->type == JSON_TOKEN_STR) { + wptr += sprintf(wptr, "[\"%.*s\"]", token->len, + token->str); + } else { + unreachable(); + } + } + *wptr = '\0'; return path; } +/** Build a JSON tree path for specified path. */ +static struct tuple_field * +tuple_field_tree_add_path(struct tuple_format *format, const char *path, + uint32_t path_len, uint32_t fieldno) +{ + int rc = 0; + struct json_tree *tree = &format->fields; + struct tuple_field *parent = tuple_format_field(format, fieldno); + struct tuple_field *field = tuple_field_new(); + if (field == NULL) + goto fail; + + struct json_lexer lexer; + uint32_t token_count = 0; + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); + while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && + field->token.type != JSON_TOKEN_END) { + enum field_type expected_type = + field->token.type == JSON_TOKEN_STR ? + FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (parent->type != FIELD_TYPE_ANY && + parent->type != expected_type) { + /* Parent field has incompatable type. */ + const char *path = tuple_field_json_path(format, parent, + &fiber()->gc); + if (path != NULL) { + diag_set(ClientError, + ER_INDEX_PART_TYPE_MISMATCH, path, + field_type_strs[parent->type], + field_type_strs[expected_type]); + } + goto fail; + } + struct tuple_field *next = + json_tree_lookup_entry(tree, &parent->token, + &field->token, + struct tuple_field, token); + if (next == NULL) { + rc = json_tree_add(tree, &parent->token, &field->token); + if (rc != 0) { + diag_set(OutOfMemory, sizeof(struct json_token), + "json_tree_add", "tree"); + goto fail; + } + next = field; + field = tuple_field_new(); + if (field == NULL) + goto fail; + } + parent->type = expected_type; + parent = next; + token_count++; + } + assert(rc == 0 && field->token.type == JSON_TOKEN_END); + assert(parent != NULL); + /* Update tree depth information. 
*/ + format->max_path_tokens = MAX(format->max_path_tokens, token_count + 1); +end: + tuple_field_delete(field); + return parent; +fail: + parent = NULL; + goto end; +} + static int tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, const struct key_part *part, bool is_sequential, - int *current_slot) + int *current_slot, char **path_data) { assert(part->fieldno < tuple_format_field_count(format)); - struct tuple_field *field = tuple_format_field(format, part->fieldno); + struct tuple_field *field; + if (part->path == NULL) { + field = tuple_format_field(format, part->fieldno); + } else { + assert(!is_sequential); + /** + * Copy JSON path data to reserved area at the + * end of format allocation. + */ + memcpy(*path_data, part->path, part->path_len); + field = tuple_field_tree_add_path(format, *path_data, + part->path_len, + part->fieldno); + if (field == NULL) + return -1; + *path_data += part->path_len; + } /* * If a field is not present in the space format, * inherit nullable action of the first key part @@ -112,11 +243,13 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, if (field->nullable_action == ON_CONFLICT_ACTION_NONE) field->nullable_action = part->nullable_action; } else if (field->nullable_action != part->nullable_action) { - const char *path = tuple_field_json_path(format, field); - assert(path != NULL); - diag_set(ClientError, ER_ACTION_MISMATCH, path, - on_conflict_action_strs[field->nullable_action], - on_conflict_action_strs[part->nullable_action]); + const char *path = tuple_field_json_path(format, field, + &fiber()->gc); + if (path != NULL) { + diag_set(ClientError, ER_ACTION_MISMATCH, path, + on_conflict_action_strs[field->nullable_action], + on_conflict_action_strs[part->nullable_action]); + } return -1; } @@ -136,11 +269,13 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, errcode = ER_FORMAT_MISMATCH_INDEX_PART; else errcode = ER_INDEX_PART_TYPE_MISMATCH; - 
const char *path = tuple_field_json_path(format, field); - assert(path != NULL); - diag_set(ClientError, errcode, path, - field_type_strs[field->type], - field_type_strs[part->type]); + const char *path = tuple_field_json_path(format, field, + &fiber()->gc); + if (path != NULL) { + diag_set(ClientError, errcode, path, + field_type_strs[field->type], + field_type_strs[part->type]); + } return -1; } field->is_key_part = true; @@ -150,7 +285,8 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, * simply accessible, so we don't store an offset for it. */ if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL && - is_sequential == false && part->fieldno > 0) { + is_sequential == false && + (part->fieldno > 0 || part->path != NULL)) { *current_slot = *current_slot - 1; field->offset_slot = *current_slot; } @@ -195,6 +331,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, int current_slot = 0; + char *paths_data = (char *)format + sizeof(struct tuple_format); /* extract field type info */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { const struct key_def *key_def = keys[key_no]; @@ -205,7 +342,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, for (; part < parts_end; part++) { if (tuple_format_use_key_part(format, field_count, part, is_sequential, - &current_slot) != 0) + &current_slot, + &paths_data) != 0) return -1; } } @@ -241,6 +379,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, format->field_map_size); for (int i = -1; i >= current_slot; i--) field_map_template[i] = UINT32_MAX; + int id = 0; struct tuple_field *field; struct json_token *root = (struct json_token *)&format->fields.root; json_tree_foreach_entry_preorder(field, root, struct tuple_field, @@ -255,7 +394,53 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL && tuple_field_is_nullable(field)) 
field_map_template[field->offset_slot] = 0; + + /* + * Estimate the size of vy_stmt secondary key + * tuple. All leaf records are assumed to be + * nil(s). + */ + int size = 0; + struct json_token *curr_node = &field->token; + enum field_type parent_type = + curr_node->parent == &format->fields.root ? + FIELD_TYPE_ARRAY : + json_tree_entry(curr_node->parent, struct tuple_field, + token)->type; + if (parent_type == FIELD_TYPE_ARRAY) { + /* + * Account a gap between neighboring + * fields filled with nil(s) when parent + * field type is FIELD_TYPE_ARRAY. + */ + int nulls = 0; + for (int i = field->token.sibling_idx - 1; + i > 0 && curr_node->parent->children[i] == NULL; + i--) + nulls++; + size += nulls * mp_sizeof_nil(); + } else if (parent_type == FIELD_TYPE_MAP) { + /* + * Account memory required for map key + * string when parent field type is + * FIELD_TYPE_MAP. + */ + size += mp_sizeof_str(field->token.len); + } + if (field->token.max_child_idx == -1) { + size += mp_sizeof_nil(); + } else if (field->type == FIELD_TYPE_ARRAY) { + size += mp_sizeof_array(field->token.max_child_idx); + } else if (field->type == FIELD_TYPE_MAP) { + size += mp_sizeof_map(field->token.max_child_idx); + } + format->vy_stmt_meta_size += size; + + /* Assign unique identifier for each field. */ + field->id = id++; } + /* Total amount of fields in format. */ + format->total_field_count = id; return 0; } @@ -325,6 +510,8 @@ static struct tuple_format * tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, uint32_t space_field_count, struct tuple_dictionary *dict) { + /* Size of area to store paths. 
*/ + uint32_t paths_size = 0; uint32_t index_field_count = 0; /* find max max field no */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { @@ -334,13 +521,15 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, for (; part < pend; part++) { index_field_count = MAX(index_field_count, part->fieldno + 1); + paths_size += part->path_len; } } uint32_t field_count = MAX(space_field_count, index_field_count); - struct tuple_format *format = malloc(sizeof(struct tuple_format)); + uint32_t allocation_size = sizeof(struct tuple_format) + paths_size; + struct tuple_format *format = malloc(allocation_size); if (format == NULL) { - diag_set(OutOfMemory, sizeof(struct tuple_format), "malloc", + diag_set(OutOfMemory, allocation_size, "malloc", "tuple format"); return NULL; } @@ -354,6 +543,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, struct tuple_field *field = tuple_field_new(); if (field == NULL) goto error; + field->id = fieldno; field->token.num = fieldno; field->token.type = JSON_TOKEN_NUM; if (json_tree_add(&format->fields, &format->fields.root, @@ -373,6 +563,9 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, format->dict = dict; tuple_dictionary_ref(dict); } + format->max_path_tokens = 1; + format->total_field_count = field_count; + format->vy_stmt_meta_size = 0; format->refs = 0; format->id = FORMAT_ID_NIL; format->index_field_count = index_field_count; @@ -434,16 +627,32 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, { if (format1->exact_field_count != format2->exact_field_count) return false; - uint32_t format1_field_count = tuple_format_field_count(format1); - uint32_t format2_field_count = tuple_format_field_count(format2); - for (uint32_t i = 0; i < format1_field_count; ++i) { - const struct tuple_field *field1 = - tuple_format_field(format1, i); + struct tuple_field *field1; + struct json_token *field2_prev_token = &format2->fields.root; + struct json_token 
*field1_prev_token = &format1->fields.root; + json_tree_foreach_entry_preorder(field1, &format1->fields.root, + struct tuple_field, token) { +next: + /* + * While switching to the next item, it may be + * necessary to update the parents of both tree + * iterators. + */ + while (field1_prev_token != field1->token.parent) { + field1_prev_token = field1_prev_token->parent; + field2_prev_token = field2_prev_token->parent; + assert(field1_prev_token != NULL); + } + struct tuple_field *field2 = + json_tree_lookup_entry(&format2->fields, + field2_prev_token, + &field1->token, + struct tuple_field, token); /* * The field has a data type in format1, but has * no data type in format2. */ - if (i >= format2_field_count) { + if (field2 == NULL) { /* * The field can get a name added * for it, and this doesn't require a data @@ -454,13 +663,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, * NULLs or miss the subject field. */ if (field1->type == FIELD_TYPE_ANY && - tuple_field_is_nullable(field1)) - continue; - else + tuple_field_is_nullable(field1)) { + /* Skip subtree. */ + struct json_token *root = &format1->fields.root; + struct json_token *next = + json_tree_preorder_next(root, + &field1->token); + field1 = json_tree_entry_safe(next, + struct tuple_field, + token); + if (field1 == NULL) + break; + goto next; + } else { return false; + } } - const struct tuple_field *field2 = - tuple_format_field(format2, i); if (! field_type1_contains_type2(field1->type, field2->type)) return false; /* @@ -470,10 +688,28 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, if (tuple_field_is_nullable(field2) && !tuple_field_is_nullable(field1)) return false; + + field2_prev_token = &field2->token; + field1_prev_token = &field1->token; } return true; } +/** Find a field in format by offset slot. 
*/ +static struct tuple_field * +tuple_field_by_offset_slot(const struct tuple_format *format, + int32_t offset_slot) +{ + struct tuple_field *field; + struct json_token *root = (struct json_token *)&format->fields.root; + json_tree_foreach_entry_preorder(field, root, struct tuple_field, + token) { + if (field->offset_slot == offset_slot) + return field; + } + return NULL; +} + /** * Verify that all offset_slots has been initialized in field_map. * Routine relies on the field_map memory has been filled from the @@ -488,6 +724,32 @@ tuple_field_map_validate(const struct tuple_format *format, uint32_t *field_map) for (int32_t i = -1; i >= -field_map_items; i--) { if (field_map[i] != UINT32_MAX) continue; + + struct tuple_field *field = + tuple_field_by_offset_slot(format, i); + assert(field != NULL); + /* Lookup for field number in tree. */ + const char *path = + tuple_field_json_path(format, field, &fiber()->gc); + if (path == NULL) + return -1; + + struct json_token *token = &field->token; + const char *err_msg; + if (field->token.type == JSON_TOKEN_STR) { + err_msg = tt_sprintf("invalid field \"%s\" document " + "content: map doesn't contain a " + "key '%.*s' defined in index", + path, token->len, token->str); + } else if (field->token.type == JSON_TOKEN_NUM) { + uint32_t expected_size = + token->parent->max_child_idx + 1; + err_msg = tt_sprintf("invalid field \"%s\" document " + "content: array size %d is less " + "than size %d defined in index", + path, token->num, expected_size); + } + diag_set(ClientError, ER_DATA_STRUCTURE_MISMATCH, err_msg); return -1; } return 0; @@ -504,6 +766,90 @@ mp_type_is_compatible(enum mp_type mp_type, enum field_type type, return (mask & (1U << mp_type)) != 0; } +/** + * Descriptor of the parsed msgpack frame. 
+ * Due to the fact that the msgpack has nested structures whose + * length is stored in the frame header at the blob beginning, we + * need to be able to determine that we have finished parsing the + * current component and should move on to the next one. + * For this purpose a stack of disassembled levels is organized, + * where the type of the level, the total number of elements, + * and the number of elements that have already been parsed are + * stored. + */ +struct mp_frame { + /** JSON token type representing frame data structure. */ + enum json_token_type child_type; + /** Total count of MP members to process. */ + uint32_t total; + /** Count of MP elements that have already been parsed. */ + uint32_t curr; +}; + +/** + * Emit token to analyze and do msgpack pointer shift using top + * mp_stack frame. Return 0 on success, -1 when analyse step must + * be skipped (on unsupported term detection). + */ +static int +mp_frame_parse(struct mp_frame *mp_stack, uint32_t mp_stack_idx, + const char **pos, struct json_token *token) +{ + token->type = mp_stack[mp_stack_idx].child_type; + ++mp_stack[mp_stack_idx].curr; + if (token->type == JSON_TOKEN_NUM) { + token->num = mp_stack[mp_stack_idx].curr - TUPLE_INDEX_BASE; + } else if (token->type == JSON_TOKEN_STR) { + if (mp_typeof(**pos) != MP_STR) { + /* Skip key. */ + mp_next(pos); + return -1; + } + token->str = mp_decode_str(pos, (uint32_t *)&token->len); + } else { + unreachable(); + } + return 0; +} + +/** + * Prepare mp_frame for further iterations. Store container length + * and child_type. Update parent token pointer and shift msgpack + * pointer. + */ +static int +mp_frame_prepare(struct mp_frame *mp_stack, uint32_t *mp_stack_idx, + uint32_t mp_stack_total, struct json_token *token, + const char **pos, struct json_token **parent) +{ + enum mp_type type = mp_typeof(**pos); + if (token != NULL && *mp_stack_idx + 1 < mp_stack_total && + (type == MP_MAP || type == MP_ARRAY)) { + uint32_t size = type == MP_ARRAY ? 
mp_decode_array(pos) : + mp_decode_map(pos); + if (size == 0) + return 0; + *parent = token; + enum json_token_type child_type = + type == MP_ARRAY ? JSON_TOKEN_NUM : JSON_TOKEN_STR; + *mp_stack_idx = *mp_stack_idx + 1; + mp_stack[*mp_stack_idx].child_type = child_type; + mp_stack[*mp_stack_idx].total = size; + mp_stack[*mp_stack_idx].curr = 0; + } else { + mp_next(pos); + while (mp_stack[*mp_stack_idx].curr >= + mp_stack[*mp_stack_idx].total) { + assert(*parent != NULL); + *parent = (*parent)->parent; + if (*mp_stack_idx == 0) + return -1; + *mp_stack_idx = *mp_stack_idx - 1; + } + } + return 0; +} + /** @sa declaration for details. */ int tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, @@ -544,12 +890,28 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, memcpy((char *)field_map - format->field_map_size, format->field_map_template, format->field_map_size); } + + struct region *region = &fiber()->gc; + uint32_t mp_stack_size = + format->max_path_tokens * sizeof(struct mp_frame); + struct mp_frame *mp_stack = region_alloc(region, mp_stack_size); + if (mp_stack == NULL) { + diag_set(OutOfMemory, mp_stack_size, "region_alloc", + "mp_stack"); + return -1; + } + mp_stack[0].child_type = JSON_TOKEN_NUM; + mp_stack[0].total = defined_field_count; + mp_stack[0].curr = 0; + uint32_t mp_stack_idx = 0; struct json_tree *tree = (struct json_tree *)&format->fields; struct json_token *parent = &tree->root; - struct json_token token; - token.type = JSON_TOKEN_NUM; - token.num = 0; - while ((uint32_t)token.num < defined_field_count) { + while (mp_stack[0].curr <= mp_stack[0].total) { + /* Prepare key for tree lookup. 
*/ + struct json_token token; + if (mp_frame_parse(mp_stack, mp_stack_idx, &pos, &token) != 0) + goto finish_frame; + struct tuple_field *field = json_tree_lookup_entry(tree, parent, &token, struct tuple_field, token); @@ -560,10 +922,13 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, !mp_type_is_compatible(type, field->type, is_nullable) != 0) { const char *path = - tuple_field_json_path(format, field); - assert(path != NULL); - diag_set(ClientError, ER_FIELD_TYPE, path, - field_type_strs[field->type]); + tuple_field_json_path(format, field, + region); + if (path != NULL) { + diag_set(ClientError, ER_FIELD_TYPE, + path, + field_type_strs[field->type]); + } return -1; } if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { @@ -571,19 +936,16 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, (uint32_t)(pos - tuple); } } - token.num++; - mp_next(&pos); +finish_frame: + /* Prepare stack info for next iteration. */ + if (mp_frame_prepare(mp_stack, &mp_stack_idx, + format->max_path_tokens, + field != NULL ? &field->token : NULL, + &pos, &parent) != 0) + goto end; }; - if (!validate) - return 0; - int rc = tuple_field_map_validate(format, field_map); - /* - * As assert field_count >= min_field_count has already - * tested and all first-level fields has parsed, all - * offset_slots must be initialized. - */ - assert(rc == 0); - return rc; +end: + return validate ? tuple_field_map_validate(format, field_map) : 0; } uint32_t @@ -718,15 +1080,7 @@ tuple_field_go_to_key(const char **field, const char *key, int len) return -1; } -/** - * Retrieve msgpack data by JSON path. - * @param data Pointer to msgpack with data. - * @param path The path to process. - * @param path_len The length of the @path. - * @retval 0 On success. - * @retval >0 On path parsing error, invalid character position. 
- */ -static int +int tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len) { int rc; @@ -825,3 +1179,33 @@ error: tt_sprintf("error in path on position %d", rc)); return -1; } + +int +tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, const char **raw) +{ + assert(part->path != NULL); + struct tuple_field *field = + tuple_format_field_by_path(format, part->fieldno, part->path, + part->path_len); + if (field != NULL) { + int32_t offset_slot = field->offset_slot; + assert(-offset_slot * sizeof(uint32_t) <= + format->field_map_size); + *raw = field_map[offset_slot] == 0 ? + NULL : data + field_map[offset_slot]; + return 0; + } + /* + * Format doesn't have field representing specified part. + * Make slow tuple parsing. + */ + *raw = tuple_field_raw(format, data, field_map, part->fieldno); + if (*raw == NULL) + return 0; + int rc = 0; + if ((rc = tuple_field_go_to_path(raw, part->path, part->path_len)) != 0) + return rc; + return 0; +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index e61e271d5..8c05c3322 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -114,6 +114,8 @@ struct tuple_field { struct coll *coll; /** Collation identifier. */ uint32_t coll_id; + /** Field unique identifier in tuple_format. */ + uint32_t id; /** Link in tuple_format::fields. */ struct json_token token; }; @@ -181,6 +183,27 @@ struct tuple_format { * Shared names storage used by all formats of a space. */ struct tuple_dictionary *dict; + /** + * A maximum depth of fields subtree. This information + * is required for allocating stack for tuple parse + * context during tuple_init_field_map call. + */ + uint32_t max_path_tokens; + /** + * Total count of format fields in fields subtree. 
+ * This information is required to allocate iov array + * argument for vy_stmt_tuple_restore_raw routine that + * is able to store extracted key iov descriptor for each + * field identified by id. + */ + uint32_t total_field_count; + /** + * Calculated size of vinyl's secondary key tuple lacking + * all leaf fields. This information is required in + * vy_stmt_new_surrogate_from_key routine to estimate + * stmt tuple size without tree traversal. + */ + uint32_t vy_stmt_meta_size; /** * Fields comprising the format, organized in a tree. * First level nodes correspond to tuple fields. @@ -217,6 +240,25 @@ tuple_format_field(struct tuple_format *format, uint32_t fieldno) &token, struct tuple_field, token); } +/** + * Lookup field by relative JSON path and root fieldno in + * format:fields tree. +*/ +static inline struct tuple_field * +tuple_format_field_by_path(struct tuple_format *format, uint32_t fieldno, + const char *path, uint32_t path_len) +{ + uint32_t field_count = tuple_format_field_count(format); + if (fieldno >= field_count) + return NULL; + struct tuple_field *root = tuple_format_field(format, fieldno); + assert(root != NULL); + return json_tree_lookup_path_entry(&format->fields, &root->token, + path, path_len, TUPLE_INDEX_BASE, + struct tuple_field, token); +} + + extern struct tuple_format **tuple_formats; static inline uint32_t @@ -417,6 +459,18 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, return tuple_field_raw(format, tuple, field_map, fieldno); } +/** + * Retrieve msgpack data by JSON path. + * @param data Pointer to msgpack with data. + * @param path The path to process. + * @param path_len The length of the @path. + * @retval 0 On success. + * @retval >0 On path parsing error, invalid character position. + */ +int +tuple_field_go_to_path(const char **data, const char *path, + uint32_t path_len); + /** * Get tuple field by its path. * @param format Tuple format. 
@@ -436,6 +490,12 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, uint32_t path_len, uint32_t path_hash, const char **field); +/** Internal function, use tuple_field_by_part_raw instead. */ +int +tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, const char **raw); + /** * Get a tuple field pointed to by an index part. * @param format Tuple format. @@ -445,10 +505,19 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, * @retval Field data if the field exists or NULL. */ static inline const char * -tuple_field_by_part_raw(const struct tuple_format *format, const char *data, +tuple_field_by_part_raw(struct tuple_format *format, const char *data, const uint32_t *field_map, struct key_part *part) { - return tuple_field_raw(format, data, field_map, part->fieldno); + if (likely(part->path == NULL)) { + return tuple_field_raw(format, data, field_map, part->fieldno); + } else { + const char *raw; + MAYBE_UNUSED int rc = + tuple_field_by_part_raw_slowpath(format, data, + field_map, part, &raw); + assert(rc == 0); + return raw; + } } #if defined(__cplusplus) diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc index b394804fe..3486ce11c 100644 --- a/src/box/tuple_hash.cc +++ b/src/box/tuple_hash.cc @@ -222,7 +222,7 @@ key_hash_slowpath(const char *key, struct key_def *key_def); void tuple_hash_func_set(struct key_def *key_def) { - if (key_def->is_nullable) + if (key_def->is_nullable || key_def->has_json_paths) goto slowpath; /* * Check that key_def defines sequential a key without holes diff --git a/src/box/vinyl.c b/src/box/vinyl.c index f5b36ce14..2199ebe09 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -982,6 +982,10 @@ vinyl_index_def_change_requires_rebuild(struct index *index, return true; if (!field_type1_contains_type2(new_part->type, old_part->type)) return true; + if (json_path_cmp(old_part->path, old_part->path_len, + 
new_part->path, new_part->path_len, + TUPLE_INDEX_BASE) != 0) + return true; } return false; } diff --git a/src/box/vy_log.c b/src/box/vy_log.c index c9e0713c8..6fc051648 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -581,9 +581,11 @@ vy_log_record_decode(struct vy_log_record *record, record->group_id = mp_decode_uint(&pos); break; case VY_LOG_KEY_DEF: { + struct region *region = &fiber()->gc; uint32_t part_count = mp_decode_array(&pos); - struct key_part_def *parts = region_alloc(&fiber()->gc, - sizeof(*parts) * part_count); + struct key_part_def *parts = + region_alloc(region, + sizeof(*parts) * part_count); if (parts == NULL) { diag_set(OutOfMemory, sizeof(*parts) * part_count, @@ -591,7 +593,7 @@ vy_log_record_decode(struct vy_log_record *record, return -1; } if (key_def_decode_parts(parts, part_count, &pos, - NULL, 0) != 0) { + NULL, 0, region) != 0) { diag_log(); diag_set(ClientError, ER_INVALID_VYLOG_FILE, "Bad record: failed to decode " @@ -705,7 +707,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src) "struct key_part_def"); goto err; } - key_def_dump_parts(src->key_def, dst->key_parts); + if (key_def_dump_parts(src->key_def, dst->key_parts, pool) != 0) + goto err; dst->key_part_count = src->key_def->part_count; dst->key_def = NULL; } diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index ddbc2d46f..14e0c0c93 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, struct tuple *key, struct tuple **ret) { - assert(tuple_field_count(key) >= lsm->cmp_def->part_count); - *ret = NULL; double start_time = ev_monotonic_now(loop()); int rc = 0; diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index 3e60fece9..f3f678f99 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -370,6 +370,63 @@ vy_stmt_replace_from_upsert(const struct tuple *upsert) return replace; } +/** + * Construct 
secondary-index tuple and initialize field_map. + * The iov[field->id] array item contains an extracted key + * for indexed field identified with unique field->id. + */ +static void +vy_stmt_tuple_restore_raw(struct tuple_format *format, char *tuple_raw, + uint32_t *field_map, char **offset, struct iovec *iov) +{ + struct tuple_field *curr; + json_tree_foreach_entry_preorder(curr, &format->fields.root, + struct tuple_field, token) { + struct json_token *curr_node = &curr->token; + enum field_type parent_type = + curr_node->parent == &format->fields.root ? FIELD_TYPE_ARRAY : + json_tree_entry(curr_node->parent, struct tuple_field, + token)->type; + if (parent_type == FIELD_TYPE_ARRAY && + curr_node->sibling_idx > 0) { + /* + * Fill unindexed array items with nulls. + * Gaps size calculated as a difference + * between sibling nodes. + */ + for (uint32_t i = curr_node->sibling_idx - 1; + curr_node->parent->children[i] == NULL && + i > 0; i--) + *offset = mp_encode_nil(*offset); + } else if (parent_type == FIELD_TYPE_MAP) { + /* Set map key. */ + const char *str = curr_node->str; + uint32_t len = curr_node->len; + *offset = mp_encode_str(*offset, str, len); + } + /* Fill data. */ + uint32_t children_count = curr_node->max_child_idx + 1; + if (curr_node->max_child_idx == -1) { + /* Leaf record. 
*/ + if (iov[curr->id].iov_len == 0) { + *offset = mp_encode_nil(*offset); + } else { + uint32_t data_offset = *offset - tuple_raw; + int32_t slot = curr->offset_slot; + memcpy(*offset, iov[curr->id].iov_base, + iov[curr->id].iov_len); + if (slot != TUPLE_OFFSET_SLOT_NIL) + field_map[slot] = data_offset; + *offset += iov[curr->id].iov_len; + } + } else if (curr->type == FIELD_TYPE_ARRAY) { + *offset = mp_encode_array(*offset, children_count); + } else if (curr->type == FIELD_TYPE_MAP) { + *offset = mp_encode_map(*offset, children_count); + } + } +} + static struct tuple * vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, const struct key_def *cmp_def, @@ -380,26 +437,43 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, struct region *region = &fiber()->gc; uint32_t field_count = format->index_field_count; - struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count); + struct iovec *iov = + region_alloc(region, sizeof(*iov) * format->total_field_count); if (iov == NULL) { - diag_set(OutOfMemory, sizeof(*iov) * field_count, + diag_set(OutOfMemory, sizeof(*iov) * format->total_field_count, "region", "iov for surrogate key"); return NULL; } - memset(iov, 0, sizeof(*iov) * field_count); + memset(iov, 0, sizeof(*iov) * format->total_field_count); uint32_t part_count = mp_decode_array(&key); assert(part_count == cmp_def->part_count); - assert(part_count <= field_count); - uint32_t nulls_count = field_count - cmp_def->part_count; + assert(part_count <= format->total_field_count); + /** + * The format:vy_stmt_meta_size contains a size of + * stmt tuple having all leaf fields set to null. + * Calculate bsize as vy_stmt_meta_size where parts_count + * nulls replaced with extracted keys. 
+ */ uint32_t bsize = mp_sizeof_array(field_count) + - mp_sizeof_nil() * nulls_count; + format->vy_stmt_meta_size - + mp_sizeof_nil() * part_count; for (uint32_t i = 0; i < part_count; ++i) { const struct key_part *part = &cmp_def->parts[i]; assert(part->fieldno < field_count); + struct tuple_field *field; + if (part->path != NULL) { + field = tuple_format_field_by_path(format, + part->fieldno, + part->path, + part->path_len); + } else { + field = tuple_format_field(format, part->fieldno); + } + assert(field != NULL); const char *svp = key; - iov[part->fieldno].iov_base = (char *) key; + iov[field->id].iov_base = (char *) key; mp_next(&key); - iov[part->fieldno].iov_len = key - svp; + iov[field->id].iov_len = key - svp; bsize += key - svp; } @@ -409,18 +483,11 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, char *raw = (char *) tuple_data(stmt); uint32_t *field_map = (uint32_t *) raw; + memset((char *)field_map - format->field_map_size, 0, + format->field_map_size); char *wpos = mp_encode_array(raw, field_count); - for (uint32_t i = 0; i < field_count; ++i) { - const struct tuple_field *field = tuple_format_field(format, i); - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - field_map[field->offset_slot] = wpos - raw; - if (iov[i].iov_base == NULL) { - wpos = mp_encode_nil(wpos); - } else { - memcpy(wpos, iov[i].iov_base, iov[i].iov_len); - wpos += iov[i].iov_len; - } - } + vy_stmt_tuple_restore_raw(format, raw, field_map, &wpos, iov); + assert(wpos == raw + bsize); vy_stmt_set_type(stmt, type); return stmt; diff --git a/test/box/misc.result b/test/box/misc.result index d266bb334..e4a8c0efb 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -415,6 +415,7 @@ t; 83: box.error.ROLE_EXISTS 84: box.error.CREATE_ROLE 85: box.error.INDEX_EXISTS + 86: box.error.DATA_STRUCTURE_MISMATCH 87: box.error.ROLE_LOOP 88: box.error.GRANT 89: box.error.PRIV_GRANTED diff --git a/test/engine/json.result b/test/engine/json.result new file mode 
100644 index 000000000..7e7374e6f --- /dev/null +++ b/test/engine/json.result @@ -0,0 +1,450 @@ +test_run = require('test_run').new() +--- +... +engine = test_run:get_cfg('engine') +--- +... +-- +-- gh-1012: Indexes for JSON-defined paths. +-- +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key + part is indexed twice' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +- error: 'Wrong index options (field 2): ''path'' must be string' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = '.FIO'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''map'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''array'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO'}, {3, 'str', path = '.FIO.fname'}}}) +--- +- error: Field [3]["FIO"] has type 'string' in one index, but type 'map' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: Field 3 has type 'array' in one index, but type 'map' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO....fname'}}}) +--- +- error: 'Wrong index options (field 3): invalid JSON path ''.FIO....fname'': path + has invalid structure (error at position 6)' +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +... +assert(idx ~= nil) +--- +- true +... 
+assert(idx.parts[2].path == ".FIO.fname") +--- +- true +... +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'array'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +--- +... +s:format(format) +--- +- error: Field 3 has type 'array' in one index, but type 'map' in another +... +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'map'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +--- +... +s:format(format) +--- +... +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '.FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: Field ["data"]["FIO"]["fname"] has type 'string' in one index, but type 'number' + in another +... +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +--- +- error: 'Tuple field ["data"]["FIO"] type does not match one required by operation: + expected map' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +--- +- error: 'Tuple field ["data"]["FIO"]["fname"] type does not match one required by + operation: expected string' +... +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +--- +- error: 'Tuple doesn''t math document structure: invalid field "["data"]["FIO"]["sname"]" + document content: map doesn''t contain a key ''sname'' defined in index' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... 
+s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +idx:select() +--- +- - [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] + - [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +idx:min() +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:max() +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +s:drop() +--- +... +s = box.schema.create_space('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[2]'} +--- +... +pk = s:create_index('pk', {parts = parts}) +--- +... +s:insert{{1, 2}, 3} +--- +- [[1, 2], 3] +... +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +--- +... +s:get(2) +--- +- [[1, 2], 8] +... +s:drop() +--- +... +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk = s:create_index('primary', { type = 'tree' }) +--- +... +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +--- +- [1, 7, {'town': 'London', 'FIO': 1234}, 4, 5] +... +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [2, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +--- +- [4, 7, {'town': 'London', 'FIO': [1, 2, 3]}, 4, 5] +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected + map' +... 
+_ = s:delete(1) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +_ = s:delete(2) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected + map' +... +_ = s:delete(4) +--- +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +--- +- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number' + in another +... +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +... +assert(idx2 ~= nil) +--- +- true +... +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +--- +... +idx:select() +--- +- - [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] + - [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:min() +--- +- [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] +... +idx:max() +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:drop() +--- +... +s:drop() +--- +... +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'str', path='[3][2].a'} +--- +... +parts[2] = {1, 'unsigned', path = '[3][1]'} +--- +... +parts[3] = {2, 'str', path = '[2].d[1]'} +--- +... 
+pk = s:create_index('primary', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +--- +- [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, + [1, 2, 3]] +... +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +--- +- error: Duplicate key exists in unique index 'primary' in space 'withdata' +... +parts = {} +--- +... +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +--- +... +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +--- +... +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +--- +... +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +--- +- error: 'Tuple doesn''t math document structure: invalid field "[4][1]" document + content: array size 0 is less than size 4 defined in index' +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b' } +--- +... +parts[2] = {3, 'unsigned'} +--- +... +crosspart_idx = s:create_index('crosspart', { parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b'} +--- +... +num_idx = s:create_index('numeric', {parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:get(2) +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... 
+num_idx:select() +--- +- - [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 9, 2, 3]] + - [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], + 6, [1, 2, 3]] + - [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 0]] +... +num_idx:max() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:min() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +assert(crosspart_idx:max() == num_idx:max()) +--- +- true +... +assert(crosspart_idx:min() == num_idx:min()) +--- +- true +... +trap_idx:max() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +trap_idx:min() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +s:drop() +--- +... +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +--- +... +assert(pk_simplified.path == box.NULL) +--- +- true +... +idx = s:create_index('idx', {parts = {{2, 'integer', path = '.a'}}}) +--- +... +s:insert{31, {a = 1, aa = -1}} +--- +- [31, {'a': 1, 'aa': -1}] +... +s:insert{22, {a = 2, aa = -2}} +--- +- [22, {'a': 2, 'aa': -2}] +... +s:insert{13, {a = 3, aa = -3}} +--- +- [13, {'a': 3, 'aa': -3}] +... +idx:select() +--- +- - [31, {'a': 1, 'aa': -1}] + - [22, {'a': 2, 'aa': -2}] + - [13, {'a': 3, 'aa': -3}] +... +idx:alter({parts = {{2, 'integer', path = '.aa'}}}) +--- +... +idx:select() +--- +- - [13, {'a': 3, 'aa': -3}] + - [22, {'a': 2, 'aa': -2}] + - [31, {'a': 1, 'aa': -1}] +... +s:drop() +--- +... +-- incompatible format change +s = box.schema.space.create('test') +--- +... +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}}) +--- +... +s:insert{{-1}} +--- +- [[-1]] +... 
+i:alter{parts = {{1, 'string', path = '[1]'}}} +--- +- error: 'Tuple field [1][1] type does not match one required by operation: expected + string' +... +s:insert{{'a'}} +--- +- error: 'Tuple field [1][1] type does not match one required by operation: expected + integer' +... +i:drop() +--- +... +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}}) +--- +... +s:insert{{{FIO=-1}}} +--- +- [[{'FIO': -1}]] +... +i:alter{parts = {{1, 'integer', path = '[1][1]'}}} +--- +- error: 'Tuple field [1][1] type does not match one required by operation: expected + array' +... +i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}} +--- +- error: 'Tuple field [1][1]["FIO"] type does not match one required by operation: + expected array' +... +s:drop() +--- +... +engine = nil +--- +... +test_run = nil +--- +... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua new file mode 100644 index 000000000..50759a5b1 --- /dev/null +++ b/test/engine/json.test.lua @@ -0,0 +1,129 @@ +test_run = require('test_run').new() +engine = test_run:get_cfg('engine') +-- +-- gh-1012: Indexes for JSON-defined paths. 
+-- +s = box.schema.space.create('withdata', {engine = engine}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = '.FIO'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO'}, {3, 'str', path = '.FIO.fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO....fname'}}}) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '.FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}}) +assert(idx ~= nil) +assert(idx.parts[2].path == ".FIO.fname") +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'array'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +s:format(format) +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'map'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +s:format(format) +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '.FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +idx:select() +idx:min() +idx:max() +s:drop() + +s = 
box.schema.create_space('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'unsigned', path='[2]'} +pk = s:create_index('pk', {parts = parts}) +s:insert{{1, 2}, 3} +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +s:get(2) +s:drop() + +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +pk = s:create_index('primary', { type = 'tree' }) +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(1) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(2) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(4) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +assert(idx ~= nil) +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +assert(idx2 ~= nil) +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +idx:select() +idx:min() +idx:max() +idx:drop() +s:drop() + +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'str', path='[3][2].a'} +parts[2] = {1, 'unsigned', path = '[3][1]'} +parts[3] = {2, 'str', path = '[2].d[1]'} +pk = s:create_index('primary', { type = 'tree', parts = parts}) +s:insert{{1, 
2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +parts = {} +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b' } +parts[2] = {3, 'unsigned'} +crosspart_idx = s:create_index('crosspart', { parts = parts}) +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b'} +num_idx = s:create_index('numeric', {parts = parts}) +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +num_idx:get(2) +num_idx:select() +num_idx:max() +num_idx:min() +assert(crosspart_idx:max() == num_idx:max()) +assert(crosspart_idx:min() == num_idx:min()) +trap_idx:max() +trap_idx:min() +s:drop() + +s = box.schema.space.create('withdata', {engine = engine}) +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +assert(pk_simplified.path == box.NULL) +idx = s:create_index('idx', {parts = {{2, 'integer', path = '.a'}}}) +s:insert{31, {a = 1, aa = -1}} +s:insert{22, {a = 2, aa = -2}} +s:insert{13, {a = 3, aa = -3}} +idx:select() +idx:alter({parts = {{2, 'integer', path = '.aa'}}}) +idx:select() +s:drop() + +-- incompatible format change +s = box.schema.space.create('test') +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}}) +s:insert{{-1}} +i:alter{parts = {{1, 'string', path = '[1]'}}} +s:insert{{'a'}} +i:drop() +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}}) +s:insert{{{FIO=-1}}} +i:alter{parts = {{1, 'integer', path = '[1][1]'}}} +i:alter{parts = {{1, 'integer', 
path = '[1].FIO[1]'}}} +s:drop() + +engine = nil +test_run = nil + -- 2.19.2