From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v7 1/5] box: introduce JSON Indexes Date: Wed, 9 Jan 2019 11:29:36 +0300 Message-Id: <90b211d01c5a7af0e5b3015c1a33b0b27d432ab0.1547022001.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Kirill Shcherbatov List-ID: New JSON indexes allow indexing the content of documents. First, new key_part fields path and path_len are introduced; they represent the JSON path string specified by the user. The modified tuple_format_use_key_part routine constructs the corresponding chain of tuple_fields in the tuple_format:fields tree leading to the indexed data. The resulting tree is used for type checking and for allocating offset slots for indexed fields. The refined tuple_init_field_map routine parses the tuple msgpack in depth using a stack allocated on the region and initializes the field map with the corresponding tuple_format:field, if any. This stack is necessary because the mp-container (map or array) length is specified at the beginning of the frame, but this information is also required to determine the end of the mp-container. The other essential feature is restoring a vinyl secondary key statement (stmt) from the keys extracted by key_part and loaded from disk. The new tuple_format_stmt_encode traverses the tuple_format:fields tree and constructs vy_stmt data, using an array of iovs to place data blobs for indexed leaves. Introduced vy_stmt_meta_size - the precalculated stmt size as if all leaf fields were zero. It allows allocating a stmt chunk without an extra traversal of the tree. 
Example: To create a new JSON index, specify the path to the document data as a part of key_part: parts = {{3, 'str', path = '.FIO.fname', is_nullable = false}} idx = s:create_index('json_idx', {parts = parts}) idx:select("Ivanov") Part of #1012 --- src/box/alter.cc | 2 +- src/box/index_def.c | 10 +- src/box/key_def.c | 166 +++++++++++-- src/box/key_def.h | 33 ++- src/box/lua/space.cc | 5 + src/box/memtx_engine.c | 4 + src/box/schema_def.h | 1 + src/box/sql.c | 1 + src/box/sql/build.c | 1 + src/box/sql/select.c | 3 +- src/box/sql/where.c | 1 + src/box/tuple_compare.cc | 7 +- src/box/tuple_extract_key.cc | 26 +- src/box/tuple_format.c | 463 +++++++++++++++++++++++++++++------ src/box/tuple_format.h | 70 +++++- src/box/tuple_hash.cc | 2 +- src/box/vinyl.c | 4 + src/box/vy_log.c | 11 +- src/box/vy_point_lookup.c | 2 - src/box/vy_stmt.c | 49 ++-- src/lib/json/json.c | 7 +- src/lib/json/json.h | 16 ++ test/engine/json.result | 448 +++++++++++++++++++++++++++++++++ test/engine/json.test.lua | 129 ++++++++++ 24 files changed, 1321 insertions(+), 140 deletions(-) create mode 100644 test/engine/json.result create mode 100644 test/engine/json.test.lua diff --git a/src/box/alter.cc b/src/box/alter.cc index 0589c9678..9656a4189 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -268,7 +268,7 @@ index_def_new_from_tuple(struct tuple *tuple, struct space *space) }); if (key_def_decode_parts(part_def, part_count, &parts, space->def->fields, - space->def->field_count) != 0) + space->def->field_count, &fiber()->gc) != 0) diag_raise(); key_def = key_def_new(part_def, part_count); if (key_def == NULL) diff --git a/src/box/index_def.c b/src/box/index_def.c index 2ba57ee9d..58137ed07 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -31,6 +31,8 @@ #include "index_def.h" #include "schema_def.h" #include "identifier.h" +#include "tuple_format.h" +#include "json/json.h" const char *index_type_strs[] = { "HASH", "TREE", "BITSET", "RTREE" }; @@ -278,8 +280,12 @@ 
index_def_is_valid(struct index_def *index_def, const char *space_name) * Courtesy to a user who could have made * a typo. */ - if (index_def->key_def->parts[i].fieldno == - index_def->key_def->parts[j].fieldno) { + struct key_part *part_a = &index_def->key_def->parts[i]; + struct key_part *part_b = &index_def->key_def->parts[j]; + if (part_a->fieldno == part_b->fieldno && + json_path_cmp(part_a->path, part_a->path_len, + part_b->path, part_b->path_len, + TUPLE_INDEX_BASE) == 0){ diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name, "same key part is indexed twice"); diff --git a/src/box/key_def.c b/src/box/key_def.c index dae3580e2..3012b05df 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "json/json.h" #include "key_def.h" #include "tuple_compare.h" #include "tuple_extract_key.h" @@ -35,6 +36,7 @@ #include "column_mask.h" #include "schema_def.h" #include "coll_id_cache.h" +#include "small/region.h" const char *sort_order_strs[] = { "asc", "desc", "undef" }; @@ -44,7 +46,8 @@ const struct key_part_def key_part_def_default = { COLL_NONE, false, ON_CONFLICT_ACTION_DEFAULT, - SORT_ORDER_ASC + SORT_ORDER_ASC, + NULL }; static int64_t @@ -59,6 +62,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len) #define PART_OPT_NULLABILITY "is_nullable" #define PART_OPT_NULLABLE_ACTION "nullable_action" #define PART_OPT_SORT_ORDER "sort_order" +#define PART_OPT_PATH "path" const struct opt_def part_def_reg[] = { OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type, @@ -71,19 +75,30 @@ const struct opt_def part_def_reg[] = { struct key_part_def, nullable_action, NULL), OPT_DEF_ENUM(PART_OPT_SORT_ORDER, sort_order, struct key_part_def, sort_order, NULL), + OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path), OPT_END, }; struct key_def * key_def_dup(const struct key_def *src) { - size_t sz = 
key_def_sizeof(src->part_count); - struct key_def *res = (struct key_def *)malloc(sz); + size_t sz = 0; + for (uint32_t i = 0; i < src->part_count; i++) + sz += src->parts[i].path_len; + sz = key_def_sizeof(src->part_count, sz); + struct key_def *res = (struct key_def *)calloc(1, sz); if (res == NULL) { diag_set(OutOfMemory, sz, "malloc", "res"); return NULL; } memcpy(res, src, sz); + /* Update paths to point to the new memory chunk.*/ + for (uint32_t i = 0; i < src->part_count; i++) { + if (src->parts[i].path == NULL) + continue; + size_t path_offset = src->parts[i].path - (char *)src; + res->parts[i].path = (char *)res + path_offset; + } return res; } @@ -91,8 +106,16 @@ void key_def_swap(struct key_def *old_def, struct key_def *new_def) { assert(old_def->part_count == new_def->part_count); - for (uint32_t i = 0; i < new_def->part_count; i++) + for (uint32_t i = 0; i < new_def->part_count; i++) { SWAP(old_def->parts[i], new_def->parts[i]); + /* + * Paths are allocated as a part of key_def so + * we need to swap path pointers back - it's OK + * as paths aren't supposed to change. 
+ */ + assert(old_def->parts[i].path_len == new_def->parts[i].path_len); + SWAP(old_def->parts[i].path, new_def->parts[i].path); + } SWAP(*old_def, *new_def); } @@ -115,24 +138,39 @@ static void key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, enum on_conflict_action nullable_action, struct coll *coll, uint32_t coll_id, - enum sort_order sort_order) + enum sort_order sort_order, const char *path, + uint32_t path_len, char **paths) { assert(part_no < def->part_count); assert(type < field_type_MAX); def->is_nullable |= (nullable_action == ON_CONFLICT_ACTION_NONE); + def->has_json_paths |= path != NULL; def->parts[part_no].nullable_action = nullable_action; def->parts[part_no].fieldno = fieldno; def->parts[part_no].type = type; def->parts[part_no].coll = coll; def->parts[part_no].coll_id = coll_id; def->parts[part_no].sort_order = sort_order; + if (path != NULL) { + assert(paths != NULL); + def->parts[part_no].path = *paths; + *paths += path_len; + memcpy(def->parts[part_no].path, path, path_len); + def->parts[part_no].path_len = path_len; + } else { + def->parts[part_no].path = NULL; + def->parts[part_no].path_len = 0; + } column_mask_set_fieldno(&def->column_mask, fieldno); } struct key_def * key_def_new(const struct key_part_def *parts, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + ssize_t sz = 0; + for (uint32_t i = 0; i < part_count; i++) + sz += parts[i].path != NULL ? strlen(parts[i].path) : 0; + sz = key_def_sizeof(part_count, sz); struct key_def *def = calloc(1, sz); if (def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -142,6 +180,8 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) def->part_count = part_count; def->unique_part_count = part_count; + /* Paths data in key_def chunk. 
*/ + char *paths = (char *)def + key_def_sizeof(part_count, 0); for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; struct coll *coll = NULL; @@ -155,16 +195,18 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) } coll = coll_id->coll; } + uint32_t path_len = part->path != NULL ? strlen(part->path) : 0; key_def_set_part(def, i, part->fieldno, part->type, part->nullable_action, coll, part->coll_id, - part->sort_order); + part->sort_order, part->path, path_len, &paths); } key_def_set_cmp(def); return def; } -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) +int +key_def_dump_parts(const struct key_def *def, struct key_part_def *parts, + struct region *region) { for (uint32_t i = 0; i < def->part_count; i++) { const struct key_part *part = &def->parts[i]; @@ -174,13 +216,27 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) part_def->is_nullable = key_part_is_nullable(part); part_def->nullable_action = part->nullable_action; part_def->coll_id = part->coll_id; + if (part->path != NULL) { + char *path = region_alloc(region, part->path_len + 1); + if (path == NULL) { + diag_set(OutOfMemory, part->path_len + 1, + "region_alloc", "part_def->path"); + return -1; + } + memcpy(path, part->path, part->path_len); + path[part->path_len] = '\0'; + part_def->path = path; + } else { + part_def->path = NULL; + } } + return 0; } box_key_def_t * box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + size_t sz = key_def_sizeof(part_count, 0); struct key_def *key_def = calloc(1, sz); if (key_def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -194,7 +250,8 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) key_def_set_part(key_def, item, fields[item], (enum field_type)types[item], ON_CONFLICT_ACTION_DEFAULT, - NULL, COLL_NONE, SORT_ORDER_ASC); + NULL, COLL_NONE, 
SORT_ORDER_ASC, NULL, 0, + NULL); } key_def_set_cmp(key_def); return key_def; @@ -243,6 +300,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1, if (key_part_is_nullable(part1) != key_part_is_nullable(part2)) return key_part_is_nullable(part1) < key_part_is_nullable(part2) ? -1 : 1; + int rc = json_path_cmp(part1->path, part1->path_len, + part2->path, part2->path_len, + TUPLE_INDEX_BASE); + if (rc != 0) + return rc; } return part_count1 < part_count2 ? -1 : part_count1 > part_count2; } @@ -274,8 +336,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts, for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; assert(part->type < field_type_MAX); - SNPRINT(total, snprintf, buf, size, "%d, '%s'", - (int)part->fieldno, field_type_strs[part->type]); + if (part->path != NULL) { + SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'", + (int)part->fieldno, field_type_strs[part->type], + part->path); + } else { + SNPRINT(total, snprintf, buf, size, "%d, '%s'", + (int)part->fieldno, + field_type_strs[part->type]); + } if (i < part_count - 1) SNPRINT(total, snprintf, buf, size, ", "); } @@ -294,6 +363,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; size += mp_sizeof_map(count); size += mp_sizeof_str(strlen(PART_OPT_FIELD)); size += mp_sizeof_uint(part->fieldno); @@ -308,6 +379,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY)); size += mp_sizeof_bool(part->is_nullable); } + if (part->path != NULL) { + size += mp_sizeof_str(strlen(PART_OPT_PATH)); + size += mp_sizeof_str(strlen(part->path)); + } } return size; } @@ -323,6 +398,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; data = mp_encode_map(data, 
count); data = mp_encode_str(data, PART_OPT_FIELD, strlen(PART_OPT_FIELD)); @@ -342,6 +419,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, strlen(PART_OPT_NULLABILITY)); data = mp_encode_bool(data, part->is_nullable); } + if (part->path != NULL) { + data = mp_encode_str(data, PART_OPT_PATH, + strlen(PART_OPT_PATH)); + data = mp_encode_str(data, part->path, + strlen(part->path)); + } } return data; } @@ -403,6 +486,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -410,7 +494,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, int key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char **data, const struct field_def *fields, - uint32_t field_count) + uint32_t field_count, struct region *region) { if (mp_typeof(**data) == MP_ARRAY) { return key_def_decode_parts_166(parts, part_count, data, @@ -439,7 +523,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char *key = mp_decode_str(data, &key_len); if (opts_parse_key(part, part_def_reg, key, key_len, data, ER_WRONG_INDEX_OPTIONS, - i + TUPLE_INDEX_BASE, NULL, + i + TUPLE_INDEX_BASE, region, false) != 0) return -1; if (is_action_missing && @@ -485,6 +569,27 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, "index part: unknown sort order"); return -1; } + if (part->path != NULL) { + uint32_t path_len = strlen(part->path); + if (path_len > BOX_JSON_PATH_MAX) { + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + "JSON path is too long"); + return -1; + } + int rc = json_path_validate(part->path, path_len, + TUPLE_INDEX_BASE); + if (rc != 0) { + const char *err_msg = + tt_sprintf("invalid JSON path '%s': " + "error in path on " + "position %d", part->path, + rc); + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part->fieldno + 
TUPLE_INDEX_BASE, + err_msg); + return -1; + } + } } return 0; } @@ -504,7 +609,10 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find) const struct key_part *part = key_def->parts; const struct key_part *end = part + key_def->part_count; for (; part != end; part++) { - if (part->fieldno == to_find->fieldno) + if (part->fieldno == to_find->fieldno && + json_path_cmp(part->path, part->path_len, + to_find->path, to_find->path_len, + TUPLE_INDEX_BASE) == 0) return part; } return NULL; @@ -530,18 +638,25 @@ key_def_merge(const struct key_def *first, const struct key_def *second) * Find and remove part duplicates, i.e. parts counted * twice since they are present in both key defs. */ - const struct key_part *part = second->parts; - const struct key_part *end = part + second->part_count; + size_t sz = 0; + const struct key_part *part = first->parts; + const struct key_part *end = part + first->part_count; + for (; part != end; part++) + sz += part->path_len; + part = second->parts; + end = part + second->part_count; for (; part != end; part++) { if (key_def_find(first, part) != NULL) --new_part_count; + else + sz += part->path_len; } + sz = key_def_sizeof(new_part_count, sz); struct key_def *new_def; - new_def = (struct key_def *)calloc(1, key_def_sizeof(new_part_count)); + new_def = (struct key_def *)calloc(1, sz); if (new_def == NULL) { - diag_set(OutOfMemory, key_def_sizeof(new_part_count), "malloc", - "new_def"); + diag_set(OutOfMemory, sz, "malloc", "new_def"); return NULL; } new_def->part_count = new_part_count; @@ -549,6 +664,9 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->is_nullable = first->is_nullable || second->is_nullable; new_def->has_optional_parts = first->has_optional_parts || second->has_optional_parts; + + /* Paths data in the new key_def chunk. */ + char *paths = (char *)new_def + key_def_sizeof(new_part_count, 0); /* Write position in the new key def. 
*/ uint32_t pos = 0; /* Append first key def's parts to the new index_def. */ @@ -557,7 +675,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second) for (; part != end; part++) { key_def_set_part(new_def, pos++, part->fieldno, part->type, part->nullable_action, part->coll, - part->coll_id, part->sort_order); + part->coll_id, part->sort_order, part->path, + part->path_len, &paths); } /* Set-append second key def's part to the new key def. */ @@ -568,7 +687,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second) continue; key_def_set_part(new_def, pos++, part->fieldno, part->type, part->nullable_action, part->coll, - part->coll_id, part->sort_order); + part->coll_id, part->sort_order, part->path, + part->path_len, &paths); } key_def_set_cmp(new_def); return new_def; diff --git a/src/box/key_def.h b/src/box/key_def.h index d1866303b..c6b7a8c74 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -64,6 +64,11 @@ struct key_part_def { enum on_conflict_action nullable_action; /** Part sort order. */ enum sort_order sort_order; + /** + * JSON path to indexed data, relative to the field number, + * or NULL if this key part indexes a top-level field. + */ + const char *path; }; extern const struct key_part_def key_part_def_default; @@ -82,6 +87,15 @@ struct key_part { enum on_conflict_action nullable_action; /** Part sort order. */ enum sort_order sort_order; + /** + * JSON path to indexed data, relative to the field number, + * or NULL if this key part indexes a top-level field. + * This sting is not 0-terminated. Memory is allocated + * at the end of key_def chunk. + */ + char *path; + /** The length of JSON path. */ + uint32_t path_len; }; struct key_def; @@ -148,6 +162,8 @@ struct key_def { uint32_t unique_part_count; /** True, if at least one part can store NULL. */ bool is_nullable; + /** True, if some key part has JSON path. */ + bool has_json_paths; /** * True, if some key parts can be absent in a tuple. 
These * fields assumed to be MP_NIL. @@ -241,9 +257,10 @@ box_tuple_compare_with_key(const box_tuple_t *tuple_a, const char *key_b, /** \endcond public */ static inline size_t -key_def_sizeof(uint32_t part_count) +key_def_sizeof(uint32_t part_count, uint32_t paths_size) { - return sizeof(struct key_def) + sizeof(struct key_part) * part_count; + return sizeof(struct key_def) + sizeof(struct key_part) * part_count + + paths_size; } /** @@ -255,9 +272,13 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count); /** * Dump part definitions of the given key def. + * Region is required to make allocations for JSON paths when some + * path present. JSON path strings are 0-terminated. + * Return -1 on memory allocation error, 0 on success. */ -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts); +int +key_def_dump_parts(const struct key_def *def, struct key_part_def *parts, + struct region *region); /** * Update 'has_optional_parts' of @a key_def with correspondence @@ -303,7 +324,7 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, int key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, const char **data, const struct field_def *fields, - uint32_t field_count); + uint32_t field_count, struct region *region); /** * Returns the part in index_def->parts for the specified fieldno. 
@@ -364,6 +385,8 @@ key_validate_parts(const struct key_def *key_def, const char *key, static inline bool key_def_is_sequential(const struct key_def *key_def) { + if (key_def->has_json_paths) + return false; for (uint32_t part_id = 0; part_id < key_def->part_count; part_id++) { if (key_def->parts[part_id].fieldno != part_id) return false; diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc index 7cae436f1..1f152917e 100644 --- a/src/box/lua/space.cc +++ b/src/box/lua/space.cc @@ -296,6 +296,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i) lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE); lua_setfield(L, -2, "fieldno"); + if (part->path != NULL) { + lua_pushlstring(L, part->path, part->path_len); + lua_setfield(L, -2, "path"); + } + lua_pushboolean(L, key_part_is_nullable(part)); lua_setfield(L, -2, "is_nullable"); diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 5cf70ab94..2cae791e1 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -1317,6 +1317,10 @@ memtx_index_def_change_requires_rebuild(struct index *index, return true; if (old_part->coll != new_part->coll) return true; + if (json_path_cmp(old_part->path, old_part->path_len, + new_part->path, new_part->path_len, + TUPLE_INDEX_BASE) != 0) + return true; } return false; } diff --git a/src/box/schema_def.h b/src/box/schema_def.h index a760ecc3f..b7a9d3284 100644 --- a/src/box/schema_def.h +++ b/src/box/schema_def.h @@ -44,6 +44,7 @@ enum { BOX_INDEX_MAX = 128, BOX_NAME_MAX = 65000, BOX_INVALID_NAME_MAX = 64, + BOX_JSON_PATH_MAX = 512, ENGINE_NAME_MAX = 16, FIELD_TYPE_NAME_MAX = 16, GRANT_NAME_MAX = 16, diff --git a/src/box/sql.c b/src/box/sql.c index 8c7607d84..c54a0c0ce 100644 --- a/src/box/sql.c +++ b/src/box/sql.c @@ -380,6 +380,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info) part->nullable_action = ON_CONFLICT_ACTION_NONE; part->is_nullable = true; part->sort_order = SORT_ORDER_ASC; + part->path = NULL; if 
(def != NULL && i < def->part_count) part->coll_id = def->parts[i].coll_id; else diff --git a/src/box/sql/build.c b/src/box/sql/build.c index 49b90b5d0..947daf8f6 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -2185,6 +2185,7 @@ index_fill_def(struct Parse *parse, struct index *index, part->is_nullable = part->nullable_action == ON_CONFLICT_ACTION_NONE; part->sort_order = SORT_ORDER_ASC; part->coll_id = coll_id; + part->path = NULL; } key_def = key_def_new(key_parts, expr_list->nExpr); if (key_def == NULL) diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 02ee225f1..3f136a342 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -1360,6 +1360,7 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count) part->is_nullable = false; part->nullable_action = ON_CONFLICT_ACTION_ABORT; part->sort_order = SORT_ORDER_ASC; + part->path = NULL; } return key_info; } @@ -1377,7 +1378,7 @@ sql_key_info_new_from_key_def(sqlite3 *db, const struct key_def *key_def) key_info->key_def = NULL; key_info->refs = 1; key_info->part_count = key_def->part_count; - key_def_dump_parts(key_def, key_info->parts); + key_def_dump_parts(key_def, key_info->parts, NULL); return key_info; } diff --git a/src/box/sql/where.c b/src/box/sql/where.c index 571b5af78..814bd3926 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -2807,6 +2807,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */ part.is_nullable = false; part.sort_order = SORT_ORDER_ASC; part.coll_id = COLL_NONE; + part.path = NULL; struct key_def *key_def = key_def_new(&part, 1); if (key_def == NULL) { diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 3fe4cae32..7ab6e3bf6 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = 
tuple_data(tuple_b); - if (key_def->part_count == 1 && part->fieldno == 0) { + if (key_def->part_count == 1 && part->fieldno == 0 && + part->path == NULL) { /* * First field can not be optional - empty tuples * can not exist. @@ -1027,7 +1028,7 @@ tuple_compare_create(const struct key_def *def) } } assert(! def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_arr) / sizeof(cmp_arr[0]); k++) { @@ -1247,7 +1248,7 @@ tuple_compare_with_key_create(const struct key_def *def) } } assert(! def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_wk_arr) / sizeof(cmp_wk_arr[0]); diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc index ac8b5a44e..c40d7887d 100644 --- a/src/box/tuple_extract_key.cc +++ b/src/box/tuple_extract_key.cc @@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i) { uint32_t fieldno1 = def->parts[i].fieldno + 1; uint32_t fieldno2 = def->parts[i + 1].fieldno; - return fieldno1 == fieldno2; + return fieldno1 == fieldno2 && def->parts[i].path == NULL && + def->parts[i + 1].path == NULL; } /** True, if a key con contain two or more parts in sequence. */ @@ -241,7 +242,8 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, if (!key_def_parts_are_sequential(key_def, i)) break; } - uint32_t end_fieldno = key_def->parts[i].fieldno; + const struct key_part *part = &key_def->parts[i]; + uint32_t end_fieldno = part->fieldno; if (fieldno < current_fieldno) { /* Rewind. 
*/ @@ -283,6 +285,22 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, current_fieldno++; } } + const char *field_last, *field_end_last; + if (part->path != NULL) { + field_last = field; + field_end_last = field_end; + MAYBE_UNUSED int rc = + tuple_field_go_to_path(&field, part->path, + part->path_len); + /* + * All tuples must be valid as all + * integrity checks has already been + * passed. + */ + assert(rc == 0); + field_end = field; + mp_next(&field_end); + } memcpy(key_buf, field, field_end - field); key_buf += field_end - field; if (has_optional_parts && null_count != 0) { @@ -291,6 +309,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, } else { assert(key_buf - key <= data_end - data); } + if (part->path != NULL) { + field = field_last; + field_end = field_end_last; + } } if (key_size != NULL) *key_size = (uint32_t)(key_buf - key); diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index e11b4e6f3..c81c23fd1 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include #include "bit/bit.h" #include "fiber.h" #include "json/json.h" @@ -66,12 +67,88 @@ tuple_field_delete(struct tuple_field *field) /** Return path to a tuple field. Used for error reporting. 
*/ static const char * -tuple_field_path(const struct tuple_field *field) +tuple_field_path(const struct tuple_field *field, bool json_only) { assert(field->token.parent != NULL); - assert(field->token.parent->parent == NULL); - assert(field->token.type == JSON_TOKEN_NUM); - return int2str(field->token.num + TUPLE_INDEX_BASE); + char *path; + if (!json_only && field->token.parent->type == JSON_TOKEN_END) { + assert(field->token.type == JSON_TOKEN_NUM); + path = int2str(field->token.num + TUPLE_INDEX_BASE); + } else { + path = tt_static_buf(); + MAYBE_UNUSED int rc = + json_tree_snprint_path(path, TT_STATIC_BUF_LEN, + &field->token, TUPLE_INDEX_BASE); + assert(rc > 0 && rc < TT_STATIC_BUF_LEN); + } + return path; +} + +/** + * Add corresponding format:fields for specified JSON path. + * Return a pointer to the leaf field on success, NULL on memory + * allocation error or type/nullability mistmatch error, diag + * message is set. + */ +static struct tuple_field * +tuple_field_tree_add_path(struct tuple_format *format, const char *path, + uint32_t path_len, uint32_t fieldno) +{ + int rc = 0; + struct json_tree *tree = &format->fields; + struct tuple_field *parent = tuple_format_field(format, fieldno); + struct tuple_field *field = tuple_field_new(); + if (field == NULL) + goto fail; + + struct json_lexer lexer; + uint32_t token_count = 0; + json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE); + while ((rc = json_lexer_next_token(&lexer, &field->token)) == 0 && + field->token.type != JSON_TOKEN_END) { + enum field_type expected_type = + field->token.type == JSON_TOKEN_STR ? 
+ FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (field_type1_contains_type2(parent->type, expected_type)) { + parent->type = expected_type; + } else if (!field_type1_contains_type2(expected_type, + parent->type)) { + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, + tuple_field_path(parent, false), + field_type_strs[parent->type], + field_type_strs[expected_type]); + goto fail; + } + struct tuple_field *next = + json_tree_lookup_entry(tree, &parent->token, + &field->token, + struct tuple_field, token); + if (next == NULL) { + rc = json_tree_add(tree, &parent->token, &field->token); + if (rc != 0) { + diag_set(OutOfMemory, sizeof(struct json_token), + "json_tree_add", "tree"); + goto fail; + } + next = field; + field = tuple_field_new(); + if (field == NULL) + goto fail; + } + parent = next; + token_count++; + } + /* Path has been verified key_def_decode_parts. */ + assert(rc == 0 && field->token.type == JSON_TOKEN_END); + assert(parent != NULL); + /* Update tree depth information. */ + format->max_path_tokens = MAX(format->max_path_tokens, token_count + 1); +end: + tuple_field_delete(field); + return parent; +fail: + parent = NULL; + goto end; } /** @@ -95,10 +172,25 @@ tuple_format_field_by_id(struct tuple_format *format, uint32_t id) static int tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, const struct key_part *part, bool is_sequential, - int *current_slot) + int *current_slot, char **paths) { assert(part->fieldno < tuple_format_field_count(format)); - struct tuple_field *field = tuple_format_field(format, part->fieldno); + struct tuple_field *field; + if (part->path == NULL) { + field = tuple_format_field(format, part->fieldno); + } else { + assert(!is_sequential); + /** + * Copy JSON path data to reserved area at the + * end of format allocation. 
+ */ + memcpy(*paths, part->path, part->path_len); + field = tuple_field_tree_add_path(format, *paths, part->path_len, + part->fieldno); + if (field == NULL) + return -1; + *paths += part->path_len; + } /* * If a field is not present in the space format, * inherit nullable action of the first key part @@ -124,7 +216,7 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, field->nullable_action = part->nullable_action; } else if (field->nullable_action != part->nullable_action) { diag_set(ClientError, ER_ACTION_MISMATCH, - tuple_field_path(field), + tuple_field_path(field, false), on_conflict_action_strs[field->nullable_action], on_conflict_action_strs[part->nullable_action]); return -1; @@ -146,7 +238,7 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, errcode = ER_FORMAT_MISMATCH_INDEX_PART; else errcode = ER_INDEX_PART_TYPE_MISMATCH; - diag_set(ClientError, errcode, tuple_field_path(field), + diag_set(ClientError, errcode, tuple_field_path(field, false), field_type_strs[field->type], field_type_strs[part->type]); return -1; @@ -158,13 +250,93 @@ tuple_format_use_key_part(struct tuple_format *format, uint32_t field_count, * simply accessible, so we don't store an offset for it. */ if (field->offset_slot == TUPLE_OFFSET_SLOT_NIL && - is_sequential == false && part->fieldno > 0) { + is_sequential == false && + (part->fieldno > 0 || part->path != NULL)) { *current_slot = *current_slot - 1; field->offset_slot = *current_slot; } return 0; } +/** + * Get format:field parent field_type. + * This routine is required as first-level fields has no parent + * field so it could not be retrieved with json_tree_entry. 
+ */ +static enum field_type +tuple_format_field_parent_type(struct tuple_format *format, + struct tuple_field *field) +{ + struct json_token *parent = field->token.parent; + if (parent == &format->fields.root) + return FIELD_TYPE_ARRAY; + return json_tree_entry(parent, struct tuple_field, token)->type; +} + +uint32_t +tuple_format_stmt_encode(struct tuple_format *format, char **offset, + char *tuple_raw, uint32_t *field_map, + struct iovec *iov) +{ + bool write = offset != NULL; + uint32_t size = 0; + struct tuple_field *field; + json_tree_foreach_entry_preorder(field, &format->fields.root, + struct tuple_field, token) { + enum field_type parent_type = + tuple_format_field_parent_type(format, field); + if (parent_type == FIELD_TYPE_ARRAY && + field->token.sibling_idx > 0) { + /* + * Write nil instead of omitted array + * members. + */ + struct json_token **neighbors = + field->token.parent->children; + for (uint32_t i = field->token.sibling_idx - 1; + neighbors[i] == NULL && i > 0; i--) { + if (write) + *offset = mp_encode_nil(*offset); + size += mp_sizeof_nil(); + } + } else if (parent_type == FIELD_TYPE_MAP) { + /* Write map key string. */ + const char *str = field->token.str; + uint32_t len = field->token.len; + if (write) + *offset = mp_encode_str(*offset, str, len); + size += mp_sizeof_str(len); + } + /* Fill data. 
*/ + uint32_t children_cnt = field->token.max_child_idx + 1; + if (json_token_is_leaf(&field->token)) { + if (!write || iov[field->id].iov_len == 0) { + if (write) + *offset = mp_encode_nil(*offset); + size += mp_sizeof_nil(); + } else { + memcpy(*offset, iov[field->id].iov_base, + iov[field->id].iov_len); + uint32_t data_offset = *offset - tuple_raw; + int32_t slot = field->offset_slot; + if (slot != TUPLE_OFFSET_SLOT_NIL) + field_map[slot] = data_offset; + *offset += iov[field->id].iov_len; + size += iov[field->id].iov_len; + } + } else if (field->type == FIELD_TYPE_ARRAY) { + if (write) + *offset = mp_encode_array(*offset, children_cnt); + size += mp_sizeof_array(children_cnt); + } else if (field->type == FIELD_TYPE_MAP) { + if (write) + *offset = mp_encode_map(*offset, children_cnt); + size += mp_sizeof_map(children_cnt); + } + } + return size; +} + /** * Extract all available type info from keys and field * definitions. @@ -203,6 +375,11 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, int current_slot = 0; + /* + * Set pointer to reserved area in the format chunk + * allocated with tuple_format_alloc call. + */ + char *paths = (char *)format + sizeof(struct tuple_format); /* extract field type info */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { const struct key_def *key_def = keys[key_no]; @@ -213,7 +390,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, for (; part < parts_end; part++) { if (tuple_format_use_key_part(format, field_count, part, is_sequential, - &current_slot) != 0) + &current_slot, + &paths) != 0) return -1; } } @@ -236,9 +414,12 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, "malloc", "required field bitmap"); return -1; } + uint32_t id = 0; struct tuple_field *field; json_tree_foreach_entry_preorder(field, &format->fields.root, struct tuple_field, token) { + /* Set the unique field identifier. 
*/ + field->id = id++; /* * Mark all leaf non-nullable fields as required * by setting the corresponding bit in the bitmap @@ -248,6 +429,10 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, !tuple_field_is_nullable(field)) bit_set(format->required_fields, field->id); } + /* Update format metadata for a new format:fields tree. */ + format->total_field_count = id; + format->vy_stmt_size = tuple_format_stmt_encode(format, NULL, NULL, + NULL, NULL); return 0; } @@ -317,6 +502,8 @@ static struct tuple_format * tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, uint32_t space_field_count, struct tuple_dictionary *dict) { + /* Size of area to store paths. */ + uint32_t paths_size = 0; uint32_t index_field_count = 0; /* find max max field no */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { @@ -326,13 +513,15 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, for (; part < pend; part++) { index_field_count = MAX(index_field_count, part->fieldno + 1); + paths_size += part->path_len; } } uint32_t field_count = MAX(space_field_count, index_field_count); - struct tuple_format *format = malloc(sizeof(struct tuple_format)); + uint32_t allocation_size = sizeof(struct tuple_format) + paths_size; + struct tuple_format *format = malloc(allocation_size); if (format == NULL) { - diag_set(OutOfMemory, sizeof(struct tuple_format), "malloc", + diag_set(OutOfMemory, allocation_size, "malloc", "tuple format"); return NULL; } @@ -346,7 +535,6 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, struct tuple_field *field = tuple_field_new(); if (field == NULL) goto error; - field->id = fieldno; field->token.num = fieldno; field->token.type = JSON_TOKEN_NUM; if (json_tree_add(&format->fields, &format->fields.root, @@ -368,6 +556,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, } format->total_field_count = field_count; format->required_fields = NULL; + 
format->max_path_tokens = 1; + format->vy_stmt_size = UINT32_MAX; format->refs = 0; format->id = FORMAT_ID_NIL; format->index_field_count = index_field_count; @@ -428,15 +618,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, { if (format1->exact_field_count != format2->exact_field_count) return false; - uint32_t format1_field_count = tuple_format_field_count(format1); - uint32_t format2_field_count = tuple_format_field_count(format2); - for (uint32_t i = 0; i < format1_field_count; ++i) { - struct tuple_field *field1 = tuple_format_field(format1, i); + struct tuple_field *field1; + json_tree_foreach_entry_preorder(field1, &format1->fields.root, + struct tuple_field, token) { +next:; + const char *path = tuple_field_path(field1, true); + struct tuple_field *field2 = + json_tree_lookup_path_entry(&format2->fields, + &format2->fields.root, + path, strlen(path), + TUPLE_INDEX_BASE, + struct tuple_field, token); /* * The field has a data type in format1, but has * no data type in format2. */ - if (i >= format2_field_count) { + if (field2 == NULL) { /* * The field can get a name added * for it, and this doesn't require a data @@ -447,12 +644,22 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, * NULLs or miss the subject field. */ if (field1->type == FIELD_TYPE_ANY && - tuple_field_is_nullable(field1)) - continue; - else + tuple_field_is_nullable(field1)) { + /* Skip subtree. */ + struct json_token *token = &field1->token; + struct json_token *parent = token->parent; + field1 = json_tree_child_next_entry(parent, + token, + struct + tuple_field, + token); + if (field1 == NULL) + break; + goto next; + } else { return false; + } } - struct tuple_field *field2 = tuple_format_field(format2, i); if (! field_type1_contains_type2(field1->type, field2->type)) return false; /* @@ -466,6 +673,90 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, return true; } +/** + * Descriptor of the parsed msgpack frame. 
+ * Due to the fact that the msgpack has nested structures whose + * length is stored in the frame header at the blob beginning, we + * need to be able to determine that we have finished parsing the + * current component and should move on to the next one. + * For this purpose a stack of disassembled levels is organized, + * where the type of the level, the total number of elements, + * and the number of elements that have already been parsed are + * stored. + */ +struct mp_frame { + /** JSON token type representing frame data structure. */ + enum json_token_type child_type; + /** Total count of MP members to process. */ + uint32_t total; + /** Count of MP elements that have already been parsed. */ + uint32_t curr; +}; + +/** + * Emit token to analyze and do msgpack pointer shift using top + * mp_stack frame. Return 0 on success, -1 when analyse step must + * be skipped (on unsupported term detection). + */ +static int +mp_frame_parse(struct mp_frame *mp_stack, uint32_t mp_stack_idx, + const char **pos, struct json_token *token) +{ + token->type = mp_stack[mp_stack_idx].child_type; + ++mp_stack[mp_stack_idx].curr; + if (token->type == JSON_TOKEN_NUM) { + token->num = mp_stack[mp_stack_idx].curr - TUPLE_INDEX_BASE; + } else if (token->type == JSON_TOKEN_STR) { + if (mp_typeof(**pos) != MP_STR) { + /* Skip key. */ + mp_next(pos); + return -1; + } + token->str = mp_decode_str(pos, (uint32_t *)&token->len); + } else { + unreachable(); + } + return 0; +} + +/** + * Prepare mp_frame for further iterations. Store container length + * and child_type. Update parent token pointer and shift msgpack + * pointer. + */ +static int +mp_frame_prepare(struct mp_frame *mp_stack, uint32_t *mp_stack_idx, + uint32_t mp_stack_total, struct json_token *token, + const char **pos, struct json_token **parent) +{ + enum mp_type type = mp_typeof(**pos); + if (token != NULL && *mp_stack_idx + 1 < mp_stack_total && + (type == MP_MAP || type == MP_ARRAY)) { + uint32_t size = type == MP_ARRAY ? 
mp_decode_array(pos) : + mp_decode_map(pos); + if (size == 0) + return 0; + *parent = token; + enum json_token_type child_type = + type == MP_ARRAY ? JSON_TOKEN_NUM : JSON_TOKEN_STR; + *mp_stack_idx = *mp_stack_idx + 1; + mp_stack[*mp_stack_idx].child_type = child_type; + mp_stack[*mp_stack_idx].total = size; + mp_stack[*mp_stack_idx].curr = 0; + } else { + mp_next(pos); + while (mp_stack[*mp_stack_idx].curr >= + mp_stack[*mp_stack_idx].total) { + assert(*parent != NULL); + *parent = (*parent)->parent; + if (*mp_stack_idx == 0) + return -1; + *mp_stack_idx = *mp_stack_idx - 1; + } + } + return 0; +} + /** @sa declaration for details. */ int tuple_init_field_map(struct tuple_format *format, uint32_t *field_map, @@ -512,49 +803,64 @@ tuple_init_field_map(struct tuple_format *format, uint32_t *field_map, /* Empty tuple, nothing to do. */ goto skip; } - /* first field is simply accessible, so we do not store offset to it */ - struct tuple_field *field = tuple_format_field(format, 0); - if (validate && - !field_mp_type_is_compatible(field->type, mp_typeof(*pos), - tuple_field_is_nullable(field))) { - diag_set(ClientError, ER_FIELD_TYPE, tuple_field_path(field), - field_type_strs[field->type]); - goto error; - } - if (required_fields != NULL) - bit_clear(required_fields, field->id); - mp_next(&pos); - /* other fields...*/ - uint32_t i = 1; uint32_t defined_field_count = MIN(field_count, validate ? tuple_format_field_count(format) : format->index_field_count); - if (field_count < format->index_field_count) { - /* - * Nullify field map to be able to detect by 0, - * which key fields are absent in tuple_field(). - */ - memset((char *)field_map - format->field_map_size, 0, - format->field_map_size); + /* + * Nullify field map to be able to detect by 0, + * which key fields are absent in tuple_field(). 
+ */ + memset((char *)field_map - format->field_map_size, 0, + format->field_map_size); + uint32_t mp_stack_size = + format->max_path_tokens * sizeof(struct mp_frame); + struct mp_frame *mp_stack = region_alloc(region, mp_stack_size); + if (mp_stack == NULL) { + diag_set(OutOfMemory, mp_stack_size, "region_alloc", + "mp_stack"); + goto error; } - for (; i < defined_field_count; ++i) { - field = tuple_format_field(format, i); - if (validate && - !field_mp_type_is_compatible(field->type, mp_typeof(*pos), - tuple_field_is_nullable(field))) { - diag_set(ClientError, ER_FIELD_TYPE, - tuple_field_path(field), - field_type_strs[field->type]); - goto error; + struct tuple_field *field; + mp_stack[0].child_type = JSON_TOKEN_NUM; + mp_stack[0].total = defined_field_count; + mp_stack[0].curr = 0; + uint32_t mp_stack_idx = 0; + struct json_tree *tree = (struct json_tree *)&format->fields; + struct json_token *parent = &tree->root; + while (mp_stack[0].curr <= mp_stack[0].total) { + struct json_token token; + if (mp_frame_parse(mp_stack, mp_stack_idx, &pos, &token) != 0) { + /* Unsupported token. */ + goto finish_frame; } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { - field_map[field->offset_slot] = - (uint32_t) (pos - tuple); + field = json_tree_lookup_entry(tree, parent, &token, + struct tuple_field, token); + if (field != NULL) { + bool is_nullable = tuple_field_is_nullable(field); + if (validate && + !field_mp_type_is_compatible(field->type, + mp_typeof(*pos), + is_nullable) != 0) { + diag_set(ClientError, ER_FIELD_TYPE, + tuple_field_path(field, false), + field_type_strs[field->type]); + goto error; + } + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + field_map[field->offset_slot] = + (uint32_t)(pos - tuple); + } + if (required_fields != NULL) + bit_clear(required_fields, field->id); } - if (required_fields != NULL) - bit_clear(required_fields, field->id); - mp_next(&pos); - } +finish_frame: + /* Prepare stack info for next iteration. 
*/ + if (mp_frame_prepare(mp_stack, &mp_stack_idx, + format->max_path_tokens, + field != NULL ? &field->token : NULL, + &pos, &parent) != 0) + break; + }; skip: /* * Check the required field bitmap for missing fields. @@ -569,7 +875,7 @@ skip: field = tuple_format_field_by_id(format, id); assert(field != NULL); diag_set(ClientError, ER_FIELD_MISSING, - tuple_field_path(field)); + tuple_field_path(field, false)); goto error; } } @@ -713,15 +1019,7 @@ tuple_field_go_to_key(const char **field, const char *key, int len) return -1; } -/** - * Retrieve msgpack data by JSON path. - * @param data Pointer to msgpack with data. - * @param path The path to process. - * @param path_len The length of the @path. - * @retval 0 On success. - * @retval >0 On path parsing error, invalid character position. - */ -static int +int tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len) { int rc; @@ -820,3 +1118,30 @@ error: tt_sprintf("error in path on position %d", rc)); return -1; } + +int +tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, const char **raw) +{ + assert(part->path != NULL); + struct tuple_field *field = + tuple_format_field_by_path(format, part->fieldno, part->path, + part->path_len); + if (field != NULL) { + int32_t offset_slot = field->offset_slot; + assert(-offset_slot * sizeof(uint32_t) <= + format->field_map_size); + *raw = field_map[offset_slot] == 0 ? + NULL : data + field_map[offset_slot]; + return 0; + } + /* + * Format doesn't have field representing specified part. + * Make slow tuple parsing. 
+ */ + *raw = tuple_field_raw(format, data, field_map, part->fieldno); + if (*raw == NULL) + return 0; + return tuple_field_go_to_path(raw, part->path, part->path_len); +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 30b93b610..3b630c3bb 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -65,6 +65,7 @@ enum { TUPLE_OFFSET_SLOT_NIL = INT32_MAX }; struct tuple; struct tuple_format; struct coll; +struct iovec; /** Engine-specific tuple format methods. */ struct tuple_format_vtab { @@ -185,6 +186,15 @@ struct tuple_format { * Shared names storage used by all formats of a space. */ struct tuple_dictionary *dict; + /** + * A maximum depth of format:fields subtree. + */ + uint32_t max_path_tokens; + /** + * The size of the secondary key built for format:fields + * with all leaf records set to nil. + */ + uint32_t vy_stmt_size; /** * Fields comprising the format, organized in a tree. * First level nodes correspond to tuple fields. @@ -221,6 +231,37 @@ tuple_format_field(struct tuple_format *format, uint32_t fieldno) &token, struct tuple_field, token); } +/** + * Lookup field by relative JSON path and root field fieldno in + * format:fields tree. +*/ +static inline struct tuple_field * +tuple_format_field_by_path(struct tuple_format *format, uint32_t fieldno, + const char *path, uint32_t path_len) +{ + uint32_t field_count = tuple_format_field_count(format); + if (fieldno >= field_count) + return NULL; + struct tuple_field *root = tuple_format_field(format, fieldno); + assert(root != NULL); + return json_tree_lookup_path_entry(&format->fields, &root->token, + path, path_len, TUPLE_INDEX_BASE, + struct tuple_field, token); +} + +/** + * Construct secondary-index tuple and initialize field_map. + * The iov[field->id] array item contains an extracted key + * for indexed field identified with unique field->id. + * Return the size of constructed tuple. 
+ * In case of offset == NULL routine may be used for tuple size up + * limit estimation: all leaf records are assumed to be nil(s). + */ +uint32_t +tuple_format_stmt_encode(struct tuple_format *format, char **offset, + char *tuple_raw, uint32_t *field_map, + struct iovec *iov); + extern struct tuple_format **tuple_formats; static inline uint32_t @@ -420,6 +461,18 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, return tuple_field_raw(format, tuple, field_map, fieldno); } +/** + * Retrieve msgpack data by JSON path. + * @param data Pointer to msgpack with data. + * @param path The path to process. + * @param path_len The length of the @path. + * @retval 0 On success. + * @retval >0 On path parsing error, invalid character position. + */ +int +tuple_field_go_to_path(const char **data, const char *path, + uint32_t path_len); + /** * Get tuple field by its path. * @param format Tuple format. @@ -439,6 +492,12 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, uint32_t path_len, uint32_t path_hash, const char **field); +/** Internal function, use tuple_field_by_part_raw instead. */ +int +tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data, + const uint32_t *field_map, + struct key_part *part, const char **raw); + /** * Get a tuple field pointed to by an index part. * @param format Tuple format. 
@@ -451,7 +510,16 @@ static inline const char * tuple_field_by_part_raw(struct tuple_format *format, const char *data, const uint32_t *field_map, struct key_part *part) { - return tuple_field_raw(format, data, field_map, part->fieldno); + if (likely(part->path == NULL)) { + return tuple_field_raw(format, data, field_map, part->fieldno); + } else { + const char *raw; + MAYBE_UNUSED int rc = + tuple_field_by_part_raw_slowpath(format, data, + field_map, part, &raw); + assert(rc == 0); + return raw; + } } #if defined(__cplusplus) diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc index b394804fe..3486ce11c 100644 --- a/src/box/tuple_hash.cc +++ b/src/box/tuple_hash.cc @@ -222,7 +222,7 @@ key_hash_slowpath(const char *key, struct key_def *key_def); void tuple_hash_func_set(struct key_def *key_def) { - if (key_def->is_nullable) + if (key_def->is_nullable || key_def->has_json_paths) goto slowpath; /* * Check that key_def defines sequential a key without holes diff --git a/src/box/vinyl.c b/src/box/vinyl.c index ca987134c..acd2d7fd6 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -982,6 +982,10 @@ vinyl_index_def_change_requires_rebuild(struct index *index, return true; if (!field_type1_contains_type2(new_part->type, old_part->type)) return true; + if (json_path_cmp(old_part->path, old_part->path_len, + new_part->path, new_part->path_len, + TUPLE_INDEX_BASE) != 0) + return true; } return false; } diff --git a/src/box/vy_log.c b/src/box/vy_log.c index c9e0713c8..6fc051648 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -581,9 +581,11 @@ vy_log_record_decode(struct vy_log_record *record, record->group_id = mp_decode_uint(&pos); break; case VY_LOG_KEY_DEF: { + struct region *region = &fiber()->gc; uint32_t part_count = mp_decode_array(&pos); - struct key_part_def *parts = region_alloc(&fiber()->gc, - sizeof(*parts) * part_count); + struct key_part_def *parts = + region_alloc(region, + sizeof(*parts) * part_count); if (parts == NULL) { 
diag_set(OutOfMemory, sizeof(*parts) * part_count, @@ -591,7 +593,7 @@ vy_log_record_decode(struct vy_log_record *record, return -1; } if (key_def_decode_parts(parts, part_count, &pos, - NULL, 0) != 0) { + NULL, 0, region) != 0) { diag_log(); diag_set(ClientError, ER_INVALID_VYLOG_FILE, "Bad record: failed to decode " @@ -705,7 +707,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src) "struct key_part_def"); goto err; } - key_def_dump_parts(src->key_def, dst->key_parts); + if (key_def_dump_parts(src->key_def, dst->key_parts, pool) != 0) + goto err; dst->key_part_count = src->key_def->part_count; dst->key_def = NULL; } diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index ddbc2d46f..14e0c0c93 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, struct tuple *key, struct tuple **ret) { - assert(tuple_field_count(key) >= lsm->cmp_def->part_count); - *ret = NULL; double start_time = ev_monotonic_now(loop()); int rc = 0; diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index 47f135c65..7a302e6f3 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -385,26 +385,43 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, struct region *region = &fiber()->gc; uint32_t field_count = format->index_field_count; - struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count); + uint32_t iov_sz = + sizeof(struct iovec) * format->total_field_count; + struct iovec *iov = region_alloc(region, iov_sz); if (iov == NULL) { - diag_set(OutOfMemory, sizeof(*iov) * field_count, - "region", "iov for surrogate key"); + diag_set(OutOfMemory, iov_sz, "region_alloc", + "iov for surrogate key"); return NULL; } - memset(iov, 0, sizeof(*iov) * field_count); + memset(iov, 0, iov_sz); uint32_t part_count = mp_decode_array(&key); assert(part_count == cmp_def->part_count); - assert(part_count <= 
field_count); - uint32_t nulls_count = field_count - cmp_def->part_count; + assert(part_count <= format->total_field_count); + /** + * The format:vy_stmt_size contains a size of + * stmt tuple having all leaf fields set to null. + * Calculate bsize as vy_stmt_size where parts_count + * nulls replaced with extracted keys. + */ uint32_t bsize = mp_sizeof_array(field_count) + - mp_sizeof_nil() * nulls_count; + format->vy_stmt_size - mp_sizeof_nil() * part_count; for (uint32_t i = 0; i < part_count; ++i) { const struct key_part *part = &cmp_def->parts[i]; assert(part->fieldno < field_count); + struct tuple_field *field; + if (part->path != NULL) { + field = tuple_format_field_by_path(format, + part->fieldno, + part->path, + part->path_len); + } else { + field = tuple_format_field(format, part->fieldno); + } + assert(field != NULL); const char *svp = key; - iov[part->fieldno].iov_base = (char *) key; + iov[field->id].iov_base = (char *) key; mp_next(&key); - iov[part->fieldno].iov_len = key - svp; + iov[field->id].iov_len = key - svp; bsize += key - svp; } @@ -414,18 +431,10 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, char *raw = (char *) tuple_data(stmt); uint32_t *field_map = (uint32_t *) raw; + memset((char *)field_map - format->field_map_size, 0, + format->field_map_size); char *wpos = mp_encode_array(raw, field_count); - for (uint32_t i = 0; i < field_count; ++i) { - struct tuple_field *field = tuple_format_field(format, i); - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - field_map[field->offset_slot] = wpos - raw; - if (iov[i].iov_base == NULL) { - wpos = mp_encode_nil(wpos); - } else { - memcpy(wpos, iov[i].iov_base, iov[i].iov_len); - wpos += iov[i].iov_len; - } - } + (void)tuple_format_stmt_encode(format, &wpos, raw, field_map, iov); assert(wpos == raw + bsize); vy_stmt_set_type(stmt, type); return stmt; diff --git a/src/lib/json/json.c b/src/lib/json/json.c index 010a61d62..1d79bceb0 100644 --- a/src/lib/json/json.c +++ 
b/src/lib/json/json.c @@ -572,12 +572,7 @@ json_tree_lookup_path(struct json_tree *tree, struct json_token *root, return ret; } -/** - * Return the child of @parent following @pos or NULL if @pos - * points to the last child in the children array. If @pos is - * NULL, this function returns the first child. - */ -static struct json_token * +struct json_token * json_tree_child_next(struct json_token *parent, struct json_token *pos) { assert(pos == NULL || pos->parent == parent); diff --git a/src/lib/json/json.h b/src/lib/json/json.h index 66cddd026..fc441a887 100644 --- a/src/lib/json/json.h +++ b/src/lib/json/json.h @@ -353,6 +353,14 @@ struct json_token * json_tree_lookup_path(struct json_tree *tree, struct json_token *root, const char *path, int path_len, int index_base); +/** + * Return the child of @parent following @pos or NULL if @pos + * points to the last child in the children array. If @pos is + * NULL, this function returns the first child. + */ +struct json_token * +json_tree_child_next(struct json_token *parent, struct json_token *pos); + /** * Perform pre-order traversal in a JSON subtree rooted * at a given node. @@ -436,6 +444,14 @@ json_tree_postorder_next(struct json_token *root, struct json_token *pos); json_tree_entry_safe(ret, type, member); \ }) +/** + * Container-aware wrapper around json_tree_child_next(). + */ +#define json_tree_child_next_entry(parent, pos, type, member) ({ \ + struct json_token *next = json_tree_child_next((parent), (pos)); \ + json_tree_entry_safe(next, type, member); \ +}) + /** * Container-aware wrapper around json_tree_preorder_next(). */ diff --git a/test/engine/json.result b/test/engine/json.result new file mode 100644 index 000000000..711f7f256 --- /dev/null +++ b/test/engine/json.result @@ -0,0 +1,448 @@ +test_run = require('test_run').new() +--- +... +engine = test_run:get_cfg('engine') +--- +... +-- +-- gh-1012: Indexes for JSON-defined paths. 
+-- +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key + part is indexed twice' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +- error: 'Wrong index options (field 2): ''path'' must be string' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''map'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''array'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = 'FIO.fname'}}}) +--- +- error: Field [3]["FIO"] has type 'string' in one index, but type 'map' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: Field 3 has type 'array' in one index, but type 'map' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}}) +--- +- error: 'Wrong index options (field 3): invalid JSON path ''FIO....fname'': error + in path on position 5' +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +assert(idx.parts[2].path == 'FIO.fname') +--- +- true +... +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'array'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +--- +... 
+s:format(format) +--- +- error: Field 3 has type 'array' in one index, but type 'map' in another +... +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'map'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +--- +... +s:format(format) +--- +... +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number' + in another +... +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +--- +- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected + map' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +--- +- error: 'Tuple field [3]["FIO"]["fname"] type does not match one required by operation: + expected string' +... +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +--- +- error: Tuple field [3]["FIO"]["sname"] required by space format is missing +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +idx:select() +--- +- - [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] + - [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... 
+idx:min() +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:max() +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +s:drop() +--- +... +s = box.schema.create_space('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[2]'} +--- +... +pk = s:create_index('pk', {parts = parts}) +--- +... +s:insert{{1, 2}, 3} +--- +- [[1, 2], 3] +... +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +--- +... +s:get(2) +--- +- [[1, 2], 8] +... +s:drop() +--- +... +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk = s:create_index('primary', { type = 'tree' }) +--- +... +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +--- +- [1, 7, {'town': 'London', 'FIO': 1234}, 4, 5] +... +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [2, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +--- +- [4, 7, {'town': 'London', 'FIO': [1, 2, 3]}, 4, 5] +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected + map' +... +_ = s:delete(1) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +_ = s:delete(2) +--- +... 
+s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field [3]["FIO"] type does not match one required by operation: expected + map' +... +_ = s:delete(4) +--- +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +--- +- error: Field [3]["FIO"]["fname"] has type 'string' in one index, but type 'number' + in another +... +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +... +assert(idx2 ~= nil) +--- +- true +... +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +--- +... +idx:select() +--- +- - [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] + - [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:min() +--- +- [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] +... +idx:max() +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:drop() +--- +... +s:drop() +--- +... +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'str', path='[3][2].a'} +--- +... +parts[2] = {1, 'unsigned', path = '[3][1]'} +--- +... +parts[3] = {2, 'str', path = '[2].d[1]'} +--- +... +pk = s:create_index('primary', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +--- +- [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, + [1, 2, 3]] +... 
+s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +--- +- error: Duplicate key exists in unique index 'primary' in space 'withdata' +... +parts = {} +--- +... +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +--- +... +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +--- +... +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +--- +... +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +--- +- error: Tuple field [4][1] required by space format is missing +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b' } +--- +... +parts[2] = {3, 'unsigned'} +--- +... +crosspart_idx = s:create_index('crosspart', { parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b'} +--- +... +num_idx = s:create_index('numeric', {parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:get(2) +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +num_idx:select() +--- +- - [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 9, 2, 3]] + - [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], + 6, [1, 2, 3]] + - [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 0]] +... +num_idx:max() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:min() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... 
+assert(crosspart_idx:max() == num_idx:max()) +--- +- true +... +assert(crosspart_idx:min() == num_idx:min()) +--- +- true +... +trap_idx:max() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +trap_idx:min() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +s:drop() +--- +... +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +--- +... +assert(pk_simplified.parts[1].path == box.NULL) +--- +- true +... +idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}}) +--- +... +s:insert{31, {a = 1, aa = -1}} +--- +- [31, {'a': 1, 'aa': -1}] +... +s:insert{22, {a = 2, aa = -2}} +--- +- [22, {'a': 2, 'aa': -2}] +... +s:insert{13, {a = 3, aa = -3}} +--- +- [13, {'a': 3, 'aa': -3}] +... +idx:select() +--- +- - [31, {'a': 1, 'aa': -1}] + - [22, {'a': 2, 'aa': -2}] + - [13, {'a': 3, 'aa': -3}] +... +idx:alter({parts = {{2, 'integer', path = 'aa'}}}) +--- +... +idx:select() +--- +- - [13, {'a': 3, 'aa': -3}] + - [22, {'a': 2, 'aa': -2}] + - [31, {'a': 1, 'aa': -1}] +... +s:drop() +--- +... +-- incompatible format change +s = box.schema.space.create('test') +--- +... +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}}) +--- +... +s:insert{{-1}} +--- +- [[-1]] +... +i:alter{parts = {{1, 'string', path = '[1]'}}} +--- +- error: 'Tuple field [1][1] type does not match one required by operation: expected + string' +... +s:insert{{'a'}} +--- +- error: 'Tuple field [1][1] type does not match one required by operation: expected + integer' +... +i:drop() +--- +... +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}}) +--- +... +s:insert{{{FIO=-1}}} +--- +- [[{'FIO': -1}]] +... +i:alter{parts = {{1, 'integer', path = '[1][1]'}}} +--- +- error: 'Tuple field [1][1] type does not match one required by operation: expected + array' +... 
+i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}} +--- +- error: 'Tuple field [1][1]["FIO"] type does not match one required by operation: + expected array' +... +s:drop() +--- +... +engine = nil +--- +... +test_run = nil +--- +... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua new file mode 100644 index 000000000..2a20fc3e5 --- /dev/null +++ b/test/engine/json.test.lua @@ -0,0 +1,129 @@ +test_run = require('test_run').new() +engine = test_run:get_cfg('engine') +-- +-- gh-1012: Indexes for JSON-defined paths. +-- +s = box.schema.space.create('withdata', {engine = engine}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = 'FIO.fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}}) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO.fname', is_nullable = false}, {3, 'str', path = '["FIO"]["sname"]'}}}) +assert(idx ~= nil) +assert(idx.parts[2].path == 'FIO.fname') +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'array'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +s:format(format) +format = {{'id', 'unsigned'}, {'meta', 'unsigned'}, {'data', 'map'}, {'age', 'unsigned'}, {'level', 'unsigned'}} +s:format(format) +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = 'FIO.fname'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +s:insert{7, 7, {town = 
'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +idx:select() +idx:min() +idx:max() +s:drop() + +s = box.schema.create_space('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'unsigned', path='[2]'} +pk = s:create_index('pk', {parts = parts}) +s:insert{{1, 2}, 3} +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +s:get(2) +s:drop() + +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +pk = s:create_index('primary', { type = 'tree' }) +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(1) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(2) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(4) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +assert(idx ~= nil) +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +idx2 = s:create_index('test2', {parts = 
{{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +assert(idx2 ~= nil) +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +idx:select() +idx:min() +idx:max() +idx:drop() +s:drop() + +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'str', path='[3][2].a'} +parts[2] = {1, 'unsigned', path = '[3][1]'} +parts[3] = {2, 'str', path = '[2].d[1]'} +pk = s:create_index('primary', { type = 'tree', parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +parts = {} +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b' } +parts[2] = {3, 'unsigned'} +crosspart_idx = s:create_index('crosspart', { parts = parts}) +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b'} +num_idx = s:create_index('numeric', {parts = parts}) +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +num_idx:get(2) +num_idx:select() +num_idx:max() +num_idx:min() +assert(crosspart_idx:max() == num_idx:max()) +assert(crosspart_idx:min() == num_idx:min()) +trap_idx:max() +trap_idx:min() +s:drop() + +s = box.schema.space.create('withdata', {engine = engine}) +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +assert(pk_simplified.parts[1].path == box.NULL) +idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}}) +s:insert{31, {a = 1, aa = -1}} +s:insert{22, {a = 2, aa = 
-2}} +s:insert{13, {a = 3, aa = -3}} +idx:select() +idx:alter({parts = {{2, 'integer', path = 'aa'}}}) +idx:select() +s:drop() + +-- incompatible format change +s = box.schema.space.create('test') +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1]'}}}) +s:insert{{-1}} +i:alter{parts = {{1, 'string', path = '[1]'}}} +s:insert{{'a'}} +i:drop() +i = s:create_index('pk', {parts = {{1, 'integer', path = '[1].FIO'}}}) +s:insert{{{FIO=-1}}} +i:alter{parts = {{1, 'integer', path = '[1][1]'}}} +i:alter{parts = {{1, 'integer', path = '[1].FIO[1]'}}} +s:drop() + +engine = nil +test_run = nil + -- 2.19.2