From: Kirill Shcherbatov
Subject: [PATCH v4 10/14] box: introduce JSON indexes
Date: Thu, 11 Oct 2018 10:58:42 +0300
To: tarantool-patches@freelists.org
Cc: vdavydov.dev@gmail.com, Kirill Shcherbatov

As we need to store a user-defined JSON path in key_part and
key_part_def, the path and path_len fields have been introduced.
A JSON path is verified and transformed to canonical form when the
index msgpack is unpacked.
The path string is stored as part of the key_def allocation:

+-------+---------+-------+---------+-------+-------+-------+
|key_def|key_part1|  ...  |key_partN| path1 | pathK | pathN |
+-------+---------+-------+---------+-------+-------+-------+
          |                             ^
          |-> path _____________________|

To work with JSON-defined indexes we use the json_tree class:

[2].FIO.fname -> field "fname" {type=str, off_slot=-1}
[2].FIO.sname -> field "sname" {type=str, off_slot=-2}
                       :
        format->field[2] {type = map}
                       |
                FIO {type = map}
                       |
          "fname"      |      "sname"
{type=str,off_slot=-1} ____|____ {type = str,off_slot=-2}

All JSON paths are stored at the end of the format allocation:

+------------+------------+-------+------------+-------+
|tuple_format|tuple_field1|  ...  |tuple_fieldN| pathK |
+------------+------------+-------+------------+-------+

Part of #1012.
---
 src/box/errcode.h            |   2 +-
 src/box/index_def.c          |   6 +-
 src/box/key_def.c            | 189 +++++++++++++++--
 src/box/key_def.h            |  31 ++-
 src/box/lua/space.cc         |   5 +
 src/box/memtx_engine.c       |   2 +
 src/box/tuple.c              |  17 +-
 src/box/tuple_compare.cc     |   7 +-
 src/box/tuple_extract_key.cc |  21 +-
 src/box/tuple_format.c       | 496 ++++++++++++++++++++++++++++++++++++++-----
 src/box/tuple_format.h       |  65 +++++-
 src/box/tuple_hash.cc        |   2 +-
 src/box/vinyl.c              |   2 +
 src/box/vy_log.c             |   3 +-
 src/box/vy_point_lookup.c    |   2 -
 src/box/vy_stmt.c            | 184 +++++++++++++---
 test/box/misc.result         |  57 ++---
 test/engine/tuple.result     | 371 ++++++++++++++++++++++++++++++++
 test/engine/tuple.test.lua   | 106 +++++++++
 19 files changed, 1415 insertions(+), 153 deletions(-)

diff --git a/src/box/errcode.h b/src/box/errcode.h
index 4115e6b..464f413 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -107,7 +107,7 @@ struct errcode_record {
 	/* 52 */_(ER_FUNCTION_EXISTS,		"Function '%s' already exists") \
 	/* 53 */_(ER_BEFORE_REPLACE_RET,	"Invalid return value of space:before_replace trigger: expected tuple or nil, got %s") \
 	/* 54 */_(ER_FUNCTION_MAX,		"A limit on the total number of functions has been reached: %u") \
-	/* 55 */_(ER_UNUSED4,			"") \
+	/* 55 */_(ER_DATA_STRUCTURE_MISMATCH,	"Tuple doesn't match document structure: %s") \
 	/* 56 */_(ER_USER_MAX,			"A limit on the total number of users has been reached: %u") \
 	/* 57 */_(ER_NO_SUCH_ENGINE,		"Space engine '%s' does not exist") \
 	/* 58 */_(ER_RELOAD_CFG,		"Can't set option '%s' dynamically") \
diff --git a/src/box/index_def.c b/src/box/index_def.c
index 9cda63c..6e206b1 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -209,8 +209,10 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
 			 * Courtesy to a user who could have made
 			 * a typo.
*/ - if (index_def->key_def->parts[i].fieldno == - index_def->key_def->parts[j].fieldno) { + struct key_part *part_a = &index_def->key_def->parts[i]; + struct key_part *part_b = &index_def->key_def->parts[j]; + if (part_a->fieldno == part_b->fieldno && + key_part_path_cmp(part_a, part_b) == 0) { diag_set(ClientError, ER_MODIFY_INDEX, index_def->name, space_name, "same key part is indexed twice"); diff --git a/src/box/key_def.c b/src/box/key_def.c index 67b517f..1b32387 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -28,6 +28,8 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "fiber.h" +#include "json/path.h" #include "key_def.h" #include "tuple_compare.h" #include "tuple_extract_key.h" @@ -41,6 +43,7 @@ const struct key_part_def key_part_def_default = { field_type_MAX, COLL_NONE, false, + NULL }; static int64_t @@ -53,6 +56,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len) #define PART_OPT_FIELD "field" #define PART_OPT_COLLATION "collation" #define PART_OPT_NULLABILITY "is_nullable" +#define PART_OPT_PATH "path" const struct opt_def part_def_reg[] = { OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type, @@ -61,6 +65,7 @@ const struct opt_def part_def_reg[] = { OPT_DEF(PART_OPT_COLLATION, OPT_UINT32, struct key_part_def, coll_id), OPT_DEF(PART_OPT_NULLABILITY, OPT_BOOL, struct key_part_def, is_nullable), + OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path), OPT_END, }; @@ -96,13 +101,25 @@ const uint32_t key_mp_type[] = { struct key_def * key_def_dup(const struct key_def *src) { - size_t sz = key_def_sizeof(src->part_count); - struct key_def *res = (struct key_def *)malloc(sz); + const struct key_part *parts = src->parts; + const struct key_part *parts_end = parts + src->part_count; + size_t sz = 0; + for (; parts < parts_end; parts++) + sz += parts->path != NULL ? parts->path_len + 1 : 0; + sz = key_def_sizeof(src->part_count, sz); + struct key_def *res = (struct key_def *)calloc(1, sz); if (res == NULL) { diag_set(OutOfMemory, sz, "malloc", "res"); return NULL; } memcpy(res, src, sz); + /* Update paths to point to the new memory chunk.*/ + for (uint32_t i = 0; i < src->part_count; i++) { + if (src->parts[i].path == NULL) + continue; + size_t path_offset = src->parts[i].path - (char *)src; + res->parts[i].path = (char *)res + path_offset; + } return res; } @@ -110,8 +127,23 @@ void key_def_swap(struct key_def *old_def, struct key_def *new_def) { assert(old_def->part_count == new_def->part_count); - for (uint32_t i = 0; i < new_def->part_count; i++) - SWAP(old_def->parts[i], new_def->parts[i]); + for (uint32_t i = 0; i < new_def->part_count; i++) { + if (old_def->parts[i].path == NULL) { + SWAP(old_def->parts[i], new_def->parts[i]); + } else { + /* + * Since the data is located in memory + * in the same order (otherwise rebuild + * would be called), just update the + * pointers. 
+ */ + size_t path_offset = + old_def->parts[i].path - (char *)old_def; + SWAP(old_def->parts[i], new_def->parts[i]); + old_def->parts[i].path = (char *)old_def + path_offset; + new_def->parts[i].path = (char *)new_def + path_offset; + } + } SWAP(*old_def, *new_def); } @@ -133,23 +165,36 @@ key_def_set_cmp(struct key_def *def) static void key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, bool is_nullable, struct coll *coll, - uint32_t coll_id) + uint32_t coll_id, const char *path, uint32_t path_len) { assert(part_no < def->part_count); assert(type < field_type_MAX); def->is_nullable |= is_nullable; + def->has_json_paths |= path != NULL; def->parts[part_no].is_nullable = is_nullable; def->parts[part_no].fieldno = fieldno; def->parts[part_no].type = type; def->parts[part_no].coll = coll; def->parts[part_no].coll_id = coll_id; + if (path != NULL) { + def->parts[part_no].path_len = path_len; + assert(def->parts[part_no].path != NULL); + memcpy(def->parts[part_no].path, path, path_len); + def->parts[part_no].path[path_len] = '\0'; + } else { + def->parts[part_no].path_len = 0; + def->parts[part_no].path = NULL; + } column_mask_set_fieldno(&def->column_mask, fieldno); } struct key_def * key_def_new(const struct key_part_def *parts, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + ssize_t sz = 0; + for (uint32_t i = 0; i < part_count; i++) + sz += parts[i].path != NULL ? strlen(parts[i].path) + 1 : 0; + sz = key_def_sizeof(part_count, sz); struct key_def *def = calloc(1, sz); if (def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct key_def"); @@ -159,6 +204,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) def->part_count = part_count; def->unique_part_count = part_count; + char *data = (char *)def + key_def_sizeof(part_count, 0); for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; struct coll *coll = NULL; @@ -172,15 +218,23 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count) } coll = coll_id->coll; } + uint32_t path_len = 0; + if (part->path != NULL) { + path_len = strlen(part->path); + def->parts[i].path = data; + data += path_len + 1; + } key_def_set_part(def, i, part->fieldno, part->type, - part->is_nullable, coll, part->coll_id); + part->is_nullable, coll, part->coll_id, + part->path, path_len); } key_def_set_cmp(def); return def; } -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) +int +key_def_dump_parts(struct region *pool, const struct key_def *def, + struct key_part_def *parts) { for (uint32_t i = 0; i < def->part_count; i++) { const struct key_part *part = &def->parts[i]; @@ -189,13 +243,27 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) part_def->type = part->type; part_def->is_nullable = part->is_nullable; part_def->coll_id = part->coll_id; + if (part->path != NULL) { + char *path = region_alloc(pool, part->path_len + 1); + if (path == NULL) { + diag_set(OutOfMemory, part->path_len + 1, + "region_alloc", "part_def->path"); + return -1; + } + memcpy(path, part->path, part->path_len); + path[part->path_len] = '\0'; + part_def->path = path; + } else { + part_def->path = NULL; +} } + return 0; } box_key_def_t * box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) { - size_t sz = key_def_sizeof(part_count); + size_t sz = key_def_sizeof(part_count, 0); struct key_def *key_def = calloc(1, sz); if (key_def == NULL) { diag_set(OutOfMemory, sz, "malloc", "struct 
key_def"); @@ -208,7 +276,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) for (uint32_t item = 0; item < part_count; ++item) { key_def_set_part(key_def, item, fields[item], (enum field_type)types[item], - false, NULL, COLL_NONE); + false, NULL, COLL_NONE, NULL, 0); } key_def_set_cmp(key_def); return key_def; @@ -255,6 +323,9 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1, if (part1->is_nullable != part2->is_nullable) return part1->is_nullable < part2->is_nullable ? -1 : 1; + int rc; + if ((rc = key_part_path_cmp(part1, part2)) != 0) + return rc; } return part_count1 < part_count2 ? -1 : part_count1 > part_count2; } @@ -286,8 +357,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts, for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; assert(part->type < field_type_MAX); - SNPRINT(total, snprintf, buf, size, "%d, '%s'", - (int)part->fieldno, field_type_strs[part->type]); + if (part->path != NULL) { + SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'", + (int)part->fieldno, part->path, + field_type_strs[part->type]); + } else { + SNPRINT(total, snprintf, buf, size, "%d, '%s'", + (int)part->fieldno, + field_type_strs[part->type]); + } if (i < part_count - 1) SNPRINT(total, snprintf, buf, size, ", "); } @@ -306,6 +384,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; size += mp_sizeof_map(count); size += mp_sizeof_str(strlen(PART_OPT_FIELD)); size += mp_sizeof_uint(part->fieldno); @@ -320,6 +400,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY)); size += mp_sizeof_bool(part->is_nullable); } + if (part->path != NULL) { + size += mp_sizeof_str(strlen(PART_OPT_PATH)); + size += mp_sizeof_str(strlen(part->path)); + } } return size; } @@ -335,6 +419,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; data = mp_encode_map(data, count); data = mp_encode_str(data, PART_OPT_FIELD, strlen(PART_OPT_FIELD)); @@ -354,6 +440,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, strlen(PART_OPT_NULLABILITY)); data = mp_encode_bool(data, part->is_nullable); } + if (part->path != NULL) { + data = mp_encode_str(data, PART_OPT_PATH, + strlen(PART_OPT_PATH)); + data = mp_encode_str(data, part->path, + strlen(part->path)); + } } return data; } @@ -414,6 +506,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -427,6 +520,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, return key_def_decode_parts_166(parts, part_count, data, fields, field_count); } + struct region *region = &fiber()->gc; for (uint32_t i = 0; i < part_count; i++) { struct key_part_def *part = &parts[i]; if (mp_typeof(**data) != MP_MAP) { @@ -438,7 +532,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, *part = key_part_def_default; if (opts_decode(part, part_def_reg, data, ER_WRONG_INDEX_OPTIONS, i + TUPLE_INDEX_BASE, - NULL) != 0) + region) != 0) return -1; if (part->type == field_type_MAX) { diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, @@ -455,6 +549,34 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t 
part_count, "string and scalar parts"); return -1; } + /* + * We should convert JSON path to canonical form + * to prevent indexing same field twice. + */ + if (part->path != NULL) { + uint32_t path_len = strlen(part->path); + char *path_normalized = + region_alloc(region, 2.5 * path_len + 1); + if (path_normalized == NULL) { + diag_set(OutOfMemory, 2.5 * path_len + 1, + "region_alloc", "path"); + return -1; + } + int rc = json_path_normalize(part->path, path_len, + path_normalized); + if (rc != 0) { + const char *err_msg = + tt_sprintf("invalid JSON path '%s': " + "path has invalid structure " + "(error at position %d)", + part->path, rc); + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part->fieldno + TUPLE_INDEX_BASE, + err_msg); + return -1; + } + part->path = path_normalized; + } } return 0; } @@ -479,6 +601,7 @@ key_def_decode_parts_160(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -490,7 +613,8 @@ key_def_contains_part(const struct key_def *key_def, const struct key_part *part = key_def->parts; const struct key_part *end = part + key_def->part_count; for (; part != end; part++) { - if (part->fieldno == to_find->fieldno) + if (part->fieldno == to_find->fieldno && + key_part_path_cmp(part, to_find) == 0) return true; } return false; @@ -516,18 +640,27 @@ key_def_merge(const struct key_def *first, const struct key_def *second) * Find and remove part duplicates, i.e. parts counted * twice since they are present in both key defs. */ - const struct key_part *part = second->parts; - const struct key_part *end = part + second->part_count; + size_t sz = 0; + const struct key_part *part = first->parts; + const struct key_part *end = part + first->part_count; + for (; part != end; part++) { + if (part->path != NULL) + sz += part->path_len + 1; + } + part = second->parts; + end = part + second->part_count; for (; part != end; part++) { if (key_def_contains_part(first, part)) --new_part_count; + else if (part->path != NULL) + sz += part->path_len + 1; } + sz = key_def_sizeof(new_part_count, sz); struct key_def *new_def; - new_def = (struct key_def *)calloc(1, key_def_sizeof(new_part_count)); + new_def = (struct key_def *)calloc(1, sz); if (new_def == NULL) { - diag_set(OutOfMemory, key_def_sizeof(new_part_count), "malloc", - "new_def"); + diag_set(OutOfMemory, sz, "malloc", "new_def"); return NULL; } new_def->part_count = new_part_count; @@ -535,14 +668,21 @@ key_def_merge(const struct key_def *first, const struct key_def *second) new_def->is_nullable = first->is_nullable || second->is_nullable; new_def->has_optional_parts = first->has_optional_parts || second->has_optional_parts; + /* Path data write position in the new key_def. */ + char *data = (char *)new_def + key_def_sizeof(new_part_count, 0); /* Write position in the new key def. */ uint32_t pos = 0; /* Append first key def's parts to the new index_def. */ part = first->parts; end = part + first->part_count; for (; part != end; part++) { + if (part->path != NULL) { + new_def->parts[pos].path = data; + data += part->path_len + 1; + } key_def_set_part(new_def, pos++, part->fieldno, part->type, - part->is_nullable, part->coll, part->coll_id); + part->is_nullable, part->coll, part->coll_id, + part->path, part->path_len); } /* Set-append second key def's part to the new key def. 
*/ @@ -551,8 +691,13 @@ key_def_merge(const struct key_def *first, const struct key_def *second) for (; part != end; part++) { if (key_def_contains_part(first, part)) continue; + if (part->path != NULL) { + new_def->parts[pos].path = data; + data += part->path_len + 1; + } key_def_set_part(new_def, pos++, part->fieldno, part->type, - part->is_nullable, part->coll, part->coll_id); + part->is_nullable, part->coll, part->coll_id, + part->path, part->path_len); } key_def_set_cmp(new_def); return new_def; diff --git a/src/box/key_def.h b/src/box/key_def.h index 37b9045..ca73015 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -54,6 +54,8 @@ struct key_part_def { uint32_t coll_id; /** True if a key part can store NULLs. */ bool is_nullable; + /** JSON path to data. */ + const char *path; }; extern const struct key_part_def key_part_def_default; @@ -76,6 +78,13 @@ struct key_part { struct coll *coll; /** True if a part can store NULLs. */ bool is_nullable; + /** + * JSON path to data in 'canonical' form. + * Read json_path_normalize to get more details. + */ + char *path; + /** The length of JSON path. */ + uint32_t path_len; }; struct key_def; @@ -130,6 +139,8 @@ struct key_def { uint32_t unique_part_count; /** True, if at least one part can store NULL. */ bool is_nullable; + /** True, if some key part has JSON path. */ + bool has_json_paths; /** * True, if some key parts can be absent in a tuple. These * fields assumed to be MP_NIL. @@ -223,9 +234,10 @@ box_tuple_compare_with_key(const box_tuple_t *tuple_a, const char *key_b, /** \endcond public */ static inline size_t -key_def_sizeof(uint32_t part_count) +key_def_sizeof(uint32_t part_count, uint32_t paths_size) { - return sizeof(struct key_def) + sizeof(struct key_part) * part_count; + return sizeof(struct key_def) + sizeof(struct key_part) * part_count + + paths_size; } /** @@ -238,8 +250,9 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count); /** * Dump part definitions of the given key def. */ -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts); +int +key_def_dump_parts(struct region *pool, const struct key_def *def, + struct key_part_def *parts); /** * Update 'has_optional_parts' of @a key_def with correspondence @@ -351,6 +364,8 @@ key_validate_parts(const struct key_def *key_def, const char *key, static inline bool key_def_is_sequential(const struct key_def *key_def) { + if (key_def->has_json_paths) + return false; for (uint32_t part_id = 0; part_id < key_def->part_count; part_id++) { if (key_def->parts[part_id].fieldno != part_id) return false; @@ -401,6 +416,14 @@ key_mp_type_validate(enum field_type key_type, enum mp_type mp_type, return 0; } +static inline int +key_part_path_cmp(const struct key_part *part1, const struct key_part *part2) +{ + if (part1->path_len != part2->path_len) + return part1->path_len - part2->path_len; + return memcmp(part1->path, part2->path, part1->path_len); +} + /** * Compare two key part arrays. 
* diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc index 25b7e36..875e51f 100644 --- a/src/box/lua/space.cc +++ b/src/box/lua/space.cc @@ -295,6 +295,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i) lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE); lua_setfield(L, -2, "fieldno"); + if (part->path != NULL) { + lua_pushstring(L, part->path); + lua_setfield(L, -2, "path"); + } + lua_pushboolean(L, part->is_nullable); lua_setfield(L, -2, "is_nullable"); diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index ae1f5a0..a35f5e7 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -1317,6 +1317,8 @@ memtx_index_def_change_requires_rebuild(struct index *index, return true; if (old_part->coll != new_part->coll) return true; + if (key_part_path_cmp(old_part, new_part) != 0) + return true; } return false; } diff --git a/src/box/tuple.c b/src/box/tuple.c index d7dbad3..2bc414f 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -161,10 +161,21 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple) struct tuple_field *field = &format->fields[0]; uint32_t i = 0; uint32_t defined_field_count = MIN(field_count, format->field_count); + size_t off_stack_size = + format->max_path_tree_depth * sizeof(const char *); + const char **off_stack = region_alloc(&fiber()->gc, off_stack_size); + if (off_stack == NULL) { + diag_set(OutOfMemory, off_stack_size, "region_alloc", + "off_stack"); + return -1; + } for (; i < defined_field_count; ++i, ++field) { - if (key_mp_type_validate(field->type, mp_typeof(*tuple), - ER_FIELD_TYPE, i + TUPLE_INDEX_BASE, - field->is_nullable)) + /* + * Don't fill field_map, just make types + * validations. + */ + if (tuple_field_tree_parse_raw(field, tuple, NULL, i, NULL, + off_stack) != 0) return -1; mp_next(&tuple); } diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index e21b009..9bff340 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -469,7 +469,8 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = tuple_data(tuple_b); - if (key_def->part_count == 1 && part->fieldno == 0) { + if (key_def->part_count == 1 && part->fieldno == 0 && + part->path == NULL) { /* * First field can not be optional - empty tuples * can not exist. @@ -1027,7 +1028,7 @@ tuple_compare_create(const struct key_def *def) } } assert(! def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_arr) / sizeof(cmp_arr[0]); k++) { @@ -1247,7 +1248,7 @@ tuple_compare_with_key_create(const struct key_def *def) } } assert(! 
def->has_optional_parts); - if (!key_def_has_collation(def)) { + if (!key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ for (uint32_t k = 0; k < sizeof(cmp_wk_arr) / sizeof(cmp_wk_arr[0]); diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc index e9d7cac..cb4ae71 100644 --- a/src/box/tuple_extract_key.cc +++ b/src/box/tuple_extract_key.cc @@ -10,7 +10,8 @@ key_def_parts_are_sequential(const struct key_def *def, int i) { uint32_t fieldno1 = def->parts[i].fieldno + 1; uint32_t fieldno2 = def->parts[i + 1].fieldno; - return fieldno1 == fieldno2; + return fieldno1 == fieldno2 && def->parts[i].path == NULL && + def->parts[i + 1].path == NULL; } /** True, if a key con contain two or more parts in sequence. */ @@ -241,7 +242,8 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, if (!key_def_parts_are_sequential(key_def, i)) break; } - uint32_t end_fieldno = key_def->parts[i].fieldno; + const struct key_part *part = &key_def->parts[i]; + uint32_t end_fieldno = part->fieldno; if (fieldno < current_fieldno) { /* Rewind. */ @@ -283,6 +285,17 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, current_fieldno++; } } + const char *field_last, *field_end_last; + if (part->path != NULL) { + field_last = field; + field_end_last = field_end; + int rc = tuple_field_by_relative_path(&field, + part->path, + part->path_len); + assert(rc == 0); + field_end = field; + mp_next(&field_end); + } memcpy(key_buf, field, field_end - field); key_buf += field_end - field; if (has_optional_parts && null_count != 0) { @@ -291,6 +304,10 @@ tuple_extract_key_slowpath_raw(const char *data, const char *data_end, } else { assert(key_buf - key <= data_end - data); } + if (part->path != NULL) { + field = field_last; + field_end = field_end_last; + } } if (key_size != NULL) *key_size = (uint32_t)(key_buf - key); diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index a8aceef..30deac5 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -28,6 +28,7 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "fiber.h" #include "json/path.h" #include "tuple_format.h" @@ -37,20 +38,17 @@ static intptr_t recycled_format_ids = FORMAT_ID_NIL; static uint32_t formats_size = 0, formats_capacity = 0; -static const struct tuple_field tuple_field_default = { - FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false, false, -}; - /** * Calculate the size of tuple format allocation. * @param field_count Count of tuple fields. + * @param path_size Size of paths extention. * @retval Size of memory allocation. */ static inline uint32_t -tuple_format_sizeof(uint32_t field_count) +tuple_format_sizeof(uint32_t field_count, uint32_t paths_size) { return sizeof(struct tuple_format) + - field_count * sizeof(struct tuple_field); + field_count * sizeof(struct tuple_field) + paths_size; } /** @@ -134,6 +132,146 @@ tuple_field_go_to_key(const char **field, const char *key, int len) } /** + * Initialize tuple_field with default values. + * @param field Fields definition. + * @param path_node JSON parser node to initialize JSON tree + * tuple_field. May be NULL for regular fields. + * @param path_node_hash Hash of @path_node calculated with + * json_path_node_hash. 
+ */ +static inline void +tuple_field_create(struct tuple_field *field, struct json_path_node *path_node, + uint32_t path_node_hash) +{ + memset(field, 0, sizeof(struct tuple_field)); + field->type = FIELD_TYPE_ANY; + field->offset_slot = TUPLE_OFFSET_SLOT_NIL; + json_tree_node_create(&field->path_tree_node, path_node, + path_node_hash); +} + +/** + * Release tuple_field resources. + * @param field Fields definition. + */ +static inline void +tuple_field_destroy(struct tuple_field *field) +{ + json_tree_node_destroy(&field->path_tree_node); +} + +/** + * Construct a new JSON tree path for tuple_field by path string. + * Make types checks for existent paths and initialize it with + * value allocated form offset_slot pool. + * @param parent Root field to build JSON path tree. + * @param path JSON path to data used to construct tree path. + * @param path_len Length of the path. + * @param fieldno Index of field used for error handling. + * @param field_type Tuple field type of leaf record. + * @param is_nullable Nullability of leaf record. + * @param current_slot Pointer to offset slot pool used to + * allocate leaf field offset_slot. + * @retval not NULL JSON tree leaf tuple record. + * @retval NULL on memory allocation or path conflict error. + * diag message is set. + */ +static struct tuple_field * +tuple_field_tree_add_path(struct tuple_field *parent, const char *path, + uint32_t path_len, uint32_t fieldno, + enum field_type type, bool is_nullable, + int *current_slot) +{ + int rc; + enum field_type iterm_node_type = FIELD_TYPE_ANY; + struct json_path_parser parser; + struct json_path_node path_node; + struct tuple_field *field = parent; + /* True if the last entry has just been allocated. */ + bool is_last_new = false; + json_path_parser_create(&parser, path, path_len); + while ((rc = json_path_next(&parser, &path_node)) == 0 && + path_node.type != JSON_PATH_END) { + iterm_node_type = path_node.type == JSON_PATH_STR ? + FIELD_TYPE_MAP : FIELD_TYPE_ARRAY; + if (field->type != FIELD_TYPE_ANY && + field->type != iterm_node_type) + goto error_type_mistmatch; + uint32_t path_node_hash = json_path_node_hash(&path_node); + struct tuple_field *next_field = + json_tree_lookup_entry_by_path_node(field, &path_node, + path_node_hash, + struct tuple_field, + path_tree_node); + if (next_field == NULL) { + is_last_new = true; + next_field = malloc(sizeof(struct tuple_field)); + if (next_field == NULL) { + diag_set(OutOfMemory, + sizeof(struct tuple_field), + "malloc", "next_field"); + return NULL; + } + tuple_field_create(next_field, &path_node, + path_node_hash); + rc = json_tree_add(&field->path_tree_node, + &next_field->path_tree_node); + if (rc != 0) { + diag_set(OutOfMemory, + sizeof(struct json_tree_node), + "json_tree_add", "hashtable"); + return NULL; + } + } else { + is_last_new = false; + } + field->type = iterm_node_type; + field = next_field; + } + /* + * Key parts path is already checked and normalized, + * so we don't need to handle parse error. + */ + assert(rc == 0 && path_node.type == JSON_PATH_END); + assert(field != NULL); + if (!is_last_new) { + assert(field->offset_slot != TUPLE_OFFSET_SLOT_NIL); + /* Set up the strongest type and nullability. 
*/ + if (field->is_nullable != is_nullable) + field->is_nullable = false; + iterm_node_type = type; + if (field_type1_contains_type2(field->type, iterm_node_type)) { + field->type = iterm_node_type; + } else if (!field_type1_contains_type2(iterm_node_type, + field->type)) + goto error_type_mistmatch; + } else { + field->is_nullable = is_nullable; + field->type = type; + field->offset_slot = (*current_slot = *current_slot - 1); + + /* Update tree depths for path. */ + uint32_t depth = 1; + for (struct json_tree_node *iter = field->path_tree_node.parent; + iter != NULL; iter = iter->parent, ++depth) { + struct tuple_field *record = + json_tree_entry(iter, struct tuple_field, + path_tree_node); + record->path_tree_depth = + MAX(record->path_tree_depth, depth); + } + } + return field; + +error_type_mistmatch: ; + diag_set(ClientError, ER_INDEX_PART_TYPE_MISMATCH, + tt_sprintf("%d", fieldno + TUPLE_INDEX_BASE), + field_type_strs[iterm_node_type], + field_type_strs[field->type]); + return NULL; +} + +/** * Add and initialize a new key_part to format. * @param format Format to initialize. * @param fields Fields definition if any. @@ -141,6 +279,7 @@ tuple_field_go_to_key(const char **field, const char *key, int len) * @param part An index part to append. * @param is_sequential Does this part sequential. * @param current_slot Pointer to last offset slot. + * @param path_data Pointer to memory to store paths strings. * @retval -1 On error. * @retval 0 On success. */ @@ -148,10 +287,33 @@ static int tuple_format_add_key_part(struct tuple_format *format, const struct field_def *fields, uint32_t field_count, const struct key_part *part, bool is_sequential, - int *current_slot) + int *current_slot, char **path_data) { assert(part->fieldno < format->field_count); struct tuple_field *field = &format->fields[part->fieldno]; + if (part->path != NULL) { + field->is_key_part = true; + assert(!is_sequential); + /** + * Copy JSON path data to reserved area at the + * end of format allocation. + */ + memcpy(*path_data, part->path, part->path_len); + (*path_data)[part->path_len] = '\0'; + field = tuple_field_tree_add_path(field, *path_data, + part->path_len, part->fieldno, + part->type, part->is_nullable, + current_slot); + *path_data += part->path_len + 1; + if (field != NULL) { + format->max_path_tree_depth = + MAX(format->max_path_tree_depth, + field->path_tree_depth); + return 0; + } else { + return -1; + } + } if (part->fieldno >= field_count) { field->is_nullable = part->is_nullable; } else if (field->is_nullable != part->is_nullable) { @@ -214,18 +376,15 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, format->field_map_size = 0; return 0; } - /* Initialize defined fields */ + /* Update defined fields attributes. 
*/ for (uint32_t i = 0; i < field_count; ++i) { - format->fields[i].is_key_part = false; format->fields[i].type = fields[i].type; - format->fields[i].offset_slot = TUPLE_OFFSET_SLOT_NIL; format->fields[i].is_nullable = fields[i].is_nullable; } - /* Initialize remaining fields */ - for (uint32_t i = field_count; i < format->field_count; i++) - format->fields[i] = tuple_field_default; int current_slot = 0; + char *paths_data = + (char *)format + tuple_format_sizeof(format->field_count, 0); /* extract field type info */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { @@ -238,7 +397,8 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, if (tuple_format_add_key_part(format, fields, field_count, part, is_sequential, - ¤t_slot) != 0) + ¤t_slot, + &paths_data) != 0) return -1; } } @@ -305,6 +465,8 @@ static struct tuple_format * tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, uint32_t space_field_count, struct tuple_dictionary *dict) { + /* Size of area to store paths. */ + uint32_t paths_size = 0; uint32_t index_field_count = 0; /* find max max field no */ for (uint16_t key_no = 0; key_no < key_count; ++key_no) { @@ -314,10 +476,12 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, for (; part < pend; part++) { index_field_count = MAX(index_field_count, part->fieldno + 1); + if (part->path != NULL) + paths_size += part->path_len + 1; } } uint32_t field_count = MAX(space_field_count, index_field_count); - uint32_t total = tuple_format_sizeof(field_count); + uint32_t total = tuple_format_sizeof(field_count, paths_size); struct tuple_format *format = (struct tuple_format *) malloc(total); if (format == NULL) { @@ -336,6 +500,10 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, format->dict = dict; tuple_dictionary_ref(dict); } + for (uint32_t i = 0; i < field_count; i++) + tuple_field_create(&format->fields[i], NULL, 0); + format->max_path_tree_depth = 1; + format->allocation_size = total; format->refs = 0; format->id = FORMAT_ID_NIL; format->field_count = field_count; @@ -349,6 +517,19 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, static inline void tuple_format_destroy(struct tuple_format *format) { + for (uint32_t i = 0; i < format->field_count; i++) { + struct tuple_field *field; + json_tree_foreach_entry_safe(field, &format->fields[i], + path_tree_node) { + tuple_field_destroy(field); + /* + * Root records has been allocated with + * format. Don't need to free them. + */ + if (field != &format->fields[i]) + free(field); + } + } tuple_dictionary_unref(format->dict); } @@ -428,26 +609,201 @@ tuple_format1_can_store_format2_tuples(const struct tuple_format *format1, return true; } +/** + * Build a copy of JSON tree that use a new source path string. + * Function use the assumption that format data was memcpy(ied) + * so new_data and old_data memory have similar paths order. + * @param new_root Root of a new JSON path tree. + * @param old_root Root of original JSON path tree to be copied. + * @param new_data Pointer to memory containing new paths strings. + * @param old_data Painter to memory containing old paths strings. + * @retval 0 On success, -1 On memory allocation error. 
+ */ +static int +tuple_field_tree_dup(struct tuple_field *new_root, struct tuple_field *old_root, + const char *new_data, const char *old_data) +{ + struct json_tree_node *new_prev = &new_root->path_tree_node; + struct json_tree_node *old_prev = &old_root->path_tree_node; + struct json_tree_node *old_curr; + json_tree_foreach_pre(old_curr, &old_root->path_tree_node) { + if (unlikely(old_curr == &old_root->path_tree_node)) + continue; + struct tuple_field *new_field = + calloc(1, sizeof(struct tuple_field)); + if (new_field == NULL) { + diag_set(OutOfMemory, sizeof(struct tuple_field), + "calloc", "new_field"); + return -1; + } + *new_field = *json_tree_entry(old_curr, struct tuple_field, + path_tree_node); + /* + * Re-initialize JSON tree node with a new + * string pointer. + */ + struct json_path_node node = old_curr->key; + if (node.type == JSON_PATH_STR) + node.str = new_data + (node.str - old_data); + assert(old_curr->key_hash == json_path_node_hash(&node)); + json_tree_node_create(&new_field->path_tree_node, &node, + old_curr->key_hash); + /* Lookup for a valid parent node in new tree. */ + while (old_prev != old_curr->parent) { + old_prev = old_prev->parent; + new_prev = new_prev->parent; + assert(old_prev != NULL); + } + assert(new_prev != NULL); + int rc = json_tree_add(new_prev, &new_field->path_tree_node); + if (rc != 0) { + diag_set(OutOfMemory, sizeof(struct json_tree_node), + "json_tree_add", "hashtable"); + return -1; + } + old_prev = old_curr; + new_prev = &new_field->path_tree_node; + } + return 0; +} + struct tuple_format * tuple_format_dup(struct tuple_format *src) { - uint32_t total = sizeof(struct tuple_format) + - src->field_count * sizeof(struct tuple_field); + uint32_t total = src->allocation_size; struct tuple_format *format = (struct tuple_format *) malloc(total); if (format == NULL) { diag_set(OutOfMemory, total, "malloc", "tuple format"); return NULL; } memcpy(format, src, total); + + char *src_data_begin = + (char *)src + tuple_format_sizeof(src->field_count, 0); + char *new_data_begin = + (char *)format + tuple_format_sizeof(format->field_count, 0); + /* + * Destroy references to source tree to prevent source + * corruption during resources release on dup failure. + */ + for (uint32_t i = 0; i < src->field_count; i++) + json_tree_node_create(&format->fields[i].path_tree_node, NULL, 0); + for (uint32_t i = 0; i < src->field_count; i++) { + if (tuple_field_tree_dup(&format->fields[i], &src->fields[i], + new_data_begin, src_data_begin) != 0) + goto error; + } tuple_dictionary_ref(format->dict); format->id = FORMAT_ID_NIL; format->refs = 0; - if (tuple_format_register(format) != 0) { - tuple_format_destroy(format); - free(format); - return NULL; - } + if (tuple_format_register(format) != 0) + goto error; return format; +error: + tuple_format_destroy(format); + free(format); + return NULL; +} + +int +tuple_field_tree_parse_raw(struct tuple_field *field, const char *field_raw, + const char *tuple_raw, uint32_t fieldno, + uint32_t *field_map, const char **off_stack) +{ + /* + * Need to make root field type validation here to + * do not make prev != NULL ? prev->type : field->type + * each iteration. + */ + if (key_mp_type_validate(field->type, mp_typeof(*field_raw), + ER_FIELD_TYPE, TUPLE_INDEX_BASE + fieldno, + field->is_nullable) != 0) + return -1; + const char *err_details = NULL; + /* Stack of JSON path nodes offsets. 
+	 */
+	uint32_t off_stack_idx = 0;
+	const char *offset = field_raw;
+	struct tuple_field *prev = NULL;
+	struct tuple_field *curr;
+	json_tree_foreach_entry_pre(curr, field, path_tree_node) {
+		int rc = 0;
+		struct tuple_field *curr_parent =
+			json_tree_entry_safe(curr->path_tree_node.parent,
+					     struct tuple_field,
+					     path_tree_node);
+		/*
+		 * If the current record's parent is not the
+		 * previous record, we have switched to another
+		 * path of the tree.
+		 */
+		while (curr_parent != prev) {
+			assert(prev != NULL);
+			struct json_tree_node *prev_parent =
+				prev->path_tree_node.parent;
+			assert(prev_parent != NULL);
+			assert(off_stack_idx > 0);
+			prev = json_tree_entry(prev_parent, struct tuple_field,
+					       path_tree_node);
+			/*
+			 * Pop the offset corresponding to the
+			 * current field.
+			 */
+			offset = off_stack[--off_stack_idx];
+		}
+		off_stack[off_stack_idx++] = offset;
+
+		struct json_path_node *key = &curr->path_tree_node.key;
+		if (key->type == JSON_PATH_NUM) {
+			assert(prev == NULL || prev->type == FIELD_TYPE_ARRAY);
+			rc = tuple_field_go_to_index(&offset, key->num);
+			if (rc != 0 && !curr->is_nullable) {
+				err_details =
+					tt_sprintf("array size %d is less than "
+						   "size of %d defined in "
+						   "index",
+						   key->num, key->num + 1);
+				goto error_invalid_document;
+			}
+		} else if (key->type == JSON_PATH_STR) {
+			assert(prev == NULL || prev->type == FIELD_TYPE_MAP);
+			rc = tuple_field_go_to_key(&offset, key->str, key->len);
+			if (rc != 0 && !curr->is_nullable) {
+				err_details =
+					tt_sprintf("map doesn't contain key "
+						   "'%.*s' defined in index",
+						   key->len, key->str);
+				goto error_invalid_document;
+			}
+		}
+		/*
+		 * We can't postpone this validation: when the
+		 * lookup above fails, we don't know whether this
+		 * field is present in the tuple at all, and
+		 * offset may point to the next field.
+		 */
+		if (rc == 0 &&
+		    key_mp_type_validate(curr->type, mp_typeof(*offset),
+					 ER_FIELD_TYPE,
+					 TUPLE_INDEX_BASE + fieldno,
+					 curr->is_nullable) != 0)
+			return -1;
+		if (field_map != NULL &&
+		    curr->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+			field_map[curr->offset_slot] =
+				rc == 0 ? (uint32_t)(offset - tuple_raw) : 0;
+		}
+		prev = curr;
+	}
+	return 0;
+
+error_invalid_document:
+	assert(err_details != NULL);
+	char *data_buff = tt_static_buf();
+	mp_snprint(data_buff, TT_STATIC_BUF_LEN, field_raw);
+	const char *err_msg =
+		tt_sprintf("invalid field %d document content '%s': %s",
+			   fieldno + TUPLE_INDEX_BASE, data_buff, err_details);
+	diag_set(ClientError, ER_DATA_STRUCTURE_MISMATCH, err_msg);
+	return -1;
 }

 /** @sa declaration for details. */
@@ -477,34 +833,39 @@ tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
 	}

 	/* first field is simply accessible, so we do not store offset to it */
-	enum mp_type mp_type = mp_typeof(*pos);
-	const struct tuple_field *field = &format->fields[0];
-	if (key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE,
-				 TUPLE_INDEX_BASE, field->is_nullable))
-		return -1;
-	mp_next(&pos);
-	/* other fields...*/
-	++field;
-	uint32_t i = 1;
-	uint32_t defined_field_count = MIN(field_count, format->field_count);
-	if (field_count < format->index_field_count) {
+	struct tuple_field *field = &format->fields[0];
+	uint32_t i = 0;
+	if (field_count < format->index_field_count ||
+	    json_tree_node_children_count(&field->path_tree_node) == 0) {
 		/*
-		 * Nullify field map to be able to detect by 0,
-		 * which key fields are absent in tuple_field().
+ */ memset((char *)field_map - format->field_map_size, 0, - format->field_map_size); + format->field_map_size); } - for (; i < defined_field_count; ++i, ++field) { - mp_type = mp_typeof(*pos); + if (json_tree_node_children_count(&field->path_tree_node) == 0) { + enum mp_type mp_type = mp_typeof(*pos); if (key_mp_type_validate(field->type, mp_type, ER_FIELD_TYPE, - i + TUPLE_INDEX_BASE, - field->is_nullable)) + TUPLE_INDEX_BASE, field->is_nullable)) + return -1; + mp_next(&pos); + ++field; + ++i; + } + size_t off_stack_size = + format->max_path_tree_depth * sizeof(const char *); + const char **off_stack = region_alloc(&fiber()->gc, off_stack_size); + if (off_stack == NULL) { + diag_set(OutOfMemory, off_stack_size, "region_alloc", + "off_stack"); + return -1; + } + uint32_t defined_field_count = MIN(field_count, format->field_count); + for (; i < defined_field_count; ++i, ++field) { + if (tuple_field_tree_parse_raw(field, pos, tuple, i, field_map, + off_stack) != 0) return -1; - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { - field_map[field->offset_slot] = - (uint32_t) (pos - tuple); - } mp_next(&pos); } return 0; @@ -565,15 +926,7 @@ box_tuple_format_unref(box_tuple_format_t *format) tuple_format_unref(format); } -/** - * Retrieve field data by JSON path. - * @param field Pointer to msgpack with data. - * @param path The path to process. - * @param path_len The length of the @path. - * @retval 0 On success. - * @retval >0 On path parsing error, invalid character position. - */ -static inline int +int tuple_field_by_relative_path(const char **field, const char *path, uint32_t path_len) { @@ -677,3 +1030,38 @@ error: tt_sprintf("error in path on position %d", rc)); return -1; } + +const char * +tuple_field_by_part_raw(const struct tuple_format *format, const char *data, + const uint32_t *field_map, struct key_part *part) +{ + if (likely(part->path == NULL)) + return tuple_field_raw(format, data, field_map, part->fieldno); + + struct tuple_field *root_field = + unlikely(part->fieldno < format->field_count) ? + (struct tuple_field *)&format->fields[part->fieldno] : NULL; + struct tuple_field *field = + unlikely(root_field == NULL) ? NULL: + tuple_field_tree_lookup(root_field, part->path, part->path_len); + if (unlikely(field == NULL)) { + /* + * Legacy tuple having no field map for JSON + * index require full path parse. + */ + const char *field_raw = + tuple_field_raw(format, data, field_map, part->fieldno); + if (unlikely(field_raw == NULL)) + return NULL; + if (tuple_field_by_relative_path(&field_raw, part->path, + part->path_len) != 0) + return NULL; + return field_raw; + } + int32_t offset_slot = field->offset_slot; + assert(offset_slot < 0); + assert(-offset_slot * sizeof(uint32_t) <= format->field_map_size); + if (unlikely(field_map[offset_slot] == 0)) + return NULL; + return data + field_map[offset_slot]; +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 894620f..599e768 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -35,6 +35,7 @@ #include "field_def.h" #include "errinj.h" #include "tuple_dictionary.h" +#include "json/tree.h" #if defined(__cplusplus) extern "C" { @@ -108,6 +109,10 @@ struct tuple_field { bool is_key_part; /** True, if a field can store NULL. */ bool is_nullable; + /** JSON path tree max depth. */ + uint32_t path_tree_depth; + /** JSON root path tree node for registered indexes. 
*/ + struct json_tree_node path_tree_node; }; /** @@ -157,6 +162,14 @@ struct tuple_format { uint32_t min_field_count; /* Length of 'fields' array. */ uint32_t field_count; + /** Size of format allocation. */ + uint32_t allocation_size; + /** + * Maximum depth of JSON path tree. This value is used to + * allocate stack that used in pre order JSON path tree + * traversal. + */ + uint32_t max_path_tree_depth; /** * Shared names storage used by all formats of a space. */ @@ -388,6 +401,38 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple, } /** + * Retrieve field data by JSON path. + * @param field Pointer to msgpack with data. + * @param path The path to process. + * @param path_len The length of the @path. + * @retval 0 On success. + * @retval >0 On path parsing error, invalid character position. + */ +int +tuple_field_by_relative_path(const char **field, const char *path, + uint32_t path_len); + +/** + * Make tuple parsing doing JSON tree traversal. Make types + * validations. Init field_map for fields that have offset_slots. + * @param field Root field of path tree. + * @param field_raw Pointer to field data. + * @param tuple_raw Pointer to tuple data. + * May be NULL when field_map is NULL. + * @param fieldno Number of root field in tuple. + * @param field_map[out] Tuple field map to initialize. + * May be NULL if initialization is not + * required. + * @param off_stack Stack of intermediate nodes offsets. + * @retval 0 On success. + * @retval -1 On memory allocation or field parsing error. + */ +int +tuple_field_tree_parse_raw(struct tuple_field *field, const char *field_raw, + const char *tuple_raw, uint32_t fieldno, + uint32_t *field_map, const char **off_stack); + +/** * Get tuple field by its path. * @param format Tuple format. * @param tuple MessagePack tuple's body. @@ -414,11 +459,25 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, * @param part Index part to use. * @retval Field data if the field exists or NULL. */ -static inline const char * +const char * tuple_field_by_part_raw(const struct tuple_format *format, const char *data, - const uint32_t *field_map, struct key_part *part) + const uint32_t *field_map, struct key_part *part); + +/** + * Lookup JSON path in tuple_field JSON paths tree. + * @param field Tuple field with JSON tree root. + * @param path JSON path string to lookup. + * @param path_len Length of path. + * @retval not NULL tuple_field pointer on success. + * @retval NULL On nothing has been found. 
+ */ +static inline struct tuple_field * +tuple_field_tree_lookup(struct tuple_field *field, const char *path, + uint32_t path_len) { - return tuple_field_raw(format, data, field_map, part->fieldno); + return json_tree_lookup_entry_by_path(field, path, path_len, + struct tuple_field, + path_tree_node); } #if defined(__cplusplus) diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc index b394804..3486ce1 100644 --- a/src/box/tuple_hash.cc +++ b/src/box/tuple_hash.cc @@ -222,7 +222,7 @@ key_hash_slowpath(const char *key, struct key_def *key_def); void tuple_hash_func_set(struct key_def *key_def) { - if (key_def->is_nullable) + if (key_def->is_nullable || key_def->has_json_paths) goto slowpath; /* * Check that key_def defines sequential a key without holes diff --git a/src/box/vinyl.c b/src/box/vinyl.c index acfe86d..0a6ead9 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -975,6 +975,8 @@ vinyl_index_def_change_requires_rebuild(struct index *index, return true; if (!field_type1_contains_type2(new_part->type, old_part->type)) return true; + if (key_part_path_cmp(old_part, new_part) != 0) + return true; } return false; } diff --git a/src/box/vy_log.c b/src/box/vy_log.c index fc8ede5..f396705 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -711,7 +711,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src) "struct key_part_def"); goto err; } - key_def_dump_parts(src->key_def, dst->key_parts); + if (key_def_dump_parts(pool, src->key_def, dst->key_parts) != 0) + goto err; dst->key_part_count = src->key_def->part_count; dst->key_def = NULL; } diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index 7b704b8..9d5e220 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -196,8 +196,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, struct tuple *key, struct tuple **ret) { - assert(tuple_field_count(key) >= lsm->cmp_def->part_count); - *ret = NULL; double start_time = ev_monotonic_now(loop()); int rc = 0; diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index 08b8fb6..9c47b75 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -29,6 +29,7 @@ * SUCH DAMAGE. */ +#include "assoc.h" #include "vy_stmt.h" #include @@ -349,6 +350,103 @@ vy_stmt_replace_from_upsert(const struct tuple *upsert) return replace; } +/** + * Construct tuple field or calculate it's size. The fields_iov_ht + * is a hashtable that links leaf field records of field path + * tree and iovs that contain raw data. Function also fills the + * tuple field_map when write_data flag is set true. + */ +static void +tuple_field_restore_raw(struct tuple_field *field, char *tuple_raw, + uint32_t *field_map, char **offset, + struct mh_i64ptr_t *fields_iov_ht, bool write_data) +{ + struct tuple_field *prev = NULL; + struct tuple_field *curr; + json_tree_foreach_entry_pre(curr, field, path_tree_node) { + struct json_tree_node *curr_node = &curr->path_tree_node; + uint32_t children_count = + json_tree_node_children_count(curr_node); + struct tuple_field *parent = + curr_node->parent == NULL ? NULL : + json_tree_entry(curr_node->parent, struct tuple_field, + path_tree_node); + if (parent != NULL && parent->type == FIELD_TYPE_ARRAY){ + /* + * Fill unindexed array items with nulls. + * Gaps size calculated as a difference + * between sibling nodes. 
+ */ + uint32_t nulls_count = curr_node->key.num - 1; + if (prev != parent) { + struct json_tree_node *prev_node = + rlist_prev_entry(curr_node, sibling); + assert(prev_node != NULL); + nulls_count -= prev_node->key.num; + } + for (uint32_t i = 0; i < nulls_count; i++) { + *offset = !write_data ? + (*offset += mp_sizeof_nil()) : + mp_encode_nil(*offset); + } + } else if (parent != NULL && parent->type == FIELD_TYPE_MAP) { + const char *str = curr_node->key.str; + uint32_t len = curr_node->key.len; + *offset = !write_data ? + (*offset += mp_sizeof_str(len)) : + mp_encode_str(*offset, str, len); + } + /* Fill data. */ + if (curr->type == FIELD_TYPE_ARRAY) { + /* + * As numeric records list is sorted in + * ascending order in JSON tree, last + * entry contain the biggest number + * matching the size of the array. + */ + assert(children_count > 0); + struct json_tree_node *max_idx_node = + rlist_last_entry(&curr_node->children, + struct json_tree_node, + sibling); + uint32_t array_size = max_idx_node->key.num; + assert(array_size > 0); + *offset = !write_data ? + (*offset += mp_sizeof_array(array_size)) : + mp_encode_array(*offset, array_size); + } else if (curr->type == FIELD_TYPE_MAP) { + assert(children_count > 0); + *offset = !write_data ? + (*offset += mp_sizeof_map(children_count)) : + mp_encode_map(*offset, children_count); + } else { + /* Leaf record. */ + assert(children_count == 0); + mh_int_t k = mh_i64ptr_find(fields_iov_ht, + (uint64_t)curr, NULL); + struct iovec *iov = + k != mh_end(fields_iov_ht) ? + mh_i64ptr_node(fields_iov_ht, k)->val : NULL; + if (iov == NULL) { + *offset = !write_data ? + (*offset += mp_sizeof_nil()) : + mp_encode_nil(*offset); + } else { + uint32_t data_offset = *offset - tuple_raw; + int32_t slot = curr->offset_slot; + if (write_data) { + memcpy(*offset, iov->iov_base, + iov->iov_len); + if (slot != TUPLE_OFFSET_SLOT_NIL) + field_map[slot] = data_offset; + } + *offset += iov->iov_len; + } + } + prev = curr; + } +} + static struct tuple * vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, const struct key_def *cmp_def, @@ -357,51 +455,83 @@ vy_stmt_new_surrogate_from_key(const char *key, enum iproto_type type, /* UPSERT can't be surrogate. */ assert(type != IPROTO_UPSERT); struct region *region = &fiber()->gc; + struct tuple *stmt = NULL; uint32_t field_count = format->index_field_count; - struct iovec *iov = region_alloc(region, sizeof(*iov) * field_count); + uint32_t part_count = mp_decode_array(&key); + assert(part_count == cmp_def->part_count); + struct iovec *iov = region_alloc(region, sizeof(*iov) * part_count); if (iov == NULL) { - diag_set(OutOfMemory, sizeof(*iov) * field_count, - "region", "iov for surrogate key"); + diag_set(OutOfMemory, sizeof(*iov) * part_count, "region", + "iov for surrogate key"); return NULL; } - memset(iov, 0, sizeof(*iov) * field_count); - uint32_t part_count = mp_decode_array(&key); - assert(part_count == cmp_def->part_count); - assert(part_count <= field_count); - uint32_t nulls_count = field_count - cmp_def->part_count; - uint32_t bsize = mp_sizeof_array(field_count) + - mp_sizeof_nil() * nulls_count; - for (uint32_t i = 0; i < part_count; ++i) { - const struct key_part *part = &cmp_def->parts[i]; + /* Hastable linking leaf field and corresponding iov. 
*/ + struct mh_i64ptr_t *fields_iov_ht = mh_i64ptr_new(); + if (fields_iov_ht == NULL) { + diag_set(OutOfMemory, sizeof(struct mh_i64ptr_t), + "mh_i64ptr_new", "fields_iov_ht"); + return NULL; + } + if (mh_i64ptr_reserve(fields_iov_ht, part_count, NULL) != 0) { + diag_set(OutOfMemory, part_count, "mh_i64ptr_reserve", + "fields_iov_ht"); + goto end; + } + uint32_t bsize = mp_sizeof_array(field_count); + uint32_t nulls_count = field_count; + memset(iov, 0, sizeof(*iov) * part_count); + const struct key_part *part = cmp_def->parts; + for (uint32_t i = 0; i < part_count; ++i, ++part) { assert(part->fieldno < field_count); const char *svp = key; - iov[part->fieldno].iov_base = (char *) key; + iov[i].iov_base = (char *) key; mp_next(&key); - iov[part->fieldno].iov_len = key - svp; - bsize += key - svp; + iov[i].iov_len = key - svp; + struct tuple_field *field; + if (part->path == NULL) { + field = &format->fields[part->fieldno]; + --nulls_count; + } else { + struct tuple_field *root_field = + &format->fields[part->fieldno]; + field = tuple_field_tree_lookup(root_field, part->path, + part->path_len); + assert(field != NULL); + } + struct mh_i64ptr_node_t node = {(uint64_t)field, &iov[i]}; + mh_int_t k = mh_i64ptr_put(fields_iov_ht, &node, NULL, NULL); + if (unlikely(k == mh_end(fields_iov_ht))) { + diag_set(OutOfMemory, part_count, "mh_i64ptr_put", + "fields_iov_ht"); + goto end; + } + k = mh_i64ptr_find(fields_iov_ht, (uint64_t)field, NULL); + assert(k != mh_end(fields_iov_ht)); + } + /* Calculate tuple size to make allocation. */ + for (uint32_t i = 0; i < field_count; ++i) { + char *data = NULL; + tuple_field_restore_raw(&format->fields[i], NULL, NULL, &data, + fields_iov_ht, false); + bsize += data - (char *)NULL; } - struct tuple *stmt = vy_stmt_alloc(format, bsize); + stmt = vy_stmt_alloc(format, bsize); if (stmt == NULL) - return NULL; + goto end; char *raw = (char *) tuple_data(stmt); uint32_t *field_map = (uint32_t *) raw; char *wpos = mp_encode_array(raw, field_count); for (uint32_t i = 0; i < field_count; ++i) { - const struct tuple_field *field = &format->fields[i]; - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - field_map[field->offset_slot] = wpos - raw; - if (iov[i].iov_base == NULL) { - wpos = mp_encode_nil(wpos); - } else { - memcpy(wpos, iov[i].iov_base, iov[i].iov_len); - wpos += iov[i].iov_len; - } + tuple_field_restore_raw(&format->fields[i], raw, field_map, + &wpos, fields_iov_ht, true); } - assert(wpos == raw + bsize); + assert(wpos <= raw + bsize); vy_stmt_set_type(stmt, type); +end: + mh_i64ptr_delete(fields_iov_ht); return stmt; } diff --git a/test/box/misc.result b/test/box/misc.result index 4ee4797..b1fda78 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -350,7 +350,7 @@ t; - 'box.error.CANT_CREATE_COLLATION : 150' - 'box.error.USER_EXISTS : 46' - 'box.error.WAL_IO : 40' - - 'box.error.PROC_RET : 21' + - 'box.error.RTREE_RECT : 101' - 'box.error.PRIV_GRANTED : 89' - 'box.error.CREATE_SPACE : 9' - 'box.error.GRANT : 88' @@ -361,7 +361,7 @@ t; - 'box.error.VINYL_MAX_TUPLE_SIZE : 139' - 'box.error.LOAD_FUNCTION : 99' - 'box.error.INVALID_XLOG : 74' - - 'box.error.READ_VIEW_ABORTED : 130' + - 'box.error.PRIV_NOT_GRANTED : 91' - 'box.error.TRANSACTION_CONFLICT : 97' - 'box.error.GUEST_USER_PASSWORD : 96' - 'box.error.PROC_C : 102' @@ -371,8 +371,8 @@ t; - 'box.error.DROP_FUNCTION : 71' - 'box.error.CFG : 59' - 'box.error.NO_SUCH_FIELD : 37' - - 'box.error.CONNECTION_TO_SELF : 117' - - 'box.error.FUNCTION_MAX : 54' + - 'box.error.MORE_THAN_ONE_TUPLE : 
41' + - 'box.error.PROC_LUA : 32' - 'box.error.ILLEGAL_PARAMS : 1' - 'box.error.PARTIAL_KEY : 136' - 'box.error.SAVEPOINT_NO_TRANSACTION : 114' @@ -400,34 +400,35 @@ t; - 'box.error.UPDATE_ARG_TYPE : 26' - 'box.error.CROSS_ENGINE_TRANSACTION : 81' - 'box.error.FORMAT_MISMATCH_INDEX_PART : 27' - - 'box.error.FUNCTION_TX_ACTIVE : 30' - 'box.error.injection : table:
- - 'box.error.ITERATOR_TYPE : 72' + - 'box.error.FUNCTION_TX_ACTIVE : 30' + - 'box.error.IDENTIFIER : 70' + - 'box.error.TRANSACTION_YIELD : 154' - 'box.error.NO_SUCH_ENGINE : 57' - 'box.error.COMMIT_IN_SUB_STMT : 122' - - 'box.error.TRANSACTION_YIELD : 154' - - 'box.error.UNSUPPORTED : 5' - - 'box.error.LAST_DROP : 15' + - 'box.error.RELOAD_CFG : 58' - 'box.error.SPACE_FIELD_IS_DUPLICATE : 149' + - 'box.error.LAST_DROP : 15' + - 'box.error.SEQUENCE_OVERFLOW : 147' - 'box.error.DECOMPRESSION : 124' - 'box.error.CREATE_SEQUENCE : 142' - 'box.error.CREATE_USER : 43' - - 'box.error.SEQUENCE_OVERFLOW : 147' + - 'box.error.FUNCTION_MAX : 54' - 'box.error.INSTANCE_UUID_MISMATCH : 66' - - 'box.error.RELOAD_CFG : 58' + - 'box.error.TUPLE_FORMAT_LIMIT : 16' - 'box.error.SYSTEM : 115' - 'box.error.KEY_PART_IS_TOO_LONG : 118' - - 'box.error.MORE_THAN_ONE_TUPLE : 41' - 'box.error.TRUNCATE_SYSTEM_SPACE : 137' - - 'box.error.NO_SUCH_SAVEPOINT : 61' - 'box.error.VY_QUOTA_TIMEOUT : 135' - - 'box.error.PRIV_NOT_GRANTED : 91' + - 'box.error.NO_SUCH_SAVEPOINT : 61' + - 'box.error.PROTOCOL : 104' + - 'box.error.READ_VIEW_ABORTED : 130' - 'box.error.WRONG_INDEX_OPTIONS : 108' - 'box.error.INVALID_VYLOG_FILE : 133' - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127' - - 'box.error.BEFORE_REPLACE_RET : 53' + - 'box.error.DATA_STRUCTURE_MISMATCH : 55' - 'box.error.USER_MAX : 56' - - 'box.error.INVALID_MSGPACK : 20' + - 'box.error.BEFORE_REPLACE_RET : 53' - 'box.error.TUPLE_NOT_ARRAY : 22' - 'box.error.KEY_PART_COUNT : 31' - 'box.error.ALTER_SPACE : 12' @@ -436,47 +437,47 @@ t; - 'box.error.DROP_SEQUENCE : 144' - 'box.error.INVALID_XLOG_ORDER : 76' - 'box.error.UNKNOWN_REQUEST_TYPE : 48' - - 'box.error.PROC_LUA : 32' + - 'box.error.PROC_RET : 21' - 'box.error.SUB_STMT_MAX : 121' - 'box.error.ROLE_NOT_GRANTED : 92' - 'box.error.SPACE_EXISTS : 10' - - 'box.error.UPDATE_INTEGER_OVERFLOW : 95' + - 'box.error.UNSUPPORTED : 5' - 'box.error.MIN_FIELD_COUNT : 39' - 'box.error.NO_SUCH_SPACE : 36' - 'box.error.WRONG_INDEX_PARTS : 107' - 'box.error.REPLICASET_UUID_MISMATCH : 63' - 'box.error.UPDATE_FIELD : 29' - 'box.error.INDEX_EXISTS : 85' - - 'box.error.SPLICE : 25' + - 'box.error.DROP_SPACE : 11' - 'box.error.COMPRESSION : 119' - 'box.error.INVALID_ORDER : 68' - - 'box.error.UNKNOWN : 0' + - 'box.error.SPLICE : 25' - 'box.error.NO_SUCH_GROUP : 155' - - 'box.error.TUPLE_FORMAT_LIMIT : 16' + - 'box.error.INVALID_MSGPACK : 20' - 'box.error.DROP_PRIMARY_KEY : 17' - 'box.error.NULLABLE_PRIMARY : 152' - 'box.error.NO_SUCH_SEQUENCE : 145' - 'box.error.INJECTION : 8' - 'box.error.INVALID_UUID : 64' - - 'box.error.IDENTIFIER : 70' + - 'box.error.NO_SUCH_ROLE : 82' - 'box.error.TIMEOUT : 78' + - 'box.error.ITERATOR_TYPE : 72' - 'box.error.REPLICA_MAX : 73' - - 'box.error.NO_SUCH_ROLE : 82' - - 'box.error.DROP_SPACE : 11' + - 'box.error.UNKNOWN : 0' - 'box.error.MISSING_REQUEST_FIELD : 69' - 'box.error.MISSING_SNAPSHOT : 93' - 'box.error.WRONG_SPACE_OPTIONS : 111' - 'box.error.READONLY : 7' - - 'box.error.RTREE_RECT : 101' + - 'box.error.UPDATE_INTEGER_OVERFLOW : 95' - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105' - 'box.error.NO_CONNECTION : 77' - 'box.error.UNSUPPORTED_PRIV : 98' - 'box.error.WRONG_SCHEMA_VERSION : 109' - 'box.error.ROLLBACK_IN_SUB_STMT : 123' - - 'box.error.PROTOCOL : 104' - - 'box.error.INVALID_XLOG_TYPE : 125' - - 'box.error.INDEX_PART_TYPE_MISMATCH : 24' - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112' + - 'box.error.CONNECTION_TO_SELF : 117' + - 'box.error.INDEX_PART_TYPE_MISMATCH : 24' + - 
'box.error.INVALID_XLOG_TYPE : 125' ... test_run:cmd("setopt delimiter ''"); --- diff --git a/test/engine/tuple.result b/test/engine/tuple.result index 35c700e..e551f1a 100644 --- a/test/engine/tuple.result +++ b/test/engine/tuple.result @@ -954,6 +954,377 @@ type(tuple:tomap().fourth) s:drop() --- ... +-- +-- gh-1012: Indexes for JSON-defined paths. +-- +box.cfg() +--- +... +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': same key + part is indexed twice' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +- error: 'Wrong index options (field 2): ''path'' must be string' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''map'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +--- +- error: 'Can''t create or modify index ''test1'' in space ''withdata'': field type + ''array'' is not supported' +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: Field 3 has type 'map' in one index, but type 'string' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +--- +- error: Field 3 has type 'map' in one index, but type 'array' in another +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}}) +--- +- error: 'Wrong index options (field 3): invalid JSON path ''FIO....fname'': path + has invalid structure (error at position 5)' +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +--- +- error: 'Tuple field 3 type does not match one required by operation: expected map' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +--- +- error: 'Tuple field 3 type does not match one required by operation: expected string' +... +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +--- +- error: 'Tuple doesn''t math document structure: invalid field 3 document content + ''{"town": "London", "FIO": {"fname": "James"}}'': map doesn''t contain key ''sname'' + defined in index' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... 
+idx:select() +--- +- - [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] + - [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +idx:min() +--- +- [7, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:max() +--- +- [7, 7, {'town': 'Moscow', 'FIO': {'fname': 'Max', 'data': 'extra', 'sname': 'Isaev'}}, + 4, 5] +... +s:drop() +--- +... +s = box.schema.create_space('withdata', {engine = engine}) +--- +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[2]'} +--- +... +pk = s:create_index('pk', {parts = parts}) +--- +... +s:insert{{1, 2}, 3} +--- +- [[1, 2], 3] +... +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +--- +... +s:get(2) +--- +- [[1, 2], 8] +... +s:drop() +--- +... +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk = s:create_index('primary', { type = 'tree' }) +--- +... +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +--- +- [1, 7, {'town': 'London', 'FIO': 1234}, 4, 5] +... +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [2, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +--- +- [4, 7, {'town': 'London', 'FIO': [1, 2, 3]}, 4, 5] +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field 3 type does not match one required by operation: expected map' +... +_ = s:delete(1) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: Duplicate key exists in unique index 'test1' in space 'withdata' +... +_ = s:delete(2) +--- +... +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +--- +- error: 'Tuple field 3 type does not match one required by operation: expected map' +... +_ = s:delete(4) +--- +... +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +--- +... +assert(idx ~= nil) +--- +- true +... +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +--- +- error: Field 3 has type 'number' in one index, but type 'string' in another +... +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}}}) +--- +... +assert(idx2 ~= nil) +--- +- true +... +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +--- +... +idx:select() +--- +- - [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] + - [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:min() +--- +- [5, 7, {'town': 'Matrix', 'FIO': {'fname': 'Agent', 'sname': 'Smith'}}, 4, 5] +... +idx:max() +--- +- [3, 7, {'town': 'London', 'FIO': {'fname': 'James', 'sname': 'Bond'}}, 4, 5] +... +idx:drop() +--- +... +s:drop() +--- +... +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +parts = {} +--- +... 
+parts[1] = {1, 'str', path='[3][2].a'} +--- +... +parts[2] = {1, 'unsigned', path = '[3][1]'} +--- +... +parts[3] = {2, 'str', path = '[2].d[1]'} +--- +... +pk = s:create_index('primary', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +--- +- [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, + [1, 2, 3]] +... +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +--- +- error: Duplicate key exists in unique index 'primary' in space 'withdata' +... +parts = {} +--- +... +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +--- +... +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +--- +... +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +--- +... +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +--- +... +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +--- +- error: 'Tuple doesn''t math document structure: invalid field 4 document content + ''[]'': array size 1 is less than size of 2 defined in index' +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b' } +--- +... +parts[2] = {3, 'unsigned'} +--- +... +crosspart_idx = s:create_index('crosspart', { parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +parts = {} +--- +... +parts[1] = {1, 'unsigned', path='[3][2].b'} +--- +... +num_idx = s:create_index('numeric', {parts = parts}) +--- +... +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:get(2) +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +num_idx:select() +--- +- - [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 9, 2, 3]] + - [[1, 2, [3, {1: 3, 'a': 'str', 'b': 5}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], + 6, [1, 2, 3]] + - [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [ + 0]] +... +num_idx:max() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +num_idx:min() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +assert(crosspart_idx:max() == num_idx:max()) +--- +- true +... +assert(crosspart_idx:min() == num_idx:min()) +--- +- true +... +trap_idx:max() +--- +- [[1, 2, [3, {'a': 'str2', 'b': 2}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [9, + 2, 3]] +... +trap_idx:min() +--- +- [[1, 2, [3, {'a': 'str3', 'b': 9}]], ['c', {'d': ['e', 'f'], 'e': 'g'}], 6, [0]] +... +s:drop() +--- +... +s = box.schema.space.create('withdata', {engine = engine}) +--- +... +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +--- +... +assert(pk_simplified.path == box.NULL) +--- +- true +... +idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}}) +--- +... +s:insert{31, {a = 1, aa = -1}} +--- +- [31, {'a': 1, 'aa': -1}] +... +s:insert{22, {a = 2, aa = -2}} +--- +- [22, {'a': 2, 'aa': -2}] +... +s:insert{13, {a = 3, aa = -3}} +--- +- [13, {'a': 3, 'aa': -3}] +... +idx:select() +--- +- - [31, {'a': 1, 'aa': -1}] + - [22, {'a': 2, 'aa': -2}] + - [13, {'a': 3, 'aa': -3}] +... 
+idx:alter({parts = {{2, 'integer', path = 'aa'}}}) +--- +... +idx:select() +--- +- - [13, {'a': 3, 'aa': -3}] + - [22, {'a': 2, 'aa': -2}] + - [31, {'a': 1, 'aa': -1}] +... +s:drop() +--- +... engine = nil --- ... diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua index edc3dab..e865c67 100644 --- a/test/engine/tuple.test.lua +++ b/test/engine/tuple.test.lua @@ -312,5 +312,111 @@ tuple:tomap().fourth type(tuple:tomap().fourth) s:drop() +-- +-- gh-1012: Indexes for JSON-defined paths. +-- +box.cfg() +s = box.schema.space.create('withdata', {engine = engine}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO["fname"]'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 666}, {3, 'str', path = '["FIO"]["fname"]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'map', path = 'FIO'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'array', path = '[1]'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '[1].sname'}, {3, 'str', path = '["FIO"].fname'}}}) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = 'FIO....fname'}}}) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +assert(idx ~= nil) +s:insert{7, 7, {town = 'London', FIO = 666}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 666, sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = "James"}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{7, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond', data = "extra"}}, 4, 5} +s:insert{7, 7, {town = 'Moscow', FIO = {fname = 'Max', sname = 'Isaev', data = "extra"}}, 4, 5} +idx:select() +idx:min() +idx:max() +s:drop() + +s = box.schema.create_space('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'unsigned', path='[2]'} +pk = s:create_index('pk', {parts = parts}) +s:insert{{1, 2}, 3} +s:upsert({{box.null, 2}}, {{'+', 2, 5}}) +s:get(2) +s:drop() + +-- Create index on space with data +s = box.schema.space.create('withdata', {engine = engine}) +pk = s:create_index('primary', { type = 'tree' }) +s:insert{1, 7, {town = 'London', FIO = 1234}, 4, 5} +s:insert{2, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{3, 7, {town = 'London', FIO = {fname = 'James', sname = 'Bond'}}, 4, 5} +s:insert{4, 7, {town = 'London', FIO = {1,2,3}}, 4, 5} +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(1) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(2) +s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]'}, {3, 'str', path = '["FIO"]["sname"]'}}}) +_ = s:delete(4) +idx = s:create_index('test1', {parts = {{2, 'number'}, {3, 'str', path = '["FIO"]["fname"]', is_nullable = true}, {3, 'str', path = '["FIO"]["sname"]'}, {3, 'str', path = '["FIO"]["extra"]', is_nullable = true}}}) +assert(idx ~= nil) +s:create_index('test2', {parts = {{2, 'number'}, {3, 'number', path = '["FIO"]["fname"]'}}}) +idx2 = s:create_index('test2', {parts = {{2, 'number'}, {3, 
'str', path = '["FIO"]["fname"]'}}}) +assert(idx2 ~= nil) +t = s:insert{5, 7, {town = 'Matrix', FIO = {fname = 'Agent', sname = 'Smith'}}, 4, 5} +idx:select() +idx:min() +idx:max() +idx:drop() +s:drop() + +-- Test complex JSON indexes +s = box.schema.space.create('withdata', {engine = engine}) +parts = {} +parts[1] = {1, 'str', path='[3][2].a'} +parts[2] = {1, 'unsigned', path = '[3][1]'} +parts[3] = {2, 'str', path = '[2].d[1]'} +pk = s:create_index('primary', { type = 'tree', parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {1, 2, 3}} +s:insert{{1, 2, {3, {a = 'str', b = 1}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6} +parts = {} +parts[1] = {4, 'unsigned', path='[1]', is_nullable = false} +parts[2] = {4, 'unsigned', path='[2]', is_nullable = true} +parts[3] = {4, 'unsigned', path='[4]', is_nullable = true} +trap_idx = s:create_index('trap', { type = 'tree', parts = parts}) +s:insert{{1, 2, {3, {3, a = 'str2', b = 5}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b' } +parts[2] = {3, 'unsigned'} +crosspart_idx = s:create_index('crosspart', { parts = parts}) +s:insert{{1, 2, {3, {a = 'str2', b = 2}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {9, 2, 3}} +parts = {} +parts[1] = {1, 'unsigned', path='[3][2].b'} +num_idx = s:create_index('numeric', {parts = parts}) +s:insert{{1, 2, {3, {a = 'str3', b = 9}}}, {'c', {d = {'e', 'f'}, e = 'g'}}, 6, {0}} +num_idx:get(2) +num_idx:select() +num_idx:max() +num_idx:min() +assert(crosspart_idx:max() == num_idx:max()) +assert(crosspart_idx:min() == num_idx:min()) +trap_idx:max() +trap_idx:min() +s:drop() + +s = box.schema.space.create('withdata', {engine = engine}) +pk_simplified = s:create_index('primary', { type = 'tree', parts = {{1, 'unsigned'}}}) +assert(pk_simplified.path == box.NULL) +idx = s:create_index('idx', {parts = {{2, 'integer', path = 'a'}}}) +s:insert{31, {a = 1, aa = -1}} +s:insert{22, {a = 2, aa = -2}} +s:insert{13, {a = 3, aa = -3}} +idx:select() +idx:alter({parts = {{2, 'integer', path = 'aa'}}}) +idx:select() +s:drop() + engine = nil test_run = nil -- 2.7.4