From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 7CEBA280A4 for ; Mon, 6 Aug 2018 08:27:08 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id zSB-g--EHus2 for ; Mon, 6 Aug 2018 08:27:08 -0400 (EDT) Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id C5CE426F32 for ; Mon, 6 Aug 2018 08:27:07 -0400 (EDT) From: Kirill Shcherbatov Subject: [tarantool-patches] [PATCH v1 3/5] box: introduce path field in key_part Date: Mon, 6 Aug 2018 15:27:00 +0300 Message-Id: <63cb256a0206e222ee10199e7d671700e44ce2aa.1533558332.git.kshcherbatov@tarantool.org> In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Kirill Shcherbatov As we need to store a user-defined JSON path in key_part and key_part_def, we have introduced the path and path_len fields. The JSON path is verified and transformed to canonical form when the index msgpack is unpacked. Because field names specified in the format can be changed, a key_part path persisted in Tarantool must always start with a first-level field access via array index (not by name). Part of #1012. 
--- src/box/key_def.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++---- src/box/key_def.h | 13 +++- src/box/lua/space.cc | 5 ++ src/box/schema.cc | 8 +-- src/box/vy_log.c | 3 +- 5 files changed, 205 insertions(+), 21 deletions(-) diff --git a/src/box/key_def.c b/src/box/key_def.c index 8a4262b..79e07f8 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -35,12 +35,15 @@ #include "column_mask.h" #include "schema_def.h" #include "coll_id_cache.h" +#include "fiber.h" +#include "json/path.h" static const struct key_part_def key_part_def_default = { 0, field_type_MAX, COLL_NONE, false, + NULL }; static int64_t @@ -53,6 +56,7 @@ part_type_by_name_wrapper(const char *str, uint32_t len) #define PART_OPT_FIELD "field" #define PART_OPT_COLLATION "collation" #define PART_OPT_NULLABILITY "is_nullable" +#define PART_OPT_PATH "path" const struct opt_def part_def_reg[] = { OPT_DEF_ENUM(PART_OPT_TYPE, field_type, struct key_part_def, type, @@ -61,6 +65,7 @@ const struct opt_def part_def_reg[] = { OPT_DEF(PART_OPT_COLLATION, OPT_UINT32, struct key_part_def, coll_id), OPT_DEF(PART_OPT_NULLABILITY, OPT_BOOL, struct key_part_def, is_nullable), + OPT_DEF(PART_OPT_PATH, OPT_STRPTR, struct key_part_def, path), OPT_END, }; @@ -103,7 +108,27 @@ key_def_dup(const struct key_def *src) return NULL; } memcpy(res, src, sz); + uint32_t i = 0; + for (; i < src->part_count; i++) { + if (src->parts[i].path == NULL) { + res->parts[i].path = NULL; + continue; + } + char *path = strdup(src->parts[i].path); + if (path == NULL) { + diag_set(OutOfMemory, src->parts[i].path_len + 1, + "strdup", "path"); + goto error; + } + res->parts[i].path = path; + } return res; + +error: + for (uint32_t j = 0; j < i; j++) + free((void *)res->parts[j].path); + free(res); + return NULL; } void @@ -118,6 +143,8 @@ key_def_swap(struct key_def *old_def, struct key_def *new_def) void key_def_delete(struct key_def *def) { + for (uint32_t i = 0; i < def->part_count; i++) + free((void *)def->parts[i].path); 
free(def); } @@ -160,19 +187,34 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count) if (coll_id == NULL) { diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, i + 1, "collation was not found by ID"); - key_def_delete(def); - return NULL; + goto error; } coll = coll_id->coll; } + char *path = NULL; + if (part->path != NULL && + (path = strdup(part->path)) == NULL) { + diag_set(OutOfMemory, strlen(part->path) + 1, "strdup", + "path"); + goto error; + } key_def_set_part(def, i, part->fieldno, part->type, - part->is_nullable, coll, part->coll_id); + part->is_nullable, coll, part->coll_id, + path); } return def; +error: + /* + * Don't care about non-initialized fields as them filled + * with 0 via calloc. + */ + key_def_delete(def); + return NULL; } -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) +int +key_def_dump_parts(struct region *pool, const struct key_def *def, + struct key_part_def *parts) { for (uint32_t i = 0; i < def->part_count; i++) { const struct key_part *part = &def->parts[i]; @@ -181,7 +223,20 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts) part_def->type = part->type; part_def->is_nullable = part->is_nullable; part_def->coll_id = part->coll_id; + if (part->path != NULL) { + part_def->path = region_alloc(pool, part->path_len + 1); + if (part_def->path == NULL) { + diag_set(OutOfMemory, part->path_len + 1, + "region_alloc", "part_def->path"); + return -1; + } + memcpy(part_def->path, part->path, part->path_len); + part_def->path[part->path_len] = '\0'; + } else { + part_def->path = NULL; + } } + return 0; } box_key_def_t * @@ -195,7 +250,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count) key_def_set_part(key_def, item, fields[item], (enum field_type)types[item], key_part_def_default.is_nullable, NULL, - COLL_NONE); + COLL_NONE, NULL); } return key_def; } @@ -241,6 +296,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1, if 
(part1->is_nullable != part2->is_nullable) return part1->is_nullable < part2->is_nullable ? -1 : 1; + /* Lexicographic strings order. */ + uint32_t len = MIN(part1->path_len, part2->path_len); + int rc = 0; + if ((rc = strncmp(part1->path, part2->path, len)) != 0) + return rc; } return part_count1 < part_count2 ? -1 : part_count1 > part_count2; } @@ -248,7 +308,7 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1, void key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, bool is_nullable, struct coll *coll, - uint32_t coll_id) + uint32_t coll_id, char *path) { assert(part_no < def->part_count); assert(type < field_type_MAX); @@ -260,6 +320,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, def->parts[part_no].coll_id = coll_id; def->parts[part_no].slot_cache = TUPLE_OFFSET_SLOT_NIL; def->parts[part_no].format_epoch = 0; + def->parts[part_no].path = path; + def->parts[part_no].path_len = path != NULL ? 
strlen(path) : 0; column_mask_set_fieldno(&def->column_mask, fieldno); /** * When all parts are set, initialize the tuple @@ -304,8 +366,15 @@ key_def_snprint_parts(char *buf, int size, const struct key_part_def *parts, for (uint32_t i = 0; i < part_count; i++) { const struct key_part_def *part = &parts[i]; assert(part->type < field_type_MAX); - SNPRINT(total, snprintf, buf, size, "%d, '%s'", - (int)part->fieldno, field_type_strs[part->type]); + if (part->path != NULL) { + SNPRINT(total, snprintf, buf, size, "%d, '%s', '%s'", + (int) part->fieldno, part->path, + field_type_strs[part->type]); + } else { + SNPRINT(total, snprintf, buf, size, "%d, '%s'", + (int) part->fieldno, + field_type_strs[part->type]); + } if (i < part_count - 1) SNPRINT(total, snprintf, buf, size, ", "); } @@ -324,6 +393,8 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) count++; if (part->is_nullable) count++; + if (part->path != NULL) + count++; size += mp_sizeof_map(count); size += mp_sizeof_str(strlen(PART_OPT_FIELD)); size += mp_sizeof_uint(part->fieldno); @@ -338,6 +409,10 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count) size += mp_sizeof_str(strlen(PART_OPT_NULLABILITY)); size += mp_sizeof_bool(part->is_nullable); } + if (part->path != NULL) { + size += mp_sizeof_str(strlen(PART_OPT_PATH)); + size += mp_sizeof_str(strlen(part->path)); + } } return size; } @@ -351,6 +426,8 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, int count = 2; if (part->coll_id != COLL_NONE) count++; + if (part->path != NULL) + count++; if (part->is_nullable) count++; data = mp_encode_map(data, count); @@ -372,6 +449,12 @@ key_def_encode_parts(char *data, const struct key_part_def *parts, strlen(PART_OPT_NULLABILITY)); data = mp_encode_bool(data, part->is_nullable); } + if (part->path != NULL) { + data = mp_encode_str(data, PART_OPT_PATH, + strlen(PART_OPT_PATH)); + data = mp_encode_str(data, part->path, + strlen(part->path)); + } } 
return data; } @@ -432,6 +515,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -445,8 +529,9 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, return key_def_decode_parts_166(parts, part_count, data, fields, field_count); } + struct key_part_def *part; for (uint32_t i = 0; i < part_count; i++) { - struct key_part_def *part = &parts[i]; + part = &parts[i]; if (mp_typeof(**data) != MP_MAP) { diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, i + TUPLE_INDEX_BASE, @@ -456,7 +541,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, *part = key_part_def_default; if (opts_decode(part, part_def_reg, data, ER_WRONG_INDEX_OPTIONS, i + TUPLE_INDEX_BASE, - NULL) != 0) + &fiber()->gc) != 0) return -1; if (part->type == field_type_MAX) { diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, @@ -473,8 +558,75 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count, "string and scalar parts"); return -1; } + if (part->path != NULL) { + struct region *region = &fiber()->gc; + size_t path_len = strlen(part->path); + struct json_path_parser parser; + struct json_path_node node; + json_path_parser_create(&parser, part->path, path_len); + /* + * A worst-case scenario is .a -> ["a"] + * i.e. 3*path_len + 1 is enough. 
+ */ + uint32_t size = region_used(region); + char *path = + region_alloc(region, 3 * path_len + 1); + if (path == NULL) { + diag_set(OutOfMemory, 3 * path_len + 1, + "region_alloc", "path"); + return -1; + } + part->path = path; + int rc = json_path_next(&parser, &node); + if (rc != 0) + goto error_invalid_json; + if (node.type != JSON_PATH_NUM) { + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part->fieldno, + "invalid JSON path: first part should " + "be defined as array"); + return -1; + } + if (node.num - TUPLE_INDEX_BASE != part->fieldno) { + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part->fieldno, + "invalid JSON path: first part refers " + "to invalid field"); + return -1; + } + uint32_t lexemes = 0; + do { + if (node.type == JSON_PATH_NUM) { + path += sprintf(path, "[%u]", + (unsigned)node.num); + } else if (node.type == JSON_PATH_STR) { + path += sprintf(path, "[\"%.*s\"]", + node.len, node.str); + } else { + unreachable(); + } + lexemes++; + } while ((rc = json_path_next(&parser, &node)) == 0 && + node.type != JSON_PATH_END); + if (rc != 0 || node.type != JSON_PATH_END) + goto error_invalid_json; + /* JSON index is useless. 
*/ + if (lexemes == 1) { + region_truncate(region, size); + part->path = NULL; + } else { + region_truncate(region, + size + (path - part->path + 1)); + } + } } return 0; + +error_invalid_json: + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, + part->fieldno + TUPLE_INDEX_BASE, + "invalid JSON path: path has invalid structure"); + return -1; } int @@ -497,6 +649,7 @@ key_def_decode_parts_160(struct key_part_def *parts, uint32_t part_count, fields[part->fieldno].is_nullable : key_part_def_default.is_nullable); part->coll_id = COLL_NONE; + part->path = NULL; } return 0; } @@ -558,8 +711,15 @@ key_def_merge(const struct key_def *first, const struct key_def *second) part = first->parts; end = part + first->part_count; for (; part != end; part++) { + char *path = NULL; + if (part->path != NULL && (path = strdup(part->path)) == NULL) { + diag_set(OutOfMemory, part->path_len + 1, "strdup", + "path"); + goto error; + } key_def_set_part(new_def, pos++, part->fieldno, part->type, - part->is_nullable, part->coll, part->coll_id); + part->is_nullable, part->coll, part->coll_id, + path); } /* Set-append second key def's part to the new key def. */ @@ -568,10 +728,21 @@ key_def_merge(const struct key_def *first, const struct key_def *second) for (; part != end; part++) { if (key_def_find(first, part->fieldno)) continue; + char *path = NULL; + if (part->path != NULL && (path = strdup(part->path)) == NULL) { + diag_set(OutOfMemory, part->path_len + 1, "strdup", + "path"); + goto error; + } key_def_set_part(new_def, pos++, part->fieldno, part->type, - part->is_nullable, part->coll, part->coll_id); + part->is_nullable, part->coll, part->coll_id, + path); } return new_def; + +error: + key_def_delete(new_def); + return NULL; } int diff --git a/src/box/key_def.h b/src/box/key_def.h index 42c054c..f14a928 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -54,6 +54,8 @@ struct key_part_def { uint32_t coll_id; /** True if a key part can store NULLs. 
*/ bool is_nullable; + /** JSON path to data. */ + char *path; }; /** @@ -78,6 +80,10 @@ struct key_part { uint64_t format_epoch; /** Cache for corresponding tuple_format slot_offset. */ int32_t slot_cache; + /** JSON path to data. */ + const char *path; + /** JSON path length. */ + uint32_t path_len; }; struct key_def; @@ -246,8 +252,9 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count); /** * Dump part definitions of the given key def. */ -void -key_def_dump_parts(const struct key_def *def, struct key_part_def *parts); +int +key_def_dump_parts(struct region *pool, const struct key_def *def, + struct key_part_def *parts); /** * Set a single key part in a key def. @@ -256,7 +263,7 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts); void key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno, enum field_type type, bool is_nullable, struct coll *coll, - uint32_t coll_id); + uint32_t coll_id, char *path); /** * Update 'has_optional_parts' of @a key_def with correspondence diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc index 580e0ea..98bb969 100644 --- a/src/box/lua/space.cc +++ b/src/box/lua/space.cc @@ -295,6 +295,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i) lua_pushnumber(L, part->fieldno + TUPLE_INDEX_BASE); lua_setfield(L, -2, "fieldno"); + if (part->path != NULL) { + lua_pushstring(L, part->path); + lua_setfield(L, -2, "path"); + } + lua_pushboolean(L, part->is_nullable); lua_setfield(L, -2, "is_nullable"); diff --git a/src/box/schema.cc b/src/box/schema.cc index 433f52c..a01126a 100644 --- a/src/box/schema.cc +++ b/src/box/schema.cc @@ -291,13 +291,13 @@ schema_init() auto key_def_guard = make_scoped_guard([&] { key_def_delete(key_def); }); key_def_set_part(key_def, 0 /* part no */, 0 /* field no */, - FIELD_TYPE_STRING, false, NULL, COLL_NONE); + FIELD_TYPE_STRING, false, NULL, COLL_NONE, NULL); sc_space_new(BOX_SCHEMA_ID, "_schema", key_def, 
&on_replace_schema, NULL); /* _space - home for all spaces. */ key_def_set_part(key_def, 0 /* part no */, 0 /* field no */, - FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE); + FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL); /* _collation - collation description. */ sc_space_new(BOX_COLLATION_ID, "_collation", key_def, @@ -345,10 +345,10 @@ schema_init() diag_raise(); /* space no */ key_def_set_part(key_def, 0 /* part no */, 0 /* field no */, - FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE); + FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL); /* index no */ key_def_set_part(key_def, 1 /* part no */, 1 /* field no */, - FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE); + FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE, NULL); sc_space_new(BOX_INDEX_ID, "_index", key_def, &alter_space_on_replace_index, &on_stmt_begin_index); } diff --git a/src/box/vy_log.c b/src/box/vy_log.c index 3843cad..b1c6659 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -711,7 +711,8 @@ vy_log_record_dup(struct region *pool, const struct vy_log_record *src) "struct key_part_def"); goto err; } - key_def_dump_parts(src->key_def, dst->key_parts); + if (key_def_dump_parts(pool, src->key_def, dst->key_parts) != 0) + goto err; dst->key_part_count = src->key_def->part_count; dst->key_def = NULL; } -- 2.7.4