[tarantool-patches] Re: [PATCH v1 3/5] box: introduce path field in key_part
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Thu Aug 9 01:03:13 MSK 2018
Thanks for the patch! See 8 comments below.
On 06/08/2018 15:27, Kirill Shcherbatov wrote:
> As we need to store user-defined JSON path in key_part
> and key_part_def, we have introduced path and path_len
> fields. JSON path is verified and transformed to canonical
> form on index msgpack unpack.
> Because of field names specified as format could be changed
> key_part path persisted in Tarantool should be always started
> with first-level field access via array index(not by name).
>
> Part of #1012.
> ---
> src/box/key_def.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++----
> src/box/key_def.h | 13 +++-
> src/box/lua/space.cc | 5 ++
> src/box/schema.cc | 8 +--
> src/box/vy_log.c | 3 +-
> 5 files changed, 205 insertions(+), 21 deletions(-)
>
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 8a4262b..79e07f8 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -103,7 +108,27 @@ key_def_dup(const struct key_def *src)
> return NULL;
> }
> memcpy(res, src, sz);
> + uint32_t i = 0;
> + for (; i < src->part_count; i++) {
> + if (src->parts[i].path == NULL) {
> + res->parts[i].path = NULL;
> + continue;
> + }
> + char *path = strdup(src->parts[i].path);
1. Please, keep monolithic key_def memory layout.
> + if (path == NULL) {
> + diag_set(OutOfMemory, src->parts[i].path_len + 1,
> + "strdup", "path");
> + goto error;
> + }
> + res->parts[i].path = path;
> + }
> return res;
> +
> +error:
> + for (uint32_t j = 0; j < i; j++)
> + free((void *)res->parts[j].path);
> + free(res);
> + return NULL;
> }
>
> void
> @@ -241,6 +296,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
> if (part1->is_nullable != part2->is_nullable)
> return part1->is_nullable <
> part2->is_nullable ? -1 : 1;
> + /* Lexicographic strings order. */
> + uint32_t len = MIN(part1->path_len, part2->path_len);
> + int rc = 0;
> + if ((rc = strncmp(part1->path, part2->path, len)) != 0)
2. As I said you at least two times, strncmp treats non-equal strings
as the same one when the one is prefix of another. So this comparison
is not correct.
> + return rc;
> }
> return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
> }
> @@ -445,8 +529,9 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
> return key_def_decode_parts_166(parts, part_count, data,
> fields, field_count);
> }
> + struct key_part_def *part;
> for (uint32_t i = 0; i < part_count; i++) {
> - struct key_part_def *part = &parts[i];
3. Just do ++part on the previous line.
> + part = &parts[i];
> if (mp_typeof(**data) != MP_MAP) {
> diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> i + TUPLE_INDEX_BASE,
> @@ -473,8 +558,75 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
> "string and scalar parts");
> return -1;
> }
> + if (part->path != NULL) {
> + struct region *region = &fiber()->gc;
> + size_t path_len = strlen(part->path);
> + struct json_path_parser parser;
> + struct json_path_node node;
> + json_path_parser_create(&parser, part->path, path_len);
> + /*
> + * A worst-case scenario is .a -> ["a"]
> + * i.e. 3*path_len + 1 is enough.
4. As I understand, len of ".a" is 2, len of "['a']" is 5. So
it is actually 2*len + 1. Not 3*len + 1. It is not?
> + */
> + uint32_t size = region_used(region);
5. You do not need to truncate the region yourself. This is done
later by commit/rollback. But you can reuse previous the region
allocations if store the region memory pointer out of this cycle
together size. And when a new path len * 2 + 1 > than the memory
size you have, you allocate more. Else you can reuse it. In the
best case it reduces number of allocations in part_count times,
in the worst one it is not worse than your way.
> + char *path =
> + region_alloc(region, 3 * path_len + 1);
> + if (path == NULL) {
> + diag_set(OutOfMemory, 3 * path_len + 1,
> + "region_alloc", "path");
> + return -1;
> + }
> + part->path = path;
> + int rc = json_path_next(&parser, &node);
> + if (rc != 0)
> + goto error_invalid_json;
> + if (node.type != JSON_PATH_NUM) {
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part->fieldno,
> + "invalid JSON path: first part should "
> + "be defined as array");
6. As array index. Not array.
> + return -1;
> + }
> + if (node.num - TUPLE_INDEX_BASE != part->fieldno) {
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part->fieldno,
> + "invalid JSON path: first part refers "
> + "to invalid field");
> + return -1;
> + }
> + uint32_t lexemes = 0;
> + do {
> + if (node.type == JSON_PATH_NUM) {
> + path += sprintf(path, "[%u]",
> + (unsigned)node.num);
> + } else if (node.type == JSON_PATH_STR) {
> + path += sprintf(path, "[\"%.*s\"]",
> + node.len, node.str);
> + } else {
> + unreachable();
> + }
> + lexemes++;
> + } while ((rc = json_path_next(&parser, &node)) == 0 &&
> + node.type != JSON_PATH_END);
> + if (rc != 0 || node.type != JSON_PATH_END)
> + goto error_invalid_json;
> + /* JSON index is useless. */
> + if (lexemes == 1) {
> + region_truncate(region, size);
> + part->path = NULL;
> + } else {
> + region_truncate(region,
> + size + (path - part->path + 1));
> + }
> + }
7. Please, reduce indentation level. You can check
'if part->path == NULL then continue' and move this
code block 1 tab left. Even now you can join some of
lines into one.
> }
> return 0;
> +
> +error_invalid_json:
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part->fieldno + TUPLE_INDEX_BASE,
> + "invalid JSON path: path has invalid structure");
8. Please, write on which position the error occurred.
> + return -1;
> }
>
> in
More information about the Tarantool-patches
mailing list