From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org,
Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v1 3/5] box: introduce path field in key_part
Date: Thu, 9 Aug 2018 01:03:13 +0300 [thread overview]
Message-ID: <295310f0-1590-0b32-47f7-3231f925e19a@tarantool.org> (raw)
In-Reply-To: <63cb256a0206e222ee10199e7d671700e44ce2aa.1533558332.git.kshcherbatov@tarantool.org>
Thanks for the patch! See 8 comments below.
On 06/08/2018 15:27, Kirill Shcherbatov wrote:
> As we need to store user-defined JSON path in key_part
> and key_part_def, we have introduced path and path_len
> fields. JSON path is verified and transformed to canonical
> form on index msgpack unpack.
> Because of field names specified as format could be changed
> key_part path persisted in Tarantool should be always started
> with first-level field access via array index(not by name).
>
> Part of #1012.
> ---
> src/box/key_def.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++----
> src/box/key_def.h | 13 +++-
> src/box/lua/space.cc | 5 ++
> src/box/schema.cc | 8 +--
> src/box/vy_log.c | 3 +-
> 5 files changed, 205 insertions(+), 21 deletions(-)
>
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 8a4262b..79e07f8 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -103,7 +108,27 @@ key_def_dup(const struct key_def *src)
> return NULL;
> }
> memcpy(res, src, sz);
> + uint32_t i = 0;
> + for (; i < src->part_count; i++) {
> + if (src->parts[i].path == NULL) {
> + res->parts[i].path = NULL;
> + continue;
> + }
> + char *path = strdup(src->parts[i].path);
1. Please, keep monolithic key_def memory layout.
> + if (path == NULL) {
> + diag_set(OutOfMemory, src->parts[i].path_len + 1,
> + "strdup", "path");
> + goto error;
> + }
> + res->parts[i].path = path;
> + }
> return res;
> +
> +error:
> + for (uint32_t j = 0; j < i; j++)
> + free((void *)res->parts[j].path);
> + free(res);
> + return NULL;
> }
>
> void
> @@ -241,6 +296,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
> if (part1->is_nullable != part2->is_nullable)
> return part1->is_nullable <
> part2->is_nullable ? -1 : 1;
> + /* Lexicographic strings order. */
> + uint32_t len = MIN(part1->path_len, part2->path_len);
> + int rc = 0;
> + if ((rc = strncmp(part1->path, part2->path, len)) != 0)
2. As I said you at least two times, strncmp treats non-equal strings
as the same one when the one is prefix of another. So this comparison
is not correct.
> + return rc;
> }
> return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
> }
> @@ -445,8 +529,9 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
> return key_def_decode_parts_166(parts, part_count, data,
> fields, field_count);
> }
> + struct key_part_def *part;
> for (uint32_t i = 0; i < part_count; i++) {
> - struct key_part_def *part = &parts[i];
3. Just do ++part on the previous line.
> + part = &parts[i];
> if (mp_typeof(**data) != MP_MAP) {
> diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> i + TUPLE_INDEX_BASE,
> @@ -473,8 +558,75 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
> "string and scalar parts");
> return -1;
> }
> + if (part->path != NULL) {
> + struct region *region = &fiber()->gc;
> + size_t path_len = strlen(part->path);
> + struct json_path_parser parser;
> + struct json_path_node node;
> + json_path_parser_create(&parser, part->path, path_len);
> + /*
> + * A worst-case scenario is .a -> ["a"]
> + * i.e. 3*path_len + 1 is enough.
4. As I understand, len of ".a" is 2, len of "['a']" is 5. So
it is actually 2*len + 1. Not 3*len + 1. It is not?
> + */
> + uint32_t size = region_used(region);
5. You do not need to truncate the region yourself. This is done
later by commit/rollback. But you can reuse previous the region
allocations if store the region memory pointer out of this cycle
together size. And when a new path len * 2 + 1 > than the memory
size you have, you allocate more. Else you can reuse it. In the
best case it reduces number of allocations in part_count times,
in the worst one it is not worse than your way.
> + char *path =
> + region_alloc(region, 3 * path_len + 1);
> + if (path == NULL) {
> + diag_set(OutOfMemory, 3 * path_len + 1,
> + "region_alloc", "path");
> + return -1;
> + }
> + part->path = path;
> + int rc = json_path_next(&parser, &node);
> + if (rc != 0)
> + goto error_invalid_json;
> + if (node.type != JSON_PATH_NUM) {
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part->fieldno,
> + "invalid JSON path: first part should "
> + "be defined as array");
6. As array index. Not array.
> + return -1;
> + }
> + if (node.num - TUPLE_INDEX_BASE != part->fieldno) {
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part->fieldno,
> + "invalid JSON path: first part refers "
> + "to invalid field");
> + return -1;
> + }
> + uint32_t lexemes = 0;
> + do {
> + if (node.type == JSON_PATH_NUM) {
> + path += sprintf(path, "[%u]",
> + (unsigned)node.num);
> + } else if (node.type == JSON_PATH_STR) {
> + path += sprintf(path, "[\"%.*s\"]",
> + node.len, node.str);
> + } else {
> + unreachable();
> + }
> + lexemes++;
> + } while ((rc = json_path_next(&parser, &node)) == 0 &&
> + node.type != JSON_PATH_END);
> + if (rc != 0 || node.type != JSON_PATH_END)
> + goto error_invalid_json;
> + /* JSON index is useless. */
> + if (lexemes == 1) {
> + region_truncate(region, size);
> + part->path = NULL;
> + } else {
> + region_truncate(region,
> + size + (path - part->path + 1));
> + }
> + }
7. Please, reduce indentation level. You can check
'if part->path == NULL then continue' and move this
code block 1 tab left. Even now you can join some of
lines into one.
> }
> return 0;
> +
> +error_invalid_json:
> + diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> + part->fieldno + TUPLE_INDEX_BASE,
> + "invalid JSON path: path has invalid structure");
8. Please, write on which position the error occurred.
> + return -1;
> }
>
> in
next prev parent reply other threads:[~2018-08-08 22:03 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-06 12:26 [tarantool-patches] [PATCH v1 0/5] box: indexes by JSON path Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 1/5] rfc: describe a Tarantool JSON indexes Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 2/5] box: introduce slot_cache in key_part Kirill Shcherbatov
2018-08-08 22:03 ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14 ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 3/5] box: introduce path field " Kirill Shcherbatov
2018-08-08 22:03 ` Vladislav Shpilevoy [this message]
2018-08-15 12:14 ` [tarantool-patches] " Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 4/5] box: introduce path_hash and tuple_field tree Kirill Shcherbatov
2018-08-08 22:03 ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14 ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 5/5] box: specify indexes in user-friendly form Kirill Shcherbatov
2018-08-08 22:03 ` [tarantool-patches] Re: [PATCH v1 0/5] box: indexes by JSON path Vladislav Shpilevoy
2018-08-15 12:14 ` Kirill Shcherbatov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=295310f0-1590-0b32-47f7-3231f925e19a@tarantool.org \
--to=v.shpilevoy@tarantool.org \
--cc=kshcherbatov@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='[tarantool-patches] Re: [PATCH v1 3/5] box: introduce path field in key_part' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox