Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org,
	Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v1 3/5] box: introduce path field in key_part
Date: Thu, 9 Aug 2018 01:03:13 +0300	[thread overview]
Message-ID: <295310f0-1590-0b32-47f7-3231f925e19a@tarantool.org> (raw)
In-Reply-To: <63cb256a0206e222ee10199e7d671700e44ce2aa.1533558332.git.kshcherbatov@tarantool.org>

Thanks for the patch! See 8 comments below.

On 06/08/2018 15:27, Kirill Shcherbatov wrote:
> As we need to store user-defined JSON path in key_part
> and key_part_def, we have introduced path and path_len
> fields. JSON path is verified and transformed to canonical
> form on index msgpack unpack.
> Because of field names specified as format could be changed
> key_part path persisted in Tarantool should be always started
> with first-level field access via array index(not by name).
> 
> Part of #1012.
> ---
>   src/box/key_def.c    | 197 +++++++++++++++++++++++++++++++++++++++++++++++----
>   src/box/key_def.h    |  13 +++-
>   src/box/lua/space.cc |   5 ++
>   src/box/schema.cc    |   8 +--
>   src/box/vy_log.c     |   3 +-
>   5 files changed, 205 insertions(+), 21 deletions(-)
> 
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 8a4262b..79e07f8 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -103,7 +108,27 @@ key_def_dup(const struct key_def *src)
>   		return NULL;
>   	}
>   	memcpy(res, src, sz);
> +	uint32_t i = 0;
> +	for (; i < src->part_count; i++) {
> +		if (src->parts[i].path == NULL) {
> +			res->parts[i].path = NULL;
> +			continue;
> +		}
> +		char *path = strdup(src->parts[i].path);

1. Please, keep monolithic key_def memory layout.

> +		if (path == NULL) {
> +			diag_set(OutOfMemory, src->parts[i].path_len + 1,
> +				"strdup", "path");
> +			goto error;
> +		}
> +		res->parts[i].path = path;
> +	}
>   	return res;
> +
> +error:
> +	for (uint32_t j = 0; j < i; j++)
> +		free((void *)res->parts[j].path);
> +	free(res);
> +	return NULL;
>   }
>   
>   void
> @@ -241,6 +296,11 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
>   		if (part1->is_nullable != part2->is_nullable)
>   			return part1->is_nullable <
>   			       part2->is_nullable ? -1 : 1;
> +		/* Lexicographic strings order. */
> +		uint32_t len = MIN(part1->path_len, part2->path_len);
> +		int rc = 0;
> +		if ((rc = strncmp(part1->path, part2->path, len)) != 0)

2. As I said you at least two times, strncmp treats non-equal strings
as the same one when the one is prefix of another. So this comparison
is not correct.

> +			return rc;
>   	}
>   	return part_count1 < part_count2 ? -1 : part_count1 > part_count2;
>   }
> @@ -445,8 +529,9 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
>   		return key_def_decode_parts_166(parts, part_count, data,
>   						fields, field_count);
>   	}
> +	struct key_part_def *part;
>   	for (uint32_t i = 0; i < part_count; i++) {
> -		struct key_part_def *part = &parts[i];

3. Just do ++part on the previous line.

> +		part = &parts[i];
>   		if (mp_typeof(**data) != MP_MAP) {
>   			diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
>   				 i + TUPLE_INDEX_BASE,
> @@ -473,8 +558,75 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
>   				 "string and scalar parts");
>   			return -1;
>   		}
> +		if (part->path != NULL) {
> +			struct region *region = &fiber()->gc;
> +			size_t path_len = strlen(part->path);
> +			struct json_path_parser parser;
> +			struct json_path_node node;
> +			json_path_parser_create(&parser, part->path, path_len);
> +			/*
> +			 * A worst-case scenario is .a -> ["a"]
> +			 * i.e. 3*path_len + 1 is enough.

4. As I understand, len of ".a" is 2, len of "['a']" is 5. So
it is actually 2*len + 1. Not 3*len + 1. It is not?

> +			 */
> +			uint32_t size = region_used(region);

5. You do not need to truncate the region yourself. This is done
later by commit/rollback. But you can reuse previous the region
allocations if store the region memory pointer out of this cycle
together size. And when a new path len * 2 + 1 > than the memory
size you have, you allocate more. Else you can reuse it. In the
best case it reduces number of allocations in part_count times,
in the worst one it is not worse than your way.

> +			char *path =
> +				region_alloc(region, 3 * path_len + 1);
> +			if (path == NULL) {
> +				diag_set(OutOfMemory, 3 * path_len + 1,
> +					"region_alloc", "path");
> +				return -1;
> +			}
> +			part->path = path;
> +			int rc = json_path_next(&parser, &node);
> +			if (rc != 0)
> +				goto error_invalid_json;
> +			if (node.type != JSON_PATH_NUM) {
> +				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +					part->fieldno,
> +					"invalid JSON path: first part should "
> +					"be defined as array");

6. As array index. Not array.

> +				return -1;
> +			}
> +			if (node.num - TUPLE_INDEX_BASE != part->fieldno) {
> +				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +					part->fieldno,
> +					"invalid JSON path: first part refers "
> +					"to invalid field");
> +				return -1;
> +			}
> +			uint32_t lexemes = 0;
> +			do {
> +				if (node.type == JSON_PATH_NUM) {
> +					path += sprintf(path, "[%u]",
> +						(unsigned)node.num);
> +				} else if (node.type == JSON_PATH_STR) {
> +					path += sprintf(path, "[\"%.*s\"]",
> +							node.len, node.str);
> +				} else {
> +					unreachable();
> +				}
> +				lexemes++;
> +			} while ((rc = json_path_next(&parser, &node)) == 0 &&
> +				 node.type != JSON_PATH_END);
> +			if (rc != 0 || node.type != JSON_PATH_END)
> +				goto error_invalid_json;
> +			/* JSON index is useless. */
> +			if (lexemes == 1) {
> +				region_truncate(region, size);
> +				part->path = NULL;
> +			} else {
> +				region_truncate(region,
> +						size + (path - part->path + 1));
> +			}
> +		}

7. Please, reduce indentation level. You can check
'if part->path == NULL then continue' and move this
code block 1 tab left. Even now you can join some of
lines into one.

>   	}
>   	return 0;
> +
> +error_invalid_json:
> +	diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
> +		 part->fieldno + TUPLE_INDEX_BASE,
> +		 "invalid JSON path: path has invalid structure");

8. Please, write on which position the error occurred.

> +	return -1;
>   }
>   
>   in

  reply	other threads:[~2018-08-08 22:03 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-06 12:26 [tarantool-patches] [PATCH v1 0/5] box: indexes by JSON path Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 1/5] rfc: describe a Tarantool JSON indexes Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 2/5] box: introduce slot_cache in key_part Kirill Shcherbatov
2018-08-08 22:03   ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 3/5] box: introduce path field " Kirill Shcherbatov
2018-08-08 22:03   ` Vladislav Shpilevoy [this message]
2018-08-15 12:14     ` [tarantool-patches] " Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 4/5] box: introduce path_hash and tuple_field tree Kirill Shcherbatov
2018-08-08 22:03   ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 5/5] box: specify indexes in user-friendly form Kirill Shcherbatov
2018-08-08 22:03 ` [tarantool-patches] Re: [PATCH v1 0/5] box: indexes by JSON path Vladislav Shpilevoy
2018-08-15 12:14   ` Kirill Shcherbatov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=295310f0-1590-0b32-47f7-3231f925e19a@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='[tarantool-patches] Re: [PATCH v1 3/5] box: introduce path field in key_part' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox