Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: Sergey Ostanevich <sergos@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates
Date: Sat, 14 Dec 2019 00:34:26 +0100	[thread overview]
Message-ID: <c84d5d7c-5a08-c3fd-02c1-b105d5751fab@tarantool.org> (raw)
In-Reply-To: <20191209201842.GA14366@tarantool.org>

Hi! Thanks for the review!

On 09/12/2019 21:18, Sergey Ostanevich wrote:
> Hi! 
> 
> Thanks for the patch!
> Please, see 4 comments below.
> 
> Regards,
> Sergos
> 
> 
>> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
>> index 767e40006..15a136f81 100644
>> --- a/src/box/vinyl.c
>> +++ b/src/box/vinyl.c
>> @@ -2004,13 +2004,25 @@ request_normalize_ops(struct request *request)
>>  		ops_end = mp_encode_str(ops_end, op_name, op_name_len);
>>  
>>  		int field_no;
>> -		if (mp_typeof(*pos) == MP_INT) {
>> +		const char *field_name;
>> +		switch (mp_typeof(*pos)) {
>> +		case MP_INT:
>>  			field_no = mp_decode_int(&pos);
>>  			ops_end = mp_encode_int(ops_end, field_no);
>> -		} else {
>> +			break;
>> +		case MP_UINT:
>>  			field_no = mp_decode_uint(&pos);
>>  			field_no -= request->index_base;
>>  			ops_end = mp_encode_uint(ops_end, field_no);
>> +			break;
>> +		case MP_STR:
>> +			field_name = pos;
>> +			mp_next(&pos);
>> +			memcpy(ops_end, field_name, pos - field_name);
>> +			ops_end += pos - field_name;
>> +			break;
> 
> Are you intentionally skip encode/decode here? Is it safe enough?

mp_next() does decode. Memcpy is the same as encode here. Just without
'if's. It is safe.

>> diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
>> index 123db081a..ecda17544 100644
>> --- a/src/box/xrow_update.c
>> +++ b/src/box/xrow_update.c
>> @@ -412,6 +434,15 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
>>  			if (op->opcode != '+' && op->opcode != '-' &&
>>  			    op->opcode != '=')
>>  				return NULL;
>> +			/*
>> +			 * Not terminal operation means, that the
>> +			 * update is not flat, and squash would
>> +			 * need to build a tree of operations to
>> +			 * find matches. That is too complex,
>> +			 * squash is skipped.
>> +			 */
>> +			if (! xrow_update_op_is_term(op))
>> +				return NULL;
> 
> Please, no space after unary op - and further on in the patch -
> according to $3.1 of
> https://www.tarantool.io/en/doc/2.2/dev_guide/c_style_guide/
> 

Ok, here it is:

==============================================================================

diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
index ecda17544..0cee50017 100644
--- a/src/box/xrow_update.c
+++ b/src/box/xrow_update.c
@@ -441,7 +441,7 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
 			 * find matches. That is too complex,
 			 * squash is skipped.
 			 */
-			if (! xrow_update_op_is_term(op))
+			if (!xrow_update_op_is_term(op))
 				return NULL;
 			if (op->field_no <= prev_field_no)
 				return NULL;
diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
index 1cc49f861..57427e39c 100644
--- a/src/box/xrow_update_array.c
+++ b/src/box/xrow_update_array.c
@@ -221,7 +221,7 @@ xrow_update_op_do_array_insert(struct xrow_update_op *op,
 {
 	assert(field->type == XUPDATE_ARRAY);
 	struct xrow_update_array_item *item;
-	if (! xrow_update_op_is_term(op)) {
+	if (!xrow_update_op_is_term(op)) {
 		item = xrow_update_array_extract_item(field, op);
 		if (item == NULL)
 			return -1;
@@ -256,7 +256,7 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
 		xrow_update_array_extract_item(field, op);
 	if (item == NULL)
 		return -1;
-	if (! xrow_update_op_is_term(op))
+	if (!xrow_update_op_is_term(op))
 		return xrow_update_op_do_field_set(op, &item->field);
 	op->new_field_len = op->arg.set.length;
 	/*
@@ -275,7 +275,7 @@ xrow_update_op_do_array_delete(struct xrow_update_op *op,
 			       struct xrow_update_field *field)
 {
 	assert(field->type == XUPDATE_ARRAY);
-	if (! xrow_update_op_is_term(op)) {
+	if (!xrow_update_op_is_term(op)) {
 		struct xrow_update_array_item *item =
 			xrow_update_array_extract_item(field, op);
 		if (item == NULL)
@@ -305,7 +305,7 @@ xrow_update_op_do_array_##op_type(struct xrow_update_op *op,			\
 		xrow_update_array_extract_item(field, op);			\
 	if (item == NULL)							\
 		return -1;							\
-	if (! xrow_update_op_is_term(op))					\
+	if (!xrow_update_op_is_term(op))					\
 		return xrow_update_op_do_field_##op_type(op, &item->field);	\
 	if (item->field.type != XUPDATE_NOP)					\
 		return xrow_update_err_double(op);				\
diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
index 737673e8a..7285c7e2b 100644
--- a/src/box/xrow_update_bar.c
+++ b/src/box/xrow_update_bar.c
@@ -58,7 +58,7 @@ xrow_update_bar_locate(struct xrow_update_op *op,
 	 * non empty path. This is why op is expected to be not
 	 * terminal.
 	 */
-	assert(! xrow_update_op_is_term(op));
+	assert(!xrow_update_op_is_term(op));
 	int rc;
 	field->type = XUPDATE_BAR;
 	field->bar.op = op;
@@ -113,7 +113,7 @@ xrow_update_bar_locate_opt(struct xrow_update_op *op,
 	 * non empty path. This is why op is expected to be not
 	 * terminal.
 	 */
-	assert(! xrow_update_op_is_term(op));
+	assert(!xrow_update_op_is_term(op));
 	int rc;
 	field->type = XUPDATE_BAR;
 	field->bar.op = op;
@@ -245,7 +245,7 @@ xrow_update_op_do_nop_set(struct xrow_update_op *op,
 	if (xrow_update_bar_locate_opt(op, field, &is_found, &key_len) != 0)
 		return -1;
 	op->new_field_len = op->arg.set.length;
-	if (! is_found) {
+	if (!is_found) {
 		op->opcode = '!';
 		if (mp_typeof(*field->bar.parent) == MP_MAP)
 			op->new_field_len += mp_sizeof_str(key_len);

==============================================================================

>> diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
>> new file mode 100644
>> index 000000000..737673e8a
>> --- /dev/null
>> +++ b/src/box/xrow_update_bar.c
>> +static inline int
>> +xrow_update_bar_locate_opt(struct xrow_update_op *op,
>> +			   struct xrow_update_field *field, bool *is_found,
>> +			   int *key_len_or_index)
>> +{
>> +	/*
>> +	 * Bar update is not flat by definition. It always has a
>> +	 * non empty path. This is why op is expected to be not
>> +	 * terminal.
>> +	 */
>> +	assert(! xrow_update_op_is_term(op));
>> +	int rc;
>> +	field->type = XUPDATE_BAR;
>> +	field->bar.op = op;
>> +	field->bar.path = op->lexer.src + op->lexer.offset;
>> +	field->bar.path_len = op->lexer.src_len - op->lexer.offset;
>> +	const char *pos = field->data;
>> +	struct json_token token;
>> +	do {
>> +		rc = json_lexer_next_token(&op->lexer, &token);
>> +		if (rc != 0)
>> +			return xrow_update_err_bad_json(op, rc);
>> +
>> +		switch (token.type) {
>> +		case JSON_TOKEN_END:
>> +			*is_found = true;
>> +			field->bar.point = pos;
>> +			mp_next(&pos);
>> +			field->bar.point_size = pos - field->bar.point;
>> +			return 0;
>> +		case JSON_TOKEN_NUM:
>> +			field->bar.parent = pos;
>> +			*key_len_or_index = token.num;
>> +			rc = tuple_field_go_to_index(&pos, token.num);
>> +			break;
>> +		case JSON_TOKEN_STR:
>> +			field->bar.parent = pos;
>> +			*key_len_or_index = token.len;
>> +			rc = tuple_field_go_to_key(&pos, token.str, token.len);
>> +			break;
>> +		default:
>> +			assert(token.type == JSON_TOKEN_ANY);
>> +			rc = op->lexer.symbol_count - 1;
>> +			return xrow_update_err_bad_json(op, rc);
>> +		}
>> +	} while (rc == 0);
>> +	assert(rc == -1);
>> +	/* Ensure, that 'token' is next to last path part. */
>> +	struct json_token tmp_token;
>> +	rc = json_lexer_next_token(&op->lexer, &tmp_token);
>> +	if (rc != 0)
>> +		return xrow_update_err_bad_json(op, rc);
>> +	if (tmp_token.type != JSON_TOKEN_END)
>> +		return xrow_update_err_no_such_field(op);
>> +
>> +	*is_found = false;
>> +	if (token.type == JSON_TOKEN_NUM) {
>> +		const char *tmp = field->bar.parent;
>> +		if (mp_typeof(*tmp) != MP_ARRAY) {
>> +			return xrow_update_err(op, "can not access by index a "\
>> +					       "non-array field");
>> +		}
>> +		uint32_t size = mp_decode_array(&tmp);
>> +		if ((uint32_t) token.num > size)
>> +			return xrow_update_err_no_such_field(op);
> 
> IMHO there will be more informative to mention the array and an index
> that is out of bounds.
> 

Well, xrow_update_err_no_such_field() does it. It adds the whole
path to the error message. So a user will see what a path caused
the problem.

>> +		/*
>> +		 * The updated point is in an array, its position
>> +		 * was not found, and its index is <= size. The
>> +		 * only way how can that happen - the update tries
>> +		 * to append a new array element. The following
>> +		 * code tries to find the array's end.
>> +		 */
>> +		assert((uint32_t) token.num == size);
>> +		if (field->bar.parent == field->data) {
>> +			/*
>> +			 * Optimization for the case when the path
>> +			 * is short. So parent of the updated
>> +			 * point is the field itself. It allows
>> +			 * not to decode anything at all. It is
>> +			 * worth doing, since the paths are
>> +			 * usually short.
>> +			 */
>> +			field->bar.point = field->data + field->size;
>> +		} else {
>> +			field->bar.point = field->bar.parent;
>> +			mp_next(&field->bar.point);
>> +		}
>> +		field->bar.point_size = 0;
>> +	} else {
>> +		assert(token.type == JSON_TOKEN_STR);
>> +		field->bar.new_key = token.str;
>> +		field->bar.new_key_len = token.len;
>> +		if (mp_typeof(*field->bar.parent) != MP_MAP) {
>> +			return xrow_update_err(op, "can not access by key a "\
>> +					       "non-map field");
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>> +/**
>> + * Nop fields are those which are not updated. And when they
>> + * receive an update via one of xrow_update_op_do_nop_* functions,
>> + * it means, that there is a non terminal path digging inside this
>> + * not updated field. It turns nop field into a bar field. How
>> + * exactly - depends on a concrete operation.
>> + */
>> +
>> +int
>> +xrow_update_op_do_nop_insert(struct xrow_update_op *op,
>> +			     struct xrow_update_field *field)
>> +{
>> +	assert(op->opcode == '!');
>> +	assert(field->type == XUPDATE_NOP);
> 
> Hmm. Do you expect to handle all possible cases in debug mode? 
> Otherwise here should be some sort of gaceful fail to make prod more
> stable.
> 

In Tarantool we usually rely on the tests and debug mode. Concretely
this case I covered with tests on 100% not including OOM (we can't
test malloc failures, basically). This is done to not slowdown release
build with never passing checks such as this. Removal of assertion
checks from release gives very significant perf impact. However in some
places which are not hot paths there is a thing related to what you
mean: https://github.com/tarantool/tarantool/issues/2571

  reply	other threads:[~2019-12-13 23:34 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-21 23:19 Vladislav Shpilevoy
2019-12-09 20:18 ` Sergey Ostanevich
2019-12-13 23:34   ` Vladislav Shpilevoy [this message]
2019-12-14  9:20     ` Sergey Ostanevich
2019-12-19  8:34 ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c84d5d7c-5a08-c3fd-02c1-b105d5751fab@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=sergos@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox