[Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates

Sergey Ostanevich sergos at tarantool.org
Sat Dec 14 12:20:28 MSK 2019


Thanks, LGTM.

Best regards,
Sergos 

Saturday, 14 December 2019, 02:34 +0300 from Vladislav Shpilevoy  <v.shpilevoy at tarantool.org>:
>Hi! Thanks for the review!
>
>On 09/12/2019 21:18, Sergey Ostanevich wrote:
>> Hi! 
>> 
>> Thanks for the patch!
>> Please, see 4 comments below.
>> 
>> Regards,
>> Sergos
>> 
>> 
>>> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
>>> index 767e40006..15a136f81 100644
>>> --- a/src/box/vinyl.c
>>> +++ b/src/box/vinyl.c
>>> @@ -2004,13 +2004,25 @@ request_normalize_ops(struct request *request)
>>>  		ops_end = mp_encode_str(ops_end, op_name, op_name_len);
>>> 
>>>  		int field_no;
>>> -		if (mp_typeof(*pos) == MP_INT) {
>>> +		const char *field_name;
>>> +		switch (mp_typeof(*pos)) {
>>> +		case MP_INT:
>>>  			field_no = mp_decode_int(&pos);
>>>  			ops_end = mp_encode_int(ops_end, field_no);
>>> -		} else {
>>> +			break;
>>> +		case MP_UINT:
>>>  			field_no = mp_decode_uint(&pos);
>>>  			field_no -= request->index_base;
>>>  			ops_end = mp_encode_uint(ops_end, field_no);
>>> +			break;
>>> +		case MP_STR:
>>> +			field_name = pos;
>>> +			mp_next(&pos);
>>> +			memcpy(ops_end, field_name, pos - field_name);
>>> +			ops_end += pos - field_name;
>>> +			break;
>> 
>> Are you intentionally skip encode/decode here? Is it safe enough?
>
>mp_next() does decode. Memcpy is the same as encode here. Just without
>'if's. It is safe.
>
>>> diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
>>> index 123db081a..ecda17544 100644
>>> --- a/src/box/xrow_update.c
>>> +++ b/src/box/xrow_update.c
>>> @@ -412,6 +434,15 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
>>>  			if (op->opcode != '+' && op->opcode != '-' &&
>>>  			    op->opcode != '=')
>>>  				return NULL;
>>> +			/*
>>> +			 * Not terminal operation means, that the
>>> +			 * update is not flat, and squash would
>>> +			 * need to build a tree of operations to
>>> +			 * find matches. That is too complex,
>>> +			 * squash is skipped.
>>> +			 */
>>> +			if (! xrow_update_op_is_term(op))
>>> +				return NULL;
>> 
>> Please, no space after unary op - and further on in the patch -
>> according to $3.1 of
>>  https://www.tarantool.io/en/doc/2.2/dev_guide/c_style_guide/
>> 
>
>Ok, here it is:
>
>==============================================================================
>
>diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
>index ecda17544..0cee50017 100644
>--- a/src/box/xrow_update.c
>+++ b/src/box/xrow_update.c
>@@ -441,7 +441,7 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
> 			 * find matches. That is too complex,
> 			 * squash is skipped.
> 			 */
>-			if (! xrow_update_op_is_term(op))
>+			if (!xrow_update_op_is_term(op))
> 				return NULL;
> 			if (op->field_no <= prev_field_no)
> 				return NULL;
>diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
>index 1cc49f861..57427e39c 100644
>--- a/src/box/xrow_update_array.c
>+++ b/src/box/xrow_update_array.c
>@@ -221,7 +221,7 @@ xrow_update_op_do_array_insert(struct xrow_update_op *op,
> {
> 	assert(field->type == XUPDATE_ARRAY);
> 	struct xrow_update_array_item *item;
>-	if (! xrow_update_op_is_term(op)) {
>+	if (!xrow_update_op_is_term(op)) {
> 		item = xrow_update_array_extract_item(field, op);
> 		if (item == NULL)
> 			return -1;
>@@ -256,7 +256,7 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
> 		xrow_update_array_extract_item(field, op);
> 	if (item == NULL)
> 		return -1;
>-	if (! xrow_update_op_is_term(op))
>+	if (!xrow_update_op_is_term(op))
> 		return xrow_update_op_do_field_set(op, &item->field);
> 	op->new_field_len = op->arg.set.length;
> 	/*
>@@ -275,7 +275,7 @@ xrow_update_op_do_array_delete(struct xrow_update_op *op,
> 			       struct xrow_update_field *field)
> {
> 	assert(field->type == XUPDATE_ARRAY);
>-	if (! xrow_update_op_is_term(op)) {
>+	if (!xrow_update_op_is_term(op)) {
> 		struct xrow_update_array_item *item =
> 			xrow_update_array_extract_item(field, op);
> 		if (item == NULL)
>@@ -305,7 +305,7 @@ xrow_update_op_do_array_##op_type(struct xrow_update_op *op,			\
> 		xrow_update_array_extract_item(field, op);			\
> 	if (item == NULL)							\
> 		return -1;							\
>-	if (! xrow_update_op_is_term(op))					\
>+	if (!xrow_update_op_is_term(op))					\
> 		return xrow_update_op_do_field_##op_type(op, &item->field);	\
> 	if (item->field.type != XUPDATE_NOP)					\
> 		return xrow_update_err_double(op);				\
>diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
>index 737673e8a..7285c7e2b 100644
>--- a/src/box/xrow_update_bar.c
>+++ b/src/box/xrow_update_bar.c
>@@ -58,7 +58,7 @@ xrow_update_bar_locate(struct xrow_update_op *op,
> 	 * non empty path. This is why op is expected to be not
> 	 * terminal.
> 	 */
>-	assert(! xrow_update_op_is_term(op));
>+	assert(!xrow_update_op_is_term(op));
> 	int rc;
> 	field->type = XUPDATE_BAR;
> 	field->bar.op = op;
>@@ -113,7 +113,7 @@ xrow_update_bar_locate_opt(struct xrow_update_op *op,
> 	 * non empty path. This is why op is expected to be not
> 	 * terminal.
> 	 */
>-	assert(! xrow_update_op_is_term(op));
>+	assert(!xrow_update_op_is_term(op));
> 	int rc;
> 	field->type = XUPDATE_BAR;
> 	field->bar.op = op;
>@@ -245,7 +245,7 @@ xrow_update_op_do_nop_set(struct xrow_update_op *op,
> 	if (xrow_update_bar_locate_opt(op, field, &is_found, &key_len) != 0)
> 		return -1;
> 	op->new_field_len = op->arg.set.length;
>-	if (! is_found) {
>+	if (!is_found) {
> 		op->opcode = '!';
> 		if (mp_typeof(*field->bar.parent) == MP_MAP)
> 			op->new_field_len += mp_sizeof_str(key_len);
>
>==============================================================================
>
>>> diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
>>> new file mode 100644
>>> index 000000000..737673e8a
>>> --- /dev/null
>>> +++ b/src/box/xrow_update_bar.c
>>> +static inline int
>>> +xrow_update_bar_locate_opt(struct xrow_update_op *op,
>>> +			   struct xrow_update_field *field, bool *is_found,
>>> +			   int *key_len_or_index)
>>> +{
>>> +	/*
>>> +	 * Bar update is not flat by definition. It always has a
>>> +	 * non empty path. This is why op is expected to be not
>>> +	 * terminal.
>>> +	 */
>>> +	assert(! xrow_update_op_is_term(op));
>>> +	int rc;
>>> +	field->type = XUPDATE_BAR;
>>> +	field->bar.op = op;
>>> +	field->bar.path = op->lexer.src + op->lexer.offset;
>>> +	field->bar.path_len = op->lexer.src_len - op->lexer.offset;
>>> +	const char *pos = field->data;
>>> +	struct json_token token;
>>> +	do {
>>> +		rc = json_lexer_next_token(&op->lexer, &token);
>>> +		if (rc != 0)
>>> +			return xrow_update_err_bad_json(op, rc);
>>> +
>>> +		switch (token.type) {
>>> +		case JSON_TOKEN_END:
>>> +			*is_found = true;
>>> +			field->bar.point = pos;
>>> +			mp_next(&pos);
>>> +			field->bar.point_size = pos - field->bar.point;
>>> +			return 0;
>>> +		case JSON_TOKEN_NUM:
>>> +			field->bar.parent = pos;
>>> +			*key_len_or_index = token.num;
>>> +			rc = tuple_field_go_to_index(&pos, token.num);
>>> +			break;
>>> +		case JSON_TOKEN_STR:
>>> +			field->bar.parent = pos;
>>> +			*key_len_or_index = token.len;
>>> +			rc = tuple_field_go_to_key(&pos, token.str, token.len);
>>> +			break;
>>> +		default:
>>> +			assert(token.type == JSON_TOKEN_ANY);
>>> +			rc = op->lexer.symbol_count - 1;
>>> +			return xrow_update_err_bad_json(op, rc);
>>> +		}
>>> +	} while (rc == 0);
>>> +	assert(rc == -1);
>>> +	/* Ensure, that 'token' is next to last path part. */
>>> +	struct json_token tmp_token;
>>> +	rc = json_lexer_next_token(&op->lexer, &tmp_token);
>>> +	if (rc != 0)
>>> +		return xrow_update_err_bad_json(op, rc);
>>> +	if (tmp_token.type != JSON_TOKEN_END)
>>> +		return xrow_update_err_no_such_field(op);
>>> +
>>> +	*is_found = false;
>>> +	if (token.type == JSON_TOKEN_NUM) {
>>> +		const char *tmp = field->bar.parent;
>>> +		if (mp_typeof(*tmp) != MP_ARRAY) {
>>> +			return xrow_update_err(op, "can not access by index a "\
>>> +					       "non-array field");
>>> +		}
>>> +		uint32_t size = mp_decode_array(&tmp);
>>> +		if ((uint32_t) token.num > size)
>>> +			return xrow_update_err_no_such_field(op);
>> 
>> IMHO there will be more informative to mention the array and an index
>> that is out of bounds.
>> 
>
>Well, xrow_update_err_no_such_field() does it. It adds the whole
>path to the error message. So a user will see what a path caused
>the problem.
>
>>> +		/*
>>> +		 * The updated point is in an array, its position
>>> +		 * was not found, and its index is <= size. The
>>> +		 * only way how can that happen - the update tries
>>> +		 * to append a new array element. The following
>>> +		 * code tries to find the array's end.
>>> +		 */
>>> +		assert((uint32_t) token.num == size);
>>> +		if (field->bar.parent == field->data) {
>>> +			/*
>>> +			 * Optimization for the case when the path
>>> +			 * is short. So parent of the updated
>>> +			 * point is the field itself. It allows
>>> +			 * not to decode anything at all. It is
>>> +			 * worth doing, since the paths are
>>> +			 * usually short.
>>> +			 */
>>> +			field->bar.point = field->data + field->size;
>>> +		} else {
>>> +			field->bar.point = field->bar.parent;
>>> +			mp_next(&field->bar.point);
>>> +		}
>>> +		field->bar.point_size = 0;
>>> +	} else {
>>> +		assert(token.type == JSON_TOKEN_STR);
>>> +		field->bar.new_key = token.str;
>>> +		field->bar.new_key_len = token.len;
>>> +		if (mp_typeof(*field->bar.parent) != MP_MAP) {
>>> +			return xrow_update_err(op, "can not access by key a "\
>>> +					       "non-map field");
>>> +		}
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>> +/**
>>> + * Nop fields are those which are not updated. And when they
>>> + * receive an update via one of xrow_update_op_do_nop_* functions,
>>> + * it means, that there is a non terminal path digging inside this
>>> + * not updated field. It turns nop field into a bar field. How
>>> + * exactly - depends on a concrete operation.
>>> + */
>>> +
>>> +int
>>> +xrow_update_op_do_nop_insert(struct xrow_update_op *op,
>>> +			     struct xrow_update_field *field)
>>> +{
>>> +	assert(op->opcode == '!');
>>> +	assert(field->type == XUPDATE_NOP);
>> 
>> Hmm. Do you expect to handle all possible cases in debug mode? 
>> Otherwise here should be some sort of gaceful fail to make prod more
>> stable.
>> 
>
>In Tarantool we usually rely on the tests and debug mode. Concretely
>this case I covered with tests on 100% not including OOM (we can't
>test malloc failures, basically). This is done to not slowdown release
>build with never passing checks such as this. Removal of assertion
>checks from release gives very significant perf impact. However in some
>places which are not hot paths there is a thing related to what you
>mean:  https://github.com/tarantool/tarantool/issues/2571
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20191214/c301f1aa/attachment.html>


More information about the Tarantool-patches mailing list