[Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Dec 14 02:34:26 MSK 2019


Hi! Thanks for the review!

On 09/12/2019 21:18, Sergey Ostanevich wrote:
> Hi! 
> 
> Thanks for the patch!
> Please, see 4 comments below.
> 
> Regards,
> Sergos
> 
> 
>> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
>> index 767e40006..15a136f81 100644
>> --- a/src/box/vinyl.c
>> +++ b/src/box/vinyl.c
>> @@ -2004,13 +2004,25 @@ request_normalize_ops(struct request *request)
>>  		ops_end = mp_encode_str(ops_end, op_name, op_name_len);
>>  
>>  		int field_no;
>> -		if (mp_typeof(*pos) == MP_INT) {
>> +		const char *field_name;
>> +		switch (mp_typeof(*pos)) {
>> +		case MP_INT:
>>  			field_no = mp_decode_int(&pos);
>>  			ops_end = mp_encode_int(ops_end, field_no);
>> -		} else {
>> +			break;
>> +		case MP_UINT:
>>  			field_no = mp_decode_uint(&pos);
>>  			field_no -= request->index_base;
>>  			ops_end = mp_encode_uint(ops_end, field_no);
>> +			break;
>> +		case MP_STR:
>> +			field_name = pos;
>> +			mp_next(&pos);
>> +			memcpy(ops_end, field_name, pos - field_name);
>> +			ops_end += pos - field_name;
>> +			break;
> 
> Are you intentionally skip encode/decode here? Is it safe enough?

mp_next() does decode. Memcpy is the same as encode here. Just without
'if's. It is safe.

>> diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
>> index 123db081a..ecda17544 100644
>> --- a/src/box/xrow_update.c
>> +++ b/src/box/xrow_update.c
>> @@ -412,6 +434,15 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
>>  			if (op->opcode != '+' && op->opcode != '-' &&
>>  			    op->opcode != '=')
>>  				return NULL;
>> +			/*
>> +			 * Not terminal operation means, that the
>> +			 * update is not flat, and squash would
>> +			 * need to build a tree of operations to
>> +			 * find matches. That is too complex,
>> +			 * squash is skipped.
>> +			 */
>> +			if (! xrow_update_op_is_term(op))
>> +				return NULL;
> 
> Please, no space after unary op - and further on in the patch -
> according to $3.1 of
> https://www.tarantool.io/en/doc/2.2/dev_guide/c_style_guide/
> 

Ok, here it is:

==============================================================================

diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
index ecda17544..0cee50017 100644
--- a/src/box/xrow_update.c
+++ b/src/box/xrow_update.c
@@ -441,7 +441,7 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
 			 * find matches. That is too complex,
 			 * squash is skipped.
 			 */
-			if (! xrow_update_op_is_term(op))
+			if (!xrow_update_op_is_term(op))
 				return NULL;
 			if (op->field_no <= prev_field_no)
 				return NULL;
diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
index 1cc49f861..57427e39c 100644
--- a/src/box/xrow_update_array.c
+++ b/src/box/xrow_update_array.c
@@ -221,7 +221,7 @@ xrow_update_op_do_array_insert(struct xrow_update_op *op,
 {
 	assert(field->type == XUPDATE_ARRAY);
 	struct xrow_update_array_item *item;
-	if (! xrow_update_op_is_term(op)) {
+	if (!xrow_update_op_is_term(op)) {
 		item = xrow_update_array_extract_item(field, op);
 		if (item == NULL)
 			return -1;
@@ -256,7 +256,7 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
 		xrow_update_array_extract_item(field, op);
 	if (item == NULL)
 		return -1;
-	if (! xrow_update_op_is_term(op))
+	if (!xrow_update_op_is_term(op))
 		return xrow_update_op_do_field_set(op, &item->field);
 	op->new_field_len = op->arg.set.length;
 	/*
@@ -275,7 +275,7 @@ xrow_update_op_do_array_delete(struct xrow_update_op *op,
 			       struct xrow_update_field *field)
 {
 	assert(field->type == XUPDATE_ARRAY);
-	if (! xrow_update_op_is_term(op)) {
+	if (!xrow_update_op_is_term(op)) {
 		struct xrow_update_array_item *item =
 			xrow_update_array_extract_item(field, op);
 		if (item == NULL)
@@ -305,7 +305,7 @@ xrow_update_op_do_array_##op_type(struct xrow_update_op *op,			\
 		xrow_update_array_extract_item(field, op);			\
 	if (item == NULL)							\
 		return -1;							\
-	if (! xrow_update_op_is_term(op))					\
+	if (!xrow_update_op_is_term(op))					\
 		return xrow_update_op_do_field_##op_type(op, &item->field);	\
 	if (item->field.type != XUPDATE_NOP)					\
 		return xrow_update_err_double(op);				\
diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
index 737673e8a..7285c7e2b 100644
--- a/src/box/xrow_update_bar.c
+++ b/src/box/xrow_update_bar.c
@@ -58,7 +58,7 @@ xrow_update_bar_locate(struct xrow_update_op *op,
 	 * non empty path. This is why op is expected to be not
 	 * terminal.
 	 */
-	assert(! xrow_update_op_is_term(op));
+	assert(!xrow_update_op_is_term(op));
 	int rc;
 	field->type = XUPDATE_BAR;
 	field->bar.op = op;
@@ -113,7 +113,7 @@ xrow_update_bar_locate_opt(struct xrow_update_op *op,
 	 * non empty path. This is why op is expected to be not
 	 * terminal.
 	 */
-	assert(! xrow_update_op_is_term(op));
+	assert(!xrow_update_op_is_term(op));
 	int rc;
 	field->type = XUPDATE_BAR;
 	field->bar.op = op;
@@ -245,7 +245,7 @@ xrow_update_op_do_nop_set(struct xrow_update_op *op,
 	if (xrow_update_bar_locate_opt(op, field, &is_found, &key_len) != 0)
 		return -1;
 	op->new_field_len = op->arg.set.length;
-	if (! is_found) {
+	if (!is_found) {
 		op->opcode = '!';
 		if (mp_typeof(*field->bar.parent) == MP_MAP)
 			op->new_field_len += mp_sizeof_str(key_len);

==============================================================================

>> diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
>> new file mode 100644
>> index 000000000..737673e8a
>> --- /dev/null
>> +++ b/src/box/xrow_update_bar.c
>> +static inline int
>> +xrow_update_bar_locate_opt(struct xrow_update_op *op,
>> +			   struct xrow_update_field *field, bool *is_found,
>> +			   int *key_len_or_index)
>> +{
>> +	/*
>> +	 * Bar update is not flat by definition. It always has a
>> +	 * non empty path. This is why op is expected to be not
>> +	 * terminal.
>> +	 */
>> +	assert(! xrow_update_op_is_term(op));
>> +	int rc;
>> +	field->type = XUPDATE_BAR;
>> +	field->bar.op = op;
>> +	field->bar.path = op->lexer.src + op->lexer.offset;
>> +	field->bar.path_len = op->lexer.src_len - op->lexer.offset;
>> +	const char *pos = field->data;
>> +	struct json_token token;
>> +	do {
>> +		rc = json_lexer_next_token(&op->lexer, &token);
>> +		if (rc != 0)
>> +			return xrow_update_err_bad_json(op, rc);
>> +
>> +		switch (token.type) {
>> +		case JSON_TOKEN_END:
>> +			*is_found = true;
>> +			field->bar.point = pos;
>> +			mp_next(&pos);
>> +			field->bar.point_size = pos - field->bar.point;
>> +			return 0;
>> +		case JSON_TOKEN_NUM:
>> +			field->bar.parent = pos;
>> +			*key_len_or_index = token.num;
>> +			rc = tuple_field_go_to_index(&pos, token.num);
>> +			break;
>> +		case JSON_TOKEN_STR:
>> +			field->bar.parent = pos;
>> +			*key_len_or_index = token.len;
>> +			rc = tuple_field_go_to_key(&pos, token.str, token.len);
>> +			break;
>> +		default:
>> +			assert(token.type == JSON_TOKEN_ANY);
>> +			rc = op->lexer.symbol_count - 1;
>> +			return xrow_update_err_bad_json(op, rc);
>> +		}
>> +	} while (rc == 0);
>> +	assert(rc == -1);
>> +	/* Ensure, that 'token' is next to last path part. */
>> +	struct json_token tmp_token;
>> +	rc = json_lexer_next_token(&op->lexer, &tmp_token);
>> +	if (rc != 0)
>> +		return xrow_update_err_bad_json(op, rc);
>> +	if (tmp_token.type != JSON_TOKEN_END)
>> +		return xrow_update_err_no_such_field(op);
>> +
>> +	*is_found = false;
>> +	if (token.type == JSON_TOKEN_NUM) {
>> +		const char *tmp = field->bar.parent;
>> +		if (mp_typeof(*tmp) != MP_ARRAY) {
>> +			return xrow_update_err(op, "can not access by index a "\
>> +					       "non-array field");
>> +		}
>> +		uint32_t size = mp_decode_array(&tmp);
>> +		if ((uint32_t) token.num > size)
>> +			return xrow_update_err_no_such_field(op);
> 
> IMHO there will be more informative to mention the array and an index
> that is out of bounds.
> 

Well, xrow_update_err_no_such_field() does it. It adds the whole
path to the error message. So a user will see what a path caused
the problem.

>> +		/*
>> +		 * The updated point is in an array, its position
>> +		 * was not found, and its index is <= size. The
>> +		 * only way how can that happen - the update tries
>> +		 * to append a new array element. The following
>> +		 * code tries to find the array's end.
>> +		 */
>> +		assert((uint32_t) token.num == size);
>> +		if (field->bar.parent == field->data) {
>> +			/*
>> +			 * Optimization for the case when the path
>> +			 * is short. So parent of the updated
>> +			 * point is the field itself. It allows
>> +			 * not to decode anything at all. It is
>> +			 * worth doing, since the paths are
>> +			 * usually short.
>> +			 */
>> +			field->bar.point = field->data + field->size;
>> +		} else {
>> +			field->bar.point = field->bar.parent;
>> +			mp_next(&field->bar.point);
>> +		}
>> +		field->bar.point_size = 0;
>> +	} else {
>> +		assert(token.type == JSON_TOKEN_STR);
>> +		field->bar.new_key = token.str;
>> +		field->bar.new_key_len = token.len;
>> +		if (mp_typeof(*field->bar.parent) != MP_MAP) {
>> +			return xrow_update_err(op, "can not access by key a "\
>> +					       "non-map field");
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>> +/**
>> + * Nop fields are those which are not updated. And when they
>> + * receive an update via one of xrow_update_op_do_nop_* functions,
>> + * it means, that there is a non terminal path digging inside this
>> + * not updated field. It turns nop field into a bar field. How
>> + * exactly - depends on a concrete operation.
>> + */
>> +
>> +int
>> +xrow_update_op_do_nop_insert(struct xrow_update_op *op,
>> +			     struct xrow_update_field *field)
>> +{
>> +	assert(op->opcode == '!');
>> +	assert(field->type == XUPDATE_NOP);
> 
> Hmm. Do you expect to handle all possible cases in debug mode? 
> Otherwise here should be some sort of gaceful fail to make prod more
> stable.
> 

In Tarantool we usually rely on the tests and debug mode. Concretely
this case I covered with tests on 100% not including OOM (we can't
test malloc failures, basically). This is done to not slowdown release
build with never passing checks such as this. Removal of assertion
checks from release gives very significant perf impact. However in some
places which are not hot paths there is a thing related to what you
mean: https://github.com/tarantool/tarantool/issues/2571


More information about the Tarantool-patches mailing list