[Tarantool-patches] [PATCH 1/3] tuple: make update operation tokens consumable

Sergey Ostanevich sergos at tarantool.org
Thu Dec 26 15:07:51 MSK 2019


Hi!

Thanks for the patch! 
Just one comment below, otherwise LGTM.

Sergos

On 23 Dec 23:41, Vladislav Shpilevoy wrote:
> There is a case: [1][2][3][4] = 100. It is not a problem when it
> is a single operation, not intersecting with anything. It is an
> isolated update then, and works fine. But the next patch allows
> several update operations to have the same prefix, and the path
> [1][2][3][4] can become a tree of updated arrays. For example, a
> trivial tree like this:
> 
>     root: [ [1] ]
>              |
>              [ [1] [2] ]
>                     |
>                     [ [1] [2] [3] ]
>                                |
>                                [ [1] [2] [3] [4] ]
>                                              =100
> 
> When the update is applied to the root, the JSON path
> [1][2][3][4] is decoded one part at a time, and the operation
> goes down the tree until it reaches the leaf, where [4] = 100 is
> applied. Each time the update goes one level down, somebody
> should update xrow_update_op.field_no so that on the first level
> it is 1, then 2, 3, 4.
> 
> Does it mean that each level of the update [1][2][3][4] should
> prepare field_no for the next child? No, because then it would
> need to check the type of the child - whether it is an array, a
> map, or whatever else expects a valid field_no/key in
> xrow_update_op - and ensure that a map child gets a key and an
> array child gets a field_no. That would complicate the code to a
> totally unreadable state, and would break the encapsulation
> between xrow_update_array/map/bar... . Each array update
> operation would have to check its child against all existing
> types to ensure that the next token matches. The same would
> happen to map updates.
> 
> This patch goes another way - each level of the update checks
> whether its field_no/key is already prepared by the caller, and
> if not, extracts the next token from the operation path. So a map
> update ensures that it has a valid key, and an array update
> ensures that it has a valid field number.
> 
> Part of #1261
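
To make the lazy token consumption concrete, here is a minimal
standalone sketch of the pattern described above. The demo_* names
are made up for illustration only and are not the actual symbols
from the patch:

#include <stdio.h>

/* Illustrative token states: UNSET means "not extracted yet". */
enum demo_token { DEMO_TOKEN_UNSET, DEMO_TOKEN_NUM };

struct demo_op {
	enum demo_token token_type;
	int field_no;
	const char *path;	/* remaining JSON path, e.g. "[2][3][4]" */
};

/* Pull the next "[N]" token from the path, like the op's lexer does. */
static int
demo_consume_token(struct demo_op *op)
{
	int n, len;
	if (sscanf(op->path, "[%d]%n", &n, &len) != 1)
		return -1;
	op->path += len;
	op->field_no = n;
	op->token_type = DEMO_TOKEN_NUM;
	return 0;
}

/* Each nesting level ensures its own token is ready before using it. */
static int
demo_do_array_update(struct demo_op *op, int depth)
{
	if (op->token_type == DEMO_TOKEN_UNSET && demo_consume_token(op) != 0)
		return -1;
	printf("level %d: field_no = %d\n", depth, op->field_no);
	if (*op->path == '\0')
		return 0;			/* terminal: apply '= 100' here */
	op->token_type = DEMO_TOKEN_UNSET;	/* hand the lexer to the child */
	return demo_do_array_update(op, depth + 1);
}

int
main(void)
{
	struct demo_op op = {DEMO_TOKEN_UNSET, 0, "[1][2][3][4]"};
	return demo_do_array_update(&op, 0);
}

Each level consumes its own "[N]" token (1, then 2, 3, 4) and resets
the token state before descending, the same way the patch below resets
op->token_type to JSON_TOKEN_END before forwarding to a child.
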
> ---
>  src/box/xrow_update_array.c | 47 +++++++++++++++++++++++++++++++++++--
>  src/box/xrow_update_field.c | 22 +++++++++++++++++
>  src/box/xrow_update_field.h | 25 ++++++++++++++++++--
>  3 files changed, 90 insertions(+), 4 deletions(-)
> 
> diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
> index 57427e39c..6a7ce09ff 100644
> --- a/src/box/xrow_update_array.c
> +++ b/src/box/xrow_update_array.c
> @@ -32,6 +32,31 @@
>  #include "msgpuck.h"
>  #include "fiber.h"
>  
> +/**
> + * Make sure @a op contains a valid field number to which the
> + * operation should be applied next. The field number may not be
> + * known if the array's parent didn't propagate the operation's
> + * lexer. In fact, the parent fills fieldno only in some rare
> + * cases like branching. Generally, an array should take care of
> + * fieldno by itself.
> + */
> +static inline int
> +xrow_update_op_prepare_num_token(struct xrow_update_op *op)
> +{
> +	/*
> +	 * Token type END is a special value meaning that the
> +	 * current token needs to be parsed.
> +	 */

I really don't like reusing entities introduced for another purpose.
Anyone reading this code will have a hard time understanding why, out
of nowhere, the lexer is forced to come to an end in
xrow_update_op_do_array_insert(), for example. More than that: when
the lexer is in the 'reached the end' state, it is forced to parse.

If we introduce more functionality for JSON later, I expect we will
have even more logic hanging on this 'special value'. Then the
contexts of these functionalities may intersect... oh my...

Can we introduce a new type with a self-explanatory name instead and
leave JSON_TOKEN_END on its own?
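
For example, something along these lines - the names below are just
placeholders, a sketch of the idea rather than a proposal of the
exact naming:

enum xrow_update_token_type {
	/** The token is not extracted from the op's lexer yet. */
	XUPDATE_TOKEN_UNDECODED,
	/** op->field_no is valid. */
	XUPDATE_TOKEN_NUM,
	/** op->key and op->key_len are valid. */
	XUPDATE_TOKEN_KEY,
};

Then JSON_TOKEN_END keeps its single meaning of 'the lexer reached
the end of the path', and the 'needs to be parsed' state gets its
own explicit name.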

> +	if (op->token_type == JSON_TOKEN_END &&
> +	    xrow_update_op_consume_token(op) != 0)
> +		return -1;
> +	if (op->token_type != JSON_TOKEN_NUM) {
> +		return xrow_update_err(op, "can't update an array by not a "\
> +				       "number index");
> +	}
> +	return 0;
> +}
> +
>  /**
>   * Make field index non-negative and check for the field
>   * existence.
> @@ -39,6 +64,7 @@
>  static inline int
>  xrow_update_op_adjust_field_no(struct xrow_update_op *op, int32_t field_count)
>  {
> +	assert(op->token_type == JSON_TOKEN_NUM);
>  	if (op->field_no >= 0) {
>  		if (op->field_no < field_count)
>  			return 0;
> @@ -221,10 +247,14 @@ xrow_update_op_do_array_insert(struct xrow_update_op *op,
>  {
>  	assert(field->type == XUPDATE_ARRAY);
>  	struct xrow_update_array_item *item;
> +	if (xrow_update_op_prepare_num_token(op) != 0)
> +		return -1;
> +
>  	if (!xrow_update_op_is_term(op)) {
>  		item = xrow_update_array_extract_item(field, op);
>  		if (item == NULL)
>  			return -1;
> +		op->token_type = JSON_TOKEN_END;
>  		return xrow_update_op_do_field_insert(op, &item->field);
>  	}
>  
> @@ -248,6 +278,9 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
>  {
>  	assert(field->type == XUPDATE_ARRAY);
>  	struct xrow_update_rope *rope = field->array.rope;
> +	if (xrow_update_op_prepare_num_token(op) != 0)
> +		return -1;
> +
>  	/* Interpret '=' for n + 1 field as insert. */
>  	if (op->field_no == (int32_t) xrow_update_rope_size(rope))
>  		return xrow_update_op_do_array_insert(op, field);
> @@ -256,8 +289,10 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
>  		xrow_update_array_extract_item(field, op);
>  	if (item == NULL)
>  		return -1;
> -	if (!xrow_update_op_is_term(op))
> +	if (!xrow_update_op_is_term(op)) {
> +		op->token_type = JSON_TOKEN_END;
>  		return xrow_update_op_do_field_set(op, &item->field);
> +	}
>  	op->new_field_len = op->arg.set.length;
>  	/*
>  	 * Ignore the previous op, if any. It is not correct,
> @@ -275,11 +310,15 @@ xrow_update_op_do_array_delete(struct xrow_update_op *op,
>  			       struct xrow_update_field *field)
>  {
>  	assert(field->type == XUPDATE_ARRAY);
> +	if (xrow_update_op_prepare_num_token(op) != 0)
> +		return -1;
> +
>  	if (!xrow_update_op_is_term(op)) {
>  		struct xrow_update_array_item *item =
>  			xrow_update_array_extract_item(field, op);
>  		if (item == NULL)
>  			return -1;
> +		op->token_type = JSON_TOKEN_END;
>  		return xrow_update_op_do_field_delete(op, &item->field);
>  	}
>  
> @@ -301,12 +340,16 @@ int										\
>  xrow_update_op_do_array_##op_type(struct xrow_update_op *op,			\
>  				  struct xrow_update_field *field)		\
>  {										\
> +	if (xrow_update_op_prepare_num_token(op) != 0)				\
> +		return -1;							\
>  	struct xrow_update_array_item *item =					\
>  		xrow_update_array_extract_item(field, op);			\
>  	if (item == NULL)							\
>  		return -1;							\
> -	if (!xrow_update_op_is_term(op))					\
> +	if (!xrow_update_op_is_term(op)) {					\
> +		op->token_type = JSON_TOKEN_END;				\
>  		return xrow_update_op_do_field_##op_type(op, &item->field);	\
> +	}									\
>  	if (item->field.type != XUPDATE_NOP)					\
>  		return xrow_update_err_double(op);				\
>  	if (xrow_update_op_do_##op_type(op, item->field.data) != 0)		\
> diff --git a/src/box/xrow_update_field.c b/src/box/xrow_update_field.c
> index de865a21d..96fcaf747 100644
> --- a/src/box/xrow_update_field.c
> +++ b/src/box/xrow_update_field.c
> @@ -611,6 +611,22 @@ xrow_update_op_by(char opcode)
>  	}
>  }
>  
> +int
> +xrow_update_op_consume_token(struct xrow_update_op *op)
> +{
> +	struct json_token token;
> +	int rc = json_lexer_next_token(&op->lexer, &token);
> +	if (rc != 0)
> +		return xrow_update_err_bad_json(op, rc);
> +	if (token.type == JSON_TOKEN_END)
> +		return xrow_update_err_no_such_field(op);
> +	op->token_type = token.type;
> +	op->key = token.str;
> +	op->key_len = token.len;
> +	op->field_no = token.num;
> +	return 0;
> +}
> +
>  int
>  xrow_update_op_decode(struct xrow_update_op *op, int index_base,
>  		      struct tuple_dictionary *dict, const char **expr)
> @@ -639,6 +655,12 @@ xrow_update_op_decode(struct xrow_update_op *op, int index_base,
>  		diag_set(ClientError, ER_UNKNOWN_UPDATE_OP);
>  		return -1;
>  	}
> +	/*
> +	 * The first token is always a num. Even if a user
> +	 * specified a field name, it is converted to a num by the
> +	 * tuple dictionary.
> +	 */
> +	op->token_type = JSON_TOKEN_NUM;
>  	int32_t field_no = 0;
>  	switch(mp_typeof(**expr)) {
>  	case MP_INT:
> diff --git a/src/box/xrow_update_field.h b/src/box/xrow_update_field.h
> index bda9222cc..fbaf45c5d 100644
> --- a/src/box/xrow_update_field.h
> +++ b/src/box/xrow_update_field.h
> @@ -179,8 +179,18 @@ struct xrow_update_op {
>  	const struct xrow_update_op_meta *meta;
>  	/** Operation arguments. */
>  	union xrow_update_arg arg;
> -	/** First level field no. */
> -	int32_t field_no;
> +	/**
> +	 * Current level token. END means that it is invalid and
> +	 * a next token should be extracted from the lexer.
> +	 */
> +	enum json_token_type token_type;
> +	union {
> +		struct {
> +			const char *key;
> +			uint32_t key_len;
> +		};
> +		int32_t field_no;
> +	};
>  	/** Size of a new field after it is updated. */
>  	uint32_t new_field_len;
>  	/** Opcode symbol: = + - / ... */
> @@ -193,6 +203,17 @@ struct xrow_update_op {
>  	struct json_lexer lexer;
>  };
>  
> +/**
> + * Extract the next token from the operation path lexer. The
> + * result is used to decide to which child of the current
> + * map/array the operation should be forwarded. It is not just a
> + * synonym for json_lexer_next_token, because it fills some
> + * fields of @a op and should be used only to choose the next
> + * child inside the current map/array.
> + */
> +int
> +xrow_update_op_consume_token(struct xrow_update_op *op);
> +
>  /**
>   * Decode an update operation from MessagePack.
>   * @param[out] op Update operation.
> -- 
> 2.21.0 (Apple Git-122.2)
> 

