[Tarantool-patches] [PATCH 2/3] tuple: JSON path update intersection at arrays

Sergey Ostanevich sergos at tarantool.org
Fri Dec 27 17:13:25 MSK 2019


Hi!

Thanks for the patch, just a couple of comments below, LGTM

Sergos

On 23 Dec 23:41, Vladislav Shpilevoy wrote:
> Before the patch only isolated JSON updates were supported as the
> simplest and fastest to implement. This patch allows update
> operations with paths having the same prefix. But difference of
> the paths should start from an array index.
> 
> For example, this is allowed:
> 
>     [1][2][3].a.b.c = 20
>     [1][2][4].e.f.g = 30
> 
>     First difference is [3] vs [4] - intersection by an array, ok.
> 
> This is not allowed yet:
> 
>     [1][2][3].a.b.c = 20
>     [1][2][3].a.e.f = 30
> 
>     First difference is 'b' vs 'e' - intersection by a map,
>     not ok.
> 
> For that a new update tree node type is added - XUPDATE_ROUTE.
> When several update operations have the same prefix, this prefix
> becomes an XUPDATE_ROUTE tree field. It stores the prefix and a
> subtree with these operations.
> 
> Bar and route update nodes can branch and produce more bars and
> routes, when new operations come.
> 
> Part of #1261
> ---
>  src/box/CMakeLists.txt      |   1 +
>  src/box/xrow_update_array.c |  50 ++++++
>  src/box/xrow_update_bar.c   |  14 +-
>  src/box/xrow_update_field.c |   4 +
>  src/box/xrow_update_field.h |  99 +++++++++++
>  src/box/xrow_update_route.c | 346 ++++++++++++++++++++++++++++++++++++
>  test/box/update.result      | 177 ++++++++++++++++++
>  test/box/update.test.lua    |  91 ++++++++++
>  8 files changed, 772 insertions(+), 10 deletions(-)
>  create mode 100644 src/box/xrow_update_route.c
> 
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index fc9d1a3e8..f99efc503 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -44,6 +44,7 @@ add_library(tuple STATIC
>      xrow_update_field.c
>      xrow_update_array.c
>      xrow_update_bar.c
> +    xrow_update_route.c
>      tuple_compare.cc
>      tuple_extract_key.cc
>      tuple_hash.cc
> diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
> index 6a7ce09ff..e49dad5c1 100644
> --- a/src/box/xrow_update_array.c
> +++ b/src/box/xrow_update_array.c
> @@ -195,6 +195,56 @@ xrow_update_array_create(struct xrow_update_field *field, const char *header,
>  	return xrow_update_rope_append(field->array.rope, item, field_count);
>  }
>  
> +int
> +xrow_update_array_create_with_child(struct xrow_update_field *field,
> +				    const char *header,
> +				    const struct xrow_update_field *child,
> +				    int32_t field_no)
> +{
> +	const char *data = header;
> +	uint32_t field_count = mp_decode_array(&data);
> +	const char *first_field = data;
> +	const char *first_field_end = first_field;
> +	mp_next(&first_field_end);
> +	struct region *region = &fiber()->gc;
> +	struct xrow_update_rope *rope = xrow_update_rope_new(region);
> +	if (rope == NULL)
> +		return -1;
> +	struct xrow_update_array_item *item = (struct xrow_update_array_item *)
> +		xrow_update_alloc(region, sizeof(*item));
> +	if (item == NULL)
> +		return -1;
> +	const char *end = first_field_end;
> +	if (field_no > 0) {
> +		for (int32_t i = 1; i < field_no; ++i)
> +			mp_next(&end);
> +		xrow_update_array_item_create(item, XUPDATE_NOP, first_field,
> +					      first_field_end - first_field,
> +					      end - first_field_end);
> +		if (xrow_update_rope_append(rope, item, field_no) != 0)
> +			return -1;
> +		item = (struct xrow_update_array_item *)
> +			xrow_update_alloc(region, sizeof(*item));
> +		if (item == NULL)
> +			return -1;
> +		first_field = end;
> +		first_field_end = first_field;
> +		mp_next(&first_field_end);
> +		end = first_field_end;
> +	}
> +	for (uint32_t i = field_no + 1; i < field_count; ++i)
> +		mp_next(&end);
> +	item->field = *child;
> +	xrow_update_array_item_create(item, child->type, first_field,
> +				      first_field_end - first_field,
> +				      end - first_field_end);
> +	field->type = XUPDATE_ARRAY;
> +	field->data = header;
> +	field->size = end - header;
> +	field->array.rope = rope;
> +	return xrow_update_rope_append(rope, item, field_count - field_no);
> +}
> +
>  uint32_t
>  xrow_update_array_sizeof(struct xrow_update_field *field)
>  {
> diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
> index 7285c7e2b..1c2d2ef99 100644
> --- a/src/box/xrow_update_bar.c
> +++ b/src/box/xrow_update_bar.c
> @@ -307,17 +307,11 @@ int										\
>  xrow_update_op_do_bar_##op_type(struct xrow_update_op *op,			\
>  				struct xrow_update_field *field)		\
>  {										\
> -	/*									\
> -	 * The only way to update a bar is to make a second update		\
> -	 * with the same prefix as this bar. But it is not			\
> -	 * supported yet.							\
> -	 */									\
> -	(void) op;								\
> -	(void) field;								\
>  	assert(field->type == XUPDATE_BAR);					\
> -	diag_set(ClientError, ER_UNSUPPORTED, "update",				\
> -		 "intersected JSON paths");					\
> -	return -1;								\
> +	field = xrow_update_route_branch(field, op);				\
> +	if (field == NULL)							\
> +		return -1;							\
> +	return xrow_update_op_do_field_##op_type(op, field);			\
>  }
>  
>  DO_BAR_OP_GENERIC(insert)
> diff --git a/src/box/xrow_update_field.c b/src/box/xrow_update_field.c
> index 96fcaf747..3a4aa6c90 100644
> --- a/src/box/xrow_update_field.c
> +++ b/src/box/xrow_update_field.c
> @@ -114,6 +114,8 @@ xrow_update_field_sizeof(struct xrow_update_field *field)
>  		return xrow_update_array_sizeof(field);
>  	case XUPDATE_BAR:
>  		return xrow_update_bar_sizeof(field);
> +	case XUPDATE_ROUTE:
> +		return xrow_update_route_sizeof(field);
>  	default:
>  		unreachable();
>  	}
> @@ -141,6 +143,8 @@ xrow_update_field_store(struct xrow_update_field *field, char *out,
>  		return xrow_update_array_store(field, out, out_end);
>  	case XUPDATE_BAR:
>  		return xrow_update_bar_store(field, out, out_end);
> +	case XUPDATE_ROUTE:
> +		return xrow_update_route_store(field, out, out_end);
>  	default:
>  		unreachable();
>  	}
> diff --git a/src/box/xrow_update_field.h b/src/box/xrow_update_field.h
> index fbaf45c5d..f07b5a649 100644
> --- a/src/box/xrow_update_field.h
> +++ b/src/box/xrow_update_field.h
> @@ -275,6 +275,23 @@ enum xrow_update_type {
>  	 * path nodes. And this is the most common case.
>  	 */
>  	XUPDATE_BAR,
> +	/**
> +	 * Field with a subtree of updates having the same prefix
> +	 * stored here explicitly. New updates with the same
> +	 * prefix just follow it without decoding of JSON nor
> +	 * MessagePack. It can be quite helpful when an update
> +	 * works with the same internal object via several
> +	 * operations like this:
> +	 *
> +	 *    [1][2].a.b.c[1] = 20
> +	 *    [1][2].a.b.c[2] = 30
> +	 *    [1][2].a.b.c[3] = true
> +	 *
> +	 * Here [1][2].a.b.c is stored only once as a route with a
> +	 * child XUPDATE_ARRAY making updates '[1] = 20',
> +	 * '[2] = 30', '[3] = true'.
> +	 */
> +	XUPDATE_ROUTE,
>  };
>  
>  /**
> @@ -355,6 +372,17 @@ struct xrow_update_field {
>  				};
>  			};
>  		} bar;
> +		/** Route update - path to an update subtree. */
> +		struct {
> +			/**
> +			 * Common prefix of all updates in the
> +			 * subtree.
> +			 */
> +			const char *path;
> +			int path_len;
> +			/** Update subtree. */
> +			struct xrow_update_field *next_hop;
> +		} route;
>  	};
>  };
>  
> @@ -447,6 +475,33 @@ xrow_update_array_create(struct xrow_update_field *field, const char *header,
>  			 const char *data, const char *data_end,
>  			 uint32_t field_count);
>  
> +/**
> + * The same as mere create, but the array is created with a first
> + * child right away. It allows to make it more efficient, because
> + * under the hood a rope is created not as a one huge range which
> + * is then spit in parts, but as two rope nodes from the
              ^^^^
Should be     split here?

> + * beginning. On the summary: -1 rope node split, -1 decoding of
> + * fields from 0 to @a field_no.
> + *
> + * The function is used during branching, where there was an
> + * existing update, but another one came with the same prefix, and
> + * a different suffix.
> + *
> + * @param[out] field Field to initialize.
> + * @param header Header of the MessagePack array.
> + * @param child A child subtree. The child is copied by value into
> + *        the created array.
> + * @param field_no Field number of @a child.
> + *
> + * @retval  0 Success.
> + * @retval -1 Error.
> + */
> +int
> +xrow_update_array_create_with_child(struct xrow_update_field *field,
> +				    const char *header,
> +				    const struct xrow_update_field *child,
> +				    int32_t field_no);
> +
>  OP_DECL_GENERIC(array)
>  
>  /* }}} xrow_update_field.array */
> @@ -463,6 +518,37 @@ OP_DECL_GENERIC(nop)
>  
>  /* }}} update_field.nop */
>  
> +/* {{{ xrow_update_field.route */
> +
> +/**
> + * Take a bar or a route @a field and split its path in the place
> + * where @a new_op should be applied. Prefix becomes a new route
> + * object, suffix becomes a child of the result route. In the
> + * result @a field stays root of its subtree, and a node of that
> + * subtree is returned, to which @a new_op should be applied.
> + *
> + * Note, this function does not apply @a new_op. It just finds to
> + * where it *should be* applied and does all preparations. It is
> + * done deliberately, because otherwise do_cb() virtual function
> + * of @a new_op would have been called here, since there is no
> + * context. But a caller always knows exactly if it was insert,
> + * set, arith, etc. And a caller can and does use more specific
> + * function like xrow_update_op_do_field_set/insert/... .
> + *
> + * @param field Field to find where to apply @a new_op.
> + * @param new_op New operation to apply.
> + *
> + * @retval not-NULL A field to which @a new_op should be applied.
> + * @retval NULL Error.
> + */
> +struct xrow_update_field *
> +xrow_update_route_branch(struct xrow_update_field *field,
> +			 struct xrow_update_op *new_op);
> +
> +OP_DECL_GENERIC(route)
> +
> +/* }}} xrow_update_field.route */
> +
>  #undef OP_DECL_GENERIC
>  
>  /* {{{ Common helpers. */
> @@ -475,6 +561,17 @@ OP_DECL_GENERIC(nop)
>   * xrow_update_op_do_field_set on its childs. Each child can be
>   * another array, a bar, a route, a map - anything. The functions
>   * below help to make such places shorter and simpler.
> + *
> + * Note, that they are recursive, although it is not clearly
> + * visible. For example, if an update tree contains several array
> + * nodes on one tree branch, then update of the deepest array goes
> + * through each of these nodes and calls
> + * xrow_update_op_do_array_<opname>() on needed children. But it
> + * is ok, because operation count is usually small (<<50 in the
> + * most cases, usually <= 5), and the update tree depth is not
> + * bigger than operation count. Also, fiber stack is big enough to
> + * fit ~10k update tree depth - incredible number, even though the
> + * real limit is 4k due to limited number of operations.
>   */
>  #define OP_DECL_GENERIC(op_type)						\
>  static inline int								\
> @@ -488,6 +585,8 @@ xrow_update_op_do_field_##op_type(struct xrow_update_op *op,			\
>  		return xrow_update_op_do_nop_##op_type(op, field);		\
>  	case XUPDATE_BAR:							\
>  		return xrow_update_op_do_bar_##op_type(op, field);		\
> +	case XUPDATE_ROUTE:							\
> +		return xrow_update_op_do_route_##op_type(op, field);		\
>  	default:								\
>  		unreachable();							\
>  	}									\
> diff --git a/src/box/xrow_update_route.c b/src/box/xrow_update_route.c
> new file mode 100644
> index 000000000..f374db837
> --- /dev/null
> +++ b/src/box/xrow_update_route.c
> @@ -0,0 +1,346 @@
> +/*
> + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + *    copyright notice, this list of conditions and the
> + *    following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + *    copyright notice, this list of conditions and the following
> + *    disclaimer in the documentation and/or other materials
> + *    provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +#include "xrow_update_field.h"
> +#include "fiber.h"
> +#include "tuple.h"
> +
> +/**
> + * Do the actual branch. This is the case when an existing
> + * bar/route path is different from a new operation's path in an
> + * array. The existing object needs to be split into parent-child,
> + * and the new operation becomes a second child.
> + *
> + * @param[out] next_hop A field which will be initialized as an
> + *        array, and which will be a point to apply a new
> + *        operation.
> + * @param parent MessagePack array to be taken by @a next_hop.
> + * @param child Current field from which the branch happens. It
> + *        already contains an update subtree.
> + */
> +static int
> +xrow_update_route_branch_array(struct xrow_update_field *next_hop,
> +			       const char *parent,
> +			       const struct xrow_update_field *child,
> +			       int32_t field_no)
> +{
> +	struct xrow_update_op *op = child->bar.op;
> +	/*
> +	 * There are rules when a subtree can be just copied as is

I would name these as limitations, not rules. 

> +	 * from one parent to another.
> +	 * 1) It should not be a bar update. Because if it is not
> +	 *    a bar, then it is either scalar or an array/map.
> +	 *    Scalar update can be safely moved. Array/map update
> +	 *    doesn't change their parent, and also can be moved.
> +	 *    Otherwise see (2).
> +	 * 2) It is a bar. Then it should not be a leaf. If it is
> +	 *    not a leaf, then it does not change header and other
> +	 *    fields of this particular array, and can be safely
> +	 *    moved to somewhere else.
> +	 *    Otherwise see (1).
                            ^^^
                            (3) I suppose?
> +	 * 3) Ok, it is a bar, a leaf. Then it is a bar with zero
> +	 *    path length. It could degrade to zero path len in
> +	 *    during branching. In this case it should be a scalar
> +	 *    bar. The only non-scalar operations are '!' and '#'.
> +	 *
> +	 * Why '#' and '!' can't be moved? '!', for example, being
> +	 * applied to a field [1], affects all fields [2-*], and
> +	 * the array header. The same but in an even worse form
> +	 * about '#'. Such operations should be redone. They
> +	 * affect many fields and the parent.
> +	 *
> +	 * There is a tricky thing though - why not to just redo
> +	 * all operations here, for the sake of code simplicity?
> +	 * It would allow to remove 'create_with_child' crutch.
> +	 * The answer is - it is not possible. If a field is
> +	 * copyable, it is not re-applicable. And vice-versa. For
> +	 * example, if it is not a leaf, then there may be many
> +	 * operations, not one. A subtree just can't be
> +	 * 're-applied'.
> +	 *
> +	 * If the operation is scalar and a leaf, then its result
> +	 * has already overridden its arguments. This is because
> +	 * scalar operations save result into the arguments, to
> +	 * save memory. A second operation appliance would lead
> +	 * to very surprising results.
> +	 *
> +	 * Another reason - performance. This path should be
> +	 * quite hot, and to copy a struct is for sure much faster
> +	 * than to reapply an operation using a virtual function.
> +	 * Operations '!' and '#' are quite rare, so their
> +	 * optimization is not a critical goal.
> +	 */
> +	if (/* (1) Not a bar. */
> +	    child->type != XUPDATE_BAR ||
> +	    /* (2) Bar, but not a leaf. */
> +	    child->bar.path_len > 0 ||
> +	    /* (3) Leaf, bar, but a scalar operation. */
> +	    (op->opcode != '!' && op->opcode != '#')) {
> +		return xrow_update_array_create_with_child(next_hop, parent,
> +							   child, field_no);
> +	}
> +	/*
> +	 * Can't move the child. The only way to branch is to
> +	 * reapply the child's operation.
> +	 */
> +	op->token_type = JSON_TOKEN_NUM;
> +	op->field_no = field_no;
> +	const char *data = parent;
> +	uint32_t field_count = mp_decode_array(&data);
> +	const char *end = data;
> +	for (uint32_t i = 0; i < field_count; ++i)
> +		mp_next(&end);
> +	if (xrow_update_array_create(next_hop, parent, data, end,
> +				     field_count) != 0)
> +		return -1;
> +	return op->meta->do_op(op, next_hop);
> +}
> +
> +struct xrow_update_field *
> +xrow_update_route_branch(struct xrow_update_field *field,
> +			 struct xrow_update_op *new_op)
> +{
> +	assert(new_op->lexer.src != NULL);
> +	const char *old_path;
> +	int old_path_len;
> +	if (field->type == XUPDATE_BAR) {
> +		old_path = field->bar.path;
> +		old_path_len = field->bar.path_len;
> +	} else {
> +		assert(field->type == XUPDATE_ROUTE);
> +		old_path = field->route.path;
> +		old_path_len = field->route.path_len;
> +	}
> +	assert(old_path != NULL);
> +	struct json_lexer old_path_lexer;
> +	struct json_token old_token, new_token;
> +	/*
> +	 * Offset is going to be used as a length of the route
> +	 * node created as a parent of the old subtree and the
> +	 * new operation. As it is described in the route
> +	 * definition in struct xrow_update_field - route is the
> +	 * common prefix of all operations of the subtree. Here
> +	 * length of that prefix is calculated.
> +	 * In addition, it is used here to determine if the new
> +	 * operation is different from the current subtree from
> +	 * the very beginning. Basically, it means the offset is
> +	 * 0, and no route is generated. Root becomes a regular
> +	 * update field (array, map), not a route. Example:
> +	 * @a field is a bar '[1].a.b = 20', @a new_op is
> +	 * '[2].c.d = 30'. In this case the paths are already
> +	 * different from the beginning, no common prefix = no
> +	 * route. Array with children [1].a.b and [2].c.d becomes
> +	 * a root.
> +	 */
> +	int saved_old_offset;
> +	json_lexer_create(&old_path_lexer, old_path, old_path_len,
> +			  TUPLE_INDEX_BASE);
> +	const char *parent = field->data;
> +	do {
> +		saved_old_offset = old_path_lexer.offset;
> +		int rc = json_lexer_next_token(&old_path_lexer, &old_token);
> +		/* Old path is already validated. */
> +		assert(rc == 0);
> +		rc = json_lexer_next_token(&new_op->lexer, &new_token);
> +		if (rc != 0) {
> +			xrow_update_err_bad_json(new_op, rc);
> +			return NULL;
> +		}
> +		if (json_token_cmp(&old_token, &new_token) != 0)
> +			break;
> +		switch(new_token.type) {
> +		case JSON_TOKEN_NUM:
> +			rc = tuple_field_go_to_index(&parent, new_token.num);
> +			break;
> +		case JSON_TOKEN_STR:
> +			rc = tuple_field_go_to_key(&parent, new_token.str,
> +						   new_token.len);
> +			break;
> +		default:
> +			/*
> +			 * Can't be JSON_TOKEN_ANY, because old
> +			 * and new tokens are equal, but '*' is
> +			 * considered invalid and the old was
> +			 * already checked for that. So the new is
> +			 * valid too. And can't have type ANY.
> +			 */
> +			assert(new_token.type == JSON_TOKEN_END);
> +			xrow_update_err_double(new_op);
> +			return NULL;
> +		}
> +		/*
> +		 * Must always find a field, because the old
> +		 * token already went that path.
> +		 */
> +		assert(rc == 0);
> +	} while (true);
> +	enum mp_type type = mp_typeof(*parent);
> +	/*
> +	 * Ched if the paths are different from the very
           ^^^^
           check(ed)?
> +	 * beginning. It means, that the old field should be
> +	 * transformed instead of creating a new route node.
> +	 */
> +	bool transform_root = saved_old_offset == 0;

It would be nicer if you put comparison in () up there

> +	struct xrow_update_field *next_hop;
> +	if (! transform_root) {

No spaces after unaries, use sed -ie "s/(! /\(\!/"

> +		next_hop = (struct xrow_update_field *)
> +			region_alloc(&fiber()->gc, sizeof(*next_hop));
> +		if (next_hop == NULL) {
> +			diag_set(OutOfMemory, sizeof(*next_hop), "region_alloc",
> +				 "next_hop");
> +			return NULL;
> +		}
> +	} else {
> +		next_hop = field;
> +	}
> +
> +	int path_offset = old_path_lexer.offset;
> +	struct xrow_update_field child = *field;
> +	if (child.type == XUPDATE_ROUTE) {
> +		child.route.path += path_offset;
> +		child.route.path_len -= path_offset;
> +		if (child.route.path_len == 0)
> +			child = *child.route.next_hop;
> +	} else {
> +		assert(child.type == XUPDATE_BAR);
> +		child.bar.path += path_offset;
> +		child.bar.path_len -= path_offset;
> +		/*
> +		 * Yeah, bar length can become 0 here, but it is
> +		 * ok as long as it is a scalar operation (not '!'
> +		 * and not '#'). When a bar is scalar, it operates
> +		 * on one concrete field and works even if its
> +		 * path len is 0. Talking of '#' and '!' - they
> +		 * are handled by array and map 'branchers'
> +		 * internally, below. They reapply such
> +		 * operations.
> +		 */
> +	}
> +
> +	if (type == MP_ARRAY) {
> +		if (new_token.type != JSON_TOKEN_NUM) {
> +			xrow_update_err(new_op, "can not update array by "\
> +					"non-integer index");
> +			return NULL;
> +		}
> +		new_op->token_type = JSON_TOKEN_NUM;
> +		new_op->field_no = new_token.num;
> +		if (xrow_update_route_branch_array(next_hop, parent, &child,
> +						   old_token.num) != 0)
> +			return NULL;
> +	} else if (type == MP_MAP) {
> +		diag_set(ClientError, ER_UNSUPPORTED, "update",
> +			 "path intersection on map");
> +		return NULL;
> +	} else {
> +		xrow_update_err_no_such_field(new_op);
> +		return NULL;
> +	}
> +
> +	if (! transform_root) {
> +		field->type = XUPDATE_ROUTE;
> +		field->route.path = old_path;
> +		field->route.path_len = saved_old_offset;
> +		field->route.next_hop = next_hop;
> +	}
> +	return next_hop;
> +}
> +
> +/**
> + * Obtain a next node of the update tree to which @a op should be
> + * propagated. It is the same as branch, but has a fast path in
> + * case @a field is route, and operation prefix matches this
> + * route - then no need to parse JSON and dive into MessagePack,
> + * the route is just followed, via a lexer offset increase.
> + */
> +static struct xrow_update_field *
> +xrow_update_route_next(struct xrow_update_field *field, struct xrow_update_op *op)
> +{
> +	assert(field->type == XUPDATE_ROUTE);
> +	assert(! xrow_update_op_is_term(op));
> +	const char *new_path = op->lexer.src + op->lexer.offset;
> +	int new_path_len = op->lexer.src_len - op->lexer.offset;
> +	if (field->route.path_len <= new_path_len &&
> +	    memcmp(field->route.path, new_path, field->route.path_len) == 0) {
> +		/*
> +		 * Fast path: jump to the next hop with no
> +		 * decoding. Is used, when several JSON updates
> +		 * have the same prefix.
> +		 */
> +		op->lexer.offset += field->route.path_len;
> +		return field->route.next_hop;
> +	}
> +	return xrow_update_route_branch(field, op);
> +}
> +
> +#define DO_SCALAR_OP_GENERIC(op_type)						\
> +int										\
> +xrow_update_op_do_route_##op_type(struct xrow_update_op *op,			\
> +				  struct xrow_update_field *field)		\
> +{										\
> +	assert(field->type == XUPDATE_ROUTE);					\
> +	struct xrow_update_field *next_hop = xrow_update_route_next(field, op);	\
> +	if (next_hop == NULL)							\
> +		return -1;							\
> +	return xrow_update_op_do_field_##op_type(op, next_hop);					\
> +}
> +
> +DO_SCALAR_OP_GENERIC(set)
> +
> +DO_SCALAR_OP_GENERIC(insert)
> +
> +DO_SCALAR_OP_GENERIC(delete)
> +
> +DO_SCALAR_OP_GENERIC(arith)
> +
> +DO_SCALAR_OP_GENERIC(bit)
> +
> +DO_SCALAR_OP_GENERIC(splice)
> +
> +uint32_t
> +xrow_update_route_sizeof(struct xrow_update_field *field)
> +{
> +	return field->size - field->route.next_hop->size +
> +	       xrow_update_field_sizeof(field->route.next_hop);
> +}
> +
> +uint32_t
> +xrow_update_route_store(struct xrow_update_field *field, char *out,
> +			char *out_end)
> +{
> +	char *saved_out = out;
> +	int before_hop = field->route.next_hop->data - field->data;
> +	memcpy(out, field->data, before_hop);
> +	out += before_hop;
> +	out += xrow_update_field_store(field->route.next_hop, out, out_end);
> +	int after_hop = before_hop + field->route.next_hop->size;
> +	memcpy(out, field->data + after_hop, field->size - after_hop);
> +	return out + field->size - after_hop - saved_out;
> +}
> diff --git a/test/box/update.result b/test/box/update.result
> index ee38c100f..bf76f6638 100644
> --- a/test/box/update.result
> +++ b/test/box/update.result
> @@ -1294,6 +1294,183 @@ vy_s:select()
>  vy_s:drop()
>  ---
>  ...
> +--
> +-- Complex updates. Intersected paths, different types.
> +--
> +--
> +-- =
> +--
> +-- Difference in the last part.
> +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}})
> +---
> +- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
> +      'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 375], [400,
> +        450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
> +...
> +-- Different starting from the non-last part.
> +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}})
> +---
> +- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
> +      'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 350], [400,
> +        475]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
> +...
> +-- Contains string keys in the middle.
> +t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}})
> +---
> +- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4000, 5000, 6, 7, 8],
> +      'e': 500, 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350],
> +      [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
> +...
> +t = {1}
> +---
> +...
> +t[2] = setmetatable({}, {__serialize = 'map'})
> +---
> +...
> +t[3] = {}
> +---
> +...
> +t[4] = {                        \
> +    1,                          \
> +    2,                          \
> +    3,                          \
> +    {                           \
> +        4,                      \
> +        5,                      \
> +        6,                      \
> +        7,                      \
> +        {                       \
> +            8,                  \
> +            9,                  \
> +            {10, 11, 12},       \
> +            13,                 \
> +            14,                 \
> +        },                      \
> +        15,                     \
> +        16,                     \
> +        {17, 18, 19}            \
> +    },                          \
> +    20,                         \
> +    {                           \
> +        a = 21,                 \
> +        b = 22,                 \
> +        c = {                   \
> +            d = 23,             \
> +            e = 24              \
> +        }                       \
> +    },                          \
> +    25                          \
> +}
> +---
> +...
> +t = s:replace(t)
> +---
> +...
> +-- Non-first and non-last field updates.
> +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}})
> +---
> +- [1, {}, [], [1, 2, 3, [4, 5, 6, 7, [8, 9, [10, 11000, 12], 13, 14], 15, 16, [17,
> +        18, 19000]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- Triple intersection with enabled fast jump by route.
> +t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}})
> +---
> +- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9000, [10, 11, 12], 13, 14], 15, 16, [
> +        17, 18000, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- Triple intersection with no optimizations.
> +t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}})
> +---
> +- [1, {}, [], [1, 2, 3000, [4, 5000, 6, 7, [8000, 9, [10, 11, 12], 13, 14], 15, 16,
> +      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- Last operation splits previous ones.
> +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}})
> +---
> +- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9, [10000, 11000, 12], 13, 14], 15, 16,
> +      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- First two operations generate a route '[4]' in the 4th field of
> +-- the tuple. With childs [1] and [2]. The last operation should
> +-- delete the route and replace it with a full featured array.
> +t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}})
> +---
> +- [1, {}, [], [1, 2, 3.5, [4.5, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16, [
> +        17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- Test errors.
> +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}})
> +---
> +- error: 'Field ''[4][4][5].a'' UPDATE error: can not update array by non-integer
> +    index'
> +...
> +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}})
> +---
> +- error: 'Field ''[4][4][*].a'' UPDATE error: can not update array by non-integer
> +    index'
> +...
> +t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}})
> +---
> +- error: 'Field ''[4][4][*].b'' UPDATE error: invalid JSON in position 8'
> +...
> +t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}})
> +---
> +- error: 'Field ''[4][4]-badjson'' UPDATE error: invalid JSON in position 7'
> +...
> +t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}})
> +---
> +- error: 'Field ''[4][4][1]'' UPDATE error: double update of the same field'
> +...
> +-- First two operations produce zero length bar update in field
> +-- [4][4][5][4]. The third operation should raise an error.
> +t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}})
> +---
> +- error: 'Field ''[4][4][5][4]'' UPDATE error: double update of the same field'
> +...
> +--
> +-- !
> +--
> +t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}})
> +---
> +- [1, {}, [], [1, 2, 3, [4, 5.25, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16,
> +      [17, 18, 19]], 19.5, 19.75, 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}},
> +    25]]
> +...
> +--
> +-- #
> +--
> +t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}})
> +---
> +- [1, {}, [], [1, 2, 3, [4, 5, 6, 15, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {
> +        'd': 23, 'e': 24}}, 25]]
> +...
> +--
> +-- Scalar operations.
> +--
> +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}})
> +---
> +- [1, {}, [], [1, 2, 3.5, [4, 5, 6, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16, [
> +        17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}})
> +---
> +- [1, {}, [], [1, 2, 3.5, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16,
> +      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- Combined - check that operations following '!' take into
> +-- account that there is a new element. Indexes should be relative
> +-- to the new array size.
> +t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}})
> +---
> +- [1, {}, [], [1, 2, 2.5, 3, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15,
> +      16, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
> +...
> +-- Error - an attempt to follow JSON path in a scalar field
> +-- during branching.
> +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}})
> +---
> +- error: Field ''[4][3][2]'' was not found in the tuple
> +...
>  s:drop()
>  ---
>  ...
> diff --git a/test/box/update.test.lua b/test/box/update.test.lua
> index 60e669d27..d91a2e95c 100644
> --- a/test/box/update.test.lua
> +++ b/test/box/update.test.lua
> @@ -424,4 +424,95 @@ box.commit()
>  vy_s:select()
>  vy_s:drop()
>  
> +--
> +-- Complex updates. Intersected paths, different types.
> +--
> +
> +--
> +-- =
> +--
> +-- Difference in the last part.
> +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}})
> +-- Different starting from the non-last part.
> +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}})
> +-- Contains string keys in the middle.
> +t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}})
> +t = {1}
> +t[2] = setmetatable({}, {__serialize = 'map'})
> +t[3] = {}
> +t[4] = {                        \
> +    1,                          \
> +    2,                          \
> +    3,                          \
> +    {                           \
> +        4,                      \
> +        5,                      \
> +        6,                      \
> +        7,                      \
> +        {                       \
> +            8,                  \
> +            9,                  \
> +            {10, 11, 12},       \
> +            13,                 \
> +            14,                 \
> +        },                      \
> +        15,                     \
> +        16,                     \
> +        {17, 18, 19}            \
> +    },                          \
> +    20,                         \
> +    {                           \
> +        a = 21,                 \
> +        b = 22,                 \
> +        c = {                   \
> +            d = 23,             \
> +            e = 24              \
> +        }                       \
> +    },                          \
> +    25                          \
> +}
> +t = s:replace(t)
> +-- Non-first and non-last field updates.
> +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}})
> +-- Triple intersection with enabled fast jump by route.
> +t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}})
> +-- Triple intersection with no optimizations.
> +t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}})
> +-- Last operation splits previous ones.
> +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}})
> +-- First two operations generate a route '[4]' in the 4th field of
> +-- the tuple. With childs [1] and [2]. The last operation should
> +-- delete the route and replace it with a full featured array.
> +t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}})
> +-- Test errors.
> +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}})
> +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}})
> +t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}})
> +t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}})
> +t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}})
> +-- First two operations produce zero length bar update in field
> +-- [4][4][5][4]. The third operation should raise an error.
> +t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}})
> +
> +--
> +-- !
> +--
> +t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}})
> +--
> +-- #
> +--
> +t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}})
> +--
> +-- Scalar operations.
> +--
> +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}})
> +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}})
> +-- Combined - check that operations following '!' take into
> +-- account that there is a new element. Indexes should be relative
> +-- to the new array size.
> +t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}})
> +-- Error - an attempt to follow JSON path in a scalar field
> +-- during branching.
> +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}})
> +
>  s:drop()
> -- 
> 2.21.0 (Apple Git-122.2)
> 


More information about the Tarantool-patches mailing list