[Tarantool-patches] [PATCH 2/3] tuple: JSON path update intersection at arrays

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Dec 24 01:41:49 MSK 2019


Before the patch only isolated JSON updates were supported as the
simplest and fastest to implement. This patch allows update
operations with paths having the same prefix. But difference of
the paths should start from an array index.

For example, this is allowed:

    [1][2][3].a.b.c = 20
    [1][2][4].e.f.g = 30

    First difference is [3] vs [4] - intersection by an array, ok.

This is not allowed yet:

    [1][2][3].a.b.c = 20
    [1][2][3].a.e.f = 30

    First difference is 'b' vs 'e' - intersection by a map,
    not ok.

For that a new update tree node type is added - XUPDATE_ROUTE.
When several update operations have the same prefix, this prefix
becomes an XUPDATE_ROUTE tree field. It stores the prefix and a
subtree with these operations.

Bar and route update nodes can branch and produce more bars and
routes, when new operations come.

Part of #1261
---
 src/box/CMakeLists.txt      |   1 +
 src/box/xrow_update_array.c |  50 ++++++
 src/box/xrow_update_bar.c   |  14 +-
 src/box/xrow_update_field.c |   4 +
 src/box/xrow_update_field.h |  99 +++++++++++
 src/box/xrow_update_route.c | 346 ++++++++++++++++++++++++++++++++++++
 test/box/update.result      | 177 ++++++++++++++++++
 test/box/update.test.lua    |  91 ++++++++++
 8 files changed, 772 insertions(+), 10 deletions(-)
 create mode 100644 src/box/xrow_update_route.c

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index fc9d1a3e8..f99efc503 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -44,6 +44,7 @@ add_library(tuple STATIC
     xrow_update_field.c
     xrow_update_array.c
     xrow_update_bar.c
+    xrow_update_route.c
     tuple_compare.cc
     tuple_extract_key.cc
     tuple_hash.cc
diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
index 6a7ce09ff..e49dad5c1 100644
--- a/src/box/xrow_update_array.c
+++ b/src/box/xrow_update_array.c
@@ -195,6 +195,56 @@ xrow_update_array_create(struct xrow_update_field *field, const char *header,
 	return xrow_update_rope_append(field->array.rope, item, field_count);
 }
 
+int
+xrow_update_array_create_with_child(struct xrow_update_field *field,
+				    const char *header,
+				    const struct xrow_update_field *child,
+				    int32_t field_no)
+{
+	const char *data = header;
+	uint32_t field_count = mp_decode_array(&data);
+	const char *first_field = data;
+	const char *first_field_end = first_field;
+	mp_next(&first_field_end);
+	struct region *region = &fiber()->gc;
+	struct xrow_update_rope *rope = xrow_update_rope_new(region);
+	if (rope == NULL)
+		return -1;
+	struct xrow_update_array_item *item = (struct xrow_update_array_item *)
+		xrow_update_alloc(region, sizeof(*item));
+	if (item == NULL)
+		return -1;
+	const char *end = first_field_end;
+	if (field_no > 0) {
+		for (int32_t i = 1; i < field_no; ++i)
+			mp_next(&end);
+		xrow_update_array_item_create(item, XUPDATE_NOP, first_field,
+					      first_field_end - first_field,
+					      end - first_field_end);
+		if (xrow_update_rope_append(rope, item, field_no) != 0)
+			return -1;
+		item = (struct xrow_update_array_item *)
+			xrow_update_alloc(region, sizeof(*item));
+		if (item == NULL)
+			return -1;
+		first_field = end;
+		first_field_end = first_field;
+		mp_next(&first_field_end);
+		end = first_field_end;
+	}
+	for (uint32_t i = field_no + 1; i < field_count; ++i)
+		mp_next(&end);
+	item->field = *child;
+	xrow_update_array_item_create(item, child->type, first_field,
+				      first_field_end - first_field,
+				      end - first_field_end);
+	field->type = XUPDATE_ARRAY;
+	field->data = header;
+	field->size = end - header;
+	field->array.rope = rope;
+	return xrow_update_rope_append(rope, item, field_count - field_no);
+}
+
 uint32_t
 xrow_update_array_sizeof(struct xrow_update_field *field)
 {
diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
index 7285c7e2b..1c2d2ef99 100644
--- a/src/box/xrow_update_bar.c
+++ b/src/box/xrow_update_bar.c
@@ -307,17 +307,11 @@ int										\
 xrow_update_op_do_bar_##op_type(struct xrow_update_op *op,			\
 				struct xrow_update_field *field)		\
 {										\
-	/*									\
-	 * The only way to update a bar is to make a second update		\
-	 * with the same prefix as this bar. But it is not			\
-	 * supported yet.							\
-	 */									\
-	(void) op;								\
-	(void) field;								\
 	assert(field->type == XUPDATE_BAR);					\
-	diag_set(ClientError, ER_UNSUPPORTED, "update",				\
-		 "intersected JSON paths");					\
-	return -1;								\
+	field = xrow_update_route_branch(field, op);				\
+	if (field == NULL)							\
+		return -1;							\
+	return xrow_update_op_do_field_##op_type(op, field);			\
 }
 
 DO_BAR_OP_GENERIC(insert)
diff --git a/src/box/xrow_update_field.c b/src/box/xrow_update_field.c
index 96fcaf747..3a4aa6c90 100644
--- a/src/box/xrow_update_field.c
+++ b/src/box/xrow_update_field.c
@@ -114,6 +114,8 @@ xrow_update_field_sizeof(struct xrow_update_field *field)
 		return xrow_update_array_sizeof(field);
 	case XUPDATE_BAR:
 		return xrow_update_bar_sizeof(field);
+	case XUPDATE_ROUTE:
+		return xrow_update_route_sizeof(field);
 	default:
 		unreachable();
 	}
@@ -141,6 +143,8 @@ xrow_update_field_store(struct xrow_update_field *field, char *out,
 		return xrow_update_array_store(field, out, out_end);
 	case XUPDATE_BAR:
 		return xrow_update_bar_store(field, out, out_end);
+	case XUPDATE_ROUTE:
+		return xrow_update_route_store(field, out, out_end);
 	default:
 		unreachable();
 	}
diff --git a/src/box/xrow_update_field.h b/src/box/xrow_update_field.h
index fbaf45c5d..f07b5a649 100644
--- a/src/box/xrow_update_field.h
+++ b/src/box/xrow_update_field.h
@@ -275,6 +275,23 @@ enum xrow_update_type {
 	 * path nodes. And this is the most common case.
 	 */
 	XUPDATE_BAR,
+	/**
+	 * Field with a subtree of updates having the same prefix
+	 * stored here explicitly. New updates with the same
+	 * prefix just follow it without decoding of JSON nor
+	 * MessagePack. It can be quite helpful when an update
+	 * works with the same internal object via several
+	 * operations like this:
+	 *
+	 *    [1][2].a.b.c[1] = 20
+	 *    [1][2].a.b.c[2] = 30
+	 *    [1][2].a.b.c[3] = true
+	 *
+	 * Here [1][2].a.b.c is stored only once as a route with a
+	 * child XUPDATE_ARRAY making updates '[1] = 20',
+	 * '[2] = 30', '[3] = true'.
+	 */
+	XUPDATE_ROUTE,
 };
 
 /**
@@ -355,6 +372,17 @@ struct xrow_update_field {
 				};
 			};
 		} bar;
+		/** Route update - path to an update subtree. */
+		struct {
+			/**
+			 * Common prefix of all updates in the
+			 * subtree.
+			 */
+			const char *path;
+			int path_len;
+			/** Update subtree. */
+			struct xrow_update_field *next_hop;
+		} route;
 	};
 };
 
@@ -447,6 +475,33 @@ xrow_update_array_create(struct xrow_update_field *field, const char *header,
 			 const char *data, const char *data_end,
 			 uint32_t field_count);
 
+/**
+ * The same as mere create, but the array is created with a first
+ * child right away. It allows to make it more efficient, because
+ * under the hood a rope is created not as a one huge range which
+ * is then spit in parts, but as two rope nodes from the
+ * beginning. On the summary: -1 rope node split, -1 decoding of
+ * fields from 0 to @a field_no.
+ *
+ * The function is used during branching, where there was an
+ * existing update, but another one came with the same prefix, and
+ * a different suffix.
+ *
+ * @param[out] field Field to initialize.
+ * @param header Header of the MessagePack array.
+ * @param child A child subtree. The child is copied by value into
+ *        the created array.
+ * @param field_no Field number of @a child.
+ *
+ * @retval  0 Success.
+ * @retval -1 Error.
+ */
+int
+xrow_update_array_create_with_child(struct xrow_update_field *field,
+				    const char *header,
+				    const struct xrow_update_field *child,
+				    int32_t field_no);
+
 OP_DECL_GENERIC(array)
 
 /* }}} xrow_update_field.array */
@@ -463,6 +518,37 @@ OP_DECL_GENERIC(nop)
 
 /* }}} update_field.nop */
 
+/* {{{ xrow_update_field.route */
+
+/**
+ * Take a bar or a route @a field and split its path in the place
+ * where @a new_op should be applied. Prefix becomes a new route
+ * object, suffix becomes a child of the result route. In the
+ * result @a field stays root of its subtree, and a node of that
+ * subtree is returned, to which @a new_op should be applied.
+ *
+ * Note, this function does not apply @a new_op. It just finds to
+ * where it *should be* applied and does all preparations. It is
+ * done deliberately, because otherwise do_cb() virtual function
+ * of @a new_op would have been called here, since there is no
+ * context. But a caller always knows exactly if it was insert,
+ * set, arith, etc. And a caller can and does use more specific
+ * function like xrow_update_op_do_field_set/insert/... .
+ *
+ * @param field Field to find where to apply @a new_op.
+ * @param new_op New operation to apply.
+ *
+ * @retval not-NULL A field to which @a new_op should be applied.
+ * @retval NULL Error.
+ */
+struct xrow_update_field *
+xrow_update_route_branch(struct xrow_update_field *field,
+			 struct xrow_update_op *new_op);
+
+OP_DECL_GENERIC(route)
+
+/* }}} xrow_update_field.route */
+
 #undef OP_DECL_GENERIC
 
 /* {{{ Common helpers. */
@@ -475,6 +561,17 @@ OP_DECL_GENERIC(nop)
  * xrow_update_op_do_field_set on its childs. Each child can be
  * another array, a bar, a route, a map - anything. The functions
  * below help to make such places shorter and simpler.
+ *
+ * Note, that they are recursive, although it is not clearly
+ * visible. For example, if an update tree contains several array
+ * nodes on one tree branch, then update of the deepest array goes
+ * through each of these nodes and calls
+ * xrow_update_op_do_array_<opname>() on needed children. But it
+ * is ok, because operation count is usually small (<<50 in the
+ * most cases, usually <= 5), and the update tree depth is not
+ * bigger than operation count. Also, fiber stack is big enough to
+ * fit ~10k update tree depth - incredible number, even though the
+ * real limit is 4k due to limited number of operations.
  */
 #define OP_DECL_GENERIC(op_type)						\
 static inline int								\
@@ -488,6 +585,8 @@ xrow_update_op_do_field_##op_type(struct xrow_update_op *op,			\
 		return xrow_update_op_do_nop_##op_type(op, field);		\
 	case XUPDATE_BAR:							\
 		return xrow_update_op_do_bar_##op_type(op, field);		\
+	case XUPDATE_ROUTE:							\
+		return xrow_update_op_do_route_##op_type(op, field);		\
 	default:								\
 		unreachable();							\
 	}									\
diff --git a/src/box/xrow_update_route.c b/src/box/xrow_update_route.c
new file mode 100644
index 000000000..f374db837
--- /dev/null
+++ b/src/box/xrow_update_route.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "xrow_update_field.h"
+#include "fiber.h"
+#include "tuple.h"
+
+/**
+ * Do the actual branch. This is the case when an existing
+ * bar/route path is different from a new operation's path in an
+ * array. The existing object needs to be split into parent-child,
+ * and the new operation becomes a second child.
+ *
+ * @param[out] next_hop A field which will be initialized as an
+ *        array, and which will be a point to apply a new
+ *        operation.
+ * @param parent MessagePack array to be taken by @a next_hop.
+ * @param child Current field from which the branch happens. It
+ *        already contains an update subtree.
+ */
+static int
+xrow_update_route_branch_array(struct xrow_update_field *next_hop,
+			       const char *parent,
+			       const struct xrow_update_field *child,
+			       int32_t field_no)
+{
+	struct xrow_update_op *op = child->bar.op;
+	/*
+	 * There are rules when a subtree can be just copied as is
+	 * from one parent to another.
+	 * 1) It should not be a bar update. Because if it is not
+	 *    a bar, then it is either scalar or an array/map.
+	 *    Scalar update can be safely moved. Array/map update
+	 *    doesn't change their parent, and also can be moved.
+	 *    Otherwise see (2).
+	 * 2) It is a bar. Then it should not be a leaf. If it is
+	 *    not a leaf, then it does not change header and other
+	 *    fields of this particular array, and can be safely
+	 *    moved to somewhere else.
+	 *    Otherwise see (1).
+	 * 3) Ok, it is a bar, a leaf. Then it is a bar with zero
+	 *    path length. It could degrade to zero path len in
+	 *    during branching. In this case it should be a scalar
+	 *    bar. The only non-scalar operations are '!' and '#'.
+	 *
+	 * Why '#' and '!' can't be moved? '!', for example, being
+	 * applied to a field [1], affects all fields [2-*], and
+	 * the array header. The same but in an even worse form
+	 * about '#'. Such operations should be redone. They
+	 * affect many fields and the parent.
+	 *
+	 * There is a tricky thing though - why not to just redo
+	 * all operations here, for the sake of code simplicity?
+	 * It would allow to remove 'create_with_child' crutch.
+	 * The answer is - it is not possible. If a field is
+	 * copyable, it is not re-applicable. And vice-versa. For
+	 * example, if it is not a leaf, then there may be many
+	 * operations, not one. A subtree just can't be
+	 * 're-applied'.
+	 *
+	 * If the operation is scalar and a leaf, then its result
+	 * has already overridden its arguments. This is because
+	 * scalar operations save result into the arguments, to
+	 * save memory. A second operation appliance would lead
+	 * to very surprising results.
+	 *
+	 * Another reason - performance. This path should be
+	 * quite hot, and to copy a struct is for sure much faster
+	 * than to reapply an operation using a virtual function.
+	 * Operations '!' and '#' are quite rare, so their
+	 * optimization is not a critical goal.
+	 */
+	if (/* (1) Not a bar. */
+	    child->type != XUPDATE_BAR ||
+	    /* (2) Bar, but not a leaf. */
+	    child->bar.path_len > 0 ||
+	    /* (3) Leaf, bar, but a scalar operation. */
+	    (op->opcode != '!' && op->opcode != '#')) {
+		return xrow_update_array_create_with_child(next_hop, parent,
+							   child, field_no);
+	}
+	/*
+	 * Can't move the child. The only way to branch is to
+	 * reapply the child's operation.
+	 */
+	op->token_type = JSON_TOKEN_NUM;
+	op->field_no = field_no;
+	const char *data = parent;
+	uint32_t field_count = mp_decode_array(&data);
+	const char *end = data;
+	for (uint32_t i = 0; i < field_count; ++i)
+		mp_next(&end);
+	if (xrow_update_array_create(next_hop, parent, data, end,
+				     field_count) != 0)
+		return -1;
+	return op->meta->do_op(op, next_hop);
+}
+
+struct xrow_update_field *
+xrow_update_route_branch(struct xrow_update_field *field,
+			 struct xrow_update_op *new_op)
+{
+	assert(new_op->lexer.src != NULL);
+	const char *old_path;
+	int old_path_len;
+	if (field->type == XUPDATE_BAR) {
+		old_path = field->bar.path;
+		old_path_len = field->bar.path_len;
+	} else {
+		assert(field->type == XUPDATE_ROUTE);
+		old_path = field->route.path;
+		old_path_len = field->route.path_len;
+	}
+	assert(old_path != NULL);
+	struct json_lexer old_path_lexer;
+	struct json_token old_token, new_token;
+	/*
+	 * Offset is going to be used as a length of the route
+	 * node created as a parent of the old subtree and the
+	 * new operation. As it is described in the route
+	 * definition in struct xrow_update_field - route is the
+	 * common prefix of all operations of the subtree. Here
+	 * length of that prefix is calculated.
+	 * In addition, it is used here to determine if the new
+	 * operation is different from the current subtree from
+	 * the very beginning. Basically, it means the offset is
+	 * 0, and no route is generated. Root becomes a regular
+	 * update field (array, map), not a route. Example:
+	 * @a field is a bar '[1].a.b = 20', @a new_op is
+	 * '[2].c.d = 30'. In this case the paths are already
+	 * different from the beginning, no common prefix = no
+	 * route. Array with children [1].a.b and [2].c.d becomes
+	 * a root.
+	 */
+	int saved_old_offset;
+	json_lexer_create(&old_path_lexer, old_path, old_path_len,
+			  TUPLE_INDEX_BASE);
+	const char *parent = field->data;
+	do {
+		saved_old_offset = old_path_lexer.offset;
+		int rc = json_lexer_next_token(&old_path_lexer, &old_token);
+		/* Old path is already validated. */
+		assert(rc == 0);
+		rc = json_lexer_next_token(&new_op->lexer, &new_token);
+		if (rc != 0) {
+			xrow_update_err_bad_json(new_op, rc);
+			return NULL;
+		}
+		if (json_token_cmp(&old_token, &new_token) != 0)
+			break;
+		switch(new_token.type) {
+		case JSON_TOKEN_NUM:
+			rc = tuple_field_go_to_index(&parent, new_token.num);
+			break;
+		case JSON_TOKEN_STR:
+			rc = tuple_field_go_to_key(&parent, new_token.str,
+						   new_token.len);
+			break;
+		default:
+			/*
+			 * Can't be JSON_TOKEN_ANY, because old
+			 * and new tokens are equal, but '*' is
+			 * considered invalid and the old was
+			 * already checked for that. So the new is
+			 * valid too. And can't have type ANY.
+			 */
+			assert(new_token.type == JSON_TOKEN_END);
+			xrow_update_err_double(new_op);
+			return NULL;
+		}
+		/*
+		 * Must always find a field, because the old
+		 * token already went that path.
+		 */
+		assert(rc == 0);
+	} while (true);
+	enum mp_type type = mp_typeof(*parent);
+	/*
+	 * Ched if the paths are different from the very
+	 * beginning. It means, that the old field should be
+	 * transformed instead of creating a new route node.
+	 */
+	bool transform_root = saved_old_offset == 0;
+	struct xrow_update_field *next_hop;
+	if (! transform_root) {
+		next_hop = (struct xrow_update_field *)
+			region_alloc(&fiber()->gc, sizeof(*next_hop));
+		if (next_hop == NULL) {
+			diag_set(OutOfMemory, sizeof(*next_hop), "region_alloc",
+				 "next_hop");
+			return NULL;
+		}
+	} else {
+		next_hop = field;
+	}
+
+	int path_offset = old_path_lexer.offset;
+	struct xrow_update_field child = *field;
+	if (child.type == XUPDATE_ROUTE) {
+		child.route.path += path_offset;
+		child.route.path_len -= path_offset;
+		if (child.route.path_len == 0)
+			child = *child.route.next_hop;
+	} else {
+		assert(child.type == XUPDATE_BAR);
+		child.bar.path += path_offset;
+		child.bar.path_len -= path_offset;
+		/*
+		 * Yeah, bar length can become 0 here, but it is
+		 * ok as long as it is a scalar operation (not '!'
+		 * and not '#'). When a bar is scalar, it operates
+		 * on one concrete field and works even if its
+		 * path len is 0. Talking of '#' and '!' - they
+		 * are handled by array and map 'branchers'
+		 * internally, below. They reapply such
+		 * operations.
+		 */
+	}
+
+	if (type == MP_ARRAY) {
+		if (new_token.type != JSON_TOKEN_NUM) {
+			xrow_update_err(new_op, "can not update array by "\
+					"non-integer index");
+			return NULL;
+		}
+		new_op->token_type = JSON_TOKEN_NUM;
+		new_op->field_no = new_token.num;
+		if (xrow_update_route_branch_array(next_hop, parent, &child,
+						   old_token.num) != 0)
+			return NULL;
+	} else if (type == MP_MAP) {
+		diag_set(ClientError, ER_UNSUPPORTED, "update",
+			 "path intersection on map");
+		return NULL;
+	} else {
+		xrow_update_err_no_such_field(new_op);
+		return NULL;
+	}
+
+	if (! transform_root) {
+		field->type = XUPDATE_ROUTE;
+		field->route.path = old_path;
+		field->route.path_len = saved_old_offset;
+		field->route.next_hop = next_hop;
+	}
+	return next_hop;
+}
+
+/**
+ * Obtain a next node of the update tree to which @a op should be
+ * propagated. It is the same as branch, but has a fast path in
+ * case @a field is route, and operation prefix matches this
+ * route - then no need to parse JSON and dive into MessagePack,
+ * the route is just followed, via a lexer offset increase.
+ */
+static struct xrow_update_field *
+xrow_update_route_next(struct xrow_update_field *field, struct xrow_update_op *op)
+{
+	assert(field->type == XUPDATE_ROUTE);
+	assert(! xrow_update_op_is_term(op));
+	const char *new_path = op->lexer.src + op->lexer.offset;
+	int new_path_len = op->lexer.src_len - op->lexer.offset;
+	if (field->route.path_len <= new_path_len &&
+	    memcmp(field->route.path, new_path, field->route.path_len) == 0) {
+		/*
+		 * Fast path: jump to the next hop with no
+		 * decoding. Is used, when several JSON updates
+		 * have the same prefix.
+		 */
+		op->lexer.offset += field->route.path_len;
+		return field->route.next_hop;
+	}
+	return xrow_update_route_branch(field, op);
+}
+
+#define DO_SCALAR_OP_GENERIC(op_type)						\
+int										\
+xrow_update_op_do_route_##op_type(struct xrow_update_op *op,			\
+				  struct xrow_update_field *field)		\
+{										\
+	assert(field->type == XUPDATE_ROUTE);					\
+	struct xrow_update_field *next_hop = xrow_update_route_next(field, op);	\
+	if (next_hop == NULL)							\
+		return -1;							\
+	return xrow_update_op_do_field_##op_type(op, next_hop);					\
+}
+
+DO_SCALAR_OP_GENERIC(set)
+
+DO_SCALAR_OP_GENERIC(insert)
+
+DO_SCALAR_OP_GENERIC(delete)
+
+DO_SCALAR_OP_GENERIC(arith)
+
+DO_SCALAR_OP_GENERIC(bit)
+
+DO_SCALAR_OP_GENERIC(splice)
+
+uint32_t
+xrow_update_route_sizeof(struct xrow_update_field *field)
+{
+	return field->size - field->route.next_hop->size +
+	       xrow_update_field_sizeof(field->route.next_hop);
+}
+
+uint32_t
+xrow_update_route_store(struct xrow_update_field *field, char *out,
+			char *out_end)
+{
+	char *saved_out = out;
+	int before_hop = field->route.next_hop->data - field->data;
+	memcpy(out, field->data, before_hop);
+	out += before_hop;
+	out += xrow_update_field_store(field->route.next_hop, out, out_end);
+	int after_hop = before_hop + field->route.next_hop->size;
+	memcpy(out, field->data + after_hop, field->size - after_hop);
+	return out + field->size - after_hop - saved_out;
+}
diff --git a/test/box/update.result b/test/box/update.result
index ee38c100f..bf76f6638 100644
--- a/test/box/update.result
+++ b/test/box/update.result
@@ -1294,6 +1294,183 @@ vy_s:select()
 vy_s:drop()
 ---
 ...
+--
+-- Complex updates. Intersected paths, different types.
+--
+--
+-- =
+--
+-- Difference in the last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+      'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 375], [400,
+        450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+-- Different starting from the non-last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+      'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 350], [400,
+        475]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+-- Contains string keys in the middle.
+t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4000, 5000, 6, 7, 8],
+      'e': 500, 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350],
+      [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t = {1}
+---
+...
+t[2] = setmetatable({}, {__serialize = 'map'})
+---
+...
+t[3] = {}
+---
+...
+t[4] = {                        \
+    1,                          \
+    2,                          \
+    3,                          \
+    {                           \
+        4,                      \
+        5,                      \
+        6,                      \
+        7,                      \
+        {                       \
+            8,                  \
+            9,                  \
+            {10, 11, 12},       \
+            13,                 \
+            14,                 \
+        },                      \
+        15,                     \
+        16,                     \
+        {17, 18, 19}            \
+    },                          \
+    20,                         \
+    {                           \
+        a = 21,                 \
+        b = 22,                 \
+        c = {                   \
+            d = 23,             \
+            e = 24              \
+        }                       \
+    },                          \
+    25                          \
+}
+---
+...
+t = s:replace(t)
+---
+...
+-- Non-first and non-last field updates.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6, 7, [8, 9, [10, 11000, 12], 13, 14], 15, 16, [17,
+        18, 19000]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Triple intersection with enabled fast jump by route.
+t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9000, [10, 11, 12], 13, 14], 15, 16, [
+        17, 18000, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Triple intersection with no optimizations.
+t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}})
+---
+- [1, {}, [], [1, 2, 3000, [4, 5000, 6, 7, [8000, 9, [10, 11, 12], 13, 14], 15, 16,
+      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Last operation splits previous ones.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9, [10000, 11000, 12], 13, 14], 15, 16,
+      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- First two operations generate a route '[4]' in the 4th field of
+-- the tuple. With childs [1] and [2]. The last operation should
+-- delete the route and replace it with a full featured array.
+t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}})
+---
+- [1, {}, [], [1, 2, 3.5, [4.5, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16, [
+        17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Test errors.
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}})
+---
+- error: 'Field ''[4][4][5].a'' UPDATE error: can not update array by non-integer
+    index'
+...
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}})
+---
+- error: 'Field ''[4][4][*].a'' UPDATE error: can not update array by non-integer
+    index'
+...
+t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}})
+---
+- error: 'Field ''[4][4][*].b'' UPDATE error: invalid JSON in position 8'
+...
+t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}})
+---
+- error: 'Field ''[4][4]-badjson'' UPDATE error: invalid JSON in position 7'
+...
+t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}})
+---
+- error: 'Field ''[4][4][1]'' UPDATE error: double update of the same field'
+...
+-- First two operations produce zero length bar update in field
+-- [4][4][5][4]. The third operation should raise an error.
+t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}})
+---
+- error: 'Field ''[4][4][5][4]'' UPDATE error: double update of the same field'
+...
+--
+-- !
+--
+t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5.25, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16,
+      [17, 18, 19]], 19.5, 19.75, 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}},
+    25]]
+...
+--
+-- #
+--
+t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6, 15, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {
+        'd': 23, 'e': 24}}, 25]]
+...
+--
+-- Scalar operations.
+--
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}})
+---
+- [1, {}, [], [1, 2, 3.5, [4, 5, 6, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16, [
+        17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}})
+---
+- [1, {}, [], [1, 2, 3.5, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16,
+      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Combined - check that operations following '!' take into
+-- account that there is a new element. Indexes should be relative
+-- to the new array size.
+t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}})
+---
+- [1, {}, [], [1, 2, 2.5, 3, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15,
+      16, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Error - an attempt to follow JSON path in a scalar field
+-- during branching.
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}})
+---
+- error: Field ''[4][3][2]'' was not found in the tuple
+...
 s:drop()
 ---
 ...
diff --git a/test/box/update.test.lua b/test/box/update.test.lua
index 60e669d27..d91a2e95c 100644
--- a/test/box/update.test.lua
+++ b/test/box/update.test.lua
@@ -424,4 +424,95 @@ box.commit()
 vy_s:select()
 vy_s:drop()
 
+--
+-- Complex updates. Intersected paths, different types.
+--
+
+--
+-- =
+--
+-- Difference in the last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}})
+-- Different starting from the non-last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}})
+-- Contains string keys in the middle.
+t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}})
+t = {1}
+t[2] = setmetatable({}, {__serialize = 'map'})
+t[3] = {}
+t[4] = {                        \
+    1,                          \
+    2,                          \
+    3,                          \
+    {                           \
+        4,                      \
+        5,                      \
+        6,                      \
+        7,                      \
+        {                       \
+            8,                  \
+            9,                  \
+            {10, 11, 12},       \
+            13,                 \
+            14,                 \
+        },                      \
+        15,                     \
+        16,                     \
+        {17, 18, 19}            \
+    },                          \
+    20,                         \
+    {                           \
+        a = 21,                 \
+        b = 22,                 \
+        c = {                   \
+            d = 23,             \
+            e = 24              \
+        }                       \
+    },                          \
+    25                          \
+}
+t = s:replace(t)
+-- Non-first and non-last field updates.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}})
+-- Triple intersection with enabled fast jump by route.
+t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}})
+-- Triple intersection with no optimizations.
+t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}})
+-- Last operation splits previous ones.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}})
+-- First two operations generate a route '[4]' in the 4th field of
+-- the tuple. With childs [1] and [2]. The last operation should
+-- delete the route and replace it with a full featured array.
+t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}})
+-- Test errors.
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}})
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}})
+t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}})
+t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}})
+t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}})
+-- First two operations produce zero length bar update in field
+-- [4][4][5][4]. The third operation should raise an error.
+t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}})
+
+--
+-- !
+--
+t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}})
+--
+-- #
+--
+t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}})
+--
+-- Scalar operations.
+--
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}})
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}})
+-- Combined - check that operations following '!' take into
+-- account that there is a new element. Indexes should be relative
+-- to the new array size.
+t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}})
+-- Error - an attempt to follow JSON path in a scalar field
+-- during branching.
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}})
+
 s:drop()
-- 
2.21.0 (Apple Git-122.2)



More information about the Tarantool-patches mailing list