[tarantool-patches] [PATCH v2 7/8] tuple: JSON updates support intersection by arrays

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sun Sep 1 00:35:57 MSK 2019


Before the patch only not intersected JSON updates were supported
as the simplest and fastest to implement. This patch allows paths
to intersect. They can have a common JSON prefix, but difference
should start from an array index.

For example, this is allowed:

    [1][2][3].a.b.c = 20
    [1][2][4].e.f.g = 30

This is not allowed yet:

    [1][2][3].a.b.c = 20
    [1][2][3].a.e.f = 30

    Here first difference is 'b' vs 'e' - intersection by map
    keys.

Architecture of the solution is similar to bar updates: a new
update tree node type is added: UPDATE_ROUTE. When several
update operations have the same prefix, this prefix becomes an
UPDATE_ROUTE tree field. It stores the prefix and a subtree with
these operations.

Bar and route update nodes can branch and produce more bars and
routes, when new operations come.

Part of #1261
---
 src/box/CMakeLists.txt        |   1 +
 src/box/update/update_array.c |  49 +++++
 src/box/update/update_bar.c   |   9 +-
 src/box/update/update_field.c |   4 +
 src/box/update/update_field.h |  93 ++++++++++
 src/box/update/update_route.c | 334 ++++++++++++++++++++++++++++++++++
 test/box/update.result        | 177 ++++++++++++++++++
 test/box/update.test.lua      |  91 +++++++++
 8 files changed, 753 insertions(+), 5 deletions(-)
 create mode 100644 src/box/update/update_route.c

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 2888f4d7d..67461bd9b 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -44,6 +44,7 @@ add_library(tuple STATIC
     update/update_field.c
     update/update_array.c
     update/update_bar.c
+    update/update_route.c
     tuple_compare.cc
     tuple_extract_key.cc
     tuple_hash.cc
diff --git a/src/box/update/update_array.c b/src/box/update/update_array.c
index 262eceafb..2e354ed42 100644
--- a/src/box/update/update_array.c
+++ b/src/box/update/update_array.c
@@ -190,6 +190,55 @@ update_array_create(struct update_field *field, const char *header,
 	return rope_append(field->array.rope, item, field_count);
 }
 
+int
+update_array_create_with_child(struct update_field *field,
+			       const struct update_field *child,
+			       int32_t field_no, const char *header)
+{
+	const char *data = header;
+	uint32_t field_count = mp_decode_array(&data);
+	const char *first_field = data;
+	const char *first_field_end = first_field;
+	mp_next(&first_field_end);
+	struct region *region = &fiber()->gc;
+	struct rope *rope = rope_new(region);
+	if (rope == NULL)
+		return -1;
+	struct update_array_item *item =
+		(struct update_array_item *) rope_alloc(region, sizeof(*item));
+	if (item == NULL)
+		return -1;
+	const char *end = first_field_end;
+	if (field_no > 0) {
+		for (int32_t i = 1; i < field_no; ++i)
+			mp_next(&end);
+		update_array_item_create(item, UPDATE_NOP, first_field,
+					 first_field_end - first_field,
+					 end - first_field_end);
+		if (rope_append(rope, item, field_no) != 0)
+			return -1;
+		item = (struct update_array_item *) rope_alloc(region,
+							       sizeof(*item));
+		if (item == NULL)
+			return -1;
+		first_field = end;
+		first_field_end = first_field;
+		mp_next(&first_field_end);
+		end = first_field_end;
+	}
+	for (uint32_t i = field_no + 1; i < field_count; ++i)
+		mp_next(&end);
+	item->field = *child;
+	update_array_item_create(item, child->type, first_field,
+				 first_field_end - first_field,
+				 end - first_field_end);
+	field->type = UPDATE_ARRAY;
+	field->data = header;
+	field->size = end - header;
+	field->array.rope = rope;
+	return rope_append(rope, item, field_count - field_no);
+}
+
 uint32_t
 update_array_sizeof(struct update_field *field)
 {
diff --git a/src/box/update/update_bar.c b/src/box/update/update_bar.c
index f4fc00716..c8e28347e 100644
--- a/src/box/update/update_bar.c
+++ b/src/box/update/update_bar.c
@@ -250,12 +250,11 @@ do_op_nop_delete(struct update_op *op, struct update_field *field)
 int										\
 do_op_bar_##op_type(struct update_op *op, struct update_field *field)		\
 {										\
-	(void) op;								\
-	(void) field;								\
 	assert(field->type == UPDATE_BAR);					\
-	diag_set(ClientError, ER_UNSUPPORTED, "update",				\
-		 "intersected JSON paths");					\
-	return -1;								\
+	field = update_route_branch(field, op);					\
+	if (field == NULL)							\
+		return -1;							\
+	return do_op_##op_type(op, field);					\
 }
 
 DO_SCALAR_OP_GENERIC(insert)
diff --git a/src/box/update/update_field.c b/src/box/update/update_field.c
index db02959ff..161dd5875 100644
--- a/src/box/update/update_field.c
+++ b/src/box/update/update_field.c
@@ -116,6 +116,8 @@ update_field_sizeof(struct update_field *field)
 		return update_array_sizeof(field);
 	case UPDATE_BAR:
 		return update_bar_sizeof(field);
+	case UPDATE_ROUTE:
+		return update_route_sizeof(field);
 	default:
 		unreachable();
 	}
@@ -142,6 +144,8 @@ update_field_store(struct update_field *field, char *out, char *out_end)
 		return update_array_store(field, out, out_end);
 	case UPDATE_BAR:
 		return update_bar_store(field, out, out_end);
+	case UPDATE_ROUTE:
+		return update_route_store(field, out, out_end);
 	default:
 		unreachable();
 	}
diff --git a/src/box/update/update_field.h b/src/box/update/update_field.h
index ee42ba5ae..81c010615 100644
--- a/src/box/update/update_field.h
+++ b/src/box/update/update_field.h
@@ -266,6 +266,22 @@ enum update_type {
 	 * nodes. And this is the most common case.
 	 */
 	UPDATE_BAR,
+	/**
+	 * Field with a subtree of updates having the same prefix,
+	 * stored here explicitly. New updates with the same
+	 * prefix just follow it without decoding of JSON nor
+	 * MessagePack. It can be quite helpful when an update
+	 * works with the same internal object via several
+	 * operations like this:
+	 *
+	 *    [1][2].a.b.c[3].key1 = 20
+	 *    [1][2].a.b.c[3].key2 = 30
+	 *    [1][2].a.b.c[3].key3 = true
+	 *
+	 * Here [1][2].a.b.c[3] is stored only once as a route
+	 * with a subtree 'key1 = 20', 'key2 = 30', 'key3 = true'.
+	 */
+	UPDATE_ROUTE,
 };
 
 /**
@@ -343,6 +359,17 @@ struct update_field {
 				};
 			};
 		} bar;
+		/** Route update - path to an update subtree. */
+		struct {
+			/**
+			 * Common prefix of all updates in the
+			 * subtree.
+			 */
+			const char *path;
+			int path_len;
+			/** Update subtree. */
+			struct update_field *next_hop;
+		} route;
 	};
 };
 
@@ -408,6 +435,31 @@ update_array_create(struct update_field *field, const char *header,
 		    const char *data, const char *data_end,
 		    uint32_t field_count);
 
+/**
+ * The same as mere create, but the array is created with a first
+ * child already. It allows to make it more effective, because
+ * under the hood a rope is created not as a one huge range which
+ * is then spit in parts, but as two rope nodes from the
+ * beginning. On the summary: -1 rope node split, -1 decoding of
+ * fields to @a field_no.
+ *
+ * The function is used during branching, where there was an
+ * existing update, but another one came with the same prefix, and
+ * a different suffix.
+ *
+ * @param[out] field Field to initialize.
+ * @param child A child subtree.
+ * @param field_no Field number of @a child.
+ * @param header Beginning of the array, MessagePack header.
+ *
+ * @retval  0 Success.
+ * @retval -1 Error.
+ */
+int
+update_array_create_with_child(struct update_field *field,
+			       const struct update_field *child,
+			       int32_t field_no, const char *header);
+
 OP_DECL_GENERIC(array)
 
 /* }}} update_field.array */
@@ -424,6 +476,34 @@ OP_DECL_GENERIC(nop)
 
 /* }}} update_field.nop */
 
+/* {{{ update_field.route */
+
+/**
+ * Take a bar or a route @a field and split its path in the place
+ * where @a new_op should be applied. Prefix becomes a new route,
+ * suffix becomes a child of the result field. In the result @a
+ * field stays root of its subtree, and a node of that subtree
+ * is returned, to which @a new_op should be applied.
+ *
+ * Note, this function does not apply @a new_op. It just finds to
+ * where it should be applied and does all preparations. It is
+ * done deliberately, because otherwise do_cb() virtual function
+ * of @a new_op would have been called here, since there is no
+ * context. But a caller always knows exactly if it was insert,
+ * set, arith, etc. And a caller can and does use more specific
+ * function like do_op_set, do_op_insert ...
+ *
+ * @param field Field to find where to apply @a new_op.
+ * @param new_op New operation to apply.
+ * @return A field to which @a new_op should be applied.
+ */
+struct update_field *
+update_route_branch(struct update_field *field, struct update_op *new_op);
+
+OP_DECL_GENERIC(route)
+
+/* }}} update_field.route */
+
 #undef OP_DECL_GENERIC
 
 /* {{{ Common helpers. */
@@ -435,6 +515,17 @@ OP_DECL_GENERIC(nop)
  * Each child can be another array, a bar, a route, a map -
  * anything. The functions below help to make such places shorter
  * and simpler.
+ *
+ * Note, that they are recursive, although it is not clearly
+ * visible. For example, if an update tree contains several array
+ * nodes on one tree branch, then update of the deepest array goes
+ * through each of these nodes and calls do_op_array_<opname> on
+ * needed children. But it is ok, because operation count is
+ * usually small (<<50 in the most cases, usually <= 5), and the
+ * update tree hight is not bigger than operation count. Also,
+ * fiber stack is big enough to fit ~10k update tree depth -
+ * incredible number, even though the real limit is 4k due to
+ * limited number of operations.
  */
 #define OP_DECL_GENERIC(op_type)						\
 static inline int								\
@@ -447,6 +538,8 @@ do_op_##op_type(struct update_op *op, struct update_field *field)		\
 		return do_op_nop_##op_type(op, field);				\
 	case UPDATE_BAR:							\
 		return do_op_bar_##op_type(op, field);				\
+	case UPDATE_ROUTE:							\
+		return do_op_route_##op_type(op, field);			\
 	default:								\
 		unreachable();							\
 	}									\
diff --git a/src/box/update/update_route.c b/src/box/update/update_route.c
new file mode 100644
index 000000000..f1d2ced65
--- /dev/null
+++ b/src/box/update/update_route.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "update_field.h"
+#include "fiber.h"
+#include "box/tuple.h"
+
+/**
+ * Do the actual branch. This is the case when an existing
+ * bar/route path is different from a new operation's path in an
+ * array.
+ * @param[out] next_hop A field which will be initialized as an
+ *        array, and which will be a point to apply a new
+ *        operation.
+ * @param child Current field from which the branch happens. It
+ *        already contains an old update subtree.
+ * @param parent MessagePack array to be taken by @a next_hop.
+ */
+static int
+update_route_branch_array(struct update_field *next_hop,
+			  const struct update_field *child, int32_t field_no,
+			  const char *parent)
+{
+	struct update_op *op = child->bar.op;
+	/*
+	 * There are rules when a subtree can be just copied as is
+	 * from one parent to another.
+	 * 1) It should not be a leaf. If it is not leaf, then it
+	 *    does not change header and other fields of this
+	 *    particular array, and can be safely copied into one
+	 *    of its fields. Otherwise check next rules.
+	 * 2) It should not be a bar. Because if it is not a bar,
+	 *    and is a leaf, then it is scalar. Again, changes
+	 *    only its own field, does not affect anything else,
+	 *    can be copied. Otherwise go down.
+	 * 3) Ok, it is a bar, a leaf. It is a bar with zero path
+	 *    length. In this case it should be scalar bar, it is
+	 *    checked below. The only non-scalar operations are
+	 *    '!' and '#'.
+	 *
+	 * Why '#' and '!' can't be copied? '!', for example,
+	 * being applied to a field [1], affects all fields [2-*],
+	 * and the array header. The same but in an even worse
+	 * form about '#'. Such operations should be redone. They
+	 * affect many fields and the parent.
+	 *
+	 * There is a tricky thing though - why not to just redo
+	 * all operations here, for the sake of code simplicity?
+	 * It would allow to remove 'create_with_child' crutch.
+	 * The answer is - it is not possible. If a field is
+	 * copyable, it is not re-applicable. And vice-versa. For
+	 * example, if it is not a leaf, then there may be many
+	 * operations, not one. A subtree just can't be
+	 * 're-applied'.
+	 *
+	 * If the operation is scalar and a leaf, then its result
+	 * has already overridden its arguments. This is because
+	 * scalar operations save result into the arguments, to
+	 * save memory. A second operation appliance would lead
+	 * to very surprising results.
+	 *
+	 * Another reason - performance. This path should be
+	 * quite hot, and to copy a struct is for sure much faster
+	 * than to reapply an operation using a virtual function.
+	 * Operations '!' and '#' are quite rare, so their
+	 * optimization is not the goal.
+	 */
+	if (child->type != UPDATE_BAR || child->bar.path_len > 0 ||
+	    (op->opcode != '!' && op->opcode != '#')) {
+		return update_array_create_with_child(next_hop, child,
+						      field_no, parent);
+	}
+	op->token_type = JSON_TOKEN_NUM;
+	op->field_no = field_no;
+	const char *data = parent;
+	uint32_t field_count = mp_decode_array(&data);
+	const char *end = data;
+	for (uint32_t i = 0; i < field_count; ++i)
+		mp_next(&end);
+	if (update_array_create(next_hop, parent, data, end, field_count) != 0)
+		return -1;
+	return op->meta->do_cb(op, next_hop);
+}
+
+struct update_field *
+update_route_branch(struct update_field *field, struct update_op *new_op)
+{
+	assert(new_op->lexer.src != NULL);
+	const char *old_path;
+	int old_path_len;
+	if (field->type == UPDATE_BAR) {
+		old_path = field->bar.path;
+		old_path_len = field->bar.path_len;
+	} else {
+		assert(field->type == UPDATE_ROUTE);
+		old_path = field->route.path;
+		old_path_len = field->route.path_len;
+	}
+	assert(old_path != NULL);
+	struct json_lexer old_path_lexer;
+	struct json_token old_token, new_token;
+	/*
+	 * Offset is going to be used as a length of the route
+	 * node created as a parent of the old subtree and the
+	 * new operation. As it is described in the route
+	 * definition in struct update_field - route is the common
+	 * prefix of all operations of the subtree. Here its
+	 * length is calculated.
+	 * In addition, it is used here to determine if the new
+	 * operation is different from the current subtree from
+	 * the very beginning. Basically, it means the offset is
+	 * 0, and no route is generated. Root becomes a regular
+	 * update field (array, map), not a route. Example:
+	 * @a field is a bar '[1].a.b = 20', @a new_op is
+	 * '[2].c.d = 30'. In this case the paths are already
+	 * different from the beginning, no common prefix = no
+	 * route. Array with children [1].a.b and [2].c.d becomes
+	 * a root.
+	 */
+	int saved_old_offset;
+	json_lexer_create(&old_path_lexer, old_path, old_path_len,
+			  TUPLE_INDEX_BASE);
+	const char *parent = field->data;
+	int rc;
+	do {
+		saved_old_offset = old_path_lexer.offset;
+		rc = json_lexer_next_token(&old_path_lexer, &old_token);
+		/* Old path is already validated. */
+		assert(rc == 0);
+		rc = json_lexer_next_token(&new_op->lexer, &new_token);
+		if (rc != 0) {
+			update_err_bad_json(new_op, rc);
+			return NULL;
+		}
+		if (json_token_cmp(&old_token, &new_token) != 0)
+			break;
+		switch(new_token.type) {
+		case JSON_TOKEN_NUM:
+			rc = tuple_field_go_to_index(&parent, new_token.num);
+			break;
+		case JSON_TOKEN_STR:
+			rc = tuple_field_go_to_key(&parent, new_token.str,
+						   new_token.len);
+			break;
+		default:
+			/*
+			 * Can't be ANY, because old and new
+			 * tokens are equal, but '*' is invalid in
+			 * paths and old was already checked for
+			 * that.
+			 */
+			assert(new_token.type == JSON_TOKEN_END);
+			update_err_double(new_op);
+			return NULL;
+		}
+		/*
+		 * Must always find a field, because old
+		 * token already went that path.
+		 */
+		assert(rc == 0);
+	} while (true);
+	enum mp_type type = mp_typeof(*parent);
+	/*
+	 * The paths are different from the very beginning. It
+	 * means, that the old field should be transformed instead
+	 * of creating a new route node.
+	 */
+	bool transform_root = saved_old_offset == 0;
+	struct update_field *next_hop;
+	if (! transform_root) {
+		next_hop = (struct update_field *)
+			region_alloc(&fiber()->gc, sizeof(*next_hop));
+		if (next_hop == NULL) {
+			diag_set(OutOfMemory, sizeof(*next_hop), "region_alloc",
+				 "next_hop");
+			return NULL;
+		}
+	} else {
+		next_hop = field;
+	}
+	if (type == MP_MAP) {
+		diag_set(ClientError, ER_UNSUPPORTED, "update",
+			 "path intersection on map");
+		return NULL;
+	}
+	if (new_token.type != JSON_TOKEN_NUM) {
+		update_err(new_op, "can not update array by non-integer index");
+		return NULL;
+	}
+	if (type != MP_ARRAY) {
+		update_err_no_such_field(new_op);
+		return NULL;
+	}
+
+	int path_offset = old_path_lexer.offset;
+	struct update_field child = *field;
+	if (child.type == UPDATE_ROUTE) {
+		child.route.path += path_offset;
+		child.route.path_len -= path_offset;
+		if (child.route.path_len == 0)
+			child = *child.route.next_hop;
+	} else {
+		assert(child.type == UPDATE_BAR);
+		child.bar.path += path_offset;
+		child.bar.path_len -= path_offset;
+		/*
+		 * Yeah, bar length can become 0 here, but it is
+		 * ok as long as it is not a non-scalar operation
+		 * like '!' or '#'. In these cases it changes the
+		 * parent too. When a bar is scalar ('=', arith,
+		 * splice, ...), it operates on one concrete field
+		 * and works even if its path len is 0. Of course,
+		 * it is not a big deal to fix such bar here
+		 * making it scalar, but it would be additional
+		 * branching on an actually hot path.
+		 * Talking of '#' and '!' - they are handled by
+		 * array and map 'branchers' internally, below.
+		 */
+	}
+
+	new_op->token_type = JSON_TOKEN_NUM;
+	new_op->field_no = new_token.num;
+	if (update_route_branch_array(next_hop, &child, old_token.num,
+				      parent) != 0)
+		return NULL;
+	if (! transform_root) {
+		field->type = UPDATE_ROUTE;
+		field->route.path = old_path;
+		field->route.path_len = saved_old_offset;
+		field->route.next_hop = next_hop;
+	}
+	return next_hop;
+}
+
+/**
+ * Obtain a next node of the update tree to which @a op should be
+ * propagated. It is the same as branch, but has a fast path in
+ * case of @a field is route, and operation prefix matches this
+ * route - then no need to parse JSON and dive into MessagePack,
+ * the route is just followed, via a lexer offset increase.
+ *
+ * @param field Field to find where to apply @a op.
+ * @param op New operation to apply.
+ * @return A field to which @a op should be applied.
+ */
+static struct update_field *
+update_route_next(struct update_field *field, struct update_op *op)
+{
+	assert(field->type == UPDATE_ROUTE);
+	assert(! update_op_is_term(op));
+	const char *new_path = op->lexer.src + op->lexer.offset;
+	int new_path_len = op->lexer.src_len - op->lexer.offset;
+	if (field->route.path_len <= new_path_len &&
+	    memcmp(field->route.path, new_path, field->route.path_len) == 0) {
+		/*
+		 * Fast path: jump to the next hop with no
+		 * decoding. Is used, when several JSON updates
+		 * have the same prefix.
+		 */
+		op->lexer.offset += field->route.path_len;
+		return field->route.next_hop;
+	}
+	return update_route_branch(field, op);
+}
+
+#define DO_SCALAR_OP_GENERIC(op_type)						\
+int										\
+do_op_route_##op_type(struct update_op *op, struct update_field *field)		\
+{										\
+	assert(field->type == UPDATE_ROUTE);					\
+	struct update_field *next_hop = update_route_next(field, op);		\
+	if (next_hop == NULL)							\
+		return -1;							\
+	return do_op_##op_type(op, next_hop);					\
+}
+
+DO_SCALAR_OP_GENERIC(set)
+
+DO_SCALAR_OP_GENERIC(insert)
+
+DO_SCALAR_OP_GENERIC(delete)
+
+DO_SCALAR_OP_GENERIC(arith)
+
+DO_SCALAR_OP_GENERIC(bit)
+
+DO_SCALAR_OP_GENERIC(splice)
+
+uint32_t
+update_route_sizeof(struct update_field *field)
+{
+	return field->size - field->route.next_hop->size +
+	       update_field_sizeof(field->route.next_hop);
+}
+
+uint32_t
+update_route_store(struct update_field *field, char *out, char *out_end)
+{
+	char *saved_out = out;
+	int before_hop = field->route.next_hop->data - field->data;
+	memcpy(out, field->data, before_hop);
+	out += before_hop;
+	out += update_field_store(field->route.next_hop, out, out_end);
+	int after_hop = before_hop + field->route.next_hop->size;
+	memcpy(out, field->data + after_hop, field->size - after_hop);
+	return out + field->size - after_hop - saved_out;
+}
diff --git a/test/box/update.result b/test/box/update.result
index 38b8bc13e..2c5978d8a 100644
--- a/test/box/update.result
+++ b/test/box/update.result
@@ -1294,6 +1294,183 @@ vy_s:select()
 vy_s:drop()
 ---
 ...
+--
+-- Complex updates. Intersected paths, different types.
+--
+--
+-- =
+--
+-- Difference in the last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+      'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 375], [400,
+        450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+-- Different starting from the non-last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+      'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 350], [400,
+        475]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+-- Contains string keys in the middle.
+t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4000, 5000, 6, 7, 8],
+      'e': 500, 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350],
+      [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t = {1}
+---
+...
+t[2] = setmetatable({}, {__serialize = 'map'})
+---
+...
+t[3] = {}
+---
+...
+t[4] = {                        \
+    1,                          \
+    2,                          \
+    3,                          \
+    {                           \
+        4,                      \
+        5,                      \
+        6,                      \
+        7,                      \
+        {                       \
+            8,                  \
+            9,                  \
+            {10, 11, 12},       \
+            13,                 \
+            14,                 \
+        },                      \
+        15,                     \
+        16,                     \
+        {17, 18, 19}            \
+    },                          \
+    20,                         \
+    {                           \
+        a = 21,                 \
+        b = 22,                 \
+        c = {                   \
+            d = 23,             \
+            e = 24              \
+        }                       \
+    },                          \
+    25                          \
+}
+---
+...
+t = s:replace(t)
+---
+...
+-- Non-first and non-last field updates.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6, 7, [8, 9, [10, 11000, 12], 13, 14], 15, 16, [17,
+        18, 19000]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Triple intersection with enabled fast jump by route.
+t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9000, [10, 11, 12], 13, 14], 15, 16, [
+        17, 18000, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Triple intersection with no optimizations.
+t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}})
+---
+- [1, {}, [], [1, 2, 3000, [4, 5000, 6, 7, [8000, 9, [10, 11, 12], 13, 14], 15, 16,
+      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Last operation splits previous ones.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9, [10000, 11000, 12], 13, 14], 15, 16,
+      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- First two operations generate a route '[4]' in the 4th field of
+-- the tuple. With childs [1] and [2]. The last operation should
+-- delete the route and replace it with a full featured array.
+t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}})
+---
+- [1, {}, [], [1, 2, 3.5, [4.5, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16, [
+        17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Test errors.
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}})
+---
+- error: 'Field ''[4][4][5].a'' UPDATE error: can not update array by non-integer
+    index'
+...
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}})
+---
+- error: 'Field ''[4][4][*].a'' UPDATE error: can not update array by non-integer
+    index'
+...
+t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}})
+---
+- error: 'Field ''[4][4][*].b'' UPDATE error: invalid JSON in position 8'
+...
+t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}})
+---
+- error: 'Field ''[4][4]-badjson'' UPDATE error: invalid JSON in position 7'
+...
+t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}})
+---
+- error: 'Field ''[4][4][1]'' UPDATE error: double update of the same field'
+...
+-- First two operations produce zero length bar update in field
+-- [4][4][5][4]. The third operation should raise an error.
+t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}})
+---
+- error: 'Field ''[4][4][5][4]'' UPDATE error: double update of the same field'
+...
+--
+-- !
+--
+t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5.25, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16,
+      [17, 18, 19]], 19.5, 19.75, 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}},
+    25]]
+...
+--
+-- #
+--
+t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}})
+---
+- [1, {}, [], [1, 2, 3, [4, 5, 6, 15, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {
+        'd': 23, 'e': 24}}, 25]]
+...
+--
+-- Scalar operations.
+--
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}})
+---
+- [1, {}, [], [1, 2, 3.5, [4, 5, 6, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16, [
+        17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}})
+---
+- [1, {}, [], [1, 2, 3.5, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16,
+      [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Combined - check that operations following '!' take into
+-- account that there is a new element. Indexes should be relative
+-- to the new array size.
+t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}})
+---
+- [1, {}, [], [1, 2, 2.5, 3, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15,
+      16, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]]
+...
+-- Error - an attempt to follow JSON path in a scalar field
+-- during branching.
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}})
+---
+- error: Field ''[4][3][2]'' was not found in the tuple
+...
 s:drop()
 ---
 ...
diff --git a/test/box/update.test.lua b/test/box/update.test.lua
index 60e669d27..d91a2e95c 100644
--- a/test/box/update.test.lua
+++ b/test/box/update.test.lua
@@ -424,4 +424,95 @@ box.commit()
 vy_s:select()
 vy_s:drop()
 
+--
+-- Complex updates. Intersected paths, different types.
+--
+
+--
+-- =
+--
+-- Difference in the last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}})
+-- Different starting from the non-last part.
+t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}})
+-- Contains string keys in the middle.
+t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}})
+t = {1}
+t[2] = setmetatable({}, {__serialize = 'map'})
+t[3] = {}
+t[4] = {                        \
+    1,                          \
+    2,                          \
+    3,                          \
+    {                           \
+        4,                      \
+        5,                      \
+        6,                      \
+        7,                      \
+        {                       \
+            8,                  \
+            9,                  \
+            {10, 11, 12},       \
+            13,                 \
+            14,                 \
+        },                      \
+        15,                     \
+        16,                     \
+        {17, 18, 19}            \
+    },                          \
+    20,                         \
+    {                           \
+        a = 21,                 \
+        b = 22,                 \
+        c = {                   \
+            d = 23,             \
+            e = 24              \
+        }                       \
+    },                          \
+    25                          \
+}
+t = s:replace(t)
+-- Non-first and non-last field updates.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}})
+-- Triple intersection with enabled fast jump by route.
+t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}})
+-- Triple intersection with no optimizations.
+t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}})
+-- Last operation splits previous ones.
+t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}})
+-- First two operations generate a route '[4]' in the 4th field of
+-- the tuple. With childs [1] and [2]. The last operation should
+-- delete the route and replace it with a full featured array.
+t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}})
+-- Test errors.
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}})
+t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}})
+t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}})
+t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}})
+t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}})
+-- First two operations produce zero length bar update in field
+-- [4][4][5][4]. The third operation should raise an error.
+t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}})
+
+--
+-- !
+--
+t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}})
+--
+-- #
+--
+t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}})
+--
+-- Scalar operations.
+--
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}})
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}})
+-- Combined - check that operations following '!' take into
+-- account that there is a new element. Indexes should be relative
+-- to the new array size.
+t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}})
+-- Error - an attempt to follow JSON path in a scalar field
+-- during branching.
+t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}})
+
 s:drop()
-- 
2.20.1 (Apple Git-117)





More information about the Tarantool-patches mailing list