From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp40.i.mail.ru (smtp40.i.mail.ru [94.100.177.100]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id C2C4646970E for ; Fri, 27 Dec 2019 17:13:26 +0300 (MSK) Date: Fri, 27 Dec 2019 17:13:25 +0300 From: Sergey Ostanevich Message-ID: <20191227141325.GA1222@tarantool.org> References: MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: Subject: Re: [Tarantool-patches] [PATCH 2/3] tuple: JSON path update intersection at arrays List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy Cc: tarantool-patches@dev.tarantool.org Hi! Thanks for the patch, just a couple of comments below, LGTM Sergos On 23 Dec 23:41, Vladislav Shpilevoy wrote: > Before the patch only isolated JSON updates were supported as the > simplest and fastest to implement. This patch allows update > operations with paths having the same prefix. But difference of > the paths should start from an array index. > > For example, this is allowed: > > [1][2][3].a.b.c = 20 > [1][2][4].e.f.g = 30 > > First difference is [3] vs [4] - intersection by an array, ok. > > This is not allowed yet: > > [1][2][3].a.b.c = 20 > [1][2][3].a.e.f = 30 > > First difference is 'b' vs 'e' - intersection by a map, > not ok. > > For that a new update tree node type is added - XUPDATE_ROUTE. > When several update operations have the same prefix, this prefix > becomes an XUPDATE_ROUTE tree field. It stores the prefix and a > subtree with these operations. > > Bar and route update nodes can branch and produce more bars and > routes, when new operations come. > > Part of #1261 > --- > src/box/CMakeLists.txt | 1 + > src/box/xrow_update_array.c | 50 ++++++ > src/box/xrow_update_bar.c | 14 +- > src/box/xrow_update_field.c | 4 + > src/box/xrow_update_field.h | 99 +++++++++++ > src/box/xrow_update_route.c | 346 ++++++++++++++++++++++++++++++++++++ > test/box/update.result | 177 ++++++++++++++++++ > test/box/update.test.lua | 91 ++++++++++ > 8 files changed, 772 insertions(+), 10 deletions(-) > create mode 100644 src/box/xrow_update_route.c > > diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt > index fc9d1a3e8..f99efc503 100644 > --- a/src/box/CMakeLists.txt > +++ b/src/box/CMakeLists.txt > @@ -44,6 +44,7 @@ add_library(tuple STATIC > xrow_update_field.c > xrow_update_array.c > xrow_update_bar.c > + xrow_update_route.c > tuple_compare.cc > tuple_extract_key.cc > tuple_hash.cc > diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c > index 6a7ce09ff..e49dad5c1 100644 > --- a/src/box/xrow_update_array.c > +++ b/src/box/xrow_update_array.c > @@ -195,6 +195,56 @@ xrow_update_array_create(struct xrow_update_field *field, const char *header, > return xrow_update_rope_append(field->array.rope, item, field_count); > } > > +int > +xrow_update_array_create_with_child(struct xrow_update_field *field, > + const char *header, > + const struct xrow_update_field *child, > + int32_t field_no) > +{ > + const char *data = header; > + uint32_t field_count = mp_decode_array(&data); > + const char *first_field = data; > + const char *first_field_end = first_field; > + mp_next(&first_field_end); > + struct region *region = &fiber()->gc; > + struct xrow_update_rope *rope = xrow_update_rope_new(region); > + if (rope == NULL) > + return -1; > + struct xrow_update_array_item *item = (struct xrow_update_array_item *) > + xrow_update_alloc(region, sizeof(*item)); > + if (item == NULL) > + return -1; > + const char *end = first_field_end; > + if (field_no > 0) { > + for (int32_t i = 1; i < field_no; ++i) > + mp_next(&end); > + xrow_update_array_item_create(item, XUPDATE_NOP, first_field, > + first_field_end - first_field, > + end - first_field_end); > + if (xrow_update_rope_append(rope, item, field_no) != 0) > + return -1; > + item = (struct xrow_update_array_item *) > + xrow_update_alloc(region, sizeof(*item)); > + if (item == NULL) > + return -1; > + first_field = end; > + first_field_end = first_field; > + mp_next(&first_field_end); > + end = first_field_end; > + } > + for (uint32_t i = field_no + 1; i < field_count; ++i) > + mp_next(&end); > + item->field = *child; > + xrow_update_array_item_create(item, child->type, first_field, > + first_field_end - first_field, > + end - first_field_end); > + field->type = XUPDATE_ARRAY; > + field->data = header; > + field->size = end - header; > + field->array.rope = rope; > + return xrow_update_rope_append(rope, item, field_count - field_no); > +} > + > uint32_t > xrow_update_array_sizeof(struct xrow_update_field *field) > { > diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c > index 7285c7e2b..1c2d2ef99 100644 > --- a/src/box/xrow_update_bar.c > +++ b/src/box/xrow_update_bar.c > @@ -307,17 +307,11 @@ int \ > xrow_update_op_do_bar_##op_type(struct xrow_update_op *op, \ > struct xrow_update_field *field) \ > { \ > - /* \ > - * The only way to update a bar is to make a second update \ > - * with the same prefix as this bar. But it is not \ > - * supported yet. \ > - */ \ > - (void) op; \ > - (void) field; \ > assert(field->type == XUPDATE_BAR); \ > - diag_set(ClientError, ER_UNSUPPORTED, "update", \ > - "intersected JSON paths"); \ > - return -1; \ > + field = xrow_update_route_branch(field, op); \ > + if (field == NULL) \ > + return -1; \ > + return xrow_update_op_do_field_##op_type(op, field); \ > } > > DO_BAR_OP_GENERIC(insert) > diff --git a/src/box/xrow_update_field.c b/src/box/xrow_update_field.c > index 96fcaf747..3a4aa6c90 100644 > --- a/src/box/xrow_update_field.c > +++ b/src/box/xrow_update_field.c > @@ -114,6 +114,8 @@ xrow_update_field_sizeof(struct xrow_update_field *field) > return xrow_update_array_sizeof(field); > case XUPDATE_BAR: > return xrow_update_bar_sizeof(field); > + case XUPDATE_ROUTE: > + return xrow_update_route_sizeof(field); > default: > unreachable(); > } > @@ -141,6 +143,8 @@ xrow_update_field_store(struct xrow_update_field *field, char *out, > return xrow_update_array_store(field, out, out_end); > case XUPDATE_BAR: > return xrow_update_bar_store(field, out, out_end); > + case XUPDATE_ROUTE: > + return xrow_update_route_store(field, out, out_end); > default: > unreachable(); > } > diff --git a/src/box/xrow_update_field.h b/src/box/xrow_update_field.h > index fbaf45c5d..f07b5a649 100644 > --- a/src/box/xrow_update_field.h > +++ b/src/box/xrow_update_field.h > @@ -275,6 +275,23 @@ enum xrow_update_type { > * path nodes. And this is the most common case. > */ > XUPDATE_BAR, > + /** > + * Field with a subtree of updates having the same prefix > + * stored here explicitly. New updates with the same > + * prefix just follow it without decoding of JSON nor > + * MessagePack. It can be quite helpful when an update > + * works with the same internal object via several > + * operations like this: > + * > + * [1][2].a.b.c[1] = 20 > + * [1][2].a.b.c[2] = 30 > + * [1][2].a.b.c[3] = true > + * > + * Here [1][2].a.b.c is stored only once as a route with a > + * child XUPDATE_ARRAY making updates '[1] = 20', > + * '[2] = 30', '[3] = true'. > + */ > + XUPDATE_ROUTE, > }; > > /** > @@ -355,6 +372,17 @@ struct xrow_update_field { > }; > }; > } bar; > + /** Route update - path to an update subtree. */ > + struct { > + /** > + * Common prefix of all updates in the > + * subtree. > + */ > + const char *path; > + int path_len; > + /** Update subtree. */ > + struct xrow_update_field *next_hop; > + } route; > }; > }; > > @@ -447,6 +475,33 @@ xrow_update_array_create(struct xrow_update_field *field, const char *header, > const char *data, const char *data_end, > uint32_t field_count); > > +/** > + * The same as mere create, but the array is created with a first > + * child right away. It allows to make it more efficient, because > + * under the hood a rope is created not as a one huge range which > + * is then spit in parts, but as two rope nodes from the ^^^^ Should be split here? > + * beginning. On the summary: -1 rope node split, -1 decoding of > + * fields from 0 to @a field_no. > + * > + * The function is used during branching, where there was an > + * existing update, but another one came with the same prefix, and > + * a different suffix. > + * > + * @param[out] field Field to initialize. > + * @param header Header of the MessagePack array. > + * @param child A child subtree. The child is copied by value into > + * the created array. > + * @param field_no Field number of @a child. > + * > + * @retval 0 Success. > + * @retval -1 Error. > + */ > +int > +xrow_update_array_create_with_child(struct xrow_update_field *field, > + const char *header, > + const struct xrow_update_field *child, > + int32_t field_no); > + > OP_DECL_GENERIC(array) > > /* }}} xrow_update_field.array */ > @@ -463,6 +518,37 @@ OP_DECL_GENERIC(nop) > > /* }}} update_field.nop */ > > +/* {{{ xrow_update_field.route */ > + > +/** > + * Take a bar or a route @a field and split its path in the place > + * where @a new_op should be applied. Prefix becomes a new route > + * object, suffix becomes a child of the result route. In the > + * result @a field stays root of its subtree, and a node of that > + * subtree is returned, to which @a new_op should be applied. > + * > + * Note, this function does not apply @a new_op. It just finds to > + * where it *should be* applied and does all preparations. It is > + * done deliberately, because otherwise do_cb() virtual function > + * of @a new_op would have been called here, since there is no > + * context. But a caller always knows exactly if it was insert, > + * set, arith, etc. And a caller can and does use more specific > + * function like xrow_update_op_do_field_set/insert/... . > + * > + * @param field Field to find where to apply @a new_op. > + * @param new_op New operation to apply. > + * > + * @retval not-NULL A field to which @a new_op should be applied. > + * @retval NULL Error. > + */ > +struct xrow_update_field * > +xrow_update_route_branch(struct xrow_update_field *field, > + struct xrow_update_op *new_op); > + > +OP_DECL_GENERIC(route) > + > +/* }}} xrow_update_field.route */ > + > #undef OP_DECL_GENERIC > > /* {{{ Common helpers. */ > @@ -475,6 +561,17 @@ OP_DECL_GENERIC(nop) > * xrow_update_op_do_field_set on its childs. Each child can be > * another array, a bar, a route, a map - anything. The functions > * below help to make such places shorter and simpler. > + * > + * Note, that they are recursive, although it is not clearly > + * visible. For example, if an update tree contains several array > + * nodes on one tree branch, then update of the deepest array goes > + * through each of these nodes and calls > + * xrow_update_op_do_array_() on needed children. But it > + * is ok, because operation count is usually small (<<50 in the > + * most cases, usually <= 5), and the update tree depth is not > + * bigger than operation count. Also, fiber stack is big enough to > + * fit ~10k update tree depth - incredible number, even though the > + * real limit is 4k due to limited number of operations. > */ > #define OP_DECL_GENERIC(op_type) \ > static inline int \ > @@ -488,6 +585,8 @@ xrow_update_op_do_field_##op_type(struct xrow_update_op *op, \ > return xrow_update_op_do_nop_##op_type(op, field); \ > case XUPDATE_BAR: \ > return xrow_update_op_do_bar_##op_type(op, field); \ > + case XUPDATE_ROUTE: \ > + return xrow_update_op_do_route_##op_type(op, field); \ > default: \ > unreachable(); \ > } \ > diff --git a/src/box/xrow_update_route.c b/src/box/xrow_update_route.c > new file mode 100644 > index 000000000..f374db837 > --- /dev/null > +++ b/src/box/xrow_update_route.c > @@ -0,0 +1,346 @@ > +/* > + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * 1. Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the > + * following disclaimer. > + * > + * 2. Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED > + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL > + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, > + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF > + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR > + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF > + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF > + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > + * SUCH DAMAGE. > + */ > +#include "xrow_update_field.h" > +#include "fiber.h" > +#include "tuple.h" > + > +/** > + * Do the actual branch. This is the case when an existing > + * bar/route path is different from a new operation's path in an > + * array. The existing object needs to be split into parent-child, > + * and the new operation becomes a second child. > + * > + * @param[out] next_hop A field which will be initialized as an > + * array, and which will be a point to apply a new > + * operation. > + * @param parent MessagePack array to be taken by @a next_hop. > + * @param child Current field from which the branch happens. It > + * already contains an update subtree. > + */ > +static int > +xrow_update_route_branch_array(struct xrow_update_field *next_hop, > + const char *parent, > + const struct xrow_update_field *child, > + int32_t field_no) > +{ > + struct xrow_update_op *op = child->bar.op; > + /* > + * There are rules when a subtree can be just copied as is I would name these as limitations, not rules. > + * from one parent to another. > + * 1) It should not be a bar update. Because if it is not > + * a bar, then it is either scalar or an array/map. > + * Scalar update can be safely moved. Array/map update > + * doesn't change their parent, and also can be moved. > + * Otherwise see (2). > + * 2) It is a bar. Then it should not be a leaf. If it is > + * not a leaf, then it does not change header and other > + * fields of this particular array, and can be safely > + * moved to somewhere else. > + * Otherwise see (1). ^^^ (3) I suppose? > + * 3) Ok, it is a bar, a leaf. Then it is a bar with zero > + * path length. It could degrade to zero path len in > + * during branching. In this case it should be a scalar > + * bar. The only non-scalar operations are '!' and '#'. > + * > + * Why '#' and '!' can't be moved? '!', for example, being > + * applied to a field [1], affects all fields [2-*], and > + * the array header. The same but in an even worse form > + * about '#'. Such operations should be redone. They > + * affect many fields and the parent. > + * > + * There is a tricky thing though - why not to just redo > + * all operations here, for the sake of code simplicity? > + * It would allow to remove 'create_with_child' crutch. > + * The answer is - it is not possible. If a field is > + * copyable, it is not re-applicable. And vice-versa. For > + * example, if it is not a leaf, then there may be many > + * operations, not one. A subtree just can't be > + * 're-applied'. > + * > + * If the operation is scalar and a leaf, then its result > + * has already overridden its arguments. This is because > + * scalar operations save result into the arguments, to > + * save memory. A second operation appliance would lead > + * to very surprising results. > + * > + * Another reason - performance. This path should be > + * quite hot, and to copy a struct is for sure much faster > + * than to reapply an operation using a virtual function. > + * Operations '!' and '#' are quite rare, so their > + * optimization is not a critical goal. > + */ > + if (/* (1) Not a bar. */ > + child->type != XUPDATE_BAR || > + /* (2) Bar, but not a leaf. */ > + child->bar.path_len > 0 || > + /* (3) Leaf, bar, but a scalar operation. */ > + (op->opcode != '!' && op->opcode != '#')) { > + return xrow_update_array_create_with_child(next_hop, parent, > + child, field_no); > + } > + /* > + * Can't move the child. The only way to branch is to > + * reapply the child's operation. > + */ > + op->token_type = JSON_TOKEN_NUM; > + op->field_no = field_no; > + const char *data = parent; > + uint32_t field_count = mp_decode_array(&data); > + const char *end = data; > + for (uint32_t i = 0; i < field_count; ++i) > + mp_next(&end); > + if (xrow_update_array_create(next_hop, parent, data, end, > + field_count) != 0) > + return -1; > + return op->meta->do_op(op, next_hop); > +} > + > +struct xrow_update_field * > +xrow_update_route_branch(struct xrow_update_field *field, > + struct xrow_update_op *new_op) > +{ > + assert(new_op->lexer.src != NULL); > + const char *old_path; > + int old_path_len; > + if (field->type == XUPDATE_BAR) { > + old_path = field->bar.path; > + old_path_len = field->bar.path_len; > + } else { > + assert(field->type == XUPDATE_ROUTE); > + old_path = field->route.path; > + old_path_len = field->route.path_len; > + } > + assert(old_path != NULL); > + struct json_lexer old_path_lexer; > + struct json_token old_token, new_token; > + /* > + * Offset is going to be used as a length of the route > + * node created as a parent of the old subtree and the > + * new operation. As it is described in the route > + * definition in struct xrow_update_field - route is the > + * common prefix of all operations of the subtree. Here > + * length of that prefix is calculated. > + * In addition, it is used here to determine if the new > + * operation is different from the current subtree from > + * the very beginning. Basically, it means the offset is > + * 0, and no route is generated. Root becomes a regular > + * update field (array, map), not a route. Example: > + * @a field is a bar '[1].a.b = 20', @a new_op is > + * '[2].c.d = 30'. In this case the paths are already > + * different from the beginning, no common prefix = no > + * route. Array with children [1].a.b and [2].c.d becomes > + * a root. > + */ > + int saved_old_offset; > + json_lexer_create(&old_path_lexer, old_path, old_path_len, > + TUPLE_INDEX_BASE); > + const char *parent = field->data; > + do { > + saved_old_offset = old_path_lexer.offset; > + int rc = json_lexer_next_token(&old_path_lexer, &old_token); > + /* Old path is already validated. */ > + assert(rc == 0); > + rc = json_lexer_next_token(&new_op->lexer, &new_token); > + if (rc != 0) { > + xrow_update_err_bad_json(new_op, rc); > + return NULL; > + } > + if (json_token_cmp(&old_token, &new_token) != 0) > + break; > + switch(new_token.type) { > + case JSON_TOKEN_NUM: > + rc = tuple_field_go_to_index(&parent, new_token.num); > + break; > + case JSON_TOKEN_STR: > + rc = tuple_field_go_to_key(&parent, new_token.str, > + new_token.len); > + break; > + default: > + /* > + * Can't be JSON_TOKEN_ANY, because old > + * and new tokens are equal, but '*' is > + * considered invalid and the old was > + * already checked for that. So the new is > + * valid too. And can't have type ANY. > + */ > + assert(new_token.type == JSON_TOKEN_END); > + xrow_update_err_double(new_op); > + return NULL; > + } > + /* > + * Must always find a field, because the old > + * token already went that path. > + */ > + assert(rc == 0); > + } while (true); > + enum mp_type type = mp_typeof(*parent); > + /* > + * Ched if the paths are different from the very ^^^^ check(ed)? > + * beginning. It means, that the old field should be > + * transformed instead of creating a new route node. > + */ > + bool transform_root = saved_old_offset == 0; It would be nicer if you put comparison in () up there > + struct xrow_update_field *next_hop; > + if (! transform_root) { No spaces after unaries, use sed -ie "s/(! /\(\!/" > + next_hop = (struct xrow_update_field *) > + region_alloc(&fiber()->gc, sizeof(*next_hop)); > + if (next_hop == NULL) { > + diag_set(OutOfMemory, sizeof(*next_hop), "region_alloc", > + "next_hop"); > + return NULL; > + } > + } else { > + next_hop = field; > + } > + > + int path_offset = old_path_lexer.offset; > + struct xrow_update_field child = *field; > + if (child.type == XUPDATE_ROUTE) { > + child.route.path += path_offset; > + child.route.path_len -= path_offset; > + if (child.route.path_len == 0) > + child = *child.route.next_hop; > + } else { > + assert(child.type == XUPDATE_BAR); > + child.bar.path += path_offset; > + child.bar.path_len -= path_offset; > + /* > + * Yeah, bar length can become 0 here, but it is > + * ok as long as it is a scalar operation (not '!' > + * and not '#'). When a bar is scalar, it operates > + * on one concrete field and works even if its > + * path len is 0. Talking of '#' and '!' - they > + * are handled by array and map 'branchers' > + * internally, below. They reapply such > + * operations. > + */ > + } > + > + if (type == MP_ARRAY) { > + if (new_token.type != JSON_TOKEN_NUM) { > + xrow_update_err(new_op, "can not update array by "\ > + "non-integer index"); > + return NULL; > + } > + new_op->token_type = JSON_TOKEN_NUM; > + new_op->field_no = new_token.num; > + if (xrow_update_route_branch_array(next_hop, parent, &child, > + old_token.num) != 0) > + return NULL; > + } else if (type == MP_MAP) { > + diag_set(ClientError, ER_UNSUPPORTED, "update", > + "path intersection on map"); > + return NULL; > + } else { > + xrow_update_err_no_such_field(new_op); > + return NULL; > + } > + > + if (! transform_root) { > + field->type = XUPDATE_ROUTE; > + field->route.path = old_path; > + field->route.path_len = saved_old_offset; > + field->route.next_hop = next_hop; > + } > + return next_hop; > +} > + > +/** > + * Obtain a next node of the update tree to which @a op should be > + * propagated. It is the same as branch, but has a fast path in > + * case @a field is route, and operation prefix matches this > + * route - then no need to parse JSON and dive into MessagePack, > + * the route is just followed, via a lexer offset increase. > + */ > +static struct xrow_update_field * > +xrow_update_route_next(struct xrow_update_field *field, struct xrow_update_op *op) > +{ > + assert(field->type == XUPDATE_ROUTE); > + assert(! xrow_update_op_is_term(op)); > + const char *new_path = op->lexer.src + op->lexer.offset; > + int new_path_len = op->lexer.src_len - op->lexer.offset; > + if (field->route.path_len <= new_path_len && > + memcmp(field->route.path, new_path, field->route.path_len) == 0) { > + /* > + * Fast path: jump to the next hop with no > + * decoding. Is used, when several JSON updates > + * have the same prefix. > + */ > + op->lexer.offset += field->route.path_len; > + return field->route.next_hop; > + } > + return xrow_update_route_branch(field, op); > +} > + > +#define DO_SCALAR_OP_GENERIC(op_type) \ > +int \ > +xrow_update_op_do_route_##op_type(struct xrow_update_op *op, \ > + struct xrow_update_field *field) \ > +{ \ > + assert(field->type == XUPDATE_ROUTE); \ > + struct xrow_update_field *next_hop = xrow_update_route_next(field, op); \ > + if (next_hop == NULL) \ > + return -1; \ > + return xrow_update_op_do_field_##op_type(op, next_hop); \ > +} > + > +DO_SCALAR_OP_GENERIC(set) > + > +DO_SCALAR_OP_GENERIC(insert) > + > +DO_SCALAR_OP_GENERIC(delete) > + > +DO_SCALAR_OP_GENERIC(arith) > + > +DO_SCALAR_OP_GENERIC(bit) > + > +DO_SCALAR_OP_GENERIC(splice) > + > +uint32_t > +xrow_update_route_sizeof(struct xrow_update_field *field) > +{ > + return field->size - field->route.next_hop->size + > + xrow_update_field_sizeof(field->route.next_hop); > +} > + > +uint32_t > +xrow_update_route_store(struct xrow_update_field *field, char *out, > + char *out_end) > +{ > + char *saved_out = out; > + int before_hop = field->route.next_hop->data - field->data; > + memcpy(out, field->data, before_hop); > + out += before_hop; > + out += xrow_update_field_store(field->route.next_hop, out, out_end); > + int after_hop = before_hop + field->route.next_hop->size; > + memcpy(out, field->data + after_hop, field->size - after_hop); > + return out + field->size - after_hop - saved_out; > +} > diff --git a/test/box/update.result b/test/box/update.result > index ee38c100f..bf76f6638 100644 > --- a/test/box/update.result > +++ b/test/box/update.result > @@ -1294,6 +1294,183 @@ vy_s:select() > vy_s:drop() > --- > ... > +-- > +-- Complex updates. Intersected paths, different types. > +-- > +-- > +-- = > +-- > +-- Difference in the last part. > +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}}) > +--- > +- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500, > + 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 375], [400, > + 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]] > +... > +-- Different starting from the non-last part. > +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}}) > +--- > +- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500, > + 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[325, 350], [400, > + 475]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]] > +... > +-- Contains string keys in the middle. > +t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}}) > +--- > +- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4000, 5000, 6, 7, 8], > + 'e': 500, 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], > + [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]] > +... > +t = {1} > +--- > +... > +t[2] = setmetatable({}, {__serialize = 'map'}) > +--- > +... > +t[3] = {} > +--- > +... > +t[4] = { \ > + 1, \ > + 2, \ > + 3, \ > + { \ > + 4, \ > + 5, \ > + 6, \ > + 7, \ > + { \ > + 8, \ > + 9, \ > + {10, 11, 12}, \ > + 13, \ > + 14, \ > + }, \ > + 15, \ > + 16, \ > + {17, 18, 19} \ > + }, \ > + 20, \ > + { \ > + a = 21, \ > + b = 22, \ > + c = { \ > + d = 23, \ > + e = 24 \ > + } \ > + }, \ > + 25 \ > +} > +--- > +... > +t = s:replace(t) > +--- > +... > +-- Non-first and non-last field updates. > +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}}) > +--- > +- [1, {}, [], [1, 2, 3, [4, 5, 6, 7, [8, 9, [10, 11000, 12], 13, 14], 15, 16, [17, > + 18, 19000]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- Triple intersection with enabled fast jump by route. > +t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}}) > +--- > +- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9000, [10, 11, 12], 13, 14], 15, 16, [ > + 17, 18000, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- Triple intersection with no optimizations. > +t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}}) > +--- > +- [1, {}, [], [1, 2, 3000, [4, 5000, 6, 7, [8000, 9, [10, 11, 12], 13, 14], 15, 16, > + [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- Last operation splits previous ones. > +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}}) > +--- > +- [1, {}, [], [1, 2, 3, [4, 5, 6000, 7, [8, 9, [10000, 11000, 12], 13, 14], 15, 16, > + [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- First two operations generate a route '[4]' in the 4th field of > +-- the tuple. With childs [1] and [2]. The last operation should > +-- delete the route and replace it with a full featured array. > +t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}}) > +--- > +- [1, {}, [], [1, 2, 3.5, [4.5, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16, [ > + 17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- Test errors. > +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}}) > +--- > +- error: 'Field ''[4][4][5].a'' UPDATE error: can not update array by non-integer > + index' > +... > +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}}) > +--- > +- error: 'Field ''[4][4][*].a'' UPDATE error: can not update array by non-integer > + index' > +... > +t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}}) > +--- > +- error: 'Field ''[4][4][*].b'' UPDATE error: invalid JSON in position 8' > +... > +t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}}) > +--- > +- error: 'Field ''[4][4]-badjson'' UPDATE error: invalid JSON in position 7' > +... > +t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}}) > +--- > +- error: 'Field ''[4][4][1]'' UPDATE error: double update of the same field' > +... > +-- First two operations produce zero length bar update in field > +-- [4][4][5][4]. The third operation should raise an error. > +t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}}) > +--- > +- error: 'Field ''[4][4][5][4]'' UPDATE error: double update of the same field' > +... > +-- > +-- ! > +-- > +t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}}) > +--- > +- [1, {}, [], [1, 2, 3, [4, 5.25, 5.5, 6, 7, [8, 9, [10, 11, 12], 13, 14], 15, 16, > + [17, 18, 19]], 19.5, 19.75, 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, > + 25]] > +... > +-- > +-- # > +-- > +t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}}) > +--- > +- [1, {}, [], [1, 2, 3, [4, 5, 6, 15, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': { > + 'd': 23, 'e': 24}}, 25]] > +... > +-- > +-- Scalar operations. > +-- > +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}}) > +--- > +- [1, {}, [], [1, 2, 3.5, [4, 5, 6, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16, [ > + 17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}}) > +--- > +- [1, {}, [], [1, 2, 3.5, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, 16, > + [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- Combined - check that operations following '!' take into > +-- account that there is a new element. Indexes should be relative > +-- to the new array size. > +t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}}) > +--- > +- [1, {}, [], [1, 2, 2.5, 3, [4, 5, 6.25, 7, [8, 9, [10, 11, 12], 13, 14.75], 15, > + 16, [17, 18, 19]], 20, {'b': 22, 'a': 21, 'c': {'d': 23, 'e': 24}}, 25]] > +... > +-- Error - an attempt to follow JSON path in a scalar field > +-- during branching. > +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}}) > +--- > +- error: Field ''[4][3][2]'' was not found in the tuple > +... > s:drop() > --- > ... > diff --git a/test/box/update.test.lua b/test/box/update.test.lua > index 60e669d27..d91a2e95c 100644 > --- a/test/box/update.test.lua > +++ b/test/box/update.test.lua > @@ -424,4 +424,95 @@ box.commit() > vy_s:select() > vy_s:drop() > > +-- > +-- Complex updates. Intersected paths, different types. > +-- > + > +-- > +-- = > +-- > +-- Difference in the last part. > +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][1][2]', 375}}) > +-- Different starting from the non-last part. > +t:update({{'=', '[3][3][1][1]', 325}, {'=', '[3][3][2][2]', 475}}) > +-- Contains string keys in the middle. > +t:update({{'=', '[2].c.f[1]', 4000}, {'=', '[2].c.f[2]', 5000}}) > +t = {1} > +t[2] = setmetatable({}, {__serialize = 'map'}) > +t[3] = {} > +t[4] = { \ > + 1, \ > + 2, \ > + 3, \ > + { \ > + 4, \ > + 5, \ > + 6, \ > + 7, \ > + { \ > + 8, \ > + 9, \ > + {10, 11, 12}, \ > + 13, \ > + 14, \ > + }, \ > + 15, \ > + 16, \ > + {17, 18, 19} \ > + }, \ > + 20, \ > + { \ > + a = 21, \ > + b = 22, \ > + c = { \ > + d = 23, \ > + e = 24 \ > + } \ > + }, \ > + 25 \ > +} > +t = s:replace(t) > +-- Non-first and non-last field updates. > +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][8][3]', 19000}}) > +-- Triple intersection with enabled fast jump by route. > +t:update({{'=', '[4][4][3]', 6000}, {'=', '[4][4][5][2]', 9000}, {'=', '[4][4][8][2]', 18000}}) > +-- Triple intersection with no optimizations. > +t:update({{'=', '[4][3]', 3000}, {'=', '[4][4][2]', 5000}, {'=', '[4][4][5][1]', 8000}}) > +-- Last operation splits previous ones. > +t:update({{'=', '[4][4][5][3][2]', 11000}, {'=', '[4][4][5][3][1]', 10000}, {'=', '[4][4][3]', 6000}}) > +-- First two operations generate a route '[4]' in the 4th field of > +-- the tuple. With childs [1] and [2]. The last operation should > +-- delete the route and replace it with a full featured array. > +t:update({{'=', '[4][4][1]', 4.5}, {'=', '[4][4][2]', 5.5}, {'=', '[4][3]', 3.5}}) > +-- Test errors. > +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][5].a', -1}}) > +t:update({{'=', '[4][4][5][1]', 8000}, {'=', '[4][4][*].a', -1}}) > +t:update({{'=', '[4][4][*].b', 8000}, {'=', '[4][4][*].a', -1}}) > +t:update({{'=', '[4][4][1]', 4000}, {'=', '[4][4]-badjson', -1}}) > +t:update({{'=', '[4][4][1]', 1}, {'=', '[4][4][1]', 2}}) > +-- First two operations produce zero length bar update in field > +-- [4][4][5][4]. The third operation should raise an error. > +t:update({{'+', '[4][4][5][4]', 13000}, {'=', '[4][4][5][1]', 8000}, {'+', '[4][4][5][4]', 1}}) > + > +-- > +-- ! > +-- > +t:update({{'!', '[4][5]', 19.5}, {'!', '[4][6]', 19.75}, {'!', '[4][4][3]', 5.5}, {'=', '[4][4][2]', 5.25}}) > +-- > +-- # > +-- > +t:update({{'#', '[4][4][5]', 1}, {'#', '[4][4][4]', 1}, {'#', '[4][4][5]', 1}}) > +-- > +-- Scalar operations. > +-- > +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}}) > +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][4][5][5]', 0.75}, {'+', '[4][4][3]', 0.25}}) > +-- Combined - check that operations following '!' take into > +-- account that there is a new element. Indexes should be relative > +-- to the new array size. > +t:update({{'!', '[4][3]', 2.5}, {'+', '[4][5][5][5]', 0.75}, {'+', '[4][5][3]', 0.25}}) > +-- Error - an attempt to follow JSON path in a scalar field > +-- during branching. > +t:update({{'+', '[4][3]', 0.5}, {'+', '[4][3][2]', 0.5}}) > + > s:drop() > -- > 2.21.0 (Apple Git-122.2) >