* [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates
@ 2019-11-21 23:19 Vladislav Shpilevoy
2019-12-09 20:18 ` Sergey Ostanevich
2019-12-19 8:34 ` Kirill Yukhin
0 siblings, 2 replies; 5+ messages in thread
From: Vladislav Shpilevoy @ 2019-11-21 23:19 UTC (permalink / raw)
To: tarantool-patches, kostja.osipov
An isolated tuple update is an update by JSON path which does not
share a common prefix with any other JSON update operation in the
same set. For example, these JSON update operations are isolated:
{'=', '[1][2][3]', 100},
{'+', '[2].b.c', 200}
Their JSON paths have no common prefix. But these operations are
not isolated:
{'=', '[1][2][3]', 100},
{'+', '[1].b.c', 200}
They have a common prefix '[1]'.
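For illustration, a sketch using the Lua API exercised by the tests
added below (a tuple t whose field 'f' is a map and field 'g' is an
array, as in test/box/update.test.lua):
t:update({{'!', 'f.e', 300}, {'!', 'g[4].c', 700}})
The two paths start from different first-level fields, so the
operations are isolated and both are applied. A set such as
t:update({{'=', 'f.a', 1}, {'+', 'f.b', 1}})
shares the prefix 'f' and is rejected with an 'intersected JSON
paths' error (see xrow_update_bar.c below), since intersecting
paths are not supported by this patch yet.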
Isolated updates are the first part of fully functional JSON
updates. Their advantage is that the implementation is relatively
simple and lightweight - an isolated JSON update does not store
each part of the JSON path as a separate object. It stores just
the JSON path string and a pointer to the MessagePack object to
update.
Such isolated updates are called 'bar updates'. They are a basic
building block of more complex JSON updates.
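As a rough sketch of what that means for the Lua API used in the
tests below: a single operation like
t:update({{'+', 'f.c.f[1]', 1}})
is handled as one bar - the remaining JSON path is kept as a plain
string plus a pointer to the target MessagePack element, with no
per-path-component allocations.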
Part of #1261
---
Branch: https://github.com/tarantool/tarantool/tree/gerold103/gh-1261-json-bar-update
Issue: https://github.com/tarantool/tarantool/issues/1261
src/box/CMakeLists.txt | 1 +
src/box/vinyl.c | 16 +-
src/box/xrow_update.c | 41 +++-
src/box/xrow_update_array.c | 22 +-
src/box/xrow_update_bar.c | 454 +++++++++++++++++++++++++++++++++++
src/box/xrow_update_field.c | 45 +++-
src/box/xrow_update_field.h | 115 +++++++++
test/box/update.result | 410 ++++++++++++++++++++++++++++++-
test/box/update.test.lua | 145 +++++++++++
test/engine/update.result | 5 -
test/engine/update.test.lua | 2 -
test/unit/column_mask.c | 75 +++++-
test/unit/column_mask.result | 8 +-
13 files changed, 1314 insertions(+), 25 deletions(-)
create mode 100644 src/box/xrow_update_bar.c
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 5cd5cba81..fc9d1a3e8 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -43,6 +43,7 @@ add_library(tuple STATIC
xrow_update.c
xrow_update_field.c
xrow_update_array.c
+ xrow_update_bar.c
tuple_compare.cc
tuple_extract_key.cc
tuple_hash.cc
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 767e40006..15a136f81 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -2004,13 +2004,25 @@ request_normalize_ops(struct request *request)
ops_end = mp_encode_str(ops_end, op_name, op_name_len);
int field_no;
- if (mp_typeof(*pos) == MP_INT) {
+ const char *field_name;
+ switch (mp_typeof(*pos)) {
+ case MP_INT:
field_no = mp_decode_int(&pos);
ops_end = mp_encode_int(ops_end, field_no);
- } else {
+ break;
+ case MP_UINT:
field_no = mp_decode_uint(&pos);
field_no -= request->index_base;
ops_end = mp_encode_uint(ops_end, field_no);
+ break;
+ case MP_STR:
+ field_name = pos;
+ mp_next(&pos);
+ memcpy(ops_end, field_name, pos - field_name);
+ ops_end += pos - field_name;
+ break;
+ default:
+ unreachable();
}
if (*op_name == ':') {
diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
index 123db081a..ecda17544 100644
--- a/src/box/xrow_update.c
+++ b/src/box/xrow_update.c
@@ -116,7 +116,12 @@ struct xrow_update
* re-encode each Lua update with 0-based indexes.
*/
int index_base;
- /** A bitmask of all columns modified by this update. */
+ /**
+ * A bitmask of all columns modified by this update. Only
+ * the first level of a tuple is accounted here. I.e. if
+ * a field [1][2][3] was updated, then only [1] is
+ * reflected.
+ */
uint64_t column_mask;
/** First level of update tree. It is always array. */
struct xrow_update_field root;
@@ -182,9 +187,26 @@ xrow_update_read_ops(struct xrow_update *update, const char *expr,
*/
if (column_mask != COLUMN_MASK_FULL) {
int32_t field_no;
+ char opcode;
+ if (xrow_update_op_is_term(op)) {
+ opcode = op->opcode;
+ } else {
+ /*
+ * When a field is not terminal,
+ * on the first level it for sure
+ * changes only one field and in
+ * terms of column mask is
+ * equivalent to any scalar
+ * operation. Even if it was '!'
+ * or '#'. Zero means, that it
+ * won't match any checks with
+ * non-scalar operations below.
+ */
+ opcode = 0;
+ }
if (op->field_no >= 0)
field_no = op->field_no;
- else if (op->opcode != '!')
+ else if (opcode != '!')
field_no = field_count_hint + op->field_no;
else
/*
@@ -227,12 +249,12 @@ xrow_update_read_ops(struct xrow_update *update, const char *expr,
* hint. It is used to translate negative
* field numbers into positive ones.
*/
- if (op->opcode == '!')
+ if (opcode == '!')
++field_count_hint;
- else if (op->opcode == '#')
+ else if (opcode == '#')
field_count_hint -= (int32_t) op->arg.del.count;
- if (op->opcode == '!' || op->opcode == '#')
+ if (opcode == '!' || opcode == '#')
/*
* If the operation is insertion
* or deletion then it potentially
@@ -412,6 +434,15 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
if (op->opcode != '+' && op->opcode != '-' &&
op->opcode != '=')
return NULL;
+ /*
+ * Not terminal operation means, that the
+ * update is not flat, and squash would
+ * need to build a tree of operations to
+ * find matches. That is too complex,
+ * squash is skipped.
+ */
+ if (! xrow_update_op_is_term(op))
+ return NULL;
if (op->field_no <= prev_field_no)
return NULL;
prev_field_no = op->field_no;
diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
index 7f198076b..1cc49f861 100644
--- a/src/box/xrow_update_array.c
+++ b/src/box/xrow_update_array.c
@@ -220,12 +220,20 @@ xrow_update_op_do_array_insert(struct xrow_update_op *op,
struct xrow_update_field *field)
{
assert(field->type == XUPDATE_ARRAY);
+ struct xrow_update_array_item *item;
+ if (! xrow_update_op_is_term(op)) {
+ item = xrow_update_array_extract_item(field, op);
+ if (item == NULL)
+ return -1;
+ return xrow_update_op_do_field_insert(op, &item->field);
+ }
+
struct xrow_update_rope *rope = field->array.rope;
uint32_t size = xrow_update_rope_size(rope);
if (xrow_update_op_adjust_field_no(op, size + 1) != 0)
return -1;
- struct xrow_update_array_item *item = (struct xrow_update_array_item *)
+ item = (struct xrow_update_array_item *)
xrow_update_alloc(rope->ctx, sizeof(*item));
if (item == NULL)
return -1;
@@ -248,6 +256,8 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
xrow_update_array_extract_item(field, op);
if (item == NULL)
return -1;
+ if (! xrow_update_op_is_term(op))
+ return xrow_update_op_do_field_set(op, &item->field);
op->new_field_len = op->arg.set.length;
/*
* Ignore the previous op, if any. It is not correct,
@@ -265,6 +275,14 @@ xrow_update_op_do_array_delete(struct xrow_update_op *op,
struct xrow_update_field *field)
{
assert(field->type == XUPDATE_ARRAY);
+ if (! xrow_update_op_is_term(op)) {
+ struct xrow_update_array_item *item =
+ xrow_update_array_extract_item(field, op);
+ if (item == NULL)
+ return -1;
+ return xrow_update_op_do_field_delete(op, &item->field);
+ }
+
struct xrow_update_rope *rope = field->array.rope;
uint32_t size = xrow_update_rope_size(rope);
if (xrow_update_op_adjust_field_no(op, size) != 0)
@@ -287,6 +305,8 @@ xrow_update_op_do_array_##op_type(struct xrow_update_op *op, \
xrow_update_array_extract_item(field, op); \
if (item == NULL) \
return -1; \
+ if (! xrow_update_op_is_term(op)) \
+ return xrow_update_op_do_field_##op_type(op, &item->field); \
if (item->field.type != XUPDATE_NOP) \
return xrow_update_err_double(op); \
if (xrow_update_op_do_##op_type(op, item->field.data) != 0) \
diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
new file mode 100644
index 000000000..737673e8a
--- /dev/null
+++ b/src/box/xrow_update_bar.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "xrow_update_field.h"
+#include "tuple.h"
+
+/**
+ * Locate a field to update by @a op's JSON path and initialize
+ * @a field as a bar update.
+ *
+ * @param op Update operation.
+ * @param field Field to locate in.
+ * @param[out] key_len_or_index One parameter for two values,
+ * depending on where the target point is located: in an
+ * array or a map. In case of map it is size of a key
+ * before the found point. It is used to find range of the
+ * both key and value in '#' operation to drop the pair.
+ * In case of array it is index of the array element to be
+ * able to check how many fields are left for deletion.
+ *
+ * @retval 0 Success.
+ * @retval -1 Not found or invalid JSON.
+ */
+static inline int
+xrow_update_bar_locate(struct xrow_update_op *op,
+ struct xrow_update_field *field,
+ int *key_len_or_index)
+{
+ /*
+ * Bar update is not flat by definition. It always has a
+ * non empty path. This is why op is expected to be not
+ * terminal.
+ */
+ assert(! xrow_update_op_is_term(op));
+ int rc;
+ field->type = XUPDATE_BAR;
+ field->bar.op = op;
+ field->bar.path = op->lexer.src + op->lexer.offset;
+ field->bar.path_len = op->lexer.src_len - op->lexer.offset;
+ const char *pos = field->data;
+ struct json_token token;
+ while ((rc = json_lexer_next_token(&op->lexer, &token)) == 0 &&
+ token.type != JSON_TOKEN_END) {
+
+ switch (token.type) {
+ case JSON_TOKEN_NUM:
+ field->bar.parent = pos;
+ *key_len_or_index = token.num;
+ rc = tuple_field_go_to_index(&pos, token.num);
+ break;
+ case JSON_TOKEN_STR:
+ field->bar.parent = pos;
+ *key_len_or_index = token.len;
+ rc = tuple_field_go_to_key(&pos, token.str, token.len);
+ break;
+ default:
+ assert(token.type == JSON_TOKEN_ANY);
+ rc = op->lexer.symbol_count - 1;
+ return xrow_update_err_bad_json(op, rc);
+ }
+ if (rc != 0)
+ return xrow_update_err_no_such_field(op);
+ }
+ if (rc > 0)
+ return xrow_update_err_bad_json(op, rc);
+
+ field->bar.point = pos;
+ mp_next(&pos);
+ field->bar.point_size = pos - field->bar.point;
+ return 0;
+}
+
+/**
+ * Locate an optional field to update by @a op's JSON path. If
+ * found or only a last path part is not found, initialize @a
+ * field as a bar update. Last path part may not exist and it is
+ * ok, for example, for '!' and '=' operations.
+ */
+static inline int
+xrow_update_bar_locate_opt(struct xrow_update_op *op,
+ struct xrow_update_field *field, bool *is_found,
+ int *key_len_or_index)
+{
+ /*
+ * Bar update is not flat by definition. It always has a
+ * non empty path. This is why op is expected to be not
+ * terminal.
+ */
+ assert(! xrow_update_op_is_term(op));
+ int rc;
+ field->type = XUPDATE_BAR;
+ field->bar.op = op;
+ field->bar.path = op->lexer.src + op->lexer.offset;
+ field->bar.path_len = op->lexer.src_len - op->lexer.offset;
+ const char *pos = field->data;
+ struct json_token token;
+ do {
+ rc = json_lexer_next_token(&op->lexer, &token);
+ if (rc != 0)
+ return xrow_update_err_bad_json(op, rc);
+
+ switch (token.type) {
+ case JSON_TOKEN_END:
+ *is_found = true;
+ field->bar.point = pos;
+ mp_next(&pos);
+ field->bar.point_size = pos - field->bar.point;
+ return 0;
+ case JSON_TOKEN_NUM:
+ field->bar.parent = pos;
+ *key_len_or_index = token.num;
+ rc = tuple_field_go_to_index(&pos, token.num);
+ break;
+ case JSON_TOKEN_STR:
+ field->bar.parent = pos;
+ *key_len_or_index = token.len;
+ rc = tuple_field_go_to_key(&pos, token.str, token.len);
+ break;
+ default:
+ assert(token.type == JSON_TOKEN_ANY);
+ rc = op->lexer.symbol_count - 1;
+ return xrow_update_err_bad_json(op, rc);
+ }
+ } while (rc == 0);
+ assert(rc == -1);
+ /* Ensure, that 'token' is next to last path part. */
+ struct json_token tmp_token;
+ rc = json_lexer_next_token(&op->lexer, &tmp_token);
+ if (rc != 0)
+ return xrow_update_err_bad_json(op, rc);
+ if (tmp_token.type != JSON_TOKEN_END)
+ return xrow_update_err_no_such_field(op);
+
+ *is_found = false;
+ if (token.type == JSON_TOKEN_NUM) {
+ const char *tmp = field->bar.parent;
+ if (mp_typeof(*tmp) != MP_ARRAY) {
+ return xrow_update_err(op, "can not access by index a "\
+ "non-array field");
+ }
+ uint32_t size = mp_decode_array(&tmp);
+ if ((uint32_t) token.num > size)
+ return xrow_update_err_no_such_field(op);
+ /*
+ * The updated point is in an array, its position
+ * was not found, and its index is <= size. The
+ * only way how can that happen - the update tries
+ * to append a new array element. The following
+ * code tries to find the array's end.
+ */
+ assert((uint32_t) token.num == size);
+ if (field->bar.parent == field->data) {
+ /*
+ * Optimization for the case when the path
+ * is short. So parent of the updated
+ * point is the field itself. It allows
+ * not to decode anything at all. It is
+ * worth doing, since the paths are
+ * usually short.
+ */
+ field->bar.point = field->data + field->size;
+ } else {
+ field->bar.point = field->bar.parent;
+ mp_next(&field->bar.point);
+ }
+ field->bar.point_size = 0;
+ } else {
+ assert(token.type == JSON_TOKEN_STR);
+ field->bar.new_key = token.str;
+ field->bar.new_key_len = token.len;
+ if (mp_typeof(*field->bar.parent) != MP_MAP) {
+ return xrow_update_err(op, "can not access by key a "\
+ "non-map field");
+ }
+ }
+ return 0;
+}
+
+/**
+ * Nop fields are those which are not updated. And when they
+ * receive an update via one of xrow_update_op_do_nop_* functions,
+ * it means, that there is a non terminal path digging inside this
+ * not updated field. It turns nop field into a bar field. How
+ * exactly - depends on a concrete operation.
+ */
+
+int
+xrow_update_op_do_nop_insert(struct xrow_update_op *op,
+ struct xrow_update_field *field)
+{
+ assert(op->opcode == '!');
+ assert(field->type == XUPDATE_NOP);
+ bool is_found = false;
+ int key_len = 0;
+ if (xrow_update_bar_locate_opt(op, field, &is_found, &key_len) != 0)
+ return -1;
+ op->new_field_len = op->arg.set.length;
+ if (mp_typeof(*field->bar.parent) == MP_MAP) {
+ if (is_found)
+ return xrow_update_err_duplicate(op);
+ /*
+ * Don't forget, that map element is a pair. So
+ * key length also should be accounted.
+ */
+ op->new_field_len += mp_sizeof_str(key_len);
+ }
+ return 0;
+}
+
+int
+xrow_update_op_do_nop_set(struct xrow_update_op *op,
+ struct xrow_update_field *field)
+{
+ assert(op->opcode == '=');
+ assert(field->type == XUPDATE_NOP);
+ bool is_found = false;
+ int key_len = 0;
+ if (xrow_update_bar_locate_opt(op, field, &is_found, &key_len) != 0)
+ return -1;
+ op->new_field_len = op->arg.set.length;
+ if (! is_found) {
+ op->opcode = '!';
+ if (mp_typeof(*field->bar.parent) == MP_MAP)
+ op->new_field_len += mp_sizeof_str(key_len);
+ }
+ return 0;
+}
+
+int
+xrow_update_op_do_nop_delete(struct xrow_update_op *op,
+ struct xrow_update_field *field)
+{
+ assert(op->opcode == '#');
+ assert(field->type == XUPDATE_NOP);
+ int key_len_or_index = 0;
+ if (xrow_update_bar_locate(op, field, &key_len_or_index) != 0)
+ return -1;
+ if (mp_typeof(*field->bar.parent) == MP_ARRAY) {
+ const char *tmp = field->bar.parent;
+ uint32_t size = mp_decode_array(&tmp);
+ if (key_len_or_index + op->arg.del.count > size)
+ op->arg.del.count = size - key_len_or_index;
+ const char *end = field->bar.point + field->bar.point_size;
+ for (uint32_t i = 1; i < op->arg.del.count; ++i)
+ mp_next(&end);
+ field->bar.point_size = end - field->bar.point;
+ } else {
+ if (op->arg.del.count != 1)
+ return xrow_update_err_delete1(op);
+ /* Take key size into account to delete it too. */
+ key_len_or_index = mp_sizeof_str(key_len_or_index);
+ field->bar.point -= key_len_or_index;
+ field->bar.point_size += key_len_or_index;
+ }
+ return 0;
+}
+
+#define DO_NOP_OP_GENERIC(op_type) \
+int \
+xrow_update_op_do_nop_##op_type(struct xrow_update_op *op, \
+ struct xrow_update_field *field) \
+{ \
+ assert(field->type == XUPDATE_NOP); \
+ int key_len_or_index; \
+ if (xrow_update_bar_locate(op, field, &key_len_or_index) != 0) \
+ return -1; \
+ return xrow_update_op_do_##op_type(op, field->bar.point); \
+}
+
+DO_NOP_OP_GENERIC(arith)
+
+DO_NOP_OP_GENERIC(bit)
+
+DO_NOP_OP_GENERIC(splice)
+
+#undef DO_NOP_OP_GENERIC
+
+#define DO_BAR_OP_GENERIC(op_type) \
+int \
+xrow_update_op_do_bar_##op_type(struct xrow_update_op *op, \
+ struct xrow_update_field *field) \
+{ \
+ /* \
+ * The only way to update a bar is to make a second update \
+ * with the same prefix as this bar. But it is not \
+ * supported yet. \
+ */ \
+ (void) op; \
+ (void) field; \
+ assert(field->type == XUPDATE_BAR); \
+ diag_set(ClientError, ER_UNSUPPORTED, "update", \
+ "intersected JSON paths"); \
+ return -1; \
+}
+
+DO_BAR_OP_GENERIC(insert)
+
+DO_BAR_OP_GENERIC(set)
+
+DO_BAR_OP_GENERIC(delete)
+
+DO_BAR_OP_GENERIC(arith)
+
+DO_BAR_OP_GENERIC(bit)
+
+DO_BAR_OP_GENERIC(splice)
+
+#undef DO_BAR_OP_GENERIC
+
+uint32_t
+xrow_update_bar_sizeof(struct xrow_update_field *field)
+{
+ assert(field->type == XUPDATE_BAR);
+ switch(field->bar.op->opcode) {
+ case '!': {
+ const char *parent = field->bar.parent;
+ uint32_t size = field->size + field->bar.op->new_field_len;
+ if (mp_typeof(*parent) == MP_ARRAY) {
+ uint32_t array_size = mp_decode_array(&parent);
+ return size + mp_sizeof_array(array_size + 1) -
+ mp_sizeof_array(array_size);
+ } else {
+ uint32_t map_size = mp_decode_map(&parent);
+ return size + mp_sizeof_map(map_size + 1) -
+ mp_sizeof_map(map_size);
+ }
+ }
+ case '#': {
+ const char *parent = field->bar.parent;
+ uint32_t delete_count = field->bar.op->arg.del.count;
+ uint32_t size = field->size - field->bar.point_size;
+ if (mp_typeof(*parent) == MP_ARRAY) {
+ uint32_t array_size = mp_decode_array(&parent);
+ assert(array_size >= delete_count);
+ return size - mp_sizeof_array(array_size) +
+ mp_sizeof_array(array_size - delete_count);
+ } else {
+ uint32_t map_size = mp_decode_map(&parent);
+ assert(delete_count == 1);
+ return size - mp_sizeof_map(map_size) +
+ mp_sizeof_map(map_size - 1);
+ }
+ }
+ default: {
+ return field->size - field->bar.point_size +
+ field->bar.op->new_field_len;
+ }
+ }
+}
+
+uint32_t
+xrow_update_bar_store(struct xrow_update_field *field, char *out, char *out_end)
+{
+ assert(field->type == XUPDATE_BAR);
+ (void) out_end;
+ struct xrow_update_op *op = field->bar.op;
+ char *out_saved = out;
+ switch(op->opcode) {
+ case '!': {
+ const char *pos = field->bar.parent;
+ uint32_t before_parent = pos - field->data;
+ /* Before parent. */
+ memcpy(out, field->data, before_parent);
+ out += before_parent;
+ if (mp_typeof(*pos) == MP_ARRAY) {
+ /* New array header. */
+ uint32_t size = mp_decode_array(&pos);
+ out = mp_encode_array(out, size + 1);
+ /* Before insertion point. */
+ size = field->bar.point - pos;
+ memcpy(out, pos, size);
+ out += size;
+ pos += size;
+ } else {
+ /* New map header. */
+ uint32_t size = mp_decode_map(&pos);
+ out = mp_encode_map(out, size + 1);
+ /* New key. */
+ out = mp_encode_str(out, field->bar.new_key,
+ field->bar.new_key_len);
+ }
+ /* New value. */
+ memcpy(out, op->arg.set.value, op->arg.set.length);
+ out += op->arg.set.length;
+ /* Old values and field tail. */
+ uint32_t after_point = field->data + field->size - pos;
+ memcpy(out, pos, after_point);
+ out += after_point;
+ return out - out_saved;
+ }
+ case '#': {
+ const char *pos = field->bar.parent;
+ uint32_t size, before_parent = pos - field->data;
+ memcpy(out, field->data, before_parent);
+ out += before_parent;
+ if (mp_typeof(*pos) == MP_ARRAY) {
+ size = mp_decode_array(&pos);
+ out = mp_encode_array(out, size - op->arg.del.count);
+ } else {
+ size = mp_decode_map(&pos);
+ out = mp_encode_map(out, size - 1);
+ }
+ size = field->bar.point - pos;
+ memcpy(out, pos, size);
+ out += size;
+ pos = field->bar.point + field->bar.point_size;
+
+ size = field->data + field->size - pos;
+ memcpy(out, pos, size);
+ return out + size - out_saved;
+ }
+ default: {
+ uint32_t before_point = field->bar.point - field->data;
+ const char *field_end = field->data + field->size;
+ const char *point_end =
+ field->bar.point + field->bar.point_size;
+ uint32_t after_point = field_end - point_end;
+
+ memcpy(out, field->data, before_point);
+ out += before_point;
+ op->meta->store(op, field->bar.point, out);
+ out += op->new_field_len;
+ memcpy(out, point_end, after_point);
+ return out + after_point - out_saved;
+ }
+ }
+}
diff --git a/src/box/xrow_update_field.c b/src/box/xrow_update_field.c
index c694e17e9..de865a21d 100644
--- a/src/box/xrow_update_field.c
+++ b/src/box/xrow_update_field.c
@@ -38,7 +38,9 @@
static inline const char *
xrow_update_op_field_str(const struct xrow_update_op *op)
{
- if (op->field_no >= 0)
+ if (op->lexer.src != NULL)
+ return tt_sprintf("'%.*s'", op->lexer.src_len, op->lexer.src);
+ else if (op->field_no >= 0)
return tt_sprintf("%d", op->field_no + TUPLE_INDEX_BASE);
else
return tt_sprintf("%d", op->field_no);
@@ -80,8 +82,13 @@ xrow_update_err_splice_bound(const struct xrow_update_op *op)
int
xrow_update_err_no_such_field(const struct xrow_update_op *op)
{
- diag_set(ClientError, ER_NO_SUCH_FIELD_NO, op->field_no >= 0 ?
- TUPLE_INDEX_BASE + op->field_no : op->field_no);
+ if (op->lexer.src == NULL) {
+ diag_set(ClientError, ER_NO_SUCH_FIELD_NO, op->field_no +
+ (op->field_no >= 0 ? TUPLE_INDEX_BASE : 0));
+ return -1;
+ }
+ diag_set(ClientError, ER_NO_SUCH_FIELD_NAME,
+ xrow_update_op_field_str(op));
return -1;
}
@@ -105,6 +112,8 @@ xrow_update_field_sizeof(struct xrow_update_field *field)
return field->scalar.op->new_field_len;
case XUPDATE_ARRAY:
return xrow_update_array_sizeof(field);
+ case XUPDATE_BAR:
+ return xrow_update_bar_sizeof(field);
default:
unreachable();
}
@@ -130,6 +139,8 @@ xrow_update_field_store(struct xrow_update_field *field, char *out,
return size;
case XUPDATE_ARRAY:
return xrow_update_array_store(field, out, out_end);
+ case XUPDATE_BAR:
+ return xrow_update_bar_store(field, out, out_end);
default:
unreachable();
}
@@ -632,6 +643,7 @@ xrow_update_op_decode(struct xrow_update_op *op, int index_base,
switch(mp_typeof(**expr)) {
case MP_INT:
case MP_UINT: {
+ json_lexer_create(&op->lexer, NULL, 0, 0);
if (xrow_update_mp_read_int32(op, expr, &field_no) != 0)
return -1;
if (field_no - index_base >= 0) {
@@ -647,14 +659,35 @@ xrow_update_op_decode(struct xrow_update_op *op, int index_base,
case MP_STR: {
const char *path = mp_decode_str(expr, &len);
uint32_t field_no, hash = field_name_hash(path, len);
+ json_lexer_create(&op->lexer, path, len, TUPLE_INDEX_BASE);
if (tuple_fieldno_by_name(dict, path, len, hash,
&field_no) == 0) {
op->field_no = (int32_t) field_no;
+ op->lexer.offset = len;
break;
}
- diag_set(ClientError, ER_NO_SUCH_FIELD_NAME,
- tt_cstr(path, len));
- return -1;
+ struct json_token token;
+ int rc = json_lexer_next_token(&op->lexer, &token);
+ if (rc != 0)
+ return xrow_update_err_bad_json(op, rc);
+ switch (token.type) {
+ case JSON_TOKEN_NUM:
+ op->field_no = token.num;
+ break;
+ case JSON_TOKEN_STR:
+ hash = field_name_hash(token.str, token.len);
+ if (tuple_fieldno_by_name(dict, token.str, token.len,
+ hash, &field_no) == 0) {
+ op->field_no = (int32_t) field_no;
+ break;
+ }
+ FALLTHROUGH;
+ default:
+ diag_set(ClientError, ER_NO_SUCH_FIELD_NAME,
+ tt_cstr(path, len));
+ return -1;
+ }
+ break;
}
default:
diag_set(ClientError, ER_ILLEGAL_PARAMS,
diff --git a/src/box/xrow_update_field.h b/src/box/xrow_update_field.h
index e90095b9e..bda9222cc 100644
--- a/src/box/xrow_update_field.h
+++ b/src/box/xrow_update_field.h
@@ -32,6 +32,7 @@
*/
#include "trivia/util.h"
#include "tt_static.h"
+#include "json/json.h"
#include "bit/int96.h"
#include "mp_decimal.h"
#include <stdint.h>
@@ -184,6 +185,12 @@ struct xrow_update_op {
uint32_t new_field_len;
/** Opcode symbol: = + - / ... */
char opcode;
+ /**
+ * Operation target path and its lexer in one. This lexer
+ * is used when the operation is applied down through the
+ * update tree.
+ */
+ struct json_lexer lexer;
};
/**
@@ -200,6 +207,21 @@ int
xrow_update_op_decode(struct xrow_update_op *op, int index_base,
struct tuple_dictionary *dict, const char **expr);
+/**
+ * Check if the operation should be applied on the current path
+ * node, i.e. it is terminal. When an operation is just decoded
+ * and is applied to the top level of a tuple, a head of the JSON
+ * path is cut out. If nothing left, it is applied there.
+ * Otherwise the operation is applied to the next level of the
+ * tuple, according to where the path goes, and so on. In the end
+ * it reaches the target point, where it becomes terminal.
+ */
+static inline bool
+xrow_update_op_is_term(const struct xrow_update_op *op)
+{
+ return json_lexer_is_eof(&op->lexer);
+}
+
/* }}} xrow_update_op */
/* {{{ xrow_update_field */
@@ -224,6 +246,14 @@ enum xrow_update_type {
* of individual fields.
*/
XUPDATE_ARRAY,
+ /**
+ * Field of this type stores such update, that has
+ * non-empty JSON path isolated from all other update
+ * operations. In such optimized case it is possible to do
+ * not allocate neither fields nor ops nor anything for
+ * path nodes. And this is the most common case.
+ */
+ XUPDATE_BAR,
};
/**
@@ -255,6 +285,55 @@ struct xrow_update_field {
struct {
struct xrow_update_rope *rope;
} array;
+ /**
+ * Bar update - by an isolated JSON path not
+ * intersected with any another update operation.
+ */
+ struct {
+ /**
+ * Bar update is a single operation
+ * always, no children, by definition.
+ */
+ struct xrow_update_op *op;
+ /**
+ * Always has a non-empty path leading
+ * inside this field's data. This is used
+ * to find the longest common prefix, when
+ * a new update operation intersects with
+ * this bar.
+ */
+ const char *path;
+ int path_len;
+ /**
+ * For insertion/deletion to change
+ * parent's header.
+ */
+ const char *parent;
+ union {
+ /**
+ * For scalar op; insertion into
+ * array; deletion. This is the
+ * point to delete, change or
+ * insert after.
+ */
+ struct {
+ const char *point;
+ uint32_t point_size;
+ };
+ /*
+ * For insertion into map. New
+ * key. On insertion into a map
+ * there is no strict order as in
+ * array and no point. The field
+ * is inserted just right after
+ * the parent header.
+ */
+ struct {
+ const char *new_key;
+ uint32_t new_key_len;
+ };
+ };
+ } bar;
};
};
@@ -351,6 +430,18 @@ OP_DECL_GENERIC(array)
/* }}} xrow_update_field.array */
+/* {{{ update_field.bar */
+
+OP_DECL_GENERIC(bar)
+
+/* }}} update_field.bar */
+
+/* {{{ update_field.nop */
+
+OP_DECL_GENERIC(nop)
+
+/* }}} update_field.nop */
+
#undef OP_DECL_GENERIC
/* {{{ Common helpers. */
@@ -372,6 +463,10 @@ xrow_update_op_do_field_##op_type(struct xrow_update_op *op, \
switch (field->type) { \
case XUPDATE_ARRAY: \
return xrow_update_op_do_array_##op_type(op, field); \
+ case XUPDATE_NOP: \
+ return xrow_update_op_do_nop_##op_type(op, field); \
+ case XUPDATE_BAR: \
+ return xrow_update_op_do_bar_##op_type(op, field); \
default: \
unreachable(); \
} \
@@ -439,6 +534,26 @@ xrow_update_err_double(const struct xrow_update_op *op)
return xrow_update_err(op, "double update of the same field");
}
+static inline int
+xrow_update_err_bad_json(const struct xrow_update_op *op, int pos)
+{
+ return xrow_update_err(op, tt_sprintf("invalid JSON in position %d",
+ pos));
+}
+
+static inline int
+xrow_update_err_delete1(const struct xrow_update_op *op)
+{
+ return xrow_update_err(op, "can delete only 1 field from a map in a "\
+ "row");
+}
+
+static inline int
+xrow_update_err_duplicate(const struct xrow_update_op *op)
+{
+ return xrow_update_err(op, "the key exists already");
+}
+
/** }}} Error helpers. */
#endif /* TARANTOOL_BOX_TUPLE_UPDATE_FIELD_H */
diff --git a/test/box/update.result b/test/box/update.result
index c6a5a25a7..ee38c100f 100644
--- a/test/box/update.result
+++ b/test/box/update.result
@@ -834,7 +834,7 @@ s:update({0}, {{'+', 0}})
...
s:update({0}, {{'+', '+', '+'}})
---
-- error: Field '+' was not found in the tuple
+- error: 'Field ''+'' UPDATE error: invalid JSON in position 1'
...
s:update({0}, {{0, 0, 0}})
---
@@ -889,3 +889,411 @@ s:update(1, {{'=', 3, map}})
s:drop()
---
...
+--
+-- gh-1261: update by JSON path.
+--
+format = {}
+---
+...
+format[1] = {'field1', 'unsigned'}
+---
+...
+format[2] = {'f', 'map'}
+---
+...
+format[3] = {'g', 'array'}
+---
+...
+s = box.schema.create_space('test', {format = format})
+---
+...
+pk = s:create_index('pk')
+---
+...
+t = {}
+---
+...
+t[1] = 1
+---
+...
+t[2] = { \
+ a = 100, \
+ b = 200, \
+ c = { \
+ d = 400, \
+ e = 500, \
+ f = {4, 5, 6, 7, 8}, \
+ g = {k = 600, l = 700} \
+ }, \
+ m = true, \
+ g = {800, 900} \
+}; \
+t[3] = { \
+ 100, \
+ 200, \
+ { \
+ {300, 350}, \
+ {400, 450} \
+ }, \
+ {a = 500, b = 600}, \
+ {c = 700, d = 800} \
+}
+---
+...
+t = s:insert(t)
+---
+...
+t4_array = t:update({{'!', 4, setmetatable({}, {__serialize = 'array'})}})
+---
+...
+t4_map = t:update({{'!', 4, setmetatable({}, {__serialize = 'map'})}})
+---
+...
+t
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+--
+-- At first, test simple non-intersected paths.
+--
+--
+-- !
+--
+t:update({{'!', 'f.c.f[1]', 3}, {'!', '[3][1]', {100, 200, 300}}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [3, 4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [[100, 200, 300], 100, 200, [
+ [300, 350], [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'!', 'f.g[3]', 1000}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900, 1000]}, [100, 200, [[300, 350],
+ [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'!', 'g[6]', 'new element'}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}, 'new element']]
+...
+t:update({{'!', 'f.e', 300}, {'!', 'g[4].c', 700}})
+---
+- [1, {'b': 200, 'm': true, 'g': [800, 900], 'a': 100, 'c': {'d': 400, 'f': [4, 5,
+ 6, 7, 8], 'e': 500, 'g': {'k': 600, 'l': 700}}, 'e': 300}, [100, 200, [[300,
+ 350], [400, 450]], {'b': 600, 'c': 700, 'a': 500}, {'c': 700, 'd': 800}]]
+...
+t:update({{'!', 'f.c.f[2]', 4.5}, {'!', 'g[3][2][2]', 425}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 4.5, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 425, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t2 = t:update({{'!', 'g[6]', {100}}})
+---
+...
+-- Test single element array update.
+t2:update({{'!', 'g[6][2]', 200}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}, [100, 200]]]
+...
+t2:update({{'!', 'g[6][1]', 50}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}, [50, 100]]]
+...
+-- Test empty array/map.
+t4_array:update({{'!', '[4][1]', 100}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}], [100]]
+...
+t4_map:update({{'!', '[4].a', 100}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}], {'a': 100}]
+...
+-- Test errors.
+t:update({{'!', 'a', 100}}) -- No such field.
+---
+- error: Field 'a' was not found in the tuple
+...
+t:update({{'!', 'f.a', 300}}) -- Key already exists.
+---
+- error: 'Field ''f.a'' UPDATE error: the key exists already'
+...
+t:update({{'!', 'f.c.f[0]', 3.5}}) -- No such index, too small.
+---
+- error: 'Field ''f.c.f[0]'' UPDATE error: invalid JSON in position 7'
+...
+t:update({{'!', 'f.c.f[100]', 100}}) -- No such index, too big.
+---
+- error: Field ''f.c.f[100]'' was not found in the tuple
+...
+t:update({{'!', 'g[4][100]', 700}}) -- Insert index into map.
+---
+- error: 'Field ''g[4][100]'' UPDATE error: can not access by index a non-array field'
+...
+t:update({{'!', 'g[1][1]', 300}})
+---
+- error: 'Field ''g[1][1]'' UPDATE error: can not access by index a non-array field'
+...
+t:update({{'!', 'f.g.a', 700}}) -- Insert key into array.
+---
+- error: 'Field ''f.g.a'' UPDATE error: can not access by key a non-map field'
+...
+t:update({{'!', 'f.g[1].a', 700}})
+---
+- error: 'Field ''f.g[1].a'' UPDATE error: can not access by key a non-map field'
+...
+t:update({{'!', 'f[*].k', 20}}) -- 'Any' is not considered valid JSON.
+---
+- error: 'Field ''f[*].k'' UPDATE error: invalid JSON in position 3'
+...
+-- JSON error after the not existing field to insert.
+t:update({{'!', '[2].e.100000', 100}})
+---
+- error: 'Field ''[2].e.100000'' UPDATE error: invalid JSON in position 7'
+...
+-- Correct JSON, but next to last field does not exist. '!' can't
+-- create the whole path.
+t:update({{'!', '[2].e.f', 100}})
+---
+- error: Field ''[2].e.f'' was not found in the tuple
+...
+--
+-- =
+--
+-- Set existing fields.
+t:update({{'=', 'f.a', 150}, {'=', 'g[3][1][2]', 400}})
+---
+- [1, {'b': 200, 'm': true, 'a': 150, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 400], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'=', 'f', {a = 100, b = 200}}})
+---
+- [1, {'a': 100, 'b': 200}, [100, 200, [[300, 350], [400, 450]], {'a': 500, 'b': 600},
+ {'c': 700, 'd': 800}]]
+...
+t:update({{'=', 'g[4].b', 700}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 700}, {'c': 700, 'd': 800}]]
+...
+-- Insert via set.
+t:update({{'=', 'f.e', 300}})
+---
+- [1, {'b': 200, 'm': true, 'g': [800, 900], 'a': 100, 'c': {'d': 400, 'f': [4, 5,
+ 6, 7, 8], 'e': 500, 'g': {'k': 600, 'l': 700}}, 'e': 300}, [100, 200, [[300,
+ 350], [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'=', 'f.g[3]', 1000}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900, 1000]}, [100, 200, [[300, 350],
+ [400, 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'=', 'f.g[1]', 0}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [0, 900]}, [100, 200, [[300, 350], [400, 450]],
+ {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+-- Test empty array/map.
+t4_array:update({{'=', '[4][1]', 100}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}], [100]]
+...
+t4_map:update({{'=', '[4]["a"]', 100}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}], {'a': 100}]
+...
+-- Test errors.
+t:update({{'=', 'f.a[1]', 100}})
+---
+- error: 'Field ''f.a[1]'' UPDATE error: can not access by index a non-array field'
+...
+t:update({{'=', 'f.a.k', 100}})
+---
+- error: 'Field ''f.a.k'' UPDATE error: can not access by key a non-map field'
+...
+t:update({{'=', 'f.c.f[1]', 100}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [100, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'=', 'f.c.f[100]', 100}})
+---
+- error: Field ''f.c.f[100]'' was not found in the tuple
+...
+t:update({{'=', '[2].c.f 1 1 1 1', 100}})
+---
+- error: 'Field ''[2].c.f 1 1 1 1'' UPDATE error: invalid JSON in position 8'
+...
+--
+-- #
+--
+t:update({{'#', '[2].b', 1}})
+---
+- [1, {'a': 100, 'm': true, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500, 'g': {
+ 'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400, 450]],
+ {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'#', 'f.c.f[1]', 1}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'#', 'f.c.f[1]', 2}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [6, 7, 8], 'e': 500, 'g': {
+ 'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400, 450]],
+ {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'#', 'f.c.f[1]', 100}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [], 'e': 500, 'g': {'k': 600,
+ 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400, 450]], {'a': 500,
+ 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'#', 'f.c.f[5]', 1}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'#', 'f.c.f[5]', 2}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+-- Test errors.
+t:update({{'#', 'f.h', 1}})
+---
+- error: Field ''f.h'' was not found in the tuple
+...
+t:update({{'#', 'f.c.f[100]', 1}})
+---
+- error: Field ''f.c.f[100]'' was not found in the tuple
+...
+t:update({{'#', 'f.b', 2}})
+---
+- error: 'Field ''f.b'' UPDATE error: can delete only 1 field from a map in a row'
+...
+t:update({{'#', 'f.b', 0}})
+---
+- error: 'Field ''f.b'' UPDATE error: cannot delete 0 fields'
+...
+t:update({{'#', 'f', 0}})
+---
+- error: 'Field ''f'' UPDATE error: cannot delete 0 fields'
+...
+--
+-- Scalar operations.
+--
+t:update({{'+', 'f.a', 50}})
+---
+- [1, {'b': 200, 'm': true, 'a': 150, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'-', 'f.c.f[1]', 0.5}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [3.5, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t:update({{'&', 'f.c.f[2]', 4}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 4, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+t2 = t:update({{'=', 4, {str = 'abcd'}}})
+---
+...
+t2:update({{':', '[4].str', 2, 2, 'e'}})
+---
+- [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [4, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[300, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}], {'str': 'aed'}]
+...
+-- Test errors.
+t:update({{'+', 'g[3]', 50}})
+---
+- error: 'Argument type in operation ''+'' on field ''g[3]'' does not match field
+ type: expected a number'
+...
+t:update({{'+', '[2].b.......', 100}})
+---
+- error: 'Field ''[2].b.......'' UPDATE error: invalid JSON in position 7'
+...
+t:update({{'+', '[2].b.c.d.e', 100}})
+---
+- error: Field ''[2].b.c.d.e'' was not found in the tuple
+...
+t:update({{'-', '[2][*]', 20}})
+---
+- error: 'Field ''[2][*]'' UPDATE error: invalid JSON in position 5'
+...
+-- Vinyl normalizes field numbers. It should not touch paths,
+-- and they should not affect squashing.
+format = {}
+---
+...
+format[1] = {'field1', 'unsigned'}
+---
+...
+format[2] = {'field2', 'any'}
+---
+...
+vy_s = box.schema.create_space('test2', {engine = 'vinyl', format = format})
+---
+...
+pk = vy_s:create_index('pk')
+---
+...
+_ = vy_s:replace(t)
+---
+...
+box.begin()
+---
+...
+-- Use a scalar operation, only they can be squashed.
+vy_s:upsert({1, 1}, {{'+', 'field2.c.f[1]', 1}})
+---
+...
+vy_s:upsert({1, 1}, {{'+', '[3][3][1][1]', 1}})
+---
+...
+box.commit()
+---
+...
+vy_s:select()
+---
+- - [1, {'b': 200, 'm': true, 'a': 100, 'c': {'d': 400, 'f': [5, 5, 6, 7, 8], 'e': 500,
+ 'g': {'k': 600, 'l': 700}}, 'g': [800, 900]}, [100, 200, [[301, 350], [400,
+ 450]], {'a': 500, 'b': 600}, {'c': 700, 'd': 800}]]
+...
+vy_s:drop()
+---
+...
+s:drop()
+---
+...
diff --git a/test/box/update.test.lua b/test/box/update.test.lua
index ac7698ce9..60e669d27 100644
--- a/test/box/update.test.lua
+++ b/test/box/update.test.lua
@@ -280,3 +280,148 @@ t:update({{'=', 3, map}})
s:update(1, {{'=', 3, map}})
s:drop()
+
+--
+-- gh-1261: update by JSON path.
+--
+format = {}
+format[1] = {'field1', 'unsigned'}
+format[2] = {'f', 'map'}
+format[3] = {'g', 'array'}
+s = box.schema.create_space('test', {format = format})
+pk = s:create_index('pk')
+t = {}
+t[1] = 1
+t[2] = { \
+ a = 100, \
+ b = 200, \
+ c = { \
+ d = 400, \
+ e = 500, \
+ f = {4, 5, 6, 7, 8}, \
+ g = {k = 600, l = 700} \
+ }, \
+ m = true, \
+ g = {800, 900} \
+}; \
+t[3] = { \
+ 100, \
+ 200, \
+ { \
+ {300, 350}, \
+ {400, 450} \
+ }, \
+ {a = 500, b = 600}, \
+ {c = 700, d = 800} \
+}
+t = s:insert(t)
+
+t4_array = t:update({{'!', 4, setmetatable({}, {__serialize = 'array'})}})
+t4_map = t:update({{'!', 4, setmetatable({}, {__serialize = 'map'})}})
+
+t
+--
+-- At first, test simple non-intersected paths.
+--
+
+--
+-- !
+--
+t:update({{'!', 'f.c.f[1]', 3}, {'!', '[3][1]', {100, 200, 300}}})
+t:update({{'!', 'f.g[3]', 1000}})
+t:update({{'!', 'g[6]', 'new element'}})
+t:update({{'!', 'f.e', 300}, {'!', 'g[4].c', 700}})
+t:update({{'!', 'f.c.f[2]', 4.5}, {'!', 'g[3][2][2]', 425}})
+t2 = t:update({{'!', 'g[6]', {100}}})
+-- Test single element array update.
+t2:update({{'!', 'g[6][2]', 200}})
+t2:update({{'!', 'g[6][1]', 50}})
+-- Test empty array/map.
+t4_array:update({{'!', '[4][1]', 100}})
+t4_map:update({{'!', '[4].a', 100}})
+-- Test errors.
+t:update({{'!', 'a', 100}}) -- No such field.
+t:update({{'!', 'f.a', 300}}) -- Key already exists.
+t:update({{'!', 'f.c.f[0]', 3.5}}) -- No such index, too small.
+t:update({{'!', 'f.c.f[100]', 100}}) -- No such index, too big.
+t:update({{'!', 'g[4][100]', 700}}) -- Insert index into map.
+t:update({{'!', 'g[1][1]', 300}})
+t:update({{'!', 'f.g.a', 700}}) -- Insert key into array.
+t:update({{'!', 'f.g[1].a', 700}})
+t:update({{'!', 'f[*].k', 20}}) -- 'Any' is not considered valid JSON.
+-- JSON error after the not existing field to insert.
+t:update({{'!', '[2].e.100000', 100}})
+-- Correct JSON, but next to last field does not exist. '!' can't
+-- create the whole path.
+t:update({{'!', '[2].e.f', 100}})
+
+--
+-- =
+--
+-- Set existing fields.
+t:update({{'=', 'f.a', 150}, {'=', 'g[3][1][2]', 400}})
+t:update({{'=', 'f', {a = 100, b = 200}}})
+t:update({{'=', 'g[4].b', 700}})
+-- Insert via set.
+t:update({{'=', 'f.e', 300}})
+t:update({{'=', 'f.g[3]', 1000}})
+t:update({{'=', 'f.g[1]', 0}})
+-- Test empty array/map.
+t4_array:update({{'=', '[4][1]', 100}})
+t4_map:update({{'=', '[4]["a"]', 100}})
+-- Test errors.
+t:update({{'=', 'f.a[1]', 100}})
+t:update({{'=', 'f.a.k', 100}})
+t:update({{'=', 'f.c.f[1]', 100}})
+t:update({{'=', 'f.c.f[100]', 100}})
+t:update({{'=', '[2].c.f 1 1 1 1', 100}})
+
+--
+-- #
+--
+t:update({{'#', '[2].b', 1}})
+t:update({{'#', 'f.c.f[1]', 1}})
+t:update({{'#', 'f.c.f[1]', 2}})
+t:update({{'#', 'f.c.f[1]', 100}})
+t:update({{'#', 'f.c.f[5]', 1}})
+t:update({{'#', 'f.c.f[5]', 2}})
+-- Test errors.
+t:update({{'#', 'f.h', 1}})
+t:update({{'#', 'f.c.f[100]', 1}})
+t:update({{'#', 'f.b', 2}})
+t:update({{'#', 'f.b', 0}})
+t:update({{'#', 'f', 0}})
+
+--
+-- Scalar operations.
+--
+t:update({{'+', 'f.a', 50}})
+t:update({{'-', 'f.c.f[1]', 0.5}})
+t:update({{'&', 'f.c.f[2]', 4}})
+t2 = t:update({{'=', 4, {str = 'abcd'}}})
+t2:update({{':', '[4].str', 2, 2, 'e'}})
+-- Test errors.
+t:update({{'+', 'g[3]', 50}})
+t:update({{'+', '[2].b.......', 100}})
+t:update({{'+', '[2].b.c.d.e', 100}})
+t:update({{'-', '[2][*]', 20}})
+
+-- Vinyl normalizes field numbers. It should not touch paths,
+-- and they should not affect squashing.
+format = {}
+format[1] = {'field1', 'unsigned'}
+format[2] = {'field2', 'any'}
+vy_s = box.schema.create_space('test2', {engine = 'vinyl', format = format})
+pk = vy_s:create_index('pk')
+_ = vy_s:replace(t)
+
+box.begin()
+-- Use a scalar operation, only they can be squashed.
+vy_s:upsert({1, 1}, {{'+', 'field2.c.f[1]', 1}})
+vy_s:upsert({1, 1}, {{'+', '[3][3][1][1]', 1}})
+box.commit()
+
+vy_s:select()
+vy_s:drop()
+
+s:drop()
diff --git a/test/engine/update.result b/test/engine/update.result
index f181924f3..ddb13bd5b 100644
--- a/test/engine/update.result
+++ b/test/engine/update.result
@@ -843,11 +843,6 @@ t:update({{'+', '[1]', 50}})
---
- [1, [10, 11, 12], {'b': 21, 'a': 20, 'c': 22}, 'abcdefgh', true, -100, 250]
...
--- JSON paths are not allowed yet.
-t:update({{'=', 'field2[1]', 13}})
----
-- error: Field 'field2[1]' was not found in the tuple
-...
s:update({1}, {{'=', 'field3', {d = 30, e = 31, f = 32}}})
---
- [1, [10, 11, 12], {'d': 30, 'f': 32, 'e': 31}, 'abcdefgh', true, -100, 200]
diff --git a/test/engine/update.test.lua b/test/engine/update.test.lua
index 4ca2589e4..31fca2b7b 100644
--- a/test/engine/update.test.lua
+++ b/test/engine/update.test.lua
@@ -156,8 +156,6 @@ t:update({{':', 'field4', 3, 3, 'bbccdd'}, {'+', 'field6', 50}, {'!', 7, 300}})
-- Any path is interpreted as a field name first. And only then
-- as JSON.
t:update({{'+', '[1]', 50}})
--- JSON paths are not allowed yet.
-t:update({{'=', 'field2[1]', 13}})
s:update({1}, {{'=', 'field3', {d = 30, e = 31, f = 32}}})
diff --git a/test/unit/column_mask.c b/test/unit/column_mask.c
index 3beef5ce0..8401a4f7f 100644
--- a/test/unit/column_mask.c
+++ b/test/unit/column_mask.c
@@ -225,16 +225,87 @@ basic_test()
column_masks[i]);
}
+static void
+test_paths(void)
+{
+ header();
+ plan(2);
+
+ char buffer1[1024];
+ char *pos1 = mp_encode_array(buffer1, 7);
+
+ pos1 = mp_encode_uint(pos1, 1);
+ pos1 = mp_encode_uint(pos1, 2);
+ pos1 = mp_encode_array(pos1, 2);
+ pos1 = mp_encode_uint(pos1, 3);
+ pos1 = mp_encode_uint(pos1, 4);
+ pos1 = mp_encode_uint(pos1, 5);
+ pos1 = mp_encode_array(pos1, 2);
+ pos1 = mp_encode_uint(pos1, 6);
+ pos1 = mp_encode_uint(pos1, 7);
+ pos1 = mp_encode_uint(pos1, 8);
+ pos1 = mp_encode_uint(pos1, 9);
+
+
+ char buffer2[1024];
+ char *pos2 = mp_encode_array(buffer2, 2);
+
+ pos2 = mp_encode_array(pos2, 3);
+ pos2 = mp_encode_str(pos2, "!", 1);
+ pos2 = mp_encode_str(pos2, "[3][1]", 6);
+ pos2 = mp_encode_double(pos2, 2.5);
+
+ pos2 = mp_encode_array(pos2, 3);
+ pos2 = mp_encode_str(pos2, "#", 1);
+ pos2 = mp_encode_str(pos2, "[5][1]", 6);
+ pos2 = mp_encode_uint(pos2, 1);
+
+ struct region *gc = &fiber()->gc;
+ size_t svp = region_used(gc);
+ uint32_t result_size;
+ uint64_t column_mask;
+ const char *result =
+ xrow_update_execute(buffer2, pos2, buffer1, pos1,
+ box_tuple_format_default()->dict,
+ &result_size, 1, &column_mask);
+ isnt(result, NULL, "JSON update works");
+
+ /*
+ * Updates on their first level change fields [3] and [5],
+ * or 2 and 4 if 0-based. If that was the single level,
+ * the operations '!' and '#' would change the all the
+ * fields from 2. But each of these operations are not for
+ * the root and therefore does not affect anything except
+ * [3] and [5] on the first level.
+ */
+ uint64_t expected_mask = 0;
+ column_mask_set_fieldno(&expected_mask, 2);
+ column_mask_set_fieldno(&expected_mask, 4);
+ is(column_mask, expected_mask, "column mask match");
+
+ region_truncate(gc, svp);
+
+ check_plan();
+ footer();
+}
+
+static uint32_t
+simple_hash(const char* str, uint32_t len)
+{
+ return str[0] + len;
+}
+
int
main()
{
memory_init();
fiber_init(fiber_c_invoke);
- tuple_init(NULL);
+ tuple_init(simple_hash);
header();
- plan(27);
+ plan(28);
basic_test();
+ test_paths();
footer();
check_plan();
diff --git a/test/unit/column_mask.result b/test/unit/column_mask.result
index 9309e6cdc..1d87a2f24 100644
--- a/test/unit/column_mask.result
+++ b/test/unit/column_mask.result
@@ -1,5 +1,5 @@
*** main ***
-1..27
+1..28
ok 1 - check result length
ok 2 - tuple update is correct
ok 3 - column_mask is correct
@@ -27,4 +27,10 @@ ok 24 - column_mask is correct
ok 25 - check result length
ok 26 - tuple update is correct
ok 27 - column_mask is correct
+ *** test_paths ***
+ 1..2
+ ok 1 - JSON update works
+ ok 2 - column mask match
+ok 28 - subtests
+ *** test_paths: done ***
*** main: done ***
--
2.21.0 (Apple Git-122.2)
* Re: [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates
2019-11-21 23:19 [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates Vladislav Shpilevoy
@ 2019-12-09 20:18 ` Sergey Ostanevich
2019-12-13 23:34 ` Vladislav Shpilevoy
2019-12-19 8:34 ` Kirill Yukhin
1 sibling, 1 reply; 5+ messages in thread
From: Sergey Ostanevich @ 2019-12-09 20:18 UTC (permalink / raw)
To: Vladislav Shpilevoy; +Cc: tarantool-patches
Hi!
Thanks for the patch!
Please, see 4 comments below.
Regards,
Sergos
> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
> index 767e40006..15a136f81 100644
> --- a/src/box/vinyl.c
> +++ b/src/box/vinyl.c
> @@ -2004,13 +2004,25 @@ request_normalize_ops(struct request *request)
> ops_end = mp_encode_str(ops_end, op_name, op_name_len);
>
> int field_no;
> - if (mp_typeof(*pos) == MP_INT) {
> + const char *field_name;
> + switch (mp_typeof(*pos)) {
> + case MP_INT:
> field_no = mp_decode_int(&pos);
> ops_end = mp_encode_int(ops_end, field_no);
> - } else {
> + break;
> + case MP_UINT:
> field_no = mp_decode_uint(&pos);
> field_no -= request->index_base;
> ops_end = mp_encode_uint(ops_end, field_no);
> + break;
> + case MP_STR:
> + field_name = pos;
> + mp_next(&pos);
> + memcpy(ops_end, field_name, pos - field_name);
> + ops_end += pos - field_name;
> + break;
Are you intentionally skipping encode/decode here? Is it safe enough?
> diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
> index 123db081a..ecda17544 100644
> --- a/src/box/xrow_update.c
> +++ b/src/box/xrow_update.c
> @@ -412,6 +434,15 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
> if (op->opcode != '+' && op->opcode != '-' &&
> op->opcode != '=')
> return NULL;
> + /*
> + * Not terminal operation means, that the
> + * update is not flat, and squash would
> + * need to build a tree of operations to
> + * find matches. That is too complex,
> + * squash is skipped.
> + */
> + if (! xrow_update_op_is_term(op))
> + return NULL;
Please, no space after a unary op - here and further on in the
patch - according to §3.1 of
https://www.tarantool.io/en/doc/2.2/dev_guide/c_style_guide/
> diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
> new file mode 100644
> index 000000000..737673e8a
> --- /dev/null
> +++ b/src/box/xrow_update_bar.c
> @@ -0,0 +1,454 @@
> +/*
> + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the
> + * following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +#include "xrow_update_field.h"
> +#include "tuple.h"
> +
> +/**
> + * Locate a field to update by @a op's JSON path and initialize
> + * @a field as a bar update.
> + *
> + * @param op Update operation.
> + * @param field Field to locate in.
> + * @param[out] key_len_or_index One parameter for two values,
> + * depending on where the target point is located: in an
> + * array or a map. In case of map it is size of a key
> + * before the found point. It is used to find range of the
> + * both key and value in '#' operation to drop the pair.
> + * In case of array it is index of the array element to be
> + * able to check how many fields are left for deletion.
> + *
> + * @retval 0 Success.
> + * @retval -1 Not found or invalid JSON.
> + */
> +static inline int
> +xrow_update_bar_locate(struct xrow_update_op *op,
> + struct xrow_update_field *field,
> + int *key_len_or_index)
> +{
> + /*
> + * Bar update is not flat by definition. It always has a
> + * non empty path. This is why op is expected to be not
> + * terminal.
> + */
> + assert(! xrow_update_op_is_term(op));
> + int rc;
> + field->type = XUPDATE_BAR;
> + field->bar.op = op;
> + field->bar.path = op->lexer.src + op->lexer.offset;
> + field->bar.path_len = op->lexer.src_len - op->lexer.offset;
> + const char *pos = field->data;
> + struct json_token token;
> + while ((rc = json_lexer_next_token(&op->lexer, &token)) == 0 &&
> + token.type != JSON_TOKEN_END) {
> +
> + switch (token.type) {
> + case JSON_TOKEN_NUM:
> + field->bar.parent = pos;
> + *key_len_or_index = token.num;
> + rc = tuple_field_go_to_index(&pos, token.num);
> + break;
> + case JSON_TOKEN_STR:
> + field->bar.parent = pos;
> + *key_len_or_index = token.len;
> + rc = tuple_field_go_to_key(&pos, token.str, token.len);
> + break;
> + default:
> + assert(token.type == JSON_TOKEN_ANY);
> + rc = op->lexer.symbol_count - 1;
> + return xrow_update_err_bad_json(op, rc);
> + }
> + if (rc != 0)
> + return xrow_update_err_no_such_field(op);
> + }
> + if (rc > 0)
> + return xrow_update_err_bad_json(op, rc);
> +
> + field->bar.point = pos;
> + mp_next(&pos);
> + field->bar.point_size = pos - field->bar.point;
> + return 0;
> +}
> +
> +/**
> + * Locate an optional field to update by @a op's JSON path. If
> + * found or only a last path part is not found, initialize @a
> + * field as a bar update. Last path part may not exist and it is
> + * ok, for example, for '!' and '=' operations.
> + */
> +static inline int
> +xrow_update_bar_locate_opt(struct xrow_update_op *op,
> + struct xrow_update_field *field, bool *is_found,
> + int *key_len_or_index)
> +{
> + /*
> + * Bar update is not flat by definition. It always has a
> + * non empty path. This is why op is expected to be not
> + * terminal.
> + */
> + assert(! xrow_update_op_is_term(op));
> + int rc;
> + field->type = XUPDATE_BAR;
> + field->bar.op = op;
> + field->bar.path = op->lexer.src + op->lexer.offset;
> + field->bar.path_len = op->lexer.src_len - op->lexer.offset;
> + const char *pos = field->data;
> + struct json_token token;
> + do {
> + rc = json_lexer_next_token(&op->lexer, &token);
> + if (rc != 0)
> + return xrow_update_err_bad_json(op, rc);
> +
> + switch (token.type) {
> + case JSON_TOKEN_END:
> + *is_found = true;
> + field->bar.point = pos;
> + mp_next(&pos);
> + field->bar.point_size = pos - field->bar.point;
> + return 0;
> + case JSON_TOKEN_NUM:
> + field->bar.parent = pos;
> + *key_len_or_index = token.num;
> + rc = tuple_field_go_to_index(&pos, token.num);
> + break;
> + case JSON_TOKEN_STR:
> + field->bar.parent = pos;
> + *key_len_or_index = token.len;
> + rc = tuple_field_go_to_key(&pos, token.str, token.len);
> + break;
> + default:
> + assert(token.type == JSON_TOKEN_ANY);
> + rc = op->lexer.symbol_count - 1;
> + return xrow_update_err_bad_json(op, rc);
> + }
> + } while (rc == 0);
> + assert(rc == -1);
> + /* Ensure, that 'token' is next to last path part. */
> + struct json_token tmp_token;
> + rc = json_lexer_next_token(&op->lexer, &tmp_token);
> + if (rc != 0)
> + return xrow_update_err_bad_json(op, rc);
> + if (tmp_token.type != JSON_TOKEN_END)
> + return xrow_update_err_no_such_field(op);
> +
> + *is_found = false;
> + if (token.type == JSON_TOKEN_NUM) {
> + const char *tmp = field->bar.parent;
> + if (mp_typeof(*tmp) != MP_ARRAY) {
> + return xrow_update_err(op, "can not access by index a "\
> + "non-array field");
> + }
> + uint32_t size = mp_decode_array(&tmp);
> + if ((uint32_t) token.num > size)
> + return xrow_update_err_no_such_field(op);
IMHO it would be more informative to mention the array and the index
that is out of bounds.
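Something like the following, purely as a sketch (the snprintf-based
formatting and the wording are just an illustration, and it assumes
xrow_update_err() can take a non-literal message; only the surrounding
names come from the patch):

	uint32_t size = mp_decode_array(&tmp);
	if ((uint32_t) token.num > size) {
		/* Hypothetical: report both the requested index and the size. */
		char msg[80];
		snprintf(msg, sizeof(msg),
			 "array index %d is out of bounds (array has %u elements)",
			 (int) token.num, (unsigned) size);
		return xrow_update_err(op, msg);
	}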
> + /*
> + * The updated point is in an array, its position
> + * was not found, and its index is <= size. The
> + * only way how can that happen - the update tries
> + * to append a new array element. The following
> + * code tries to find the array's end.
> + */
> + assert((uint32_t) token.num == size);
> + if (field->bar.parent == field->data) {
> + /*
> + * Optimization for the case when the path
> + * is short. So parent of the updated
> + * point is the field itself. It allows
> + * not to decode anything at all. It is
> + * worth doing, since the paths are
> + * usually short.
> + */
> + field->bar.point = field->data + field->size;
> + } else {
> + field->bar.point = field->bar.parent;
> + mp_next(&field->bar.point);
> + }
> + field->bar.point_size = 0;
> + } else {
> + assert(token.type == JSON_TOKEN_STR);
> + field->bar.new_key = token.str;
> + field->bar.new_key_len = token.len;
> + if (mp_typeof(*field->bar.parent) != MP_MAP) {
> + return xrow_update_err(op, "can not access by key a "\
> + "non-map field");
> + }
> + }
> + return 0;
> +}
> +
> +/**
> + * Nop fields are those which are not updated. And when they
> + * receive an update via one of xrow_update_op_do_nop_* functions,
> + * it means, that there is a non terminal path digging inside this
> + * not updated field. It turns nop field into a bar field. How
> + * exactly - depends on a concrete operation.
> + */
> +
> +int
> +xrow_update_op_do_nop_insert(struct xrow_update_op *op,
> + struct xrow_update_field *field)
> +{
> + assert(op->opcode == '!');
> + assert(field->type == XUPDATE_NOP);
Hmm. Do you expect to handle all possible cases in debug mode?
Otherwise there should be some sort of graceful failure here to make
prod more stable.
* Re: [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates
2019-12-09 20:18 ` Sergey Ostanevich
@ 2019-12-13 23:34 ` Vladislav Shpilevoy
2019-12-14 9:20 ` Sergey Ostanevich
0 siblings, 1 reply; 5+ messages in thread
From: Vladislav Shpilevoy @ 2019-12-13 23:34 UTC (permalink / raw)
To: Sergey Ostanevich; +Cc: tarantool-patches
Hi! Thanks for the review!
On 09/12/2019 21:18, Sergey Ostanevich wrote:
> Hi!
>
> Thanks for the patch!
> Please, see 4 comments below.
>
> Regards,
> Sergos
>
>
>> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
>> index 767e40006..15a136f81 100644
>> --- a/src/box/vinyl.c
>> +++ b/src/box/vinyl.c
>> @@ -2004,13 +2004,25 @@ request_normalize_ops(struct request *request)
>> ops_end = mp_encode_str(ops_end, op_name, op_name_len);
>>
>> int field_no;
>> - if (mp_typeof(*pos) == MP_INT) {
>> + const char *field_name;
>> + switch (mp_typeof(*pos)) {
>> + case MP_INT:
>> field_no = mp_decode_int(&pos);
>> ops_end = mp_encode_int(ops_end, field_no);
>> - } else {
>> + break;
>> + case MP_UINT:
>> field_no = mp_decode_uint(&pos);
>> field_no -= request->index_base;
>> ops_end = mp_encode_uint(ops_end, field_no);
>> + break;
>> + case MP_STR:
>> + field_name = pos;
>> + mp_next(&pos);
>> + memcpy(ops_end, field_name, pos - field_name);
>> + ops_end += pos - field_name;
>> + break;
>
> Are you intentionally skip encode/decode here? Is it safe enough?
mp_next() does the decoding. memcpy() is the same as encoding here, just
without the 'if's. It is safe.
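To illustrate (a standalone sketch, not part of the patch), the two forms
below are interchangeable for an MP_STR field name: no index-base
adjustment is needed for strings, so copying the already encoded bytes
yields the same value as an explicit decode plus encode:

	#include <string.h>
	#include "msgpuck.h"

	/* Raw-copy form, as in the patch: skip one value, copy its bytes. */
	static void
	copy_field_name(const char **pos, char **ops_end)
	{
		const char *field_name = *pos;
		mp_next(pos);
		memcpy(*ops_end, field_name, *pos - field_name);
		*ops_end += *pos - field_name;
	}

	/* Explicit decode + encode form, producing an equivalent value. */
	static void
	reencode_field_name(const char **pos, char **ops_end)
	{
		uint32_t len;
		const char *str = mp_decode_str(pos, &len);
		*ops_end = mp_encode_str(*ops_end, str, len);
	}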
>> diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
>> index 123db081a..ecda17544 100644
>> --- a/src/box/xrow_update.c
>> +++ b/src/box/xrow_update.c
>> @@ -412,6 +434,15 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
>> if (op->opcode != '+' && op->opcode != '-' &&
>> op->opcode != '=')
>> return NULL;
>> + /*
>> + * Not terminal operation means, that the
>> + * update is not flat, and squash would
>> + * need to build a tree of operations to
>> + * find matches. That is too complex,
>> + * squash is skipped.
>> + */
>> + if (! xrow_update_op_is_term(op))
>> + return NULL;
>
> Please, no space after unary op - and further on in the patch -
> according to $3.1 of
> https://www.tarantool.io/en/doc/2.2/dev_guide/c_style_guide/
>
Ok, here it is:
==============================================================================
diff --git a/src/box/xrow_update.c b/src/box/xrow_update.c
index ecda17544..0cee50017 100644
--- a/src/box/xrow_update.c
+++ b/src/box/xrow_update.c
@@ -441,7 +441,7 @@ xrow_upsert_squash(const char *expr1, const char *expr1_end,
* find matches. That is too complex,
* squash is skipped.
*/
- if (! xrow_update_op_is_term(op))
+ if (!xrow_update_op_is_term(op))
return NULL;
if (op->field_no <= prev_field_no)
return NULL;
diff --git a/src/box/xrow_update_array.c b/src/box/xrow_update_array.c
index 1cc49f861..57427e39c 100644
--- a/src/box/xrow_update_array.c
+++ b/src/box/xrow_update_array.c
@@ -221,7 +221,7 @@ xrow_update_op_do_array_insert(struct xrow_update_op *op,
{
assert(field->type == XUPDATE_ARRAY);
struct xrow_update_array_item *item;
- if (! xrow_update_op_is_term(op)) {
+ if (!xrow_update_op_is_term(op)) {
item = xrow_update_array_extract_item(field, op);
if (item == NULL)
return -1;
@@ -256,7 +256,7 @@ xrow_update_op_do_array_set(struct xrow_update_op *op,
xrow_update_array_extract_item(field, op);
if (item == NULL)
return -1;
- if (! xrow_update_op_is_term(op))
+ if (!xrow_update_op_is_term(op))
return xrow_update_op_do_field_set(op, &item->field);
op->new_field_len = op->arg.set.length;
/*
@@ -275,7 +275,7 @@ xrow_update_op_do_array_delete(struct xrow_update_op *op,
struct xrow_update_field *field)
{
assert(field->type == XUPDATE_ARRAY);
- if (! xrow_update_op_is_term(op)) {
+ if (!xrow_update_op_is_term(op)) {
struct xrow_update_array_item *item =
xrow_update_array_extract_item(field, op);
if (item == NULL)
@@ -305,7 +305,7 @@ xrow_update_op_do_array_##op_type(struct xrow_update_op *op, \
xrow_update_array_extract_item(field, op); \
if (item == NULL) \
return -1; \
- if (! xrow_update_op_is_term(op)) \
+ if (!xrow_update_op_is_term(op)) \
return xrow_update_op_do_field_##op_type(op, &item->field); \
if (item->field.type != XUPDATE_NOP) \
return xrow_update_err_double(op); \
diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
index 737673e8a..7285c7e2b 100644
--- a/src/box/xrow_update_bar.c
+++ b/src/box/xrow_update_bar.c
@@ -58,7 +58,7 @@ xrow_update_bar_locate(struct xrow_update_op *op,
* non empty path. This is why op is expected to be not
* terminal.
*/
- assert(! xrow_update_op_is_term(op));
+ assert(!xrow_update_op_is_term(op));
int rc;
field->type = XUPDATE_BAR;
field->bar.op = op;
@@ -113,7 +113,7 @@ xrow_update_bar_locate_opt(struct xrow_update_op *op,
* non empty path. This is why op is expected to be not
* terminal.
*/
- assert(! xrow_update_op_is_term(op));
+ assert(!xrow_update_op_is_term(op));
int rc;
field->type = XUPDATE_BAR;
field->bar.op = op;
@@ -245,7 +245,7 @@ xrow_update_op_do_nop_set(struct xrow_update_op *op,
if (xrow_update_bar_locate_opt(op, field, &is_found, &key_len) != 0)
return -1;
op->new_field_len = op->arg.set.length;
- if (! is_found) {
+ if (!is_found) {
op->opcode = '!';
if (mp_typeof(*field->bar.parent) == MP_MAP)
op->new_field_len += mp_sizeof_str(key_len);
==============================================================================
>> diff --git a/src/box/xrow_update_bar.c b/src/box/xrow_update_bar.c
>> new file mode 100644
>> index 000000000..737673e8a
>> --- /dev/null
>> +++ b/src/box/xrow_update_bar.c
>> +static inline int
>> +xrow_update_bar_locate_opt(struct xrow_update_op *op,
>> + struct xrow_update_field *field, bool *is_found,
>> + int *key_len_or_index)
>> +{
>> + /*
>> + * Bar update is not flat by definition. It always has a
>> + * non empty path. This is why op is expected to be not
>> + * terminal.
>> + */
>> + assert(! xrow_update_op_is_term(op));
>> + int rc;
>> + field->type = XUPDATE_BAR;
>> + field->bar.op = op;
>> + field->bar.path = op->lexer.src + op->lexer.offset;
>> + field->bar.path_len = op->lexer.src_len - op->lexer.offset;
>> + const char *pos = field->data;
>> + struct json_token token;
>> + do {
>> + rc = json_lexer_next_token(&op->lexer, &token);
>> + if (rc != 0)
>> + return xrow_update_err_bad_json(op, rc);
>> +
>> + switch (token.type) {
>> + case JSON_TOKEN_END:
>> + *is_found = true;
>> + field->bar.point = pos;
>> + mp_next(&pos);
>> + field->bar.point_size = pos - field->bar.point;
>> + return 0;
>> + case JSON_TOKEN_NUM:
>> + field->bar.parent = pos;
>> + *key_len_or_index = token.num;
>> + rc = tuple_field_go_to_index(&pos, token.num);
>> + break;
>> + case JSON_TOKEN_STR:
>> + field->bar.parent = pos;
>> + *key_len_or_index = token.len;
>> + rc = tuple_field_go_to_key(&pos, token.str, token.len);
>> + break;
>> + default:
>> + assert(token.type == JSON_TOKEN_ANY);
>> + rc = op->lexer.symbol_count - 1;
>> + return xrow_update_err_bad_json(op, rc);
>> + }
>> + } while (rc == 0);
>> + assert(rc == -1);
>> + /* Ensure, that 'token' is next to last path part. */
>> + struct json_token tmp_token;
>> + rc = json_lexer_next_token(&op->lexer, &tmp_token);
>> + if (rc != 0)
>> + return xrow_update_err_bad_json(op, rc);
>> + if (tmp_token.type != JSON_TOKEN_END)
>> + return xrow_update_err_no_such_field(op);
>> +
>> + *is_found = false;
>> + if (token.type == JSON_TOKEN_NUM) {
>> + const char *tmp = field->bar.parent;
>> + if (mp_typeof(*tmp) != MP_ARRAY) {
>> + return xrow_update_err(op, "can not access by index a "\
>> + "non-array field");
>> + }
>> + uint32_t size = mp_decode_array(&tmp);
>> + if ((uint32_t) token.num > size)
>> + return xrow_update_err_no_such_field(op);
>
> IMHO it would be more informative to mention the array and the index
> that is out of bounds.
>
Well, xrow_update_err_no_such_field() does it. It adds the whole
path to the error message, so a user will see which path caused
the problem.
>> + /*
>> + * The updated point is in an array, its position
>> + * was not found, and its index is <= size. The
>> + * only way how can that happen - the update tries
>> + * to append a new array element. The following
>> + * code tries to find the array's end.
>> + */
>> + assert((uint32_t) token.num == size);
>> + if (field->bar.parent == field->data) {
>> + /*
>> + * Optimization for the case when the path
>> + * is short. So parent of the updated
>> + * point is the field itself. It allows
>> + * not to decode anything at all. It is
>> + * worth doing, since the paths are
>> + * usually short.
>> + */
>> + field->bar.point = field->data + field->size;
>> + } else {
>> + field->bar.point = field->bar.parent;
>> + mp_next(&field->bar.point);
>> + }
>> + field->bar.point_size = 0;
>> + } else {
>> + assert(token.type == JSON_TOKEN_STR);
>> + field->bar.new_key = token.str;
>> + field->bar.new_key_len = token.len;
>> + if (mp_typeof(*field->bar.parent) != MP_MAP) {
>> + return xrow_update_err(op, "can not access by key a "\
>> + "non-map field");
>> + }
>> + }
>> + return 0;
>> +}
>> +
>> +/**
>> + * Nop fields are those which are not updated. And when they
>> + * receive an update via one of xrow_update_op_do_nop_* functions,
>> + * it means, that there is a non terminal path digging inside this
>> + * not updated field. It turns nop field into a bar field. How
>> + * exactly - depends on a concrete operation.
>> + */
>> +
>> +int
>> +xrow_update_op_do_nop_insert(struct xrow_update_op *op,
>> + struct xrow_update_field *field)
>> +{
>> + assert(op->opcode == '!');
>> + assert(field->type == XUPDATE_NOP);
>
> Hmm. Do you expect to handle all possible cases in debug mode?
> Otherwise there should be some sort of graceful failure here to make
> prod more stable.
>
In Tarantool we usually rely on tests and the debug build. Concretely,
I covered this case with tests at 100%, not counting OOM (we basically
cannot test malloc failures). This is done to avoid slowing down the
release build with checks that never fire, such as this one. Removing
assertion checks from the release build gives a very significant
performance gain. However, in some places which are not hot paths there
is a mechanism related to what you mean:
https://github.com/tarantool/tarantool/issues/2571
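To make the trade-off concrete, here is a sketch (mine, not from the
patch) of the two styles; the cold-path error call reuses
xrow_update_err() purely for illustration:

	/* Hot path: the contract is checked only in debug builds, so the
	 * release build pays nothing for it. A violation is a programming
	 * error and is caught by the test suite. */
	assert(op->opcode == '!');
	assert(field->type == XUPDATE_NOP);

	/* Cold path (the spirit of gh-2571): the same condition is checked
	 * unconditionally and degrades into a recoverable error in release
	 * builds instead of undefined behavior. */
	if (field->type != XUPDATE_NOP)
		return xrow_update_err(op, "unexpected field state");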
* Re: [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates
2019-12-13 23:34 ` Vladislav Shpilevoy
@ 2019-12-14 9:20 ` Sergey Ostanevich
0 siblings, 0 replies; 5+ messages in thread
From: Sergey Ostanevich @ 2019-12-14 9:20 UTC (permalink / raw)
To: Vladislav Shpilevoy; +Cc: tarantool-patches
Thanks, LGTM.
Best regards,
Sergos
* Re: [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates
2019-11-21 23:19 [Tarantool-patches] [PATCH 1/1] tuple: enable isolated JSON updates Vladislav Shpilevoy
2019-12-09 20:18 ` Sergey Ostanevich
@ 2019-12-19 8:34 ` Kirill Yukhin
1 sibling, 0 replies; 5+ messages in thread
From: Kirill Yukhin @ 2019-12-19 8:34 UTC (permalink / raw)
To: Vladislav Shpilevoy; +Cc: tarantool-patches
Hello,
On 22 Nov 00:19, Vladislav Shpilevoy wrote:
> Isolated tuple update is an update by JSON path, which hasn't a
> common prefix with any other JSON update operation in the same
> set. For example, these JSON update operations are isolated:
>
> {'=', '[1][2][3]', 100},
> {'+', '[2].b.c', 200}
>
> Their JSON paths has no a common prefix. But these operations are
> not isolated:
>
> {'=', '[1][2][3]', 100},
> {'+', '[1].b.c', 200}
>
> They have a common prefix '[1]'.
>
> Isolated updates are a first part of fully functional JSON
> updates. Their feature is that their implementation is relatively
> simple and lightweight - an isolated JSON update does not store
> each part of the JSON path as a separate object. Isolated update
> stores just string with JSON and pointer to the MessagePack
> object to update.
>
> Such isolated updates are called 'bar update'. They are a basic
> brick of more complex JSON updates.
>
> Part of #1261
> ---
> Branch: https://github.com/tarantool/tarantool/tree/gerold103/gh-1261-json-bar-update
> Issue: https://github.com/tarantool/tarantool/issues/1261
The patch LGTM. I've checked it into master.
--
Regards, Kirill Yukhin