From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Subject: [tarantool-patches] [PATCH v2 2/8] tuple: rework updates to improve code extendibility Date: Sat, 31 Aug 2019 23:35:52 +0200 [thread overview] Message-ID: <40775b37b9067dc2bace5b97d4694d0d869dd035.1567287197.git.v.shpilevoy@tarantool.org> (raw) In-Reply-To: <cover.1567287197.git.v.shpilevoy@tarantool.org> Before the patch update was implemented as a set of operations applicable for arrays only. It was ok until field names and JSON paths appearance, because tuple is array on the top level. But now there are four reasons to allow more complex updates of tuple field internals via JSON paths: * tuple field access by JSON path is allowed so for consistency JSON paths should be allowed in updates as well; * JSON indexes are supported. JSON update should be able to change an indexed field without rewriting half of a tuple; * Tarantool is going to support documents in storage so JSON path updates is one more step forward; * JSON updates are going to be faster than get + in-memory Lua/connector update + replace (or update of a whole tuple field). The patch prepares current update code to be extended by updates of map, so named 'bar' updates, and multilevel updates. The concept is to build a tree of update objects. Each updates a scalar terminal value (leaf of that tree), or a complex object like array or map. In the latter case these objects contain >= 2 children updates. This organization allows to split update implementation into several independent files-modules for each update type. Next commits will introduce them one by one. Needed for #1261 --- src/box/CMakeLists.txt | 2 + src/box/errcode.h | 10 +- src/box/tuple_update.c | 1125 ++------------------------------- src/box/update/update_array.c | 288 +++++++++ src/box/update/update_field.c | 649 +++++++++++++++++++ src/box/update/update_field.h | 410 ++++++++++++ test/box/misc.result | 2 +- test/box/tuple.result | 4 +- 8 files changed, 1402 insertions(+), 1088 deletions(-) create mode 100644 src/box/update/update_array.c create mode 100644 src/box/update/update_field.c create mode 100644 src/box/update/update_field.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index 9d2fcea4b..62737aafe 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(tuple STATIC field_map.c tuple_format.c tuple_update.c + update/update_field.c + update/update_array.c tuple_compare.cc tuple_extract_key.cc tuple_hash.cc diff --git a/src/box/errcode.h b/src/box/errcode.h index 0c1310bdf..f8347f6ec 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -77,11 +77,11 @@ struct errcode_record { /* 22 */_(ER_TUPLE_NOT_ARRAY, "Tuple/Key must be MsgPack array") \ /* 23 */_(ER_FIELD_TYPE, "Tuple field %s type does not match one required by operation: expected %s") \ /* 24 */_(ER_INDEX_PART_TYPE_MISMATCH, "Field %s has type '%s' in one index, but type '%s' in another") \ - /* 25 */_(ER_SPLICE, "SPLICE error on field %u: %s") \ - /* 26 */_(ER_UPDATE_ARG_TYPE, "Argument type in operation '%c' on field %u does not match field type: expected %s") \ + /* 25 */_(ER_UPDATE_SPLICE, "SPLICE error on field %s: %s") \ + /* 26 */_(ER_UPDATE_ARG_TYPE, "Argument type in operation '%c' on field %s does not match field type: expected %s") \ /* 27 */_(ER_FORMAT_MISMATCH_INDEX_PART, "Field %s has type '%s' in space format, but type '%s' in index definition") \ /* 28 */_(ER_UNKNOWN_UPDATE_OP, "Unknown UPDATE operation") \ - /* 29 */_(ER_UPDATE_FIELD, "Field %u UPDATE error: %s") \ + /* 29 */_(ER_UPDATE_FIELD, "Field %s UPDATE error: %s") \ /* 30 */_(ER_FUNCTION_TX_ACTIVE, "Transaction is active at return from function") \ /* 31 */_(ER_KEY_PART_COUNT, "Invalid key part count (expected [0..%u], got %u)") \ /* 32 */_(ER_PROC_LUA, "%s") \ @@ -147,7 +147,7 @@ struct errcode_record { /* 92 */_(ER_ROLE_NOT_GRANTED, "User '%s' does not have role '%s'") \ /* 93 */_(ER_MISSING_SNAPSHOT, "Can't find snapshot") \ /* 94 */_(ER_CANT_UPDATE_PRIMARY_KEY, "Attempt to modify a tuple field which is part of index '%s' in space '%s'") \ - /* 95 */_(ER_UPDATE_INTEGER_OVERFLOW, "Integer overflow when performing '%c' operation on field %u") \ + /* 95 */_(ER_UPDATE_INTEGER_OVERFLOW, "Integer overflow when performing '%c' operation on field %s") \ /* 96 */_(ER_GUEST_USER_PASSWORD, "Setting password for guest user has no effect") \ /* 97 */_(ER_TRANSACTION_CONFLICT, "Transaction has been aborted by conflict") \ /* 98 */_(ER_UNSUPPORTED_PRIV, "Unsupported %s privilege '%s'") \ @@ -212,7 +212,7 @@ struct errcode_record { /*157 */_(ER_SQL_BIND_TYPE, "Bind value type %s for parameter %s is not supported") \ /*158 */_(ER_SQL_BIND_PARAMETER_MAX, "SQL bind parameter limit reached: %d") \ /*159 */_(ER_SQL_EXECUTE, "Failed to execute SQL statement: %s") \ - /*160 */_(ER_UPDATE_DECIMAL_OVERFLOW, "Decimal overflow when performing operation '%c' on field %u") \ + /*160 */_(ER_UPDATE_DECIMAL_OVERFLOW, "Decimal overflow when performing operation '%c' on field %s") \ /*161 */_(ER_SQL_BIND_NOT_FOUND, "Parameter %s was not found in the statement") \ /*162 */_(ER_ACTION_MISMATCH, "Field %s contains %s on conflict action, but %s in index parts") \ /*163 */_(ER_VIEW_MISSING_SQL, "Space declared as a view must have SQL statement") \ diff --git a/src/box/tuple_update.c b/src/box/tuple_update.c index dd1d78997..87946919d 100644 --- a/src/box/tuple_update.c +++ b/src/box/tuple_update.c @@ -28,1060 +28,31 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ - #include "tuple_update.h" -#include <stdbool.h> - -#include "say.h" #include "error.h" #include "diag.h" -#include "trivia/util.h" -#include "third_party/queue.h" #include <msgpuck/msgpuck.h> -#include <bit/int96.h> #include "column_mask.h" -#include "mp_extension_types.h" -#include "mp_decimal.h" #include "fiber.h" -#include "tuple_dictionary.h" -#include "tt_static.h" - -/** UPDATE request implementation. - * UPDATE request is represented by a sequence of operations, each - * working with a single field. There also are operations which - * add or remove fields. Only one operation on the same field - * is allowed. - * - * Supported field change operations are: SET, ADD, SUBTRACT; - * bitwise AND, XOR and OR; SPLICE. - * - * Supported tuple change operations are: SET, DELETE, INSERT, - * PUSH and POP. - * If the number of fields in a tuple is altered by an operation, - * field index of all following operations is evaluated against the - * new tuple. - * - * Despite the allowed complexity, a typical use case for UPDATE - * is when the operation count is much less than field count in - * a tuple. - * - * With the common case in mind, UPDATE tries to minimize - * the amount of unnecessary temporary tuple copies. - * - * First, operations are parsed and initialized. Then, the - * resulting tuple length is calculated. A new tuple is allocated. - * Finally, operations are applied sequentially, each copying data - * from the old tuple to the new tuple. - * - * With this approach, cost of UPDATE is proportional to O(tuple - * length) + O(C * log C), where C is the number of operations in - * the request, and data is copied from the old tuple to the new - * one only once. - * - * As long as INSERT, DELETE, PUSH and POP change the relative - * field order, an auxiliary data structure is necessary to look - * up fields in the "old" tuple by field number. Such field - * index is built on demand, using "rope" data structure. - * - * A rope is a binary tree designed to store long strings built - * from pieces. Each tree node points to a substring of a large - * string. In our case, each rope node points at a range of - * fields, initially in the old tuple, and then, as fields are - * added and deleted by UPDATE, in the "current" tuple. - * Note, that the tuple itself is not materialized: when - * operations which affect field count are initialized, the rope - * is updated to reflect the new field order. - * In particular, if a field is deleted by an operation, - * it disappears from the rope and all subsequent operations - * on this field number instead affect the field following the - * deleted one. - */ - -static inline void * -update_alloc(struct region *region, size_t size) -{ - void *ptr = region_aligned_alloc(region, size, alignof(uint64_t)); - if (ptr == NULL) - diag_set(OutOfMemory, size, "region_aligned_alloc", - "update internals"); - return ptr; -} +#include "update/update_field.h" -/** Update internal state */ -struct tuple_update -{ - struct region *region; - struct rope *rope; +/** Tuple update operations and arguments. */ +struct tuple_update { + /** Operations array. */ struct update_op *ops; + /** Length of ops. */ uint32_t op_count; - int index_base; /* 0 for C and 1 for Lua */ - /** A bitmask of all columns modified by this update */ - uint64_t column_mask; -}; - -/** Argument of SET (and INSERT) operation. */ -struct op_set_arg { - uint32_t length; - const char *value; -}; - -/** Argument of DELETE operation. */ -struct op_del_arg { - uint32_t count; -}; - -/** - * MsgPack format code of an arithmetic argument or result. - * MsgPack codes are not used to simplify type calculation. - */ -enum arith_type { - AT_DECIMAL = 0, /* MP_EXT + MP_DECIMAL */ - AT_DOUBLE = 1, /* MP_DOUBLE */ - AT_FLOAT = 2, /* MP_FLOAT */ - AT_INT = 3 /* MP_INT/MP_UINT */ -}; - -/** - * Argument (left and right) and result of ADD, SUBTRACT. - * - * To perform an arithmetic operation, update first loads - * left and right arguments into corresponding value objects, - * then performs arithmetics on types of arguments, thus - * calculating the type of the result, and then - * performs the requested operation according to the calculated - * type rules. - * - * The rules are as follows: - * - when one of the argument types is double, the result is - * double - * - when one of the argument types is float, the result is - * float - * - for integer arguments, the result type code depends on - * the range in which falls the result of the operation. - * If the result is in negative range, it's MP_INT, otherwise - * it's MP_UINT. If the result is out of bounds of (-2^63, - * 2^64), and exception is raised for overflow. - */ -struct op_arith_arg { - enum arith_type type; - union { - double dbl; - float flt; - struct int96_num int96; - decimal_t dec; - }; -}; - -/** Argument of AND, XOR, OR operations. */ -struct op_bit_arg { - uint64_t val; -}; - -/** Argument of SPLICE. */ -struct op_splice_arg { - int32_t offset; /** splice position */ - int32_t cut_length; /** cut this many bytes. */ - const char *paste; /** paste what? */ - uint32_t paste_length; /** paste this many bytes. */ - - /** Offset of the tail in the old field */ - int32_t tail_offset; - /** Size of the tail. */ - int32_t tail_length; -}; - -union update_op_arg { - struct op_set_arg set; - struct op_del_arg del; - struct op_arith_arg arith; - struct op_bit_arg bit; - struct op_splice_arg splice; -}; - -struct update_field; -struct update_op; - -static struct update_field * -update_field_split(struct region *region, struct update_field *data, - size_t size, size_t offset); - -#define ROPE_SPLIT_F update_field_split -#define ROPE_ALLOC_F update_alloc -#define rope_data_t struct update_field * -#define rope_ctx_t struct region * - -#include "salad/rope.h" - -typedef int (*do_op_func)(struct tuple_update *update, struct update_op *op); -typedef int (*read_arg_func)(int index_base, struct update_op *op, - const char **expr); -typedef void (*store_op_func)(union update_op_arg *arg, const char *in, - char *out); - -/** A set of functions and properties to initialize and do an op. */ -struct update_op_meta { - read_arg_func read_arg; - do_op_func do_op; - store_op_func store; - /* Argument count */ - uint32_t args; -}; - -/** A single UPDATE operation. */ -struct update_op { - const struct update_op_meta *meta; - union update_op_arg arg; - /* Subject field no. */ - int32_t field_no; - uint32_t new_field_len; - uint8_t opcode; -}; - -/** - * We can have more than one operation on the same field. - * A descriptor of one changed field. - */ -struct update_field { - /** UPDATE operation against the first field in the range. */ - struct update_op *op; - /** Points at start of field *data* in the old tuple. */ - const char *old; - /** End of the old field. */ - const char *tail; /** - * Length of the "tail" in the old tuple from end - * of old data to the beginning of the field in the - * next update_field structure. + * Index base for MessagePack update operations. If update + * is from Lua, then the base is 1. Otherwise 0. */ - uint32_t tail_len; + int index_base; + /** A bitmask of all columns modified by this update. */ + uint64_t column_mask; + /** First level of update tree. It is always array. */ + struct update_field root_array; }; -static void -update_field_init(struct update_field *field, - const char *old, uint32_t old_len, uint32_t tail_len) -{ - field->op = NULL; - field->old = old; - field->tail = old + old_len; - field->tail_len = tail_len; -} - -/* {{{ read_arg helpers */ - -/** Read a field index or any other integer field. */ -static inline int -mp_read_i32(int index_base, struct update_op *op, - const char **expr, int32_t *ret) -{ - if (mp_read_int32(expr, ret) == 0) - return 0; - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)op->opcode, - index_base + op->field_no, "an integer"); - return -1; -} - -static inline int -mp_read_uint(int index_base, struct update_op *op, - const char **expr, uint64_t *ret) -{ - if (mp_typeof(**expr) == MP_UINT) { - *ret = mp_decode_uint(expr); - return 0; - } else { - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)op->opcode, - index_base + op->field_no, "a positive integer"); - return -1; - } -} - -/** - * Load an argument of an arithmetic operation either from tuple - * or from the UPDATE command. - */ -static inline int -mp_read_arith_arg(int index_base, struct update_op *op, - const char **expr, struct op_arith_arg *ret) -{ - if (mp_typeof(**expr) == MP_UINT) { - ret->type = AT_INT; - int96_set_unsigned(&ret->int96, mp_decode_uint(expr)); - } else if (mp_typeof(**expr) == MP_INT) { - ret->type = AT_INT; - int96_set_signed(&ret->int96, mp_decode_int(expr)); - } else if (mp_typeof(**expr) == MP_DOUBLE) { - ret->type = AT_DOUBLE; - ret->dbl = mp_decode_double(expr); - } else if (mp_typeof(**expr) == MP_FLOAT) { - ret->type = AT_FLOAT; - ret->flt = mp_decode_float(expr); - } else if (mp_typeof(**expr) == MP_EXT) { - int8_t ext_type; - uint32_t len = mp_decode_extl(expr, &ext_type); - switch (ext_type) { - case MP_DECIMAL: - ret->type = AT_DECIMAL; - decimal_unpack(expr, len, &ret->dec); - break; - default: - goto err; - } - } else { -err: diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)op->opcode, - index_base + op->field_no, "a number"); - return -1; - } - return 0; -} - -static inline int -mp_read_str(int index_base, struct update_op *op, - const char **expr, uint32_t *len, const char **ret) -{ - if (mp_typeof(**expr) != MP_STR) { - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char) op->opcode, - index_base + op->field_no, "a string"); - return -1; - } - *ret = mp_decode_str(expr, len); /* value */ - return 0; -} - -/* }}} read_arg helpers */ - -/* {{{ read_arg */ - -static int -read_arg_set(int index_base, struct update_op *op, - const char **expr) -{ - (void)index_base; - op->arg.set.value = *expr; - mp_next(expr); - op->arg.set.length = (uint32_t) (*expr - op->arg.set.value); - return 0; -} - -static int -read_arg_insert(int index_base, struct update_op *op, - const char **expr) -{ - return read_arg_set(index_base, op, expr); -} - -static int -read_arg_delete(int index_base, struct update_op *op, - const char **expr) -{ - if (mp_typeof(**expr) == MP_UINT) { - op->arg.del.count = (uint32_t) mp_decode_uint(expr); - return 0; - } else { - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)op->opcode, - index_base + op->field_no, - "a number of fields to delete"); - return -1; - } -} - -static int -read_arg_arith(int index_base, struct update_op *op, - const char **expr) -{ - return mp_read_arith_arg(index_base, op, expr, &op->arg.arith); -} - -static int -read_arg_bit(int index_base, struct update_op *op, - const char **expr) -{ - struct op_bit_arg *arg = &op->arg.bit; - return mp_read_uint(index_base, op, expr, &arg->val); -} - -static int -read_arg_splice(int index_base, struct update_op *op, - const char **expr) -{ - struct op_splice_arg *arg = &op->arg.splice; - if (mp_read_i32(index_base, op, expr, &arg->offset)) - return -1; - /* cut length */ - if (mp_read_i32(index_base, op, expr, &arg->cut_length)) - return -1; - /* value */ - return mp_read_str(index_base, op, expr, &arg->paste_length, - &arg->paste); -} - -/* }}} read_arg */ - -/* {{{ do_op helpers */ - -static inline int -op_adjust_field_no(struct tuple_update *update, struct update_op *op, - int32_t field_max) -{ - if (op->field_no >= 0) { - if (op->field_no < field_max) - return 0; - diag_set(ClientError, ER_NO_SUCH_FIELD_NO, update->index_base + - op->field_no); - return -1; - } else { - if (op->field_no + field_max >= 0) { - op->field_no += field_max; - return 0; - } - diag_set(ClientError, ER_NO_SUCH_FIELD_NO, op->field_no); - return -1; - } -} - -static inline double -cast_arith_arg_to_double(struct op_arith_arg arg) -{ - if (arg.type == AT_DOUBLE) { - return arg.dbl; - } else if (arg.type == AT_FLOAT) { - return arg.flt; - } else { - assert(arg.type == AT_INT); - if (int96_is_uint64(&arg.int96)) { - return int96_extract_uint64(&arg.int96); - } else { - assert(int96_is_neg_int64(&arg.int96)); - return int96_extract_neg_int64(&arg.int96); - } - } -} - -static inline decimal_t * -cast_arith_arg_to_decimal(struct op_arith_arg arg, decimal_t *dec) -{ - decimal_t *ret; - if (arg.type == AT_DECIMAL) { - *dec = arg.dec; - return dec; - } else if (arg.type == AT_DOUBLE) { - ret = decimal_from_double(dec, arg.dbl); - } else if (arg.type == AT_FLOAT) { - ret = decimal_from_double(dec, arg.flt); - } else { - assert(arg.type == AT_INT); - if (int96_is_uint64(&arg.int96)) { - uint64_t val = int96_extract_uint64(&arg.int96); - ret = decimal_from_uint64(dec, val); - } else { - assert(int96_is_neg_int64(&arg.int96)); - int64_t val = int96_extract_neg_int64(&arg.int96); - ret = decimal_from_int64(dec, val); - } - } - - return ret; -} - -/** Return the MsgPack size of an arithmetic operation result. */ -static inline uint32_t -mp_sizeof_op_arith_arg(struct op_arith_arg arg) -{ - if (arg.type == AT_INT) { - if (int96_is_uint64(&arg.int96)) { - uint64_t val = int96_extract_uint64(&arg.int96); - return mp_sizeof_uint(val); - } else { - int64_t val = int96_extract_neg_int64(&arg.int96); - return mp_sizeof_int(val); - } - } else if (arg.type == AT_DOUBLE) { - return mp_sizeof_double(arg.dbl); - } else if (arg.type == AT_FLOAT) { - return mp_sizeof_float(arg.flt); - } else { - assert(arg.type == AT_DECIMAL); - return mp_sizeof_decimal(&arg.dec); - } -} - -static inline int -make_arith_operation(struct op_arith_arg arg1, struct op_arith_arg arg2, - char opcode, uint32_t err_fieldno, struct op_arith_arg *ret) -{ - enum arith_type lowest_type = arg1.type; - if (arg1.type > arg2.type) - lowest_type = arg2.type; - - if (lowest_type == AT_INT) { - switch(opcode) { - case '+': - int96_add(&arg1.int96, &arg2.int96); - break; - case '-': - int96_invert(&arg2.int96); - int96_add(&arg1.int96, &arg2.int96); - break; - default: - unreachable(); /* checked by update_read_ops */ - break; - } - if (!int96_is_uint64(&arg1.int96) && - !int96_is_neg_int64(&arg1.int96)) { - diag_set(ClientError, - ER_UPDATE_INTEGER_OVERFLOW, - opcode, err_fieldno); - return -1; - } - *ret = arg1; - return 0; - } else if (lowest_type >= AT_DOUBLE) { - /* At least one of operands is double or float */ - double a = cast_arith_arg_to_double(arg1); - double b = cast_arith_arg_to_double(arg2); - double c; - switch(opcode) { - case '+': c = a + b; break; - case '-': c = a - b; break; - default: - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)opcode, - err_fieldno, "a positive integer"); - return -1; - } - if (lowest_type == AT_DOUBLE) { - /* result is DOUBLE */ - ret->type = AT_DOUBLE; - ret->dbl = c; - } else { - /* result is FLOAT */ - assert(lowest_type == AT_FLOAT); - ret->type = AT_FLOAT; - ret->flt = (float)c; - } - } else { - /* At least one of the operands is decimal. */ - decimal_t a, b, c; - if (! cast_arith_arg_to_decimal(arg1, &a) || - ! cast_arith_arg_to_decimal(arg2, &b)) { - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)opcode, - err_fieldno, "a number convertible to decimal."); - return -1; - } - - switch(opcode) { - case '+': - if (decimal_add(&c, &a, &b) == NULL) { - diag_set(ClientError, ER_UPDATE_DECIMAL_OVERFLOW, - opcode, err_fieldno); - return -1; - } - break; - case '-': - if (decimal_sub(&c, &a, &b) == NULL) { - diag_set(ClientError, ER_UPDATE_DECIMAL_OVERFLOW, - opcode, err_fieldno); - return -1; - } - break; - default: - diag_set(ClientError, ER_UPDATE_ARG_TYPE, (char)opcode, - err_fieldno); - return -1; - } - ret->type = AT_DECIMAL; - ret->dec = c; - } - return 0; -} - -/* }}} do_op helpers */ - -/* {{{ do_op */ - -static int -do_op_insert(struct tuple_update *update, struct update_op *op) -{ - if (op_adjust_field_no(update, op, rope_size(update->rope) + 1)) - return -1; - struct update_field *field = (struct update_field *) - update_alloc(update->region, sizeof(*field)); - if (field == NULL) - return -1; - update_field_init(field, op->arg.set.value, op->arg.set.length, 0); - return rope_insert(update->rope, op->field_no, field, 1); -} - -static int -do_op_set(struct tuple_update *update, struct update_op *op) -{ - /* intepret '=' for n +1 field as insert */ - if (op->field_no == (int32_t) rope_size(update->rope)) - return do_op_insert(update, op); - if (op_adjust_field_no(update, op, rope_size(update->rope))) - return -1; - struct update_field *field = rope_extract(update->rope, op->field_no); - if (field == NULL) - return -1; - /* Ignore the previous op, if any. */ - field->op = op; - op->new_field_len = op->arg.set.length; - return 0; -} - -static int -do_op_delete(struct tuple_update *update, struct update_op *op) -{ - if (op_adjust_field_no(update, op, rope_size(update->rope))) - return -1; - uint32_t delete_count = op->arg.del.count; - - if ((uint64_t) op->field_no + delete_count > rope_size(update->rope)) - delete_count = rope_size(update->rope) - op->field_no; - - if (delete_count == 0) { - diag_set(ClientError, ER_UPDATE_FIELD, - update->index_base + op->field_no, - "cannot delete 0 fields"); - return -1; - } - - for (uint32_t u = 0; u < delete_count; u++) - rope_erase(update->rope, op->field_no); - return 0; -} - -static int -do_op_arith(struct tuple_update *update, struct update_op *op) -{ - if (op_adjust_field_no(update, op, rope_size(update->rope))) - return -1; - - struct update_field *field = rope_extract(update->rope, op->field_no); - if (field == NULL) - return -1; - if (field->op) { - diag_set(ClientError, ER_UPDATE_FIELD, - update->index_base + op->field_no, - "double update of the same field"); - return -1; - } - const char *old = field->old; - struct op_arith_arg left_arg; - if (mp_read_arith_arg(update->index_base, op, &old, &left_arg)) - return -1; - - struct op_arith_arg right_arg = op->arg.arith; - if (make_arith_operation(left_arg, right_arg, op->opcode, - update->index_base + op->field_no, - &op->arg.arith)) - return -1; - field->op = op; - op->new_field_len = mp_sizeof_op_arith_arg(op->arg.arith); - return 0; -} - -static int -do_op_bit(struct tuple_update *update, struct update_op *op) -{ - if (op_adjust_field_no(update, op, rope_size(update->rope))) - return -1; - struct update_field *field = rope_extract(update->rope, op->field_no); - if (field == NULL) - return -1; - struct op_bit_arg *arg = &op->arg.bit; - if (field->op) { - diag_set(ClientError, ER_UPDATE_FIELD, - update->index_base + op->field_no, - "double update of the same field"); - return -1; - } - const char *old = field->old; - uint64_t val; - if (mp_read_uint(update->index_base, op, &old, &val)) - return -1; - switch (op->opcode) { - case '&': - arg->val &= val; - break; - case '^': - arg->val ^= val; - break; - case '|': - arg->val |= val; - break; - default: - unreachable(); /* checked by update_read_ops */ - } - field->op = op; - op->new_field_len = mp_sizeof_uint(arg->val); - return 0; -} - -static int -do_op_splice(struct tuple_update *update, struct update_op *op) -{ - if (op_adjust_field_no(update, op, rope_size(update->rope))) - return -1; - struct update_field *field = rope_extract(update->rope, op->field_no); - if (field == NULL) - return -1; - if (field->op) { - diag_set(ClientError, ER_UPDATE_FIELD, - update->index_base + op->field_no, - "double update of the same field"); - return -1; - } - - struct op_splice_arg *arg = &op->arg.splice; - - const char *in = field->old; - int32_t str_len; - if (mp_read_str(update->index_base, op, &in, (uint32_t *) &str_len, &in)) - return -1; - - if (arg->offset < 0) { - if (-arg->offset > str_len + 1) { - diag_set(ClientError, ER_SPLICE, - update->index_base + op->field_no, - "offset is out of bound"); - return -1; - } - arg->offset = arg->offset + str_len + 1; - } else if (arg->offset - update->index_base >= 0) { - arg->offset -= update->index_base; - if (arg->offset > str_len) - arg->offset = str_len; - } else /* (offset <= 0) */ { - diag_set(ClientError, ER_SPLICE, - update->index_base + op->field_no, - "offset is out of bound"); - return -1; - } - - assert(arg->offset >= 0 && arg->offset <= str_len); - - if (arg->cut_length < 0) { - if (-arg->cut_length > (str_len - arg->offset)) - arg->cut_length = 0; - else - arg->cut_length += str_len - arg->offset; - } else if (arg->cut_length > str_len - arg->offset) { - arg->cut_length = str_len - arg->offset; - } - - assert(arg->offset <= str_len); - - /* Fill tail part */ - arg->tail_offset = arg->offset + arg->cut_length; - arg->tail_length = str_len - arg->tail_offset; - - - field->op = op; - /* Record the new field length (maximal). */ - op->new_field_len = mp_sizeof_str(arg->offset + arg->paste_length + - arg->tail_length); - return 0; -} - -/* }}} do_op */ - -/* {{{ store_op */ - -static void -store_op_set(struct op_set_arg *arg, const char *in, char *out) -{ - (void)in; - memcpy(out, arg->value, arg->length); -} - -static void -store_op_insert(struct op_set_arg *arg, const char *in, char *out) -{ - (void)in; - memcpy(out, arg->value, arg->length); -} - -static void -store_op_arith(struct op_arith_arg *arg, const char *in, char *out) -{ - (void)in; - if (arg->type == AT_INT) { - if (int96_is_uint64(&arg->int96)) { - mp_encode_uint(out, int96_extract_uint64(&arg->int96)); - } else { - assert(int96_is_neg_int64(&arg->int96)); - mp_encode_int(out, int96_extract_neg_int64(&arg->int96)); - } - } else if (arg->type == AT_DOUBLE) { - mp_encode_double(out, arg->dbl); - } else if (arg->type == AT_FLOAT) { - mp_encode_float(out, arg->flt); - } else { - assert (arg->type == AT_DECIMAL); - mp_encode_decimal(out, &arg->dec); - } -} - -static void -store_op_bit(struct op_bit_arg *arg, const char *in, char *out) -{ - (void)in; - mp_encode_uint(out, arg->val); -} - -static void -store_op_splice(struct op_splice_arg *arg, const char *in, char *out) -{ - uint32_t new_str_len = arg->offset + arg->paste_length - + arg->tail_length; - - (void) mp_decode_strl(&in); - - out = mp_encode_strl(out, new_str_len); - memcpy(out, in, arg->offset); /* copy field head. */ - out = out + arg->offset; - memcpy(out, arg->paste, arg->paste_length); /* copy the paste */ - out = out + arg->paste_length; - memcpy(out, in + arg->tail_offset, arg->tail_length); /* copy tail */ -} - -/* }}} store_op */ - -static const struct update_op_meta op_set = - { read_arg_set, do_op_set, (store_op_func) store_op_set, 3 }; -static const struct update_op_meta op_insert = - { read_arg_insert, do_op_insert, (store_op_func) store_op_insert, 3 }; -static const struct update_op_meta op_arith = - { read_arg_arith, do_op_arith, (store_op_func) store_op_arith, 3 }; -static const struct update_op_meta op_bit = - { read_arg_bit, do_op_bit, (store_op_func) store_op_bit, 3 }; -static const struct update_op_meta op_splice = - { read_arg_splice, do_op_splice, (store_op_func) store_op_splice, 5 }; -static const struct update_op_meta op_delete = - { read_arg_delete, do_op_delete, (store_op_func) NULL, 3 }; - -/** Split a range of fields in two, allocating update_field - * context for the new range. - */ -static struct update_field * -update_field_split(struct region *region, struct update_field *prev, - size_t size, size_t offset) -{ - (void) size; - struct update_field *next = - (struct update_field *) update_alloc(region, sizeof(*next)); - if (next == NULL) - return NULL; - assert(offset > 0 && prev->tail_len > 0); - - const char *field = prev->tail; - const char *end = field + prev->tail_len; - - for (uint32_t i = 1; i < offset; i++) { - mp_next(&field); - } - - prev->tail_len = field - prev->tail; - const char *f = field; - mp_next(&f); - uint32_t field_len = f - field; - - update_field_init(next, field, field_len, end - field - field_len); - return next; -} - -/** - * We found a tuple to do the update on. Prepare a rope - * to perform operations on. - * @param update Update meta. - * @param tuple_data MessagePack array without the array header. - * @param tuple_data_end End of the @tuple_data. - * @param field_count Field count in @tuple_data. - * - * @retval 0 Success. - * @retval -1 Error. - */ -struct rope * -tuple_rope_new(struct region *region, const char *tuple_data, - const char *tuple_data_end, uint32_t field_count) -{ - struct rope *rope = rope_new(region); - if (rope == NULL) - return NULL; - /* Initialize the rope with the old tuple. */ - struct update_field *first = - (struct update_field *) update_alloc(region, sizeof(*first)); - if (first == NULL) - return NULL; - const char *field = tuple_data; - const char *end = tuple_data_end; - if (field == end) - return rope; - - /* Add first field to rope */ - mp_next(&tuple_data); - uint32_t field_len = tuple_data - field; - update_field_init(first, field, field_len, end - field - field_len); - - return rope_append(rope, first, field_count) != 0 ? NULL : rope; -} - -static uint32_t -update_calc_tuple_length(struct tuple_update *update) -{ - uint32_t res = mp_sizeof_array(rope_size(update->rope)); - struct rope_iter it; - struct rope_node *node; - - rope_iter_create(&it, update->rope); - for (node = rope_iter_start(&it); node; node = rope_iter_next(&it)) { - struct update_field *field = rope_leaf_data(node); - uint32_t field_len = (field->op ? field->op->new_field_len : - (uint32_t)(field->tail - field->old)); - res += field_len + field->tail_len; - } - - return res; -} - -static uint32_t -update_write_tuple(struct tuple_update *update, char *buffer, char *buffer_end) -{ - char *new_data = buffer; - new_data = mp_encode_array(new_data, rope_size(update->rope)); - - (void) buffer_end; - - uint32_t total_field_count = 0; - - struct rope_iter it; - struct rope_node *node; - - rope_iter_create(&it, update->rope); - for (node = rope_iter_start(&it); node; node = rope_iter_next(&it)) { - struct update_field *field = rope_leaf_data(node); - uint32_t field_count = rope_leaf_size(node); - const char *old_field = field->old; - struct update_op *op = field->op; - if (op) { - op->meta->store(&op->arg, old_field, new_data); - new_data += op->new_field_len; - } else { - uint32_t field_len = field->tail - field->old; - memcpy(new_data, old_field, field_len); - new_data += field_len; - } - /* Copy tail_len from the old tuple. */ - assert(field->tail_len == 0 || field_count > 1); - if (field_count > 1) { - memcpy(new_data, field->tail, field->tail_len); - new_data += field->tail_len; - } - total_field_count += field_count; - } - - assert(rope_size(update->rope) == total_field_count); - assert(new_data <= buffer_end); - return new_data - buffer; /* real_tuple_size */ -} - -static const struct update_op_meta * -update_op_by(char opcode) -{ - switch (opcode) { - case '=': - return &op_set; - case '+': - case '-': - return &op_arith; - case '&': - case '|': - case '^': - return &op_bit; - case ':': - return &op_splice; - case '#': - return &op_delete; - case '!': - return &op_insert; - default: - diag_set(ClientError, ER_UNKNOWN_UPDATE_OP); - return NULL; - } -} - -/** - * Decode an update operation from MessagePack. - * @param[out] op Update operation. - * @param index_base Field numbers base: 0 or 1. - * @param dict Dictionary to lookup field number by a name. - * @param expr MessagePack. - * - * @retval 0 Success. - * @retval -1 Client error. - */ -static inline int -update_op_decode(struct update_op *op, int index_base, - struct tuple_dictionary *dict, const char **expr) -{ - if (mp_typeof(**expr) != MP_ARRAY) { - diag_set(ClientError, ER_ILLEGAL_PARAMS, "update operation " - "must be an array {op,..}"); - return -1; - } - uint32_t len, args = mp_decode_array(expr); - if (args < 1) { - diag_set(ClientError, ER_ILLEGAL_PARAMS, "update operation "\ - "must be an array {op,..}, got empty array"); - return -1; - } - if (mp_typeof(**expr) != MP_STR) { - diag_set(ClientError, ER_ILLEGAL_PARAMS, - "update operation name must be a string"); - return -1; - } - op->opcode = *mp_decode_str(expr, &len); - op->meta = update_op_by(op->opcode); - if (op->meta == NULL) - return -1; - if (args != op->meta->args) { - diag_set(ClientError, ER_UNKNOWN_UPDATE_OP); - return -1; - } - int32_t field_no; - switch(mp_typeof(**expr)) { - case MP_INT: - case MP_UINT: { - if (mp_read_i32(index_base, op, expr, &field_no) != 0) - return -1; - if (field_no - index_base >= 0) { - op->field_no = field_no - index_base; - } else if (field_no < 0) { - op->field_no = field_no; - } else { - diag_set(ClientError, ER_NO_SUCH_FIELD_NO, field_no); - return -1; - } - break; - } - case MP_STR: { - const char *path = mp_decode_str(expr, &len); - uint32_t field_no, hash = field_name_hash(path, len); - if (tuple_fieldno_by_name(dict, path, len, hash, - &field_no) != 0) { - diag_set(ClientError, ER_NO_SUCH_FIELD_NAME, - tt_cstr(path, len)); - return -1; - } - op->field_no = (int32_t) field_no; - break; - } - default: - diag_set(ClientError, ER_ILLEGAL_PARAMS, - "field id must be a number or a string"); - return -1; - } - return op->meta->read_arg(index_base, op, expr); -} - /** * Read and check update operations and fill column mask. * @@ -1112,20 +83,18 @@ update_read_ops(struct tuple_update *update, const char *expr, return -1; } uint64_t column_mask = 0; - /* number of operations */ update->op_count = mp_decode_array(&expr); - if (update->op_count > BOX_UPDATE_OP_CNT_MAX) { diag_set(ClientError, ER_ILLEGAL_PARAMS, "too many operations for update"); return -1; } - - /* Read update operations. */ - update->ops = (struct update_op *) update_alloc(update->region, - update->op_count * sizeof(struct update_op)); - if (update->ops == NULL) + int size = update->op_count * sizeof(update->ops[0]); + update->ops = (struct update_op *) region_alloc(&fiber()->gc, size); + if (update->ops == NULL) { + diag_set(OutOfMemory, size, "region_alloc", "update->ops"); return -1; + } struct update_op *op = update->ops; struct update_op *ops_end = op + update->op_count; for (; op < ops_end; op++) { @@ -1227,14 +196,13 @@ static int update_do_ops(struct tuple_update *update, const char *old_data, const char *old_data_end, uint32_t part_count) { - update->rope = tuple_rope_new(update->region, old_data, old_data_end, - part_count); - if (update->rope == NULL) + if (update_array_create(&update->root_array, old_data, old_data_end, + part_count) != 0) return -1; struct update_op *op = update->ops; struct update_op *ops_end = op + update->op_count; for (; op < ops_end; op++) { - if (op->meta->do_op(update, op)) + if (op->meta->do_cb(op, &update->root_array) != 0) return -1; } return 0; @@ -1250,14 +218,13 @@ upsert_do_ops(struct tuple_update *update, const char *old_data, const char *old_data_end, uint32_t part_count, bool suppress_error) { - update->rope = tuple_rope_new(update->region, old_data, old_data_end, - part_count); - if (update->rope == NULL) + if (update_array_create(&update->root_array, old_data, old_data_end, + part_count) != 0) return -1; struct update_op *op = update->ops; struct update_op *ops_end = op + update->op_count; for (; op < ops_end; op++) { - if (op->meta->do_op(update, op) == 0) + if (op->meta->do_cb(op, &update->root_array) == 0) continue; struct error *e = diag_last_error(diag_get()); if (e->type != &type_ClientError) @@ -1274,23 +241,22 @@ static void update_init(struct tuple_update *update, int index_base) { memset(update, 0, sizeof(*update)); - update->region = &fiber()->gc; - /* - * Base field offset, e.g. 0 for C and 1 for Lua. Used only for - * error messages. All fields numbers must be zero-based! - */ update->index_base = index_base; } -const char * +static const char * update_finish(struct tuple_update *update, uint32_t *p_tuple_len) { - uint32_t tuple_len = update_calc_tuple_length(update); - char *buffer = (char *) update_alloc(update->region, tuple_len); - if (buffer == NULL) + uint32_t tuple_size = update_array_sizeof(&update->root_array); + char *out = (char *) region_alloc(&fiber()->gc, tuple_size); + if (out == NULL) { + diag_set(OutOfMemory, tuple_size, "region_alloc", "buffer"); return NULL; - *p_tuple_len = update_write_tuple(update, buffer, buffer + tuple_len); - return buffer; + } + *p_tuple_len = update_array_store(&update->root_array, out, + out + tuple_size); + assert(*p_tuple_len == tuple_size); + return out; } int @@ -1371,10 +337,13 @@ tuple_upsert_squash(const char *expr1, const char *expr1_end, } size_t possible_size = expr1_end - expr1 + expr2_end - expr2; const uint32_t space_for_arr_tag = 5; - char *buf = (char *) update_alloc(&fiber()->gc, + char *buf = (char *) region_alloc(&fiber()->gc, possible_size + space_for_arr_tag); - if (buf == NULL) + if (buf == NULL) { + diag_set(OutOfMemory, possible_size + space_for_arr_tag, + "region_alloc", "buf"); return NULL; + } /* reserve some space for mp array header */ char *res_ops = buf + space_for_arr_tag; uint32_t res_count = 0; /* number of resulting operations */ @@ -1428,20 +397,16 @@ tuple_upsert_squash(const char *expr1, const char *expr1_end, op[0]->opcode = '+'; int96_invert(&op[0]->arg.arith.int96); } - struct op_arith_arg res; - if (make_arith_operation(op[0]->arg.arith, op[1]->arg.arith, - op[1]->opcode, - update[0].index_base + - op[0]->field_no, &res)) + struct update_op res; + if (make_arith_operation(op[1], op[0]->arg.arith, + &res.arg.arith) != 0) return NULL; res_ops = mp_encode_array(res_ops, 3); - res_ops = mp_encode_str(res_ops, - (const char *)&op[0]->opcode, 1); - res_ops = mp_encode_uint(res_ops, - op[0]->field_no + - update[0].index_base); + res_ops = mp_encode_str(res_ops, &op[0]->opcode, 1); + res_ops = mp_encode_uint(res_ops, op[0]->field_no + + update[0].index_base); store_op_arith(&res, NULL, res_ops); - res_ops += mp_sizeof_op_arith_arg(res); + res_ops += update_arith_sizeof(&res.arg.arith); mp_next(&expr[0]); mp_next(&expr[1]); op_no[0]++; diff --git a/src/box/update/update_array.c b/src/box/update/update_array.c new file mode 100644 index 000000000..985d555af --- /dev/null +++ b/src/box/update/update_array.c @@ -0,0 +1,288 @@ +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "update_field.h" +#include "msgpuck.h" +#include "fiber.h" + +/** + * Make field number positive and check for the field existence. + * @param op Update operation. + * @param field_count Field count in the array having this field. + * + * @retval 0 Success. + * @retval -1 Error. + */ +static inline int +update_op_adjust_field_no(struct update_op *op, int32_t field_count) +{ + if (op->field_no >= 0) { + if (op->field_no < field_count) + return 0; + return update_err_no_such_field(op); + } + if (op->field_no + field_count >= 0) { + op->field_no += field_count; + return 0; + } + return update_err_no_such_field(op); +} + +/** + * Update of an array consists of array items. Each item is a + * range of fields. Item updates first field of the range and + * stores size of others to save them with no changes into a new + * tuple later. It allows on update of a single field in an array + * store at most 2 objects - item for the previous fields, and + * item for this one + its tail. This is how rope data structure + * works. + */ +struct update_array_item { + /** Update of the first field in the range. */ + struct update_field field; + /** Size of other fields in the range. */ + uint32_t tail_size; +}; + +/** Initialize an array item. */ +static inline void +update_array_item_create(struct update_array_item *item, enum update_type type, + const char *data, uint32_t size, uint32_t tail_size) +{ + item->field.type = type; + item->field.data = data; + item->field.size = size; + item->tail_size = tail_size; +} + +/** Rope allocator for nodes, paths, items etc. */ +static inline void * +rope_alloc(struct region *region, size_t size) +{ + void *ptr = region_aligned_alloc(region, size, alignof(uint64_t)); + if (ptr == NULL) { + diag_set(OutOfMemory, size, "region_aligned_alloc", + "update internals"); + } + return ptr; +} + +/** Split a range of fields in two. */ +static struct update_array_item * +rope_field_split(struct region *region, struct update_array_item *prev, + size_t size, size_t offset) +{ + (void) size; + struct update_array_item *next = + (struct update_array_item *) rope_alloc(region, sizeof(*next)); + if (next == NULL) + return NULL; + assert(offset > 0 && prev->tail_size > 0); + + const char *tail = prev->field.data + prev->field.size; + const char *field = tail; + const char *range_end = tail + prev->tail_size; + + for (uint32_t i = 1; i < offset; ++i) + mp_next(&field); + + prev->tail_size = field - tail; + const char *field_end = field; + mp_next(&field_end); + update_array_item_create(next, UPDATE_NOP, field, field_end - field, + range_end - field_end); + return next; +} + +#define ROPE_SPLIT_F rope_field_split +#define ROPE_ALLOC_F rope_alloc +#define rope_data_t struct update_array_item * +#define rope_ctx_t struct region * + +#include "salad/rope.h" + +/** + * Extract from the array an item whose range starts from the + * field affected by @a op. + */ +static inline struct update_array_item * +update_array_extract_item(struct update_field *field, struct update_op *op) +{ + assert(field->type == UPDATE_ARRAY); + struct rope *rope = field->array.rope; + if (update_op_adjust_field_no(op, rope_size(rope)) != 0) + return NULL; + return rope_extract(rope, op->field_no); +} + +int +update_array_create(struct update_field *field, const char *data, + const char *data_end, uint32_t field_count) +{ + field->type = UPDATE_ARRAY; + field->data = data; + field->size = data_end - data; + field->array.rope = rope_new(&fiber()->gc); + if (field->array.rope == NULL) + return -1; + struct update_array_item *item = (struct update_array_item *) + rope_alloc(&fiber()->gc, sizeof(*item)); + if (item == NULL) + return -1; + if (data == data_end) + return 0; + /* + * Initial item consists of one range - the whole array. + */ + const char *begin = data; + mp_next(&data); + update_array_item_create(item, UPDATE_NOP, begin, data - begin, + data_end - data); + return rope_append(field->array.rope, item, field_count); +} + +uint32_t +update_array_sizeof(struct update_field *field) +{ + assert(field->type == UPDATE_ARRAY); + struct rope_iter it; + rope_iter_create(&it, field->array.rope); + + uint32_t res = mp_sizeof_array(rope_size(field->array.rope)); + for (struct rope_node *node = rope_iter_start(&it); node != NULL; + node = rope_iter_next(&it)) { + struct update_array_item *item = rope_leaf_data(node); + res += update_field_sizeof(&item->field) + item->tail_size; + } + return res; +} + +uint32_t +update_array_store(struct update_field *field, char *out, char *out_end) +{ + assert(field->type == UPDATE_ARRAY); + char *out_begin = out; + out = mp_encode_array(out, rope_size(field->array.rope)); + uint32_t total_field_count = 0; + struct rope_iter it; + rope_iter_create(&it, field->array.rope); + for (struct rope_node *node = rope_iter_start(&it); node != NULL; + node = rope_iter_next(&it)) { + struct update_array_item *item = rope_leaf_data(node); + uint32_t field_count = rope_leaf_size(node); + out += update_field_store(&item->field, out, out_end); + assert(item->tail_size == 0 || field_count > 1); + memcpy(out, item->field.data + item->field.size, + item->tail_size); + out += item->tail_size; + total_field_count += field_count; + } + (void) total_field_count; + assert(rope_size(field->array.rope) == total_field_count); + assert(out <= out_end); + return out - out_begin; +} + +int +do_op_array_insert(struct update_op *op, struct update_field *field) +{ + assert(field->type == UPDATE_ARRAY); + struct rope *rope = field->array.rope; + if (update_op_adjust_field_no(op, rope_size(rope) + 1) != 0) + return -1; + + struct update_array_item *item = + (struct update_array_item *) rope_alloc(rope->ctx, + sizeof(*item)); + if (item == NULL) + return -1; + update_array_item_create(item, UPDATE_NOP, op->arg.set.value, + op->arg.set.length, 0); + return rope_insert(rope, op->field_no, item, 1); +} + +int +do_op_array_set(struct update_op *op, struct update_field *field) +{ + assert(field->type == UPDATE_ARRAY); + struct rope *rope = field->array.rope; + /* Interpret '=' for n + 1 field as insert. */ + if (op->field_no == (int32_t) rope_size(rope)) + return do_op_array_insert(op, field); + + struct update_array_item *item = + update_array_extract_item(field, op); + if (item == NULL) + return -1; + op->new_field_len = op->arg.set.length; + /* Ignore the previous op, if any. */ + item->field.type = UPDATE_SCALAR; + item->field.scalar.op = op; + return 0; +} + +int +do_op_array_delete(struct update_op *op, struct update_field *field) +{ + assert(field->type == UPDATE_ARRAY); + struct rope *rope = field->array.rope; + uint32_t size = rope_size(rope); + if (update_op_adjust_field_no(op, size) != 0) + return -1; + uint32_t delete_count = op->arg.del.count; + if ((uint64_t) op->field_no + delete_count > size) + delete_count = size - op->field_no; + for (uint32_t u = delete_count; u != 0; --u) + rope_erase(rope, op->field_no); + return 0; +} + +#define DO_SCALAR_OP_GENERIC(op_type) \ +int \ +do_op_array_##op_type(struct update_op *op, struct update_field *field) \ +{ \ + struct update_array_item *item = \ + update_array_extract_item(field, op); \ + if (item == NULL) \ + return -1; \ + if (item->field.type != UPDATE_NOP) \ + return update_err_double(op); \ + if (update_op_do_##op_type(op, item->field.data) != 0) \ + return -1; \ + item->field.type = UPDATE_SCALAR; \ + item->field.scalar.op = op; \ + return 0; \ +} + +DO_SCALAR_OP_GENERIC(arith) + +DO_SCALAR_OP_GENERIC(bit) + +DO_SCALAR_OP_GENERIC(splice) diff --git a/src/box/update/update_field.c b/src/box/update/update_field.c new file mode 100644 index 000000000..b4ede54db --- /dev/null +++ b/src/box/update/update_field.c @@ -0,0 +1,649 @@ +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "update_field.h" +#include "box/tuple_format.h" +#include "mp_extension_types.h" + +/* {{{ Error helpers. */ + +/** + * Take a string identifier of a field being updated by @a op. + * @param op Operation to convert field of. + * @return String with field ID. + */ +static inline const char * +update_op_field_str(const struct update_op *op) +{ + if (op->field_no >= 0) + return tt_sprintf("%d", op->field_no + TUPLE_INDEX_BASE); + else + return tt_sprintf("%d", op->field_no); +} + +static inline int +update_err_arg_type(const struct update_op *op, const char *needed_type) +{ + diag_set(ClientError, ER_UPDATE_ARG_TYPE, op->opcode, + update_op_field_str(op), needed_type); + return -1; +} + +static inline int +update_err_int_overflow(const struct update_op *op) +{ + diag_set(ClientError, ER_UPDATE_INTEGER_OVERFLOW, op->opcode, + update_op_field_str(op)); + return -1; +} + +static inline int +update_err_decimal_overflow(const struct update_op *op) +{ + diag_set(ClientError, ER_UPDATE_DECIMAL_OVERFLOW, op->opcode, + update_op_field_str(op)); + return -1; +} + +static inline int +update_err_splice_bound(const struct update_op *op) +{ + diag_set(ClientError, ER_UPDATE_SPLICE, update_op_field_str(op), + "offset is out of bound"); + return -1; +} + +int +update_err_no_such_field(const struct update_op *op) +{ + diag_set(ClientError, ER_NO_SUCH_FIELD_NO, op->field_no >= 0 ? + TUPLE_INDEX_BASE + op->field_no : op->field_no); + return -1; +} + +int +update_err(const struct update_op *op, const char *reason) +{ + diag_set(ClientError, ER_UPDATE_FIELD, update_op_field_str(op), + reason); + return -1; +} + +/* }}} Error helpers. */ + +uint32_t +update_field_sizeof(struct update_field *field) +{ + switch (field->type) { + case UPDATE_NOP: + return field->size; + case UPDATE_SCALAR: + return field->scalar.op->new_field_len; + case UPDATE_ARRAY: + return update_array_sizeof(field); + default: + unreachable(); + } + return 0; +} + +uint32_t +update_field_store(struct update_field *field, char *out, char *out_end) +{ + struct update_op *op; + uint32_t size; + switch(field->type) { + case UPDATE_NOP: + assert(out_end - out >= field->size); + memcpy(out, field->data, field->size); + return field->size; + case UPDATE_SCALAR: + op = field->scalar.op; + size = op->new_field_len; + assert(out_end - out >= size); + op->meta->store_cb(op, field->data, out); + return size; + case UPDATE_ARRAY: + return update_array_store(field, out, out_end); + default: + unreachable(); + } + return 0; +} + +/* {{{ read_arg helpers. */ + +static inline int +mp_read_i32(struct update_op *op, const char **expr, int32_t *ret) +{ + if (mp_read_int32(expr, ret) == 0) + return 0; + return update_err_arg_type(op, "an integer"); +} + +static inline int +mp_read_uint(struct update_op *op, const char **expr, uint64_t *ret) +{ + if (mp_typeof(**expr) == MP_UINT) { + *ret = mp_decode_uint(expr); + return 0; + } + return update_err_arg_type(op, "a positive integer"); +} + +static inline int +mp_read_arith_arg(struct update_op *op, const char **expr, + struct op_arith_arg *ret) +{ + int8_t ext_type; + uint32_t len; + switch(mp_typeof(**expr)) { + case MP_UINT: + ret->type = AT_INT; + int96_set_unsigned(&ret->int96, mp_decode_uint(expr)); + return 0; + case MP_INT: + ret->type = AT_INT; + int96_set_signed(&ret->int96, mp_decode_int(expr)); + return 0; + case MP_DOUBLE: + ret->type = AT_DOUBLE; + ret->dbl = mp_decode_double(expr); + return 0; + case MP_FLOAT: + ret->type = AT_FLOAT; + ret->flt = mp_decode_float(expr); + return 0; + case MP_EXT: + len = mp_decode_extl(expr, &ext_type); + if (ext_type == MP_DECIMAL) { + ret->type = AT_DECIMAL; + decimal_unpack(expr, len, &ret->dec); + return 0; + } + FALLTHROUGH; + default: + return update_err_arg_type(op, "a number"); + } +} + +static inline int +mp_read_str(struct update_op *op, const char **expr, uint32_t *len, + const char **ret) +{ + if (mp_typeof(**expr) == MP_STR) { + *ret = mp_decode_str(expr, len); + return 0; + } + return update_err_arg_type(op, "a string"); +} + +/* }}} read_arg helpers. */ + +/* {{{ read_arg */ + +static int +read_arg_set(struct update_op *op, const char **expr, int index_base) +{ + (void) index_base; + op->arg.set.value = *expr; + mp_next(expr); + op->arg.set.length = (uint32_t) (*expr - op->arg.set.value); + return 0; +} + +static int +read_arg_insert(struct update_op *op, const char **expr, int index_base) +{ + return read_arg_set(op, expr, index_base); +} + +static int +read_arg_delete(struct update_op *op, const char **expr, int index_base) +{ + (void) index_base; + if (mp_typeof(**expr) == MP_UINT) { + op->arg.del.count = (uint32_t) mp_decode_uint(expr); + if (op->arg.del.count != 0) + return 0; + return update_err(op, "cannot delete 0 fields"); + } + return update_err_arg_type(op, "a number of fields to delete"); +} + +static int +read_arg_arith(struct update_op *op, const char **expr, int index_base) +{ + (void) index_base; + return mp_read_arith_arg(op, expr, &op->arg.arith); +} + +static int +read_arg_bit(struct update_op *op, const char **expr, int index_base) +{ + (void) index_base; + return mp_read_uint(op, expr, &op->arg.bit.val); +} + +static int +read_arg_splice(struct update_op *op, const char **expr, int index_base) +{ + struct op_splice_arg *arg = &op->arg.splice; + if (mp_read_i32(op, expr, &arg->offset)) + return -1; + if (arg->offset >= 0) { + if (arg->offset - index_base < 0) + return update_err_splice_bound(op); + arg->offset -= index_base; + } + if (mp_read_i32(op, expr, &arg->cut_length) == 0) + return mp_read_str(op, expr, &arg->paste_length, &arg->paste); + return -1; +} + +/* }}} read_arg */ + +/* {{{ do_op helpers. */ + +static inline double +cast_arith_arg_to_double(struct op_arith_arg arg) +{ + if (arg.type == AT_DOUBLE) { + return arg.dbl; + } else if (arg.type == AT_FLOAT) { + return arg.flt; + } else { + assert(arg.type == AT_INT); + if (int96_is_uint64(&arg.int96)) { + return int96_extract_uint64(&arg.int96); + } else { + assert(int96_is_neg_int64(&arg.int96)); + return int96_extract_neg_int64(&arg.int96); + } + } +} + +static inline decimal_t * +cast_arith_arg_to_decimal(struct op_arith_arg arg, decimal_t *dec) +{ + if (arg.type == AT_DECIMAL) { + *dec = arg.dec; + return dec; + } else if (arg.type == AT_DOUBLE) { + return decimal_from_double(dec, arg.dbl); + } else if (arg.type == AT_FLOAT) { + return decimal_from_double(dec, arg.flt); + } else { + assert(arg.type == AT_INT); + if (int96_is_uint64(&arg.int96)) { + uint64_t val = int96_extract_uint64(&arg.int96); + return decimal_from_uint64(dec, val); + } else { + assert(int96_is_neg_int64(&arg.int96)); + int64_t val = int96_extract_neg_int64(&arg.int96); + return decimal_from_int64(dec, val); + } + } +} + +uint32_t +update_arith_sizeof(struct op_arith_arg *arg) +{ + switch (arg->type) { + case AT_INT: + if (int96_is_uint64(&arg->int96)) { + uint64_t val = int96_extract_uint64(&arg->int96); + return mp_sizeof_uint(val); + } else { + int64_t val = int96_extract_neg_int64(&arg->int96); + return mp_sizeof_int(val); + } + break; + case AT_DOUBLE: + return mp_sizeof_double(arg->dbl); + case AT_FLOAT: + return mp_sizeof_float(arg->flt); + default: + assert(arg->type == AT_DECIMAL); + return mp_sizeof_decimal(&arg->dec); + } +} + +int +make_arith_operation(struct update_op *op, struct op_arith_arg arg, + struct op_arith_arg *ret) +{ + struct op_arith_arg value = op->arg.arith; + enum arith_type lowest_type = arg.type; + if (arg.type > value.type) + lowest_type = value.type; + + if (lowest_type == AT_INT) { + switch(op->opcode) { + case '+': + int96_add(&arg.int96, &value.int96); + break; + case '-': + int96_invert(&value.int96); + int96_add(&arg.int96, &value.int96); + break; + default: + unreachable(); + break; + } + if (!int96_is_uint64(&arg.int96) && + !int96_is_neg_int64(&arg.int96)) + return update_err_int_overflow(op); + *ret = arg; + } else if (lowest_type >= AT_DOUBLE) { + /* At least one of operands is double or float. */ + double a = cast_arith_arg_to_double(arg); + double b = cast_arith_arg_to_double(value); + double c; + switch(op->opcode) { + case '+': + c = a + b; + break; + case '-': + c = a - b; + break; + default: + unreachable(); + break; + } + if (lowest_type == AT_DOUBLE) { + ret->type = AT_DOUBLE; + ret->dbl = c; + } else { + assert(lowest_type == AT_FLOAT); + ret->type = AT_FLOAT; + ret->flt = (float)c; + } + } else { + decimal_t a, b, c; + if (! cast_arith_arg_to_decimal(arg, &a) || + ! cast_arith_arg_to_decimal(value, &b)) { + return update_err_arg_type(op, "a number convertible "\ + "to decimal"); + } + switch(op->opcode) { + case '+': + if (decimal_add(&c, &a, &b) == NULL) + return update_err_decimal_overflow(op); + break; + case '-': + if (decimal_sub(&c, &a, &b) == NULL) + return update_err_decimal_overflow(op); + break; + default: + unreachable(); + break; + } + ret->type = AT_DECIMAL; + ret->dec = c; + } + return 0; +} + +int +update_op_do_arith(struct update_op *op, const char *old) +{ + struct op_arith_arg left_arg; + if (mp_read_arith_arg(op, &old, &left_arg) != 0 || + make_arith_operation(op, left_arg, &op->arg.arith) != 0) + return -1; + op->new_field_len = update_arith_sizeof(&op->arg.arith); + return 0; +} + +int +update_op_do_bit(struct update_op *op, const char *old) +{ + uint64_t val; + if (mp_read_uint(op, &old, &val) != 0) + return -1; + struct op_bit_arg *arg = &op->arg.bit; + switch (op->opcode) { + case '&': + arg->val &= val; + break; + case '^': + arg->val ^= val; + break; + case '|': + arg->val |= val; + break; + default: + /* Checked by update_read_ops. */ + unreachable(); + } + op->new_field_len = mp_sizeof_uint(arg->val); + return 0; +} + +int +update_op_do_splice(struct update_op *op, const char *old) +{ + struct op_splice_arg *arg = &op->arg.splice; + int32_t str_len; + if (mp_read_str(op, &old, (uint32_t *) &str_len, &old) != 0) + return -1; + + if (arg->offset < 0) { + if (-arg->offset > str_len + 1) + return update_err_splice_bound(op); + arg->offset += str_len + 1; + } else if (arg->offset > str_len) { + arg->offset = str_len; + } + assert(arg->offset >= 0 && arg->offset <= str_len); + if (arg->cut_length < 0) { + if (-arg->cut_length > (str_len - arg->offset)) + arg->cut_length = 0; + else + arg->cut_length += str_len - arg->offset; + } else if (arg->cut_length > str_len - arg->offset) { + arg->cut_length = str_len - arg->offset; + } + assert(arg->offset <= str_len); + + arg->tail_offset = arg->offset + arg->cut_length; + arg->tail_length = str_len - arg->tail_offset; + op->new_field_len = mp_sizeof_str(arg->offset + arg->paste_length + + arg->tail_length); + return 0; +} + +/* }}} do_op helpers. */ + +/* {{{ store_op */ + +static void +store_op_set(struct update_op *op, const char *in, char *out) +{ + (void) in; + memcpy(out, op->arg.set.value, op->arg.set.length); +} + +void +store_op_arith(struct update_op *op, const char *in, char *out) +{ + (void) in; + struct op_arith_arg *arg = &op->arg.arith; + switch (arg->type) { + case AT_INT: + if (int96_is_uint64(&arg->int96)) { + mp_encode_uint(out, int96_extract_uint64(&arg->int96)); + } else { + assert(int96_is_neg_int64(&arg->int96)); + mp_encode_int(out, int96_extract_neg_int64(&arg->int96)); + } + break; + case AT_DOUBLE: + mp_encode_double(out, arg->dbl); + break; + case AT_FLOAT: + mp_encode_float(out, arg->flt); + break; + default: + assert(arg->type == AT_DECIMAL); + mp_encode_decimal(out, &arg->dec); + break; + } +} + +static void +store_op_bit(struct update_op *op, const char *in, char *out) +{ + (void) in; + mp_encode_uint(out, op->arg.bit.val); +} + +static void +store_op_splice(struct update_op *op, const char *in, char *out) +{ + struct op_splice_arg *arg = &op->arg.splice; + uint32_t new_str_len = arg->offset + arg->paste_length + + arg->tail_length; + (void) mp_decode_strl(&in); + out = mp_encode_strl(out, new_str_len); + /* Copy field head. */ + memcpy(out, in, arg->offset); + out = out + arg->offset; + /* Copy the paste. */ + memcpy(out, arg->paste, arg->paste_length); + out = out + arg->paste_length; + /* Copy tail. */ + memcpy(out, in + arg->tail_offset, arg->tail_length); +} + +/* }}} store_op */ + +static const struct update_op_meta op_set = + { read_arg_set, do_op_set, store_op_set, 3 }; +static const struct update_op_meta op_insert = + { read_arg_insert, do_op_insert, store_op_set, 3 }; +static const struct update_op_meta op_arith = + { read_arg_arith, do_op_arith, store_op_arith, 3 }; +static const struct update_op_meta op_bit = + { read_arg_bit, do_op_bit, store_op_bit, 3 }; +static const struct update_op_meta op_splice = + { read_arg_splice, do_op_splice, store_op_splice, 5 }; +static const struct update_op_meta op_delete = + { read_arg_delete, do_op_delete, NULL, 3 }; + +static inline const struct update_op_meta * +update_op_by(char opcode) +{ + switch (opcode) { + case '=': + return &op_set; + case '+': + case '-': + return &op_arith; + case '&': + case '|': + case '^': + return &op_bit; + case ':': + return &op_splice; + case '#': + return &op_delete; + case '!': + return &op_insert; + default: + diag_set(ClientError, ER_UNKNOWN_UPDATE_OP); + return NULL; + } +} + +int +update_op_decode(struct update_op *op, int index_base, + struct tuple_dictionary *dict, const char **expr) +{ + if (mp_typeof(**expr) != MP_ARRAY) { + diag_set(ClientError, ER_ILLEGAL_PARAMS, "update operation " + "must be an array {op,..}"); + return -1; + } + uint32_t len, arg_count = mp_decode_array(expr); + if (arg_count < 1) { + diag_set(ClientError, ER_ILLEGAL_PARAMS, "update operation "\ + "must be an array {op,..}, got empty array"); + return -1; + } + if (mp_typeof(**expr) != MP_STR) { + diag_set(ClientError, ER_ILLEGAL_PARAMS, + "update operation name must be a string"); + return -1; + } + op->opcode = *mp_decode_str(expr, &len); + op->meta = update_op_by(op->opcode); + if (op->meta == NULL) + return -1; + if (arg_count != op->meta->arg_count) { + diag_set(ClientError, ER_UNKNOWN_UPDATE_OP); + return -1; + } + int32_t field_no; + switch(mp_typeof(**expr)) { + case MP_INT: + case MP_UINT: { + if (mp_read_i32(op, expr, &field_no) != 0) + return -1; + if (field_no - index_base >= 0) { + op->field_no = field_no - index_base; + } else if (field_no < 0) { + op->field_no = field_no; + } else { + diag_set(ClientError, ER_NO_SUCH_FIELD_NO, field_no); + return -1; + } + break; + } + case MP_STR: { + const char *path = mp_decode_str(expr, &len); + uint32_t field_no, hash = field_name_hash(path, len); + if (tuple_fieldno_by_name(dict, path, len, hash, + &field_no) == 0) { + op->field_no = (int32_t) field_no; + break; + } + diag_set(ClientError, ER_NO_SUCH_FIELD_NAME, + tt_cstr(path, len)); + return -1; + } + default: + diag_set(ClientError, ER_ILLEGAL_PARAMS, + "field id must be a number or a string"); + return -1; + } + return op->meta->read_arg_cb(op, expr, index_base); +} diff --git a/src/box/update/update_field.h b/src/box/update/update_field.h new file mode 100644 index 000000000..87f1d0b74 --- /dev/null +++ b/src/box/update/update_field.h @@ -0,0 +1,410 @@ +#ifndef TARANTOOL_BOX_TUPLE_UPDATE_FIELD_H +#define TARANTOOL_BOX_TUPLE_UPDATE_FIELD_H +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "trivia/util.h" +#include "tt_static.h" +#include <stdint.h> +#include "bit/int96.h" +#include "mp_decimal.h" + +/** + * This file is a link between all the update operations for all + * the field types. It functions like a router, when an update + * operation is being parsed step by step, going down the update + * tree. For example, when an update operation goes through an + * array, a map, another array, and ends with a scalar operation, + * at the end of each step the operation goes to the next one via + * functions of this file: update_array.c -> update_field.c -> + * update_map.c -> update_field.c -> update_array.c -> ... . The + * functions, doing the routing, are do_op_<operation_type>(), see + * them below. + */ + +struct rope; +struct update_field; +struct update_op; +struct tuple_dictionary; + +/* {{{ update_op */ + +/** Argument of SET (and INSERT) operation. */ +struct op_set_arg { + uint32_t length; + const char *value; +}; + +/** Argument of DELETE operation. */ +struct op_del_arg { + uint32_t count; +}; + +/** + * MsgPack format code of an arithmetic argument or result. + * MsgPack codes are not used to simplify type calculation. + */ +enum arith_type { + AT_DECIMAL = 0, /* MP_EXT + MP_DECIMAL */ + AT_DOUBLE = 1, /* MP_DOUBLE */ + AT_FLOAT = 2, /* MP_FLOAT */ + AT_INT = 3 /* MP_INT/MP_UINT */ +}; + +/** + * Argument (left and right) and result of ADD, SUBTRACT. + * + * To perform an arithmetic operation, update first loads left + * and right arguments into corresponding value objects, then + * performs arithmetics on types of arguments, thus calculating + * the type of the result, and then performs the requested + * operation according to the calculated type rules. + * + * The rules are as follows: + * - when one of the argument types is double, the result is + * double; + * - when one of the argument types is float, the result is float; + * - for integer arguments, the result type code depends on the + * range in which falls the result of the operation. If the + * result is in negative range, it's MP_INT, otherwise it's + * MP_UINT. If the result is out of bounds of (-2^63, 2^64), an + * exception is raised for overflow. + */ +struct op_arith_arg { + enum arith_type type; + union { + double dbl; + float flt; + struct int96_num int96; + decimal_t dec; + }; +}; + +/** Argument of AND, XOR, OR operations. */ +struct op_bit_arg { + uint64_t val; +}; + +/** Argument of SPLICE. */ +struct op_splice_arg { + /** Splice position. */ + int32_t offset; + /** Byte count to delete. */ + int32_t cut_length; + /** New content. */ + const char *paste; + /** New content length. */ + uint32_t paste_length; + + /** Offset of the tail in the old field. */ + int32_t tail_offset; + /** Size of the tail. */ + int32_t tail_length; +}; + +/** Update operation argument. */ +union update_op_arg { + struct op_set_arg set; + struct op_del_arg del; + struct op_arith_arg arith; + struct op_bit_arg bit; + struct op_splice_arg splice; +}; + +typedef int +(*update_op_do_f)(struct update_op *op, struct update_field *field); + +typedef int +(*update_op_read_arg_f)(struct update_op *op, const char **expr, + int index_base); + +typedef void +(*update_op_store_f)(struct update_op *op, const char *in, char *out); + +/** + * A set of functions and properties to initialize, do and store + * an operation. + */ +struct update_op_meta { + /** + * Virtual function to read the arguments of the + * operation. + */ + update_op_read_arg_f read_arg_cb; + /** Virtual function to execute the operation. */ + update_op_do_f do_cb; + /** + * Virtual function to store a result of the operation. + */ + update_op_store_f store_cb; + /** Argument count. */ + uint32_t arg_count; +}; + +/** A single UPDATE operation. */ +struct update_op { + /** Operation meta depending on the op type. */ + const struct update_op_meta *meta; + /** Operation arguments. */ + union update_op_arg arg; + /** First level field no. */ + int32_t field_no; + /** Size of a new field after it is updated. */ + uint32_t new_field_len; + /** Opcode symbol: = + - / ... */ + char opcode; +}; + +/** + * Decode an update operation from MessagePack. + * @param[out] op Update operation. + * @param index_base Field numbers base: 0 or 1. + * @param dict Dictionary to lookup field number by a name. + * @param expr MessagePack. + * + * @retval 0 Success. + * @retval -1 Client error. + */ +int +update_op_decode(struct update_op *op, int index_base, + struct tuple_dictionary *dict, const char **expr); + +/* }}} update_op */ + +/* {{{ update_field */ + +/** Types of field update. */ +enum update_type { + /** + * Field is not updated. Just save it as is. It is used, + * for example, when a rope is split in two parts: not + * changed left range of fields, and a right range with + * its first field changed. In this case the left range is + * NOP. + */ + UPDATE_NOP, + /** + * Field is a scalar value, updated via set, arith, bit, + * splice, or any other scalar-applicable operation. + */ + UPDATE_SCALAR, + /** + * Field is an updated array. Check the rope for updates + * of individual fields. + */ + UPDATE_ARRAY, +}; + +/** + * Generic structure describing update of a field. It can be + * tuple field, field of a tuple field, or any another tuple + * internal object: map, array, scalar, or unchanged field of any + * type without op. This is a node of the whole update tree. + * + * If the field is array or map, it contains more children fields + * inside corresponding sub-structures. + */ +struct update_field { + /** + * Type of this field's update. Union below depends on it. + */ + enum update_type type; + /** Field data to update. */ + const char *data; + uint32_t size; + union { + /** + * This update is terminal. It does a scalar + * operation and has no children fields. + */ + struct { + struct update_op *op; + } scalar; + /** + * This update changes an array. Children fields + * are stored in rope nodes. + */ + struct { + struct rope *rope; + } array; + }; +}; + +/** + * Size of the updated field, including all children recursively. + */ +uint32_t +update_field_sizeof(struct update_field *field); + +/** Save the updated field, including all children recursively. */ +uint32_t +update_field_store(struct update_field *field, char *out, char *out_end); + +/** + * Generate declarations for a concrete field type: array, bar + * etc. Each complex type has basic operations of the same + * signature: insert, set, delete, arith, bit, splice. + * + * These operations can call each other going down a JSON path. + */ +#define OP_DECL_GENERIC(type) \ +int \ +do_op_##type##_insert(struct update_op *op, struct update_field *field); \ + \ +int \ +do_op_##type##_set(struct update_op *op, struct update_field *field); \ + \ +int \ +do_op_##type##_delete(struct update_op *op, struct update_field *field); \ + \ +int \ +do_op_##type##_arith(struct update_op *op, struct update_field *field); \ + \ +int \ +do_op_##type##_bit(struct update_op *op, struct update_field *field); \ + \ +int \ +do_op_##type##_splice(struct update_op *op, struct update_field *field); \ + \ +uint32_t \ +update_##type##_sizeof(struct update_field *field); \ + \ +uint32_t \ +update_##type##_store(struct update_field *field, char *out, char *out_end); + +/* }}} update_field */ + +/* {{{ update_field.array */ + +/** + * Initialize @a field as an array to update. + * @param[out] field Field to initialize. + * @param data MessagePack data of the array to update. + * @param data_end End of @a data. + * @param field_count Field count in @data. + * + * @retval 0 Success. + * @retval -1 Error. + */ +int +update_array_create(struct update_field *field, const char *data, + const char *data_end, uint32_t field_count); + +OP_DECL_GENERIC(array) + +/* }}} update_field.array */ + +#undef OP_DECL_GENERIC + +/* {{{ Common helpers. */ + +/** + * These helper functions are used when a caller function doesn't + * know type of a child node to propagate an update down the tree. + * For example, do_op_array_set calls do_op_set on its childs. + * Each child can be another array, a bar, a route, a map - + * anything. The functions below help to make such places shorter + * and simpler. + */ +#define OP_DECL_GENERIC(op_type) \ +static inline int \ +do_op_##op_type(struct update_op *op, struct update_field *field) \ +{ \ + switch (field->type) { \ + case UPDATE_ARRAY: \ + return do_op_array_##op_type(op, field); \ + default: \ + unreachable(); \ + } \ + return 0; \ +} + +OP_DECL_GENERIC(insert) + +OP_DECL_GENERIC(set) + +OP_DECL_GENERIC(delete) + +OP_DECL_GENERIC(arith) + +OP_DECL_GENERIC(bit) + +OP_DECL_GENERIC(splice) + +#undef OP_DECL_GENERIC + +/* }}} Common helpers. */ + +/* {{{ Scalar helpers. */ + +int +make_arith_operation(struct update_op *op, struct op_arith_arg arg, + struct op_arith_arg *ret); + +void +store_op_arith(struct update_op *op, const char *in, char *out); + +uint32_t +update_arith_sizeof(struct op_arith_arg *arg); + +int +update_op_do_arith(struct update_op *op, const char *old); + +int +update_op_do_bit(struct update_op *op, const char *old); + +int +update_op_do_splice(struct update_op *op, const char *old); + +/* }}} Scalar helpers. */ + +/** {{{ Error helpers. */ +/** + * All the error helpers below set diag with appropriate error + * code, taking into account field_no < 0, complex paths. They all + * return -1 to shorten error returning in a caller function to + * single line. + */ + +int +update_err_no_such_field(const struct update_op *op); + +int +update_err(const struct update_op *op, const char *reason); + +static inline int +update_err_double(const struct update_op *op) +{ + return update_err(op, "double update of the same field"); +} + +/** }}} Error helpers. */ + +#endif /* TARANTOOL_BOX_TUPLE_UPDATE_FIELD_H */ diff --git a/test/box/misc.result b/test/box/misc.result index c46c5a9d6..125d9139d 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -356,7 +356,7 @@ t; 22: box.error.TUPLE_NOT_ARRAY 23: box.error.FIELD_TYPE 24: box.error.INDEX_PART_TYPE_MISMATCH - 25: box.error.SPLICE + 25: box.error.UPDATE_SPLICE 26: box.error.UPDATE_ARG_TYPE 27: box.error.FORMAT_MISMATCH_INDEX_PART 28: box.error.UNKNOWN_UPDATE_OP diff --git a/test/box/tuple.result b/test/box/tuple.result index 895462518..c748c886a 100644 --- a/test/box/tuple.result +++ b/test/box/tuple.result @@ -1371,13 +1371,13 @@ d:update{{'-', 1, ffi.new('float', 635)}} d:update{{'+', 1, 0/0}} --- - error: 'Argument type in operation ''+'' on field 1 does not match field type: expected - a number convertible to decimal.' + a number convertible to decimal' ... -- inf d:update{{'-', 1, 1/0}} --- - error: 'Argument type in operation ''-'' on field 1 does not match field type: expected - a number convertible to decimal.' + a number convertible to decimal' ... -- decimal overflow d = box.tuple.new(dec.new('9e37')) -- 2.20.1 (Apple Git-117)
next prev parent reply other threads:[~2019-08-31 21:32 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-08-31 21:35 [tarantool-patches] [PATCH v2 0/8] JSON updates Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 1/8] tuple: expose JSON go_to_key and go_to_index functions Vladislav Shpilevoy 2019-08-31 21:35 ` Vladislav Shpilevoy [this message] [not found] ` <20190903192059.GE15611@atlas> [not found] ` <6ee759cf-a975-e6a9-6f52-f855958ffe06@tarantool.org> [not found] ` <20191005132055.GD3913@atlas> [not found] ` <20191005135037.GJ3913@atlas> 2019-10-19 15:11 ` [Tarantool-patches] [tarantool-patches] Re: [PATCH v2 2/8] tuple: rework updates to improve code extendibility Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 3/8] json: lexer_eof and token_cmp helper functions Vladislav Shpilevoy [not found] ` <20190903192433.GF15611@atlas> [not found] ` <f5612e04-dc56-f4bd-1298-c5841ac909f5@tarantool.org> [not found] ` <20191005132231.GE3913@atlas> [not found] ` <20191005135014.GI3913@atlas> 2019-10-19 15:08 ` [Tarantool-patches] [tarantool-patches] " Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 4/8] tuple: account the whole array in field.data and size Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 5/8] tuple: enable JSON bar updates Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 6/8] tuple: make update operation tokens consumable Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 7/8] tuple: JSON updates support intersection by arrays Vladislav Shpilevoy 2019-08-31 21:35 ` [tarantool-patches] [PATCH v2 8/8] tuple: JSON updates support intersection by maps Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=40775b37b9067dc2bace5b97d4694d0d869dd035.1567287197.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [tarantool-patches] [PATCH v2 2/8] tuple: rework updates to improve code extendibility' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox