[Tarantool-patches] [PATCH v1 1/1] sql: introduce operator []

Konstantin Osipov kostja.osipov at gmail.com
Fri Nov 19 20:25:02 MSK 2021


* Mergen Imeev via Tarantool-patches <tarantool-patches at dev.tarantool.org> [21/11/19 17:49]:
> This patch introduces operator [] that allows to get elements from MAP,
> ARRAY and ANY values.
> 
> Part of #4762
> Part of #4763
> 

Have you benchmarked this? This is going to be a very hot path.
There is already tuple field access in the box by path, why not reuse it?



> @TarantoolBot document
> Title: Operator [] in SQL
> 
> Operator [] allows to get elements of MAP, ARRAY and ANY values.
> 
> Rules for operator []:
> 1) operator applied to the value to the left of `[` ("left-value");
> 2) if left-value is not MAP, ARRAY or ANY, an error is thrown;
> 3) if there is no values between `[` and `]`, left-value is returned as
>    the result;
> 4) if there is one value between `[` and `]` ("right-value"), the result
>    is:
>   a) if left-value is ANY and its primitive type is not MAP or ARRAY,
>      the result is NULL;
>   b) if the type or primitive type of left-value is ARRAY, and
>      right-value is INTEGER and its value is greater than 0 and not
>      greater than the number of elements in ARRAY, the result will be a
>      value with right-value as the index, otherwise the result will be
>      NULL;
>   c) if the type or primitive type of left-value is MAP and it contains
>      right-value as one of its keys, the result is the value with
>      right-value as the key in left-value, otherwise the result is NULL;
> 5) if there is more than one value between `[` and `]` than
>    left-value[a, b, c, ...] == left-value[a][b][c]... except it will
>    return NULL, if any of the `[]` operators return NULL.
> 
> Examples:
> ```
> tarantool> box.execute([[SELECT {'a' : 12, 3 : 34}[3];]])
> ---
> - metadata:
>   - name: COLUMN_1
>     type: any
>   rows:
>   - [34]
> ...
> ```
> 
> ```
> tarantool> box.execute([[SELECT [12, [4, 5, 6], 34][2, 2];]])
> ---
> - metadata:
>   - name: COLUMN_1
>     type: any
>   rows:
>   - [5]
> ...
> ```
> ---
> https://github.com/tarantool/tarantool/issues/4762
> https://github.com/tarantool/tarantool/issues/4763
> https://github.com/tarantool/tarantool/tree/imeevma/gh-4763-syntax-for-map-array
> 
>  extra/addopcodes.sh           |   1 +
>  src/box/sql/expr.c            |  28 +++++
>  src/box/sql/mem.c             |  59 +++++++++++
>  src/box/sql/mem.h             |  18 ++++
>  src/box/sql/parse.y           |  17 ++++
>  src/box/sql/vdbe.c            |  33 ++++++
>  test/sql-tap/engine.cfg       |   1 +
>  test/sql-tap/msgpack.test.lua | 186 ++++++++++++++++++++++++++++++++++
>  8 files changed, 343 insertions(+)
>  create mode 100755 test/sql-tap/msgpack.test.lua
> 
> diff --git a/extra/addopcodes.sh b/extra/addopcodes.sh
> index 3f8cfdf02..51acfe38e 100755
> --- a/extra/addopcodes.sh
> +++ b/extra/addopcodes.sh
> @@ -53,6 +53,7 @@ extras="            \
>      LINEFEED        \
>      SPACE           \
>      ILLEGAL         \
> +    GETITEM         \
>  "
>  
>  IFS=" "
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 789d8906c..abf47b8ef 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -3461,6 +3461,30 @@ expr_code_map(struct Parse *parser, struct Expr *expr, int reg)
>  	sqlVdbeAddOp3(vdbe, OP_Map, count, reg, values_reg);
>  }
>  
> +static void
> +expr_code_getitem(struct Parse *parser, struct Expr *expr, int reg)
> +{
> +	struct Vdbe *vdbe = parser->pVdbe;
> +	struct ExprList *list = expr->x.pList;
> +	assert(list != NULL);
> +	int count = list->nExpr;
> +	struct Expr *value = list->a[count - 1].pExpr;
> +
> +	enum field_type type = value->op != TK_NULL ? sql_expr_type(value) :
> +			       field_type_MAX;
> +	if (value->op != TK_VARIABLE && type != FIELD_TYPE_MAP &&
> +	    type != FIELD_TYPE_ARRAY && type != FIELD_TYPE_ANY) {
> +		diag_set(ClientError, ER_SQL_PARSER_GENERIC, "Selecting is "
> +			 "only possible from any, map and array values");
> +		parser->is_aborted = true;
> +		return;
> +	}
> +	int reg_operands = parser->nMem + 1;
> +	parser->nMem += count;
> +	sqlExprCodeExprList(parser, list, reg_operands, 0, SQL_ECEL_FACTOR);
> +	sqlVdbeAddOp3(vdbe, OP_Getitem, count, reg, reg_operands);
> +}
> +
>  /*
>   * Erase column-cache entry number i
>   */
> @@ -3920,6 +3944,10 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>  		expr_code_map(pParse, pExpr, target);
>  		return target;
>  
> +	case TK_GETITEM:
> +		expr_code_getitem(pParse, pExpr, target);
> +		return target;
> +
>  	case TK_LT:
>  	case TK_LE:
>  	case TK_GT:
> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index cfb88bffe..195dfde2b 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -3107,6 +3107,65 @@ mem_encode_map(const struct Mem *mems, uint32_t count, uint32_t *size,
>  	return map;
>  }
>  
> +static int
> +mp_getitem(const char **data, const struct Mem *key)
> +{
> +	if ((mp_typeof(**data) != MP_ARRAY && mp_typeof(**data) != MP_MAP)) {
> +		*data = NULL;
> +		return 0;
> +	}
> +	const char *end = *data;
> +	if (mp_typeof(**data) == MP_ARRAY) {
> +		uint32_t size = mp_decode_array(data);
> +		if (!mem_is_uint(key) || key->u.u == 0 || key->u.u > size) {
> +			*data = NULL;
> +			return 0;
> +		}
> +		for (uint32_t i = 0; i < key->u.u - 1; ++i)
> +			mp_next(data);
> +		return 0;
> +	}
> +	struct Mem mem;
> +	mem_create(&mem);
> +	uint32_t size = mp_decode_map(data);
> +	for (uint32_t i = 0; i < size; ++i) {
> +		uint32_t len;
> +		if (mem_from_mp_ephemeral(&mem, *data, &len) != 0)
> +			return -1;
> +		*data += len;
> +		assert(!mem_is_map(&mem) && !mem_is_array(&mem));
> +		if (mem_cmp_scalar(&mem, key, NULL) == 0)
> +			return 0;
> +		mp_next(data);
> +	}
> +	mp_next(&end);
> +	if (*data == end)
> +		*data = NULL;
> +	return 0;
> +}
> +
> +int
> +mem_getitem(const struct Mem *mem, const struct Mem *keys, int count,
> +	    struct Mem *res)
> +{
> +	assert(count > 0);
> +	assert(mem_is_map(mem) || mem_is_array(mem));
> +	const char *data = mem->z;
> +	for (int i = 0; i < count && data != NULL; ++i) {
> +		if (mp_getitem(&data, &keys[i]) != 0)
> +			return -1;
> +	}
> +	if (data == NULL) {
> +		mem_set_null(res);
> +		return 0;
> +	}
> +	uint32_t len;
> +	if (mem_from_mp(res, data, &len) != 0)
> +		return -1;
> +	res->flags |= MEM_Any;
> +	return 0;
> +}
> +
>  /**
>   * Allocate a sequence of initialized vdbe memory registers
>   * on region.
> diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> index f442d8673..90eb3b137 100644
> --- a/src/box/sql/mem.h
> +++ b/src/box/sql/mem.h
> @@ -135,6 +135,19 @@ mem_is_num(const struct Mem *mem)
>  			MEM_TYPE_DEC)) != 0;
>  }
>  
> +static inline bool
> +mem_is_any(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Any) != 0;
> +}
> +
> +static inline bool
> +mem_is_doc(const struct Mem *mem)
> +{
> +	return (mem->type & (MEM_TYPE_MAP | MEM_TYPE_ARRAY)) != 0 ||
> +	       (mem->flags & MEM_Any) != 0;
> +}
> +
>  static inline bool
>  mem_is_metatype(const struct Mem *mem)
>  {
> @@ -884,3 +897,8 @@ mem_encode_array(const struct Mem *mems, uint32_t count, uint32_t *size,
>  char *
>  mem_encode_map(const struct Mem *mems, uint32_t count, uint32_t *size,
>  	       struct region *region);
> +
> +/** Return a value from ANY, MAP, or ARRAY MEM using the MEM array as keys. */
> +int
> +mem_getitem(const struct Mem *mem, const struct Mem *keys, int count,
> +	    struct Mem *res);
> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
> index db7fef71a..30922b7f6 100644
> --- a/src/box/sql/parse.y
> +++ b/src/box/sql/parse.y
> @@ -155,6 +155,7 @@ cmdx ::= cmd.
>  %left CONCAT.
>  %left COLLATE.
>  %right BITNOT.
> +%left LB.
>  
>  
>  ///////////////////// Begin and end transactions. ////////////////////////////
> @@ -1126,6 +1127,22 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
>    sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
>  }
>  
> +expr(A) ::= expr(X) LB exprlist(Y) RB(E). {
> +  struct Expr *expr = sql_expr_new_dequoted(pParse->db, TK_GETITEM, NULL);
> +  if (expr == NULL) {
> +    sql_expr_list_delete(pParse->db, Y);
> +    pParse->is_aborted = true;
> +    return;
> +  }
> +  Y = sql_expr_list_append(pParse->db, Y, X.pExpr);
> +  expr->x.pList = Y;
> +  expr->type = FIELD_TYPE_ANY;
> +  sqlExprSetHeightAndFlags(pParse, expr);
> +  A.pExpr = expr;
> +  A.zStart = X.zStart;
> +  A.zEnd = &E.z[E.n];
> +}
> +
>  expr(A) ::= LB(X) exprlist(Y) RB(E). {
>    struct Expr *expr = sql_expr_new_dequoted(pParse->db, TK_ARRAY, NULL);
>    if (expr == NULL) {
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 86de3f98a..6b1810508 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -1458,6 +1458,39 @@ case OP_Map: {
>  	break;
>  }
>  
> +/**
> + * Opcode: Getitem P1 P2 P3 * *
> + * Synopsis: r[P2] = value[P3@(P1 - 1)]
> + *
> + * Get an element from the value in register P3[P1 - 1] using values in
> + * registers P3, ... P3 + (P1 - 2).
> + */
> +case OP_Getitem: {
> +	int count = pOp->p1 - 1;
> +	struct Mem *value = &aMem[pOp->p3 + count];
> +	if (!mem_is_doc(value) ) {
> +		diag_set(ClientError, ER_SQL_TYPE_MISMATCH, mem_str(value),
> +			 "any, map or array");
> +		goto abort_due_to_error;
> +	}
> +
> +	pOut = &aMem[pOp->p2];
> +	if (count == 0) {
> +		if (mem_copy(pOut, value) != 0)
> +			goto abort_due_to_error;
> +		break;
> +	}
> +	if (!mem_is_map(value) && !mem_is_array(value)) {
> +		mem_set_null(pOut);
> +		break;
> +	}
> +
> +	struct Mem *keys = &aMem[pOp->p3];
> +	if (mem_getitem(value, keys, count, pOut) != 0)
> +		goto abort_due_to_error;
> +	break;
> +}
> +
>  /* Opcode: Eq P1 P2 P3 P4 P5
>   * Synopsis: IF r[P3]==r[P1]
>   *
> diff --git a/test/sql-tap/engine.cfg b/test/sql-tap/engine.cfg
> index 528212ab6..52ce5a1f6 100644
> --- a/test/sql-tap/engine.cfg
> +++ b/test/sql-tap/engine.cfg
> @@ -35,6 +35,7 @@
>      "built-in-functions.test.lua": {
>          "memtx": {"engine": "memtx"}
>      },
> +    "msgpack.test.lua": {},
>      "gh-6157-unnecessary-free-on-string.test.lua": {},
>      "gh-6376-wrong-double-to-dec-cmp.test.lua": {},
>      "gh-4077-iproto-execute-no-bind.test.lua": {},
> diff --git a/test/sql-tap/msgpack.test.lua b/test/sql-tap/msgpack.test.lua
> new file mode 100755
> index 000000000..7872426a5
> --- /dev/null
> +++ b/test/sql-tap/msgpack.test.lua
> @@ -0,0 +1,186 @@
> +#!/usr/bin/env tarantool
> +local test = require("sqltester")
> +test:plan(22)
> +
> +-- Make sure that it is possible to get elements from MAP, ARRAY and ANY.
> +test:do_execsql_test(
> +    "msgpack-1.1",
> +    [[
> +        SELECT [123, 234, 356, 467][2];
> +    ]], {
> +        234
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.2",
> +    [[
> +        SELECT CAST([123, 234, 356, 467] AS ANY)[3];
> +    ]], {
> +        356
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.3",
> +    [[
> +        SELECT CAST([123, 234, 356, 467] AS ANY)['3'];
> +    ]], {
> +        ""
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.4",
> +    [[
> +        SELECT {'one' : 123, 3 : 'two', '123' : true}[3];
> +    ]], {
> +        'two'
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.5",
> +    [[
> +        SELECT {'one' : 123, 3 : 'two', '123' : true}['one'];
> +    ]], {
> +        123
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.6",
> +    [[
> +        SELECT {'one' : 123, 3 : 'two', '123' : true}['three'];
> +    ]], {
> +        ""
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.7",
> +    [[
> +        SELECT CAST({'one' : 123, 3 : 'two', '123' : true} AS ANY)['123'];
> +    ]], {
> +        true
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.8",
> +    [[
> +        SELECT CAST(1 AS ANY)[2];
> +    ]], {
> +        ""
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.9",
> +    [[
> +        SELECT CAST('1' AS ANY)['asd'];
> +    ]], {
> +        ""
> +    })
> +
> +test:do_execsql_test(
> +    "msgpack-1.10",
> +    [[
> +        SELECT CAST('1' AS ANY)[];
> +    ]], {
> +        '1'
> +    })
> +
> +--
> +-- Make sure that operator [] cannot get elements from values of types other
> +-- than MAP, ARRAY and ANY.
> +--
> +test:do_catchsql_test(
> +    "msgpack-2.1",
> +    [[
> +        SELECT 1[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.2",
> +    [[
> +        SELECT -1[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.3",
> +    [[
> +        SELECT 1.1[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.4",
> +    [[
> +        SELECT 1.2e0[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.5",
> +    [[
> +        SELECT '1'[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.6",
> +    [[
> +        SELECT x'31'[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.7",
> +    [[
> +        SELECT uuid()[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.8",
> +    [[
> +        SELECT true[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.9",
> +    [[
> +        SELECT CAST(1 AS NUMBER)[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.10",
> +    [[
> +        SELECT CAST('a' AS STRING)[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.11",
> +    [[
> +        SELECT NULL[];
> +    ]], {
> +        1, "Selecting is only possible from any, map and array values"
> +    })
> +
> +test:do_catchsql_test(
> +    "msgpack-2.12",
> +    [[
> +        SELECT CAST(NULL AS ANY)[];
> +    ]], {
> +        1, "Type mismatch: can not convert NULL to any, map or array"
> +    })
> +
> +test:finish_test()
> -- 
> 2.25.1

-- 
Konstantin Osipov, Moscow, Russia


More information about the Tarantool-patches mailing list