* [tarantool-patches] [PATCH 0/2] Compute derived collation for concatenation @ 2019-01-16 13:13 Nikita Pettik 2019-01-16 13:13 ` [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions Nikita Pettik ` (2 more replies) 0 siblings, 3 replies; 15+ messages in thread From: Nikita Pettik @ 2019-01-16 13:13 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik Branch: https://github.com/tarantool/tarantool/tree/np/gh-3937-concatenation-collation Issue: https://github.com/tarantool/tarantool/issues/3937 According to ANSI concatenation operator should derive collation from its operands. This small patch-set implements this procedure according to rules described in SQL standard. Nikita Pettik (2): sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions sql: compute resulting collation for concatenation src/box/sql/expr.c | 125 +++++++++++++++++++++++++++++--------------- src/box/sql/select.c | 27 ++++++---- src/box/sql/sqliteInt.h | 14 +++-- src/box/sql/where.c | 56 ++++++++++---------- src/box/sql/whereexpr.c | 23 ++++---- test/sql/collation.result | 102 ++++++++++++++++++++++++++++++++++++ test/sql/collation.test.lua | 46 ++++++++++++++++ 7 files changed, 296 insertions(+), 97 deletions(-) -- 2.15.1 ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions 2019-01-16 13:13 [tarantool-patches] [PATCH 0/2] Compute derived collation for concatenation Nikita Pettik @ 2019-01-16 13:13 ` Nikita Pettik 2019-01-17 13:28 ` [tarantool-patches] " Konstantin Osipov 2019-01-24 18:28 ` Vladislav Shpilevoy 2019-01-16 13:13 ` [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation Nikita Pettik 2019-02-25 11:29 ` [tarantool-patches] Re: [PATCH 0/2] Compute derived " Kirill Yukhin 2 siblings, 2 replies; 15+ messages in thread From: Nikita Pettik @ 2019-01-16 13:13 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik Lets make sql_expr_coll() return error code for two reasons. Firstly, we are going to use this function to detect operands of concatenation with incompatible collations. Secondly, pointer to struct coll in most cases is redundant since collation id (which in turn returned via output parameter) is enough to proceed the same operations. For the same reason lets make sql_binary_compare_coll_seq() return collation id instead of struct coll* and remove corresponding output parameter. Needed for #3947 --- src/box/sql/expr.c | 82 ++++++++++++++++++++++++------------------------- src/box/sql/select.c | 27 ++++++++++------ src/box/sql/sqliteInt.h | 14 ++++----- src/box/sql/where.c | 56 ++++++++++++++++----------------- src/box/sql/whereexpr.c | 23 ++++++++------ 5 files changed, 105 insertions(+), 97 deletions(-) diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index b67b22c23..f8819f779 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -189,10 +189,9 @@ sqlite3ExprSkipCollate(Expr * pExpr) return pExpr; } -struct coll * +int sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) { - struct coll *coll = NULL; *is_explicit_coll = false; *coll_id = COLL_NONE; while (p != NULL) { @@ -205,7 +204,7 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) } if (op == TK_COLLATE || (op == TK_REGISTER && p->op2 == TK_COLLATE)) { - coll = sql_get_coll_seq(parse, p->u.zToken, coll_id); + (void) sql_get_coll_seq(parse, p->u.zToken, coll_id); *is_explicit_coll = true; break; } @@ -217,7 +216,7 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) */ int j = p->iColumn; if (j >= 0) { - coll = sql_column_collation(p->space_def, j, + (void) sql_column_collation(p->space_def, j, coll_id); } break; @@ -253,7 +252,7 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) break; } } - return coll; + return 0; } enum affinity_type @@ -377,27 +376,28 @@ illegal_collation_mix: return -1; } -struct coll * -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, - uint32_t *coll_id) +uint32_t +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) { assert(left != NULL); bool is_lhs_forced; bool is_rhs_forced; uint32_t lhs_coll_id; uint32_t rhs_coll_id; - struct coll *lhs_coll = sql_expr_coll(parser, left, &is_lhs_forced, - &lhs_coll_id); - struct coll *rhs_coll = sql_expr_coll(parser, right, &is_rhs_forced, - &rhs_coll_id); + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) + goto err; + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) + goto err; + uint32_t coll_id; if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, rhs_coll_id, is_rhs_forced, - coll_id) != 0) { - parser->rc = SQL_TARANTOOL_ERROR; - parser->nErr++; - return NULL; - } - return *coll_id == rhs_coll_id ? rhs_coll : lhs_coll;; + &coll_id) != 0) + goto err; + return coll_id; +err: + parser->rc = SQL_TARANTOOL_ERROR; + parser->nErr++; + return COLL_NONE; } /* @@ -413,12 +413,11 @@ codeCompare(Parse * pParse, /* The parsing (and code generating) context */ int jumpIfNull /* If true, jump if either operand is NULL */ ) { - uint32_t id; - struct coll *p4 = - sql_binary_compare_coll_seq(pParse, pLeft, pRight, &id); + uint32_t id = sql_binary_compare_coll_seq(pParse, pLeft, pRight); + struct coll *coll = coll_by_id(id)->coll; int p5 = binaryCompareP5(pLeft, pRight, jumpIfNull); int addr = sqlite3VdbeAddOp4(pParse->pVdbe, opcode, in2, dest, in1, - (void *)p4, P4_COLLSEQ); + (void *)coll, P4_COLLSEQ); sqlite3VdbeChangeP5(pParse->pVdbe, (u8) p5); return addr; } @@ -2489,20 +2488,15 @@ sqlite3FindInIndex(Parse * pParse, /* Parsing context */ for (i = 0; i < nExpr; i++) { Expr *pLhs = sqlite3VectorFieldSubexpr(pX->pLeft, i); Expr *pRhs = pEList->a[i].pExpr; - uint32_t id; - struct coll *pReq = sql_binary_compare_coll_seq - (pParse, pLhs, pRhs, &id); + uint32_t id = + sql_binary_compare_coll_seq(pParse, pLhs, pRhs); int j; for (j = 0; j < nExpr; j++) { if ((int) parts[j].fieldno != pRhs->iColumn) continue; - - struct coll *idx_coll = - parts[j].coll; - if (pReq != NULL && - pReq != idx_coll) + if (id != parts[j].coll_id) continue; break; } @@ -2815,9 +2809,9 @@ sqlite3CodeSubselect(Parse * pParse, /* Parsing context */ Expr *p = sqlite3VectorFieldSubexpr (pLeft, i); - sql_binary_compare_coll_seq(pParse, p, - pEList->a[i].pExpr, - &key_info->parts[i].coll_id); + key_info->parts[i].coll_id = + sql_binary_compare_coll_seq(pParse, p, + pEList->a[i].pExpr); } } } else if (ALWAYS(pExpr->x.pList != 0)) { @@ -2839,8 +2833,9 @@ sqlite3CodeSubselect(Parse * pParse, /* Parsing context */ affinity = AFFINITY_BLOB; } bool unused; - sql_expr_coll(pParse, pExpr->pLeft, - &unused, &key_info->parts[0].coll_id); + (void) sql_expr_coll(pParse, pExpr->pLeft, + &unused, + &key_info->parts[0].coll_id); /* Loop through each expression in <exprlist>. */ r1 = sqlite3GetTempReg(pParse); @@ -3100,8 +3095,9 @@ sqlite3ExprCodeIN(Parse * pParse, /* Parsing and code generating context */ bool unused; uint32_t id; ExprList *pList = pExpr->x.pList; - struct coll *coll = sql_expr_coll(pParse, pExpr->pLeft, - &unused, &id); + if (sql_expr_coll(pParse, pExpr->pLeft, &unused, &id) != 0) + goto sqlite3ExprCodeIN_finished; + struct coll *coll = coll_by_id(id)->coll; int labelOk = sqlite3VdbeMakeLabel(v); int r2, regToFree; int regCkNull = 0; @@ -3226,7 +3222,9 @@ sqlite3ExprCodeIN(Parse * pParse, /* Parsing and code generating context */ uint32_t id; int r3 = sqlite3GetTempReg(pParse); Expr *p = sqlite3VectorFieldSubexpr(pLeft, i); - struct coll *pColl = sql_expr_coll(pParse, p, &unused, &id); + if (sql_expr_coll(pParse, p, &unused, &id) != 0) + goto sqlite3ExprCodeIN_finished; + struct coll *pColl = coll_by_id(id)->coll; /* Tarantool: Replace i -> aiMap [i], since original order of columns * is preserved. */ @@ -4052,9 +4050,11 @@ sqlite3ExprCodeTarget(Parse * pParse, Expr * pExpr, int target) 0 && coll == NULL) { bool unused; uint32_t id; - coll = sql_expr_coll(pParse, - pFarg->a[i].pExpr, - &unused, &id); + if (sql_expr_coll(pParse, + pFarg->a[i].pExpr, + &unused, &id) != 0) + break; + coll = coll_by_id(id)->coll;; } } if (pFarg) { diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 02ee225f1..8ba9554f6 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -1042,10 +1042,11 @@ selectInnerLoop(Parse * pParse, /* The parser context */ for (i = 0; i < nResultCol; i++) { bool is_found; uint32_t id; - struct coll *coll = - sql_expr_coll(pParse, + if (sql_expr_coll(pParse, pEList->a[i].pExpr, - &is_found, &id); + &is_found, &id) != 0) + break; + struct coll *coll = coll_by_id(id)->coll; if (i < nResultCol - 1) { sqlite3VdbeAddOp3(v, OP_Ne, regResult + i, @@ -1424,7 +1425,10 @@ sql_expr_list_to_key_info(struct Parse *parse, struct ExprList *list, int start) struct key_part_def *part = &key_info->parts[i - start]; bool unused; uint32_t id; - sql_expr_coll(parse, item->pExpr, &unused, &id); + if (sql_expr_coll(parse, item->pExpr, &unused, &id) != 0) { + sqlite3DbFree(parse->db, key_info); + return NULL; + } part->coll_id = id; part->sort_order = item->sort_order; } @@ -1974,7 +1978,7 @@ sqlite3SelectAddColumnTypeAndCollation(Parse * pParse, /* Parsing contexts */ uint32_t coll_id; if (pTab->def->fields[i].coll_id == COLL_NONE && - sql_expr_coll(pParse, p, &is_found, &coll_id) && + sql_expr_coll(pParse, p, &is_found, &coll_id) == 0 && coll_id != COLL_NONE) pTab->def->fields[i].coll_id = coll_id; } @@ -2196,8 +2200,9 @@ multi_select_coll_seq_r(struct Parse *parser, struct Select *p, int n, * resolution and we would not have got this far. */ assert(n >= 0 && n < p->pEList->nExpr); - sql_expr_coll(parser, p->pEList->a[n].pExpr, &is_current_forced, - ¤t_coll_id); + if (sql_expr_coll(parser, p->pEList->a[n].pExpr, &is_current_forced, + ¤t_coll_id) != 0) + return COLL_NONE; uint32_t res_coll_id; if (collations_check_compatibility(prior_coll_id, is_prior_forced, current_coll_id, is_current_forced, @@ -2253,7 +2258,7 @@ sql_multiselect_orderby_to_key_info(struct Parse *parse, struct Select *s, uint32_t id; bool unused; if ((term->flags & EP_Collate) != 0) { - sql_expr_coll(parse, term, &unused, &id); + (void) sql_expr_coll(parse, term, &unused, &id); } else { id = multi_select_coll_seq(parse, s, item->u.x.iOrderByCol - 1); @@ -5328,8 +5333,10 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo) uint32_t id; for (j = 0, pItem = pList->a; coll == NULL && j < nArg; j++, pItem++) { - coll = sql_expr_coll(pParse, pItem->pExpr, - &unused, &id); + if (sql_expr_coll(pParse, pItem->pExpr, + &unused, &id) != 0) + return; + coll = coll_by_id(id)->coll; } if (regHit == 0 && pAggInfo->nAccumulator) regHit = ++pParse->nMem; diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h index 7501fadc8..50d297815 100644 --- a/src/box/sql/sqliteInt.h +++ b/src/box/sql/sqliteInt.h @@ -4337,10 +4337,10 @@ const char *sqlite3ErrStr(int); * clause is used. * @param[out] coll_id Collation identifier. * - * @retval Pointer to collation. + * @retval Return code: < 0 in case of error, 0 on success. */ -struct coll * -sql_expr_coll(Parse * pParse, Expr * pExpr, bool *is_explicit_coll, +int +sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id); Expr *sqlite3ExprAddCollateToken(Parse * pParse, Expr *, const Token *, int); @@ -4684,13 +4684,11 @@ collations_check_compatibility(uint32_t lhs_id, bool is_lhs_forced, * @param parser Parser. * @param left Left expression. * @param right Right expression. Can be NULL. - * @param[out] coll_id Collation identifier. * - * @retval Collation object. + * @retval Id of collation object. */ -struct coll * -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, - uint32_t *coll_id); +uint32_t +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right); int sqlite3TempInMemory(const sqlite3 *); #ifndef SQLITE_OMIT_CTE With *sqlite3WithAdd(Parse *, With *, Token *, ExprList *, Select *); diff --git a/src/box/sql/where.c b/src/box/sql/where.c index 571b5af78..d1edbdfa9 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -304,11 +304,14 @@ whereScanNext(WhereScan * pScan) Parse *pParse = pWC->pWInfo->pParse; assert(pX->pLeft); - uint32_t unused; - struct coll *coll = + uint32_t id = sql_binary_compare_coll_seq( pParse, pX->pLeft, - pX->pRight, &unused); + pX->pRight); + struct coll *coll = + id != COLL_NONE ? + coll_by_id(id)->coll : + NULL; if (coll != pScan->coll) continue; } @@ -556,10 +559,10 @@ findIndexCol(Parse * pParse, /* Parse context */ p->iColumn == (int) part_to_match->fieldno) { bool is_found; uint32_t id; - struct coll *coll = sql_expr_coll(pParse, - pList->a[i].pExpr, - &is_found, &id); - if (coll == part_to_match->coll) + if (sql_expr_coll(pParse, pList->a[i].pExpr, + &is_found, &id) != 0) + return -1; + if (id == part_to_match->coll_id) return i; } } @@ -2263,7 +2266,6 @@ whereRangeVectorLen(Parse * pParse, /* Parsing context */ */ char aff; /* Comparison affinity */ char idxaff = 0; /* Indexed columns affinity */ - struct coll *pColl; /* Comparison collation sequence */ Expr *pLhs = pTerm->pExpr->pLeft->x.pList->a[i].pExpr; Expr *pRhs = pTerm->pExpr->pRight; if (pRhs->flags & EP_xIsSelect) { @@ -2288,11 +2290,10 @@ whereRangeVectorLen(Parse * pParse, /* Parsing context */ sqlite3TableColumnAffinity(space->def, pLhs->iColumn); if (aff != idxaff) break; - uint32_t unused; - pColl = sql_binary_compare_coll_seq(pParse, pLhs, pRhs, &unused); - if (pColl == 0) + uint32_t id = sql_binary_compare_coll_seq(pParse, pLhs, pRhs); + if (id == COLL_NONE) break; - if (idx_def->key_def->parts[i + nEq].coll != pColl) + if (idx_def->key_def->parts[i + nEq].coll_id != id) break; } return i; @@ -3269,16 +3270,18 @@ wherePathSatisfiesOrderBy(WhereInfo * pWInfo, /* The WHERE clause */ } if ((pTerm->eOperator & WO_EQ) != 0 && pOBExpr->iColumn >= 0) { - struct coll *coll1, *coll2; bool unused; - uint32_t id; - coll1 = sql_expr_coll(pWInfo->pParse, - pOrderBy->a[i].pExpr, - &unused, &id); - coll2 = sql_expr_coll(pWInfo->pParse, - pTerm->pExpr, - &unused, &id); - if (coll1 != coll2) + uint32_t lhs_id; + uint32_t rhs_id; + if (sql_expr_coll(pWInfo->pParse, + pOrderBy->a[i].pExpr, &unused, + &lhs_id) != 0) + return 0; + if (sql_expr_coll(pWInfo->pParse, + pTerm->pExpr, &unused, + &rhs_id) != 0) + return 0; + if (lhs_id != rhs_id) continue; } obSat |= MASKBIT(i); @@ -3401,13 +3404,10 @@ wherePathSatisfiesOrderBy(WhereInfo * pWInfo, /* The WHERE clause */ if (iColumn >= 0) { bool is_found; uint32_t id; - struct coll *coll = - sql_expr_coll(pWInfo->pParse, - pOrderBy->a[i].pExpr, - &is_found, &id); - struct coll *idx_coll = - idx_def->key_def->parts[j].coll; - if (coll != idx_coll) + sql_expr_coll(pWInfo->pParse, + pOrderBy->a[i].pExpr, + &is_found, &id); + if (idx_def->key_def->parts[j].coll_id != id) continue; } isMatch = 1; diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c index 342064ec8..e371cda35 100644 --- a/src/box/sql/whereexpr.c +++ b/src/box/sql/whereexpr.c @@ -37,6 +37,7 @@ * readability and editabiliity. This file contains utility routines for * analyzing Expr objects in the WHERE clause. */ +#include "box/coll_id_cache.h" #include "coll.h" #include "sqliteInt.h" #include "whereInt.h" @@ -167,7 +168,8 @@ exprCommute(Parse * pParse, Expr * pExpr) } else { bool is_found; uint32_t id; - sql_expr_coll(pParse, pExpr->pLeft, &is_found, &id); + (void) sql_expr_coll(pParse, pExpr->pLeft, &is_found, + &id); if (id != COLL_NONE) { /* * Neither X nor Y have COLLATE @@ -851,17 +853,18 @@ termIsEquivalence(Parse * pParse, Expr * pExpr) ) { return 0; } - uint32_t unused; - struct coll *coll1 = - sql_binary_compare_coll_seq(pParse, pExpr->pLeft, pExpr->pRight, - &unused); - if (coll1 == NULL) + uint32_t id = sql_binary_compare_coll_seq(pParse, pExpr->pLeft, + pExpr->pRight); + if (id == COLL_NONE) return 1; bool unused1; - coll1 = sql_expr_coll(pParse, pExpr->pLeft, &unused1, &unused); - struct coll *coll2 = sql_expr_coll(pParse, pExpr->pRight, &unused1, - &unused); - return coll1 != NULL && coll1 == coll2; + uint32_t lhs_id; + uint32_t rhs_id; + if (sql_expr_coll(pParse, pExpr->pLeft, &unused1, &lhs_id) != 0) + return 0; + if (sql_expr_coll(pParse, pExpr->pRight, &unused1, &rhs_id) != 0) + return 0; + return lhs_id != COLL_NONE && lhs_id == rhs_id; } /* -- 2.15.1 ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions 2019-01-16 13:13 ` [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions Nikita Pettik @ 2019-01-17 13:28 ` Konstantin Osipov 2019-01-24 18:28 ` Vladislav Shpilevoy 1 sibling, 0 replies; 15+ messages in thread From: Konstantin Osipov @ 2019-01-17 13:28 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik * Nikita Pettik <korablev@tarantool.org> [19/01/16 17:06]: > Lets make sql_expr_coll() return error code for two reasons. Firstly, > we are going to use this function to detect operands of concatenation > with incompatible collations. Secondly, pointer to struct coll in most > cases is redundant since collation id (which in turn returned via output > parameter) is enough to proceed the same operations. It is redundant but it is potentially faster. What are benefits of changing a pointer to an id? This looks like de-optimization. > For the same reason lets make sql_binary_compare_coll_seq() return > collation id instead of struct coll* and remove corresponding output > parameter. It should be always possible to query id from struct coll. I don't see a reason to operate with ids in the source code anywhere but in DDL. Moreover, if we keep changing it back and forth to our taste it will be a mess. the general rule for space id, index id, coll id is that they should not be used unless necessary for locking/consistency reasons. -- Konstantin Osipov, Moscow, Russia, +7 903 626 22 32 http://tarantool.io - www.twitter.com/kostja_osipov ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions 2019-01-16 13:13 ` [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions Nikita Pettik 2019-01-17 13:28 ` [tarantool-patches] " Konstantin Osipov @ 2019-01-24 18:28 ` Vladislav Shpilevoy 2019-02-14 23:26 ` n.pettik 1 sibling, 1 reply; 15+ messages in thread From: Vladislav Shpilevoy @ 2019-01-24 18:28 UTC (permalink / raw) To: tarantool-patches, Nikita Pettik Hi! Thanks for the patch! On 16/01/2019 16:13, Nikita Pettik wrote: > Lets make sql_expr_coll() return error code for two reasons. Firstly, > we are going to use this function to detect operands of concatenation > with incompatible collations. Secondly, pointer to struct coll in most > cases is redundant since collation id (which in turn returned via output > parameter) is enough to proceed the same operations. > For the same reason lets make sql_binary_compare_coll_seq() return > collation id instead of struct coll* and remove corresponding output > parameter. First of all, I agree with Kostja - it does not help in anything that you removed coll * from output parameters/return value. It just slows down some places like codeCompare, sqlite3ExprCodeIN, sqlite3ExprCodeTarget etc. - you just do coll_by_id() twice. Moreover, maybe making some places more accurate and compact, it pads out other ones and involves additional headers like in whereexpr.c. Please, return as it was. Nevertheless, see 4 comments below. > > Needed for #3947 > --- > src/box/sql/expr.c | 82 ++++++++++++++++++++++++------------------------- > src/box/sql/select.c | 27 ++++++++++------ > src/box/sql/sqliteInt.h | 14 ++++----- > src/box/sql/where.c | 56 ++++++++++++++++----------------- > src/box/sql/whereexpr.c | 23 ++++++++------ > 5 files changed, 105 insertions(+), 97 deletions(-) > > diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c > index b67b22c23..f8819f779 100644 > --- a/src/box/sql/expr.c > +++ b/src/box/sql/expr.c > @@ -205,7 +204,7 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) > } > if (op == TK_COLLATE || > (op == TK_REGISTER && p->op2 == TK_COLLATE)) { > - coll = sql_get_coll_seq(parse, p->u.zToken, coll_id); > + (void) sql_get_coll_seq(parse, p->u.zToken, coll_id); 1. It is not necessary to prefix (void) each function result of which you ignore. By the way, I do not understand why do you ignore it. Here and in all other places. > *is_explicit_coll = true; > break; > } > @@ -377,27 +376,28 @@ illegal_collation_mix: > return -1; > } > > -struct coll * > -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, > - uint32_t *coll_id) > +uint32_t > +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) > { > assert(left != NULL); > bool is_lhs_forced; > bool is_rhs_forced; > uint32_t lhs_coll_id; > uint32_t rhs_coll_id; > - struct coll *lhs_coll = sql_expr_coll(parser, left, &is_lhs_forced, > - &lhs_coll_id); > - struct coll *rhs_coll = sql_expr_coll(parser, right, &is_rhs_forced, > - &rhs_coll_id); > + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) > + goto err; > + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) > + goto err; > + uint32_t coll_id; > if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, > rhs_coll_id, is_rhs_forced, > - coll_id) != 0) { > - parser->rc = SQL_TARANTOOL_ERROR; > - parser->nErr++; > - return NULL; > - } > - return *coll_id == rhs_coll_id ? rhs_coll : lhs_coll;; > + &coll_id) != 0) > + goto err; > + return coll_id; > +err: > + parser->rc = SQL_TARANTOOL_ERROR; > + parser->nErr++; > + return COLL_NONE; 2. Please, do not return some obscure error codes - we use only 0/-1 and NULL/not NULL almost everywhere except some archaic or misreviewed code. If you want to return an error, it is better to move coll_id to out parameters. > } > > /* > diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h > index 7501fadc8..50d297815 100644 > --- a/src/box/sql/sqliteInt.h > +++ b/src/box/sql/sqliteInt.h > @@ -4337,10 +4337,10 @@ const char *sqlite3ErrStr(int); > * clause is used. > * @param[out] coll_id Collation identifier. > * > - * @retval Pointer to collation. > + * @retval Return code: < 0 in case of error, 0 on success. 3. Please, use separate @retval for each value. > */ > -struct coll * > -sql_expr_coll(Parse * pParse, Expr * pExpr, bool *is_explicit_coll, > +int > +sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, > uint32_t *coll_id); > > Expr *sqlite3ExprAddCollateToken(Parse * pParse, Expr *, const Token *, int); > @@ -4684,13 +4684,11 @@ collations_check_compatibility(uint32_t lhs_id, bool is_lhs_forced, > * @param parser Parser. > * @param left Left expression. > * @param right Right expression. Can be NULL. > - * @param[out] coll_id Collation identifier. > * > - * @retval Collation object. > + * @retval Id of collation object. 4. You didn't mentioned that it can be COLL_NONE, that means an error. > */ > -struct coll * > -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, > - uint32_t *coll_id); > +uint32_t > +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right); > int sqlite3TempInMemory(const sqlite3 *); > #ifndef SQLITE_OMIT_CTE > With *sqlite3WithAdd(Parse *, With *, Token *, ExprList *, Select *); ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions 2019-01-24 18:28 ` Vladislav Shpilevoy @ 2019-02-14 23:26 ` n.pettik 0 siblings, 0 replies; 15+ messages in thread From: n.pettik @ 2019-02-14 23:26 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy > On 16/01/2019 16:13, Nikita Pettik wrote: >> Lets make sql_expr_coll() return error code for two reasons. Firstly, >> we are going to use this function to detect operands of concatenation >> with incompatible collations. Secondly, pointer to struct coll in most >> cases is redundant since collation id (which in turn returned via output >> parameter) is enough to proceed the same operations. >> For the same reason lets make sql_binary_compare_coll_seq() return >> collation id instead of struct coll* and remove corresponding output >> parameter. > > First of all, I agree with Kostja - it does not help in anything that > you removed coll * from output parameters/return value. It just slows > down some places like codeCompare, sqlite3ExprCodeIN, sqlite3ExprCodeTarget > etc. - you just do coll_by_id() twice. > Moreover, maybe making some > places more accurate and compact, it pads out other ones and involves > additional headers like in whereexpr.c. Please, return as it was. Ok, then I will move struct coll * to output params for sql_expr_coll(). As for sql_binary_compare_coll_seq(), struct coll is needed only once - in codeCompare(); other 5 usages of this function leads to throwing away struct coll. In this case I believe it is really reasonable to operate only with id. Anyway, id moved to output params, now it returns error code. >> Needed for #3947 >> --- >> src/box/sql/expr.c | 82 ++++++++++++++++++++++++------------------------- >> src/box/sql/select.c | 27 ++++++++++------ >> src/box/sql/sqliteInt.h | 14 ++++----- >> src/box/sql/where.c | 56 ++++++++++++++++----------------- >> src/box/sql/whereexpr.c | 23 ++++++++------ >> 5 files changed, 105 insertions(+), 97 deletions(-) >> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c >> index b67b22c23..f8819f779 100644 >> --- a/src/box/sql/expr.c >> +++ b/src/box/sql/expr.c >> @@ -205,7 +204,7 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) >> } >> if (op == TK_COLLATE || >> (op == TK_REGISTER && p->op2 == TK_COLLATE)) { >> - coll = sql_get_coll_seq(parse, p->u.zToken, coll_id); >> + (void) sql_get_coll_seq(parse, p->u.zToken, coll_id); > > 1. It is not necessary to prefix (void) each function result of which > you ignore. By the way, I do not understand why do you ignore it. Here > and in all other places. Yep, it is not necessary, but this way is more descriptive. Refactored whole function, so it is not longer ignored. >> @@ -377,27 +376,28 @@ illegal_collation_mix: >> return -1; >> } >> -struct coll * >> -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, >> - uint32_t *coll_id) >> +uint32_t >> +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) >> { >> assert(left != NULL); >> bool is_lhs_forced; >> bool is_rhs_forced; >> uint32_t lhs_coll_id; >> uint32_t rhs_coll_id; >> - struct coll *lhs_coll = sql_expr_coll(parser, left, &is_lhs_forced, >> - &lhs_coll_id); >> - struct coll *rhs_coll = sql_expr_coll(parser, right, &is_rhs_forced, >> - &rhs_coll_id); >> + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) >> + goto err; >> + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) >> + goto err; >> + uint32_t coll_id; >> if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, >> rhs_coll_id, is_rhs_forced, >> - coll_id) != 0) { >> - parser->rc = SQL_TARANTOOL_ERROR; >> - parser->nErr++; >> - return NULL; >> - } >> - return *coll_id == rhs_coll_id ? rhs_coll : lhs_coll;; >> + &coll_id) != 0) >> + goto err; >> + return coll_id; >> +err: >> + parser->rc = SQL_TARANTOOL_ERROR; >> + parser->nErr++; >> + return COLL_NONE; > > 2. Please, do not return some obscure error codes - we use only 0/-1 > and NULL/not NULL almost everywhere except some archaic or misreviewed > code. If you want to return an error, it is better to move coll_id to > out parameters. Ok, that’s what I did. See diff below. >> diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h >> index 7501fadc8..50d297815 100644 >> --- a/src/box/sql/sqliteInt.h >> +++ b/src/box/sql/sqliteInt.h >> @@ -4337,10 +4337,10 @@ const char *sqlite3ErrStr(int); >> * clause is used. >> * @param[out] coll_id Collation identifier. >> * >> - * @retval Pointer to collation. >> + * @retval Return code: < 0 in case of error, 0 on success. > > 3. Please, use separate @retval for each value. As you wish. It is not matter of discussion, but this rule seems to be inconsistent: somewhere we use one retval clause for all possible return values; in other places - several ones. >> */ >> -struct coll * >> -sql_expr_coll(Parse * pParse, Expr * pExpr, bool *is_explicit_coll, >> +int >> +sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, >> uint32_t *coll_id); >> Expr *sqlite3ExprAddCollateToken(Parse * pParse, Expr *, const Token *, int); >> @@ -4684,13 +4684,11 @@ collations_check_compatibility(uint32_t lhs_id, bool is_lhs_forced, >> * @param parser Parser. >> * @param left Left expression. >> * @param right Right expression. Can be NULL. >> - * @param[out] coll_id Collation identifier. >> * >> - * @retval Collation object. >> + * @retval Id of collation object. > > 4. You didn't mentioned that it can be COLL_NONE, that means an error. Skipped this comment due to refactoring of this function. diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index a367a07d4..5617e0f62 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -193,10 +193,13 @@ sqlExprSkipCollate(Expr * pExpr) } int -sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) +sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id, + struct coll **coll) { + assert(coll != NULL); *is_explicit_coll = false; *coll_id = COLL_NONE; + *coll = NULL; while (p != NULL) { int op = p->op; if (op == TK_CAST || op == TK_UPLUS) { @@ -205,7 +208,9 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) } if (op == TK_COLLATE || (op == TK_REGISTER && p->op2 == TK_COLLATE)) { - (void) sql_get_coll_seq(parse, p->u.zToken, coll_id); + *coll = sql_get_coll_seq(parse, p->u.zToken, coll_id); + if (coll == NULL) + return -1; *is_explicit_coll = true; break; } @@ -217,8 +222,8 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) */ int j = p->iColumn; if (j >= 0) { - (void) sql_column_collation(p->space_def, j, - coll_id); + *coll = sql_column_collation(p->space_def, j, + coll_id); } break; } @@ -335,28 +340,30 @@ illegal_collation_mix: return -1; } -uint32_t -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) +int +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, + uint32_t *id) { assert(left != NULL); + assert(id != NULL); bool is_lhs_forced; bool is_rhs_forced; uint32_t lhs_coll_id; uint32_t rhs_coll_id; - if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) - goto err; - if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) - goto err; - uint32_t coll_id; + struct coll *unused; + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id, + &unused) != 0) + return -1; + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id, + &unused) != 0) + return -1; if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, - rhs_coll_id, is_rhs_forced, - &coll_id) != 0) - goto err; - return coll_id; -err: - parser->rc = SQL_TARANTOOL_ERROR; - parser->nErr++; - return COLL_NONE; + rhs_coll_id, is_rhs_forced, id) != 0) { + parser->rc = SQL_TARANTOOL_ERROR; + parser->nErr++; + return -1; + } + return 0; } /* @@ -372,7 +379,9 @@ codeCompare(Parse * pParse, /* The parsing (and code generating) context */ int jumpIfNull /* If true, jump if either operand is NULL */ ) { - uint32_t id = sql_binary_compare_coll_seq(pParse, pLeft, pRight); + uint32_t id; + if (sql_binary_compare_coll_seq(pParse, pLeft, pRight, &id) != 0) + return -1; struct coll *coll = coll_by_id(id)->coll; int p5 = binaryCompareP5(pLeft, pRight, jumpIfNull); int addr = sqlVdbeAddOp4(pParse->pVdbe, opcode, in2, dest, in1, @@ -2428,8 +2437,9 @@ sqlFindInIndex(Parse * pParse, /* Parsing context */ for (i = 0; i < nExpr; i++) { Expr *pLhs = sqlVectorFieldSubexpr(pX->pLeft, i); Expr *pRhs = pEList->a[i].pExpr; - uint32_t id = - sql_binary_compare_coll_seq(pParse, pLhs, pRhs); + uint32_t id; + if (sql_binary_compare_coll_seq(pParse, pLhs, pRhs, &id) != 0) + break; int j; for (j = 0; j < nExpr; j++) { @@ -2749,9 +2759,9 @@ sqlCodeSubselect(Parse * pParse, /* Parsing context */ Expr *p = sqlVectorFieldSubexpr (pLeft, i); - key_info->parts[i].coll_id = - sql_binary_compare_coll_seq(pParse, p, - pEList->a[i].pExpr); + if (sql_binary_compare_coll_seq(pParse, p, pEList->a[i].pExpr, + &key_info->parts[i].coll_id) != 0) + return 0; } } } else if (ALWAYS(pExpr->x.pList != 0)) { @@ -2770,9 +2780,11 @@ sqlCodeSubselect(Parse * pParse, /* Parsing context */ enum field_type lhs_type = sql_expr_type(pLeft); bool unused; - (void) sql_expr_coll(pParse, pExpr->pLeft, - &unused, - &key_info->parts[0].coll_id); + struct coll *unused_coll; + if (sql_expr_coll(pParse, pExpr->pLeft, &unused, + &key_info->parts[0].coll_id, + &unused_coll) != 0) + return 0; /* Loop through each expression in <exprlist>. */ r1 = sqlGetTempReg(pParse); @@ -3035,9 +3047,10 @@ sqlExprCodeIN(Parse * pParse, /* Parsing and code generating context */ bool unused; uint32_t id; ExprList *pList = pExpr->x.pList; - if (sql_expr_coll(pParse, pExpr->pLeft, &unused, &id) != 0) + struct coll *coll; + if (sql_expr_coll(pParse, pExpr->pLeft, &unused, &id, + &coll) != 0) goto sqlExprCodeIN_finished; - struct coll *coll = coll_by_id(id)->coll; int labelOk = sqlVdbeMakeLabel(v); int r2, regToFree; int regCkNull = 0; @@ -3169,9 +3182,9 @@ sqlExprCodeIN(Parse * pParse, /* Parsing and code generating context */ uint32_t id; int r3 = sqlGetTempReg(pParse); Expr *p = sqlVectorFieldSubexpr(pLeft, i); - if (sql_expr_coll(pParse, p, &unused, &id) != 0) + struct coll *pColl; + if (sql_expr_coll(pParse, p, &unused, &id, &pColl) != 0) goto sqlExprCodeIN_finished; - struct coll *pColl = coll_by_id(id)->coll; /* Tarantool: Replace i -> aiMap [i], since original order of columns * is preserved. */ @@ -4001,9 +4014,9 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target) uint32_t id; if (sql_expr_coll(pParse, pFarg->a[i].pExpr, - &unused, &id) != 0) - break; - coll = coll_by_id(id)->coll;; + &unused, &id, + &coll) != 0) + return 0; } } if (pFarg) { diff --git a/src/box/sql/select.c b/src/box/sql/select.c index aed40455b..926f5d011 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -1042,11 +1042,12 @@ selectInnerLoop(Parse * pParse, /* The parser context */ for (i = 0; i < nResultCol; i++) { bool is_found; uint32_t id; + struct coll *coll; if (sql_expr_coll(pParse, pEList->a[i].pExpr, - &is_found, &id) != 0) + &is_found, &id, + &coll) != 0) break; - struct coll *coll = coll_by_id(id)->coll; if (i < nResultCol - 1) { sqlVdbeAddOp3(v, OP_Ne, regResult + i, @@ -1428,7 +1429,9 @@ sql_expr_list_to_key_info(struct Parse *parse, struct ExprList *list, int start) struct key_part_def *part = &key_info->parts[i - start]; bool unused; uint32_t id; - if (sql_expr_coll(parse, item->pExpr, &unused, &id) != 0) { + struct coll *unused_coll; + if (sql_expr_coll(parse, item->pExpr, &unused, &id, + &unused_coll) != 0) { sqlDbFree(parse->db, key_info); return NULL; } @@ -1970,10 +1973,10 @@ sqlSelectAddColumnTypeAndCollation(Parse * pParse, /* Parsing contexts */ pTab->def->fields[i].type = sql_expr_type(p); bool is_found; uint32_t coll_id; - + struct coll *unused; if (pTab->def->fields[i].coll_id == COLL_NONE && - sql_expr_coll(pParse, p, &is_found, &coll_id) == 0 && - coll_id != COLL_NONE) + sql_expr_coll(pParse, p, &is_found, &coll_id, + &unused) == 0 && coll_id != COLL_NONE) pTab->def->fields[i].coll_id = coll_id; } } @@ -2221,9 +2224,10 @@ multi_select_coll_seq_r(struct Parse *parser, struct Select *p, int n, * resolution and we would not have got this far. */ assert(n >= 0 && n < p->pEList->nExpr); + struct coll *unused; if (sql_expr_coll(parser, p->pEList->a[n].pExpr, &is_current_forced, - ¤t_coll_id) != 0) - return COLL_NONE; + ¤t_coll_id, &unused) != 0) + return 0; uint32_t res_coll_id; if (collations_check_compatibility(prior_coll_id, is_prior_forced, current_coll_id, is_current_forced, @@ -2279,7 +2283,10 @@ sql_multiselect_orderby_to_key_info(struct Parse *parse, struct Select *s, uint32_t id; bool unused; if ((term->flags & EP_Collate) != 0) { - (void) sql_expr_coll(parse, term, &unused, &id); + struct coll *unused_coll; + if (sql_expr_coll(parse, term, &unused, &id, + &unused_coll) != 0) + return 0; } else { id = multi_select_coll_seq(parse, s, item->u.x.iOrderByCol - 1); @@ -5357,9 +5364,8 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo) for (j = 0, pItem = pList->a; coll == NULL && j < nArg; j++, pItem++) { if (sql_expr_coll(pParse, pItem->pExpr, - &unused, &id) != 0) + &unused, &id, &coll) != 0) return; - coll = coll_by_id(id)->coll; } if (regHit == 0 && pAggInfo->nAccumulator) regHit = ++pParse->nMem; diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index 87cdbbcc1..70f971f8c 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -4344,12 +4344,14 @@ const char *sqlErrStr(int); * @param[out] is_explicit_coll Flag set if explicit COLLATE * clause is used. * @param[out] coll_id Collation identifier. + * @param[out] coll Collation object. * - * @retval Return code: < 0 in case of error, 0 on success. + * @retval 0 on success. + * @retval -1 in case of error. */ int -sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, - uint32_t *coll_id); +sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id, + struct coll **coll); Expr *sqlExprAddCollateToken(Parse * pParse, Expr *, const Token *, int); Expr *sqlExprAddCollateString(Parse *, Expr *, const char *); @@ -4681,11 +4683,14 @@ collations_check_compatibility(uint32_t lhs_id, bool is_lhs_forced, * @param parser Parser. * @param left Left expression. * @param right Right expression. Can be NULL. + * @param[out] id Id of resulting collation. * - * @retval Id of collation object. + * @retval 0 on success. + * @retval -1 in case of error (e.g. no collation found). */ -uint32_t -sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right); +int +sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right, + uint32_t *id); int sqlTempInMemory(const sql *); #ifndef SQL_OMIT_CTE With *sqlWithAdd(Parse *, With *, Token *, ExprList *, Select *); diff --git a/src/box/sql/where.c b/src/box/sql/where.c index f27de5dd7..631a5feba 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -306,10 +306,12 @@ whereScanNext(WhereScan * pScan) Parse *pParse = pWC->pWInfo->pParse; assert(pX->pLeft); - uint32_t id = - sql_binary_compare_coll_seq( - pParse, pX->pLeft, - pX->pRight); + uint32_t id; + if (sql_binary_compare_coll_seq( + pParse, pX->pLeft, + pX->pRight, &id) != 0) + break; struct coll *coll = id != COLL_NONE ? coll_by_id(id)->coll : @@ -559,8 +561,9 @@ findIndexCol(Parse * pParse, /* Parse context */ p->iColumn == (int) part_to_match->fieldno) { bool is_found; uint32_t id; + struct coll *unused; if (sql_expr_coll(pParse, pList->a[i].pExpr, - &is_found, &id) != 0) + &is_found, &id, &unused) != 0) return -1; if (id == part_to_match->coll_id) return i; @@ -2280,7 +2283,9 @@ whereRangeVectorLen(Parse * pParse, /* Parsing context */ space->def->fields[pLhs->iColumn].type : FIELD_TYPE_INTEGER; if (type != idx_type) break; - uint32_t id = sql_binary_compare_coll_seq(pParse, pLhs, pRhs); + uint32_t id; + if (sql_binary_compare_coll_seq(pParse, pLhs, pRhs, &id) != 0) + break; if (id == COLL_NONE) break; if (idx_def->key_def->parts[i + nEq].coll_id != id) @@ -3264,13 +3269,14 @@ wherePathSatisfiesOrderBy(WhereInfo * pWInfo, /* The WHERE clause */ bool unused; uint32_t lhs_id; uint32_t rhs_id; + struct coll *unused_coll; if (sql_expr_coll(pWInfo->pParse, pOrderBy->a[i].pExpr, &unused, - &lhs_id) != 0) + &lhs_id, &unused_coll) != 0) return 0; if (sql_expr_coll(pWInfo->pParse, pTerm->pExpr, &unused, - &rhs_id) != 0) + &rhs_id, &unused_coll) != 0) return 0; if (lhs_id != rhs_id) continue; @@ -3395,9 +3401,11 @@ wherePathSatisfiesOrderBy(WhereInfo * pWInfo, /* The WHERE clause */ if (iColumn >= 0) { bool is_found; uint32_t id; - sql_expr_coll(pWInfo->pParse, - pOrderBy->a[i].pExpr, - &is_found, &id); + struct coll *unused; + if (sql_expr_coll(pWInfo->pParse, + pOrderBy->a[i].pExpr, + &is_found, &id, &unused) != 0) + return 0; if (idx_def->key_def->parts[j].coll_id != id) continue; } diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c index 758c92896..703980f02 100644 --- a/src/box/sql/whereexpr.c +++ b/src/box/sql/whereexpr.c @@ -168,8 +168,10 @@ exprCommute(Parse * pParse, Expr * pExpr) } else { bool is_found; uint32_t id; - (void) sql_expr_coll(pParse, pExpr->pLeft, &is_found, - &id); + struct coll *unused; + if (sql_expr_coll(pParse, pExpr->pLeft, &is_found, &id, + &unused) != 0) + return; if (id != COLL_NONE) { /* * Neither X nor Y have COLLATE @@ -849,16 +851,21 @@ termIsEquivalence(Parse * pParse, Expr * pExpr) if (lhs_type != rhs_type && (!sql_type_is_numeric(lhs_type) || !sql_type_is_numeric(rhs_type))) return 0; - uint32_t id = sql_binary_compare_coll_seq(pParse, pExpr->pLeft, - pExpr->pRight); + uint32_t id; + if (sql_binary_compare_coll_seq(pParse, pExpr->pLeft, pExpr->pRight, + &id) != 0) + return 0; if (id == COLL_NONE) return 1; bool unused1; uint32_t lhs_id; uint32_t rhs_id; - if (sql_expr_coll(pParse, pExpr->pLeft, &unused1, &lhs_id) != 0) + struct coll *unused; + if (sql_expr_coll(pParse, pExpr->pLeft, &unused1, &lhs_id, + &unused) != 0) return 0; - if (sql_expr_coll(pParse, pExpr->pRight, &unused1, &rhs_id) != 0) + if (sql_expr_coll(pParse, pExpr->pRight, &unused1, &rhs_id, + &unused) != 0) return 0; return lhs_id != COLL_NONE && lhs_id == rhs_id; } ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation 2019-01-16 13:13 [tarantool-patches] [PATCH 0/2] Compute derived collation for concatenation Nikita Pettik 2019-01-16 13:13 ` [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions Nikita Pettik @ 2019-01-16 13:13 ` Nikita Pettik 2019-01-17 13:33 ` [tarantool-patches] " Konstantin Osipov 2019-01-24 18:29 ` Vladislav Shpilevoy 2019-02-25 11:29 ` [tarantool-patches] Re: [PATCH 0/2] Compute derived " Kirill Yukhin 2 siblings, 2 replies; 15+ messages in thread From: Nikita Pettik @ 2019-01-16 13:13 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik According to ANSI, result of concatenation operation should derive collation sequence from its operands. Now it is not true: result is always comes with no ("none") collation. In a nutshell*, rules are quite simple: a) If some data type has an explicit collation EC1, then every data type that has an explicit collation shall have declared type collation that is EC1. The collation derivation is explicit and the collation is EC1. b) If every data type has an implicit collation, then: - If every data type has the same declared type collation IC1, then the collation derivation is implicit and the collation is IC1. - Otherwise, the collation derivation is none. c) Otherwise, the collation derivation is none. *Read complete statement at 9.5 Result of data type combinations Closes #3937 --- src/box/sql/expr.c | 47 +++++++++++++++++++- test/sql/collation.result | 102 ++++++++++++++++++++++++++++++++++++++++++++ test/sql/collation.test.lua | 46 ++++++++++++++++++++ 3 files changed, 193 insertions(+), 2 deletions(-) diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index f8819f779..e6f536757 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -221,6 +221,45 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) } break; } + if (op == TK_CONCAT) { + /* + * Despite the fact that procedure below + * is very similar to collation_check_compatability(), + * it is slightly different: when both + * operands have different implicit collations, + * derived collation should be "none", + * i.e. no collation is used at all + * (instead of raising error). + */ + bool is_lhs_forced; + uint32_t lhs_coll_id; + if (sql_expr_coll(parse, p->pLeft, &is_lhs_forced, + &lhs_coll_id) != 0) + return -1; + bool is_rhs_forced; + uint32_t rhs_coll_id; + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, + &rhs_coll_id) != 0) + return -1; + if (is_lhs_forced && is_rhs_forced) { + if (lhs_coll_id != rhs_coll_id) + return -1; + } + if (is_lhs_forced) { + *coll_id = lhs_coll_id; + *is_explicit_coll = true; + return 0; + } + if (is_rhs_forced) { + *coll_id = rhs_coll_id; + *is_explicit_coll = true; + return 0; + } + if (rhs_coll_id != lhs_coll_id) + return 0; + *coll_id = lhs_coll_id; + return 0; + } if (p->flags & EP_Collate) { if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) { p = p->pLeft; @@ -384,10 +423,14 @@ sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) bool is_rhs_forced; uint32_t lhs_coll_id; uint32_t rhs_coll_id; - if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) { + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); goto err; - if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) + } + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) { + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); goto err; + } uint32_t coll_id; if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, rhs_coll_id, is_rhs_forced, diff --git a/test/sql/collation.result b/test/sql/collation.result index c69510fe7..a13738f49 100644 --- a/test/sql/collation.result +++ b/test/sql/collation.result @@ -325,3 +325,105 @@ box.sql.execute("DROP TABLE t1;") box.sql.execute("DROP TABLE t0;") --- ... +-- gh-3937: result of concatination has derived collation. +-- +box.sql.execute("CREATE TABLE t4a(a TEXT COLLATE \"unicode\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +--- +... +box.sql.execute("INSERT INTO t4a VALUES('ABC','abc',1);") +--- +... +box.sql.execute("INSERT INTO t4a VALUES('ghi','ghi',3);") +--- +... +-- Only LHS of concatenation has implicitly set collation. +-- Hence, no collation is used. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'') = b;") +--- +- - [1] + - [3] +... +-- BINARY collation is forced for comparison operator as +-- a derivation from concatenation. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b;") +--- +- - [3] +... +-- Both operands of concatenation have explicit but different +-- collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'' COLLATE \"unicode_ci\") = b;") +--- +- error: Illegal mix of collations +... +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b COLLATE \"unicode\";") +--- +- error: Illegal mix of collations +... +-- No collation is used since LHS and RHS of concatenation +-- operator have different implicit collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'')=(b||'');") +--- +- - [3] +... +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") +--- +- - [3] +... +box.sql.execute("CREATE TABLE t4b(a TEXT COLLATE \"unicode_ci\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +--- +... +box.sql.execute("INSERT INTO t4b VALUES('BCD','bcd',1);") +--- +... +box.sql.execute("INSERT INTO t4b VALUES('ghi','ghi',3);") +--- +... +-- Operands have the same implicit collation, so it is derived. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") +--- +- - [3] +... +-- Couple of other possible combinations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b||a);") +--- +- - [3] +... +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b COLLATE \"unicode_ci\"||a);") +--- +- error: Illegal mix of collations +... +box.sql.execute("INSERT INTO t4b VALUES('abc', 'xxx', 2);") +--- +... +box.sql.execute("INSERT INTO t4b VALUES('gHz', 'xxx', 4);") +--- +... +-- Results are sorted with case-insensitive order. +-- Otherwise capital latters come first. +-- +box.sql.execute("SELECT a FROM t4b ORDER BY a COLLATE \"unicode_ci\" || ''") +--- +- - ['abc'] + - ['BCD'] + - ['ghi'] + - ['gHz'] +... +box.sql.execute("SELECT a FROM t4b ORDER BY a || b") +--- +- - ['abc'] + - ['BCD'] + - ['ghi'] + - ['gHz'] +... +box.space.T4A:drop() +--- +... +box.space.T4B:drop() +--- +... diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua index 4ad2d5e50..8f1d57502 100644 --- a/test/sql/collation.test.lua +++ b/test/sql/collation.test.lua @@ -126,3 +126,49 @@ box.sql.execute("SELECT * FROM t1;") box.sql.execute("SELECT * FROM t0;") box.sql.execute("DROP TABLE t1;") box.sql.execute("DROP TABLE t0;") + +-- gh-3937: result of concatination has derived collation. +-- +box.sql.execute("CREATE TABLE t4a(a TEXT COLLATE \"unicode\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +box.sql.execute("INSERT INTO t4a VALUES('ABC','abc',1);") +box.sql.execute("INSERT INTO t4a VALUES('ghi','ghi',3);") +-- Only LHS of concatenation has implicitly set collation. +-- Hence, no collation is used. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'') = b;") +-- BINARY collation is forced for comparison operator as +-- a derivation from concatenation. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b;") +-- Both operands of concatenation have explicit but different +-- collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'' COLLATE \"unicode_ci\") = b;") +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b COLLATE \"unicode\";") +-- No collation is used since LHS and RHS of concatenation +-- operator have different implicit collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'')=(b||'');") +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") + +box.sql.execute("CREATE TABLE t4b(a TEXT COLLATE \"unicode_ci\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +box.sql.execute("INSERT INTO t4b VALUES('BCD','bcd',1);") +box.sql.execute("INSERT INTO t4b VALUES('ghi','ghi',3);") +-- Operands have the same implicit collation, so it is derived. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") +-- Couple of other possible combinations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b||a);") +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b COLLATE \"unicode_ci\"||a);") + +box.sql.execute("INSERT INTO t4b VALUES('abc', 'xxx', 2);") +box.sql.execute("INSERT INTO t4b VALUES('gHz', 'xxx', 4);") +-- Results are sorted with case-insensitive order. +-- Otherwise capital latters come first. +-- +box.sql.execute("SELECT a FROM t4b ORDER BY a COLLATE \"unicode_ci\" || ''") +box.sql.execute("SELECT a FROM t4b ORDER BY a || b") + +box.space.T4A:drop() +box.space.T4B:drop() -- 2.15.1 ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-01-16 13:13 ` [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation Nikita Pettik @ 2019-01-17 13:33 ` Konstantin Osipov 2019-01-17 19:19 ` n.pettik 2019-01-18 9:59 ` Kirill Yukhin 2019-01-24 18:29 ` Vladislav Shpilevoy 1 sibling, 2 replies; 15+ messages in thread From: Konstantin Osipov @ 2019-01-17 13:33 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik * Nikita Pettik <korablev@tarantool.org> [19/01/16 17:06]: > According to ANSI, result of concatenation operation should derive > collation sequence from its operands. Now it is not true: result is > always comes with no ("none") collation. Generally, it should be very cheap to introduce expression static analysis phase by adding static analysis state to struct Expr. Yes, it's a blasphemy from separation of concerns point of view but it seems to be a lesser evil than invoking partial static analysis here and there during code generation. What i mean is that instead of changing signature of sql_expr_coll() one should be able to do: /** * Fills expr->coll for every node in the expression tree or * returns an appropriate error if there is a type error. */ int sql_expr_static_analysis(struct Expr *expr); -- Konstantin Osipov, Moscow, Russia, +7 903 626 22 32 http://tarantool.io - www.twitter.com/kostja_osipov ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-01-17 13:33 ` [tarantool-patches] " Konstantin Osipov @ 2019-01-17 19:19 ` n.pettik 2019-01-18 9:59 ` Kirill Yukhin 1 sibling, 0 replies; 15+ messages in thread From: n.pettik @ 2019-01-17 19:19 UTC (permalink / raw) To: tarantool-patches; +Cc: Konstantin Osipov, Vladislav Shpilevoy > On 17 Jan 2019, at 16:33, Konstantin Osipov <kostja@tarantool.org> wrote: > > * Nikita Pettik <korablev@tarantool.org> [19/01/16 17:06]: >> According to ANSI, result of concatenation operation should derive >> collation sequence from its operands. Now it is not true: result is >> always comes with no ("none") collation. > > Generally, it should be very cheap to introduce expression static > analysis phase by adding static analysis state to struct Expr. > Yes, it's a blasphemy from separation of concerns point of view > but it seems to be a lesser evil than invoking partial static > analysis here and there during code generation. > > What i mean is that instead of changing signature of > sql_expr_coll() one should be able to do: > > /** > * Fills expr->coll for every node in the expression tree or > * returns an appropriate error if there is a type error. *Type and collation analysis are slightly different things I guess.* > */ > int > sql_expr_static_analysis(struct Expr *expr); Implement decent static analysis pass could be not so easy as it seems to be. Firstly, I guess we should deal with https://github.com/tarantool/tarantool/issues/3861 In a nutshell, now walker uses recursive approach. Hence, due to very limited size of fiber’s stack we get stack overflow on not so giant expressions. One more recursive routine may make things even worse. Secondly, we should decide where to place this analysis. There is no one entry point before code generation as well as there is no strict separation between parsing and code generation. Moreover, complete AST may not be constructed at all. I’m simply inlining part of R. Hipp’s recent respond from SQLite maling list: > SQLite does not necessarily generate a complete AST, then hand > that AST off to a code generator for evaluation, all in one neat step. > Depending on the SQL statement, the byte code may be generated using > techniques reminiscent of "syntax directed translation". Control hops > back and forth between parsing and code generation. Sections of > bytecode might generated at multiple reduce actions within the parse. In case you bless me and allow to spend extra time I can attempt at investigating this issue. Current approach at least works now. ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-01-17 13:33 ` [tarantool-patches] " Konstantin Osipov 2019-01-17 19:19 ` n.pettik @ 2019-01-18 9:59 ` Kirill Yukhin 1 sibling, 0 replies; 15+ messages in thread From: Kirill Yukhin @ 2019-01-18 9:59 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik Hello, On 17 Jan 16:33, Konstantin Osipov wrote: > * Nikita Pettik <korablev@tarantool.org> [19/01/16 17:06]: > > According to ANSI, result of concatenation operation should derive > > collation sequence from its operands. Now it is not true: result is > > always comes with no ("none") collation. > > Generally, it should be very cheap to introduce expression static > analysis phase by adding static analysis state to struct Expr. > Yes, it's a blasphemy from separation of concerns point of view > but it seems to be a lesser evil than invoking partial static > analysis here and there during code generation. I think that static analysis pass is of course very useful, but it is big volume of work. I propose to move this effort to the next release and adopt proposed solution. -- Regards, Kirill Yukhin ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-01-16 13:13 ` [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation Nikita Pettik 2019-01-17 13:33 ` [tarantool-patches] " Konstantin Osipov @ 2019-01-24 18:29 ` Vladislav Shpilevoy 2019-02-14 23:26 ` n.pettik 1 sibling, 1 reply; 15+ messages in thread From: Vladislav Shpilevoy @ 2019-01-24 18:29 UTC (permalink / raw) To: Nikita Pettik, tarantool-patches Thanks for the patch! See 7 comments below. On 16/01/2019 16:13, Nikita Pettik wrote: > According to ANSI, result of concatenation operation should derive > collation sequence from its operands. Now it is not true: result is > always comes with no ("none") collation. > > In a nutshell*, rules are quite simple: 1. If you want to put a reference, then use [<number>], like in literature and like Vova does in his commits. Firstly, I thought that '*' was a typo. > > a) If some data type has an explicit collation EC1, then every data 2. Double space after 'type'. In some other places below too. > type that has an explicit collation shall have declared type collation > that is EC1. The collation derivation is explicit and the collation is > EC1. > > b) If every data type has an implicit collation, then: > > - If every data type has the same declared type collation IC1, then > the collation derivation is implicit and the collation is IC1. > > - Otherwise, the collation derivation is none. > > c) Otherwise, the collation derivation is none. > > *Read complete statement at 9.5 Result of data type combinations 3. Please, say a bit more words: that 9.5 is a chapter in an SQL standard, and the standard of which year it is. > > Closes #3937 > --- > src/box/sql/expr.c | 47 +++++++++++++++++++- > test/sql/collation.result | 102 ++++++++++++++++++++++++++++++++++++++++++++ > test/sql/collation.test.lua | 46 ++++++++++++++++++++ > 3 files changed, 193 insertions(+), 2 deletions(-) > > diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c > index f8819f779..e6f536757 100644 > --- a/src/box/sql/expr.c > +++ b/src/box/sql/expr.c > @@ -221,6 +221,45 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) > } > break; > } > + if (op == TK_CONCAT) { > + /* > + * Despite the fact that procedure below > + * is very similar to collation_check_compatability(), > + * it is slightly different: when both > + * operands have different implicit collations, > + * derived collation should be "none", > + * i.e. no collation is used at all > + * (instead of raising error). 4. Typo: collation_check_compatability -> collation*S*_check_compat*I*bility. Also, I think that it is not worth mentioning the difference here with that function, especially in such a big comment, looks like an excuse. It is better to put a link to the standard. > + */ > + bool is_lhs_forced; > + uint32_t lhs_coll_id; > + if (sql_expr_coll(parse, p->pLeft, &is_lhs_forced, > + &lhs_coll_id) != 0) > + return -1; > + bool is_rhs_forced; > + uint32_t rhs_coll_id; > + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, > + &rhs_coll_id) != 0) > + return -1; > + if (is_lhs_forced && is_rhs_forced) { > + if (lhs_coll_id != rhs_coll_id) > + return -1; 5. Did you miss diag_set? > + } > + if (is_lhs_forced) { > + *coll_id = lhs_coll_id; > + *is_explicit_coll = true; > + return 0; 6. In this function (sql_expr_coll) to break the cycle 'break' keyword is used, so lets be consistent and use 'break' as well. > + } > + if (is_rhs_forced) { > + *coll_id = rhs_coll_id; > + *is_explicit_coll = true; > + return 0; > + } > + if (rhs_coll_id != lhs_coll_id) > + return 0; > + *coll_id = lhs_coll_id; > + return 0; > + } > if (p->flags & EP_Collate) { > if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) { > p = p->pLeft; > @@ -384,10 +423,14 @@ sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) > bool is_rhs_forced; > uint32_t lhs_coll_id; > uint32_t rhs_coll_id; > - if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) > + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) { > + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); > goto err; > - if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) > + } > + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) { > + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); 7. Why do you set it here and not on the point 5 above? sql_expr_coll can return an error not only because of illegal collation mix, but also if a collation does not exist, for example. > goto err; > + } > uint32_t coll_id; > if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, > rhs_coll_id, is_rhs_forced, ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-01-24 18:29 ` Vladislav Shpilevoy @ 2019-02-14 23:26 ` n.pettik 2019-02-22 11:23 ` Vladislav Shpilevoy 0 siblings, 1 reply; 15+ messages in thread From: n.pettik @ 2019-02-14 23:26 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy > On 24 Jan 2019, at 21:29, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote: > > Thanks for the patch! See 7 comments below. Sorry for delayed answer. > On 16/01/2019 16:13, Nikita Pettik wrote: >> According to ANSI, result of concatenation operation should derive >> collation sequence from its operands. Now it is not true: result is >> always comes with no ("none") collation. >> In a nutshell*, rules are quite simple: > > 1. If you want to put a reference, then use [<number>], like in > literature and like Vova does in his commits. Firstly, I thought > that '*' was a typo. Ok, fixed. >> a) If some data type has an explicit collation EC1, then every data > > 2. Double space after 'type'. In some other places below too. Ok, fixed. As a rule I use vim auto-formatting utility, which aligns borders to 72 chars automatically putting extra spaces between sentences. I think it is kind of normal. >> type that has an explicit collation shall have declared type collation >> that is EC1. The collation derivation is explicit and the collation is >> EC1. >> b) If every data type has an implicit collation, then: >> - If every data type has the same declared type collation IC1, then >> the collation derivation is implicit and the collation is IC1. >> - Otherwise, the collation derivation is none. >> c) Otherwise, the collation derivation is none. >> *Read complete statement at 9.5 Result of data type combinations > > 3. Please, say a bit more words: that 9.5 is a chapter in an SQL > standard, and the standard of which year it is. Ok, done. Fixed commit message: Author: Nikita Pettik <korablev@tarantool.org> Date: Tue Jan 15 15:18:37 2019 +0300 sql: compute resulting collation for concatenation According to ANSI, result of concatenation operation should derive collation sequence from its operands. Now it is not true: result is always comes with no ("none") collation. In a nutshell[1], rules are quite simple: a) If some data type has an explicit collation EC1, then every data type that has an explicit collation shall have declared type collation that is EC1. The collation derivation is explicit and the collation is EC1. b) If every data type has an implicit collation, then: - If every data type has the same declared type collation IC1, then the collation derivation is implicit and the collation is IC1. - Otherwise, the collation derivation is none. c) Otherwise, the collation derivation is none. [1] Read complete statement at chapter 9.5 Result of data type combinations, ANSI 2013, Part 2: Foundations. Closes #3937 >> Closes #3937 >> --- >> src/box/sql/expr.c | 47 +++++++++++++++++++- >> test/sql/collation.result | 102 ++++++++++++++++++++++++++++++++++++++++++++ >> test/sql/collation.test.lua | 46 ++++++++++++++++++++ >> 3 files changed, 193 insertions(+), 2 deletions(-) >> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c >> index f8819f779..e6f536757 100644 >> --- a/src/box/sql/expr.c >> +++ b/src/box/sql/expr.c >> @@ -221,6 +221,45 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) >> } >> break; >> } >> + if (op == TK_CONCAT) { >> + /* >> + * Despite the fact that procedure below >> + * is very similar to collation_check_compatability(), >> + * it is slightly different: when both >> + * operands have different implicit collations, >> + * derived collation should be "none", >> + * i.e. no collation is used at all >> + * (instead of raising error). > > 4. Typo: collation_check_compatability -> collation*S*_check_compat*I*bility. > > Also, I think that it is not worth mentioning the difference here > with that function, especially in such a big comment, looks like an excuse. > It is better to put a link to the standard. As you wish: diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index e6f536757..031b1f16f 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -223,13 +223,10 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) } if (op == TK_CONCAT) { /* - * Despite the fact that procedure below - * is very similar to collation_check_compatability(), - * it is slightly different: when both - * operands have different implicit collations, - * derived collation should be "none", - * i.e. no collation is used at all - * (instead of raising error). + * Procedure below provides compatibility + * checks declared in ANSI SQL 2013: + * chapter 9.5 Result of data type + * combinations. */ >> + bool is_rhs_forced; >> + uint32_t rhs_coll_id; >> + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, >> + &rhs_coll_id) != 0) >> + return -1; >> + if (is_lhs_forced && is_rhs_forced) { >> + if (lhs_coll_id != rhs_coll_id) >> + return -1; > > 5. Did you miss diag_set? No, I did it on purpose. Firstly, this function is recursive, so in case error occurred on bottom levels of recursion, diag would be reseted on each level above (and parser’s counter of errors would be incremented several times as well). No terrible consequences would take place in this case, but it looks like a bad pattern. Anyway, fixed it according to your suggestion. Secondly, many functions which call sql_expr_coll don’t expect that it can return real error and they are not adapted to handle them (e.g. wherePathSatisfiesOrderBy). In original SQLite this function don’t fail under any circumstances. Sure, if error is set as Parse->err, sooner or later it will arise (I guess this is likely to be global problem of SQLite). Diff (cut from context, sorry): + if (is_lhs_forced && is_rhs_forced) { + if (lhs_coll_id != rhs_coll_id) { + diag_set(ClientError, + ER_ILLEGAL_COLLATION_MIX); + parse->nErr++; + parse->rc = SQL_TARANTOOL_ERROR; + return -1; + } + } >> + } >> + if (is_lhs_forced) { >> + *coll_id = lhs_coll_id; >> + *is_explicit_coll = true; >> + return 0; > > 6. In this function (sql_expr_coll) to break the cycle 'break' > keyword is used, so lets be consistent and use 'break' as well. Ok: @@ -248,17 +245,17 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) if (is_lhs_forced) { *coll_id = lhs_coll_id; *is_explicit_coll = true; - return 0; + break; } if (is_rhs_forced) { *coll_id = rhs_coll_id; *is_explicit_coll = true; - return 0; + break; } if (rhs_coll_id != lhs_coll_id) - return 0; + break; *coll_id = lhs_coll_id; - return 0; + break; } if (p->flags & EP_Collate) { if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) { >> >> @@ -384,10 +423,14 @@ sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) >> bool is_rhs_forced; >> uint32_t lhs_coll_id; >> uint32_t rhs_coll_id; >> - if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) >> + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) { >> + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); >> goto err; >> - if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) >> + } >> + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) { >> + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); > > 7. Why do you set it here and not on the point 5 above? sql_expr_coll > can return an error not only because of illegal collation mix, but also > if a collation does not exist, for example. Hm, I missed this fact. Ok, I’ve done it, see above. ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-02-14 23:26 ` n.pettik @ 2019-02-22 11:23 ` Vladislav Shpilevoy 2019-02-22 13:40 ` n.pettik 0 siblings, 1 reply; 15+ messages in thread From: Vladislav Shpilevoy @ 2019-02-22 11:23 UTC (permalink / raw) To: n.pettik, tarantool-patches Hi! Thanks for the fixes! >>> a) If some data type has an explicit collation EC1, then every data >> >> 2. Double space after 'type'. In some other places below too. > > Ok, fixed. As a rule I use vim auto-formatting utility, which aligns > borders to 72 chars automatically putting extra spaces between > sentences. I think it is kind of normal. I tried to do it too back in the days, but Kostja said we must not justify text. >>> + bool is_rhs_forced; >>> + uint32_t rhs_coll_id; >>> + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, >>> + &rhs_coll_id) != 0) >>> + return -1; >>> + if (is_lhs_forced && is_rhs_forced) { >>> + if (lhs_coll_id != rhs_coll_id) >>> + return -1; >> >> 5. Did you miss diag_set? > > No, I did it on purpose. Firstly, this function is recursive, > so in case error occurred on bottom levels of recursion, > diag would be reseted on each level above (and parser’s > counter of errors would be incremented several times as > well). No terrible consequences would take place in this case, > but it looks like a bad pattern. Anyway, fixed it according to > your suggestion. Each ClientError is accounted in a global errors counter. It means, that the same error should be set multiple times, otherwise the statistics would be wrong. Instead of resetting diag each time check if parser->nErr > 0, and if it is, then do nothing. diag_is_empty() can not be used, because it happens sometimes, that there are no errors, but diag is not cleared from a previous error. ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-02-22 11:23 ` Vladislav Shpilevoy @ 2019-02-22 13:40 ` n.pettik 2019-02-22 13:51 ` Vladislav Shpilevoy 0 siblings, 1 reply; 15+ messages in thread From: n.pettik @ 2019-02-22 13:40 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy >>>> + bool is_rhs_forced; >>>> + uint32_t rhs_coll_id; >>>> + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, >>>> + &rhs_coll_id) != 0) >>>> + return -1; >>>> + if (is_lhs_forced && is_rhs_forced) { >>>> + if (lhs_coll_id != rhs_coll_id) >>>> + return -1; >>> >>> 5. Did you miss diag_set? >> No, I did it on purpose. Firstly, this function is recursive, >> so in case error occurred on bottom levels of recursion, >> diag would be reseted on each level above (and parser’s >> counter of errors would be incremented several times as >> well). No terrible consequences would take place in this case, >> but it looks like a bad pattern. Anyway, fixed it according to >> your suggestion. > > Each ClientError is accounted in a global errors counter. It > means, that the same error should be set multiple times, otherwise > the statistics would be wrong. Instead of resetting diag each time > check if parser->nErr > 0, and if it is, then do nothing. > diag_is_empty() can not be used, because it happens sometimes, that > there are no errors, but diag is not cleared from a previous error. Hmm, Mergen is trying to remove nErr, if I’m not mistaken. So, soon I guess we will be able to use diag_is_empty(). Now I’ve implemented your suggestion. Diff: diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index 680075792..0de9a46a5 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -246,10 +246,17 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id, return -1; if (is_lhs_forced && is_rhs_forced) { if (lhs_coll_id != rhs_coll_id) { - diag_set(ClientError, - ER_ILLEGAL_COLLATION_MIX); - parse->nErr++; - parse->rc = SQL_TARANTOOL_ERROR; + /* + * Don't set the same error + * several times: this + * function is recursive. + */ + if (parse->nErr == 0) { + diag_set(ClientError, + ER_ILLEGAL_COLLATION_MIX); + parse->nErr++; + parse->rc = SQL_TARANTOOL_ERROR; + } return -1; } } ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 2/2] sql: compute resulting collation for concatenation 2019-02-22 13:40 ` n.pettik @ 2019-02-22 13:51 ` Vladislav Shpilevoy 0 siblings, 0 replies; 15+ messages in thread From: Vladislav Shpilevoy @ 2019-02-22 13:51 UTC (permalink / raw) To: n.pettik, tarantool-patches, Kirill Yukhin On 22/02/2019 16:40, n.pettik wrote: > >>>>> + bool is_rhs_forced; >>>>> + uint32_t rhs_coll_id; >>>>> + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, >>>>> + &rhs_coll_id) != 0) >>>>> + return -1; >>>>> + if (is_lhs_forced && is_rhs_forced) { >>>>> + if (lhs_coll_id != rhs_coll_id) >>>>> + return -1; >>>> >>>> 5. Did you miss diag_set? >>> No, I did it on purpose. Firstly, this function is recursive, >>> so in case error occurred on bottom levels of recursion, >>> diag would be reseted on each level above (and parser’s >>> counter of errors would be incremented several times as >>> well). No terrible consequences would take place in this case, >>> but it looks like a bad pattern. Anyway, fixed it according to >>> your suggestion. >> >> Each ClientError is accounted in a global errors counter. It >> means, that the same error should be set multiple times, otherwise >> the statistics would be wrong. Instead of resetting diag each time >> check if parser->nErr > 0, and if it is, then do nothing. >> diag_is_empty() can not be used, because it happens sometimes, that >> there are no errors, but diag is not cleared from a previous error. > > Hmm, Mergen is trying to remove nErr, if I’m not mistaken. > So, soon I guess we will be able to use diag_is_empty(). > Now I’ve implemented your suggestion. Diff: Yes, after Mergen did, we can remove nErr from there. Now your patch does not increase nErr usage, so it is ok. The whole patchset LGTM. ^ permalink raw reply [flat|nested] 15+ messages in thread
* [tarantool-patches] Re: [PATCH 0/2] Compute derived collation for concatenation 2019-01-16 13:13 [tarantool-patches] [PATCH 0/2] Compute derived collation for concatenation Nikita Pettik 2019-01-16 13:13 ` [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions Nikita Pettik 2019-01-16 13:13 ` [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation Nikita Pettik @ 2019-02-25 11:29 ` Kirill Yukhin 2 siblings, 0 replies; 15+ messages in thread From: Kirill Yukhin @ 2019-02-25 11:29 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy, Nikita Pettik Hello, On 16 Jan 16:13, Nikita Pettik wrote: > Branch: https://github.com/tarantool/tarantool/tree/np/gh-3937-concatenation-collation > Issue: https://github.com/tarantool/tarantool/issues/3937 > > According to ANSI concatenation operator should derive collation > from its operands. This small patch-set implements this procedure > according to rules described in SQL standard. > > Nikita Pettik (2): > sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions > sql: compute resulting collation for concatenation > > src/box/sql/expr.c | 125 +++++++++++++++++++++++++++++--------------- > src/box/sql/select.c | 27 ++++++---- > src/box/sql/sqliteInt.h | 14 +++-- > src/box/sql/where.c | 56 ++++++++++---------- > src/box/sql/whereexpr.c | 23 ++++---- > test/sql/collation.result | 102 ++++++++++++++++++++++++++++++++++++ > test/sql/collation.test.lua | 46 ++++++++++++++++ > 7 files changed, 296 insertions(+), 97 deletions(-) Your patches were pushed into 2.1 branch few days ago. -- Regards, Kirill Yukhin ^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2019-02-25 11:29 UTC | newest] Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-01-16 13:13 [tarantool-patches] [PATCH 0/2] Compute derived collation for concatenation Nikita Pettik 2019-01-16 13:13 ` [tarantool-patches] [PATCH 1/2] sql: refactor sql_expr_coll and sql_binary_compare_coll_seq functions Nikita Pettik 2019-01-17 13:28 ` [tarantool-patches] " Konstantin Osipov 2019-01-24 18:28 ` Vladislav Shpilevoy 2019-02-14 23:26 ` n.pettik 2019-01-16 13:13 ` [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation Nikita Pettik 2019-01-17 13:33 ` [tarantool-patches] " Konstantin Osipov 2019-01-17 19:19 ` n.pettik 2019-01-18 9:59 ` Kirill Yukhin 2019-01-24 18:29 ` Vladislav Shpilevoy 2019-02-14 23:26 ` n.pettik 2019-02-22 11:23 ` Vladislav Shpilevoy 2019-02-22 13:40 ` n.pettik 2019-02-22 13:51 ` Vladislav Shpilevoy 2019-02-25 11:29 ` [tarantool-patches] Re: [PATCH 0/2] Compute derived " Kirill Yukhin
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox