From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 215E328783 for ; Mon, 11 Mar 2019 11:04:47 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id szi_KG_-rX4d for ; Mon, 11 Mar 2019 11:04:47 -0400 (EDT) Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id A38EC28C01 for ; Mon, 11 Mar 2019 11:04:46 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v2 7/7] sql: store regular identifiers in case-normal form References: <6f979039-61fc-1ee7-e006-859e85195b33@tarantool.org> From: Kirill Shcherbatov Message-ID: <152eb589-a416-01d4-e75c-8876c9538c9d@tarantool.org> Date: Mon, 11 Mar 2019 18:04:44 +0300 MIME-Version: 1.0 In-Reply-To: <6f979039-61fc-1ee7-e006-859e85195b33@tarantool.org> Content-Type: text/plain; charset="utf-8" Content-Language: en-US Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org, Vladislav Shpilevoy > First of all, I do not see a test on failed normalization. > Please, add. I've implemented a new errinj test. > 1. You have sql_parser_error, and you used it already in > some other places. Fix it, please, in all touched places. Ok, done. > 2. Why? Because I've added a new test that have created a new additional table. ====================================================================== Introduced a new sql_normalize_name routine performing SQL name conversion to case-normal form via unicode character folding. For example, ß is converted to SS. The result is similar to SQL UPPER function. Closes #3931 --- src/box/lua/lua_sql.c | 11 +++-- src/box/sql/build.c | 35 +++++++++----- src/box/sql/expr.c | 70 ++++++++++++++++++--------- src/box/sql/parse.y | 25 ++++++++-- src/box/sql/select.c | 19 ++++++-- src/box/sql/sqlInt.h | 27 ++++++++++- src/box/sql/trigger.c | 18 +++++-- src/box/sql/util.c | 46 ++++++++++++------ src/lib/core/errinj.h | 1 + test/box/errinj.result | 20 ++++++++ test/box/errinj.test.lua | 8 +++ test/sql-tap/identifier_case.test.lua | 12 +++-- 12 files changed, 218 insertions(+), 74 deletions(-) diff --git a/src/box/lua/lua_sql.c b/src/box/lua/lua_sql.c index f5a7b7819..c27ca818e 100644 --- a/src/box/lua/lua_sql.c +++ b/src/box/lua/lua_sql.c @@ -176,13 +176,16 @@ lbox_sql_create_function(struct lua_State *L) } size_t name_len; const char *name = lua_tolstring(L, 1, &name_len); + int normalized_name_len = sql_normalize_name(NULL, 0, name, name_len); + if (normalized_name_len < 0) + return luaT_error(L); char *normalized_name = (char *) region_alloc(&fiber()->gc, - name_len + 1); + normalized_name_len + 1); if (normalized_name == NULL) return luaL_error(L, "out of memory"); - memcpy(normalized_name, name, name_len); - normalized_name[name_len] = '\0'; - sqlNormalizeName(normalized_name); + if (sql_normalize_name(normalized_name, normalized_name_len + 1, name, + name_len) < 0) + return luaT_error(L); struct lua_sql_func_info *func_info = (struct lua_sql_func_info *) malloc(sizeof(*func_info)); if (func_info == NULL) diff --git a/src/box/sql/build.c b/src/box/sql/build.c index 8eac9fdb3..7ca6555dd 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -236,13 +236,20 @@ char * sql_name_from_token(struct sql *db, struct Token *name_token) { assert(name_token != NULL && name_token->z != NULL); - char *name = sqlDbStrNDup(db, (char *)name_token->z, name_token->n); + int name_len = + sql_normalize_name(NULL, 0, name_token->z, name_token->n); + if (name_len < 0) + return NULL; + char *name = sqlDbMallocRawNN(db, name_len + 1); if (name == NULL) { - diag_set(OutOfMemory, name_token->n + 1, "sqlDbStrNDup", - "name"); + diag_set(OutOfMemory, name_len + 1, "sqlDbMallocRawNN", "name"); + return NULL; + } + if (sql_normalize_name(name, name_len + 1, name_token->z, + name_token->n) < 0) { + sqlDbFree(db, name); return NULL; } - sqlNormalizeName(name); return name; } @@ -438,17 +445,16 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def) if (sql_field_retrieve(pParse, def, def->field_count) == NULL) return; struct region *region = &pParse->region; - z = region_alloc(region, pName->n + 1); + int name_len = sql_normalize_name(NULL, 0, pName->z, pName->n); + if (name_len < 0) + goto tarantool_error; + z = region_alloc(region, name_len + 1); if (z == NULL) { - diag_set(OutOfMemory, pName->n + 1, - "region_alloc", "z"); - pParse->rc = SQL_TARANTOOL_ERROR; - pParse->nErr++; - return; + diag_set(OutOfMemory, name_len + 1, "region_alloc", "z"); + goto tarantool_error; } - memcpy(z, pName->z, pName->n); - z[pName->n] = 0; - sqlNormalizeName(z); + if (sql_normalize_name(z, name_len + 1, pName->z, pName->n) < 0) + goto tarantool_error; for (uint32_t i = 0; i < def->field_count; i++) { if (strcmp(z, def->fields[i].name) == 0) { diag_set(ClientError, ER_SPACE_FIELD_IS_DUPLICATE, z); @@ -469,6 +475,9 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def) column_def->type = type_def->type; def->field_count++; pParse->constraintName.n = 0; + return; +tarantool_error: + sql_parser_error(pParse); } void diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index f5b2926c8..24a051e04 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -863,7 +863,16 @@ sql_expr_new(struct sql *db, int op, const Token *token, bool dequote) if (token != NULL) { if (op != TK_INTEGER || token->z == NULL || sqlGetInt32(token->z, &val) == 0) { - extra_sz = token->n + 1; + if (op == TK_ID || op == TK_COLLATE || + op == TK_FUNCTION) { + extra_sz = sql_normalize_name(NULL, 0, token->z, + token->n); + if (extra_sz < 0) + return NULL; + } else { + extra_sz = token->n; + } + extra_sz += 1; assert(val >= 0); } } @@ -889,15 +898,20 @@ sql_expr_new(struct sql *db, int op, const Token *token, bool dequote) } else { expr->u.zToken = (char *)&expr[1]; assert(token->z != NULL || token->n == 0); - memcpy(expr->u.zToken, token->z, token->n); - expr->u.zToken[token->n] = '\0'; - if (dequote) { - if (expr->u.zToken[0] == '"') - expr->flags |= EP_DblQuoted; - if (expr->op == TK_ID || expr->op == TK_COLLATE || - expr->op == TK_FUNCTION) - sqlNormalizeName(expr->u.zToken); - else + if (dequote && expr->u.zToken[0] == '"') + expr->flags |= EP_DblQuoted; + if (dequote && + (expr->op == TK_ID || expr->op == TK_COLLATE || + expr->op == TK_FUNCTION)) { + if (sql_normalize_name(expr->u.zToken, extra_sz, + token->z, token->n) < 0) { + sqlDbFree(db, expr); + return NULL; + } + } else { + memcpy(expr->u.zToken, token->z, token->n); + expr->u.zToken[token->n] = '\0'; + if (dequote) sqlDequote(expr->u.zToken); } } @@ -1776,18 +1790,30 @@ sqlExprListSetName(Parse * pParse, /* Parsing context */ ) { assert(pList != 0 || pParse->db->mallocFailed != 0); - if (pList) { - struct ExprList_item *pItem; - assert(pList->nExpr > 0); - pItem = &pList->a[pList->nExpr - 1]; - assert(pItem->zName == 0); - pItem->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n); - if (dequote) - sqlNormalizeName(pItem->zName); - /* n = 0 in case of select * */ - if (pName->n != 0) - sqlCheckIdentifierName(pParse, pItem->zName); - } + if (pList == NULL || pName->n == 0) + return; + assert(pList->nExpr > 0); + struct ExprList_item *item = &pList->a[pList->nExpr - 1]; + assert(item->zName == NULL); + if (dequote) { + int name_len = sql_normalize_name(NULL, 0, pName->z, pName->n); + if (name_len < 0) + goto tarantool_error; + item->zName = sqlDbMallocRawNN(pParse->db, name_len + 1); + if (item->zName == NULL) + return; + if (sql_normalize_name(item->zName, name_len + 1, pName->z, + pName->n) < 0) + goto tarantool_error; + } else { + item->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n); + if (item->zName == NULL) + return; + } + sqlCheckIdentifierName(pParse, item->zName); + return; +tarantool_error: + sql_parser_error(pParse); } /* diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 1a5882dda..c8488ec10 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -836,7 +836,16 @@ idlist(A) ::= nm(Y). { ** that created the expression. */ static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){ - Expr *p = sqlDbMallocRawNN(pParse->db, sizeof(Expr)+t.n+1); + int name_len = 0; + struct Expr *p = NULL; + if (op != TK_VARIABLE) { + name_len = sql_normalize_name(NULL, 0, t.z, t.n); + if (name_len < 0) + goto tarantool_error; + } else { + name_len = t.n; + } + p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_len + 1); if( p ){ memset(p, 0, sizeof(Expr)); switch (op) { @@ -869,10 +878,12 @@ idlist(A) ::= nm(Y). { p->flags = EP_Leaf; p->iAgg = -1; p->u.zToken = (char*)&p[1]; - memcpy(p->u.zToken, t.z, t.n); - p->u.zToken[t.n] = 0; - if (op != TK_VARIABLE){ - sqlNormalizeName(p->u.zToken); + if (op != TK_VARIABLE) { + if (sql_normalize_name(p->u.zToken, name_len + 1, t.z, t.n) < 0) + goto tarantool_error; + } else { + memcpy(p->u.zToken, t.z, t.n); + p->u.zToken[t.n] = 0; } #if SQL_MAX_EXPR_DEPTH>0 p->nHeight = 1; @@ -881,6 +892,10 @@ idlist(A) ::= nm(Y). { pOut->pExpr = p; pOut->zStart = t.z; pOut->zEnd = &t.z[t.n]; + return; +tarantool_error: + sqlDbFree(pParse->db, p); + sql_parser_error(pParse); } } diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 5434a2ab0..aa83593b1 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -4193,10 +4193,18 @@ flattenSubquery(Parse * pParse, /* Parsing context */ pList = pParent->pEList; for (i = 0; i < pList->nExpr; i++) { if (pList->a[i].zName == 0) { - char *zName = - sqlDbStrDup(db, pList->a[i].zSpan); - sqlNormalizeName(zName); - pList->a[i].zName = zName; + char *str = pList->a[i].zSpan; + int len = strlen(str); + int name_len = + sql_normalize_name(NULL, 0, str, len); + if (name_len < 0) + goto tarantool_error; + char *name = sqlDbMallocRawNN(db, name_len + 1); + if (name != NULL && + sql_normalize_name(name, name_len + 1, str, + len) < 0) + goto tarantool_error; + pList->a[i].zName = name; } } if (pSub->pOrderBy) { @@ -4273,6 +4281,9 @@ flattenSubquery(Parse * pParse, /* Parsing context */ } #endif + return 1; +tarantool_error: + sql_parser_error(pParse); return 1; } diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index 4401ea8b7..e3fa4024f 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -3182,7 +3182,32 @@ void sqlTreeViewWith(TreeView *, const With *); void sqlSetString(char **, sql *, const char *); void sqlErrorMsg(Parse *, const char *, ...); void sqlDequote(char *); -void sqlNormalizeName(char *z); + +/** + * Perform SQL name normalization: cast name to the upper-case + * (via Unicode Character Folding). Casing is locale-dependent + * and context-sensitive. The result may be longer or shorter + * than the original. The source string and the destination buffer + * must not overlap. + * For example, ß is converted to SS. + * The result is similar to SQL UPPER function. + * + * @param dst A buffer for the result string. The result will be + * NUL-terminated if the buffer is large enough. The + * contents is undefined in case of failure. + * @param dst_size The size of the buffer (number of bytes). + * If it is 0, then dest may be NULL and the + * function will only return the length of the + * result without writing any of the result + * string. + * @param src The original string. + * @param src_len The length of the original string. + * @retval The length of the result string, on success. + * @retval < 0 Otherwise. The diag message is set. + */ +int +sql_normalize_name(char *dst, int dst_size, const char *src, int src_len); + void sqlTokenInit(Token *, char *); int sqlKeywordCode(const unsigned char *, int); int sqlRunParser(Parse *, const char *, char **); diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c index 96c3a4a2f..5fbf96a10 100644 --- a/src/box/sql/trigger.c +++ b/src/box/sql/trigger.c @@ -280,15 +280,23 @@ sql_trigger_select_step(struct sql *db, struct Select *select) static struct TriggerStep * sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) { - int size = sizeof(TriggerStep) + target_name->n + 1; - struct TriggerStep *trigger_step = sqlDbMallocZero(db, size); + struct TriggerStep *trigger_step = NULL; + int name_len = + sql_normalize_name(NULL, 0, target_name->z, target_name->n); + if (name_len < 0) + return NULL; + trigger_step = sqlDbMallocZero(db, sizeof(TriggerStep) + name_len + 1); if (trigger_step == NULL) { - diag_set(OutOfMemory, size, "sqlDbMallocZero", "trigger_step"); + diag_set(OutOfMemory, name_len + 1, "sqlDbMallocZero", + "trigger_step"); return NULL; } char *z = (char *)&trigger_step[1]; - memcpy(z, target_name->z, target_name->n); - sqlNormalizeName(z); + if (sql_normalize_name(z, name_len + 1, target_name->z, + target_name->n) < 0) { + sqlDbFree(db, trigger_step); + return NULL; + } trigger_step->zTarget = z; trigger_step->op = op; return trigger_step; diff --git a/src/box/sql/util.c b/src/box/sql/util.c index c89e2e8ab..60133758a 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -41,6 +41,8 @@ #if HAVE_ISNAN || SQL_HAVE_ISNAN #include #endif +#include +#include "errinj.h" /* * Routine needed to support the testcase() macro. @@ -292,23 +294,37 @@ sqlDequote(char *z) z[j] = 0; } - -void -sqlNormalizeName(char *z) +int +sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) { - char quote; - int i=0; - if (z == 0) - return; - quote = z[0]; - if (sqlIsquote(quote)){ - sqlDequote(z); - return; - } - while(z[i]!=0){ - z[i] = (char)sqlToupper(z[i]); - i++; + assert(src != NULL); + if (sqlIsquote(src[0])){ + if (dst_size == 0) + return src_len; + memcpy(dst, src, src_len); + dst[src_len] = '\0'; + sqlDequote(dst); + return src_len; } + UErrorCode status = U_ZERO_ERROR; + ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { + status = U_MEMORY_ALLOCATION_ERROR; + goto error; + }); + UCaseMap *case_map = ucasemap_open(NULL, 0, &status); + if (case_map == NULL) + goto error; + int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len, + &status); + ucasemap_close(case_map); + assert(U_SUCCESS(status) || + (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR)); + return len; +error: + diag_set(CollationError, + "string conversion to the uppercase failed: %s", + u_errorName(status)); + return -1; } /* diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 41783cc74..c823d3597 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -125,6 +125,7 @@ struct errinj { _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ + _(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index 8e76b21b3..5bdb71d7b 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -22,6 +22,8 @@ errinj.info() state: false ERRINJ_SNAP_WRITE_ROW_TIMEOUT: state: 0 + ERRINJ_SQL_NAME_NORMALIZATION: + state: false ERRINJ_VY_SCHED_TIMEOUT: state: 0 ERRINJ_WAL_WRITE_PARTIAL: @@ -1672,3 +1674,21 @@ fio = require('fio') box.space.test:drop() --- ... +-- +-- gh-3931: Store regular identifiers in case-normal form +-- +errinj = box.error.injection +--- +... +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) +--- +- ok +... +box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") +--- +- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR' +... +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) +--- +- ok +... diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index c876f9afb..9e8095295 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -587,3 +587,11 @@ fio = require('fio') #fio.glob(fio.pathjoin(box.cfg.vinyl_dir, box.space.test.id, 0, '*.index.inprogress')) == 0 box.space.test:drop() + +-- +-- gh-3931: Store regular identifiers in case-normal form +-- +errinj = box.error.injection +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) +box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) diff --git a/test/sql-tap/identifier_case.test.lua b/test/sql-tap/identifier_case.test.lua index 923d5e66a..aaa4cc85a 100755 --- a/test/sql-tap/identifier_case.test.lua +++ b/test/sql-tap/identifier_case.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(71) +test:plan(73) local test_prefix = "identifier_case-" @@ -13,8 +13,10 @@ local data = { { 6, [[ "Table1" ]], {0} }, -- non ASCII characters case is not supported { 7, [[ русский ]], {0} }, - { 8, [[ Русский ]], {0} }, - { 9, [[ "русский" ]], {"/already exists/"} }, + { 8, [[ "русский" ]], {0} }, + { 9, [[ Großschreibweise ]], {0} }, + { 10, [[ Русский ]], {"/already exists/"} }, + { 11, [[ Grossschreibweise ]], {"/already exists/"} }, } for _, row in ipairs(data) do @@ -35,7 +37,7 @@ data = { { 5, [[ "table1" ]], {5}}, { 6, [[ "Table1" ]], {6}}, { 7, [[ русский ]], {7}}, - { 8, [[ Русский ]], {8}}, + { 8, [[ "русский" ]], {8}}, } for _, row in ipairs(data) do @@ -66,7 +68,7 @@ test:do_test( function () return test:drop_all_tables() end, - 3) + 4) data = { { 1, [[ columnn ]], {0} }, -- 2.21.0