From: Kirill Shcherbatov <kshcherbatov@tarantool.org> To: tarantool-patches@freelists.org, v.shpilevoy@tarantool.org Cc: Kirill Shcherbatov <kshcherbatov@tarantool.org> Subject: [tarantool-patches] [PATCH v1 4/4] sql: store regular identifiers in case-normal form Date: Fri, 15 Feb 2019 16:30:51 +0300 [thread overview] Message-ID: <fc8824833a93752ee289cd5e5d36fa0907474469.1550237391.git.kshcherbatov@tarantool.org> (raw) In-Reply-To: <cover.1550237391.git.kshcherbatov@tarantool.org> Introduced a new sql_normalize_name routine that converts SQL names to case-normal form via Unicode character folding. For example, ß is converted to SS. The result is similar to that of the SQL UPPER function. Closes #3931 --- src/box/lua/lua_sql.c | 11 ++-- src/box/sql/build.c | 38 ++++++++----- src/box/sql/expr.c | 78 ++++++++++++++++++--------- src/box/sql/parse.y | 26 +++++++-- src/box/sql/select.c | 20 +++++-- src/box/sql/sqlInt.h | 26 ++++++++- src/box/sql/trigger.c | 18 +++++-- src/box/sql/util.c | 42 +++++++++------ test/sql-tap/identifier_case.test.lua | 12 +++-- 9 files changed, 197 insertions(+), 74 deletions(-) diff --git a/src/box/lua/lua_sql.c b/src/box/lua/lua_sql.c index f5a7b7819..c27ca818e 100644 --- a/src/box/lua/lua_sql.c +++ b/src/box/lua/lua_sql.c @@ -176,13 +176,16 @@ lbox_sql_create_function(struct lua_State *L) } size_t name_len; const char *name = lua_tolstring(L, 1, &name_len); + int normalized_name_len = sql_normalize_name(NULL, 0, name, name_len); + if (normalized_name_len < 0) + return luaT_error(L); char *normalized_name = (char *) region_alloc(&fiber()->gc, - name_len + 1); + normalized_name_len + 1); if (normalized_name == NULL) return luaL_error(L, "out of memory"); - memcpy(normalized_name, name, name_len); - normalized_name[name_len] = '\0'; - sqlNormalizeName(normalized_name); + if (sql_normalize_name(normalized_name, normalized_name_len + 1, name, + name_len) < 0) + return luaT_error(L); struct lua_sql_func_info *func_info = (struct lua_sql_func_info *) 
malloc(sizeof(*func_info)); if (func_info == NULL) diff --git a/src/box/sql/build.c b/src/box/sql/build.c index a08148a97..ac4dbcc51 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -299,10 +299,21 @@ sql_name_from_token(struct Parse *parser, struct Token *name_token) { if (name_token == NULL || name_token->z == NULL) return NULL; - char *name = sqlDbStrNDup(parser->db, (char *)name_token->z, - name_token->n); - sqlNormalizeName(name); + char *name = NULL; + int name_len = + sql_normalize_name(NULL, 0, name_token->z, name_token->n); + if (name_len < 0) + goto tarantool_error; + name = sqlDbMallocRawNN(parser->db, name_len + 1); + if (sql_normalize_name(name, name_len + 1, name_token->z, + name_token->n) < 0) + goto tarantool_error; return name; +tarantool_error: + sqlDbFree(parser->db, name); + parser->rc = SQL_TARANTOOL_ERROR; + parser->nErr++; + return NULL; } /* @@ -505,17 +516,16 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def) (uint32_t) p->def->field_count) == NULL) return; struct region *region = &pParse->region; - z = region_alloc(region, pName->n + 1); + int name_len = sql_normalize_name(NULL, 0, pName->z, pName->n); + if (name_len < 0) + goto tarantool_error; + z = region_alloc(region, name_len + 1); if (z == NULL) { - diag_set(OutOfMemory, pName->n + 1, - "region_alloc", "z"); - pParse->rc = SQL_TARANTOOL_ERROR; - pParse->nErr++; - return; + diag_set(OutOfMemory, name_len + 1, "region_alloc", "z"); + goto tarantool_error; } - memcpy(z, pName->z, pName->n); - z[pName->n] = 0; - sqlNormalizeName(z); + if (sql_normalize_name(z, name_len + 1, pName->z, pName->n) < 0) + goto tarantool_error; for (i = 0; i < (int)p->def->field_count; i++) { if (strcmp(z, p->def->fields[i].name) == 0) { sqlErrorMsg(pParse, "duplicate column name: %s", z); @@ -535,6 +545,10 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def) column_def->type = type_def->type; p->def->field_count++; pParse->constraintName.n = 0; + 
return; +tarantool_error: + pParse->rc = SQL_TARANTOOL_ERROR; + pParse->nErr++; } void diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index 42531c107..c02dc682c 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -799,15 +799,24 @@ sql_expr_create(struct Parse *parser, int op, const Token *token, bool dequote) { int extra_sz = 0; int val = 0; + struct Expr *expr = NULL; if (token != NULL) { if (op != TK_INTEGER || token->z == NULL || sqlGetInt32(token->z, &val) == 0) { - extra_sz = token->n + 1; + if (op == TK_ID || op == TK_COLLATE || + op == TK_FUNCTION) { + extra_sz = sql_normalize_name(NULL, 0, token->z, + token->n); + if (extra_sz < 0) + goto tarantool_error; + } else { + extra_sz = token->n; + } + extra_sz += 1; assert(val >= 0); } } - struct Expr *expr = - sqlDbMallocRawNN(parser->db, sizeof(*expr) + extra_sz); + expr = sqlDbMallocRawNN(parser->db, sizeof(*expr) + extra_sz); if (expr == NULL) return NULL; @@ -826,19 +835,27 @@ sql_expr_create(struct Parse *parser, int op, const Token *token, bool dequote) } else { expr->u.zToken = (char *)&expr[1]; assert(token->z != NULL || token->n == 0); - memcpy(expr->u.zToken, token->z, token->n); - expr->u.zToken[token->n] = '\0'; - if (dequote) { - if (expr->u.zToken[0] == '"') - expr->flags |= EP_DblQuoted; - if (expr->op == TK_ID || expr->op == TK_COLLATE || - expr->op == TK_FUNCTION) - sqlNormalizeName(expr->u.zToken); - else + if (dequote && expr->u.zToken[0] == '"') + expr->flags |= EP_DblQuoted; + if (dequote && + (expr->op == TK_ID || expr->op == TK_COLLATE || + expr->op == TK_FUNCTION)) { + if (sql_normalize_name(expr->u.zToken, extra_sz, + token->z, token->n) < 0) + goto tarantool_error; + } else { + memcpy(expr->u.zToken, token->z, token->n); + expr->u.zToken[token->n] = '\0'; + if (dequote) sqlDequote(expr->u.zToken); } } return expr; +tarantool_error: + sqlDbFree(parser->db, expr); + parser->rc = SQL_TARANTOOL_ERROR; + parser->nErr++; + return NULL; } struct Expr * @@ -1714,18 +1731,31 @@ 
sqlExprListSetName(Parse * pParse, /* Parsing context */ ) { assert(pList != 0 || pParse->db->mallocFailed != 0); - if (pList) { - struct ExprList_item *pItem; - assert(pList->nExpr > 0); - pItem = &pList->a[pList->nExpr - 1]; - assert(pItem->zName == 0); - pItem->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n); - if (dequote) - sqlNormalizeName(pItem->zName); - /* n = 0 in case of select * */ - if (pName->n != 0) - sqlCheckIdentifierName(pParse, pItem->zName); - } + if (pList == NULL || pName->n == 0) + return; + assert(pList->nExpr > 0); + struct ExprList_item *item = &pList->a[pList->nExpr - 1]; + assert(item->zName == NULL); + if (dequote) { + int name_len = sql_normalize_name(NULL, 0, pName->z, pName->n); + if (name_len < 0) + goto tarantool_error; + item->zName = sqlDbMallocRawNN(pParse->db, name_len + 1); + if (item->zName == NULL) + return; + if (sql_normalize_name(item->zName, name_len + 1, pName->z, + pName->n) < 0) + goto tarantool_error; + } else { + item->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n); + if (item->zName == NULL) + return; + } + sqlCheckIdentifierName(pParse, item->zName); + return; +tarantool_error: + pParse->rc = SQL_TARANTOOL_ERROR; + pParse->nErr++; } /* diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index d7b721695..631bcb8f7 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -813,7 +813,16 @@ idlist(A) ::= nm(Y). ** that created the expression. */ static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){ - Expr *p = sqlDbMallocRawNN(pParse->db, sizeof(Expr)+t.n+1); + int name_len = 0; + struct Expr *p = NULL; + if (op != TK_VARIABLE) { + name_len = sql_normalize_name(NULL, 0, t.z, t.n); + if (name_len < 0) + goto tarantool_error; + } else { + name_len = t.n; + } + p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_len + 1); if( p ){ memset(p, 0, sizeof(Expr)); switch (op) { @@ -846,10 +855,12 @@ idlist(A) ::= nm(Y). 
p->flags = EP_Leaf; p->iAgg = -1; p->u.zToken = (char*)&p[1]; - memcpy(p->u.zToken, t.z, t.n); - p->u.zToken[t.n] = 0; - if (op != TK_VARIABLE){ - sqlNormalizeName(p->u.zToken); + if (op != TK_VARIABLE) { + if (sql_normalize_name(p->u.zToken, name_len + 1, t.z, t.n) < 0) + goto tarantool_error; + } else { + memcpy(p->u.zToken, t.z, t.n); + p->u.zToken[t.n] = 0; } #if SQL_MAX_EXPR_DEPTH>0 p->nHeight = 1; @@ -858,6 +869,11 @@ idlist(A) ::= nm(Y). pOut->pExpr = p; pOut->zStart = t.z; pOut->zEnd = &t.z[t.n]; + return; +tarantool_error: + sqlDbFree(pParse->db, p); + pParse->rc = SQL_TARANTOOL_ERROR; + pParse->nErr++; } } diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 5a1bc0c81..e05d082a7 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -4172,10 +4172,18 @@ flattenSubquery(Parse * pParse, /* Parsing context */ pList = pParent->pEList; for (i = 0; i < pList->nExpr; i++) { if (pList->a[i].zName == 0) { - char *zName = - sqlDbStrDup(db, pList->a[i].zSpan); - sqlNormalizeName(zName); - pList->a[i].zName = zName; + char *str = pList->a[i].zSpan; + int len = strlen(str); + int name_len = + sql_normalize_name(NULL, 0, str, len); + if (name_len < 0) + goto tarantool_error; + char *name = sqlDbMallocRawNN(db, name_len + 1); + if (name != NULL && + sql_normalize_name(name, name_len + 1, str, + len) < 0) + goto tarantool_error; + pList->a[i].zName = name; } } if (pSub->pOrderBy) { @@ -4248,6 +4256,10 @@ flattenSubquery(Parse * pParse, /* Parsing context */ } #endif + return 1; +tarantool_error: + pParse->rc = SQL_TARANTOOL_ERROR; + pParse->nErr++; return 1; } diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index 048c814e5..380b42d90 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -3264,7 +3264,31 @@ void sqlTreeViewWith(TreeView *, const With *); void sqlSetString(char **, sql *, const char *); void sqlErrorMsg(Parse *, const char *, ...); void sqlDequote(char *); -void sqlNormalizeName(char *z); + +/** + * Perform SQL name 
normalization: cast the name to upper case + * (via Unicode Character Folding). Casing is locale-dependent + * and context-sensitive. The result may be longer or shorter + * than the original. The source string and the destination buffer + * must not overlap. + * For example, ß is converted to SS. + * The result is similar to that of the SQL UPPER function. + * @param dst A buffer for the result string. The result will be + * NUL-terminated if the buffer is large enough. The + * contents are undefined in case of failure. + * @param dst_size The size of the buffer (number of bytes). + * If it is 0, then dst may be NULL and the + * function will only return the length of the + * result without writing any of the result + * string. + * @param src The original string. + * @param src_len The length of the original string. + * @retval The length of the result string, on success. + * @retval < 0 otherwise. + */ +int +sql_normalize_name(char *dst, int dst_size, const char *src, int src_len); + void sqlTokenInit(Token *, char *); int sqlKeywordCode(const unsigned char *, int); int sqlRunParser(Parse *, const char *, char **); diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c index a5b6cb4fd..767c2e09f 100644 --- a/src/box/sql/trigger.c +++ b/src/box/sql/trigger.c @@ -279,16 +279,26 @@ sql_trigger_step_allocate(struct Parse *parser, u8 op, struct Token *target_name) { struct sql *db = parser->db; - struct TriggerStep *trigger_step = - sqlDbMallocZero(db, sizeof(TriggerStep) + target_name->n + 1); + struct TriggerStep *trigger_step = NULL; + int name_len = + sql_normalize_name(NULL, 0, target_name->z, target_name->n); + if (name_len < 0) + goto tarantool_error; + trigger_step = sqlDbMallocZero(db, sizeof(TriggerStep) + name_len + 1); if (trigger_step != NULL) { char *z = (char *)&trigger_step[1]; - memcpy(z, target_name->z, target_name->n); - sqlNormalizeName(z); + if (sql_normalize_name(z, name_len + 1, target_name->z, + target_name->n) < 0) + goto tarantool_error; 
trigger_step->zTarget = z; trigger_step->op = op; } return trigger_step; +tarantool_error: + sqlDbFree(db, trigger_step); + parser->rc = SQL_TARANTOOL_ERROR; + parser->nErr++; + return NULL; } struct TriggerStep * diff --git a/src/box/sql/util.c b/src/box/sql/util.c index dadae1839..d0a352965 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -41,6 +41,7 @@ #if HAVE_ISNAN || SQL_HAVE_ISNAN #include <math.h> #endif +#include <unicode/ucasemap.h> /* * Routine needed to support the testcase() macro. @@ -289,23 +290,34 @@ sqlDequote(char *z) z[j] = 0; } - -void -sqlNormalizeName(char *z) +int +sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) { - char quote; - int i=0; - if (z == 0) - return; - quote = z[0]; - if (sqlIsquote(quote)){ - sqlDequote(z); - return; - } - while(z[i]!=0){ - z[i] = (char)sqlToupper(z[i]); - i++; + assert(src != NULL); + if (sqlIsquote(src[0])){ + if (dst_size == 0) + return src_len; + memcpy(dst, src, src_len); + dst[src_len] = '\0'; + sqlDequote(dst); + return src_len; } + UErrorCode status = U_ZERO_ERROR; + UCaseMap *case_map = ucasemap_open(NULL, 0, &status); + if (case_map == NULL) + goto error; + int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len, + &status); + ucasemap_close(case_map); + if (!U_SUCCESS(status) && + !(dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR)) + goto error; + return len; +error: + diag_set(CollationError, + "string conversion to the uppercase failed: %s", + u_errorName(status)); + return -1; } /* diff --git a/test/sql-tap/identifier_case.test.lua b/test/sql-tap/identifier_case.test.lua index f26399eb6..bc4daf0ff 100755 --- a/test/sql-tap/identifier_case.test.lua +++ b/test/sql-tap/identifier_case.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(71) +test:plan(73) local test_prefix = "identifier_case-" @@ -13,8 +13,10 @@ local data = { { 6, [[ "Table1" ]], {0} }, -- non ASCII characters case is not supported { 7, [[ русский 
]], {0} }, - { 8, [[ Русский ]], {0} }, - { 9, [[ "русский" ]], {"/already exists/"} }, + { 8, [[ "русский" ]], {0} }, + { 9, [[ Großschreibweise ]], {0} }, + { 10, [[ Русский ]], {"/already exists/"} }, + { 11, [[ Grossschreibweise ]], {"/already exists/"} }, } for _, row in ipairs(data) do @@ -35,7 +37,7 @@ data = { { 5, [[ "table1" ]], {5}}, { 6, [[ "Table1" ]], {6}}, { 7, [[ русский ]], {7}}, - { 8, [[ Русский ]], {8}}, + { 8, [[ "русский" ]], {8}}, } for _, row in ipairs(data) do @@ -66,7 +68,7 @@ test:do_test( function () return test:drop_all_tables() end, - 3) + 4) data = { { 1, [[ columnn ]], {0} }, -- 2.20.1
next prev parent reply other threads:[~2019-02-15 13:30 UTC|newest] Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-02-15 13:30 [tarantool-patches] [PATCH v1 0/4] " Kirill Shcherbatov 2019-02-15 13:30 ` [tarantool-patches] [PATCH v1 1/4] sql: patch sql_name_from_token to use Parser Kirill Shcherbatov 2019-02-15 13:30 ` [tarantool-patches] [PATCH v1 2/4] sql: patch sql_trigger_step_allocate " Kirill Shcherbatov 2019-02-15 13:30 ` [tarantool-patches] [PATCH v1 3/4] sql: patch sql_expr_create routine " Kirill Shcherbatov 2019-02-15 13:30 ` Kirill Shcherbatov [this message] 2019-02-22 12:20 ` [tarantool-patches] Re: [PATCH v1 0/4] sql: store regular identifiers in case-normal form Vladislav Shpilevoy 2019-02-22 12:38 ` Kirill Shcherbatov 2019-02-22 12:43 ` Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=fc8824833a93752ee289cd5e5d36fa0907474469.1550237391.git.kshcherbatov@tarantool.org \ --to=kshcherbatov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [tarantool-patches] [PATCH v1 4/4] sql: store regular identifiers in case-normal form' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.