[tarantool-patches] [PATCH v2 7/7] sql: store regular identifiers in case-normal form
Kirill Shcherbatov
kshcherbatov at tarantool.org
Wed Feb 27 14:13:18 MSK 2019
Introduce a new sql_normalize_name routine that converts SQL names
to case-normal form via Unicode character folding.
For example, ß is converted to SS. The result is similar to that of
the SQL UPPER function.
Closes #3931
---
src/box/lua/lua_sql.c | 11 +++--
src/box/sql/build.c | 36 +++++++++-----
src/box/sql/expr.c | 71 ++++++++++++++++++---------
src/box/sql/parse.y | 26 ++++++++--
src/box/sql/select.c | 20 ++++++--
src/box/sql/sqlInt.h | 26 +++++++++-
src/box/sql/trigger.c | 18 +++++--
src/box/sql/util.c | 42 ++++++++++------
test/sql-tap/identifier_case.test.lua | 12 +++--
9 files changed, 188 insertions(+), 74 deletions(-)
diff --git a/src/box/lua/lua_sql.c b/src/box/lua/lua_sql.c
index f5a7b7819..c27ca818e 100644
--- a/src/box/lua/lua_sql.c
+++ b/src/box/lua/lua_sql.c
@@ -176,13 +176,16 @@ lbox_sql_create_function(struct lua_State *L)
}
size_t name_len;
const char *name = lua_tolstring(L, 1, &name_len);
+ int normalized_name_len = sql_normalize_name(NULL, 0, name, name_len);
+ if (normalized_name_len < 0)
+ return luaT_error(L);
char *normalized_name = (char *) region_alloc(&fiber()->gc,
- name_len + 1);
+ normalized_name_len + 1);
if (normalized_name == NULL)
return luaL_error(L, "out of memory");
- memcpy(normalized_name, name, name_len);
- normalized_name[name_len] = '\0';
- sqlNormalizeName(normalized_name);
+ if (sql_normalize_name(normalized_name, normalized_name_len + 1, name,
+ name_len) < 0)
+ return luaT_error(L);
struct lua_sql_func_info *func_info =
(struct lua_sql_func_info *) malloc(sizeof(*func_info));
if (func_info == NULL)
diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index 24f20836b..d167a5714 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -236,13 +236,20 @@ char *
sql_name_from_token(struct sql *db, struct Token *name_token)
{
assert(name_token != NULL && name_token->z != NULL);
- char *name = sqlDbStrNDup(db, (char *)name_token->z, name_token->n);
+ int name_len =
+ sql_normalize_name(NULL, 0, name_token->z, name_token->n);
+ if (name_len < 0)
+ return NULL;
+ char *name = sqlDbMallocRawNN(db, name_len + 1);
if (name == NULL) {
- diag_set(OutOfMemory, name_token->n + 1, "sqlDbStrNDup",
- "name");
+ diag_set(OutOfMemory, name_len + 1, "sqlDbMallocRawNN", "name");
+ return NULL;
+ }
+ if (sql_normalize_name(name, name_len + 1, name_token->z,
+ name_token->n) < 0) {
+ sqlDbFree(db, name);
return NULL;
}
- sqlNormalizeName(name);
return name;
}
@@ -441,17 +448,16 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def)
if (sql_field_retrieve(pParse, def, def->field_count) == NULL)
return;
struct region *region = &pParse->region;
- z = region_alloc(region, pName->n + 1);
+ int name_len = sql_normalize_name(NULL, 0, pName->z, pName->n);
+ if (name_len < 0)
+ goto tarantool_error;
+ z = region_alloc(region, name_len + 1);
if (z == NULL) {
- diag_set(OutOfMemory, pName->n + 1,
- "region_alloc", "z");
- pParse->rc = SQL_TARANTOOL_ERROR;
- pParse->nErr++;
- return;
+ diag_set(OutOfMemory, name_len + 1, "region_alloc", "z");
+ goto tarantool_error;
}
- memcpy(z, pName->z, pName->n);
- z[pName->n] = 0;
- sqlNormalizeName(z);
+ if (sql_normalize_name(z, name_len + 1, pName->z, pName->n) < 0)
+ goto tarantool_error;
for (uint32_t i = 0; i < def->field_count; i++) {
if (strcmp(z, def->fields[i].name) == 0) {
diag_set(ClientError, ER_SPACE_FIELD_IS_DUPLICATE, z);
@@ -472,6 +478,10 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def)
column_def->type = type_def->type;
def->field_count++;
pParse->constraintName.n = 0;
+ return;
+tarantool_error:
+ pParse->rc = SQL_TARANTOOL_ERROR;
+ pParse->nErr++;
}
void
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index dd5e2c28d..c03873c02 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -863,7 +863,16 @@ sql_expr_create(struct sql *db, int op, const Token *token, bool dequote)
if (token != NULL) {
if (op != TK_INTEGER || token->z == NULL ||
sqlGetInt32(token->z, &val) == 0) {
- extra_sz = token->n + 1;
+ if (op == TK_ID || op == TK_COLLATE ||
+ op == TK_FUNCTION) {
+ extra_sz = sql_normalize_name(NULL, 0, token->z,
+ token->n);
+ if (extra_sz < 0)
+ return NULL;
+ } else {
+ extra_sz = token->n;
+ }
+ extra_sz += 1;
assert(val >= 0);
}
}
@@ -889,15 +898,20 @@ sql_expr_create(struct sql *db, int op, const Token *token, bool dequote)
} else {
expr->u.zToken = (char *)&expr[1];
assert(token->z != NULL || token->n == 0);
- memcpy(expr->u.zToken, token->z, token->n);
- expr->u.zToken[token->n] = '\0';
- if (dequote) {
- if (expr->u.zToken[0] == '"')
- expr->flags |= EP_DblQuoted;
- if (expr->op == TK_ID || expr->op == TK_COLLATE ||
- expr->op == TK_FUNCTION)
- sqlNormalizeName(expr->u.zToken);
- else
+ if (dequote && expr->u.zToken[0] == '"')
+ expr->flags |= EP_DblQuoted;
+ if (dequote &&
+ (expr->op == TK_ID || expr->op == TK_COLLATE ||
+ expr->op == TK_FUNCTION)) {
+ if (sql_normalize_name(expr->u.zToken, extra_sz,
+ token->z, token->n) < 0) {
+ sqlDbFree(db, expr);
+ return NULL;
+ }
+ } else {
+ memcpy(expr->u.zToken, token->z, token->n);
+ expr->u.zToken[token->n] = '\0';
+ if (dequote)
sqlDequote(expr->u.zToken);
}
}
@@ -1778,18 +1792,31 @@ sqlExprListSetName(Parse * pParse, /* Parsing context */
)
{
assert(pList != 0 || pParse->db->mallocFailed != 0);
- if (pList) {
- struct ExprList_item *pItem;
- assert(pList->nExpr > 0);
- pItem = &pList->a[pList->nExpr - 1];
- assert(pItem->zName == 0);
- pItem->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n);
- if (dequote)
- sqlNormalizeName(pItem->zName);
- /* n = 0 in case of select * */
- if (pName->n != 0)
- sqlCheckIdentifierName(pParse, pItem->zName);
- }
+ if (pList == NULL || pName->n == 0)
+ return;
+ assert(pList->nExpr > 0);
+ struct ExprList_item *item = &pList->a[pList->nExpr - 1];
+ assert(item->zName == NULL);
+ if (dequote) {
+ int name_len = sql_normalize_name(NULL, 0, pName->z, pName->n);
+ if (name_len < 0)
+ goto tarantool_error;
+ item->zName = sqlDbMallocRawNN(pParse->db, name_len + 1);
+ if (item->zName == NULL)
+ return;
+ if (sql_normalize_name(item->zName, name_len + 1, pName->z,
+ pName->n) < 0)
+ goto tarantool_error;
+ } else {
+ item->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n);
+ if (item->zName == NULL)
+ return;
+ }
+ sqlCheckIdentifierName(pParse, item->zName);
+ return;
+tarantool_error:
+ pParse->rc = SQL_TARANTOOL_ERROR;
+ pParse->nErr++;
}
/*
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index d25995ec0..3204833e9 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -836,7 +836,16 @@ idlist(A) ::= nm(Y). {
** that created the expression.
*/
static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
- Expr *p = sqlDbMallocRawNN(pParse->db, sizeof(Expr)+t.n+1);
+ int name_len = 0;
+ struct Expr *p = NULL;
+ if (op != TK_VARIABLE) {
+ name_len = sql_normalize_name(NULL, 0, t.z, t.n);
+ if (name_len < 0)
+ goto tarantool_error;
+ } else {
+ name_len = t.n;
+ }
+ p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_len + 1);
if( p ){
memset(p, 0, sizeof(Expr));
switch (op) {
@@ -869,10 +878,12 @@ idlist(A) ::= nm(Y). {
p->flags = EP_Leaf;
p->iAgg = -1;
p->u.zToken = (char*)&p[1];
- memcpy(p->u.zToken, t.z, t.n);
- p->u.zToken[t.n] = 0;
- if (op != TK_VARIABLE){
- sqlNormalizeName(p->u.zToken);
+ if (op != TK_VARIABLE) {
+ if (sql_normalize_name(p->u.zToken, name_len + 1, t.z, t.n) < 0)
+ goto tarantool_error;
+ } else {
+ memcpy(p->u.zToken, t.z, t.n);
+ p->u.zToken[t.n] = 0;
}
#if SQL_MAX_EXPR_DEPTH>0
p->nHeight = 1;
@@ -881,6 +892,11 @@ idlist(A) ::= nm(Y). {
pOut->pExpr = p;
pOut->zStart = t.z;
pOut->zEnd = &t.z[t.n];
+ return;
+tarantool_error:
+ sqlDbFree(pParse->db, p);
+ pParse->rc = SQL_TARANTOOL_ERROR;
+ pParse->nErr++;
}
}
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 2ae27e6c4..7b2a38101 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -4195,10 +4195,18 @@ flattenSubquery(Parse * pParse, /* Parsing context */
pList = pParent->pEList;
for (i = 0; i < pList->nExpr; i++) {
if (pList->a[i].zName == 0) {
- char *zName =
- sqlDbStrDup(db, pList->a[i].zSpan);
- sqlNormalizeName(zName);
- pList->a[i].zName = zName;
+ char *str = pList->a[i].zSpan;
+ int len = strlen(str);
+ int name_len =
+ sql_normalize_name(NULL, 0, str, len);
+ if (name_len < 0)
+ goto tarantool_error;
+ char *name = sqlDbMallocRawNN(db, name_len + 1);
+ if (name != NULL &&
+ sql_normalize_name(name, name_len + 1, str,
+ len) < 0)
+ goto tarantool_error;
+ pList->a[i].zName = name;
}
}
if (pSub->pOrderBy) {
@@ -4276,6 +4284,10 @@ flattenSubquery(Parse * pParse, /* Parsing context */
}
#endif
+ return 1;
+tarantool_error:
+ pParse->rc = SQL_TARANTOOL_ERROR;
+ pParse->nErr++;
return 1;
}
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 82aad0077..026a409c4 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3218,7 +3218,31 @@ void sqlTreeViewWith(TreeView *, const With *);
void sqlSetString(char **, sql *, const char *);
void sqlErrorMsg(Parse *, const char *, ...);
void sqlDequote(char *);
-void sqlNormalizeName(char *z);
+
+/**
+ * Perform SQL name normalization: cast name to the upper-case
+ * (via Unicode Character Folding). Casing is locale-dependent
+ * and context-sensitive. The result may be longer or shorter
+ * than the original. The source string and the destination buffer
+ * must not overlap.
+ * For example, ß is converted to SS.
+ * The result is similar to SQL UPPER function.
+ * @param dst A buffer for the result string. The result will be
+ * NUL-terminated if the buffer is large enough. The
+ * contents are undefined in case of failure.
+ * @param dst_size The size of the buffer (number of bytes).
+ * If it is 0, then dst may be NULL and the
+ * function will only return the length of the
+ * result without writing any of the result
+ * string.
+ * @param src The original string.
+ * @param src_len The length of the original string.
+ * @retval The length of the result string, on success.
+ * @retval < 0 otherwise.
+ */
+int
+sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
+
void sqlTokenInit(Token *, char *);
int sqlKeywordCode(const unsigned char *, int);
int sqlRunParser(Parse *, const char *, char **);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index af62a5eff..d43c7bad4 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -278,15 +278,23 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
static struct TriggerStep *
sql_trigger_step_allocate(struct sql *db, u8 op, struct Token *target_name)
{
- int size = sizeof(TriggerStep) + target_name->n + 1;
- struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
+ struct TriggerStep *trigger_step = NULL;
+ int name_len =
+ sql_normalize_name(NULL, 0, target_name->z, target_name->n);
+ if (name_len < 0)
+ return NULL;
+ trigger_step = sqlDbMallocZero(db, sizeof(TriggerStep) + name_len + 1);
if (trigger_step == NULL) {
- diag_set(OutOfMemory, size, "sqlDbMallocZero", "trigger_step");
+ diag_set(OutOfMemory, name_len + 1, "sqlDbMallocZero",
+ "trigger_step");
return NULL;
}
char *z = (char *)&trigger_step[1];
- memcpy(z, target_name->z, target_name->n);
- sqlNormalizeName(z);
+ if (sql_normalize_name(z, name_len + 1, target_name->z,
+ target_name->n) < 0) {
+ sqlDbFree(db, trigger_step);
+ return NULL;
+ }
trigger_step->zTarget = z;
trigger_step->op = op;
return trigger_step;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index c89e2e8ab..924149d62 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -41,6 +41,7 @@
#if HAVE_ISNAN || SQL_HAVE_ISNAN
#include <math.h>
#endif
+#include <unicode/ucasemap.h>
/*
* Routine needed to support the testcase() macro.
@@ -292,23 +293,34 @@ sqlDequote(char *z)
z[j] = 0;
}
-
-void
-sqlNormalizeName(char *z)
+int
+sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
{
- char quote;
- int i=0;
- if (z == 0)
- return;
- quote = z[0];
- if (sqlIsquote(quote)){
- sqlDequote(z);
- return;
- }
- while(z[i]!=0){
- z[i] = (char)sqlToupper(z[i]);
- i++;
+ assert(src != NULL);
+ if (sqlIsquote(src[0])){
+ if (dst_size == 0)
+ return src_len;
+ memcpy(dst, src, src_len);
+ dst[src_len] = '\0';
+ sqlDequote(dst);
+ return src_len;
}
+ UErrorCode status = U_ZERO_ERROR;
+ UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
+ if (case_map == NULL)
+ goto error;
+ int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
+ &status);
+ ucasemap_close(case_map);
+ if (!U_SUCCESS(status) &&
+ !(dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR))
+ goto error;
+ return len;
+error:
+ diag_set(CollationError,
+ "string conversion to the uppercase failed: %s",
+ u_errorName(status));
+ return -1;
}
/*
diff --git a/test/sql-tap/identifier_case.test.lua b/test/sql-tap/identifier_case.test.lua
index 923d5e66a..aaa4cc85a 100755
--- a/test/sql-tap/identifier_case.test.lua
+++ b/test/sql-tap/identifier_case.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
test = require("sqltester")
-test:plan(71)
+test:plan(73)
local test_prefix = "identifier_case-"
@@ -13,8 +13,10 @@ local data = {
{ 6, [[ "Table1" ]], {0} },
-- non ASCII characters case is not supported
{ 7, [[ русский ]], {0} },
- { 8, [[ Русский ]], {0} },
- { 9, [[ "русский" ]], {"/already exists/"} },
+ { 8, [[ "русский" ]], {0} },
+ { 9, [[ Großschreibweise ]], {0} },
+ { 10, [[ Русский ]], {"/already exists/"} },
+ { 11, [[ Grossschreibweise ]], {"/already exists/"} },
}
for _, row in ipairs(data) do
@@ -35,7 +37,7 @@ data = {
{ 5, [[ "table1" ]], {5}},
{ 6, [[ "Table1" ]], {6}},
{ 7, [[ русский ]], {7}},
- { 8, [[ Русский ]], {8}},
+ { 8, [[ "русский" ]], {8}},
}
for _, row in ipairs(data) do
@@ -66,7 +68,7 @@ test:do_test(
function ()
return test:drop_all_tables()
end,
- 3)
+ 4)
data = {
{ 1, [[ columnn ]], {0} },
--
2.20.1
More information about the Tarantool-patches
mailing list