[tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization
Kirill Shcherbatov
kshcherbatov at tarantool.org
Thu Apr 4 17:07:13 MSK 2019
Because sql_normalize_name used to be called twice - to estimate
the size of the name buffer and to process data querying the
UCaseMap object each time DDL performance felt by 15%.
This patch should eliminate some of the negative effects of using
ICU for name normalization.
Thanks @avtikhon for a bechmark
Follow up e7558062d3559e6bcc18f91eacb88269428321dc
---
https://github.com/tarantool/msgpuck/tree/kshch/gh-3931-perfomance-fixup
https://github.com/tarantool/tarantool/issues/3931
src/box/sql.c | 8 ++++
src/box/sql.h | 9 +++++
src/box/sql/expr.c | 25 ++++++-------
src/box/sql/parse.y | 22 ++++++-----
src/box/sql/sqlInt.h | 9 +----
src/box/sql/trigger.c | 22 +++++++----
src/box/sql/util.c | 80 ++++++++++++++++++++--------------------
src/lib/core/errinj.h | 1 -
test/box/errinj.result | 2 -
test/sql/errinj.result | 18 ---------
test/sql/errinj.test.lua | 8 ----
11 files changed, 97 insertions(+), 107 deletions(-)
diff --git a/src/box/sql.c b/src/box/sql.c
index 4fac020b0..872bfc4f4 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -54,6 +54,7 @@
#include "iproto_constants.h"
#include "fk_constraint.h"
#include "mpstream.h"
+#include <unicode/ucasemap.h>
static sql *db = NULL;
@@ -64,6 +65,8 @@ static const uint32_t default_sql_flags = SQL_ShortColNames
| SQL_AutoIndex
| SQL_RecTriggers;
+UCaseMap *sql_case_map;
+
void
sql_init()
{
@@ -74,6 +77,11 @@ sql_init()
if (sql_init_db(&db) != SQL_OK)
panic("failed to initialize SQL subsystem");
+ UErrorCode status = U_ZERO_ERROR;
+ sql_case_map = ucasemap_open(NULL, 0, &status);
+ if (sql_case_map == NULL)
+ panic("failed to initialize SQL subsystem");
+
assert(db != NULL);
}
diff --git a/src/box/sql.h b/src/box/sql.h
index 400360f59..a3e892595 100644
--- a/src/box/sql.h
+++ b/src/box/sql.h
@@ -59,6 +59,15 @@ sql_load_schema();
struct sql *
sql_get();
+struct UCaseMap;
+
+/**
+ * Opaque service object for ICU case mapping functions is
+ * initialized for default locale. Used to perform name
+ * normalization in SQL.
+*/
+extern struct UCaseMap *sql_case_map;
+
struct Expr;
struct Parse;
struct Select;
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 838fbd21a..0668fcca6 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -965,7 +965,7 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
struct Expr *
sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
{
- int extra_size = 0;
+ int extra_size = 0, rc;
bool is_name = false;
if (token != NULL) {
int val;
@@ -973,14 +973,7 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
if (sql_expr_token_to_int(op, token, &val) == 0)
return sql_expr_new_int(db, val);
is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
- if (is_name) {
- extra_size = sql_normalize_name(NULL, 0, token->z,
- token->n);
- if (extra_size < 0)
- return NULL;
- } else {
- extra_size = token->n + 1;
- }
+ extra_size = token->n + 1;
}
struct Expr *e = sql_expr_new_empty(db, op, extra_size);
if (e == NULL || token == NULL || token->n == 0)
@@ -992,10 +985,16 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
memcpy(e->u.zToken, token->z, token->n);
e->u.zToken[token->n] = '\0';
sqlDequote(e->u.zToken);
- } else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
- token->n) < 0) {
- sql_expr_delete(db, e, false);
- return NULL;
+ } else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
+ token->n)) > extra_size) {
+ extra_size = rc;
+ e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
+ if (e == NULL)
+ return NULL;
+ e->u.zToken = (char *) &e[1];
+ if (sql_normalize_name(e->u.zToken, extra_size, token->z,
+ token->n) > extra_size)
+ unreachable();
}
return e;
}
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index c37b8d429..357335d23 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -856,15 +856,8 @@ idlist(A) ::= nm(Y). {
** that created the expression.
*/
static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
- int name_sz = 0;
struct Expr *p = NULL;
- if (op != TK_VARIABLE) {
- name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
- if (name_sz < 0)
- goto tarantool_error;
- } else {
- name_sz = t.n + 1;
- }
+ int name_sz = t.n + 1;
p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
if( p ){
memset(p, 0, sizeof(Expr));
@@ -899,8 +892,17 @@ idlist(A) ::= nm(Y). {
p->iAgg = -1;
p->u.zToken = (char*)&p[1];
if (op != TK_VARIABLE) {
- if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
- goto tarantool_error;
+ int rc;
+ if ((rc = sql_normalize_name(p->u.zToken, name_sz, t.z,
+ t.n)) > name_sz) {
+ name_sz = rc;
+ p = sqlDbReallocOrFree(pParse->db, p, sizeof(Expr) + name_sz);
+ if (p == NULL)
+ goto tarantool_error;
+ p->u.zToken = (char*) &p[1];
+ if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
+ unreachable();
+ }
} else {
memcpy(p->u.zToken, t.z, t.n);
p->u.zToken[t.n] = 0;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 4a2197f96..1374f364c 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3207,15 +3207,10 @@ void sqlDequote(char *);
* @param dst A buffer for the result string. The result will be
* 0-terminated if the buffer is large enough. The contents
* is undefined in case of failure.
- * @param dst_size The size of the buffer (number of bytes). If it
- * is 0, then dest may be NULL and the function will only
- * return the length of the result without writing any of
- * the result string
+ * @param dst_size The size of the buffer (number of bytes).
* @param src The original string.
* @param src_len The length of the original string.
- * @retval The count of bytes written(or need to be written) on
- * success.
- * @retval < 0 Otherwise. The diag message is set.
+ * @retval The count of bytes written.
*/
int
sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index c94880086..add792c63 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -279,10 +279,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
static struct TriggerStep *
sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
{
- int name_size =
- sql_normalize_name(NULL, 0, target_name->z, target_name->n);
- if (name_size < 0)
- return NULL;
+ int name_size = target_name->n + 1;
int size = sizeof(struct TriggerStep) + name_size;
struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
if (trigger_step == NULL) {
@@ -290,10 +287,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
return NULL;
}
char *z = (char *)&trigger_step[1];
- if (sql_normalize_name(z, name_size, target_name->z,
- target_name->n) < 0) {
- sqlDbFree(db, trigger_step);
- return NULL;
+ int rc;
+ if ((rc = sql_normalize_name(z, name_size, target_name->z,
+ target_name->n)) > name_size) {
+ name_size = rc;
+ trigger_step = sqlDbReallocOrFree(db, trigger_step,
+ sizeof(*trigger_step) +
+ name_size);
+ if (trigger_step == NULL)
+ return NULL;
+ z = (char *) &trigger_step[1];
+ if (sql_normalize_name(z, name_size, target_name->z,
+ target_name->n) > name_size)
+ unreachable();
}
trigger_step->zTarget = z;
trigger_step->op = op;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index e9553b3a4..b365966d5 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -259,67 +259,67 @@ int
sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
{
assert(src != NULL);
+ assert(dst != NULL && dst_size > 0);
if (sqlIsquote(src[0])){
- if (dst_size == 0)
- return src_len + 1;
memcpy(dst, src, src_len);
dst[src_len] = '\0';
sqlDequote(dst);
return src_len + 1;
}
UErrorCode status = U_ZERO_ERROR;
- ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto error;
- });
- UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
- if (case_map == NULL)
- goto error;
- int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
- &status);
- ucasemap_close(case_map);
- assert(U_SUCCESS(status) ||
- (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
+ assert(sql_case_map != NULL);
+ int len = ucasemap_utf8ToUpper(sql_case_map, dst, dst_size, src,
+ src_len, &status);
+ assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
return len + 1;
-error:
- diag_set(CollationError,
- "string conversion to the uppercase failed: %s",
- u_errorName(status));
- return -1;
}
char *
sql_normalized_name_db_new(struct sql *db, const char *name, int len)
{
- int size = sql_normalize_name(NULL, 0, name, len);
- if (size < 0)
- return NULL;
+ int size = len + 1, rc;
char *res = sqlDbMallocRawNN(db, size);
- if (res == NULL) {
- diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
- return NULL;
- }
- if (sql_normalize_name(res, size, name, len) < 0) {
- sqlDbFree(db, res);
- return NULL;
- }
+ if (res == NULL)
+ goto oom_error;
+ if ((rc = sql_normalize_name(res, size, name, len)) <= size)
+ return res;
+
+ size = rc;
+ res = sqlDbReallocOrFree(db, res, size);
+ if (res == NULL)
+ goto oom_error;
+ if (sql_normalize_name(res, size, name, len) > size)
+ unreachable();
return res;
+
+oom_error:
+ diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
+ return NULL;
}
char *
sql_normalized_name_region_new(struct region *r, const char *name, int len)
{
- int size = sql_normalize_name(NULL, 0, name, len);
- if (size < 0)
- return NULL;
- char *res = (char *) region_alloc(r, size);
- if (res == NULL) {
- diag_set(OutOfMemory, size, "region_alloc", "res");
- return NULL;
- }
- if (sql_normalize_name(res, size, name, len) < 0)
- return NULL;
+ int size = len + 1, rc;
+ size_t region_svp = region_used(r);
+ char *res = region_alloc(r, size);
+ if (res == NULL)
+ goto oom_error;
+ if ((rc = sql_normalize_name(res, size, name, len)) <= size)
+ return res;
+
+ size = rc;
+ region_truncate(r, region_svp);
+ res = region_alloc(r, size);
+ if (res == NULL)
+ goto oom_error;
+ if (sql_normalize_name(res, size, name, len) > size)
+ unreachable();
return res;
+
+oom_error:
+ diag_set(OutOfMemory, size, "region", "res");
+ return NULL;
}
/*
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index c823d3597..41783cc74 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,7 +125,6 @@ struct errinj {
_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
- _(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \
ENUM0(errinj_id, ERRINJ_LIST);
extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index b657234e1..8e76b21b3 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -22,8 +22,6 @@ errinj.info()
state: false
ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
state: 0
- ERRINJ_SQL_NAME_NORMALIZATION:
- state: false
ERRINJ_VY_SCHED_TIMEOUT:
state: 0
ERRINJ_WAL_WRITE_PARTIAL:
diff --git a/test/sql/errinj.result b/test/sql/errinj.result
index c974ab714..a1e7cc4a3 100644
--- a/test/sql/errinj.result
+++ b/test/sql/errinj.result
@@ -388,21 +388,3 @@ errinj.set("ERRINJ_WAL_DELAY", false)
---
- ok
...
---
--- gh-3931: Store regular identifiers in case-normal form
---
-errinj = box.error.injection
----
-...
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
----
-- ok
-...
-box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
----
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
-...
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
----
-- ok
-...
diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
index f9e7a3c49..d8833feb4 100644
--- a/test/sql/errinj.test.lua
+++ b/test/sql/errinj.test.lua
@@ -139,11 +139,3 @@ box.sql.execute("INSERT INTO t VALUES (2);")
box.sql.execute("UPDATE t SET id = 2;")
-- Finish drop space.
errinj.set("ERRINJ_WAL_DELAY", false)
-
---
--- gh-3931: Store regular identifiers in case-normal form
---
-errinj = box.error.injection
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
-box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
--
2.21.0
More information about the Tarantool-patches
mailing list