[tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization

Kirill Shcherbatov kshcherbatov at tarantool.org
Thu Apr 4 17:07:13 MSK 2019


Because sql_normalize_name used to be called twice - once to
estimate the size of the name buffer and once to process the
data - obtaining the UCaseMap object each time, DDL performance
fell by 15%.

This patch should eliminate some of the negative effects of using
ICU for name normalization.

Thanks @avtikhon for the benchmark.

Follow up e7558062d3559e6bcc18f91eacb88269428321dc
---
https://github.com/tarantool/msgpuck/tree/kshch/gh-3931-perfomance-fixup
https://github.com/tarantool/tarantool/issues/3931

 src/box/sql.c            |  8 ++++
 src/box/sql.h            |  9 +++++
 src/box/sql/expr.c       | 25 ++++++-------
 src/box/sql/parse.y      | 22 ++++++-----
 src/box/sql/sqlInt.h     |  9 +----
 src/box/sql/trigger.c    | 22 +++++++----
 src/box/sql/util.c       | 80 ++++++++++++++++++++--------------------
 src/lib/core/errinj.h    |  1 -
 test/box/errinj.result   |  2 -
 test/sql/errinj.result   | 18 ---------
 test/sql/errinj.test.lua |  8 ----
 11 files changed, 97 insertions(+), 107 deletions(-)

diff --git a/src/box/sql.c b/src/box/sql.c
index 4fac020b0..872bfc4f4 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -54,6 +54,7 @@
 #include "iproto_constants.h"
 #include "fk_constraint.h"
 #include "mpstream.h"
+#include <unicode/ucasemap.h>
 
 static sql *db = NULL;
 
@@ -64,6 +65,8 @@ static const uint32_t default_sql_flags = SQL_ShortColNames
 					  | SQL_AutoIndex
 					  | SQL_RecTriggers;
 
+UCaseMap *sql_case_map;
+
 void
 sql_init()
 {
@@ -74,6 +77,11 @@ sql_init()
 	if (sql_init_db(&db) != SQL_OK)
 		panic("failed to initialize SQL subsystem");
 
+	UErrorCode status = U_ZERO_ERROR;
+	sql_case_map = ucasemap_open(NULL, 0, &status);
+	if (sql_case_map == NULL)
+		panic("failed to initialize SQL subsystem");
+
 	assert(db != NULL);
 }
 
diff --git a/src/box/sql.h b/src/box/sql.h
index 400360f59..a3e892595 100644
--- a/src/box/sql.h
+++ b/src/box/sql.h
@@ -59,6 +59,15 @@ sql_load_schema();
 struct sql *
 sql_get();
 
+struct UCaseMap;
+
+/**
+ * Opaque service object for ICU case mapping functions is
+ * initialized for default locale. Used to perform name
+ * normalization in SQL.
+*/
+extern struct UCaseMap *sql_case_map;
+
 struct Expr;
 struct Parse;
 struct Select;
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 838fbd21a..0668fcca6 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -965,7 +965,7 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
 struct Expr *
 sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 {
-	int extra_size = 0;
+	int extra_size = 0, rc;
 	bool is_name = false;
 	if (token != NULL) {
 		int val;
@@ -973,14 +973,7 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 		if (sql_expr_token_to_int(op, token, &val) == 0)
 			return sql_expr_new_int(db, val);
 		is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
-		if (is_name) {
-			extra_size = sql_normalize_name(NULL, 0, token->z,
-							token->n);
-			if (extra_size < 0)
-				return NULL;
-		} else {
-			extra_size = token->n + 1;
-		}
+		extra_size = token->n + 1;
 	}
 	struct Expr *e = sql_expr_new_empty(db, op, extra_size);
 	if (e == NULL || token == NULL || token->n == 0)
@@ -992,10 +985,16 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 		memcpy(e->u.zToken, token->z, token->n);
 		e->u.zToken[token->n] = '\0';
 		sqlDequote(e->u.zToken);
-	} else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
-				      token->n) < 0) {
-		sql_expr_delete(db, e, false);
-		return NULL;
+	} else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
+					    token->n)) > extra_size) {
+		extra_size = rc;
+		e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
+		if (e == NULL)
+			return NULL;
+		e->u.zToken = (char *) &e[1];
+		if (sql_normalize_name(e->u.zToken, extra_size, token->z,
+				       token->n) > extra_size)
+			unreachable();
 	}
 	return e;
 }
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index c37b8d429..357335d23 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -856,15 +856,8 @@ idlist(A) ::= nm(Y). {
   ** that created the expression.
   */
   static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
-    int name_sz = 0;
     struct Expr *p = NULL;
-    if (op != TK_VARIABLE) {
-      name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
-      if (name_sz < 0)
-        goto tarantool_error;
-    } else {
-      name_sz = t.n + 1;
-    }
+    int name_sz = t.n + 1;
     p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
     if( p ){
       memset(p, 0, sizeof(Expr));
@@ -899,8 +892,17 @@ idlist(A) ::= nm(Y). {
       p->iAgg = -1;
       p->u.zToken = (char*)&p[1];
       if (op != TK_VARIABLE) {
-        if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
-          goto tarantool_error;
+        int rc;
+        if ((rc = sql_normalize_name(p->u.zToken, name_sz, t.z,
+                                     t.n)) > name_sz) {
+          name_sz = rc;
+          p = sqlDbReallocOrFree(pParse->db, p, sizeof(Expr) + name_sz);
+          if (p == NULL)
+            goto tarantool_error;
+          p->u.zToken = (char*) &p[1];
+          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
+              unreachable();
+        }
       } else {
         memcpy(p->u.zToken, t.z, t.n);
         p->u.zToken[t.n] = 0;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 4a2197f96..1374f364c 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3207,15 +3207,10 @@ void sqlDequote(char *);
  * @param dst A buffer for the result string. The result will be
  *        0-terminated if the buffer is large enough. The contents
  *        is undefined in case of failure.
- * @param dst_size The size of the buffer (number of bytes). If it
- *        is 0, then dest may be NULL and the function will only
- *        return the length of the result without writing any of
- *        the result string
+ * @param dst_size The size of the buffer (number of bytes).
  * @param src The original string.
  * @param src_len The length of the original string.
- * @retval The count of bytes written(or need to be written) on
- *         success.
- * @retval < 0 Otherwise. The diag message is set.
+ * @retval The count of bytes written.
  */
 int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index c94880086..add792c63 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -279,10 +279,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
 static struct TriggerStep *
 sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 {
-	int name_size =
-		sql_normalize_name(NULL, 0, target_name->z, target_name->n);
-	if (name_size < 0)
-		return NULL;
+	int name_size = target_name->n + 1;
 	int size = sizeof(struct TriggerStep) + name_size;
 	struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
 	if (trigger_step == NULL) {
@@ -290,10 +287,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 		return NULL;
 	}
 	char *z = (char *)&trigger_step[1];
-	if (sql_normalize_name(z, name_size, target_name->z,
-			       target_name->n) < 0) {
-		sqlDbFree(db, trigger_step);
-		return NULL;
+	int rc;
+	if ((rc = sql_normalize_name(z, name_size, target_name->z,
+				     target_name->n)) > name_size) {
+		name_size = rc;
+		trigger_step = sqlDbReallocOrFree(db, trigger_step,
+						  sizeof(*trigger_step) +
+						  name_size);
+		if (trigger_step == NULL)
+			return NULL;
+		z = (char *) &trigger_step[1];
+		if (sql_normalize_name(z, name_size, target_name->z,
+				       target_name->n) > name_size)
+			unreachable();
 	}
 	trigger_step->zTarget = z;
 	trigger_step->op = op;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index e9553b3a4..b365966d5 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -259,67 +259,67 @@ int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
 {
 	assert(src != NULL);
+	assert(dst != NULL && dst_size > 0);
 	if (sqlIsquote(src[0])){
-		if (dst_size == 0)
-			return src_len + 1;
 		memcpy(dst, src, src_len);
 		dst[src_len] = '\0';
 		sqlDequote(dst);
 		return src_len + 1;
 	}
 	UErrorCode status = U_ZERO_ERROR;
-	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
-		status = U_MEMORY_ALLOCATION_ERROR;
-		goto error;
-	});
-	UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
-	if (case_map == NULL)
-		goto error;
-	int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
-				       &status);
-	ucasemap_close(case_map);
-	assert(U_SUCCESS(status) ||
-	       (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
+	assert(sql_case_map != NULL);
+	int len = ucasemap_utf8ToUpper(sql_case_map, dst, dst_size, src,
+				       src_len, &status);
+	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
 	return len + 1;
-error:
-	diag_set(CollationError,
-		 "string conversion to the uppercase failed: %s",
-		 u_errorName(status));
-	return -1;
 }
 
 char *
 sql_normalized_name_db_new(struct sql *db, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
-		return NULL;
+	int size = len + 1, rc;
 	char *res = sqlDbMallocRawNN(db, size);
-	if (res == NULL) {
-		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
-		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0) {
-		sqlDbFree(db, res);
-		return NULL;
-	}
+	if (res == NULL)
+		goto oom_error;
+	if ((rc = sql_normalize_name(res, size, name, len)) <= size)
+		return res;
+
+	size = rc;
+	res = sqlDbReallocOrFree(db, res, size);
+	if (res == NULL)
+		goto oom_error;
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
+
+oom_error:
+	diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
+	return NULL;
 }
 
 char *
 sql_normalized_name_region_new(struct region *r, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
-		return NULL;
-	char *res = (char *) region_alloc(r, size);
-	if (res == NULL) {
-		diag_set(OutOfMemory, size, "region_alloc", "res");
-		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0)
-		return NULL;
+	int size = len + 1, rc;
+	size_t region_svp = region_used(r);
+	char *res = region_alloc(r, size);
+	if (res == NULL)
+		goto oom_error;
+	if ((rc = sql_normalize_name(res, size, name, len)) <= size)
+		return res;
+
+	size = rc;
+	region_truncate(r, region_svp);
+	res = region_alloc(r, size);
+	if (res == NULL)
+		goto oom_error;
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
+
+oom_error:
+	diag_set(OutOfMemory, size, "region", "res");
+	return NULL;
 }
 
 /*
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index c823d3597..41783cc74 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,7 +125,6 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
-	_(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index b657234e1..8e76b21b3 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -22,8 +22,6 @@ errinj.info()
     state: false
   ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
     state: 0
-  ERRINJ_SQL_NAME_NORMALIZATION:
-    state: false
   ERRINJ_VY_SCHED_TIMEOUT:
     state: 0
   ERRINJ_WAL_WRITE_PARTIAL:
diff --git a/test/sql/errinj.result b/test/sql/errinj.result
index c974ab714..a1e7cc4a3 100644
--- a/test/sql/errinj.result
+++ b/test/sql/errinj.result
@@ -388,21 +388,3 @@ errinj.set("ERRINJ_WAL_DELAY", false)
 ---
 - ok
 ...
---
--- gh-3931: Store regular identifiers in case-normal form
---
-errinj = box.error.injection
----
-...
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
----
-- ok
-...
-box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
----
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
-...
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
----
-- ok
-...
diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
index f9e7a3c49..d8833feb4 100644
--- a/test/sql/errinj.test.lua
+++ b/test/sql/errinj.test.lua
@@ -139,11 +139,3 @@ box.sql.execute("INSERT INTO t VALUES (2);")
 box.sql.execute("UPDATE t SET id = 2;")
 -- Finish drop space.
 errinj.set("ERRINJ_WAL_DELAY", false)
-
---
--- gh-3931: Store regular identifiers in case-normal form
---
-errinj = box.error.injection
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
-box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
-- 
2.21.0





More information about the Tarantool-patches mailing list