[tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization

Tarantool development patches archive
 help / color / mirror / Atom feed

* [tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization
@ 2019-04-04 14:07 Kirill Shcherbatov
  2019-04-04 15:03 ` [tarantool-patches] " Vladislav Shpilevoy
  2019-04-05 11:33 ` Kirill Yukhin
  0 siblings, 2 replies; 7+ messages in thread
From: Kirill Shcherbatov @ 2019-04-04 14:07 UTC (permalink / raw)
  To: tarantool-patches, v.shpilevoy; +Cc: Kirill Shcherbatov

Because sql_normalize_name used to be called twice - to estimate
the size of the name buffer and to process data querying the
UCaseMap object each time DDL performance felt by 15%.

This patch should eliminate some of the negative effects of using
ICU for name normalization.

Thanks @avtikhon for a bechmark

Follow up e7558062d3559e6bcc18f91eacb88269428321dc
---
https://github.com/tarantool/msgpuck/tree/kshch/gh-3931-perfomance-fixup
https://github.com/tarantool/tarantool/issues/3931

 src/box/sql.c            |  8 ++++
 src/box/sql.h            |  9 +++++
 src/box/sql/expr.c       | 25 ++++++-------
 src/box/sql/parse.y      | 22 ++++++-----
 src/box/sql/sqlInt.h     |  9 +----
 src/box/sql/trigger.c    | 22 +++++++----
 src/box/sql/util.c       | 80 ++++++++++++++++++++--------------------
 src/lib/core/errinj.h    |  1 -
 test/box/errinj.result   |  2 -
 test/sql/errinj.result   | 18 ---------
 test/sql/errinj.test.lua |  8 ----
 11 files changed, 97 insertions(+), 107 deletions(-)

diff --git a/src/box/sql.c b/src/box/sql.c
index 4fac020b0..872bfc4f4 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -54,6 +54,7 @@
 #include "iproto_constants.h"
 #include "fk_constraint.h"
 #include "mpstream.h"
+#include <unicode/ucasemap.h>
 
 static sql *db = NULL;
 
@@ -64,6 +65,8 @@ static const uint32_t default_sql_flags = SQL_ShortColNames
 					  | SQL_AutoIndex
 					  | SQL_RecTriggers;
 
+UCaseMap *sql_case_map;
+
 void
 sql_init()
 {
@@ -74,6 +77,11 @@ sql_init()
 	if (sql_init_db(&db) != SQL_OK)
 		panic("failed to initialize SQL subsystem");
 
+	UErrorCode status = U_ZERO_ERROR;
+	sql_case_map = ucasemap_open(NULL, 0, &status);
+	if (sql_case_map == NULL)
+		panic("failed to initialize SQL subsystem");
+
 	assert(db != NULL);
 }
 
diff --git a/src/box/sql.h b/src/box/sql.h
index 400360f59..a3e892595 100644
--- a/src/box/sql.h
+++ b/src/box/sql.h
@@ -59,6 +59,15 @@ sql_load_schema();
 struct sql *
 sql_get();
 
+struct UCaseMap;
+
+/**
+ * Opaque service object for ICU case mapping functions is
+ * initialized for default locale. Used to perform name
+ * normalization in SQL.
+*/
+extern struct UCaseMap *sql_case_map;
+
 struct Expr;
 struct Parse;
 struct Select;
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 838fbd21a..0668fcca6 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -965,7 +965,7 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
 struct Expr *
 sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 {
-	int extra_size = 0;
+	int extra_size = 0, rc;
 	bool is_name = false;
 	if (token != NULL) {
 		int val;
@@ -973,14 +973,7 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 		if (sql_expr_token_to_int(op, token, &val) == 0)
 			return sql_expr_new_int(db, val);
 		is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
-		if (is_name) {
-			extra_size = sql_normalize_name(NULL, 0, token->z,
-							token->n);
-			if (extra_size < 0)
-				return NULL;
-		} else {
-			extra_size = token->n + 1;
-		}
+		extra_size = token->n + 1;
 	}
 	struct Expr *e = sql_expr_new_empty(db, op, extra_size);
 	if (e == NULL || token == NULL || token->n == 0)
@@ -992,10 +985,16 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 		memcpy(e->u.zToken, token->z, token->n);
 		e->u.zToken[token->n] = '\0';
 		sqlDequote(e->u.zToken);
-	} else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
-				      token->n) < 0) {
-		sql_expr_delete(db, e, false);
-		return NULL;
+	} else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
+					    token->n)) > extra_size) {
+		extra_size = rc;
+		e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
+		if (e == NULL)
+			return NULL;
+		e->u.zToken = (char *) &e[1];
+		if (sql_normalize_name(e->u.zToken, extra_size, token->z,
+				       token->n) > extra_size)
+			unreachable();
 	}
 	return e;
 }
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index c37b8d429..357335d23 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -856,15 +856,8 @@ idlist(A) ::= nm(Y). {
   ** that created the expression.
   */
   static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
-    int name_sz = 0;
     struct Expr *p = NULL;
-    if (op != TK_VARIABLE) {
-      name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
-      if (name_sz < 0)
-        goto tarantool_error;
-    } else {
-      name_sz = t.n + 1;
-    }
+    int name_sz = t.n + 1;
     p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
     if( p ){
       memset(p, 0, sizeof(Expr));
@@ -899,8 +892,17 @@ idlist(A) ::= nm(Y). {
       p->iAgg = -1;
       p->u.zToken = (char*)&p[1];
       if (op != TK_VARIABLE) {
-        if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
-          goto tarantool_error;
+        int rc;
+        if ((rc = sql_normalize_name(p->u.zToken, name_sz, t.z,
+                                     t.n)) > name_sz) {
+          name_sz = rc;
+          p = sqlDbReallocOrFree(pParse->db, p, sizeof(Expr) + name_sz);
+          if (p == NULL)
+            goto tarantool_error;
+          p->u.zToken = (char*) &p[1];
+          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
+              unreachable();
+        }
       } else {
         memcpy(p->u.zToken, t.z, t.n);
         p->u.zToken[t.n] = 0;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 4a2197f96..1374f364c 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3207,15 +3207,10 @@ void sqlDequote(char *);
  * @param dst A buffer for the result string. The result will be
  *        0-terminated if the buffer is large enough. The contents
  *        is undefined in case of failure.
- * @param dst_size The size of the buffer (number of bytes). If it
- *        is 0, then dest may be NULL and the function will only
- *        return the length of the result without writing any of
- *        the result string
+ * @param dst_size The size of the buffer (number of bytes).
  * @param src The original string.
  * @param src_len The length of the original string.
- * @retval The count of bytes written(or need to be written) on
- *         success.
- * @retval < 0 Otherwise. The diag message is set.
+ * @retval The count of bytes written.
  */
 int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index c94880086..add792c63 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -279,10 +279,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
 static struct TriggerStep *
 sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 {
-	int name_size =
-		sql_normalize_name(NULL, 0, target_name->z, target_name->n);
-	if (name_size < 0)
-		return NULL;
+	int name_size = target_name->n + 1;
 	int size = sizeof(struct TriggerStep) + name_size;
 	struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
 	if (trigger_step == NULL) {
@@ -290,10 +287,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 		return NULL;
 	}
 	char *z = (char *)&trigger_step[1];
-	if (sql_normalize_name(z, name_size, target_name->z,
-			       target_name->n) < 0) {
-		sqlDbFree(db, trigger_step);
-		return NULL;
+	int rc;
+	if ((rc = sql_normalize_name(z, name_size, target_name->z,
+				     target_name->n)) > name_size) {
+		name_size = rc;
+		trigger_step = sqlDbReallocOrFree(db, trigger_step,
+						  sizeof(*trigger_step) +
+						  name_size);
+		if (trigger_step == NULL)
+			return NULL;
+		z = (char *) &trigger_step[1];
+		if (sql_normalize_name(z, name_size, target_name->z,
+				       target_name->n) > name_size)
+			unreachable();
 	}
 	trigger_step->zTarget = z;
 	trigger_step->op = op;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index e9553b3a4..b365966d5 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -259,67 +259,67 @@ int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
 {
 	assert(src != NULL);
+	assert(dst != NULL && dst_size > 0);
 	if (sqlIsquote(src[0])){
-		if (dst_size == 0)
-			return src_len + 1;
 		memcpy(dst, src, src_len);
 		dst[src_len] = '\0';
 		sqlDequote(dst);
 		return src_len + 1;
 	}
 	UErrorCode status = U_ZERO_ERROR;
-	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
-		status = U_MEMORY_ALLOCATION_ERROR;
-		goto error;
-	});
-	UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
-	if (case_map == NULL)
-		goto error;
-	int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
-				       &status);
-	ucasemap_close(case_map);
-	assert(U_SUCCESS(status) ||
-	       (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
+	assert(sql_case_map != NULL);
+	int len = ucasemap_utf8ToUpper(sql_case_map, dst, dst_size, src,
+				       src_len, &status);
+	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
 	return len + 1;
-error:
-	diag_set(CollationError,
-		 "string conversion to the uppercase failed: %s",
-		 u_errorName(status));
-	return -1;
 }
 
 char *
 sql_normalized_name_db_new(struct sql *db, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
-		return NULL;
+	int size = len + 1, rc;
 	char *res = sqlDbMallocRawNN(db, size);
-	if (res == NULL) {
-		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
-		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0) {
-		sqlDbFree(db, res);
-		return NULL;
-	}
+	if (res == NULL)
+		goto oom_error;
+	if ((rc = sql_normalize_name(res, size, name, len)) <= size)
+		return res;
+
+	size = rc;
+	res = sqlDbReallocOrFree(db, res, size);
+	if (res == NULL)
+		goto oom_error;
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
+
+oom_error:
+	diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
+	return NULL;
 }
 
 char *
 sql_normalized_name_region_new(struct region *r, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
-		return NULL;
-	char *res = (char *) region_alloc(r, size);
-	if (res == NULL) {
-		diag_set(OutOfMemory, size, "region_alloc", "res");
-		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0)
-		return NULL;
+	int size = len + 1, rc;
+	size_t region_svp = region_used(r);
+	char *res = region_alloc(r, size);
+	if (res == NULL)
+		goto oom_error;
+	if ((rc = sql_normalize_name(res, size, name, len)) <= size)
+		return res;
+
+	size = rc;
+	region_truncate(r, region_svp);
+	res = region_alloc(r, size);
+	if (res == NULL)
+		goto oom_error;
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
+
+oom_error:
+	diag_set(OutOfMemory, size, "region", "res");
+	return NULL;
 }
 
 /*
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index c823d3597..41783cc74 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,7 +125,6 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
-	_(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index b657234e1..8e76b21b3 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -22,8 +22,6 @@ errinj.info()
     state: false
   ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
     state: 0
-  ERRINJ_SQL_NAME_NORMALIZATION:
-    state: false
   ERRINJ_VY_SCHED_TIMEOUT:
     state: 0
   ERRINJ_WAL_WRITE_PARTIAL:
diff --git a/test/sql/errinj.result b/test/sql/errinj.result
index c974ab714..a1e7cc4a3 100644
--- a/test/sql/errinj.result
+++ b/test/sql/errinj.result
@@ -388,21 +388,3 @@ errinj.set("ERRINJ_WAL_DELAY", false)
 ---
 - ok
 ...
---
--- gh-3931: Store regular identifiers in case-normal form
---
-errinj = box.error.injection
----
-...
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
----
-- ok
-...
-box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
----
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
-...
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
----
-- ok
-...
diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
index f9e7a3c49..d8833feb4 100644
--- a/test/sql/errinj.test.lua
+++ b/test/sql/errinj.test.lua
@@ -139,11 +139,3 @@ box.sql.execute("INSERT INTO t VALUES (2);")
 box.sql.execute("UPDATE t SET id = 2;")
 -- Finish drop space.
 errinj.set("ERRINJ_WAL_DELAY", false)
-
---
--- gh-3931: Store regular identifiers in case-normal form
---
-errinj = box.error.injection
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
-box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
-errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
-- 
2.21.0

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
  2019-04-04 14:07 [tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization Kirill Shcherbatov
@ 2019-04-04 15:03 ` Vladislav Shpilevoy
  2019-04-04 17:12   ` Kirill Shcherbatov
  2019-04-05 11:33 ` Kirill Yukhin
  1 sibling, 1 reply; 7+ messages in thread
From: Vladislav Shpilevoy @ 2019-04-04 15:03 UTC (permalink / raw)
  To: tarantool-patches, Kirill Shcherbatov

Hi! Thanks for the patch. See 13 comments below.

On 04/04/2019 17:07, Kirill Shcherbatov wrote:
> Because sql_normalize_name used to be called twice - to estimate
> the size of the name buffer and to process data querying the
> UCaseMap object each time DDL performance felt by 15%.

1. It is not about DDL only. Names are normalized always, on DML
and DQL as well. You just can't look up spaces and columns by
not normalized names.

> 
> This patch should eliminate some of the negative effects of using
> ICU for name normalization.
> 
> Thanks @avtikhon for a bechmark
> 
> Follow up e7558062d3559e6bcc18f91eacb88269428321dc
> ---

2. As I see, you did the patch on top of 2.1 branch, but our
policy is implement patches on top of the master, and then
cherry-pick.

> https://github.com/tarantool/msgpuck/tree/kshch/gh-3931-perfomance-fixup
> https://github.com/tarantool/tarantool/issues/3931
> 
>  src/box/sql.c            |  8 ++++
>  src/box/sql.h            |  9 +++++
>  src/box/sql/expr.c       | 25 ++++++-------
>  src/box/sql/parse.y      | 22 ++++++-----
>  src/box/sql/sqlInt.h     |  9 +----
>  src/box/sql/trigger.c    | 22 +++++++----
>  src/box/sql/util.c       | 80 ++++++++++++++++++++--------------------
>  src/lib/core/errinj.h    |  1 -
>  test/box/errinj.result   |  2 -
>  test/sql/errinj.result   | 18 ---------
>  test/sql/errinj.test.lua |  8 ----
>  11 files changed, 97 insertions(+), 107 deletions(-)
> 
> diff --git a/src/box/sql.c b/src/box/sql.c
> index 4fac020b0..872bfc4f4 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c> @@ -74,6 +77,11 @@ sql_init()
>  	if (sql_init_db(&db) != SQL_OK)
>  		panic("failed to initialize SQL subsystem");
>  
> +	UErrorCode status = U_ZERO_ERROR;
> +	sql_case_map = ucasemap_open(NULL, 0, &status);
> +	if (sql_case_map == NULL)
> +		panic("failed to initialize SQL subsystem");

3. As you can see, there are two exactly the same messages.
Please, avoid this duplication.

================================================================
@@ -71,15 +71,10 @@ void
 sql_init()
 {
 	default_flags |= default_sql_flags;
-
 	current_session()->sql_flags |= default_sql_flags;
-
-	if (sql_init_db(&db) != SQL_OK)
-		panic("failed to initialize SQL subsystem");
-
 	UErrorCode status = U_ZERO_ERROR;
 	sql_case_map = ucasemap_open(NULL, 0, &status);
-	if (sql_case_map == NULL)
+	if (sql_init_db(&db) != SQL_OK || sql_case_map == NULL)
 		panic("failed to initialize SQL subsystem");
 
 	assert(db != NULL);
================================================================

> +
>  	assert(db != NULL);
>  }
>  
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 838fbd21a..0668fcca6 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c> @@ -973,14 +973,7 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
>  		if (sql_expr_token_to_int(op, token, &val) == 0)
>  			return sql_expr_new_int(db, val);
>  		is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
> -		if (is_name) {
> -			extra_size = sql_normalize_name(NULL, 0, token->z,
> -							token->n);
> -			if (extra_size < 0)
> -				return NULL;
> -		} else {
> -			extra_size = token->n + 1;
> -		}
> +		extra_size = token->n + 1;

4. is_name now is not necessary to keep as a variable. Please,
inline it in its single usage place.

>  	}
>  	struct Expr *e = sql_expr_new_empty(db, op, extra_size);
>  	if (e == NULL || token == NULL || token->n == 0)
> @@ -992,10 +985,16 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
>  		memcpy(e->u.zToken, token->z, token->n);
>  		e->u.zToken[token->n] = '\0';
>  		sqlDequote(e->u.zToken);
> -	} else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
> -				      token->n) < 0) {
> -		sql_expr_delete(db, e, false);
> -		return NULL;
> +	} else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
> +					    token->n)) > extra_size) {
> +		extra_size = rc;
> +		e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
> +		if (e == NULL)
> +			return NULL;
> +		e->u.zToken = (char *) &e[1];
> +		if (sql_normalize_name(e->u.zToken, extra_size, token->z,
> +				       token->n) > extra_size)
> +			unreachable();
>  	}
>  	return e;
>  }
> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
> index c37b8d429..357335d23 100644
> --- a/src/box/sql/parse.y
> +++ b/src/box/sql/parse.y
> @@ -899,8 +892,17 @@ idlist(A) ::= nm(Y). {
>        p->iAgg = -1;
>        p->u.zToken = (char*)&p[1];
>        if (op != TK_VARIABLE) {
> -        if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
> -          goto tarantool_error;
> +        int rc;
> +        if ((rc = sql_normalize_name(p->u.zToken, name_sz, t.z,
> +                                     t.n)) > name_sz) {

5. Why do you declare 'rc' and on the next line set it inside 'if'?
It does not make the code shorter, not more readable. Just write
rc = sql_normalize...(); and check its result on the next line.

> +          name_sz = rc;
> +          p = sqlDbReallocOrFree(pParse->db, p, sizeof(Expr) + name_sz);

6. Either sizeof(struct Expr), or sizeof(*p). We do not use typedefs
normally, and typedef Expr will be dropped someday.

> +          if (p == NULL)
> +            goto tarantool_error;
> +          p->u.zToken = (char*) &p[1];

7. In our code we put whitespace before '*'.

> +          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
> +              unreachable();
> +        }
>        } else {
>          memcpy(p->u.zToken, t.z, t.n);
>          p->u.zToken[t.n] = 0;
> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index 4a2197f96..1374f364c 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -3207,15 +3207,10 @@ void sqlDequote(char *);
>   * @param dst A buffer for the result string. The result will be
>   *        0-terminated if the buffer is large enough. The contents
>   *        is undefined in case of failure.
> - * @param dst_size The size of the buffer (number of bytes). If it
> - *        is 0, then dest may be NULL and the function will only
> - *        return the length of the result without writing any of
> - *        the result string
> + * @param dst_size The size of the buffer (number of bytes).
>   * @param src The original string.
>   * @param src_len The length of the original string.
> - * @retval The count of bytes written(or need to be written) on
> - *         success.
> - * @retval < 0 Otherwise. The diag message is set.
> + * @retval The count of bytes written.

8. What is a motivation of dropping '(or need to be written)' ? I
understand removal of 'on success', but why did you drop the text in
the braces? You still can get a value > dst_size from that function.
And since you dropped the comment on dst_size, here you need to write
a more detailed one, when and why it can return > dst_size.

>   */
>  int
>  sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
> diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
> index c94880086..add792c63 100644
> --- a/src/box/sql/trigger.c
> +++ b/src/box/sql/trigger.c
> @@ -290,10 +287,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
>  		return NULL;
>  	}
>  	char *z = (char *)&trigger_step[1];
> -	if (sql_normalize_name(z, name_size, target_name->z,
> -			       target_name->n) < 0) {
> -		sqlDbFree(db, trigger_step);
> -		return NULL;
> +	int rc;
> +	if ((rc = sql_normalize_name(z, name_size, target_name->z,
> +				     target_name->n)) > name_size) {

9. The same as in parse.y.

> +		name_size = rc;
> +		trigger_step = sqlDbReallocOrFree(db, trigger_step,
> +						  sizeof(*trigger_step) +
> +						  name_size);
> +		if (trigger_step == NULL)
> +			return NULL;
> +		z = (char *) &trigger_step[1];
> +		if (sql_normalize_name(z, name_size, target_name->z,
> +				       target_name->n) > name_size)
> +			unreachable();
>  	}
>  	trigger_step->zTarget = z;
>  	trigger_step->op = op;
> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
> index e9553b3a4..b365966d5 100644
> --- a/src/box/sql/util.c
> +++ b/src/box/sql/util.c>  
>  char *
>  sql_normalized_name_db_new(struct sql *db, const char *name, int len)
>  {
> -	int size = sql_normalize_name(NULL, 0, name, len);
> -	if (size < 0)
> -		return NULL;
> +	int size = len + 1, rc;
>  	char *res = sqlDbMallocRawNN(db, size);
> -	if (res == NULL) {
> -		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
> -		return NULL;
> -	}
> -	if (sql_normalize_name(res, size, name, len) < 0) {
> -		sqlDbFree(db, res);
> -		return NULL;
> -	}
> +	if (res == NULL)
> +		goto oom_error;

10. Funny thing, but if we looked at the history of sql malloc usages
and changes we could see that firstly it was not setting diag. Then
Mergen started setting diag right inside malloc.c. Then we started to
set diag twice in certain places in scope of our normalization patch,
because I missed Mergen's patch. Then I see this patch, and here you
set diag in half of it, but do not in the other half.

As I understand, sqlDbMallocRawNN already sets diag, and you don't need
to do it again here.

> +	if ((rc = sql_normalize_name(res, size, name, len)) <= size)
> +		return res;
> +
> +	size = rc;
> +	res = sqlDbReallocOrFree(db, res, size);
> +	if (res == NULL)
> +		goto oom_error;
> +	if (sql_normalize_name(res, size, name, len) > size)
> +		unreachable();
>  	return res;
> +
> +oom_error:
> +	diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
> +	return NULL;
>  }
>  
>  char *
>  sql_normalized_name_region_new(struct region *r, const char *name, int len)
>  {
> -	int size = sql_normalize_name(NULL, 0, name, len);
> -	if (size < 0)
> -		return NULL;
> -	char *res = (char *) region_alloc(r, size);
> -	if (res == NULL) {
> -		diag_set(OutOfMemory, size, "region_alloc", "res");
> -		return NULL;
> -	}
> -	if (sql_normalize_name(res, size, name, len) < 0)
> -		return NULL;
> +	int size = len + 1, rc;
> +	size_t region_svp = region_used(r);
> +	char *res = region_alloc(r, size);
> +	if (res == NULL)
> +		goto oom_error;
> +	if ((rc = sql_normalize_name(res, size, name, len)) <= size)
> +		return res;
> +
> +	size = rc;
> +	region_truncate(r, region_svp);
> +	res = region_alloc(r, size);
> +	if (res == NULL)
> +		goto oom_error;
> +	if (sql_normalize_name(res, size, name, len) > size)
> +		unreachable();
>  	return res;
> +
> +oom_error:
> +	diag_set(OutOfMemory, size, "region", "res");

11. Third argument of OutOfMemory is a name of the function
failed to allocate memory. Here you call region_alloc, not region.

> +	return NULL;
>  }
>  
>  /*
> diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
> index c823d3597..41783cc74 100644
> --- a/src/lib/core/errinj.h
> +++ b/src/lib/core/errinj.h
> @@ -125,7 +125,6 @@ struct errinj {
>  	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
>  	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
>  	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
> -	_(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \

12. What is a motivation of drop of that test? You still can
fail in OOM, but a bit later, in sqlDbReallocOrFree in sql_expr_new_dequoted.
The test was not about OOM specifically, but about normalization error,
and that the error goes up to user normally, without being replaced or
ignored.

>  
>  ENUM0(errinj_id, ERRINJ_LIST);
>  extern struct errinj errinjs[];

13. We already have global case_map in lua/utf8.c. I think, it is worth
moving it to lib/coll and use both in lua/utf8.c and here.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
  2019-04-04 15:03 ` [tarantool-patches] " Vladislav Shpilevoy
@ 2019-04-04 17:12   ` Kirill Shcherbatov
  2019-04-04 17:31     ` Vladislav Shpilevoy
  0 siblings, 1 reply; 7+ messages in thread
From: Kirill Shcherbatov @ 2019-04-04 17:12 UTC (permalink / raw)
  To: tarantool-patches, Vladislav Shpilevoy

Hi! Thank you for review!

> 1. It is not about DDL only. Names are normalized always, on DML
> and DQL as well. You just can't look up spaces and columns by
> not normalized names.
DDL -> SQL

> 2. As I see, you did the patch on top of 2.1 branch, but our
> policy is implement patches on top of the master, and then
> cherry-pick.
Done, based on master.

> 3. As you can see, there are two exactly the same messages.
> Please, avoid this duplication.
Non-actual with last proposal.>> +		extra_size = token->n + 1;
> 
> 4. is_name now is not necessary to keep as a variable. Please,
> inline it in its single usage place.
Got rid of.

> 5. Why do you declare 'rc' and on the next line set it inside 'if'?
> It does not make the code shorter, not more readable. Just write
> rc = sql_normalize...(); and check its result on the next line.
I like this more. Ok. Reworked here and everywhere.

> 6. Either sizeof(struct Expr), or sizeof(*p). We do not use typedefs
> normally, and typedef Expr will be dropped someday.
 sizeof(*p).

> 7. In our code we put whitespace before '*'.
Fixed.

> 8. What is a motivation of dropping '(or need to be written)' ? I
> understand removal of 'on success', but why did you drop the text in
> the braces? You still can get a value > dst_size from that function.
> And since you dropped the comment on dst_size, here you need to write
> a more detailed one, when and why it can return > dst_size.
No real motivation. Returned.

> 9. The same as in parse.y.
Ok

> 10. Funny thing, but if we looked at the history of sql malloc usages
> and changes we could see that firstly it was not setting diag. Then
> Mergen started setting diag right inside malloc.c. Then we started to
> set diag twice in certain places in scope of our normalization patch,
> because I missed Mergen's patch. Then I see this patch, and here you
> set diag in half of it, but do not in the other half.
> 
> As I understand, sqlDbMallocRawNN already sets diag, and you don't need
> to do it again here.
You are right.

> 11. Third argument of OutOfMemory is a name of the function
> failed to allocate memory. Here you call region_alloc, not region.
Vova prefer "region" in such cases if I not mistaken. I don't care. "region_alloc"

> 12. What is a motivation of drop of that test? You still can
> fail in OOM, but a bit later, in sqlDbReallocOrFree in sql_expr_new_dequoted.
> The test was not about OOM specifically, but about normalization error,
> and that the error goes up to user normally, without being replaced or
> ignored.
New ERRINJ:
 char *
 sql_normalized_name_db_new(struct sql *db, const char *name, int len)
 {
-       int size = sql_normalize_name(NULL, 0, name, len);
-       if (size < 0)
+       int size = len + 1;
+       ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
+               diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
                return NULL;
+       });


 char *
 sql_normalized_name_region_new(struct region *r, const char *name, int len)
 {
-       int size = sql_normalize_name(NULL, 0, name, len);
-       if (size < 0)
-               return NULL;
-       char *res = (char *) region_alloc(r, size);
-       if (res == NULL) {
+       int size = len + 1;
+       ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
                diag_set(OutOfMemory, size, "region_alloc", "res");
                return NULL;
-       }

+++ b/test/sql/errinj.result
@@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
 ...
 box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
 ---
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
+- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res
+...
+dummy_f = function(int) return 1 end
+---
+...
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
+---
+- error: Failed to allocate 9 bytes in region_alloc for res
 ...


> 13. We already have global case_map in lua/utf8.c. I think, it is worth
> moving it to lib/coll and use both in lua/utf8.c and here.

Looks good.

========================================================

Because sql_normalize_name used to be called twice - to estimate
the size of the name buffer and to process data querying the
UCaseMap object each time performance in SQL felt by 15%.

This patch should eliminate some of the negative effects of using
ICU for name normalization.

Thanks @avtikhon for a bechmark

Follow up e7558062d3559e6bcc18f91eacb88269428321dc
---
 src/box/sql/expr.c       | 29 ++++++++--------
 src/box/sql/parse.y      | 21 ++++++------
 src/box/sql/sqlInt.h     |  9 ++---
 src/box/sql/trigger.c    | 22 ++++++++-----
 src/box/sql/util.c       | 71 +++++++++++++++++++++-------------------
 src/lib/coll/coll.c      |  8 ++++-
 src/lib/coll/coll.h      |  3 ++
 src/lua/utf8.c           | 11 +------
 test/sql/errinj.result   |  9 ++++-
 test/sql/errinj.test.lua |  2 ++
 10 files changed, 98 insertions(+), 87 deletions(-)

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 4b98bd175..0d61be344 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -965,22 +965,13 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
 struct Expr *
 sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 {
-	int extra_size = 0;
-	bool is_name = false;
+	int extra_size = 0, rc;
 	if (token != NULL) {
 		int val;
 		assert(token->z != NULL || token->n == 0);
 		if (sql_expr_token_to_int(op, token, &val) == 0)
 			return sql_expr_new_int(db, val);
-		is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
-		if (is_name) {
-			extra_size = sql_normalize_name(NULL, 0, token->z,
-							token->n);
-			if (extra_size < 0)
-				return NULL;
-		} else {
-			extra_size = token->n + 1;
-		}
+		extra_size = token->n + 1;
 	}
 	struct Expr *e = sql_expr_new_empty(db, op, extra_size);
 	if (e == NULL || token == NULL || token->n == 0)
@@ -988,14 +979,20 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 	e->u.zToken = (char *) &e[1];
 	if (token->z[0] == '"')
 		e->flags |= EP_DblQuoted;
-	if (! is_name) {
+	if (op != TK_ID && op != TK_COLLATE && op != TK_FUNCTION) {
 		memcpy(e->u.zToken, token->z, token->n);
 		e->u.zToken[token->n] = '\0';
 		sqlDequote(e->u.zToken);
-	} else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
-				      token->n) < 0) {
-		sql_expr_delete(db, e, false);
-		return NULL;
+	} else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
+					    token->n)) > extra_size) {
+		extra_size = rc;
+		e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
+		if (e == NULL)
+			return NULL;
+		e->u.zToken = (char *) &e[1];
+		if (sql_normalize_name(e->u.zToken, extra_size, token->z,
+				       token->n) > extra_size)
+			unreachable();
 	}
 	return e;
 }
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index ed5c05436..4581ef6e7 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -893,15 +893,8 @@ idlist(A) ::= nm(Y). {
   ** that created the expression.
   */
   static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
-    int name_sz = 0;
     struct Expr *p = NULL;
-    if (op != TK_VARIABLE) {
-      name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
-      if (name_sz < 0)
-        goto tarantool_error;
-    } else {
-      name_sz = t.n + 1;
-    }
+    int name_sz = t.n + 1;
     p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
     if( p ){
       memset(p, 0, sizeof(Expr));
@@ -936,8 +929,16 @@ idlist(A) ::= nm(Y). {
       p->iAgg = -1;
       p->u.zToken = (char*)&p[1];
       if (op != TK_VARIABLE) {
-        if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
-          goto tarantool_error;
+        int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
+        if (rc > name_sz) {
+          name_sz = rc;
+          p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz);
+          if (p == NULL)
+            goto tarantool_error;
+          p->u.zToken = (char *) &p[1];
+          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
+              unreachable();
+        }
       } else {
         memcpy(p->u.zToken, t.z, t.n);
         p->u.zToken[t.n] = 0;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index d8dc03284..b322602dc 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3175,15 +3175,10 @@ void sqlDequote(char *);
  * @param dst A buffer for the result string. The result will be
  *        0-terminated if the buffer is large enough. The contents
  *        is undefined in case of failure.
- * @param dst_size The size of the buffer (number of bytes). If it
- *        is 0, then dest may be NULL and the function will only
- *        return the length of the result without writing any of
- *        the result string
+ * @param dst_size The size of the buffer (number of bytes).
  * @param src The original string.
  * @param src_len The length of the original string.
- * @retval The count of bytes written(or need to be written) on
- *         success.
- * @retval < 0 Otherwise. The diag message is set.
+ * @retval The count of bytes written (or need to be written).
  */
 int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index 20b2c970c..14e4198a8 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -278,10 +278,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
 static struct TriggerStep *
 sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 {
-	int name_size =
-		sql_normalize_name(NULL, 0, target_name->z, target_name->n);
-	if (name_size < 0)
-		return NULL;
+	int name_size = target_name->n + 1;
 	int size = sizeof(struct TriggerStep) + name_size;
 	struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
 	if (trigger_step == NULL) {
@@ -289,10 +286,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 		return NULL;
 	}
 	char *z = (char *)&trigger_step[1];
-	if (sql_normalize_name(z, name_size, target_name->z,
-			       target_name->n) < 0) {
-		sqlDbFree(db, trigger_step);
-		return NULL;
+	int rc = sql_normalize_name(z, name_size, target_name->z,
+				    target_name->n);
+	if (rc > name_size) {
+		name_size = rc;
+		trigger_step = sqlDbReallocOrFree(db, trigger_step,
+						  sizeof(*trigger_step) +
+						  name_size);
+		if (trigger_step == NULL)
+			return NULL;
+		z = (char *) &trigger_step[1];
+		if (sql_normalize_name(z, name_size, target_name->z,
+				       target_name->n) > name_size)
+			unreachable();
 	}
 	trigger_step->zTarget = z;
 	trigger_step->op = op;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index a13efa682..2f3c17c9a 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -41,6 +41,7 @@
 #if HAVE_ISNAN || SQL_HAVE_ISNAN
 #include <math.h>
 #endif
+#include "coll/coll.h"
 #include <unicode/ucasemap.h>
 #include "errinj.h"
 
@@ -259,66 +260,68 @@ int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
 {
 	assert(src != NULL);
+	assert(dst != NULL && dst_size > 0);
 	if (sqlIsquote(src[0])){
-		if (dst_size == 0)
-			return src_len + 1;
 		memcpy(dst, src, src_len);
 		dst[src_len] = '\0';
 		sqlDequote(dst);
 		return src_len + 1;
 	}
 	UErrorCode status = U_ZERO_ERROR;
-	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
-		status = U_MEMORY_ALLOCATION_ERROR;
-		goto error;
-	});
-	UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
-	if (case_map == NULL)
-		goto error;
-	int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
-				       &status);
-	ucasemap_close(case_map);
-	assert(U_SUCCESS(status) ||
-	       (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
+	assert(root_map != NULL);
+	int len = ucasemap_utf8ToUpper(root_map, dst, dst_size, src,
+				       src_len, &status);
+	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
 	return len + 1;
-error:
-	diag_set(CollationError,
-		 "string conversion to the uppercase failed: %s",
-		 u_errorName(status));
-	return -1;
 }
 
 char *
 sql_normalized_name_db_new(struct sql *db, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
+	int size = len + 1;
+	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
+		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
 		return NULL;
+	});
 	char *res = sqlDbMallocRawNN(db, size);
-	if (res == NULL) {
-		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
+	if (res == NULL)
 		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0) {
-		sqlDbFree(db, res);
+	int rc = sql_normalize_name(res, size, name, len);
+	if (rc <= size)
+		return res;
+
+	size = rc;
+	res = sqlDbReallocOrFree(db, res, size);
+	if (res == NULL)
 		return NULL;
-	}
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
 }
 
 char *
 sql_normalized_name_region_new(struct region *r, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
-		return NULL;
-	char *res = (char *) region_alloc(r, size);
-	if (res == NULL) {
+	int size = len + 1;
+	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
 		diag_set(OutOfMemory, size, "region_alloc", "res");
 		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0)
+	});
+	size_t region_svp = region_used(r);
+	char *res = region_alloc(r, size);
+	if (res == NULL)
+		return NULL;
+	int rc = sql_normalize_name(res, size, name, len);
+	if (rc <= size)
+		return res;
+
+	size = rc;
+	region_truncate(r, region_svp);
+	res = region_alloc(r, size);
+	if (res == NULL)
 		return NULL;
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
 }
 
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index b83f0fdc7..21f2489d4 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -34,8 +34,11 @@
 #include "diag.h"
 #include "assoc.h"
 #include <unicode/ucol.h>
+#include <unicode/ucasemap.h>
 #include <trivia/config.h>
 
+struct UCaseMap *root_map = NULL;
+
 #define mh_name _coll
 struct mh_coll_key_t {
 	const char *str;
@@ -414,13 +417,16 @@ coll_unref(struct coll *coll)
 void
 coll_init()
 {
+	UErrorCode err = U_ZERO_ERROR;
 	coll_cache = mh_coll_new();
-	if (coll_cache == NULL)
+	root_map = ucasemap_open("", 0, &err);
+	if (coll_cache == NULL || root_map == NULL)
 		panic("Can not create system collations cache");
 }
 
 void
 coll_free()
 {
+	ucasemap_close(root_map);
 	mh_coll_delete(coll_cache);
 }
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index c505804f6..713c9646e 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -53,6 +53,9 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
 
 struct UCollator;
 
+/** Default universal casemap for case transformations. */
+extern struct UCaseMap *root_map;
+
 /**
  * Collation. It has no unique features like name, id or owner.
  * Only functional part - comparator, locale, ICU settings.
diff --git a/src/lua/utf8.c b/src/lua/utf8.c
index de026a921..311698bda 100644
--- a/src/lua/utf8.c
+++ b/src/lua/utf8.c
@@ -38,9 +38,6 @@
 
 extern struct ibuf *tarantool_lua_ibuf;
 
-/** Default universal casemap for case transformations. */
-static UCaseMap *root_map = NULL;
-
 /** Collations for cmp/casecmp functions. */
 static struct coll *unicode_coll = NULL;
 static struct coll *unicode_ci_coll = NULL;
@@ -447,12 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
 void
 tarantool_lua_utf8_init(struct lua_State *L)
 {
-	UErrorCode err = U_ZERO_ERROR;
-	root_map = ucasemap_open("", 0, &err);
-	if (root_map == NULL) {
-		luaL_error(L, tt_sprintf("error in ICU ucasemap_open: %s",
-					 u_errorName(err)));
-	}
+	assert(root_map != NULL);
 	struct coll_def def;
 	memset(&def, 0, sizeof(def));
 	def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;
@@ -474,7 +466,6 @@ error_coll:
 void
 tarantool_lua_utf8_free()
 {
-	ucasemap_close(root_map);
 	if (unicode_coll != NULL)
 		coll_unref(unicode_coll);
 	if (unicode_ci_coll != NULL)
diff --git a/test/sql/errinj.result b/test/sql/errinj.result
index dead00f7d..5a427e855 100644
--- a/test/sql/errinj.result
+++ b/test/sql/errinj.result
@@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
 ...
 box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
 ---
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
+- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res
+...
+dummy_f = function(int) return 1 end
+---
+...
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
+---
+- error: Failed to allocate 9 bytes in region_alloc for res
 ...
 errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
 ---
diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
index 736e66957..7f4b30baf 100644
--- a/test/sql/errinj.test.lua
+++ b/test/sql/errinj.test.lua
@@ -146,4 +146,6 @@ errinj.set("ERRINJ_WAL_DELAY", false)
 errinj = box.error.injection
 errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
 box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
+dummy_f = function(int) return 1 end
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
 errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
-- 
2.21.0

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
  2019-04-04 17:12   ` Kirill Shcherbatov
@ 2019-04-04 17:31     ` Vladislav Shpilevoy
  2019-04-04 18:08       ` Kirill Shcherbatov
  0 siblings, 1 reply; 7+ messages in thread
From: Vladislav Shpilevoy @ 2019-04-04 17:31 UTC (permalink / raw)
  To: Kirill Shcherbatov, tarantool-patches


>> 5. Why do you declare 'rc' and on the next line set it inside 'if'?
>> It does not make the code shorter, not more readable. Just write
>> rc = sql_normalize...(); and check its result on the next line.
> I like this more. Ok. Reworked here and everywhere.

I could understand that if not the fact, that through the patch
you used both, sometimes in one function on neighbour lines.

>> 11. Third argument of OutOfMemory is a name of the function
>> failed to allocate memory. Here you call region_alloc, not region.
> Vova prefer "region" in such cases if I not mistaken. I don't care. "region_alloc"

Do not remember if Vova even once said me something like this.
But clearly remember that Kostja said to use function name,
and we do that through the code.

See 3 comments below.

> 
> Because sql_normalize_name used to be called twice - to estimate
> the size of the name buffer and to process data querying the
> UCaseMap object each time performance in SQL felt by 15%.
> 
> This patch should eliminate some of the negative effects of using
> ICU for name normalization.
> 
> Thanks @avtikhon for a bechmark
> 
> Follow up e7558062d3559e6bcc18f91eacb88269428321dc
> ---
>  src/box/sql/expr.c       | 29 ++++++++--------
>  src/box/sql/parse.y      | 21 ++++++------
>  src/box/sql/sqlInt.h     |  9 ++---
>  src/box/sql/trigger.c    | 22 ++++++++-----
>  src/box/sql/util.c       | 71 +++++++++++++++++++++-------------------
>  src/lib/coll/coll.c      |  8 ++++-
>  src/lib/coll/coll.h      |  3 ++
>  src/lua/utf8.c           | 11 +------
>  test/sql/errinj.result   |  9 ++++-
>  test/sql/errinj.test.lua |  2 ++
>  10 files changed, 98 insertions(+), 87 deletions(-)
> 
> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
> index a13efa682..2f3c17c9a 100644
> --- a/src/box/sql/util.c
> +++ b/src/box/sql/util.c
> @@ -259,66 +260,68 @@ int
>  char *
>  sql_normalized_name_region_new(struct region *r, const char *name, int len)
>  {
> -	int size = sql_normalize_name(NULL, 0, name, len);
> -	if (size < 0)
> -		return NULL;
> -	char *res = (char *) region_alloc(r, size);
> -	if (res == NULL) {
> +	int size = len + 1;
> +	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
>  		diag_set(OutOfMemory, size, "region_alloc", "res");
>  		return NULL;
> -	}
> -	if (sql_normalize_name(res, size, name, len) < 0)
> +	});
> +	size_t region_svp = region_used(r);
> +	char *res = region_alloc(r, size);
> +	if (res == NULL)
> +		return NULL;

1. Missed diag_set.

> +	int rc = sql_normalize_name(res, size, name, len);
> +	if (rc <= size)
> +		return res;
> +
> +	size = rc;
> +	region_truncate(r, region_svp);
> +	res = region_alloc(r, size);
> +	if (res == NULL)
>  		return NULL;

2. Again.

> +	if (sql_normalize_name(res, size, name, len) > size)
> +		unreachable();
>  	return res;
>  }
>  
> diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
> index b83f0fdc7..21f2489d4 100644
> --- a/src/lib/coll/coll.c
> +++ b/src/lib/coll/coll.c
> @@ -34,8 +34,11 @@
>  #include "diag.h"
>  #include "assoc.h"
>  #include <unicode/ucol.h>
> +#include <unicode/ucasemap.h>
>  #include <trivia/config.h>
>  
> +struct UCaseMap *root_map = NULL;

3. That name was ok to be local for utf8.c, but now
it is global, and 'root_map' in the whole scope of
tarantool looks ambiguous. What it is? MessagePack map?
Lua table map? RB-tree map? What is 'root'? I propose

    icu_ucase_default_map

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
  2019-04-04 17:31     ` Vladislav Shpilevoy
@ 2019-04-04 18:08       ` Kirill Shcherbatov
  2019-04-04 20:15         ` Vladislav Shpilevoy
  0 siblings, 1 reply; 7+ messages in thread
From: Kirill Shcherbatov @ 2019-04-04 18:08 UTC (permalink / raw)
  To: tarantool-patches, Vladislav Shpilevoy

> 1. Missed diag_set.
> 2. Again.
Sorry, accidentally dropped it

> 3. That name was ok to be local for utf8.c, but now
> it is global, and 'root_map' in the whole scope of
> tarantool looks ambiguous. What it is? MessagePack map?
> Lua table map? RB-tree map? What is 'root'? I propose
> 
>     icu_ucase_default_map
Songs good.

==========================================
Diff:
==========================================
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 2f3c17c9a..52a86635c 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -268,9 +268,9 @@ sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
 		return src_len + 1;
 	}
 	UErrorCode status = U_ZERO_ERROR;
-	assert(root_map != NULL);
-	int len = ucasemap_utf8ToUpper(root_map, dst, dst_size, src,
-				       src_len, &status);
+	assert(icu_ucase_default_map != NULL);
+	int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size,
+				       src, src_len, &status);
 	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
 	return len + 1;
 }
@@ -310,7 +310,7 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len)
 	size_t region_svp = region_used(r);
 	char *res = region_alloc(r, size);
 	if (res == NULL)
-		return NULL;
+		goto oom_error;
 	int rc = sql_normalize_name(res, size, name, len);
 	if (rc <= size)
 		return res;
@@ -319,10 +319,14 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len)
 	region_truncate(r, region_svp);
 	res = region_alloc(r, size);
 	if (res == NULL)
-		return NULL;
+		goto oom_error;
 	if (sql_normalize_name(res, size, name, len) > size)
 		unreachable();
 	return res;
+
+oom_error:
+	diag_set(OutOfMemory, size, "region_alloc", "res");
+	return NULL;
 }
 
 /* Convenient short-hand */
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index 21f2489d4..495eb6d64 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -37,7 +37,7 @@
 #include <unicode/ucasemap.h>
 #include <trivia/config.h>
 
-struct UCaseMap *root_map = NULL;
+struct UCaseMap *icu_ucase_default_map = NULL;
 
 #define mh_name _coll
 struct mh_coll_key_t {
@@ -419,14 +419,14 @@ coll_init()
 {
 	UErrorCode err = U_ZERO_ERROR;
 	coll_cache = mh_coll_new();
-	root_map = ucasemap_open("", 0, &err);
-	if (coll_cache == NULL || root_map == NULL)
+	icu_ucase_default_map = ucasemap_open("", 0, &err);
+	if (coll_cache == NULL || icu_ucase_default_map == NULL)
 		panic("Can not create system collations cache");
 }
 
 void
 coll_free()
 {
-	ucasemap_close(root_map);
+	ucasemap_close(icu_ucase_default_map);
 	mh_coll_delete(coll_cache);
 }
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index 713c9646e..1f5b29a2e 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -54,7 +54,7 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
 struct UCollator;
 
 /** Default universal casemap for case transformations. */
-extern struct UCaseMap *root_map;
+extern struct UCaseMap *icu_ucase_default_map;
 
 /**
  * Collation. It has no unique features like name, id or owner.
diff --git a/src/lua/utf8.c b/src/lua/utf8.c
index 311698bda..d829b57b4 100644
--- a/src/lua/utf8.c
+++ b/src/lua/utf8.c
@@ -61,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize,
 		}
 		int real_bsize;
 		if (is_to_upper) {
-			real_bsize = ucasemap_utf8ToUpper(root_map, dst,
+			real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst,
 							  dst_bsize, src,
 							  src_bsize, &err);
 		} else {
-			real_bsize = ucasemap_utf8ToLower(root_map, dst,
+			real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst,
 							  dst_bsize, src,
 							  src_bsize, &err);
 		}
@@ -444,7 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
 void
 tarantool_lua_utf8_init(struct lua_State *L)
 {
-	assert(root_map != NULL);
+	assert(icu_ucase_default_map != NULL);
 	struct coll_def def;
 	memset(&def, 0, sizeof(def));
 	def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;

==========================================
Patch:
==========================================
From a779238e296a78830c68bdee305bb305336c5a34 Mon Sep 17 00:00:00 2001
Message-Id: <a779238e296a78830c68bdee305bb305336c5a34.1554401202.git.kshcherbatov@tarantool.org>
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Date: Thu, 4 Apr 2019 15:53:02 +0300
Subject: [PATCH v1 1/1] sql: fix perf degradation on name normalization

Because sql_normalize_name used to be called twice - to estimate
the size of the name buffer and to process data querying the
UCaseMap object each time performance in SQL felt by 15%.

This patch should eliminate some of the negative effects of using
ICU for name normalization.

Thanks @avtikhon for a bechmark

Follow up e7558062d3559e6bcc18f91eacb88269428321dc
---
 src/box/sql/expr.c       | 29 +++++++--------
 src/box/sql/parse.y      | 21 +++++------
 src/box/sql/sqlInt.h     |  9 ++---
 src/box/sql/trigger.c    | 22 +++++++-----
 src/box/sql/util.c       | 77 ++++++++++++++++++++++------------------
 src/lib/coll/coll.c      |  8 ++++-
 src/lib/coll/coll.h      |  3 ++
 src/lua/utf8.c           | 15 ++------
 test/sql/errinj.result   |  9 ++++-
 test/sql/errinj.test.lua |  2 ++
 10 files changed, 105 insertions(+), 90 deletions(-)

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 4b98bd175..0d61be344 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -965,22 +965,13 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
 struct Expr *
 sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 {
-	int extra_size = 0;
-	bool is_name = false;
+	int extra_size = 0, rc;
 	if (token != NULL) {
 		int val;
 		assert(token->z != NULL || token->n == 0);
 		if (sql_expr_token_to_int(op, token, &val) == 0)
 			return sql_expr_new_int(db, val);
-		is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
-		if (is_name) {
-			extra_size = sql_normalize_name(NULL, 0, token->z,
-							token->n);
-			if (extra_size < 0)
-				return NULL;
-		} else {
-			extra_size = token->n + 1;
-		}
+		extra_size = token->n + 1;
 	}
 	struct Expr *e = sql_expr_new_empty(db, op, extra_size);
 	if (e == NULL || token == NULL || token->n == 0)
@@ -988,14 +979,20 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
 	e->u.zToken = (char *) &e[1];
 	if (token->z[0] == '"')
 		e->flags |= EP_DblQuoted;
-	if (! is_name) {
+	if (op != TK_ID && op != TK_COLLATE && op != TK_FUNCTION) {
 		memcpy(e->u.zToken, token->z, token->n);
 		e->u.zToken[token->n] = '\0';
 		sqlDequote(e->u.zToken);
-	} else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
-				      token->n) < 0) {
-		sql_expr_delete(db, e, false);
-		return NULL;
+	} else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
+					    token->n)) > extra_size) {
+		extra_size = rc;
+		e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
+		if (e == NULL)
+			return NULL;
+		e->u.zToken = (char *) &e[1];
+		if (sql_normalize_name(e->u.zToken, extra_size, token->z,
+				       token->n) > extra_size)
+			unreachable();
 	}
 	return e;
 }
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index ed5c05436..4581ef6e7 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -893,15 +893,8 @@ idlist(A) ::= nm(Y). {
   ** that created the expression.
   */
   static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
-    int name_sz = 0;
     struct Expr *p = NULL;
-    if (op != TK_VARIABLE) {
-      name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
-      if (name_sz < 0)
-        goto tarantool_error;
-    } else {
-      name_sz = t.n + 1;
-    }
+    int name_sz = t.n + 1;
     p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
     if( p ){
       memset(p, 0, sizeof(Expr));
@@ -936,8 +929,16 @@ idlist(A) ::= nm(Y). {
       p->iAgg = -1;
       p->u.zToken = (char*)&p[1];
       if (op != TK_VARIABLE) {
-        if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
-          goto tarantool_error;
+        int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
+        if (rc > name_sz) {
+          name_sz = rc;
+          p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz);
+          if (p == NULL)
+            goto tarantool_error;
+          p->u.zToken = (char *) &p[1];
+          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
+              unreachable();
+        }
       } else {
         memcpy(p->u.zToken, t.z, t.n);
         p->u.zToken[t.n] = 0;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index d8dc03284..b322602dc 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3175,15 +3175,10 @@ void sqlDequote(char *);
  * @param dst A buffer for the result string. The result will be
  *        0-terminated if the buffer is large enough. The contents
  *        is undefined in case of failure.
- * @param dst_size The size of the buffer (number of bytes). If it
- *        is 0, then dest may be NULL and the function will only
- *        return the length of the result without writing any of
- *        the result string
+ * @param dst_size The size of the buffer (number of bytes).
  * @param src The original string.
  * @param src_len The length of the original string.
- * @retval The count of bytes written(or need to be written) on
- *         success.
- * @retval < 0 Otherwise. The diag message is set.
+ * @retval The count of bytes written (or need to be written).
  */
 int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index 20b2c970c..14e4198a8 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -278,10 +278,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
 static struct TriggerStep *
 sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 {
-	int name_size =
-		sql_normalize_name(NULL, 0, target_name->z, target_name->n);
-	if (name_size < 0)
-		return NULL;
+	int name_size = target_name->n + 1;
 	int size = sizeof(struct TriggerStep) + name_size;
 	struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
 	if (trigger_step == NULL) {
@@ -289,10 +286,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
 		return NULL;
 	}
 	char *z = (char *)&trigger_step[1];
-	if (sql_normalize_name(z, name_size, target_name->z,
-			       target_name->n) < 0) {
-		sqlDbFree(db, trigger_step);
-		return NULL;
+	int rc = sql_normalize_name(z, name_size, target_name->z,
+				    target_name->n);
+	if (rc > name_size) {
+		name_size = rc;
+		trigger_step = sqlDbReallocOrFree(db, trigger_step,
+						  sizeof(*trigger_step) +
+						  name_size);
+		if (trigger_step == NULL)
+			return NULL;
+		z = (char *) &trigger_step[1];
+		if (sql_normalize_name(z, name_size, target_name->z,
+				       target_name->n) > name_size)
+			unreachable();
 	}
 	trigger_step->zTarget = z;
 	trigger_step->op = op;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index a13efa682..52a86635c 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -41,6 +41,7 @@
 #if HAVE_ISNAN || SQL_HAVE_ISNAN
 #include <math.h>
 #endif
+#include "coll/coll.h"
 #include <unicode/ucasemap.h>
 #include "errinj.h"
 
@@ -259,67 +260,73 @@ int
 sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
 {
 	assert(src != NULL);
+	assert(dst != NULL && dst_size > 0);
 	if (sqlIsquote(src[0])){
-		if (dst_size == 0)
-			return src_len + 1;
 		memcpy(dst, src, src_len);
 		dst[src_len] = '\0';
 		sqlDequote(dst);
 		return src_len + 1;
 	}
 	UErrorCode status = U_ZERO_ERROR;
-	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
-		status = U_MEMORY_ALLOCATION_ERROR;
-		goto error;
-	});
-	UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
-	if (case_map == NULL)
-		goto error;
-	int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
-				       &status);
-	ucasemap_close(case_map);
-	assert(U_SUCCESS(status) ||
-	       (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
+	assert(icu_ucase_default_map != NULL);
+	int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size,
+				       src, src_len, &status);
+	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
 	return len + 1;
-error:
-	diag_set(CollationError,
-		 "string conversion to the uppercase failed: %s",
-		 u_errorName(status));
-	return -1;
 }
 
 char *
 sql_normalized_name_db_new(struct sql *db, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
+	int size = len + 1;
+	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
+		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
 		return NULL;
+	});
 	char *res = sqlDbMallocRawNN(db, size);
-	if (res == NULL) {
-		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
+	if (res == NULL)
 		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0) {
-		sqlDbFree(db, res);
+	int rc = sql_normalize_name(res, size, name, len);
+	if (rc <= size)
+		return res;
+
+	size = rc;
+	res = sqlDbReallocOrFree(db, res, size);
+	if (res == NULL)
 		return NULL;
-	}
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
 }
 
 char *
 sql_normalized_name_region_new(struct region *r, const char *name, int len)
 {
-	int size = sql_normalize_name(NULL, 0, name, len);
-	if (size < 0)
-		return NULL;
-	char *res = (char *) region_alloc(r, size);
-	if (res == NULL) {
+	int size = len + 1;
+	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
 		diag_set(OutOfMemory, size, "region_alloc", "res");
 		return NULL;
-	}
-	if (sql_normalize_name(res, size, name, len) < 0)
-		return NULL;
+	});
+	size_t region_svp = region_used(r);
+	char *res = region_alloc(r, size);
+	if (res == NULL)
+		goto oom_error;
+	int rc = sql_normalize_name(res, size, name, len);
+	if (rc <= size)
+		return res;
+
+	size = rc;
+	region_truncate(r, region_svp);
+	res = region_alloc(r, size);
+	if (res == NULL)
+		goto oom_error;
+	if (sql_normalize_name(res, size, name, len) > size)
+		unreachable();
 	return res;
+
+oom_error:
+	diag_set(OutOfMemory, size, "region_alloc", "res");
+	return NULL;
 }
 
 /* Convenient short-hand */
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index b83f0fdc7..495eb6d64 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -34,8 +34,11 @@
 #include "diag.h"
 #include "assoc.h"
 #include <unicode/ucol.h>
+#include <unicode/ucasemap.h>
 #include <trivia/config.h>
 
+struct UCaseMap *icu_ucase_default_map = NULL;
+
 #define mh_name _coll
 struct mh_coll_key_t {
 	const char *str;
@@ -414,13 +417,16 @@ coll_unref(struct coll *coll)
 void
 coll_init()
 {
+	UErrorCode err = U_ZERO_ERROR;
 	coll_cache = mh_coll_new();
-	if (coll_cache == NULL)
+	icu_ucase_default_map = ucasemap_open("", 0, &err);
+	if (coll_cache == NULL || icu_ucase_default_map == NULL)
 		panic("Can not create system collations cache");
 }
 
 void
 coll_free()
 {
+	ucasemap_close(icu_ucase_default_map);
 	mh_coll_delete(coll_cache);
 }
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index c505804f6..1f5b29a2e 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -53,6 +53,9 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
 
 struct UCollator;
 
+/** Default universal casemap for case transformations. */
+extern struct UCaseMap *icu_ucase_default_map;
+
 /**
  * Collation. It has no unique features like name, id or owner.
  * Only functional part - comparator, locale, ICU settings.
diff --git a/src/lua/utf8.c b/src/lua/utf8.c
index de026a921..d829b57b4 100644
--- a/src/lua/utf8.c
+++ b/src/lua/utf8.c
@@ -38,9 +38,6 @@
 
 extern struct ibuf *tarantool_lua_ibuf;
 
-/** Default universal casemap for case transformations. */
-static UCaseMap *root_map = NULL;
-
 /** Collations for cmp/casecmp functions. */
 static struct coll *unicode_coll = NULL;
 static struct coll *unicode_ci_coll = NULL;
@@ -64,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize,
 		}
 		int real_bsize;
 		if (is_to_upper) {
-			real_bsize = ucasemap_utf8ToUpper(root_map, dst,
+			real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst,
 							  dst_bsize, src,
 							  src_bsize, &err);
 		} else {
-			real_bsize = ucasemap_utf8ToLower(root_map, dst,
+			real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst,
 							  dst_bsize, src,
 							  src_bsize, &err);
 		}
@@ -447,12 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
 void
 tarantool_lua_utf8_init(struct lua_State *L)
 {
-	UErrorCode err = U_ZERO_ERROR;
-	root_map = ucasemap_open("", 0, &err);
-	if (root_map == NULL) {
-		luaL_error(L, tt_sprintf("error in ICU ucasemap_open: %s",
-					 u_errorName(err)));
-	}
+	assert(icu_ucase_default_map != NULL);
 	struct coll_def def;
 	memset(&def, 0, sizeof(def));
 	def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;
@@ -474,7 +466,6 @@ error_coll:
 void
 tarantool_lua_utf8_free()
 {
-	ucasemap_close(root_map);
 	if (unicode_coll != NULL)
 		coll_unref(unicode_coll);
 	if (unicode_ci_coll != NULL)
diff --git a/test/sql/errinj.result b/test/sql/errinj.result
index dead00f7d..5a427e855 100644
--- a/test/sql/errinj.result
+++ b/test/sql/errinj.result
@@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
 ...
 box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
 ---
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
+- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res
+...
+dummy_f = function(int) return 1 end
+---
+...
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
+---
+- error: Failed to allocate 9 bytes in region_alloc for res
 ...
 errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
 ---
diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
index 736e66957..7f4b30baf 100644
--- a/test/sql/errinj.test.lua
+++ b/test/sql/errinj.test.lua
@@ -146,4 +146,6 @@ errinj.set("ERRINJ_WAL_DELAY", false)
 errinj = box.error.injection
 errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
 box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
+dummy_f = function(int) return 1 end
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
 errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
-- 
2.21.0

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
  2019-04-04 18:08       ` Kirill Shcherbatov
@ 2019-04-04 20:15         ` Vladislav Shpilevoy
  0 siblings, 0 replies; 7+ messages in thread
From: Vladislav Shpilevoy @ 2019-04-04 20:15 UTC (permalink / raw)
  To: Kirill Shcherbatov, tarantool-patches, Kirill Yukhin

LGTM.

On 04/04/2019 21:08, Kirill Shcherbatov wrote:
>> 1. Missed diag_set.
>> 2. Again.
> Sorry, accidentally dropped it
> 
>> 3. That name was ok to be local for utf8.c, but now
>> it is global, and 'root_map' in the whole scope of
>> tarantool looks ambiguous. What it is? MessagePack map?
>> Lua table map? RB-tree map? What is 'root'? I propose
>>
>>     icu_ucase_default_map
> Songs good.
> 
> ==========================================
> Diff:
> ==========================================
> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
> index 2f3c17c9a..52a86635c 100644
> --- a/src/box/sql/util.c
> +++ b/src/box/sql/util.c
> @@ -268,9 +268,9 @@ sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
>  		return src_len + 1;
>  	}
>  	UErrorCode status = U_ZERO_ERROR;
> -	assert(root_map != NULL);
> -	int len = ucasemap_utf8ToUpper(root_map, dst, dst_size, src,
> -				       src_len, &status);
> +	assert(icu_ucase_default_map != NULL);
> +	int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size,
> +				       src, src_len, &status);
>  	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
>  	return len + 1;
>  }
> @@ -310,7 +310,7 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len)
>  	size_t region_svp = region_used(r);
>  	char *res = region_alloc(r, size);
>  	if (res == NULL)
> -		return NULL;
> +		goto oom_error;
>  	int rc = sql_normalize_name(res, size, name, len);
>  	if (rc <= size)
>  		return res;
> @@ -319,10 +319,14 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len)
>  	region_truncate(r, region_svp);
>  	res = region_alloc(r, size);
>  	if (res == NULL)
> -		return NULL;
> +		goto oom_error;
>  	if (sql_normalize_name(res, size, name, len) > size)
>  		unreachable();
>  	return res;
> +
> +oom_error:
> +	diag_set(OutOfMemory, size, "region_alloc", "res");
> +	return NULL;
>  }
>  
>  /* Convenient short-hand */
> diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
> index 21f2489d4..495eb6d64 100644
> --- a/src/lib/coll/coll.c
> +++ b/src/lib/coll/coll.c
> @@ -37,7 +37,7 @@
>  #include <unicode/ucasemap.h>
>  #include <trivia/config.h>
>  
> -struct UCaseMap *root_map = NULL;
> +struct UCaseMap *icu_ucase_default_map = NULL;
>  
>  #define mh_name _coll
>  struct mh_coll_key_t {
> @@ -419,14 +419,14 @@ coll_init()
>  {
>  	UErrorCode err = U_ZERO_ERROR;
>  	coll_cache = mh_coll_new();
> -	root_map = ucasemap_open("", 0, &err);
> -	if (coll_cache == NULL || root_map == NULL)
> +	icu_ucase_default_map = ucasemap_open("", 0, &err);
> +	if (coll_cache == NULL || icu_ucase_default_map == NULL)
>  		panic("Can not create system collations cache");
>  }
>  
>  void
>  coll_free()
>  {
> -	ucasemap_close(root_map);
> +	ucasemap_close(icu_ucase_default_map);
>  	mh_coll_delete(coll_cache);
>  }
> diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
> index 713c9646e..1f5b29a2e 100644
> --- a/src/lib/coll/coll.h
> +++ b/src/lib/coll/coll.h
> @@ -54,7 +54,7 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
>  struct UCollator;
>  
>  /** Default universal casemap for case transformations. */
> -extern struct UCaseMap *root_map;
> +extern struct UCaseMap *icu_ucase_default_map;
>  
>  /**
>   * Collation. It has no unique features like name, id or owner.
> diff --git a/src/lua/utf8.c b/src/lua/utf8.c
> index 311698bda..d829b57b4 100644
> --- a/src/lua/utf8.c
> +++ b/src/lua/utf8.c
> @@ -61,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize,
>  		}
>  		int real_bsize;
>  		if (is_to_upper) {
> -			real_bsize = ucasemap_utf8ToUpper(root_map, dst,
> +			real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst,
>  							  dst_bsize, src,
>  							  src_bsize, &err);
>  		} else {
> -			real_bsize = ucasemap_utf8ToLower(root_map, dst,
> +			real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst,
>  							  dst_bsize, src,
>  							  src_bsize, &err);
>  		}
> @@ -444,7 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
>  void
>  tarantool_lua_utf8_init(struct lua_State *L)
>  {
> -	assert(root_map != NULL);
> +	assert(icu_ucase_default_map != NULL);
>  	struct coll_def def;
>  	memset(&def, 0, sizeof(def));
>  	def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;
> 
> ==========================================
> Patch:
> ==========================================
> From a779238e296a78830c68bdee305bb305336c5a34 Mon Sep 17 00:00:00 2001
> Message-Id: <a779238e296a78830c68bdee305bb305336c5a34.1554401202.git.kshcherbatov@tarantool.org>
> From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
> Date: Thu, 4 Apr 2019 15:53:02 +0300
> Subject: [PATCH v1 1/1] sql: fix perf degradation on name normalization
> 
> Because sql_normalize_name used to be called twice - to estimate
> the size of the name buffer and to process data querying the
> UCaseMap object each time performance in SQL felt by 15%.
> 
> This patch should eliminate some of the negative effects of using
> ICU for name normalization.
> 
> Thanks @avtikhon for a bechmark
> 
> Follow up e7558062d3559e6bcc18f91eacb88269428321dc
> ---
>  src/box/sql/expr.c       | 29 +++++++--------
>  src/box/sql/parse.y      | 21 +++++------
>  src/box/sql/sqlInt.h     |  9 ++---
>  src/box/sql/trigger.c    | 22 +++++++-----
>  src/box/sql/util.c       | 77 ++++++++++++++++++++++------------------
>  src/lib/coll/coll.c      |  8 ++++-
>  src/lib/coll/coll.h      |  3 ++
>  src/lua/utf8.c           | 15 ++------
>  test/sql/errinj.result   |  9 ++++-
>  test/sql/errinj.test.lua |  2 ++
>  10 files changed, 105 insertions(+), 90 deletions(-)
> 
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 4b98bd175..0d61be344 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -965,22 +965,13 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
>  struct Expr *
>  sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
>  {
> -	int extra_size = 0;
> -	bool is_name = false;
> +	int extra_size = 0, rc;
>  	if (token != NULL) {
>  		int val;
>  		assert(token->z != NULL || token->n == 0);
>  		if (sql_expr_token_to_int(op, token, &val) == 0)
>  			return sql_expr_new_int(db, val);
> -		is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
> -		if (is_name) {
> -			extra_size = sql_normalize_name(NULL, 0, token->z,
> -							token->n);
> -			if (extra_size < 0)
> -				return NULL;
> -		} else {
> -			extra_size = token->n + 1;
> -		}
> +		extra_size = token->n + 1;
>  	}
>  	struct Expr *e = sql_expr_new_empty(db, op, extra_size);
>  	if (e == NULL || token == NULL || token->n == 0)
> @@ -988,14 +979,20 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
>  	e->u.zToken = (char *) &e[1];
>  	if (token->z[0] == '"')
>  		e->flags |= EP_DblQuoted;
> -	if (! is_name) {
> +	if (op != TK_ID && op != TK_COLLATE && op != TK_FUNCTION) {
>  		memcpy(e->u.zToken, token->z, token->n);
>  		e->u.zToken[token->n] = '\0';
>  		sqlDequote(e->u.zToken);
> -	} else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
> -				      token->n) < 0) {
> -		sql_expr_delete(db, e, false);
> -		return NULL;
> +	} else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
> +					    token->n)) > extra_size) {
> +		extra_size = rc;
> +		e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
> +		if (e == NULL)
> +			return NULL;
> +		e->u.zToken = (char *) &e[1];
> +		if (sql_normalize_name(e->u.zToken, extra_size, token->z,
> +				       token->n) > extra_size)
> +			unreachable();
>  	}
>  	return e;
>  }
> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
> index ed5c05436..4581ef6e7 100644
> --- a/src/box/sql/parse.y
> +++ b/src/box/sql/parse.y
> @@ -893,15 +893,8 @@ idlist(A) ::= nm(Y). {
>    ** that created the expression.
>    */
>    static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
> -    int name_sz = 0;
>      struct Expr *p = NULL;
> -    if (op != TK_VARIABLE) {
> -      name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
> -      if (name_sz < 0)
> -        goto tarantool_error;
> -    } else {
> -      name_sz = t.n + 1;
> -    }
> +    int name_sz = t.n + 1;
>      p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
>      if( p ){
>        memset(p, 0, sizeof(Expr));
> @@ -936,8 +929,16 @@ idlist(A) ::= nm(Y). {
>        p->iAgg = -1;
>        p->u.zToken = (char*)&p[1];
>        if (op != TK_VARIABLE) {
> -        if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
> -          goto tarantool_error;
> +        int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
> +        if (rc > name_sz) {
> +          name_sz = rc;
> +          p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz);
> +          if (p == NULL)
> +            goto tarantool_error;
> +          p->u.zToken = (char *) &p[1];
> +          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
> +              unreachable();
> +        }
>        } else {
>          memcpy(p->u.zToken, t.z, t.n);
>          p->u.zToken[t.n] = 0;
> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index d8dc03284..b322602dc 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -3175,15 +3175,10 @@ void sqlDequote(char *);
>   * @param dst A buffer for the result string. The result will be
>   *        0-terminated if the buffer is large enough. The contents
>   *        is undefined in case of failure.
> - * @param dst_size The size of the buffer (number of bytes). If it
> - *        is 0, then dest may be NULL and the function will only
> - *        return the length of the result without writing any of
> - *        the result string
> + * @param dst_size The size of the buffer (number of bytes).
>   * @param src The original string.
>   * @param src_len The length of the original string.
> - * @retval The count of bytes written(or need to be written) on
> - *         success.
> - * @retval < 0 Otherwise. The diag message is set.
> + * @retval The count of bytes written (or need to be written).
>   */
>  int
>  sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
> diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
> index 20b2c970c..14e4198a8 100644
> --- a/src/box/sql/trigger.c
> +++ b/src/box/sql/trigger.c
> @@ -278,10 +278,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
>  static struct TriggerStep *
>  sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
>  {
> -	int name_size =
> -		sql_normalize_name(NULL, 0, target_name->z, target_name->n);
> -	if (name_size < 0)
> -		return NULL;
> +	int name_size = target_name->n + 1;
>  	int size = sizeof(struct TriggerStep) + name_size;
>  	struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
>  	if (trigger_step == NULL) {
> @@ -289,10 +286,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
>  		return NULL;
>  	}
>  	char *z = (char *)&trigger_step[1];
> -	if (sql_normalize_name(z, name_size, target_name->z,
> -			       target_name->n) < 0) {
> -		sqlDbFree(db, trigger_step);
> -		return NULL;
> +	int rc = sql_normalize_name(z, name_size, target_name->z,
> +				    target_name->n);
> +	if (rc > name_size) {
> +		name_size = rc;
> +		trigger_step = sqlDbReallocOrFree(db, trigger_step,
> +						  sizeof(*trigger_step) +
> +						  name_size);
> +		if (trigger_step == NULL)
> +			return NULL;
> +		z = (char *) &trigger_step[1];
> +		if (sql_normalize_name(z, name_size, target_name->z,
> +				       target_name->n) > name_size)
> +			unreachable();
>  	}
>  	trigger_step->zTarget = z;
>  	trigger_step->op = op;
> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
> index a13efa682..52a86635c 100644
> --- a/src/box/sql/util.c
> +++ b/src/box/sql/util.c
> @@ -41,6 +41,7 @@
>  #if HAVE_ISNAN || SQL_HAVE_ISNAN
>  #include <math.h>
>  #endif
> +#include "coll/coll.h"
>  #include <unicode/ucasemap.h>
>  #include "errinj.h"
>  
> @@ -259,67 +260,73 @@ int
>  sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
>  {
>  	assert(src != NULL);
> +	assert(dst != NULL && dst_size > 0);
>  	if (sqlIsquote(src[0])){
> -		if (dst_size == 0)
> -			return src_len + 1;
>  		memcpy(dst, src, src_len);
>  		dst[src_len] = '\0';
>  		sqlDequote(dst);
>  		return src_len + 1;
>  	}
>  	UErrorCode status = U_ZERO_ERROR;
> -	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
> -		status = U_MEMORY_ALLOCATION_ERROR;
> -		goto error;
> -	});
> -	UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
> -	if (case_map == NULL)
> -		goto error;
> -	int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
> -				       &status);
> -	ucasemap_close(case_map);
> -	assert(U_SUCCESS(status) ||
> -	       (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
> +	assert(icu_ucase_default_map != NULL);
> +	int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size,
> +				       src, src_len, &status);
> +	assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
>  	return len + 1;
> -error:
> -	diag_set(CollationError,
> -		 "string conversion to the uppercase failed: %s",
> -		 u_errorName(status));
> -	return -1;
>  }
>  
>  char *
>  sql_normalized_name_db_new(struct sql *db, const char *name, int len)
>  {
> -	int size = sql_normalize_name(NULL, 0, name, len);
> -	if (size < 0)
> +	int size = len + 1;
> +	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
> +		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
>  		return NULL;
> +	});
>  	char *res = sqlDbMallocRawNN(db, size);
> -	if (res == NULL) {
> -		diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
> +	if (res == NULL)
>  		return NULL;
> -	}
> -	if (sql_normalize_name(res, size, name, len) < 0) {
> -		sqlDbFree(db, res);
> +	int rc = sql_normalize_name(res, size, name, len);
> +	if (rc <= size)
> +		return res;
> +
> +	size = rc;
> +	res = sqlDbReallocOrFree(db, res, size);
> +	if (res == NULL)
>  		return NULL;
> -	}
> +	if (sql_normalize_name(res, size, name, len) > size)
> +		unreachable();
>  	return res;
>  }
>  
>  char *
>  sql_normalized_name_region_new(struct region *r, const char *name, int len)
>  {
> -	int size = sql_normalize_name(NULL, 0, name, len);
> -	if (size < 0)
> -		return NULL;
> -	char *res = (char *) region_alloc(r, size);
> -	if (res == NULL) {
> +	int size = len + 1;
> +	ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
>  		diag_set(OutOfMemory, size, "region_alloc", "res");
>  		return NULL;
> -	}
> -	if (sql_normalize_name(res, size, name, len) < 0)
> -		return NULL;
> +	});
> +	size_t region_svp = region_used(r);
> +	char *res = region_alloc(r, size);
> +	if (res == NULL)
> +		goto oom_error;
> +	int rc = sql_normalize_name(res, size, name, len);
> +	if (rc <= size)
> +		return res;
> +
> +	size = rc;
> +	region_truncate(r, region_svp);
> +	res = region_alloc(r, size);
> +	if (res == NULL)
> +		goto oom_error;
> +	if (sql_normalize_name(res, size, name, len) > size)
> +		unreachable();
>  	return res;
> +
> +oom_error:
> +	diag_set(OutOfMemory, size, "region_alloc", "res");
> +	return NULL;
>  }
>  
>  /* Convenient short-hand */
> diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
> index b83f0fdc7..495eb6d64 100644
> --- a/src/lib/coll/coll.c
> +++ b/src/lib/coll/coll.c
> @@ -34,8 +34,11 @@
>  #include "diag.h"
>  #include "assoc.h"
>  #include <unicode/ucol.h>
> +#include <unicode/ucasemap.h>
>  #include <trivia/config.h>
>  
> +struct UCaseMap *icu_ucase_default_map = NULL;
> +
>  #define mh_name _coll
>  struct mh_coll_key_t {
>  	const char *str;
> @@ -414,13 +417,16 @@ coll_unref(struct coll *coll)
>  void
>  coll_init()
>  {
> +	UErrorCode err = U_ZERO_ERROR;
>  	coll_cache = mh_coll_new();
> -	if (coll_cache == NULL)
> +	icu_ucase_default_map = ucasemap_open("", 0, &err);
> +	if (coll_cache == NULL || icu_ucase_default_map == NULL)
>  		panic("Can not create system collations cache");
>  }
>  
>  void
>  coll_free()
>  {
> +	ucasemap_close(icu_ucase_default_map);
>  	mh_coll_delete(coll_cache);
>  }
> diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
> index c505804f6..1f5b29a2e 100644
> --- a/src/lib/coll/coll.h
> +++ b/src/lib/coll/coll.h
> @@ -53,6 +53,9 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
>  
>  struct UCollator;
>  
> +/** Default universal casemap for case transformations. */
> +extern struct UCaseMap *icu_ucase_default_map;
> +
>  /**
>   * Collation. It has no unique features like name, id or owner.
>   * Only functional part - comparator, locale, ICU settings.
> diff --git a/src/lua/utf8.c b/src/lua/utf8.c
> index de026a921..d829b57b4 100644
> --- a/src/lua/utf8.c
> +++ b/src/lua/utf8.c
> @@ -38,9 +38,6 @@
>  
>  extern struct ibuf *tarantool_lua_ibuf;
>  
> -/** Default universal casemap for case transformations. */
> -static UCaseMap *root_map = NULL;
> -
>  /** Collations for cmp/casecmp functions. */
>  static struct coll *unicode_coll = NULL;
>  static struct coll *unicode_ci_coll = NULL;
> @@ -64,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize,
>  		}
>  		int real_bsize;
>  		if (is_to_upper) {
> -			real_bsize = ucasemap_utf8ToUpper(root_map, dst,
> +			real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst,
>  							  dst_bsize, src,
>  							  src_bsize, &err);
>  		} else {
> -			real_bsize = ucasemap_utf8ToLower(root_map, dst,
> +			real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst,
>  							  dst_bsize, src,
>  							  src_bsize, &err);
>  		}
> @@ -447,12 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
>  void
>  tarantool_lua_utf8_init(struct lua_State *L)
>  {
> -	UErrorCode err = U_ZERO_ERROR;
> -	root_map = ucasemap_open("", 0, &err);
> -	if (root_map == NULL) {
> -		luaL_error(L, tt_sprintf("error in ICU ucasemap_open: %s",
> -					 u_errorName(err)));
> -	}
> +	assert(icu_ucase_default_map != NULL);
>  	struct coll_def def;
>  	memset(&def, 0, sizeof(def));
>  	def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;
> @@ -474,7 +466,6 @@ error_coll:
>  void
>  tarantool_lua_utf8_free()
>  {
> -	ucasemap_close(root_map);
>  	if (unicode_coll != NULL)
>  		coll_unref(unicode_coll);
>  	if (unicode_ci_coll != NULL)
> diff --git a/test/sql/errinj.result b/test/sql/errinj.result
> index dead00f7d..5a427e855 100644
> --- a/test/sql/errinj.result
> +++ b/test/sql/errinj.result
> @@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
>  ...
>  box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
>  ---
> -- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
> +- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res
> +...
> +dummy_f = function(int) return 1 end
> +---
> +...
> +box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
> +---
> +- error: Failed to allocate 9 bytes in region_alloc for res
>  ...
>  errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
>  ---
> diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
> index 736e66957..7f4b30baf 100644
> --- a/test/sql/errinj.test.lua
> +++ b/test/sql/errinj.test.lua
> @@ -146,4 +146,6 @@ errinj.set("ERRINJ_WAL_DELAY", false)
>  errinj = box.error.injection
>  errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
>  box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
> +dummy_f = function(int) return 1 end
> +box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
>  errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
> -- 
> 2.21.0
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
  2019-04-04 14:07 [tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization Kirill Shcherbatov
  2019-04-04 15:03 ` [tarantool-patches] " Vladislav Shpilevoy
@ 2019-04-05 11:33 ` Kirill Yukhin
  1 sibling, 0 replies; 7+ messages in thread
From: Kirill Yukhin @ 2019-04-05 11:33 UTC (permalink / raw)
  To: tarantool-patches; +Cc: v.shpilevoy, Kirill Shcherbatov

Hello,

On 04 Apr 17:07, Kirill Shcherbatov wrote:
> Because sql_normalize_name used to be called twice - to estimate
> the size of the name buffer and to process data querying the
> UCaseMap object each time DDL performance felt by 15%.
> 
> This patch should eliminate some of the negative effects of using
> ICU for name normalization.
> 
> Thanks @avtikhon for a bechmark
> 
> Follow up e7558062d3559e6bcc18f91eacb88269428321dc

I've checked yout patch into master and 2.1 branch.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-04-05 11:33 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-04 14:07 [tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization Kirill Shcherbatov
2019-04-04 15:03 ` [tarantool-patches] " Vladislav Shpilevoy
2019-04-04 17:12   ` Kirill Shcherbatov
2019-04-04 17:31     ` Vladislav Shpilevoy
2019-04-04 18:08       ` Kirill Shcherbatov
2019-04-04 20:15         ` Vladislav Shpilevoy
2019-04-05 11:33 ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox