[tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization
Kirill Shcherbatov
kshcherbatov at tarantool.org
Thu Apr 4 21:08:26 MSK 2019
> 1. Missed diag_set.
> 2. Again.
Sorry, accidentally dropped it
> 3. That name was ok to be local for utf8.c, but now
> it is global, and 'root_map' in the whole scope of
> tarantool looks ambiguous. What it is? MessagePack map?
> Lua table map? RB-tree map? What is 'root'? I propose
>
> icu_ucase_default_map
Songs good.
==========================================
Diff:
==========================================
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 2f3c17c9a..52a86635c 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -268,9 +268,9 @@ sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
return src_len + 1;
}
UErrorCode status = U_ZERO_ERROR;
- assert(root_map != NULL);
- int len = ucasemap_utf8ToUpper(root_map, dst, dst_size, src,
- src_len, &status);
+ assert(icu_ucase_default_map != NULL);
+ int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size,
+ src, src_len, &status);
assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
return len + 1;
}
@@ -310,7 +310,7 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len)
size_t region_svp = region_used(r);
char *res = region_alloc(r, size);
if (res == NULL)
- return NULL;
+ goto oom_error;
int rc = sql_normalize_name(res, size, name, len);
if (rc <= size)
return res;
@@ -319,10 +319,14 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len)
region_truncate(r, region_svp);
res = region_alloc(r, size);
if (res == NULL)
- return NULL;
+ goto oom_error;
if (sql_normalize_name(res, size, name, len) > size)
unreachable();
return res;
+
+oom_error:
+ diag_set(OutOfMemory, size, "region_alloc", "res");
+ return NULL;
}
/* Convenient short-hand */
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index 21f2489d4..495eb6d64 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -37,7 +37,7 @@
#include <unicode/ucasemap.h>
#include <trivia/config.h>
-struct UCaseMap *root_map = NULL;
+struct UCaseMap *icu_ucase_default_map = NULL;
#define mh_name _coll
struct mh_coll_key_t {
@@ -419,14 +419,14 @@ coll_init()
{
UErrorCode err = U_ZERO_ERROR;
coll_cache = mh_coll_new();
- root_map = ucasemap_open("", 0, &err);
- if (coll_cache == NULL || root_map == NULL)
+ icu_ucase_default_map = ucasemap_open("", 0, &err);
+ if (coll_cache == NULL || icu_ucase_default_map == NULL)
panic("Can not create system collations cache");
}
void
coll_free()
{
- ucasemap_close(root_map);
+ ucasemap_close(icu_ucase_default_map);
mh_coll_delete(coll_cache);
}
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index 713c9646e..1f5b29a2e 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -54,7 +54,7 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
struct UCollator;
/** Default universal casemap for case transformations. */
-extern struct UCaseMap *root_map;
+extern struct UCaseMap *icu_ucase_default_map;
/**
* Collation. It has no unique features like name, id or owner.
diff --git a/src/lua/utf8.c b/src/lua/utf8.c
index 311698bda..d829b57b4 100644
--- a/src/lua/utf8.c
+++ b/src/lua/utf8.c
@@ -61,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize,
}
int real_bsize;
if (is_to_upper) {
- real_bsize = ucasemap_utf8ToUpper(root_map, dst,
+ real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst,
dst_bsize, src,
src_bsize, &err);
} else {
- real_bsize = ucasemap_utf8ToLower(root_map, dst,
+ real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst,
dst_bsize, src,
src_bsize, &err);
}
@@ -444,7 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
void
tarantool_lua_utf8_init(struct lua_State *L)
{
- assert(root_map != NULL);
+ assert(icu_ucase_default_map != NULL);
struct coll_def def;
memset(&def, 0, sizeof(def));
def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;
==========================================
Patch:
==========================================
>From a779238e296a78830c68bdee305bb305336c5a34 Mon Sep 17 00:00:00 2001
Message-Id: <a779238e296a78830c68bdee305bb305336c5a34.1554401202.git.kshcherbatov at tarantool.org>
From: Kirill Shcherbatov <kshcherbatov at tarantool.org>
Date: Thu, 4 Apr 2019 15:53:02 +0300
Subject: [PATCH v1 1/1] sql: fix perf degradation on name normalization
Because sql_normalize_name used to be called twice - to estimate
the size of the name buffer and to process data querying the
UCaseMap object each time performance in SQL felt by 15%.
This patch should eliminate some of the negative effects of using
ICU for name normalization.
Thanks @avtikhon for a bechmark
Follow up e7558062d3559e6bcc18f91eacb88269428321dc
---
src/box/sql/expr.c | 29 +++++++--------
src/box/sql/parse.y | 21 +++++------
src/box/sql/sqlInt.h | 9 ++---
src/box/sql/trigger.c | 22 +++++++-----
src/box/sql/util.c | 77 ++++++++++++++++++++++------------------
src/lib/coll/coll.c | 8 ++++-
src/lib/coll/coll.h | 3 ++
src/lua/utf8.c | 15 ++------
test/sql/errinj.result | 9 ++++-
test/sql/errinj.test.lua | 2 ++
10 files changed, 105 insertions(+), 90 deletions(-)
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 4b98bd175..0d61be344 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -965,22 +965,13 @@ sql_expr_new(struct sql *db, int op, const struct Token *token)
struct Expr *
sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
{
- int extra_size = 0;
- bool is_name = false;
+ int extra_size = 0, rc;
if (token != NULL) {
int val;
assert(token->z != NULL || token->n == 0);
if (sql_expr_token_to_int(op, token, &val) == 0)
return sql_expr_new_int(db, val);
- is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION;
- if (is_name) {
- extra_size = sql_normalize_name(NULL, 0, token->z,
- token->n);
- if (extra_size < 0)
- return NULL;
- } else {
- extra_size = token->n + 1;
- }
+ extra_size = token->n + 1;
}
struct Expr *e = sql_expr_new_empty(db, op, extra_size);
if (e == NULL || token == NULL || token->n == 0)
@@ -988,14 +979,20 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token)
e->u.zToken = (char *) &e[1];
if (token->z[0] == '"')
e->flags |= EP_DblQuoted;
- if (! is_name) {
+ if (op != TK_ID && op != TK_COLLATE && op != TK_FUNCTION) {
memcpy(e->u.zToken, token->z, token->n);
e->u.zToken[token->n] = '\0';
sqlDequote(e->u.zToken);
- } else if (sql_normalize_name(e->u.zToken, extra_size, token->z,
- token->n) < 0) {
- sql_expr_delete(db, e, false);
- return NULL;
+ } else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z,
+ token->n)) > extra_size) {
+ extra_size = rc;
+ e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size);
+ if (e == NULL)
+ return NULL;
+ e->u.zToken = (char *) &e[1];
+ if (sql_normalize_name(e->u.zToken, extra_size, token->z,
+ token->n) > extra_size)
+ unreachable();
}
return e;
}
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index ed5c05436..4581ef6e7 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -893,15 +893,8 @@ idlist(A) ::= nm(Y). {
** that created the expression.
*/
static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){
- int name_sz = 0;
struct Expr *p = NULL;
- if (op != TK_VARIABLE) {
- name_sz = sql_normalize_name(NULL, 0, t.z, t.n);
- if (name_sz < 0)
- goto tarantool_error;
- } else {
- name_sz = t.n + 1;
- }
+ int name_sz = t.n + 1;
p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz);
if( p ){
memset(p, 0, sizeof(Expr));
@@ -936,8 +929,16 @@ idlist(A) ::= nm(Y). {
p->iAgg = -1;
p->u.zToken = (char*)&p[1];
if (op != TK_VARIABLE) {
- if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0)
- goto tarantool_error;
+ int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
+ if (rc > name_sz) {
+ name_sz = rc;
+ p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz);
+ if (p == NULL)
+ goto tarantool_error;
+ p->u.zToken = (char *) &p[1];
+ if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
+ unreachable();
+ }
} else {
memcpy(p->u.zToken, t.z, t.n);
p->u.zToken[t.n] = 0;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index d8dc03284..b322602dc 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3175,15 +3175,10 @@ void sqlDequote(char *);
* @param dst A buffer for the result string. The result will be
* 0-terminated if the buffer is large enough. The contents
* is undefined in case of failure.
- * @param dst_size The size of the buffer (number of bytes). If it
- * is 0, then dest may be NULL and the function will only
- * return the length of the result without writing any of
- * the result string
+ * @param dst_size The size of the buffer (number of bytes).
* @param src The original string.
* @param src_len The length of the original string.
- * @retval The count of bytes written(or need to be written) on
- * success.
- * @retval < 0 Otherwise. The diag message is set.
+ * @retval The count of bytes written (or need to be written).
*/
int
sql_normalize_name(char *dst, int dst_size, const char *src, int src_len);
diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
index 20b2c970c..14e4198a8 100644
--- a/src/box/sql/trigger.c
+++ b/src/box/sql/trigger.c
@@ -278,10 +278,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select)
static struct TriggerStep *
sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
{
- int name_size =
- sql_normalize_name(NULL, 0, target_name->z, target_name->n);
- if (name_size < 0)
- return NULL;
+ int name_size = target_name->n + 1;
int size = sizeof(struct TriggerStep) + name_size;
struct TriggerStep *trigger_step = sqlDbMallocZero(db, size);
if (trigger_step == NULL) {
@@ -289,10 +286,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name)
return NULL;
}
char *z = (char *)&trigger_step[1];
- if (sql_normalize_name(z, name_size, target_name->z,
- target_name->n) < 0) {
- sqlDbFree(db, trigger_step);
- return NULL;
+ int rc = sql_normalize_name(z, name_size, target_name->z,
+ target_name->n);
+ if (rc > name_size) {
+ name_size = rc;
+ trigger_step = sqlDbReallocOrFree(db, trigger_step,
+ sizeof(*trigger_step) +
+ name_size);
+ if (trigger_step == NULL)
+ return NULL;
+ z = (char *) &trigger_step[1];
+ if (sql_normalize_name(z, name_size, target_name->z,
+ target_name->n) > name_size)
+ unreachable();
}
trigger_step->zTarget = z;
trigger_step->op = op;
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index a13efa682..52a86635c 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -41,6 +41,7 @@
#if HAVE_ISNAN || SQL_HAVE_ISNAN
#include <math.h>
#endif
+#include "coll/coll.h"
#include <unicode/ucasemap.h>
#include "errinj.h"
@@ -259,67 +260,73 @@ int
sql_normalize_name(char *dst, int dst_size, const char *src, int src_len)
{
assert(src != NULL);
+ assert(dst != NULL && dst_size > 0);
if (sqlIsquote(src[0])){
- if (dst_size == 0)
- return src_len + 1;
memcpy(dst, src, src_len);
dst[src_len] = '\0';
sqlDequote(dst);
return src_len + 1;
}
UErrorCode status = U_ZERO_ERROR;
- ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
- status = U_MEMORY_ALLOCATION_ERROR;
- goto error;
- });
- UCaseMap *case_map = ucasemap_open(NULL, 0, &status);
- if (case_map == NULL)
- goto error;
- int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len,
- &status);
- ucasemap_close(case_map);
- assert(U_SUCCESS(status) ||
- (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR));
+ assert(icu_ucase_default_map != NULL);
+ int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size,
+ src, src_len, &status);
+ assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
return len + 1;
-error:
- diag_set(CollationError,
- "string conversion to the uppercase failed: %s",
- u_errorName(status));
- return -1;
}
char *
sql_normalized_name_db_new(struct sql *db, const char *name, int len)
{
- int size = sql_normalize_name(NULL, 0, name, len);
- if (size < 0)
+ int size = len + 1;
+ ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
+ diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
return NULL;
+ });
char *res = sqlDbMallocRawNN(db, size);
- if (res == NULL) {
- diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res");
+ if (res == NULL)
return NULL;
- }
- if (sql_normalize_name(res, size, name, len) < 0) {
- sqlDbFree(db, res);
+ int rc = sql_normalize_name(res, size, name, len);
+ if (rc <= size)
+ return res;
+
+ size = rc;
+ res = sqlDbReallocOrFree(db, res, size);
+ if (res == NULL)
return NULL;
- }
+ if (sql_normalize_name(res, size, name, len) > size)
+ unreachable();
return res;
}
char *
sql_normalized_name_region_new(struct region *r, const char *name, int len)
{
- int size = sql_normalize_name(NULL, 0, name, len);
- if (size < 0)
- return NULL;
- char *res = (char *) region_alloc(r, size);
- if (res == NULL) {
+ int size = len + 1;
+ ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, {
diag_set(OutOfMemory, size, "region_alloc", "res");
return NULL;
- }
- if (sql_normalize_name(res, size, name, len) < 0)
- return NULL;
+ });
+ size_t region_svp = region_used(r);
+ char *res = region_alloc(r, size);
+ if (res == NULL)
+ goto oom_error;
+ int rc = sql_normalize_name(res, size, name, len);
+ if (rc <= size)
+ return res;
+
+ size = rc;
+ region_truncate(r, region_svp);
+ res = region_alloc(r, size);
+ if (res == NULL)
+ goto oom_error;
+ if (sql_normalize_name(res, size, name, len) > size)
+ unreachable();
return res;
+
+oom_error:
+ diag_set(OutOfMemory, size, "region_alloc", "res");
+ return NULL;
}
/* Convenient short-hand */
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index b83f0fdc7..495eb6d64 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -34,8 +34,11 @@
#include "diag.h"
#include "assoc.h"
#include <unicode/ucol.h>
+#include <unicode/ucasemap.h>
#include <trivia/config.h>
+struct UCaseMap *icu_ucase_default_map = NULL;
+
#define mh_name _coll
struct mh_coll_key_t {
const char *str;
@@ -414,13 +417,16 @@ coll_unref(struct coll *coll)
void
coll_init()
{
+ UErrorCode err = U_ZERO_ERROR;
coll_cache = mh_coll_new();
- if (coll_cache == NULL)
+ icu_ucase_default_map = ucasemap_open("", 0, &err);
+ if (coll_cache == NULL || icu_ucase_default_map == NULL)
panic("Can not create system collations cache");
}
void
coll_free()
{
+ ucasemap_close(icu_ucase_default_map);
mh_coll_delete(coll_cache);
}
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index c505804f6..1f5b29a2e 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -53,6 +53,9 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
struct UCollator;
+/** Default universal casemap for case transformations. */
+extern struct UCaseMap *icu_ucase_default_map;
+
/**
* Collation. It has no unique features like name, id or owner.
* Only functional part - comparator, locale, ICU settings.
diff --git a/src/lua/utf8.c b/src/lua/utf8.c
index de026a921..d829b57b4 100644
--- a/src/lua/utf8.c
+++ b/src/lua/utf8.c
@@ -38,9 +38,6 @@
extern struct ibuf *tarantool_lua_ibuf;
-/** Default universal casemap for case transformations. */
-static UCaseMap *root_map = NULL;
-
/** Collations for cmp/casecmp functions. */
static struct coll *unicode_coll = NULL;
static struct coll *unicode_ci_coll = NULL;
@@ -64,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize,
}
int real_bsize;
if (is_to_upper) {
- real_bsize = ucasemap_utf8ToUpper(root_map, dst,
+ real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst,
dst_bsize, src,
src_bsize, &err);
} else {
- real_bsize = ucasemap_utf8ToLower(root_map, dst,
+ real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst,
dst_bsize, src,
src_bsize, &err);
}
@@ -447,12 +444,7 @@ static const struct luaL_Reg utf8_lib[] = {
void
tarantool_lua_utf8_init(struct lua_State *L)
{
- UErrorCode err = U_ZERO_ERROR;
- root_map = ucasemap_open("", 0, &err);
- if (root_map == NULL) {
- luaL_error(L, tt_sprintf("error in ICU ucasemap_open: %s",
- u_errorName(err)));
- }
+ assert(icu_ucase_default_map != NULL);
struct coll_def def;
memset(&def, 0, sizeof(def));
def.icu.strength = COLL_ICU_STRENGTH_TERTIARY;
@@ -474,7 +466,6 @@ error_coll:
void
tarantool_lua_utf8_free()
{
- ucasemap_close(root_map);
if (unicode_coll != NULL)
coll_unref(unicode_coll);
if (unicode_ci_coll != NULL)
diff --git a/test/sql/errinj.result b/test/sql/errinj.result
index dead00f7d..5a427e855 100644
--- a/test/sql/errinj.result
+++ b/test/sql/errinj.result
@@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
...
box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
---
-- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR'
+- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res
+...
+dummy_f = function(int) return 1 end
+---
+...
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
+---
+- error: Failed to allocate 9 bytes in region_alloc for res
...
errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
---
diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
index 736e66957..7f4b30baf 100644
--- a/test/sql/errinj.test.lua
+++ b/test/sql/errinj.test.lua
@@ -146,4 +146,6 @@ errinj.set("ERRINJ_WAL_DELAY", false)
errinj = box.error.injection
errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true)
box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);")
+dummy_f = function(int) return 1 end
+box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false)
errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false)
--
2.21.0
More information about the Tarantool-patches
mailing list