From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 354132BC2B for ; Thu, 4 Apr 2019 14:08:30 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id KcgmwnkGu7Yc for ; Thu, 4 Apr 2019 14:08:30 -0400 (EDT) Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id B71272B33C for ; Thu, 4 Apr 2019 14:08:29 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization References: <09c8ef39eaf35ae7fa6825236a3b32b54d13dec5.1554386791.git.kshcherbatov@tarantool.org> <7cdb5d69-8ace-3899-872e-c97c477866e6@tarantool.org> From: Kirill Shcherbatov Message-ID: <39ef8ba0-6a10-3270-47e4-2c55c542fc91@tarantool.org> Date: Thu, 4 Apr 2019 21:08:26 +0300 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org, Vladislav Shpilevoy > 1. Missed diag_set. > 2. Again. Sorry, accidentally dropped it > 3. That name was ok to be local for utf8.c, but now > it is global, and 'root_map' in the whole scope of > tarantool looks ambiguous. What it is? MessagePack map? > Lua table map? RB-tree map? What is 'root'? I propose > > icu_ucase_default_map Songs good. ========================================== Diff: ========================================== diff --git a/src/box/sql/util.c b/src/box/sql/util.c index 2f3c17c9a..52a86635c 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -268,9 +268,9 @@ sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) return src_len + 1; } UErrorCode status = U_ZERO_ERROR; - assert(root_map != NULL); - int len = ucasemap_utf8ToUpper(root_map, dst, dst_size, src, - src_len, &status); + assert(icu_ucase_default_map != NULL); + int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size, + src, src_len, &status); assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); return len + 1; } @@ -310,7 +310,7 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len) size_t region_svp = region_used(r); char *res = region_alloc(r, size); if (res == NULL) - return NULL; + goto oom_error; int rc = sql_normalize_name(res, size, name, len); if (rc <= size) return res; @@ -319,10 +319,14 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len) region_truncate(r, region_svp); res = region_alloc(r, size); if (res == NULL) - return NULL; + goto oom_error; if (sql_normalize_name(res, size, name, len) > size) unreachable(); return res; + +oom_error: + diag_set(OutOfMemory, size, "region_alloc", "res"); + return NULL; } /* Convenient short-hand */ diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c index 21f2489d4..495eb6d64 100644 --- a/src/lib/coll/coll.c +++ b/src/lib/coll/coll.c @@ -37,7 +37,7 @@ #include #include -struct UCaseMap *root_map = NULL; +struct UCaseMap *icu_ucase_default_map = NULL; #define mh_name _coll struct mh_coll_key_t { @@ -419,14 +419,14 @@ coll_init() { UErrorCode err = U_ZERO_ERROR; coll_cache = mh_coll_new(); - root_map = ucasemap_open("", 0, &err); - if (coll_cache == NULL || root_map == NULL) + icu_ucase_default_map = ucasemap_open("", 0, &err); + if (coll_cache == NULL || icu_ucase_default_map == NULL) panic("Can not create system collations cache"); } void coll_free() { - ucasemap_close(root_map); + ucasemap_close(icu_ucase_default_map); mh_coll_delete(coll_cache); } diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h index 713c9646e..1f5b29a2e 100644 --- a/src/lib/coll/coll.h +++ b/src/lib/coll/coll.h @@ -54,7 +54,7 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf, struct UCollator; /** Default universal casemap for case transformations. */ -extern struct UCaseMap *root_map; +extern struct UCaseMap *icu_ucase_default_map; /** * Collation. It has no unique features like name, id or owner. diff --git a/src/lua/utf8.c b/src/lua/utf8.c index 311698bda..d829b57b4 100644 --- a/src/lua/utf8.c +++ b/src/lua/utf8.c @@ -61,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize, } int real_bsize; if (is_to_upper) { - real_bsize = ucasemap_utf8ToUpper(root_map, dst, + real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_bsize, src, src_bsize, &err); } else { - real_bsize = ucasemap_utf8ToLower(root_map, dst, + real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst, dst_bsize, src, src_bsize, &err); } @@ -444,7 +444,7 @@ static const struct luaL_Reg utf8_lib[] = { void tarantool_lua_utf8_init(struct lua_State *L) { - assert(root_map != NULL); + assert(icu_ucase_default_map != NULL); struct coll_def def; memset(&def, 0, sizeof(def)); def.icu.strength = COLL_ICU_STRENGTH_TERTIARY; ========================================== Patch: ========================================== >From a779238e296a78830c68bdee305bb305336c5a34 Mon Sep 17 00:00:00 2001 Message-Id: From: Kirill Shcherbatov Date: Thu, 4 Apr 2019 15:53:02 +0300 Subject: [PATCH v1 1/1] sql: fix perf degradation on name normalization Because sql_normalize_name used to be called twice - to estimate the size of the name buffer and to process data querying the UCaseMap object each time performance in SQL felt by 15%. This patch should eliminate some of the negative effects of using ICU for name normalization. Thanks @avtikhon for a bechmark Follow up e7558062d3559e6bcc18f91eacb88269428321dc --- src/box/sql/expr.c | 29 +++++++-------- src/box/sql/parse.y | 21 +++++------ src/box/sql/sqlInt.h | 9 ++--- src/box/sql/trigger.c | 22 +++++++----- src/box/sql/util.c | 77 ++++++++++++++++++++++------------------ src/lib/coll/coll.c | 8 ++++- src/lib/coll/coll.h | 3 ++ src/lua/utf8.c | 15 ++------ test/sql/errinj.result | 9 ++++- test/sql/errinj.test.lua | 2 ++ 10 files changed, 105 insertions(+), 90 deletions(-) diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index 4b98bd175..0d61be344 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -965,22 +965,13 @@ sql_expr_new(struct sql *db, int op, const struct Token *token) struct Expr * sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) { - int extra_size = 0; - bool is_name = false; + int extra_size = 0, rc; if (token != NULL) { int val; assert(token->z != NULL || token->n == 0); if (sql_expr_token_to_int(op, token, &val) == 0) return sql_expr_new_int(db, val); - is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION; - if (is_name) { - extra_size = sql_normalize_name(NULL, 0, token->z, - token->n); - if (extra_size < 0) - return NULL; - } else { - extra_size = token->n + 1; - } + extra_size = token->n + 1; } struct Expr *e = sql_expr_new_empty(db, op, extra_size); if (e == NULL || token == NULL || token->n == 0) @@ -988,14 +979,20 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) e->u.zToken = (char *) &e[1]; if (token->z[0] == '"') e->flags |= EP_DblQuoted; - if (! is_name) { + if (op != TK_ID && op != TK_COLLATE && op != TK_FUNCTION) { memcpy(e->u.zToken, token->z, token->n); e->u.zToken[token->n] = '\0'; sqlDequote(e->u.zToken); - } else if (sql_normalize_name(e->u.zToken, extra_size, token->z, - token->n) < 0) { - sql_expr_delete(db, e, false); - return NULL; + } else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z, + token->n)) > extra_size) { + extra_size = rc; + e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size); + if (e == NULL) + return NULL; + e->u.zToken = (char *) &e[1]; + if (sql_normalize_name(e->u.zToken, extra_size, token->z, + token->n) > extra_size) + unreachable(); } return e; } diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index ed5c05436..4581ef6e7 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -893,15 +893,8 @@ idlist(A) ::= nm(Y). { ** that created the expression. */ static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){ - int name_sz = 0; struct Expr *p = NULL; - if (op != TK_VARIABLE) { - name_sz = sql_normalize_name(NULL, 0, t.z, t.n); - if (name_sz < 0) - goto tarantool_error; - } else { - name_sz = t.n + 1; - } + int name_sz = t.n + 1; p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz); if( p ){ memset(p, 0, sizeof(Expr)); @@ -936,8 +929,16 @@ idlist(A) ::= nm(Y). { p->iAgg = -1; p->u.zToken = (char*)&p[1]; if (op != TK_VARIABLE) { - if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0) - goto tarantool_error; + int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n); + if (rc > name_sz) { + name_sz = rc; + p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz); + if (p == NULL) + goto tarantool_error; + p->u.zToken = (char *) &p[1]; + if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz) + unreachable(); + } } else { memcpy(p->u.zToken, t.z, t.n); p->u.zToken[t.n] = 0; diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index d8dc03284..b322602dc 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -3175,15 +3175,10 @@ void sqlDequote(char *); * @param dst A buffer for the result string. The result will be * 0-terminated if the buffer is large enough. The contents * is undefined in case of failure. - * @param dst_size The size of the buffer (number of bytes). If it - * is 0, then dest may be NULL and the function will only - * return the length of the result without writing any of - * the result string + * @param dst_size The size of the buffer (number of bytes). * @param src The original string. * @param src_len The length of the original string. - * @retval The count of bytes written(or need to be written) on - * success. - * @retval < 0 Otherwise. The diag message is set. + * @retval The count of bytes written (or need to be written). */ int sql_normalize_name(char *dst, int dst_size, const char *src, int src_len); diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c index 20b2c970c..14e4198a8 100644 --- a/src/box/sql/trigger.c +++ b/src/box/sql/trigger.c @@ -278,10 +278,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select) static struct TriggerStep * sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) { - int name_size = - sql_normalize_name(NULL, 0, target_name->z, target_name->n); - if (name_size < 0) - return NULL; + int name_size = target_name->n + 1; int size = sizeof(struct TriggerStep) + name_size; struct TriggerStep *trigger_step = sqlDbMallocZero(db, size); if (trigger_step == NULL) { @@ -289,10 +286,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) return NULL; } char *z = (char *)&trigger_step[1]; - if (sql_normalize_name(z, name_size, target_name->z, - target_name->n) < 0) { - sqlDbFree(db, trigger_step); - return NULL; + int rc = sql_normalize_name(z, name_size, target_name->z, + target_name->n); + if (rc > name_size) { + name_size = rc; + trigger_step = sqlDbReallocOrFree(db, trigger_step, + sizeof(*trigger_step) + + name_size); + if (trigger_step == NULL) + return NULL; + z = (char *) &trigger_step[1]; + if (sql_normalize_name(z, name_size, target_name->z, + target_name->n) > name_size) + unreachable(); } trigger_step->zTarget = z; trigger_step->op = op; diff --git a/src/box/sql/util.c b/src/box/sql/util.c index a13efa682..52a86635c 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -41,6 +41,7 @@ #if HAVE_ISNAN || SQL_HAVE_ISNAN #include #endif +#include "coll/coll.h" #include #include "errinj.h" @@ -259,67 +260,73 @@ int sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) { assert(src != NULL); + assert(dst != NULL && dst_size > 0); if (sqlIsquote(src[0])){ - if (dst_size == 0) - return src_len + 1; memcpy(dst, src, src_len); dst[src_len] = '\0'; sqlDequote(dst); return src_len + 1; } UErrorCode status = U_ZERO_ERROR; - ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { - status = U_MEMORY_ALLOCATION_ERROR; - goto error; - }); - UCaseMap *case_map = ucasemap_open(NULL, 0, &status); - if (case_map == NULL) - goto error; - int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len, - &status); - ucasemap_close(case_map); - assert(U_SUCCESS(status) || - (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR)); + assert(icu_ucase_default_map != NULL); + int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size, + src, src_len, &status); + assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); return len + 1; -error: - diag_set(CollationError, - "string conversion to the uppercase failed: %s", - u_errorName(status)); - return -1; } char * sql_normalized_name_db_new(struct sql *db, const char *name, int len) { - int size = sql_normalize_name(NULL, 0, name, len); - if (size < 0) + int size = len + 1; + ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { + diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); return NULL; + }); char *res = sqlDbMallocRawNN(db, size); - if (res == NULL) { - diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); + if (res == NULL) return NULL; - } - if (sql_normalize_name(res, size, name, len) < 0) { - sqlDbFree(db, res); + int rc = sql_normalize_name(res, size, name, len); + if (rc <= size) + return res; + + size = rc; + res = sqlDbReallocOrFree(db, res, size); + if (res == NULL) return NULL; - } + if (sql_normalize_name(res, size, name, len) > size) + unreachable(); return res; } char * sql_normalized_name_region_new(struct region *r, const char *name, int len) { - int size = sql_normalize_name(NULL, 0, name, len); - if (size < 0) - return NULL; - char *res = (char *) region_alloc(r, size); - if (res == NULL) { + int size = len + 1; + ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { diag_set(OutOfMemory, size, "region_alloc", "res"); return NULL; - } - if (sql_normalize_name(res, size, name, len) < 0) - return NULL; + }); + size_t region_svp = region_used(r); + char *res = region_alloc(r, size); + if (res == NULL) + goto oom_error; + int rc = sql_normalize_name(res, size, name, len); + if (rc <= size) + return res; + + size = rc; + region_truncate(r, region_svp); + res = region_alloc(r, size); + if (res == NULL) + goto oom_error; + if (sql_normalize_name(res, size, name, len) > size) + unreachable(); return res; + +oom_error: + diag_set(OutOfMemory, size, "region_alloc", "res"); + return NULL; } /* Convenient short-hand */ diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c index b83f0fdc7..495eb6d64 100644 --- a/src/lib/coll/coll.c +++ b/src/lib/coll/coll.c @@ -34,8 +34,11 @@ #include "diag.h" #include "assoc.h" #include +#include #include +struct UCaseMap *icu_ucase_default_map = NULL; + #define mh_name _coll struct mh_coll_key_t { const char *str; @@ -414,13 +417,16 @@ coll_unref(struct coll *coll) void coll_init() { + UErrorCode err = U_ZERO_ERROR; coll_cache = mh_coll_new(); - if (coll_cache == NULL) + icu_ucase_default_map = ucasemap_open("", 0, &err); + if (coll_cache == NULL || icu_ucase_default_map == NULL) panic("Can not create system collations cache"); } void coll_free() { + ucasemap_close(icu_ucase_default_map); mh_coll_delete(coll_cache); } diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h index c505804f6..1f5b29a2e 100644 --- a/src/lib/coll/coll.h +++ b/src/lib/coll/coll.h @@ -53,6 +53,9 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf, struct UCollator; +/** Default universal casemap for case transformations. */ +extern struct UCaseMap *icu_ucase_default_map; + /** * Collation. It has no unique features like name, id or owner. * Only functional part - comparator, locale, ICU settings. diff --git a/src/lua/utf8.c b/src/lua/utf8.c index de026a921..d829b57b4 100644 --- a/src/lua/utf8.c +++ b/src/lua/utf8.c @@ -38,9 +38,6 @@ extern struct ibuf *tarantool_lua_ibuf; -/** Default universal casemap for case transformations. */ -static UCaseMap *root_map = NULL; - /** Collations for cmp/casecmp functions. */ static struct coll *unicode_coll = NULL; static struct coll *unicode_ci_coll = NULL; @@ -64,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize, } int real_bsize; if (is_to_upper) { - real_bsize = ucasemap_utf8ToUpper(root_map, dst, + real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_bsize, src, src_bsize, &err); } else { - real_bsize = ucasemap_utf8ToLower(root_map, dst, + real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst, dst_bsize, src, src_bsize, &err); } @@ -447,12 +444,7 @@ static const struct luaL_Reg utf8_lib[] = { void tarantool_lua_utf8_init(struct lua_State *L) { - UErrorCode err = U_ZERO_ERROR; - root_map = ucasemap_open("", 0, &err); - if (root_map == NULL) { - luaL_error(L, tt_sprintf("error in ICU ucasemap_open: %s", - u_errorName(err))); - } + assert(icu_ucase_default_map != NULL); struct coll_def def; memset(&def, 0, sizeof(def)); def.icu.strength = COLL_ICU_STRENGTH_TERTIARY; @@ -474,7 +466,6 @@ error_coll: void tarantool_lua_utf8_free() { - ucasemap_close(root_map); if (unicode_coll != NULL) coll_unref(unicode_coll); if (unicode_ci_coll != NULL) diff --git a/test/sql/errinj.result b/test/sql/errinj.result index dead00f7d..5a427e855 100644 --- a/test/sql/errinj.result +++ b/test/sql/errinj.result @@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) ... box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") --- -- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR' +- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res +... +dummy_f = function(int) return 1 end +--- +... +box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false) +--- +- error: Failed to allocate 9 bytes in region_alloc for res ... errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) --- diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua index 736e66957..7f4b30baf 100644 --- a/test/sql/errinj.test.lua +++ b/test/sql/errinj.test.lua @@ -146,4 +146,6 @@ errinj.set("ERRINJ_WAL_DELAY", false) errinj = box.error.injection errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") +dummy_f = function(int) return 1 end +box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false) errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) -- 2.21.0