From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id CA0782AD7D for ; Thu, 4 Apr 2019 16:15:36 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id fRy7M5aKQTTj for ; Thu, 4 Apr 2019 16:15:36 -0400 (EDT) Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id CFE3F2B29F for ; Thu, 4 Apr 2019 16:15:35 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v1 1/1] sql: fix perf degradation on name normalization References: <09c8ef39eaf35ae7fa6825236a3b32b54d13dec5.1554386791.git.kshcherbatov@tarantool.org> <7cdb5d69-8ace-3899-872e-c97c477866e6@tarantool.org> <39ef8ba0-6a10-3270-47e4-2c55c542fc91@tarantool.org> From: Vladislav Shpilevoy Message-ID: Date: Thu, 4 Apr 2019 23:15:32 +0300 MIME-Version: 1.0 In-Reply-To: <39ef8ba0-6a10-3270-47e4-2c55c542fc91@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: Kirill Shcherbatov , tarantool-patches@freelists.org, Kirill Yukhin LGTM. On 04/04/2019 21:08, Kirill Shcherbatov wrote: >> 1. Missed diag_set. >> 2. Again. > Sorry, accidentally dropped it > >> 3. That name was ok to be local for utf8.c, but now >> it is global, and 'root_map' in the whole scope of >> tarantool looks ambiguous. What it is? MessagePack map? >> Lua table map? RB-tree map? What is 'root'? I propose >> >> icu_ucase_default_map > Songs good. > > ========================================== > Diff: > ========================================== > diff --git a/src/box/sql/util.c b/src/box/sql/util.c > index 2f3c17c9a..52a86635c 100644 > --- a/src/box/sql/util.c > +++ b/src/box/sql/util.c > @@ -268,9 +268,9 @@ sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) > return src_len + 1; > } > UErrorCode status = U_ZERO_ERROR; > - assert(root_map != NULL); > - int len = ucasemap_utf8ToUpper(root_map, dst, dst_size, src, > - src_len, &status); > + assert(icu_ucase_default_map != NULL); > + int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size, > + src, src_len, &status); > assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); > return len + 1; > } > @@ -310,7 +310,7 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len) > size_t region_svp = region_used(r); > char *res = region_alloc(r, size); > if (res == NULL) > - return NULL; > + goto oom_error; > int rc = sql_normalize_name(res, size, name, len); > if (rc <= size) > return res; > @@ -319,10 +319,14 @@ sql_normalized_name_region_new(struct region *r, const char *name, int len) > region_truncate(r, region_svp); > res = region_alloc(r, size); > if (res == NULL) > - return NULL; > + goto oom_error; > if (sql_normalize_name(res, size, name, len) > size) > unreachable(); > return res; > + > +oom_error: > + diag_set(OutOfMemory, size, "region_alloc", "res"); > + return NULL; > } > > /* Convenient short-hand */ > diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c > index 21f2489d4..495eb6d64 100644 > --- a/src/lib/coll/coll.c > +++ b/src/lib/coll/coll.c > @@ -37,7 +37,7 @@ > #include > #include > > -struct UCaseMap *root_map = NULL; > +struct UCaseMap *icu_ucase_default_map = NULL; > > #define mh_name _coll > struct mh_coll_key_t { > @@ -419,14 +419,14 @@ coll_init() > { > UErrorCode err = U_ZERO_ERROR; > coll_cache = mh_coll_new(); > - root_map = ucasemap_open("", 0, &err); > - if (coll_cache == NULL || root_map == NULL) > + icu_ucase_default_map = ucasemap_open("", 0, &err); > + if (coll_cache == NULL || icu_ucase_default_map == NULL) > panic("Can not create system collations cache"); > } > > void > coll_free() > { > - ucasemap_close(root_map); > + ucasemap_close(icu_ucase_default_map); > mh_coll_delete(coll_cache); > } > diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h > index 713c9646e..1f5b29a2e 100644 > --- a/src/lib/coll/coll.h > +++ b/src/lib/coll/coll.h > @@ -54,7 +54,7 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf, > struct UCollator; > > /** Default universal casemap for case transformations. */ > -extern struct UCaseMap *root_map; > +extern struct UCaseMap *icu_ucase_default_map; > > /** > * Collation. It has no unique features like name, id or owner. > diff --git a/src/lua/utf8.c b/src/lua/utf8.c > index 311698bda..d829b57b4 100644 > --- a/src/lua/utf8.c > +++ b/src/lua/utf8.c > @@ -61,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize, > } > int real_bsize; > if (is_to_upper) { > - real_bsize = ucasemap_utf8ToUpper(root_map, dst, > + real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, > dst_bsize, src, > src_bsize, &err); > } else { > - real_bsize = ucasemap_utf8ToLower(root_map, dst, > + real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst, > dst_bsize, src, > src_bsize, &err); > } > @@ -444,7 +444,7 @@ static const struct luaL_Reg utf8_lib[] = { > void > tarantool_lua_utf8_init(struct lua_State *L) > { > - assert(root_map != NULL); > + assert(icu_ucase_default_map != NULL); > struct coll_def def; > memset(&def, 0, sizeof(def)); > def.icu.strength = COLL_ICU_STRENGTH_TERTIARY; > > ========================================== > Patch: > ========================================== > From a779238e296a78830c68bdee305bb305336c5a34 Mon Sep 17 00:00:00 2001 > Message-Id: > From: Kirill Shcherbatov > Date: Thu, 4 Apr 2019 15:53:02 +0300 > Subject: [PATCH v1 1/1] sql: fix perf degradation on name normalization > > Because sql_normalize_name used to be called twice - to estimate > the size of the name buffer and to process data querying the > UCaseMap object each time performance in SQL felt by 15%. > > This patch should eliminate some of the negative effects of using > ICU for name normalization. > > Thanks @avtikhon for a bechmark > > Follow up e7558062d3559e6bcc18f91eacb88269428321dc > --- > src/box/sql/expr.c | 29 +++++++-------- > src/box/sql/parse.y | 21 +++++------ > src/box/sql/sqlInt.h | 9 ++--- > src/box/sql/trigger.c | 22 +++++++----- > src/box/sql/util.c | 77 ++++++++++++++++++++++------------------ > src/lib/coll/coll.c | 8 ++++- > src/lib/coll/coll.h | 3 ++ > src/lua/utf8.c | 15 ++------ > test/sql/errinj.result | 9 ++++- > test/sql/errinj.test.lua | 2 ++ > 10 files changed, 105 insertions(+), 90 deletions(-) > > diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c > index 4b98bd175..0d61be344 100644 > --- a/src/box/sql/expr.c > +++ b/src/box/sql/expr.c > @@ -965,22 +965,13 @@ sql_expr_new(struct sql *db, int op, const struct Token *token) > struct Expr * > sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) > { > - int extra_size = 0; > - bool is_name = false; > + int extra_size = 0, rc; > if (token != NULL) { > int val; > assert(token->z != NULL || token->n == 0); > if (sql_expr_token_to_int(op, token, &val) == 0) > return sql_expr_new_int(db, val); > - is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION; > - if (is_name) { > - extra_size = sql_normalize_name(NULL, 0, token->z, > - token->n); > - if (extra_size < 0) > - return NULL; > - } else { > - extra_size = token->n + 1; > - } > + extra_size = token->n + 1; > } > struct Expr *e = sql_expr_new_empty(db, op, extra_size); > if (e == NULL || token == NULL || token->n == 0) > @@ -988,14 +979,20 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) > e->u.zToken = (char *) &e[1]; > if (token->z[0] == '"') > e->flags |= EP_DblQuoted; > - if (! is_name) { > + if (op != TK_ID && op != TK_COLLATE && op != TK_FUNCTION) { > memcpy(e->u.zToken, token->z, token->n); > e->u.zToken[token->n] = '\0'; > sqlDequote(e->u.zToken); > - } else if (sql_normalize_name(e->u.zToken, extra_size, token->z, > - token->n) < 0) { > - sql_expr_delete(db, e, false); > - return NULL; > + } else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z, > + token->n)) > extra_size) { > + extra_size = rc; > + e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size); > + if (e == NULL) > + return NULL; > + e->u.zToken = (char *) &e[1]; > + if (sql_normalize_name(e->u.zToken, extra_size, token->z, > + token->n) > extra_size) > + unreachable(); > } > return e; > } > diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y > index ed5c05436..4581ef6e7 100644 > --- a/src/box/sql/parse.y > +++ b/src/box/sql/parse.y > @@ -893,15 +893,8 @@ idlist(A) ::= nm(Y). { > ** that created the expression. > */ > static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){ > - int name_sz = 0; > struct Expr *p = NULL; > - if (op != TK_VARIABLE) { > - name_sz = sql_normalize_name(NULL, 0, t.z, t.n); > - if (name_sz < 0) > - goto tarantool_error; > - } else { > - name_sz = t.n + 1; > - } > + int name_sz = t.n + 1; > p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz); > if( p ){ > memset(p, 0, sizeof(Expr)); > @@ -936,8 +929,16 @@ idlist(A) ::= nm(Y). { > p->iAgg = -1; > p->u.zToken = (char*)&p[1]; > if (op != TK_VARIABLE) { > - if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0) > - goto tarantool_error; > + int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n); > + if (rc > name_sz) { > + name_sz = rc; > + p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz); > + if (p == NULL) > + goto tarantool_error; > + p->u.zToken = (char *) &p[1]; > + if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz) > + unreachable(); > + } > } else { > memcpy(p->u.zToken, t.z, t.n); > p->u.zToken[t.n] = 0; > diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h > index d8dc03284..b322602dc 100644 > --- a/src/box/sql/sqlInt.h > +++ b/src/box/sql/sqlInt.h > @@ -3175,15 +3175,10 @@ void sqlDequote(char *); > * @param dst A buffer for the result string. The result will be > * 0-terminated if the buffer is large enough. The contents > * is undefined in case of failure. > - * @param dst_size The size of the buffer (number of bytes). If it > - * is 0, then dest may be NULL and the function will only > - * return the length of the result without writing any of > - * the result string > + * @param dst_size The size of the buffer (number of bytes). > * @param src The original string. > * @param src_len The length of the original string. > - * @retval The count of bytes written(or need to be written) on > - * success. > - * @retval < 0 Otherwise. The diag message is set. > + * @retval The count of bytes written (or need to be written). > */ > int > sql_normalize_name(char *dst, int dst_size, const char *src, int src_len); > diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c > index 20b2c970c..14e4198a8 100644 > --- a/src/box/sql/trigger.c > +++ b/src/box/sql/trigger.c > @@ -278,10 +278,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select) > static struct TriggerStep * > sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) > { > - int name_size = > - sql_normalize_name(NULL, 0, target_name->z, target_name->n); > - if (name_size < 0) > - return NULL; > + int name_size = target_name->n + 1; > int size = sizeof(struct TriggerStep) + name_size; > struct TriggerStep *trigger_step = sqlDbMallocZero(db, size); > if (trigger_step == NULL) { > @@ -289,10 +286,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) > return NULL; > } > char *z = (char *)&trigger_step[1]; > - if (sql_normalize_name(z, name_size, target_name->z, > - target_name->n) < 0) { > - sqlDbFree(db, trigger_step); > - return NULL; > + int rc = sql_normalize_name(z, name_size, target_name->z, > + target_name->n); > + if (rc > name_size) { > + name_size = rc; > + trigger_step = sqlDbReallocOrFree(db, trigger_step, > + sizeof(*trigger_step) + > + name_size); > + if (trigger_step == NULL) > + return NULL; > + z = (char *) &trigger_step[1]; > + if (sql_normalize_name(z, name_size, target_name->z, > + target_name->n) > name_size) > + unreachable(); > } > trigger_step->zTarget = z; > trigger_step->op = op; > diff --git a/src/box/sql/util.c b/src/box/sql/util.c > index a13efa682..52a86635c 100644 > --- a/src/box/sql/util.c > +++ b/src/box/sql/util.c > @@ -41,6 +41,7 @@ > #if HAVE_ISNAN || SQL_HAVE_ISNAN > #include > #endif > +#include "coll/coll.h" > #include > #include "errinj.h" > > @@ -259,67 +260,73 @@ int > sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) > { > assert(src != NULL); > + assert(dst != NULL && dst_size > 0); > if (sqlIsquote(src[0])){ > - if (dst_size == 0) > - return src_len + 1; > memcpy(dst, src, src_len); > dst[src_len] = '\0'; > sqlDequote(dst); > return src_len + 1; > } > UErrorCode status = U_ZERO_ERROR; > - ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { > - status = U_MEMORY_ALLOCATION_ERROR; > - goto error; > - }); > - UCaseMap *case_map = ucasemap_open(NULL, 0, &status); > - if (case_map == NULL) > - goto error; > - int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len, > - &status); > - ucasemap_close(case_map); > - assert(U_SUCCESS(status) || > - (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR)); > + assert(icu_ucase_default_map != NULL); > + int len = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, dst_size, > + src, src_len, &status); > + assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); > return len + 1; > -error: > - diag_set(CollationError, > - "string conversion to the uppercase failed: %s", > - u_errorName(status)); > - return -1; > } > > char * > sql_normalized_name_db_new(struct sql *db, const char *name, int len) > { > - int size = sql_normalize_name(NULL, 0, name, len); > - if (size < 0) > + int size = len + 1; > + ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { > + diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); > return NULL; > + }); > char *res = sqlDbMallocRawNN(db, size); > - if (res == NULL) { > - diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); > + if (res == NULL) > return NULL; > - } > - if (sql_normalize_name(res, size, name, len) < 0) { > - sqlDbFree(db, res); > + int rc = sql_normalize_name(res, size, name, len); > + if (rc <= size) > + return res; > + > + size = rc; > + res = sqlDbReallocOrFree(db, res, size); > + if (res == NULL) > return NULL; > - } > + if (sql_normalize_name(res, size, name, len) > size) > + unreachable(); > return res; > } > > char * > sql_normalized_name_region_new(struct region *r, const char *name, int len) > { > - int size = sql_normalize_name(NULL, 0, name, len); > - if (size < 0) > - return NULL; > - char *res = (char *) region_alloc(r, size); > - if (res == NULL) { > + int size = len + 1; > + ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { > diag_set(OutOfMemory, size, "region_alloc", "res"); > return NULL; > - } > - if (sql_normalize_name(res, size, name, len) < 0) > - return NULL; > + }); > + size_t region_svp = region_used(r); > + char *res = region_alloc(r, size); > + if (res == NULL) > + goto oom_error; > + int rc = sql_normalize_name(res, size, name, len); > + if (rc <= size) > + return res; > + > + size = rc; > + region_truncate(r, region_svp); > + res = region_alloc(r, size); > + if (res == NULL) > + goto oom_error; > + if (sql_normalize_name(res, size, name, len) > size) > + unreachable(); > return res; > + > +oom_error: > + diag_set(OutOfMemory, size, "region_alloc", "res"); > + return NULL; > } > > /* Convenient short-hand */ > diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c > index b83f0fdc7..495eb6d64 100644 > --- a/src/lib/coll/coll.c > +++ b/src/lib/coll/coll.c > @@ -34,8 +34,11 @@ > #include "diag.h" > #include "assoc.h" > #include > +#include > #include > > +struct UCaseMap *icu_ucase_default_map = NULL; > + > #define mh_name _coll > struct mh_coll_key_t { > const char *str; > @@ -414,13 +417,16 @@ coll_unref(struct coll *coll) > void > coll_init() > { > + UErrorCode err = U_ZERO_ERROR; > coll_cache = mh_coll_new(); > - if (coll_cache == NULL) > + icu_ucase_default_map = ucasemap_open("", 0, &err); > + if (coll_cache == NULL || icu_ucase_default_map == NULL) > panic("Can not create system collations cache"); > } > > void > coll_free() > { > + ucasemap_close(icu_ucase_default_map); > mh_coll_delete(coll_cache); > } > diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h > index c505804f6..1f5b29a2e 100644 > --- a/src/lib/coll/coll.h > +++ b/src/lib/coll/coll.h > @@ -53,6 +53,9 @@ typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf, > > struct UCollator; > > +/** Default universal casemap for case transformations. */ > +extern struct UCaseMap *icu_ucase_default_map; > + > /** > * Collation. It has no unique features like name, id or owner. > * Only functional part - comparator, locale, ICU settings. > diff --git a/src/lua/utf8.c b/src/lua/utf8.c > index de026a921..d829b57b4 100644 > --- a/src/lua/utf8.c > +++ b/src/lua/utf8.c > @@ -38,9 +38,6 @@ > > extern struct ibuf *tarantool_lua_ibuf; > > -/** Default universal casemap for case transformations. */ > -static UCaseMap *root_map = NULL; > - > /** Collations for cmp/casecmp functions. */ > static struct coll *unicode_coll = NULL; > static struct coll *unicode_ci_coll = NULL; > @@ -64,11 +61,11 @@ utf8_str_to_case(struct lua_State *L, const char *src, int src_bsize, > } > int real_bsize; > if (is_to_upper) { > - real_bsize = ucasemap_utf8ToUpper(root_map, dst, > + real_bsize = ucasemap_utf8ToUpper(icu_ucase_default_map, dst, > dst_bsize, src, > src_bsize, &err); > } else { > - real_bsize = ucasemap_utf8ToLower(root_map, dst, > + real_bsize = ucasemap_utf8ToLower(icu_ucase_default_map, dst, > dst_bsize, src, > src_bsize, &err); > } > @@ -447,12 +444,7 @@ static const struct luaL_Reg utf8_lib[] = { > void > tarantool_lua_utf8_init(struct lua_State *L) > { > - UErrorCode err = U_ZERO_ERROR; > - root_map = ucasemap_open("", 0, &err); > - if (root_map == NULL) { > - luaL_error(L, tt_sprintf("error in ICU ucasemap_open: %s", > - u_errorName(err))); > - } > + assert(icu_ucase_default_map != NULL); > struct coll_def def; > memset(&def, 0, sizeof(def)); > def.icu.strength = COLL_ICU_STRENGTH_TERTIARY; > @@ -474,7 +466,6 @@ error_coll: > void > tarantool_lua_utf8_free() > { > - ucasemap_close(root_map); > if (unicode_coll != NULL) > coll_unref(unicode_coll); > if (unicode_ci_coll != NULL) > diff --git a/test/sql/errinj.result b/test/sql/errinj.result > index dead00f7d..5a427e855 100644 > --- a/test/sql/errinj.result > +++ b/test/sql/errinj.result > @@ -450,7 +450,14 @@ errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) > ... > box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") > --- > -- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR' > +- error: Failed to allocate 6 bytes in sqlDbMallocRawNN for res > +... > +dummy_f = function(int) return 1 end > +--- > +... > +box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false) > +--- > +- error: Failed to allocate 9 bytes in region_alloc for res > ... > errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) > --- > diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua > index 736e66957..7f4b30baf 100644 > --- a/test/sql/errinj.test.lua > +++ b/test/sql/errinj.test.lua > @@ -146,4 +146,6 @@ errinj.set("ERRINJ_WAL_DELAY", false) > errinj = box.error.injection > errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) > box.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") > +dummy_f = function(int) return 1 end > +box.internal.sql_create_function("counter1", "INT", dummy_f, -1, false) > errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) > -- > 2.21.0 >