From: Kirill Shcherbatov <kshcherbatov@tarantool.org> To: tarantool-patches@freelists.org, v.shpilevoy@tarantool.org Cc: Kirill Shcherbatov <kshcherbatov@tarantool.org> Subject: [tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization Date: Thu, 4 Apr 2019 17:07:13 +0300 [thread overview] Message-ID: <09c8ef39eaf35ae7fa6825236a3b32b54d13dec5.1554386791.git.kshcherbatov@tarantool.org> (raw) Because sql_normalize_name used to be called twice - to estimate the size of the name buffer and to process data querying the UCaseMap object each time DDL performance felt by 15%. This patch should eliminate some of the negative effects of using ICU for name normalization. Thanks @avtikhon for a bechmark Follow up e7558062d3559e6bcc18f91eacb88269428321dc --- https://github.com/tarantool/msgpuck/tree/kshch/gh-3931-perfomance-fixup https://github.com/tarantool/tarantool/issues/3931 src/box/sql.c | 8 ++++ src/box/sql.h | 9 +++++ src/box/sql/expr.c | 25 ++++++------- src/box/sql/parse.y | 22 ++++++----- src/box/sql/sqlInt.h | 9 +---- src/box/sql/trigger.c | 22 +++++++---- src/box/sql/util.c | 80 ++++++++++++++++++++-------------------- src/lib/core/errinj.h | 1 - test/box/errinj.result | 2 - test/sql/errinj.result | 18 --------- test/sql/errinj.test.lua | 8 ---- 11 files changed, 97 insertions(+), 107 deletions(-) diff --git a/src/box/sql.c b/src/box/sql.c index 4fac020b0..872bfc4f4 100644 --- a/src/box/sql.c +++ b/src/box/sql.c @@ -54,6 +54,7 @@ #include "iproto_constants.h" #include "fk_constraint.h" #include "mpstream.h" +#include <unicode/ucasemap.h> static sql *db = NULL; @@ -64,6 +65,8 @@ static const uint32_t default_sql_flags = SQL_ShortColNames | SQL_AutoIndex | SQL_RecTriggers; +UCaseMap *sql_case_map; + void sql_init() { @@ -74,6 +77,11 @@ sql_init() if (sql_init_db(&db) != SQL_OK) panic("failed to initialize SQL subsystem"); + UErrorCode status = U_ZERO_ERROR; + sql_case_map = ucasemap_open(NULL, 0, &status); + if (sql_case_map == NULL) + panic("failed to initialize SQL subsystem"); + assert(db != NULL); } diff --git a/src/box/sql.h b/src/box/sql.h index 400360f59..a3e892595 100644 --- a/src/box/sql.h +++ b/src/box/sql.h @@ -59,6 +59,15 @@ sql_load_schema(); struct sql * sql_get(); +struct UCaseMap; + +/** + * Opaque service object for ICU case mapping functions is + * initialized for default locale. Used to perform name + * normalization in SQL. +*/ +extern struct UCaseMap *sql_case_map; + struct Expr; struct Parse; struct Select; diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index 838fbd21a..0668fcca6 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -965,7 +965,7 @@ sql_expr_new(struct sql *db, int op, const struct Token *token) struct Expr * sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) { - int extra_size = 0; + int extra_size = 0, rc; bool is_name = false; if (token != NULL) { int val; @@ -973,14 +973,7 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) if (sql_expr_token_to_int(op, token, &val) == 0) return sql_expr_new_int(db, val); is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION; - if (is_name) { - extra_size = sql_normalize_name(NULL, 0, token->z, - token->n); - if (extra_size < 0) - return NULL; - } else { - extra_size = token->n + 1; - } + extra_size = token->n + 1; } struct Expr *e = sql_expr_new_empty(db, op, extra_size); if (e == NULL || token == NULL || token->n == 0) @@ -992,10 +985,16 @@ sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) memcpy(e->u.zToken, token->z, token->n); e->u.zToken[token->n] = '\0'; sqlDequote(e->u.zToken); - } else if (sql_normalize_name(e->u.zToken, extra_size, token->z, - token->n) < 0) { - sql_expr_delete(db, e, false); - return NULL; + } else if ((rc = sql_normalize_name(e->u.zToken, extra_size, token->z, + token->n)) > extra_size) { + extra_size = rc; + e = sqlDbReallocOrFree(db, e, sizeof(*e) + extra_size); + if (e == NULL) + return NULL; + e->u.zToken = (char *) &e[1]; + if (sql_normalize_name(e->u.zToken, extra_size, token->z, + token->n) > extra_size) + unreachable(); } return e; } diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index c37b8d429..357335d23 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -856,15 +856,8 @@ idlist(A) ::= nm(Y). { ** that created the expression. */ static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){ - int name_sz = 0; struct Expr *p = NULL; - if (op != TK_VARIABLE) { - name_sz = sql_normalize_name(NULL, 0, t.z, t.n); - if (name_sz < 0) - goto tarantool_error; - } else { - name_sz = t.n + 1; - } + int name_sz = t.n + 1; p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz); if( p ){ memset(p, 0, sizeof(Expr)); @@ -899,8 +892,17 @@ idlist(A) ::= nm(Y). { p->iAgg = -1; p->u.zToken = (char*)&p[1]; if (op != TK_VARIABLE) { - if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0) - goto tarantool_error; + int rc; + if ((rc = sql_normalize_name(p->u.zToken, name_sz, t.z, + t.n)) > name_sz) { + name_sz = rc; + p = sqlDbReallocOrFree(pParse->db, p, sizeof(Expr) + name_sz); + if (p == NULL) + goto tarantool_error; + p->u.zToken = (char*) &p[1]; + if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz) + unreachable(); + } } else { memcpy(p->u.zToken, t.z, t.n); p->u.zToken[t.n] = 0; diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index 4a2197f96..1374f364c 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -3207,15 +3207,10 @@ void sqlDequote(char *); * @param dst A buffer for the result string. The result will be * 0-terminated if the buffer is large enough. The contents * is undefined in case of failure. - * @param dst_size The size of the buffer (number of bytes). If it - * is 0, then dest may be NULL and the function will only - * return the length of the result without writing any of - * the result string + * @param dst_size The size of the buffer (number of bytes). * @param src The original string. * @param src_len The length of the original string. - * @retval The count of bytes written(or need to be written) on - * success. - * @retval < 0 Otherwise. The diag message is set. + * @retval The count of bytes written. */ int sql_normalize_name(char *dst, int dst_size, const char *src, int src_len); diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c index c94880086..add792c63 100644 --- a/src/box/sql/trigger.c +++ b/src/box/sql/trigger.c @@ -279,10 +279,7 @@ sql_trigger_select_step(struct sql *db, struct Select *select) static struct TriggerStep * sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) { - int name_size = - sql_normalize_name(NULL, 0, target_name->z, target_name->n); - if (name_size < 0) - return NULL; + int name_size = target_name->n + 1; int size = sizeof(struct TriggerStep) + name_size; struct TriggerStep *trigger_step = sqlDbMallocZero(db, size); if (trigger_step == NULL) { @@ -290,10 +287,19 @@ sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) return NULL; } char *z = (char *)&trigger_step[1]; - if (sql_normalize_name(z, name_size, target_name->z, - target_name->n) < 0) { - sqlDbFree(db, trigger_step); - return NULL; + int rc; + if ((rc = sql_normalize_name(z, name_size, target_name->z, + target_name->n)) > name_size) { + name_size = rc; + trigger_step = sqlDbReallocOrFree(db, trigger_step, + sizeof(*trigger_step) + + name_size); + if (trigger_step == NULL) + return NULL; + z = (char *) &trigger_step[1]; + if (sql_normalize_name(z, name_size, target_name->z, + target_name->n) > name_size) + unreachable(); } trigger_step->zTarget = z; trigger_step->op = op; diff --git a/src/box/sql/util.c b/src/box/sql/util.c index e9553b3a4..b365966d5 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -259,67 +259,67 @@ int sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) { assert(src != NULL); + assert(dst != NULL && dst_size > 0); if (sqlIsquote(src[0])){ - if (dst_size == 0) - return src_len + 1; memcpy(dst, src, src_len); dst[src_len] = '\0'; sqlDequote(dst); return src_len + 1; } UErrorCode status = U_ZERO_ERROR; - ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { - status = U_MEMORY_ALLOCATION_ERROR; - goto error; - }); - UCaseMap *case_map = ucasemap_open(NULL, 0, &status); - if (case_map == NULL) - goto error; - int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len, - &status); - ucasemap_close(case_map); - assert(U_SUCCESS(status) || - (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR)); + assert(sql_case_map != NULL); + int len = ucasemap_utf8ToUpper(sql_case_map, dst, dst_size, src, + src_len, &status); + assert(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); return len + 1; -error: - diag_set(CollationError, - "string conversion to the uppercase failed: %s", - u_errorName(status)); - return -1; } char * sql_normalized_name_db_new(struct sql *db, const char *name, int len) { - int size = sql_normalize_name(NULL, 0, name, len); - if (size < 0) - return NULL; + int size = len + 1, rc; char *res = sqlDbMallocRawNN(db, size); - if (res == NULL) { - diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); - return NULL; - } - if (sql_normalize_name(res, size, name, len) < 0) { - sqlDbFree(db, res); - return NULL; - } + if (res == NULL) + goto oom_error; + if ((rc = sql_normalize_name(res, size, name, len)) <= size) + return res; + + size = rc; + res = sqlDbReallocOrFree(db, res, size); + if (res == NULL) + goto oom_error; + if (sql_normalize_name(res, size, name, len) > size) + unreachable(); return res; + +oom_error: + diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); + return NULL; } char * sql_normalized_name_region_new(struct region *r, const char *name, int len) { - int size = sql_normalize_name(NULL, 0, name, len); - if (size < 0) - return NULL; - char *res = (char *) region_alloc(r, size); - if (res == NULL) { - diag_set(OutOfMemory, size, "region_alloc", "res"); - return NULL; - } - if (sql_normalize_name(res, size, name, len) < 0) - return NULL; + int size = len + 1, rc; + size_t region_svp = region_used(r); + char *res = region_alloc(r, size); + if (res == NULL) + goto oom_error; + if ((rc = sql_normalize_name(res, size, name, len)) <= size) + return res; + + size = rc; + region_truncate(r, region_svp); + res = region_alloc(r, size); + if (res == NULL) + goto oom_error; + if (sql_normalize_name(res, size, name, len) > size) + unreachable(); return res; + +oom_error: + diag_set(OutOfMemory, size, "region", "res"); + return NULL; } /* diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index c823d3597..41783cc74 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -125,7 +125,6 @@ struct errinj { _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ - _(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index b657234e1..8e76b21b3 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -22,8 +22,6 @@ errinj.info() state: false ERRINJ_SNAP_WRITE_ROW_TIMEOUT: state: 0 - ERRINJ_SQL_NAME_NORMALIZATION: - state: false ERRINJ_VY_SCHED_TIMEOUT: state: 0 ERRINJ_WAL_WRITE_PARTIAL: diff --git a/test/sql/errinj.result b/test/sql/errinj.result index c974ab714..a1e7cc4a3 100644 --- a/test/sql/errinj.result +++ b/test/sql/errinj.result @@ -388,21 +388,3 @@ errinj.set("ERRINJ_WAL_DELAY", false) --- - ok ... --- --- gh-3931: Store regular identifiers in case-normal form --- -errinj = box.error.injection ---- -... -errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) ---- -- ok -... -box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") ---- -- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR' -... -errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) ---- -- ok -... diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua index f9e7a3c49..d8833feb4 100644 --- a/test/sql/errinj.test.lua +++ b/test/sql/errinj.test.lua @@ -139,11 +139,3 @@ box.sql.execute("INSERT INTO t VALUES (2);") box.sql.execute("UPDATE t SET id = 2;") -- Finish drop space. errinj.set("ERRINJ_WAL_DELAY", false) - --- --- gh-3931: Store regular identifiers in case-normal form --- -errinj = box.error.injection -errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) -box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") -errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) -- 2.21.0
next reply other threads:[~2019-04-04 14:07 UTC|newest] Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-04-04 14:07 Kirill Shcherbatov [this message] 2019-04-04 15:03 ` [tarantool-patches] " Vladislav Shpilevoy 2019-04-04 17:12 ` Kirill Shcherbatov 2019-04-04 17:31 ` Vladislav Shpilevoy 2019-04-04 18:08 ` Kirill Shcherbatov 2019-04-04 20:15 ` Vladislav Shpilevoy 2019-04-05 11:33 ` Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=09c8ef39eaf35ae7fa6825236a3b32b54d13dec5.1554386791.git.kshcherbatov@tarantool.org \ --to=kshcherbatov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [tarantool-patches] [PATCH v1 1/1] sql: fix perf degradation on name normalization' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox