From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@freelists.org Cc: vdavydov.dev@gmail.com, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Subject: [PATCH 1/1] identifier: do not use ICU UConverter for checks Date: Wed, 4 Apr 2018 15:35:41 +0300 [thread overview] Message-ID: <9c79f4be5957793acf8938387d9108e7b976c1b8.1522845248.git.v.shpilevoy@tarantool.org> (raw) It makes no sense to create a converter when there is nothing to convert. To check an identifier it is enough to use the stateless ICU macro U8_NEXT_OR_FFFD, which also makes it possible to stop treating the 0xFFFD symbol as a special case - this macro returns this code on any error, or when the input actually is this symbol. --- Branch: https://github.com/tarantool/tarantool/tree/identifier-do-not-use-uconverter src/box/box.cc | 2 -- src/box/identifier.c | 51 ++++++++++----------------------------------- src/box/identifier.h | 13 ------------ test/unit/vy_point_lookup.c | 2 -- 4 files changed, 11 insertions(+), 57 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index cb3199624..d2dfc5b5f 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1518,7 +1518,6 @@ box_free(void) gc_free(); engine_shutdown(); wal_thread_stop(); - identifier_destroy(); } fiber_cond_destroy(&ro_cond); @@ -1724,7 +1723,6 @@ box_cfg_xc(void) engine_init(); if (module_init() != 0) diag_raise(); - identifier_init(); schema_init(); replication_init(); port_init(); diff --git a/src/box/identifier.c b/src/box/identifier.c index e73e666b7..318f914f6 100644 --- a/src/box/identifier.c +++ b/src/box/identifier.c @@ -33,44 +33,34 @@ #include "say.h" #include "diag.h" -#include <unicode/ucnv.h> +#include <unicode/utf8.h> #include <unicode/uchar.h> -/* ICU returns this character in case of unknown symbol */ -#define REPLACEMENT_CHARACTER (0xFFFD) - -static UConverter* utf8conv = NULL; int identifier_check(const char *str, size_t str_len) { - assert(utf8conv); const char *end = str + str_len; if (str == end) goto error; - 
ucnv_reset(utf8conv); - - while (str < end) { - int8_t type; - UErrorCode status = U_ZERO_ERROR; - UChar32 c = ucnv_getNextUChar(utf8conv, &str, end, &status); - - if (U_FAILURE(status)) + UChar32 c; + uint32_t offset = 0; + while (offset < str_len) { + U8_NEXT_OR_FFFD(str, offset, str_len, c) + if (c == 0xFFFD) goto error; - type = u_charType(c); + int8_t type = u_charType(c); /** * The icu library has a function named u_isprint, however, * this function does not return any errors. * Here the `c` symbol printability is determined by comparison * with unicode category types explicitly. */ - if (c == REPLACEMENT_CHARACTER || - type == U_UNASSIGNED || - type == U_LINE_SEPARATOR || - type == U_CONTROL_CHAR || - type == U_PARAGRAPH_SEPARATOR) - + if (type == U_UNASSIGNED || + type == U_LINE_SEPARATOR || + type == U_CONTROL_CHAR || + type == U_PARAGRAPH_SEPARATOR) goto error; } return 0; @@ -78,22 +68,3 @@ error: diag_set(ClientError, ER_IDENTIFIER, tt_cstr(str, str_len)); return -1; } - -void -identifier_init() -{ - assert(utf8conv == NULL); - UErrorCode status = U_ZERO_ERROR ; - utf8conv = ucnv_open("utf8", &status); - if (U_FAILURE(status)) - panic("ICU ucnv_open(\"utf8\") failed"); -} - -void -identifier_destroy() -{ - assert(utf8conv); - ucnv_close(utf8conv); - utf8conv = NULL; -} - diff --git a/src/box/identifier.h b/src/box/identifier.h index f1e36fe2a..30e6fdb69 100644 --- a/src/box/identifier.h +++ b/src/box/identifier.h @@ -49,19 +49,6 @@ extern "C" { int identifier_check(const char *str, size_t str_len); -/** - * Init identifier check mechanism. - * This function allocates necessary for icu structures. - */ -void -identifier_init(); - -/** - * Clean icu structures. 
- */ -void -identifier_destroy(); - #if defined(__cplusplus) } /* extern "C" */ diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index 6a3802def..629cd3efa 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -327,7 +327,6 @@ test_basic() int main() { - identifier_init(); plan(1); vy_iterator_C_test_init(128 * 1024); @@ -337,6 +336,5 @@ main() vy_iterator_C_test_finish(); - identifier_destroy(); return check_plan(); } -- 2.14.3 (Apple Git-98)
next reply other threads:[~2018-04-04 12:35 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-04-04 12:35 Vladislav Shpilevoy [this message] 2018-04-04 12:38 ` [tarantool-patches] " Vladislav Shpilevoy 2018-04-06 13:53 ` Vladimir Davydov 2018-04-07 20:34 ` [tarantool-patches] " Vladislav Shpilevoy 2018-04-10 10:37 ` Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=9c79f4be5957793acf8938387d9108e7b976c1b8.1522845248.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=vdavydov.dev@gmail.com \ --subject='Re: [PATCH 1/1] identifier: do not use ICU UConverter for checks' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox