From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladislav Shpilevoy Subject: [PATCH 1/1] identifier: do not use ICU UConverter for checks Date: Wed, 4 Apr 2018 15:35:41 +0300 Message-Id: <9c79f4be5957793acf8938387d9108e7b976c1b8.1522845248.git.v.shpilevoy@tarantool.org> To: tarantool-patches@freelists.org Cc: vdavydov.dev@gmail.com, Vladislav Shpilevoy List-ID: I makes no sense to create a converter, when there is nothing to convert. To check an identifier it is enough to use stateless ICU macros: U8_NEXT_OR_FFFD, that also allows to eliminate 0xFFFD symbol as a special one - this macros returns this code on any error, or when it is actually this symbol. --- Branch: https://github.com/tarantool/tarantool/tree/identifier-do-not-use-uconverter src/box/box.cc | 2 -- src/box/identifier.c | 51 ++++++++++----------------------------------- src/box/identifier.h | 13 ------------ test/unit/vy_point_lookup.c | 2 -- 4 files changed, 11 insertions(+), 57 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index cb3199624..d2dfc5b5f 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1518,7 +1518,6 @@ box_free(void) gc_free(); engine_shutdown(); wal_thread_stop(); - identifier_destroy(); } fiber_cond_destroy(&ro_cond); @@ -1724,7 +1723,6 @@ box_cfg_xc(void) engine_init(); if (module_init() != 0) diag_raise(); - identifier_init(); schema_init(); replication_init(); port_init(); diff --git a/src/box/identifier.c b/src/box/identifier.c index e73e666b7..318f914f6 100644 --- a/src/box/identifier.c +++ b/src/box/identifier.c @@ -33,44 +33,34 @@ #include "say.h" #include "diag.h" -#include +#include #include -/* ICU returns this character in case of unknown symbol */ -#define REPLACEMENT_CHARACTER (0xFFFD) - -static UConverter* utf8conv = NULL; int identifier_check(const char *str, size_t str_len) { - assert(utf8conv); const char *end = str + str_len; if (str == end) goto error; - ucnv_reset(utf8conv); - - while (str < end) { - int8_t type; - UErrorCode status = U_ZERO_ERROR; - UChar32 c = ucnv_getNextUChar(utf8conv, &str, end, &status); - - if (U_FAILURE(status)) + UChar32 c; + uint32_t offset = 0; + while (offset < str_len) { + U8_NEXT_OR_FFFD(str, offset, str_len, c) + if (c == 0xFFFD) goto error; - type = u_charType(c); + int8_t type = u_charType(c); /** * The icu library has a function named u_isprint, however, * this function does not return any errors. * Here the `c` symbol printability is determined by comparison * with unicode category types explicitly. */ - if (c == REPLACEMENT_CHARACTER || - type == U_UNASSIGNED || - type == U_LINE_SEPARATOR || - type == U_CONTROL_CHAR || - type == U_PARAGRAPH_SEPARATOR) - + if (type == U_UNASSIGNED || + type == U_LINE_SEPARATOR || + type == U_CONTROL_CHAR || + type == U_PARAGRAPH_SEPARATOR) goto error; } return 0; @@ -78,22 +68,3 @@ error: diag_set(ClientError, ER_IDENTIFIER, tt_cstr(str, str_len)); return -1; } - -void -identifier_init() -{ - assert(utf8conv == NULL); - UErrorCode status = U_ZERO_ERROR ; - utf8conv = ucnv_open("utf8", &status); - if (U_FAILURE(status)) - panic("ICU ucnv_open(\"utf8\") failed"); -} - -void -identifier_destroy() -{ - assert(utf8conv); - ucnv_close(utf8conv); - utf8conv = NULL; -} - diff --git a/src/box/identifier.h b/src/box/identifier.h index f1e36fe2a..30e6fdb69 100644 --- a/src/box/identifier.h +++ b/src/box/identifier.h @@ -49,19 +49,6 @@ extern "C" { int identifier_check(const char *str, size_t str_len); -/** - * Init identifier check mechanism. - * This function allocates necessary for icu structures. - */ -void -identifier_init(); - -/** - * Clean icu structures. - */ -void -identifier_destroy(); - #if defined(__cplusplus) } /* extern "C" */ diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index 6a3802def..629cd3efa 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -327,7 +327,6 @@ test_basic() int main() { - identifier_init(); plan(1); vy_iterator_C_test_init(128 * 1024); @@ -337,6 +336,5 @@ main() vy_iterator_C_test_finish(); - identifier_destroy(); return check_plan(); } -- 2.14.3 (Apple Git-98)