From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Tue, 10 Apr 2018 13:37:35 +0300 From: Vladimir Davydov Subject: Re: [tarantool-patches] Re: [PATCH 1/1] identifier: do not use ICU UConverter for checks Message-ID: <20180410103735.vklallvl4225cghq@esperanza> References: <9c79f4be5957793acf8938387d9108e7b976c1b8.1522845248.git.v.shpilevoy@tarantool.org> <20180406135302.fhv6kn5e5k7v2gvk@esperanza> <647f33dc-797c-88ea-a585-bec6cb56d893@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <647f33dc-797c-88ea-a585-bec6cb56d893@tarantool.org> To: Vladislav Shpilevoy Cc: tarantool-patches@freelists.org List-ID: ACK > From 26f02c3be72636c68d951dc4bc0e9c94ea1e278b Mon Sep 17 00:00:00 2001 > From: Vladislav Shpilevoy > Date: Wed, 4 Apr 2018 15:30:32 +0300 > Subject: [PATCH] identifier: do not use ICU UConverter for checks > > It makes no sense to create a converter, when there is > nothing to convert. To check an identifier it is > enough to use stateless ICU macros: U8_NEXT. > > diff --git a/src/box/box.cc b/src/box/box.cc > index cb319962..d2dfc5b5 100644 > --- a/src/box/box.cc > +++ b/src/box/box.cc > @@ -1518,7 +1518,6 @@ box_free(void) > gc_free(); > engine_shutdown(); > wal_thread_stop(); > - identifier_destroy(); > } > > fiber_cond_destroy(&ro_cond); > @@ -1724,7 +1723,6 @@ box_cfg_xc(void) > engine_init(); > if (module_init() != 0) > diag_raise(); > - identifier_init(); > schema_init(); > replication_init(); > port_init(); > diff --git a/src/box/identifier.c b/src/box/identifier.c > index e73e666b..252d5361 100644 > --- a/src/box/identifier.c > +++ b/src/box/identifier.c > @@ -33,44 +33,34 @@ > #include "say.h" > #include "diag.h" > > -#include > +#include > #include > -/* ICU returns this character in case of unknown symbol */ > -#define REPLACEMENT_CHARACTER (0xFFFD) > - > -static UConverter* utf8conv = NULL; > > int > -identifier_check(const char *str, size_t str_len) > +identifier_check(const char *str, int str_len) > { > - assert(utf8conv); > const char *end = str + str_len; > if (str == end) > goto error; > > - ucnv_reset(utf8conv); > - > - while (str < end) { > - int8_t type; > - UErrorCode status = U_ZERO_ERROR; > - UChar32 c = ucnv_getNextUChar(utf8conv, &str, end, &status); > - > - if (U_FAILURE(status)) > + UChar32 c; > + int offset = 0; > + while (offset < str_len) { > + U8_NEXT(str, offset, str_len, c); > + if (c == U_SENTINEL || c == 0xFFFD) > goto error; > > - type = u_charType(c); > + int8_t type = u_charType(c); > /** > * The icu library has a function named u_isprint, however, > * this function does not return any errors. > * Here the `c` symbol printability is determined by comparison > * with unicode category types explicitly. > */ > - if (c == REPLACEMENT_CHARACTER || > - type == U_UNASSIGNED || > - type == U_LINE_SEPARATOR || > - type == U_CONTROL_CHAR || > - type == U_PARAGRAPH_SEPARATOR) > - > + if (type == U_UNASSIGNED || > + type == U_LINE_SEPARATOR || > + type == U_CONTROL_CHAR || > + type == U_PARAGRAPH_SEPARATOR) > goto error; > } > return 0; > @@ -78,22 +68,3 @@ error: > diag_set(ClientError, ER_IDENTIFIER, tt_cstr(str, str_len)); > return -1; > } > - > -void > -identifier_init() > -{ > - assert(utf8conv == NULL); > - UErrorCode status = U_ZERO_ERROR ; > - utf8conv = ucnv_open("utf8", &status); > - if (U_FAILURE(status)) > - panic("ICU ucnv_open(\"utf8\") failed"); > -} > - > -void > -identifier_destroy() > -{ > - assert(utf8conv); > - ucnv_close(utf8conv); > - utf8conv = NULL; > -} > - > diff --git a/src/box/identifier.h b/src/box/identifier.h > index f1e36fe2..f34380d6 100644 > --- a/src/box/identifier.h > +++ b/src/box/identifier.h > @@ -47,20 +47,7 @@ extern "C" { > * @retval -1 error, diagnostics area is set > */ > int > -identifier_check(const char *str, size_t str_len); > - > -/** > - * Init identifier check mechanism. > - * This function allocates necessary for icu structures. > - */ > -void > -identifier_init(); > - > -/** > - * Clean icu structures. > - */ > -void > -identifier_destroy(); > +identifier_check(const char *str, int str_len); > > #if defined(__cplusplus) > } /* extern "C" */ > @@ -69,7 +56,7 @@ identifier_destroy(); > * Throw an error if identifier is not valid. > */ > static inline void > -identifier_check_xc(const char *str, size_t str_len) > +identifier_check_xc(const char *str, int str_len) > { > if (identifier_check(str, str_len)) > diag_raise(); > diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c > index 6a3802de..629cd3ef 100644 > --- a/test/unit/vy_point_lookup.c > +++ b/test/unit/vy_point_lookup.c > @@ -327,7 +327,6 @@ test_basic() > int > main() > { > - identifier_init(); > plan(1); > > vy_iterator_C_test_init(128 * 1024); > @@ -337,6 +336,5 @@ main() > > vy_iterator_C_test_finish(); > > - identifier_destroy(); > return check_plan(); > }