From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Cc: tarantool-patches@freelists.org
Subject: Re: [tarantool-patches] Re: [PATCH 1/1] identifier: do not use ICU UConverter for checks
Date: Tue, 10 Apr 2018 13:37:35 +0300 [thread overview]
Message-ID: <20180410103735.vklallvl4225cghq@esperanza> (raw)
In-Reply-To: <647f33dc-797c-88ea-a585-bec6cb56d893@tarantool.org>
ACK
> From 26f02c3be72636c68d951dc4bc0e9c94ea1e278b Mon Sep 17 00:00:00 2001
> From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
> Date: Wed, 4 Apr 2018 15:30:32 +0300
> Subject: [PATCH] identifier: do not use ICU UConverter for checks
>
> It makes no sense to create a converter, when there is
> nothing to convert. To check an identifier it is
> enough to use stateless ICU macros: U8_NEXT.
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index cb319962..d2dfc5b5 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1518,7 +1518,6 @@ box_free(void)
> gc_free();
> engine_shutdown();
> wal_thread_stop();
> - identifier_destroy();
> }
>
> fiber_cond_destroy(&ro_cond);
> @@ -1724,7 +1723,6 @@ box_cfg_xc(void)
> engine_init();
> if (module_init() != 0)
> diag_raise();
> - identifier_init();
> schema_init();
> replication_init();
> port_init();
> diff --git a/src/box/identifier.c b/src/box/identifier.c
> index e73e666b..252d5361 100644
> --- a/src/box/identifier.c
> +++ b/src/box/identifier.c
> @@ -33,44 +33,34 @@
> #include "say.h"
> #include "diag.h"
>
> -#include <unicode/ucnv.h>
> +#include <unicode/utf8.h>
> #include <unicode/uchar.h>
> -/* ICU returns this character in case of unknown symbol */
> -#define REPLACEMENT_CHARACTER (0xFFFD)
> -
> -static UConverter* utf8conv = NULL;
>
> int
> -identifier_check(const char *str, size_t str_len)
> +identifier_check(const char *str, int str_len)
> {
> - assert(utf8conv);
> const char *end = str + str_len;
> if (str == end)
> goto error;
>
> - ucnv_reset(utf8conv);
> -
> - while (str < end) {
> - int8_t type;
> - UErrorCode status = U_ZERO_ERROR;
> - UChar32 c = ucnv_getNextUChar(utf8conv, &str, end, &status);
> -
> - if (U_FAILURE(status))
> + UChar32 c;
> + int offset = 0;
> + while (offset < str_len) {
> + U8_NEXT(str, offset, str_len, c);
> + if (c == U_SENTINEL || c == 0xFFFD)
> goto error;
>
> - type = u_charType(c);
> + int8_t type = u_charType(c);
> /**
> * The icu library has a function named u_isprint, however,
> * this function does not return any errors.
> * Here the `c` symbol printability is determined by comparison
> * with unicode category types explicitly.
> */
> - if (c == REPLACEMENT_CHARACTER ||
> - type == U_UNASSIGNED ||
> - type == U_LINE_SEPARATOR ||
> - type == U_CONTROL_CHAR ||
> - type == U_PARAGRAPH_SEPARATOR)
> -
> + if (type == U_UNASSIGNED ||
> + type == U_LINE_SEPARATOR ||
> + type == U_CONTROL_CHAR ||
> + type == U_PARAGRAPH_SEPARATOR)
> goto error;
> }
> return 0;
> @@ -78,22 +68,3 @@ error:
> diag_set(ClientError, ER_IDENTIFIER, tt_cstr(str, str_len));
> return -1;
> }
> -
> -void
> -identifier_init()
> -{
> - assert(utf8conv == NULL);
> - UErrorCode status = U_ZERO_ERROR ;
> - utf8conv = ucnv_open("utf8", &status);
> - if (U_FAILURE(status))
> - panic("ICU ucnv_open(\"utf8\") failed");
> -}
> -
> -void
> -identifier_destroy()
> -{
> - assert(utf8conv);
> - ucnv_close(utf8conv);
> - utf8conv = NULL;
> -}
> -
> diff --git a/src/box/identifier.h b/src/box/identifier.h
> index f1e36fe2..f34380d6 100644
> --- a/src/box/identifier.h
> +++ b/src/box/identifier.h
> @@ -47,20 +47,7 @@ extern "C" {
> * @retval -1 error, diagnostics area is set
> */
> int
> -identifier_check(const char *str, size_t str_len);
> -
> -/**
> - * Init identifier check mechanism.
> - * This function allocates necessary for icu structures.
> - */
> -void
> -identifier_init();
> -
> -/**
> - * Clean icu structures.
> - */
> -void
> -identifier_destroy();
> +identifier_check(const char *str, int str_len);
>
> #if defined(__cplusplus)
> } /* extern "C" */
> @@ -69,7 +56,7 @@ identifier_destroy();
> * Throw an error if identifier is not valid.
> */
> static inline void
> -identifier_check_xc(const char *str, size_t str_len)
> +identifier_check_xc(const char *str, int str_len)
> {
> if (identifier_check(str, str_len))
> diag_raise();
> diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
> index 6a3802de..629cd3ef 100644
> --- a/test/unit/vy_point_lookup.c
> +++ b/test/unit/vy_point_lookup.c
> @@ -327,7 +327,6 @@ test_basic()
> int
> main()
> {
> - identifier_init();
> plan(1);
>
> vy_iterator_C_test_init(128 * 1024);
> @@ -337,6 +336,5 @@ main()
>
> vy_iterator_C_test_finish();
>
> - identifier_destroy();
> return check_plan();
> }
prev parent reply other threads:[~2018-04-10 10:37 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-04 12:35 Vladislav Shpilevoy
2018-04-04 12:38 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-06 13:53 ` Vladimir Davydov
2018-04-07 20:34 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-10 10:37 ` Vladimir Davydov [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180410103735.vklallvl4225cghq@esperanza \
--to=vdavydov.dev@gmail.com \
--cc=tarantool-patches@freelists.org \
--cc=v.shpilevoy@tarantool.org \
--subject='Re: [tarantool-patches] Re: [PATCH 1/1] identifier: do not use ICU UConverter for checks' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox