From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org
Cc: vdavydov.dev@gmail.com, Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Subject: [PATCH 1/1] identifier: do not use ICU UConverter for checks
Date: Wed, 4 Apr 2018 15:35:41 +0300 [thread overview]
Message-ID: <9c79f4be5957793acf8938387d9108e7b976c1b8.1522845248.git.v.shpilevoy@tarantool.org> (raw)
It makes no sense to create a converter when there is
nothing to convert. To check an identifier it is
enough to use a stateless ICU macro, U8_NEXT_OR_FFFD,
which also makes it unnecessary to treat the 0xFFFD symbol
as a special case - this macro returns this code on any
error, or when the input actually contains this symbol.
---
Branch: https://github.com/tarantool/tarantool/tree/identifier-do-not-use-uconverter
src/box/box.cc | 2 --
src/box/identifier.c | 51 ++++++++++-----------------------------------
src/box/identifier.h | 13 ------------
test/unit/vy_point_lookup.c | 2 --
4 files changed, 11 insertions(+), 57 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index cb3199624..d2dfc5b5f 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1518,7 +1518,6 @@ box_free(void)
gc_free();
engine_shutdown();
wal_thread_stop();
- identifier_destroy();
}
fiber_cond_destroy(&ro_cond);
@@ -1724,7 +1723,6 @@ box_cfg_xc(void)
engine_init();
if (module_init() != 0)
diag_raise();
- identifier_init();
schema_init();
replication_init();
port_init();
diff --git a/src/box/identifier.c b/src/box/identifier.c
index e73e666b7..318f914f6 100644
--- a/src/box/identifier.c
+++ b/src/box/identifier.c
@@ -33,44 +33,34 @@
#include "say.h"
#include "diag.h"
-#include <unicode/ucnv.h>
+#include <unicode/utf8.h>
#include <unicode/uchar.h>
-/* ICU returns this character in case of unknown symbol */
-#define REPLACEMENT_CHARACTER (0xFFFD)
-
-static UConverter* utf8conv = NULL;
int
identifier_check(const char *str, size_t str_len)
{
- assert(utf8conv);
const char *end = str + str_len;
if (str == end)
goto error;
- ucnv_reset(utf8conv);
-
- while (str < end) {
- int8_t type;
- UErrorCode status = U_ZERO_ERROR;
- UChar32 c = ucnv_getNextUChar(utf8conv, &str, end, &status);
-
- if (U_FAILURE(status))
+ UChar32 c;
+ uint32_t offset = 0;
+ while (offset < str_len) {
+ U8_NEXT_OR_FFFD(str, offset, str_len, c)
+ if (c == 0xFFFD)
goto error;
- type = u_charType(c);
+ int8_t type = u_charType(c);
/**
* The icu library has a function named u_isprint, however,
* this function does not return any errors.
* Here the `c` symbol printability is determined by comparison
* with unicode category types explicitly.
*/
- if (c == REPLACEMENT_CHARACTER ||
- type == U_UNASSIGNED ||
- type == U_LINE_SEPARATOR ||
- type == U_CONTROL_CHAR ||
- type == U_PARAGRAPH_SEPARATOR)
-
+ if (type == U_UNASSIGNED ||
+ type == U_LINE_SEPARATOR ||
+ type == U_CONTROL_CHAR ||
+ type == U_PARAGRAPH_SEPARATOR)
goto error;
}
return 0;
@@ -78,22 +68,3 @@ error:
diag_set(ClientError, ER_IDENTIFIER, tt_cstr(str, str_len));
return -1;
}
-
-void
-identifier_init()
-{
- assert(utf8conv == NULL);
- UErrorCode status = U_ZERO_ERROR ;
- utf8conv = ucnv_open("utf8", &status);
- if (U_FAILURE(status))
- panic("ICU ucnv_open(\"utf8\") failed");
-}
-
-void
-identifier_destroy()
-{
- assert(utf8conv);
- ucnv_close(utf8conv);
- utf8conv = NULL;
-}
-
diff --git a/src/box/identifier.h b/src/box/identifier.h
index f1e36fe2a..30e6fdb69 100644
--- a/src/box/identifier.h
+++ b/src/box/identifier.h
@@ -49,19 +49,6 @@ extern "C" {
int
identifier_check(const char *str, size_t str_len);
-/**
- * Init identifier check mechanism.
- * This function allocates necessary for icu structures.
- */
-void
-identifier_init();
-
-/**
- * Clean icu structures.
- */
-void
-identifier_destroy();
-
#if defined(__cplusplus)
} /* extern "C" */
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index 6a3802def..629cd3efa 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -327,7 +327,6 @@ test_basic()
int
main()
{
- identifier_init();
plan(1);
vy_iterator_C_test_init(128 * 1024);
@@ -337,6 +336,5 @@ main()
vy_iterator_C_test_finish();
- identifier_destroy();
return check_plan();
}
--
2.14.3 (Apple Git-98)
next reply other threads:[~2018-04-04 12:35 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-04 12:35 Vladislav Shpilevoy [this message]
2018-04-04 12:38 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-06 13:53 ` Vladimir Davydov
2018-04-07 20:34 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-10 10:37 ` Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9c79f4be5957793acf8938387d9108e7b976c1b8.1522845248.git.v.shpilevoy@tarantool.org \
--to=v.shpilevoy@tarantool.org \
--cc=tarantool-patches@freelists.org \
--cc=vdavydov.dev@gmail.com \
--subject='Re: [PATCH 1/1] identifier: do not use ICU UConverter for checks' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox