[tarantool-patches] Re: [PATCH 1/1] identifier: do not use ICU UConverter for checks

Vladimir Davydov vdavydov.dev at gmail.com
Tue Apr 10 13:37:35 MSK 2018


ACK

> From 26f02c3be72636c68d951dc4bc0e9c94ea1e278b Mon Sep 17 00:00:00 2001
> From: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
> Date: Wed, 4 Apr 2018 15:30:32 +0300
> Subject: [PATCH] identifier: do not use ICU UConverter for checks
> 
> It makes no sense to create a converter, when there is
> nothing to convert. To check an identifier it is
> enough to use stateless ICU macros: U8_NEXT.
> 
> diff --git a/src/box/box.cc b/src/box/box.cc
> index cb319962..d2dfc5b5 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1518,7 +1518,6 @@ box_free(void)
>  		gc_free();
>  		engine_shutdown();
>  		wal_thread_stop();
> -		identifier_destroy();
>  	}
>  
>  	fiber_cond_destroy(&ro_cond);
> @@ -1724,7 +1723,6 @@ box_cfg_xc(void)
>  	engine_init();
>  	if (module_init() != 0)
>  		diag_raise();
> -	identifier_init();
>  	schema_init();
>  	replication_init();
>  	port_init();
> diff --git a/src/box/identifier.c b/src/box/identifier.c
> index e73e666b..252d5361 100644
> --- a/src/box/identifier.c
> +++ b/src/box/identifier.c
> @@ -33,44 +33,34 @@
>  #include "say.h"
>  #include "diag.h"
>  
> -#include <unicode/ucnv.h>
> +#include <unicode/utf8.h>
>  #include <unicode/uchar.h>
> -/* ICU returns this character in case of unknown symbol */
> -#define REPLACEMENT_CHARACTER (0xFFFD)
> -
> -static UConverter* utf8conv = NULL;
>  
>  int
> -identifier_check(const char *str, size_t str_len)
> +identifier_check(const char *str, int str_len)
>  {
> -	assert(utf8conv);
>  	const char *end = str + str_len;
>  	if (str == end)
>  		goto error;
>  
> -	ucnv_reset(utf8conv);
> -
> -	while (str < end) {
> -		int8_t type;
> -		UErrorCode status = U_ZERO_ERROR;
> -		UChar32 c = ucnv_getNextUChar(utf8conv, &str, end, &status);
> -
> -		if (U_FAILURE(status))
> +	UChar32 c;
> +	int offset = 0;
> +	while (offset < str_len) {
> +		U8_NEXT(str, offset, str_len, c);
> +		if (c == U_SENTINEL || c == 0xFFFD)
>  			goto error;
>  
> -		type = u_charType(c);
> +		int8_t type = u_charType(c);
>  		/**
>  		 * The icu library has a function named u_isprint, however,
>  		 * this function does not return any errors.
>  		 * Here the `c` symbol printability is determined by comparison
>  		 * with unicode category types explicitly.
>  		 */
> -		if (c == REPLACEMENT_CHARACTER ||
> -			type == U_UNASSIGNED ||
> -			type == U_LINE_SEPARATOR ||
> -			type == U_CONTROL_CHAR ||
> -			type == U_PARAGRAPH_SEPARATOR)
> -
> +		if (type == U_UNASSIGNED ||
> +		    type == U_LINE_SEPARATOR ||
> +		    type == U_CONTROL_CHAR ||
> +		    type == U_PARAGRAPH_SEPARATOR)
>  			goto error;
>  	}
>  	return 0;
> @@ -78,22 +68,3 @@ error:
>  	diag_set(ClientError, ER_IDENTIFIER, tt_cstr(str, str_len));
>  	return -1;
>  }
> -
> -void
> -identifier_init()
> -{
> -	assert(utf8conv == NULL);
> -	UErrorCode status = U_ZERO_ERROR ;
> -	utf8conv = ucnv_open("utf8", &status);
> -	if (U_FAILURE(status))
> -		panic("ICU ucnv_open(\"utf8\") failed");
> -}
> -
> -void
> -identifier_destroy()
> -{
> -	assert(utf8conv);
> -	ucnv_close(utf8conv);
> -	utf8conv = NULL;
> -}
> -
> diff --git a/src/box/identifier.h b/src/box/identifier.h
> index f1e36fe2..f34380d6 100644
> --- a/src/box/identifier.h
> +++ b/src/box/identifier.h
> @@ -47,20 +47,7 @@ extern "C" {
>   * @retval -1 error, diagnostics area is set
>   */
>  int
> -identifier_check(const char *str, size_t str_len);
> -
> -/**
> - * Init identifier check mechanism.
> - * This function allocates necessary for icu structures.
> - */
> -void
> -identifier_init();
> -
> -/**
> - * Clean icu structures.
> - */
> -void
> -identifier_destroy();
> +identifier_check(const char *str, int str_len);
>  
>  #if defined(__cplusplus)
>  } /* extern "C" */
> @@ -69,7 +56,7 @@ identifier_destroy();
>   * Throw an error if identifier is not valid.
>   */
>  static inline void
> -identifier_check_xc(const char *str, size_t str_len)
> +identifier_check_xc(const char *str, int str_len)
>  {
>  	if (identifier_check(str, str_len))
>  		diag_raise();
> diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
> index 6a3802de..629cd3ef 100644
> --- a/test/unit/vy_point_lookup.c
> +++ b/test/unit/vy_point_lookup.c
> @@ -327,7 +327,6 @@ test_basic()
>  int
>  main()
>  {
> -	identifier_init();
>  	plan(1);
>  
>  	vy_iterator_C_test_init(128 * 1024);
> @@ -337,6 +336,5 @@ main()
>  
>  	vy_iterator_C_test_finish();
>  
> -	identifier_destroy();
>  	return check_plan();
>  }



More information about the Tarantool-patches mailing list