Tarantool development patches archive
 help / color / mirror / Atom feed
From: Konstantin Osipov <kostja@tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Cc: tarantool-patches@freelists.org
Subject: [tarantool-patches] Re: [PATCH 1/1] collation: refactoring
Date: Fri, 18 May 2018 17:36:28 +0300	[thread overview]
Message-ID: <20180518143628.GD26429@atlas> (raw)
In-Reply-To: <4e9bf7b49b322fcbc66d1f40722132c367ccbf10.1526643169.git.v.shpilevoy@tarantool.org>

* Vladislav Shpilevoy <v.shpilevoy@tarantool.org> [18/05/18 14:58]:
> Simplify collation code.
> ---
> Branch: https://github.com/tarantool/tarantool/tree/coll-refactoring
> Issue: none
> 
>  src/box/alter.cc   | 29 +++++++++++++++--------------
>  src/coll.c         | 35 ++++++++++++-----------------------
>  src/coll.h         |  9 ++-------
>  src/coll_def.h     | 10 +++++++---
>  test/unit/coll.cpp | 19 +++++++------------
>  5 files changed, 43 insertions(+), 59 deletions(-)

This is OK to push.

> 
> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 7858af989..518f515bc 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -2294,6 +2294,11 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
>  	def->id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_ID);
>  	def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len);
>  	def->name_len = name_len;
> +	if (name_len > BOX_NAME_MAX)
> +		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
> +			  "collation name is too long");
> +	identifier_check_xc(def->name, name_len);
> +
>  	def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID);
>  	struct coll_def *base = &def->base;
>  	const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE,
> @@ -2302,23 +2307,19 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
>  	if (base->type == coll_type_MAX)
>  		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
>  			  "unknown collation type");
> -	base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
> -					  &locale_len);
> -	base->locale_len = locale_len;
> -	const char *options =
> -		tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
> -					 MP_MAP);
> -
> -	if (name_len > BOX_NAME_MAX)
> -		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
> -			  "collation name is too long");
> -	if (locale_len > BOX_NAME_MAX)
> +	const char *locale =
> +		tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
> +				   &locale_len);
> +	if (locale_len > COLL_LOCALE_LEN_MAX)
>  		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
>  			  "collation locale is too long");
> -	/* Locale is an optional argument and can be NULL. */
>  	if (locale_len > 0)
> -		identifier_check_xc(base->locale, locale_len);
> -	identifier_check_xc(def->name, name_len);
> +		identifier_check_xc(locale, locale_len);
> +	snprintf(base->locale, sizeof(base->locale), "%.*s", locale_len,
> +		 locale);
> +	const char *options =
> +		tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
> +					 MP_MAP);
>  
>  	assert(base->type == COLL_TYPE_ICU);
>  	if (opts_decode(&base->icu, coll_icu_opts_reg, &options,
> diff --git a/src/coll.c b/src/coll.c
> index d0e36827d..6a76f1f0b 100644
> --- a/src/coll.c
> +++ b/src/coll.c
> @@ -66,11 +66,7 @@ struct mh_coll_node_t {
>  /** Table fingerprint -> collation. */
>  static struct mh_coll_t *coll_cache = NULL;
>  
> -enum {
> -	MAX_LOCALE = 1024,
> -};
> -
> -static_assert(MAX_LOCALE <= TT_STATIC_BUF_LEN,
> +static_assert(COLL_LOCALE_LEN_MAX <= TT_STATIC_BUF_LEN,
>  	      "static buf is used to 0-terminate locale name");
>  
>  /** Compare two string using ICU collation. */
> @@ -78,19 +74,19 @@ static int
>  coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
>  	     const struct coll *coll)
>  {
> -	assert(coll->icu.collator != NULL);
> +	assert(coll->collator != NULL);
>  
>  	UErrorCode status = U_ZERO_ERROR;
>  
>  #ifdef HAVE_ICU_STRCOLLUTF8
> -	UCollationResult result = ucol_strcollUTF8(coll->icu.collator,
> -						   s, slen, t, tlen, &status);
> +	UCollationResult result = ucol_strcollUTF8(coll->collator, s, slen, t,
> +						   tlen, &status);
>  #else
>  	UCharIterator s_iter, t_iter;
>  	uiter_setUTF8(&s_iter, s, slen);
>  	uiter_setUTF8(&t_iter, t, tlen);
> -	UCollationResult result = ucol_strcollIter(coll->icu.collator,
> -						   &s_iter, &t_iter, &status);
> +	UCollationResult result = ucol_strcollIter(coll->collator, &s_iter,
> +						   &t_iter, &status);
>  #endif
>  	assert(!U_FAILURE(status));
>  	return (int)result;
> @@ -109,7 +105,7 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
>  	UErrorCode status = U_ZERO_ERROR;
>  	int32_t got;
>  	do {
> -		got = ucol_nextSortKeyPart(coll->icu.collator, &itr, state, buf,
> +		got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
>  					   TT_STATIC_BUF_LEN, &status);
>  		PMurHash32_Process(ph, pcarry, buf, got);
>  		total_size += got;
> @@ -127,20 +123,13 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
>  static int
>  coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
>  {
> -	if (def->locale_len >= MAX_LOCALE) {
> -		diag_set(CollationError, "too long locale");
> -		return -1;
> -	}
> -	char *locale = tt_static_buf();
> -	memcpy(locale, def->locale, def->locale_len);
> -	locale[def->locale_len] = '\0';
>  	UErrorCode status = U_ZERO_ERROR;
> -	struct UCollator *collator = ucol_open(locale, &status);
> +	struct UCollator *collator = ucol_open(def->locale, &status);
>  	if (U_FAILURE(status)) {
>  		diag_set(CollationError, u_errorName(status));
>  		return -1;
>  	}
> -	coll->icu.collator = collator;
> +	coll->collator = collator;
>  
>  	if (def->icu.french_collation != COLL_ICU_DEFAULT) {
>  		enum coll_icu_on_off w = def->icu.french_collation;
> @@ -273,8 +262,8 @@ static int
>  coll_def_snfingerprint(char *buffer, int size, const struct coll_def *def)
>  {
>  	int total = 0;
> -	SNPRINT(total, snprintf, buffer, size, "{locale: %.*s, type = %d, "\
> -	        "icu: ", (int) def->locale_len, def->locale, (int) def->type);
> +	SNPRINT(total, snprintf, buffer, size, "{locale: %s, type = %d, "\
> +	        "icu: ", def->locale, (int) def->type);
>  	SNPRINT(total, coll_icu_def_snfingerprint, buffer, size, &def->icu);
>  	SNPRINT(total, snprintf, buffer, size, "}");
>  	return total;
> @@ -331,7 +320,7 @@ coll_unref(struct coll *coll)
>  			len, mh_strn_hash(coll->fingerprint, len), coll
>  		};
>  		mh_coll_remove(coll_cache, &node, NULL);
> -		ucol_close(coll->icu.collator);
> +		ucol_close(coll->collator);
>  		free(coll);
>  	}
>  }
> diff --git a/src/coll.h b/src/coll.h
> index 7e950d164..9c725712a 100644
> --- a/src/coll.h
> +++ b/src/coll.h
> @@ -47,13 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
>  typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
>  				uint32_t *pcarry, struct coll *coll);
>  
> -/** ICU collation specific data. */
>  struct UCollator;
>  
> -struct coll_icu {
> -	struct UCollator *collator;
> -};
> -
>  /**
>   * Collation. It has no unique features like name, id or owner.
>   * Only functional part - comparator, locale, ICU settings.
> @@ -61,8 +56,8 @@ struct coll_icu {
>  struct coll {
>  	/** Collation type. */
>  	enum coll_type type;
> -	/** Type specific data. */
> -	struct coll_icu icu;
> +	/** ICU collation specific data. */
> +	struct UCollator *collator;
>  	/** String comparator. */
>  	coll_cmp_f cmp;
>  	coll_hash_f hash;
> diff --git a/src/coll_def.h b/src/coll_def.h
> index 10dbc860e..7c20abf66 100644
> --- a/src/coll_def.h
> +++ b/src/coll_def.h
> @@ -41,6 +41,11 @@ enum coll_type {
>  
>  extern const char *coll_type_strs[];
>  
> +/** Maximal length of locale name. */
> +enum {
> +	COLL_LOCALE_LEN_MAX = 16,
> +};
> +
>  /*
>   * ICU collation options. See
>   * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
> @@ -103,13 +108,12 @@ struct coll_icu_def {
>  
>  /** Collation definition. */
>  struct coll_def {
> -	/** Locale. */
> -	size_t locale_len;
> -	const char *locale;
>  	/** Collation type. */
>  	enum coll_type type;
>  	/** Type specific options. */
>  	struct coll_icu_def icu;
> +	/** Locale. */
> +	char locale[COLL_LOCALE_LEN_MAX + 1];
>  };
>  
>  #endif /* TARANTOOL_COLL_DEF_H_INCLUDED */
> diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
> index eeee739b7..94374a7b0 100644
> --- a/test/unit/coll.cpp
> +++ b/test/unit/coll.cpp
> @@ -49,8 +49,7 @@ manual_test()
>  	vector<const char *> strings;
>  	struct coll_def def;
>  	memset(&def, 0, sizeof(def));
> -	def.locale = "ru_RU";
> -	def.locale_len = strlen(def.locale);
> +	snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
>  	def.type = COLL_TYPE_ICU;
>  	struct coll *coll;
>  
> @@ -95,8 +94,7 @@ manual_test()
>  	coll_unref(coll);
>  
>  	cout << " -- en_EN -- " << endl;
> -	def.locale = "en_EN-EN";
> -	def.locale_len = strlen(def.locale);
> +	snprintf(def.locale, sizeof(def.locale), "%s", "en_EN-EN");
>  	coll = coll_new(&def);
>  	assert(coll != NULL);
>  	strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
> @@ -104,8 +102,7 @@ manual_test()
>  	coll_unref(coll);
>  
>  	cout << " -- cs_CZ -- " << endl;
> -	def.locale = "cs_CZ";
> -	def.locale_len = strlen(def.locale);
> +	snprintf(def.locale, sizeof(def.locale), "%s", "cs_CZ");
>  	coll = coll_new(&def);
>  	assert(coll != NULL);
>  	strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
> @@ -132,8 +129,7 @@ hash_test()
>  
>  	struct coll_def def;
>  	memset(&def, 0, sizeof(def));
> -	def.locale = "ru_RU";
> -	def.locale_len = strlen(def.locale);
> +	snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
>  	def.type = COLL_TYPE_ICU;
>  	struct coll *coll;
>  
> @@ -167,8 +163,7 @@ cache_test()
>  
>  	struct coll_def def;
>  	memset(&def, 0, sizeof(def));
> -	def.locale = "ru_RU";
> -	def.locale_len = strlen(def.locale);
> +	snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
>  	def.type = COLL_TYPE_ICU;
>  
>  	struct coll *coll1 = coll_new(&def);
> @@ -176,7 +171,7 @@ cache_test()
>  	is(coll1, coll2,
>  	   "collations with the same definition are not duplicated");
>  	coll_unref(coll2);
> -	def.locale = "en_EN";
> +	snprintf(def.locale, sizeof(def.locale), "%s", "en_EN");
>  	coll2 = coll_new(&def);
>  	isnt(coll1, coll2,
>  	     "collations with different definitions are different objects");
> @@ -199,4 +194,4 @@ main(int, const char**)
>  	fiber_free();
>  	memory_free();
>  	coll_free();
> -}
> \ No newline at end of file
> +}
> -- 
> 2.15.1 (Apple Git-101)

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

      reply	other threads:[~2018-05-18 14:38 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-18 11:33 [tarantool-patches] " Vladislav Shpilevoy
2018-05-18 14:36 ` Konstantin Osipov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180518143628.GD26429@atlas \
    --to=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='[tarantool-patches] Re: [PATCH 1/1] collation: refactoring' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox