[tarantool-patches] [PATCH 1/1] collation: refactoring

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri May 18 14:33:36 MSK 2018


Simplify collation code.
---
Branch: https://github.com/tarantool/tarantool/tree/coll-refactoring
Issue: none

 src/box/alter.cc   | 29 +++++++++++++++--------------
 src/coll.c         | 35 ++++++++++++-----------------------
 src/coll.h         |  9 ++-------
 src/coll_def.h     | 10 +++++++---
 test/unit/coll.cpp | 19 +++++++------------
 5 files changed, 43 insertions(+), 59 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 7858af989..518f515bc 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -2294,6 +2294,11 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
 	def->id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_ID);
 	def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len);
 	def->name_len = name_len;
+	if (name_len > BOX_NAME_MAX)
+		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
+			  "collation name is too long");
+	identifier_check_xc(def->name, name_len);
+
 	def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID);
 	struct coll_def *base = &def->base;
 	const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE,
@@ -2302,23 +2307,19 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
 	if (base->type == coll_type_MAX)
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "unknown collation type");
-	base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
-					  &locale_len);
-	base->locale_len = locale_len;
-	const char *options =
-		tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
-					 MP_MAP);
-
-	if (name_len > BOX_NAME_MAX)
-		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
-			  "collation name is too long");
-	if (locale_len > BOX_NAME_MAX)
+	const char *locale =
+		tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
+				   &locale_len);
+	if (locale_len > COLL_LOCALE_LEN_MAX)
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "collation locale is too long");
-	/* Locale is an optional argument and can be NULL. */
 	if (locale_len > 0)
-		identifier_check_xc(base->locale, locale_len);
-	identifier_check_xc(def->name, name_len);
+		identifier_check_xc(locale, locale_len);
+	snprintf(base->locale, sizeof(base->locale), "%.*s", locale_len,
+		 locale);
+	const char *options =
+		tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
+					 MP_MAP);
 
 	assert(base->type == COLL_TYPE_ICU);
 	if (opts_decode(&base->icu, coll_icu_opts_reg, &options,
diff --git a/src/coll.c b/src/coll.c
index d0e36827d..6a76f1f0b 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -66,11 +66,7 @@ struct mh_coll_node_t {
 /** Table fingerprint -> collation. */
 static struct mh_coll_t *coll_cache = NULL;
 
-enum {
-	MAX_LOCALE = 1024,
-};
-
-static_assert(MAX_LOCALE <= TT_STATIC_BUF_LEN,
+static_assert(COLL_LOCALE_LEN_MAX <= TT_STATIC_BUF_LEN,
 	      "static buf is used to 0-terminate locale name");
 
 /** Compare two string using ICU collation. */
@@ -78,19 +74,19 @@ static int
 coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
 	     const struct coll *coll)
 {
-	assert(coll->icu.collator != NULL);
+	assert(coll->collator != NULL);
 
 	UErrorCode status = U_ZERO_ERROR;
 
 #ifdef HAVE_ICU_STRCOLLUTF8
-	UCollationResult result = ucol_strcollUTF8(coll->icu.collator,
-						   s, slen, t, tlen, &status);
+	UCollationResult result = ucol_strcollUTF8(coll->collator, s, slen, t,
+						   tlen, &status);
 #else
 	UCharIterator s_iter, t_iter;
 	uiter_setUTF8(&s_iter, s, slen);
 	uiter_setUTF8(&t_iter, t, tlen);
-	UCollationResult result = ucol_strcollIter(coll->icu.collator,
-						   &s_iter, &t_iter, &status);
+	UCollationResult result = ucol_strcollIter(coll->collator, &s_iter,
+						   &t_iter, &status);
 #endif
 	assert(!U_FAILURE(status));
 	return (int)result;
@@ -109,7 +105,7 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
 	UErrorCode status = U_ZERO_ERROR;
 	int32_t got;
 	do {
-		got = ucol_nextSortKeyPart(coll->icu.collator, &itr, state, buf,
+		got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
 					   TT_STATIC_BUF_LEN, &status);
 		PMurHash32_Process(ph, pcarry, buf, got);
 		total_size += got;
@@ -127,20 +123,13 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
 static int
 coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
 {
-	if (def->locale_len >= MAX_LOCALE) {
-		diag_set(CollationError, "too long locale");
-		return -1;
-	}
-	char *locale = tt_static_buf();
-	memcpy(locale, def->locale, def->locale_len);
-	locale[def->locale_len] = '\0';
 	UErrorCode status = U_ZERO_ERROR;
-	struct UCollator *collator = ucol_open(locale, &status);
+	struct UCollator *collator = ucol_open(def->locale, &status);
 	if (U_FAILURE(status)) {
 		diag_set(CollationError, u_errorName(status));
 		return -1;
 	}
-	coll->icu.collator = collator;
+	coll->collator = collator;
 
 	if (def->icu.french_collation != COLL_ICU_DEFAULT) {
 		enum coll_icu_on_off w = def->icu.french_collation;
@@ -273,8 +262,8 @@ static int
 coll_def_snfingerprint(char *buffer, int size, const struct coll_def *def)
 {
 	int total = 0;
-	SNPRINT(total, snprintf, buffer, size, "{locale: %.*s, type = %d, "\
-	        "icu: ", (int) def->locale_len, def->locale, (int) def->type);
+	SNPRINT(total, snprintf, buffer, size, "{locale: %s, type = %d, "\
+	        "icu: ", def->locale, (int) def->type);
 	SNPRINT(total, coll_icu_def_snfingerprint, buffer, size, &def->icu);
 	SNPRINT(total, snprintf, buffer, size, "}");
 	return total;
@@ -331,7 +320,7 @@ coll_unref(struct coll *coll)
 			len, mh_strn_hash(coll->fingerprint, len), coll
 		};
 		mh_coll_remove(coll_cache, &node, NULL);
-		ucol_close(coll->icu.collator);
+		ucol_close(coll->collator);
 		free(coll);
 	}
 }
diff --git a/src/coll.h b/src/coll.h
index 7e950d164..9c725712a 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -47,13 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
 typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
 				uint32_t *pcarry, struct coll *coll);
 
-/** ICU collation specific data. */
 struct UCollator;
 
-struct coll_icu {
-	struct UCollator *collator;
-};
-
 /**
  * Collation. It has no unique features like name, id or owner.
  * Only functional part - comparator, locale, ICU settings.
@@ -61,8 +56,8 @@ struct coll_icu {
 struct coll {
 	/** Collation type. */
 	enum coll_type type;
-	/** Type specific data. */
-	struct coll_icu icu;
+	/** ICU collation specific data. */
+	struct UCollator *collator;
 	/** String comparator. */
 	coll_cmp_f cmp;
 	coll_hash_f hash;
diff --git a/src/coll_def.h b/src/coll_def.h
index 10dbc860e..7c20abf66 100644
--- a/src/coll_def.h
+++ b/src/coll_def.h
@@ -41,6 +41,11 @@ enum coll_type {
 
 extern const char *coll_type_strs[];
 
+/** Maximal length of locale name. */
+enum {
+	COLL_LOCALE_LEN_MAX = 16,
+};
+
 /*
  * ICU collation options. See
  * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
@@ -103,13 +108,12 @@ struct coll_icu_def {
 
 /** Collation definition. */
 struct coll_def {
-	/** Locale. */
-	size_t locale_len;
-	const char *locale;
 	/** Collation type. */
 	enum coll_type type;
 	/** Type specific options. */
 	struct coll_icu_def icu;
+	/** Locale. */
+	char locale[COLL_LOCALE_LEN_MAX + 1];
 };
 
 #endif /* TARANTOOL_COLL_DEF_H_INCLUDED */
diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
index eeee739b7..94374a7b0 100644
--- a/test/unit/coll.cpp
+++ b/test/unit/coll.cpp
@@ -49,8 +49,7 @@ manual_test()
 	vector<const char *> strings;
 	struct coll_def def;
 	memset(&def, 0, sizeof(def));
-	def.locale = "ru_RU";
-	def.locale_len = strlen(def.locale);
+	snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
 	def.type = COLL_TYPE_ICU;
 	struct coll *coll;
 
@@ -95,8 +94,7 @@ manual_test()
 	coll_unref(coll);
 
 	cout << " -- en_EN -- " << endl;
-	def.locale = "en_EN-EN";
-	def.locale_len = strlen(def.locale);
+	snprintf(def.locale, sizeof(def.locale), "%s", "en_EN-EN");
 	coll = coll_new(&def);
 	assert(coll != NULL);
 	strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
@@ -104,8 +102,7 @@ manual_test()
 	coll_unref(coll);
 
 	cout << " -- cs_CZ -- " << endl;
-	def.locale = "cs_CZ";
-	def.locale_len = strlen(def.locale);
+	snprintf(def.locale, sizeof(def.locale), "%s", "cs_CZ");
 	coll = coll_new(&def);
 	assert(coll != NULL);
 	strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
@@ -132,8 +129,7 @@ hash_test()
 
 	struct coll_def def;
 	memset(&def, 0, sizeof(def));
-	def.locale = "ru_RU";
-	def.locale_len = strlen(def.locale);
+	snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
 	def.type = COLL_TYPE_ICU;
 	struct coll *coll;
 
@@ -167,8 +163,7 @@ cache_test()
 
 	struct coll_def def;
 	memset(&def, 0, sizeof(def));
-	def.locale = "ru_RU";
-	def.locale_len = strlen(def.locale);
+	snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
 	def.type = COLL_TYPE_ICU;
 
 	struct coll *coll1 = coll_new(&def);
@@ -176,7 +171,7 @@ cache_test()
 	is(coll1, coll2,
 	   "collations with the same definition are not duplicated");
 	coll_unref(coll2);
-	def.locale = "en_EN";
+	snprintf(def.locale, sizeof(def.locale), "%s", "en_EN");
 	coll2 = coll_new(&def);
 	isnt(coll1, coll2,
 	     "collations with different definitions are different objects");
@@ -199,4 +194,4 @@ main(int, const char**)
 	fiber_free();
 	memory_free();
 	coll_free();
-}
\ No newline at end of file
+}
-- 
2.15.1 (Apple Git-101)





More information about the Tarantool-patches mailing list