[tarantool-patches] [PATCH 1/1] collation: refactoring
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Fri May 18 14:33:36 MSK 2018
Simplify collation code.
---
Branch: https://github.com/tarantool/tarantool/tree/coll-refactoring
Issue: none
src/box/alter.cc | 29 +++++++++++++++--------------
src/coll.c | 35 ++++++++++++-----------------------
src/coll.h | 9 ++-------
src/coll_def.h | 10 +++++++---
test/unit/coll.cpp | 19 +++++++------------
5 files changed, 43 insertions(+), 59 deletions(-)
diff --git a/src/box/alter.cc b/src/box/alter.cc
index 7858af989..518f515bc 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -2294,6 +2294,11 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
def->id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_ID);
def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len);
def->name_len = name_len;
+ if (name_len > BOX_NAME_MAX)
+ tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
+ "collation name is too long");
+ identifier_check_xc(def->name, name_len);
+
def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID);
struct coll_def *base = &def->base;
const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE,
@@ -2302,23 +2307,19 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
if (base->type == coll_type_MAX)
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"unknown collation type");
- base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
- &locale_len);
- base->locale_len = locale_len;
- const char *options =
- tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
- MP_MAP);
-
- if (name_len > BOX_NAME_MAX)
- tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
- "collation name is too long");
- if (locale_len > BOX_NAME_MAX)
+ const char *locale =
+ tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
+ &locale_len);
+ if (locale_len > COLL_LOCALE_LEN_MAX)
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"collation locale is too long");
- /* Locale is an optional argument and can be NULL. */
if (locale_len > 0)
- identifier_check_xc(base->locale, locale_len);
- identifier_check_xc(def->name, name_len);
+ identifier_check_xc(locale, locale_len);
+ snprintf(base->locale, sizeof(base->locale), "%.*s", locale_len,
+ locale);
+ const char *options =
+ tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
+ MP_MAP);
assert(base->type == COLL_TYPE_ICU);
if (opts_decode(&base->icu, coll_icu_opts_reg, &options,
diff --git a/src/coll.c b/src/coll.c
index d0e36827d..6a76f1f0b 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -66,11 +66,7 @@ struct mh_coll_node_t {
/** Table fingerprint -> collation. */
static struct mh_coll_t *coll_cache = NULL;
-enum {
- MAX_LOCALE = 1024,
-};
-
-static_assert(MAX_LOCALE <= TT_STATIC_BUF_LEN,
+static_assert(COLL_LOCALE_LEN_MAX <= TT_STATIC_BUF_LEN,
"static buf is used to 0-terminate locale name");
/** Compare two string using ICU collation. */
@@ -78,19 +74,19 @@ static int
coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
const struct coll *coll)
{
- assert(coll->icu.collator != NULL);
+ assert(coll->collator != NULL);
UErrorCode status = U_ZERO_ERROR;
#ifdef HAVE_ICU_STRCOLLUTF8
- UCollationResult result = ucol_strcollUTF8(coll->icu.collator,
- s, slen, t, tlen, &status);
+ UCollationResult result = ucol_strcollUTF8(coll->collator, s, slen, t,
+ tlen, &status);
#else
UCharIterator s_iter, t_iter;
uiter_setUTF8(&s_iter, s, slen);
uiter_setUTF8(&t_iter, t, tlen);
- UCollationResult result = ucol_strcollIter(coll->icu.collator,
- &s_iter, &t_iter, &status);
+ UCollationResult result = ucol_strcollIter(coll->collator, &s_iter,
+ &t_iter, &status);
#endif
assert(!U_FAILURE(status));
return (int)result;
@@ -109,7 +105,7 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
UErrorCode status = U_ZERO_ERROR;
int32_t got;
do {
- got = ucol_nextSortKeyPart(coll->icu.collator, &itr, state, buf,
+ got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
TT_STATIC_BUF_LEN, &status);
PMurHash32_Process(ph, pcarry, buf, got);
total_size += got;
@@ -127,20 +123,13 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
static int
coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
{
- if (def->locale_len >= MAX_LOCALE) {
- diag_set(CollationError, "too long locale");
- return -1;
- }
- char *locale = tt_static_buf();
- memcpy(locale, def->locale, def->locale_len);
- locale[def->locale_len] = '\0';
UErrorCode status = U_ZERO_ERROR;
- struct UCollator *collator = ucol_open(locale, &status);
+ struct UCollator *collator = ucol_open(def->locale, &status);
if (U_FAILURE(status)) {
diag_set(CollationError, u_errorName(status));
return -1;
}
- coll->icu.collator = collator;
+ coll->collator = collator;
if (def->icu.french_collation != COLL_ICU_DEFAULT) {
enum coll_icu_on_off w = def->icu.french_collation;
@@ -273,8 +262,8 @@ static int
coll_def_snfingerprint(char *buffer, int size, const struct coll_def *def)
{
int total = 0;
- SNPRINT(total, snprintf, buffer, size, "{locale: %.*s, type = %d, "\
- "icu: ", (int) def->locale_len, def->locale, (int) def->type);
+ SNPRINT(total, snprintf, buffer, size, "{locale: %s, type = %d, "\
+ "icu: ", def->locale, (int) def->type);
SNPRINT(total, coll_icu_def_snfingerprint, buffer, size, &def->icu);
SNPRINT(total, snprintf, buffer, size, "}");
return total;
@@ -331,7 +320,7 @@ coll_unref(struct coll *coll)
len, mh_strn_hash(coll->fingerprint, len), coll
};
mh_coll_remove(coll_cache, &node, NULL);
- ucol_close(coll->icu.collator);
+ ucol_close(coll->collator);
free(coll);
}
}
diff --git a/src/coll.h b/src/coll.h
index 7e950d164..9c725712a 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -47,13 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
uint32_t *pcarry, struct coll *coll);
-/** ICU collation specific data. */
struct UCollator;
-struct coll_icu {
- struct UCollator *collator;
-};
-
/**
* Collation. It has no unique features like name, id or owner.
* Only functional part - comparator, locale, ICU settings.
@@ -61,8 +56,8 @@ struct coll_icu {
struct coll {
/** Collation type. */
enum coll_type type;
- /** Type specific data. */
- struct coll_icu icu;
+ /** ICU collation specific data. */
+ struct UCollator *collator;
/** String comparator. */
coll_cmp_f cmp;
coll_hash_f hash;
diff --git a/src/coll_def.h b/src/coll_def.h
index 10dbc860e..7c20abf66 100644
--- a/src/coll_def.h
+++ b/src/coll_def.h
@@ -41,6 +41,11 @@ enum coll_type {
extern const char *coll_type_strs[];
+/** Maximal length of locale name. */
+enum {
+ COLL_LOCALE_LEN_MAX = 16,
+};
+
/*
* ICU collation options. See
* http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
@@ -103,13 +108,12 @@ struct coll_icu_def {
/** Collation definition. */
struct coll_def {
- /** Locale. */
- size_t locale_len;
- const char *locale;
/** Collation type. */
enum coll_type type;
/** Type specific options. */
struct coll_icu_def icu;
+ /** Locale. */
+ char locale[COLL_LOCALE_LEN_MAX + 1];
};
#endif /* TARANTOOL_COLL_DEF_H_INCLUDED */
diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
index eeee739b7..94374a7b0 100644
--- a/test/unit/coll.cpp
+++ b/test/unit/coll.cpp
@@ -49,8 +49,7 @@ manual_test()
vector<const char *> strings;
struct coll_def def;
memset(&def, 0, sizeof(def));
- def.locale = "ru_RU";
- def.locale_len = strlen(def.locale);
+ snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
def.type = COLL_TYPE_ICU;
struct coll *coll;
@@ -95,8 +94,7 @@ manual_test()
coll_unref(coll);
cout << " -- en_EN -- " << endl;
- def.locale = "en_EN-EN";
- def.locale_len = strlen(def.locale);
+ snprintf(def.locale, sizeof(def.locale), "%s", "en_EN-EN");
coll = coll_new(&def);
assert(coll != NULL);
strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
@@ -104,8 +102,7 @@ manual_test()
coll_unref(coll);
cout << " -- cs_CZ -- " << endl;
- def.locale = "cs_CZ";
- def.locale_len = strlen(def.locale);
+ snprintf(def.locale, sizeof(def.locale), "%s", "cs_CZ");
coll = coll_new(&def);
assert(coll != NULL);
strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
@@ -132,8 +129,7 @@ hash_test()
struct coll_def def;
memset(&def, 0, sizeof(def));
- def.locale = "ru_RU";
- def.locale_len = strlen(def.locale);
+ snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
def.type = COLL_TYPE_ICU;
struct coll *coll;
@@ -167,8 +163,7 @@ cache_test()
struct coll_def def;
memset(&def, 0, sizeof(def));
- def.locale = "ru_RU";
- def.locale_len = strlen(def.locale);
+ snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
def.type = COLL_TYPE_ICU;
struct coll *coll1 = coll_new(&def);
@@ -176,7 +171,7 @@ cache_test()
is(coll1, coll2,
"collations with the same definition are not duplicated");
coll_unref(coll2);
- def.locale = "en_EN";
+ snprintf(def.locale, sizeof(def.locale), "%s", "en_EN");
coll2 = coll_new(&def);
isnt(coll1, coll2,
"collations with different definitions are different objects");
@@ -199,4 +194,4 @@ main(int, const char**)
fiber_free();
memory_free();
coll_free();
-}
\ No newline at end of file
+}
--
2.15.1 (Apple Git-101)
More information about the Tarantool-patches
mailing list