From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id C038525691 for ; Fri, 18 May 2018 07:33:40 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id ITP5K0e_38qP for ; Fri, 18 May 2018 07:33:40 -0400 (EDT) Received: from smtp20.mail.ru (smtp20.mail.ru [94.100.179.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id E177D2568E for ; Fri, 18 May 2018 07:33:39 -0400 (EDT) From: Vladislav Shpilevoy Subject: [tarantool-patches] [PATCH 1/1] collation: refactoring Date: Fri, 18 May 2018 14:33:36 +0300 Message-Id: <4e9bf7b49b322fcbc66d1f40722132c367ccbf10.1526643169.git.v.shpilevoy@tarantool.org> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Simplify collation code. --- Branch: https://github.com/tarantool/tarantool/tree/coll-refactoring Issue: none src/box/alter.cc | 29 +++++++++++++++-------------- src/coll.c | 35 ++++++++++++----------------------- src/coll.h | 9 ++------- src/coll_def.h | 10 +++++++--- test/unit/coll.cpp | 19 +++++++------------ 5 files changed, 43 insertions(+), 59 deletions(-) diff --git a/src/box/alter.cc b/src/box/alter.cc index 7858af989..518f515bc 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -2294,6 +2294,11 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def) def->id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_ID); def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len); def->name_len = name_len; + if (name_len > BOX_NAME_MAX) + tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, + "collation name is too long"); + identifier_check_xc(def->name, name_len); + def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID); struct coll_def *base = &def->base; const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE, @@ -2302,23 +2307,19 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def) if (base->type == coll_type_MAX) tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, "unknown collation type"); - base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE, - &locale_len); - base->locale_len = locale_len; - const char *options = - tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS, - MP_MAP); - - if (name_len > BOX_NAME_MAX) - tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, - "collation name is too long"); - if (locale_len > BOX_NAME_MAX) + const char *locale = + tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE, + &locale_len); + if (locale_len > COLL_LOCALE_LEN_MAX) tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, "collation locale is too long"); - /* Locale is an optional argument and can be NULL. */ if (locale_len > 0) - identifier_check_xc(base->locale, locale_len); - identifier_check_xc(def->name, name_len); + identifier_check_xc(locale, locale_len); + snprintf(base->locale, sizeof(base->locale), "%.*s", locale_len, + locale); + const char *options = + tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS, + MP_MAP); assert(base->type == COLL_TYPE_ICU); if (opts_decode(&base->icu, coll_icu_opts_reg, &options, diff --git a/src/coll.c b/src/coll.c index d0e36827d..6a76f1f0b 100644 --- a/src/coll.c +++ b/src/coll.c @@ -66,11 +66,7 @@ struct mh_coll_node_t { /** Table fingerprint -> collation. */ static struct mh_coll_t *coll_cache = NULL; -enum { - MAX_LOCALE = 1024, -}; - -static_assert(MAX_LOCALE <= TT_STATIC_BUF_LEN, +static_assert(COLL_LOCALE_LEN_MAX <= TT_STATIC_BUF_LEN, "static buf is used to 0-terminate locale name"); /** Compare two string using ICU collation. */ @@ -78,19 +74,19 @@ static int coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen, const struct coll *coll) { - assert(coll->icu.collator != NULL); + assert(coll->collator != NULL); UErrorCode status = U_ZERO_ERROR; #ifdef HAVE_ICU_STRCOLLUTF8 - UCollationResult result = ucol_strcollUTF8(coll->icu.collator, - s, slen, t, tlen, &status); + UCollationResult result = ucol_strcollUTF8(coll->collator, s, slen, t, + tlen, &status); #else UCharIterator s_iter, t_iter; uiter_setUTF8(&s_iter, s, slen); uiter_setUTF8(&t_iter, t, tlen); - UCollationResult result = ucol_strcollIter(coll->icu.collator, - &s_iter, &t_iter, &status); + UCollationResult result = ucol_strcollIter(coll->collator, &s_iter, + &t_iter, &status); #endif assert(!U_FAILURE(status)); return (int)result; @@ -109,7 +105,7 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, UErrorCode status = U_ZERO_ERROR; int32_t got; do { - got = ucol_nextSortKeyPart(coll->icu.collator, &itr, state, buf, + got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, TT_STATIC_BUF_LEN, &status); PMurHash32_Process(ph, pcarry, buf, got); total_size += got; @@ -127,20 +123,13 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, static int coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) { - if (def->locale_len >= MAX_LOCALE) { - diag_set(CollationError, "too long locale"); - return -1; - } - char *locale = tt_static_buf(); - memcpy(locale, def->locale, def->locale_len); - locale[def->locale_len] = '\0'; UErrorCode status = U_ZERO_ERROR; - struct UCollator *collator = ucol_open(locale, &status); + struct UCollator *collator = ucol_open(def->locale, &status); if (U_FAILURE(status)) { diag_set(CollationError, u_errorName(status)); return -1; } - coll->icu.collator = collator; + coll->collator = collator; if (def->icu.french_collation != COLL_ICU_DEFAULT) { enum coll_icu_on_off w = def->icu.french_collation; @@ -273,8 +262,8 @@ static int coll_def_snfingerprint(char *buffer, int size, const struct coll_def *def) { int total = 0; - SNPRINT(total, snprintf, buffer, size, "{locale: %.*s, type = %d, "\ - "icu: ", (int) def->locale_len, def->locale, (int) def->type); + SNPRINT(total, snprintf, buffer, size, "{locale: %s, type = %d, "\ + "icu: ", def->locale, (int) def->type); SNPRINT(total, coll_icu_def_snfingerprint, buffer, size, &def->icu); SNPRINT(total, snprintf, buffer, size, "}"); return total; @@ -331,7 +320,7 @@ coll_unref(struct coll *coll) len, mh_strn_hash(coll->fingerprint, len), coll }; mh_coll_remove(coll_cache, &node, NULL); - ucol_close(coll->icu.collator); + ucol_close(coll->collator); free(coll); } } diff --git a/src/coll.h b/src/coll.h index 7e950d164..9c725712a 100644 --- a/src/coll.h +++ b/src/coll.h @@ -47,13 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); -/** ICU collation specific data. */ struct UCollator; -struct coll_icu { - struct UCollator *collator; -}; - /** * Collation. It has no unique features like name, id or owner. * Only functional part - comparator, locale, ICU settings. @@ -61,8 +56,8 @@ struct coll_icu { struct coll { /** Collation type. */ enum coll_type type; - /** Type specific data. */ - struct coll_icu icu; + /** ICU collation specific data. */ + struct UCollator *collator; /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; diff --git a/src/coll_def.h b/src/coll_def.h index 10dbc860e..7c20abf66 100644 --- a/src/coll_def.h +++ b/src/coll_def.h @@ -41,6 +41,11 @@ enum coll_type { extern const char *coll_type_strs[]; +/** Maximal length of locale name. */ +enum { + COLL_LOCALE_LEN_MAX = 16, +}; + /* * ICU collation options. See * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c @@ -103,13 +108,12 @@ struct coll_icu_def { /** Collation definition. */ struct coll_def { - /** Locale. */ - size_t locale_len; - const char *locale; /** Collation type. */ enum coll_type type; /** Type specific options. */ struct coll_icu_def icu; + /** Locale. */ + char locale[COLL_LOCALE_LEN_MAX + 1]; }; #endif /* TARANTOOL_COLL_DEF_H_INCLUDED */ diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp index eeee739b7..94374a7b0 100644 --- a/test/unit/coll.cpp +++ b/test/unit/coll.cpp @@ -49,8 +49,7 @@ manual_test() vector strings; struct coll_def def; memset(&def, 0, sizeof(def)); - def.locale = "ru_RU"; - def.locale_len = strlen(def.locale); + snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); def.type = COLL_TYPE_ICU; struct coll *coll; @@ -95,8 +94,7 @@ manual_test() coll_unref(coll); cout << " -- en_EN -- " << endl; - def.locale = "en_EN-EN"; - def.locale_len = strlen(def.locale); + snprintf(def.locale, sizeof(def.locale), "%s", "en_EN-EN"); coll = coll_new(&def); assert(coll != NULL); strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" }; @@ -104,8 +102,7 @@ manual_test() coll_unref(coll); cout << " -- cs_CZ -- " << endl; - def.locale = "cs_CZ"; - def.locale_len = strlen(def.locale); + snprintf(def.locale, sizeof(def.locale), "%s", "cs_CZ"); coll = coll_new(&def); assert(coll != NULL); strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" }; @@ -132,8 +129,7 @@ hash_test() struct coll_def def; memset(&def, 0, sizeof(def)); - def.locale = "ru_RU"; - def.locale_len = strlen(def.locale); + snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); def.type = COLL_TYPE_ICU; struct coll *coll; @@ -167,8 +163,7 @@ cache_test() struct coll_def def; memset(&def, 0, sizeof(def)); - def.locale = "ru_RU"; - def.locale_len = strlen(def.locale); + snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); def.type = COLL_TYPE_ICU; struct coll *coll1 = coll_new(&def); @@ -176,7 +171,7 @@ cache_test() is(coll1, coll2, "collations with the same definition are not duplicated"); coll_unref(coll2); - def.locale = "en_EN"; + snprintf(def.locale, sizeof(def.locale), "%s", "en_EN"); coll2 = coll_new(&def); isnt(coll1, coll2, "collations with different definitions are different objects"); @@ -199,4 +194,4 @@ main(int, const char**) fiber_free(); memory_free(); coll_free(); -} \ No newline at end of file +} -- 2.15.1 (Apple Git-101)