From: Konstantin Osipov <kostja@tarantool.org> To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Cc: tarantool-patches@freelists.org Subject: [tarantool-patches] Re: [PATCH 1/1] collation: refactoring Date: Fri, 18 May 2018 17:36:28 +0300 [thread overview] Message-ID: <20180518143628.GD26429@atlas> (raw) In-Reply-To: <4e9bf7b49b322fcbc66d1f40722132c367ccbf10.1526643169.git.v.shpilevoy@tarantool.org> * Vladislav Shpilevoy <v.shpilevoy@tarantool.org> [18/05/18 14:58]: > Simplify collation code. > --- > Branch: https://github.com/tarantool/tarantool/tree/coll-refactoring > Issue: none > > src/box/alter.cc | 29 +++++++++++++++-------------- > src/coll.c | 35 ++++++++++++----------------------- > src/coll.h | 9 ++------- > src/coll_def.h | 10 +++++++--- > test/unit/coll.cpp | 19 +++++++------------ > 5 files changed, 43 insertions(+), 59 deletions(-) This is OK to push. > > diff --git a/src/box/alter.cc b/src/box/alter.cc > index 7858af989..518f515bc 100644 > --- a/src/box/alter.cc > +++ b/src/box/alter.cc > @@ -2294,6 +2294,11 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def) > def->id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_ID); > def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len); > def->name_len = name_len; > + if (name_len > BOX_NAME_MAX) > + tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, > + "collation name is too long"); > + identifier_check_xc(def->name, name_len); > + > def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID); > struct coll_def *base = &def->base; > const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE, > @@ -2302,23 +2307,19 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def) > if (base->type == coll_type_MAX) > tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, > "unknown collation type"); > - base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE, > - &locale_len); > - base->locale_len = locale_len; > - const char *options = > - tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS, > - MP_MAP); > - > - if (name_len > BOX_NAME_MAX) > - tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, > - "collation name is too long"); > - if (locale_len > BOX_NAME_MAX) > + const char *locale = > + tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE, > + &locale_len); > + if (locale_len > COLL_LOCALE_LEN_MAX) > tnt_raise(ClientError, ER_CANT_CREATE_COLLATION, > "collation locale is too long"); > - /* Locale is an optional argument and can be NULL. */ > if (locale_len > 0) > - identifier_check_xc(base->locale, locale_len); > - identifier_check_xc(def->name, name_len); > + identifier_check_xc(locale, locale_len); > + snprintf(base->locale, sizeof(base->locale), "%.*s", locale_len, > + locale); > + const char *options = > + tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS, > + MP_MAP); > > assert(base->type == COLL_TYPE_ICU); > if (opts_decode(&base->icu, coll_icu_opts_reg, &options, > diff --git a/src/coll.c b/src/coll.c > index d0e36827d..6a76f1f0b 100644 > --- a/src/coll.c > +++ b/src/coll.c > @@ -66,11 +66,7 @@ struct mh_coll_node_t { > /** Table fingerprint -> collation. */ > static struct mh_coll_t *coll_cache = NULL; > > -enum { > - MAX_LOCALE = 1024, > -}; > - > -static_assert(MAX_LOCALE <= TT_STATIC_BUF_LEN, > +static_assert(COLL_LOCALE_LEN_MAX <= TT_STATIC_BUF_LEN, > "static buf is used to 0-terminate locale name"); > > /** Compare two string using ICU collation. */ > @@ -78,19 +74,19 @@ static int > coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen, > const struct coll *coll) > { > - assert(coll->icu.collator != NULL); > + assert(coll->collator != NULL); > > UErrorCode status = U_ZERO_ERROR; > > #ifdef HAVE_ICU_STRCOLLUTF8 > - UCollationResult result = ucol_strcollUTF8(coll->icu.collator, > - s, slen, t, tlen, &status); > + UCollationResult result = ucol_strcollUTF8(coll->collator, s, slen, t, > + tlen, &status); > #else > UCharIterator s_iter, t_iter; > uiter_setUTF8(&s_iter, s, slen); > uiter_setUTF8(&t_iter, t, tlen); > - UCollationResult result = ucol_strcollIter(coll->icu.collator, > - &s_iter, &t_iter, &status); > + UCollationResult result = ucol_strcollIter(coll->collator, &s_iter, > + &t_iter, &status); > #endif > assert(!U_FAILURE(status)); > return (int)result; > @@ -109,7 +105,7 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, > UErrorCode status = U_ZERO_ERROR; > int32_t got; > do { > - got = ucol_nextSortKeyPart(coll->icu.collator, &itr, state, buf, > + got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, > TT_STATIC_BUF_LEN, &status); > PMurHash32_Process(ph, pcarry, buf, got); > total_size += got; > @@ -127,20 +123,13 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, > static int > coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) > { > - if (def->locale_len >= MAX_LOCALE) { > - diag_set(CollationError, "too long locale"); > - return -1; > - } > - char *locale = tt_static_buf(); > - memcpy(locale, def->locale, def->locale_len); > - locale[def->locale_len] = '\0'; > UErrorCode status = U_ZERO_ERROR; > - struct UCollator *collator = ucol_open(locale, &status); > + struct UCollator *collator = ucol_open(def->locale, &status); > if (U_FAILURE(status)) { > diag_set(CollationError, u_errorName(status)); > return -1; > } > - coll->icu.collator = collator; > + coll->collator = collator; > > if (def->icu.french_collation != COLL_ICU_DEFAULT) { > enum coll_icu_on_off w = def->icu.french_collation; > @@ -273,8 +262,8 @@ static int > coll_def_snfingerprint(char *buffer, int size, const struct coll_def *def) > { > int total = 0; > - SNPRINT(total, snprintf, buffer, size, "{locale: %.*s, type = %d, "\ > - "icu: ", (int) def->locale_len, def->locale, (int) def->type); > + SNPRINT(total, snprintf, buffer, size, "{locale: %s, type = %d, "\ > + "icu: ", def->locale, (int) def->type); > SNPRINT(total, coll_icu_def_snfingerprint, buffer, size, &def->icu); > SNPRINT(total, snprintf, buffer, size, "}"); > return total; > @@ -331,7 +320,7 @@ coll_unref(struct coll *coll) > len, mh_strn_hash(coll->fingerprint, len), coll > }; > mh_coll_remove(coll_cache, &node, NULL); > - ucol_close(coll->icu.collator); > + ucol_close(coll->collator); > free(coll); > } > } > diff --git a/src/coll.h b/src/coll.h > index 7e950d164..9c725712a 100644 > --- a/src/coll.h > +++ b/src/coll.h > @@ -47,13 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, > typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, > uint32_t *pcarry, struct coll *coll); > > -/** ICU collation specific data. */ > struct UCollator; > > -struct coll_icu { > - struct UCollator *collator; > -}; > - > /** > * Collation. It has no unique features like name, id or owner. > * Only functional part - comparator, locale, ICU settings. > @@ -61,8 +56,8 @@ struct coll_icu { > struct coll { > /** Collation type. */ > enum coll_type type; > - /** Type specific data. */ > - struct coll_icu icu; > + /** ICU collation specific data. */ > + struct UCollator *collator; > /** String comparator. */ > coll_cmp_f cmp; > coll_hash_f hash; > diff --git a/src/coll_def.h b/src/coll_def.h > index 10dbc860e..7c20abf66 100644 > --- a/src/coll_def.h > +++ b/src/coll_def.h > @@ -41,6 +41,11 @@ enum coll_type { > > extern const char *coll_type_strs[]; > > +/** Maximal length of locale name. */ > +enum { > + COLL_LOCALE_LEN_MAX = 16, > +}; > + > /* > * ICU collation options. See > * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c > @@ -103,13 +108,12 @@ struct coll_icu_def { > > /** Collation definition. */ > struct coll_def { > - /** Locale. */ > - size_t locale_len; > - const char *locale; > /** Collation type. */ > enum coll_type type; > /** Type specific options. */ > struct coll_icu_def icu; > + /** Locale. */ > + char locale[COLL_LOCALE_LEN_MAX + 1]; > }; > > #endif /* TARANTOOL_COLL_DEF_H_INCLUDED */ > diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp > index eeee739b7..94374a7b0 100644 > --- a/test/unit/coll.cpp > +++ b/test/unit/coll.cpp > @@ -49,8 +49,7 @@ manual_test() > vector<const char *> strings; > struct coll_def def; > memset(&def, 0, sizeof(def)); > - def.locale = "ru_RU"; > - def.locale_len = strlen(def.locale); > + snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); > def.type = COLL_TYPE_ICU; > struct coll *coll; > > @@ -95,8 +94,7 @@ manual_test() > coll_unref(coll); > > cout << " -- en_EN -- " << endl; > - def.locale = "en_EN-EN"; > - def.locale_len = strlen(def.locale); > + snprintf(def.locale, sizeof(def.locale), "%s", "en_EN-EN"); > coll = coll_new(&def); > assert(coll != NULL); > strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" }; > @@ -104,8 +102,7 @@ manual_test() > coll_unref(coll); > > cout << " -- cs_CZ -- " << endl; > - def.locale = "cs_CZ"; > - def.locale_len = strlen(def.locale); > + snprintf(def.locale, sizeof(def.locale), "%s", "cs_CZ"); > coll = coll_new(&def); > assert(coll != NULL); > strings = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" }; > @@ -132,8 +129,7 @@ hash_test() > > struct coll_def def; > memset(&def, 0, sizeof(def)); > - def.locale = "ru_RU"; > - def.locale_len = strlen(def.locale); > + snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); > def.type = COLL_TYPE_ICU; > struct coll *coll; > > @@ -167,8 +163,7 @@ cache_test() > > struct coll_def def; > memset(&def, 0, sizeof(def)); > - def.locale = "ru_RU"; > - def.locale_len = strlen(def.locale); > + snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); > def.type = COLL_TYPE_ICU; > > struct coll *coll1 = coll_new(&def); > @@ -176,7 +171,7 @@ cache_test() > is(coll1, coll2, > "collations with the same definition are not duplicated"); > coll_unref(coll2); > - def.locale = "en_EN"; > + snprintf(def.locale, sizeof(def.locale), "%s", "en_EN"); > coll2 = coll_new(&def); > isnt(coll1, coll2, > "collations with different definitions are different objects"); > @@ -199,4 +194,4 @@ main(int, const char**) > fiber_free(); > memory_free(); > coll_free(); > -} > \ No newline at end of file > +} > -- > 2.15.1 (Apple Git-101) -- Konstantin Osipov, Moscow, Russia, +7 903 626 22 32 http://tarantool.io - www.twitter.com/kostja_osipov
prev parent reply other threads:[~2018-05-18 14:38 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-05-18 11:33 [tarantool-patches] " Vladislav Shpilevoy 2018-05-18 14:36 ` Konstantin Osipov [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20180518143628.GD26429@atlas \ --to=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=v.shpilevoy@tarantool.org \ --subject='[tarantool-patches] Re: [PATCH 1/1] collation: refactoring' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox