[tarantool-patches] [PATCH v3 2/4] collation: split collation into core and box objects
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Tue May 15 22:54:06 MSK 2018
Issue #3290 revealed an important problem: Tarantool cannot
create completely internal collations that have no ID, name or
owner and exist purely for internal usage.
The original struct coll cannot be used for this because
* it has fields that are not needed in internals;
* a collation name is a public thing, and the collation cache
uses it, so it would be necessary to forbid users from using
some system names;
* when multiple collations have the same comparator and only their
names/owners/IDs are different, separate UCollator objects
are created, but it would be good to be able to reference a
single one.
This patch renames coll to box_coll and coll_def to box_coll_def,
and introduces a new coll - a pure collation object without any
user-defined things.
Needed for #3290.
---
src/CMakeLists.txt | 2 +
src/box/alter.cc | 72 +++++++-------
src/box/coll.c | 247 ++++-------------------------------------------
src/box/coll.h | 59 +++--------
src/box/coll_cache.c | 44 +++++----
src/box/coll_cache.h | 17 ++--
src/box/coll_def.c | 32 ------
src/box/coll_def.h | 86 +----------------
src/box/key_def.cc | 22 +++--
src/box/key_def.h | 5 +-
src/box/lua/space.cc | 8 +-
src/box/schema.cc | 8 +-
src/box/tuple.c | 4 +-
src/box/tuple_compare.cc | 5 +-
src/box/tuple_hash.cc | 4 +-
src/coll.c | 234 ++++++++++++++++++++++++++++++++++++++++++++
src/coll.h | 98 +++++++++++++++++++
src/coll_def.c | 63 ++++++++++++
src/coll_def.h | 115 ++++++++++++++++++++++
test/unit/coll.cpp | 8 +-
20 files changed, 653 insertions(+), 480 deletions(-)
create mode 100644 src/coll.c
create mode 100644 src/coll.h
create mode 100644 src/coll_def.c
create mode 100644 src/coll_def.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8ab09e968..5bf17614b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -94,6 +94,8 @@ set (core_sources
random.c
trigger.cc
http_parser.c
+ coll.c
+ coll_def.c
)
if (TARGET_OS_NETBSD)
diff --git a/src/box/alter.cc b/src/box/alter.cc
index 8766c8171..d72b9a3bb 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -35,6 +35,7 @@
#include "index.h"
#include "func.h"
#include "coll_cache.h"
+#include "coll_def.h"
#include "txn.h"
#include "tuple.h"
#include "fiber.h" /* for gc_pool */
@@ -2286,7 +2287,7 @@ on_replace_dd_func(struct trigger * /* trigger */, void *event)
/** Create a collation definition from tuple. */
void
-coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
+box_coll_def_new_from_tuple(const struct tuple *tuple, struct box_coll_def *def)
{
memset(def, 0, sizeof(*def));
uint32_t name_len, locale_len, type_len;
@@ -2294,15 +2295,16 @@ coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len);
def->name_len = name_len;
def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID);
+ struct coll_def *base = &def->base;
const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE,
&type_len);
- def->type = STRN2ENUM(coll_type, type, type_len);
- if (def->type == coll_type_MAX)
+ base->type = STRN2ENUM(coll_type, type, type_len);
+ if (base->type == coll_type_MAX)
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"unknown collation type");
- def->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
- &locale_len);
- def->locale_len = locale_len;
+ base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
+ &locale_len);
+ base->locale_len = locale_len;
const char *options =
tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
MP_MAP);
@@ -2315,53 +2317,53 @@ coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
"collation locale is too long");
/* Locale is an optional argument and can be NULL. */
if (locale_len > 0)
- identifier_check_xc(def->locale, locale_len);
+ identifier_check_xc(base->locale, locale_len);
identifier_check_xc(def->name, name_len);
- assert(def->type == COLL_TYPE_ICU); /* no more defined now */
- if (opts_decode(&def->icu, coll_icu_opts_reg, &options,
+ assert(base->type == COLL_TYPE_ICU);
+ if (opts_decode(&base->icu, coll_icu_opts_reg, &options,
ER_WRONG_COLLATION_OPTIONS,
BOX_COLLATION_FIELD_OPTIONS, NULL) != 0)
diag_raise();
- if (def->icu.french_collation == coll_icu_on_off_MAX) {
+ if (base->icu.french_collation == coll_icu_on_off_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong french_collation option setting, "
"expected ON | OFF");
}
- if (def->icu.alternate_handling == coll_icu_alternate_handling_MAX) {
+ if (base->icu.alternate_handling == coll_icu_alternate_handling_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong alternate_handling option setting, "
"expected NON_IGNORABLE | SHIFTED");
}
- if (def->icu.case_first == coll_icu_case_first_MAX) {
+ if (base->icu.case_first == coll_icu_case_first_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong case_first option setting, "
"expected OFF | UPPER_FIRST | LOWER_FIRST");
}
- if (def->icu.case_level == coll_icu_on_off_MAX) {
+ if (base->icu.case_level == coll_icu_on_off_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong case_level option setting, "
"expected ON | OFF");
}
- if (def->icu.normalization_mode == coll_icu_on_off_MAX) {
+ if (base->icu.normalization_mode == coll_icu_on_off_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong normalization_mode option setting, "
"expected ON | OFF");
}
- if (def->icu.strength == coll_icu_strength_MAX) {
+ if (base->icu.strength == coll_icu_strength_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong strength option setting, "
"expected PRIMARY | SECONDARY | "
"TERTIARY | QUATERNARY | IDENTICAL");
}
- if (def->icu.numeric_collation == coll_icu_on_off_MAX) {
+ if (base->icu.numeric_collation == coll_icu_on_off_MAX) {
tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
"ICU wrong numeric_collation option setting, "
"expected ON | OFF");
@@ -2373,16 +2375,16 @@ coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
* A change is only INSERT or DELETE, UPDATE is not supported.
*/
static void
-coll_cache_rollback(struct trigger *trigger, void *event)
+box_coll_cache_rollback(struct trigger *trigger, void *event)
{
- struct coll *coll = (struct coll *) trigger->data;
+ struct box_coll *coll = (struct box_coll *) trigger->data;
struct txn_stmt *stmt = txn_last_stmt((struct txn*) event);
if (stmt->new_tuple == NULL) {
/* Rollback DELETE: put the collation back. */
assert(stmt->old_tuple != NULL);
- struct coll *replaced;
- if (coll_cache_replace(coll, &replaced) != 0) {
+ struct box_coll *replaced;
+ if (box_coll_cache_replace(coll, &replaced) != 0) {
panic("Out of memory on insertion into collation "\
"cache");
}
@@ -2390,19 +2392,19 @@ coll_cache_rollback(struct trigger *trigger, void *event)
} else {
/* INSERT: remove and free the new collation */
assert(stmt->old_tuple == NULL);
- coll_cache_delete(coll);
- coll_unref(coll);
+ box_coll_cache_delete(coll);
+ box_coll_delete(coll);
}
}
/** Dereference a deleted collation on commit. */
static void
-coll_cache_commit(struct trigger *trigger, void *event)
+box_coll_cache_commit(struct trigger *trigger, void *event)
{
(void) event;
- struct coll *coll = (struct coll *) trigger->data;
- coll_unref(coll);
+ struct box_coll *coll = (struct box_coll *) trigger->data;
+ box_coll_delete(coll);
}
/**
@@ -2418,15 +2420,15 @@ on_replace_dd_collation(struct trigger * /* trigger */, void *event)
struct tuple *new_tuple = stmt->new_tuple;
txn_check_singlestatement_xc(txn, "Space _collation");
struct trigger *on_rollback =
- txn_alter_trigger_new(coll_cache_rollback, NULL);
+ txn_alter_trigger_new(box_coll_cache_rollback, NULL);
struct trigger *on_commit =
- txn_alter_trigger_new(coll_cache_commit, NULL);
+ txn_alter_trigger_new(box_coll_cache_commit, NULL);
if (new_tuple == NULL && old_tuple != NULL) {
/* DELETE */
/* TODO: Check that no index uses the collation */
int32_t old_id = tuple_field_u32_xc(old_tuple,
BOX_COLLATION_FIELD_ID);
- struct coll *old_coll = coll_by_id(old_id);
+ struct box_coll *old_coll = box_coll_by_id(old_id);
assert(old_coll != NULL);
access_check_ddl(old_coll->name, old_coll->owner_id,
SC_COLLATION, PRIV_D, false);
@@ -2435,23 +2437,23 @@ on_replace_dd_collation(struct trigger * /* trigger */, void *event)
* deletion from the cache to make trigger logic
* simple..
*/
- coll_cache_delete(old_coll);
+ box_coll_cache_delete(old_coll);
on_rollback->data = old_coll;
on_commit->data = old_coll;
txn_on_rollback(txn, on_rollback);
txn_on_commit(txn, on_commit);
} else if (new_tuple != NULL && old_tuple == NULL) {
/* INSERT */
- struct coll_def new_def;
- coll_def_new_from_tuple(new_tuple, &new_def);
+ struct box_coll_def new_def;
+ box_coll_def_new_from_tuple(new_tuple, &new_def);
access_check_ddl(new_def.name, new_def.owner_id, SC_COLLATION,
PRIV_C, false);
- struct coll *new_coll = coll_new(&new_def);
+ struct box_coll *new_coll = box_coll_new(&new_def);
if (new_coll == NULL)
diag_raise();
- struct coll *replaced;
- if (coll_cache_replace(new_coll, &replaced) != 0) {
- coll_unref(new_coll);
+ struct box_coll *replaced;
+ if (box_coll_cache_replace(new_coll, &replaced) != 0) {
+ box_coll_delete(new_coll);
diag_raise();
}
assert(replaced == NULL);
diff --git a/src/box/coll.c b/src/box/coll.c
index 436d8d127..3bf3aff3c 100644
--- a/src/box/coll.c
+++ b/src/box/coll.c
@@ -28,252 +28,39 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
#include "coll.h"
-#include "third_party/PMurHash.h"
+#include <coll.h>
+#include "coll_def.h"
#include "error.h"
#include "diag.h"
-#include <unicode/ucol.h>
-#include <trivia/config.h>
-
-enum {
- MAX_HASH_BUFFER = 1024,
- MAX_LOCALE = 1024,
-};
-
-/**
- * Compare two string using ICU collation.
- */
-static int
-coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
- const struct coll *coll)
-{
- assert(coll->icu.collator != NULL);
-
- UErrorCode status = U_ZERO_ERROR;
-
-#ifdef HAVE_ICU_STRCOLLUTF8
- UCollationResult result = ucol_strcollUTF8(coll->icu.collator,
- s, slen, t, tlen, &status);
-#else
- UCharIterator s_iter, t_iter;
- uiter_setUTF8(&s_iter, s, slen);
- uiter_setUTF8(&t_iter, t, tlen);
- UCollationResult result = ucol_strcollIter(coll->icu.collator,
- &s_iter, &t_iter, &status);
-#endif
- assert(!U_FAILURE(status));
- return (int)result;
-}
-
-/**
- * Get a hash of a string using ICU collation.
- */
-static uint32_t
-coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
- struct coll *coll)
-{
- uint32_t total_size = 0;
- UCharIterator itr;
- uiter_setUTF8(&itr, s, s_len);
- uint8_t buf[MAX_HASH_BUFFER];
- uint32_t state[2] = {0, 0};
- UErrorCode status = U_ZERO_ERROR;
- while (true) {
- int32_t got = ucol_nextSortKeyPart(coll->icu.collator,
- &itr, state, buf,
- MAX_HASH_BUFFER, &status);
- PMurHash32_Process(ph, pcarry, buf, got);
- total_size += got;
- if (got < MAX_HASH_BUFFER)
- break;
- }
- return total_size;
-}
-/**
- * Set up ICU collator and init cmp and hash members of collation.
- * @param coll - collation to set up.
- * @param def - collation definition.
- * @return 0 on success, -1 on error.
- */
-static int
-coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
+struct box_coll *
+box_coll_new(const struct box_coll_def *def)
{
- if (coll->icu.collator != NULL) {
- ucol_close(coll->icu.collator);
- coll->icu.collator = NULL;
- }
-
- if (def->locale_len >= MAX_LOCALE) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "too long locale");
- return -1;
- }
- char locale[MAX_LOCALE];
- memcpy(locale, def->locale, def->locale_len);
- locale[def->locale_len] = '\0';
- UErrorCode status = U_ZERO_ERROR;
- struct UCollator *collator = ucol_open(locale, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- u_errorName(status));
- return -1;
- }
- coll->icu.collator = collator;
-
- if (def->icu.french_collation != COLL_ICU_DEFAULT) {
- enum coll_icu_on_off w = def->icu.french_collation;
- UColAttributeValue v =
- w == COLL_ICU_ON ? UCOL_ON :
- w == COLL_ICU_OFF ? UCOL_OFF :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set french_collation");
- return -1;
- }
- }
- if (def->icu.alternate_handling != COLL_ICU_AH_DEFAULT) {
- enum coll_icu_alternate_handling w = def->icu.alternate_handling;
- UColAttributeValue v =
- w == COLL_ICU_AH_NON_IGNORABLE ? UCOL_NON_IGNORABLE :
- w == COLL_ICU_AH_SHIFTED ? UCOL_SHIFTED :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set alternate_handling");
- return -1;
- }
- }
- if (def->icu.case_first != COLL_ICU_CF_DEFAULT) {
- enum coll_icu_case_first w = def->icu.case_first;
- UColAttributeValue v =
- w == COLL_ICU_CF_OFF ? UCOL_OFF :
- w == COLL_ICU_CF_UPPER_FIRST ? UCOL_UPPER_FIRST :
- w == COLL_ICU_CF_LOWER_FIRST ? UCOL_LOWER_FIRST :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_CASE_FIRST, v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set case_first");
- return -1;
- }
- }
- if (def->icu.case_level != COLL_ICU_DEFAULT) {
- enum coll_icu_on_off w = def->icu.case_level;
- UColAttributeValue v =
- w == COLL_ICU_ON ? UCOL_ON :
- w == COLL_ICU_OFF ? UCOL_OFF :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_CASE_LEVEL , v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set case_level");
- return -1;
- }
- }
- if (def->icu.normalization_mode != COLL_ICU_DEFAULT) {
- enum coll_icu_on_off w = def->icu.normalization_mode;
- UColAttributeValue v =
- w == COLL_ICU_ON ? UCOL_ON :
- w == COLL_ICU_OFF ? UCOL_OFF :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set normalization_mode");
- return -1;
- }
- }
- if (def->icu.strength != COLL_ICU_STRENGTH_DEFAULT) {
- enum coll_icu_strength w = def->icu.strength;
- UColAttributeValue v =
- w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
- w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
- w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
- w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
- w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_STRENGTH, v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set strength");
- return -1;
- }
- }
- if (def->icu.numeric_collation != COLL_ICU_DEFAULT) {
- enum coll_icu_on_off w = def->icu.numeric_collation;
- UColAttributeValue v =
- w == COLL_ICU_ON ? UCOL_ON :
- w == COLL_ICU_OFF ? UCOL_OFF :
- UCOL_DEFAULT;
- ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, v, &status);
- if (U_FAILURE(status)) {
- diag_set(ClientError, ER_CANT_CREATE_COLLATION,
- "failed to set numeric_collation");
- return -1;
- }
- }
-
- coll->cmp = coll_icu_cmp;
- coll->hash = coll_icu_hash;
- return 0;
-}
-
-/**
- * Destroy ICU collation.
- */
-static void
-coll_icu_destroy(struct coll *coll)
-{
- if (coll->icu.collator != NULL)
- ucol_close(coll->icu.collator);
-}
-
-/**
- * Create a collation by definition.
- * @param def - collation definition.
- * @return - the collation OR NULL on memory error (diag is set).
- */
-struct coll *
-coll_new(const struct coll_def *def)
-{
- assert(def->type == COLL_TYPE_ICU); /* no more types are implemented yet */
-
- size_t total_len = sizeof(struct coll) + def->name_len + 1;
- struct coll *coll = (struct coll *)calloc(1, total_len);
+ assert(def->base.type == COLL_TYPE_ICU);
+ size_t total_len = sizeof(struct box_coll) + def->name_len + 1;
+ struct box_coll *coll = (struct box_coll *) malloc(total_len);
if (coll == NULL) {
- diag_set(OutOfMemory, total_len, "malloc", "struct coll");
+ diag_set(OutOfMemory, total_len, "malloc", "coll");
+ return NULL;
+ }
+ coll->base = coll_new(&def->base);
+ if (coll->base == NULL) {
+ diag_reset(ClientError, ER_CANT_CREATE_COLLATION);
+ free(coll);
return NULL;
}
-
- coll->refs = 1;
coll->id = def->id;
coll->owner_id = def->owner_id;
- coll->type = def->type;
coll->name_len = def->name_len;
memcpy(coll->name, def->name, def->name_len);
coll->name[coll->name_len] = 0;
-
- if (coll_icu_init_cmp(coll, def) != 0) {
- free(coll);
- return NULL;
- }
-
return coll;
}
void
-coll_unref(struct coll *coll)
+box_coll_delete(struct box_coll *coll)
{
- /* No more types are implemented yet. */
- assert(coll->type == COLL_TYPE_ICU);
- assert(coll->refs > 0);
- if (--coll->refs == 0) {
- coll_icu_destroy(coll);
- free(coll);
- }
+ coll_unref(coll->base);
+ free(coll);
}
diff --git a/src/box/coll.h b/src/box/coll.h
index 248500ab4..dd91f2c4c 100644
--- a/src/box/coll.h
+++ b/src/box/coll.h
@@ -30,8 +30,6 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
-#include "coll_def.h"
#include <stddef.h>
#include <stdint.h>
@@ -39,65 +37,40 @@
extern "C" {
#endif /* defined(__cplusplus) */
+struct box_coll_def;
struct coll;
-typedef int (*coll_cmp_f)(const char *s, size_t s_len,
- const char *t, size_t t_len,
- const struct coll *coll);
-
-typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len,
- uint32_t *ph, uint32_t *pcarry,
- struct coll *coll);
-
/**
- * ICU collation specific data.
+ * A box collation. Box collation is not the same as core one. Box
+ * collation has name, owner and identifier, and each user defined
+ * collation has its own box_coll object. Multiple box_coll can
+ * reference the same core collation if their functional parts
+ * match.
*/
-struct UCollator;
-
-struct coll_icu {
- struct UCollator *collator;
-};
-
-/**
- * A collation.
- */
-struct coll {
+struct box_coll {
/** Personal ID */
uint32_t id;
/** Owner ID */
uint32_t owner_id;
- /** Collation type. */
- enum coll_type type;
- /** Type specific data. */
- struct coll_icu icu;
- /** String comparator. */
- coll_cmp_f cmp;
- coll_hash_f hash;
- /** Reference counter. */
- int refs;
+ /** Core collation. */
+ struct coll *base;
/** Collation name. */
size_t name_len;
char name[0];
};
/**
- * Create a collation by definition.
- * @param def - collation definition.
- * @return - the collation OR NULL on memory error (diag is set).
+ * Create a box collation by definition.
+ * @param def Collation definition.
+ * @retval NULL Illegal parameters or memory error.
+ * @retval not NULL Collation.
*/
-struct coll *
-coll_new(const struct coll_def *def);
+struct box_coll *
+box_coll_new(const struct box_coll_def *def);
/** Increment reference counter. */
-static inline void
-coll_ref(struct coll *coll)
-{
- ++coll->refs;
-}
-
-/** Decrement reference counter. Delete when 0. */
void
-coll_unref(struct coll *coll);
+box_coll_delete(struct box_coll *coll);
#if defined(__cplusplus)
} /* extern "C" */
diff --git a/src/box/coll_cache.c b/src/box/coll_cache.c
index b7eb3edb9..6695dad22 100644
--- a/src/box/coll_cache.c
+++ b/src/box/coll_cache.c
@@ -29,20 +29,21 @@
* SUCH DAMAGE.
*/
#include "coll_cache.h"
+#include "coll.h"
#include "diag.h"
#include "assoc.h"
/** mhash table (id -> collation) */
-static struct mh_i32ptr_t *coll_cache_id = NULL;
+static struct mh_i32ptr_t *box_coll_cache_id = NULL;
/** Create global hash tables if necessary. */
int
-coll_cache_init()
+box_coll_cache_init()
{
- coll_cache_id = mh_i32ptr_new();
- if (coll_cache_id == NULL) {
- diag_set(OutOfMemory, sizeof(*coll_cache_id), "malloc",
- "coll_cache_id");
+ box_coll_cache_id = mh_i32ptr_new();
+ if (box_coll_cache_id == NULL) {
+ diag_set(OutOfMemory, sizeof(*box_coll_cache_id), "malloc",
+ "box_coll_cache_id");
return -1;
}
return 0;
@@ -50,9 +51,9 @@ coll_cache_init()
/** Delete global hash tables. */
void
-coll_cache_destroy()
+box_coll_cache_destroy()
{
- mh_i32ptr_delete(coll_cache_id);
+ mh_i32ptr_delete(box_coll_cache_id);
}
/**
@@ -61,14 +62,15 @@ coll_cache_destroy()
* @return - NULL if inserted, replaced collation if replaced.
*/
int
-coll_cache_replace(struct coll *coll, struct coll **replaced)
+box_coll_cache_replace(struct box_coll *coll, struct box_coll **replaced)
{
const struct mh_i32ptr_node_t id_node = {coll->id, coll};
struct mh_i32ptr_node_t repl_id_node = {0, NULL};
struct mh_i32ptr_node_t *prepl_id_node = &repl_id_node;
- if (mh_i32ptr_put(coll_cache_id, &id_node, &prepl_id_node, NULL) ==
- mh_end(coll_cache_id)) {
- diag_set(OutOfMemory, sizeof(id_node), "malloc", "coll_cache_id");
+ if (mh_i32ptr_put(box_coll_cache_id, &id_node, &prepl_id_node, NULL) ==
+ mh_end(box_coll_cache_id)) {
+ diag_set(OutOfMemory, sizeof(id_node), "malloc",
+ "box_coll_cache_id");
return -1;
}
assert(repl_id_node.val == NULL);
@@ -81,22 +83,22 @@ coll_cache_replace(struct coll *coll, struct coll **replaced)
* @param coll - collation to delete.
*/
void
-coll_cache_delete(const struct coll *coll)
+box_coll_cache_delete(const struct box_coll *coll)
{
- mh_int_t i = mh_i32ptr_find(coll_cache_id, coll->id, NULL);
- if (i == mh_end(coll_cache_id))
+ mh_int_t i = mh_i32ptr_find(box_coll_cache_id, coll->id, NULL);
+ if (i == mh_end(box_coll_cache_id))
return;
- mh_i32ptr_del(coll_cache_id, i, NULL);
+ mh_i32ptr_del(box_coll_cache_id, i, NULL);
}
/**
* Find a collation object by its id.
*/
-struct coll *
-coll_by_id(uint32_t id)
+struct box_coll *
+box_coll_by_id(uint32_t id)
{
- mh_int_t pos = mh_i32ptr_find(coll_cache_id, id, NULL);
- if (pos == mh_end(coll_cache_id))
+ mh_int_t pos = mh_i32ptr_find(box_coll_cache_id, id, NULL);
+ if (pos == mh_end(box_coll_cache_id))
return NULL;
- return mh_i32ptr_node(coll_cache_id, pos)->val;
+ return mh_i32ptr_node(box_coll_cache_id, pos)->val;
}
diff --git a/src/box/coll_cache.h b/src/box/coll_cache.h
index 418de4e35..21bf22701 100644
--- a/src/box/coll_cache.h
+++ b/src/box/coll_cache.h
@@ -30,23 +30,24 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
-#include "coll.h"
+#include <stdint.h>
#if defined(__cplusplus)
extern "C" {
#endif /* defined(__cplusplus) */
+struct box_coll;
+
/**
* Create global hash tables.
* @return - 0 on success, -1 on memory error.
*/
int
-coll_cache_init();
+box_coll_cache_init();
/** Delete global hash tables. */
void
-coll_cache_destroy();
+box_coll_cache_destroy();
/**
* Insert or replace a collation into collation cache.
@@ -55,20 +56,20 @@ coll_cache_destroy();
* @return - 0 on success, -1 on memory error.
*/
int
-coll_cache_replace(struct coll *coll, struct coll **replaced);
+box_coll_cache_replace(struct box_coll *coll, struct box_coll **replaced);
/**
* Delete a collation from collation cache.
* @param coll - collation to delete.
*/
void
-coll_cache_delete(const struct coll *coll);
+box_coll_cache_delete(const struct box_coll *coll);
/**
* Find a collation object by its id.
*/
-struct coll *
-coll_by_id(uint32_t id);
+struct box_coll *
+box_coll_by_id(uint32_t id);
#if defined(__cplusplus)
} /* extern "C" */
diff --git a/src/box/coll_def.c b/src/box/coll_def.c
index f849845b3..fa003bc63 100644
--- a/src/box/coll_def.c
+++ b/src/box/coll_def.c
@@ -31,38 +31,6 @@
#include "coll_def.h"
-const char *coll_type_strs[] = {
- "ICU"
-};
-
-const char *coll_icu_on_off_strs[] = {
- "DEFAULT",
- "ON",
- "OFF"
-};
-
-const char *coll_icu_alternate_handling_strs[] = {
- "DEFAULT",
- "NON_IGNORABLE",
- "SHIFTED"
-};
-
-const char *coll_icu_case_first_strs[] = {
- "DEFAULT",
- "OFF",
- "UPPER_FIRST",
- "LOWER_FIRST"
-};
-
-const char *coll_icu_strength_strs[] = {
- "DEFAULT",
- "PRIMARY",
- "SECONDARY",
- "TERTIARY",
- "QUATERNARY",
- "IDENTICAL"
-};
-
static int64_t
icu_on_off_from_str(const char *str, uint32_t len)
{
diff --git a/src/box/coll_def.h b/src/box/coll_def.h
index 7a1027a1e..4d475fab5 100644
--- a/src/box/coll_def.h
+++ b/src/box/coll_def.h
@@ -33,86 +33,15 @@
#include <stddef.h>
#include <stdint.h>
+#include <coll_def.h>
#include "opt_def.h"
#if defined(__cplusplus)
extern "C" {
#endif /* defined(__cplusplus) */
-/**
- * The supported collation types
- */
-enum coll_type {
- COLL_TYPE_ICU = 0,
- coll_type_MAX,
-};
-
-extern const char *coll_type_strs[];
-
-/*
- * ICU collation options. See
- * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
- */
-
-/** Settings for simple ICU on/off options */
-enum coll_icu_on_off {
- COLL_ICU_DEFAULT = 0,
- COLL_ICU_ON,
- COLL_ICU_OFF,
- coll_icu_on_off_MAX
-};
-
-extern const char *coll_icu_on_off_strs[];
-
-/** Alternate handling ICU settings */
-enum coll_icu_alternate_handling {
- COLL_ICU_AH_DEFAULT = 0,
- COLL_ICU_AH_NON_IGNORABLE,
- COLL_ICU_AH_SHIFTED,
- coll_icu_alternate_handling_MAX
-};
-
-extern const char *coll_icu_alternate_handling_strs[];
-
-/** Case first ICU settings */
-enum coll_icu_case_first {
- COLL_ICU_CF_DEFAULT = 0,
- COLL_ICU_CF_OFF,
- COLL_ICU_CF_UPPER_FIRST,
- COLL_ICU_CF_LOWER_FIRST,
- coll_icu_case_first_MAX
-};
-
-extern const char *coll_icu_case_first_strs[];
-
-/** Strength ICU settings */
-enum coll_icu_strength {
- COLL_ICU_STRENGTH_DEFAULT = 0,
- COLL_ICU_STRENGTH_PRIMARY,
- COLL_ICU_STRENGTH_SECONDARY,
- COLL_ICU_STRENGTH_TERTIARY,
- COLL_ICU_STRENGTH_QUATERNARY,
- COLL_ICU_STRENGTH_IDENTICAL,
- coll_icu_strength_MAX
-};
-
-extern const char *coll_icu_strength_strs[];
-
-/** Collection of ICU settings */
-struct coll_icu_def {
- enum coll_icu_on_off french_collation;
- enum coll_icu_alternate_handling alternate_handling;
- enum coll_icu_case_first case_first;
- enum coll_icu_on_off case_level;
- enum coll_icu_on_off normalization_mode;
- enum coll_icu_strength strength;
- enum coll_icu_on_off numeric_collation;
-};
-
-/**
- * Definition of a collation.
- */
-struct coll_def {
+/** Box collation definition. */
+struct box_coll_def {
/** Perconal ID */
uint32_t id;
/** Owner ID */
@@ -120,13 +49,8 @@ struct coll_def {
/** Collation name. */
size_t name_len;
const char *name;
- /** Locale. */
- size_t locale_len;
- const char *locale;
- /** Collation type. */
- enum coll_type type;
- /** Type specific options. */
- struct coll_icu_def icu;
+ /** Core collation definition. */
+ struct coll_def base;
};
extern const struct opt_def coll_icu_opts_reg[];
diff --git a/src/box/key_def.cc b/src/box/key_def.cc
index 45997ae83..8f08cfd22 100644
--- a/src/box/key_def.cc
+++ b/src/box/key_def.cc
@@ -156,16 +156,18 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count)
struct key_part_def *part = &parts[i];
struct coll *coll = NULL;
if (part->coll_id != COLL_NONE) {
- coll = coll_by_id(part->coll_id);
- if (coll == NULL) {
+ struct box_coll *box_coll =
+ box_coll_by_id(part->coll_id);
+ if (box_coll == NULL) {
diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
i + 1, "collation was not found by ID");
key_def_delete(def);
return NULL;
}
+ coll = box_coll->base;
}
key_def_set_part(def, i, part->fieldno, part->type,
- part->is_nullable, coll);
+ part->is_nullable, coll, part->coll_id);
}
return def;
}
@@ -179,8 +181,7 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
part_def->fieldno = part->fieldno;
part_def->type = part->type;
part_def->is_nullable = part->is_nullable;
- part_def->coll_id = (part->coll != NULL ?
- part->coll->id : COLL_NONE);
+ part_def->coll_id = part->coll_id;
}
}
@@ -194,7 +195,8 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
for (uint32_t item = 0; item < part_count; ++item) {
key_def_set_part(key_def, item, fields[item],
(enum field_type)types[item],
- key_part_def_default.is_nullable, NULL);
+ key_part_def_default.is_nullable, NULL,
+ COLL_NONE);
}
return key_def;
}
@@ -246,7 +248,8 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
void
key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
- enum field_type type, bool is_nullable, struct coll *coll)
+ enum field_type type, bool is_nullable, struct coll *coll,
+ uint32_t coll_id)
{
assert(part_no < def->part_count);
assert(type < field_type_MAX);
@@ -255,6 +258,7 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
def->parts[part_no].fieldno = fieldno;
def->parts[part_no].type = type;
def->parts[part_no].coll = coll;
+ def->parts[part_no].coll_id = coll_id;
column_mask_set_fieldno(&def->column_mask, fieldno);
/**
* When all parts are set, initialize the tuple
@@ -554,7 +558,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
end = part + first->part_count;
for (; part != end; part++) {
key_def_set_part(new_def, pos++, part->fieldno, part->type,
- part->is_nullable, part->coll);
+ part->is_nullable, part->coll, part->coll_id);
}
/* Set-append second key def's part to the new key def. */
@@ -564,7 +568,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
if (key_def_find(first, part->fieldno))
continue;
key_def_set_part(new_def, pos++, part->fieldno, part->type,
- part->is_nullable, part->coll);
+ part->is_nullable, part->coll, part->coll_id);
}
return new_def;
}
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 12016a51a..0e9b5f5f3 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -68,6 +68,8 @@ struct key_part {
uint32_t fieldno;
/** Type of the tuple field */
enum field_type type;
+ /** Collation ID for string comparison. */
+ uint32_t coll_id;
/** Collation definition for string comparison */
struct coll *coll;
/** True if a part can store NULLs. */
@@ -249,7 +251,8 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
*/
void
key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
- enum field_type type, bool is_nullable, struct coll *coll);
+ enum field_type type, bool is_nullable, struct coll *coll,
+ uint32_t coll_id);
/**
* Update 'has_optional_parts' of @a key_def with correspondence
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 333b6370f..385c2374a 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -46,6 +46,7 @@ extern "C" {
#include "box/txn.h"
#include "box/vclock.h" /* VCLOCK_MAX */
#include "box/sequence.h"
+#include "box/coll_cache.h"
/**
* Trigger function for all spaces
@@ -291,8 +292,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
lua_pushboolean(L, part->is_nullable);
lua_setfield(L, -2, "is_nullable");
- if (part->coll != NULL) {
- lua_pushstring(L, part->coll->name);
+ if (part->coll_id != COLL_NONE) {
+ struct box_coll *coll =
+ box_coll_by_id(part->coll_id);
+ assert(coll != NULL);
+ lua_pushstring(L, coll->name);
lua_setfield(L, -2, "collation");
}
diff --git a/src/box/schema.cc b/src/box/schema.cc
index 1b96f978c..8df4aa73b 100644
--- a/src/box/schema.cc
+++ b/src/box/schema.cc
@@ -281,13 +281,13 @@ schema_init()
auto key_def_guard = make_scoped_guard([&] { key_def_delete(key_def); });
key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
- FIELD_TYPE_STRING, false, NULL);
+ FIELD_TYPE_STRING, false, NULL, COLL_NONE);
sc_space_new(BOX_SCHEMA_ID, "_schema", key_def, &on_replace_schema,
NULL);
/* _space - home for all spaces. */
key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
- FIELD_TYPE_UNSIGNED, false, NULL);
+ FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
/* _collation - collation description. */
sc_space_new(BOX_COLLATION_ID, "_collation", key_def,
@@ -335,10 +335,10 @@ schema_init()
diag_raise();
/* space no */
key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
- FIELD_TYPE_UNSIGNED, false, NULL);
+ FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
/* index no */
key_def_set_part(key_def, 1 /* part no */, 1 /* field no */,
- FIELD_TYPE_UNSIGNED, false, NULL);
+ FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
sc_space_new(BOX_INDEX_ID, "_index", key_def,
&alter_space_on_replace_index, &on_stmt_begin_index);
}
diff --git a/src/box/tuple.c b/src/box/tuple.c
index d4760f3b1..665af2ba9 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -207,7 +207,7 @@ tuple_init(field_name_hash_f hash)
box_tuple_last = NULL;
- if (coll_cache_init() != 0)
+ if (box_coll_cache_init() != 0)
return -1;
return 0;
@@ -260,7 +260,7 @@ tuple_free(void)
tuple_format_free();
- coll_cache_destroy();
+ box_coll_cache_destroy();
}
box_tuple_format_t *
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index cfee00496..c82995d1a 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -32,7 +32,7 @@
#include "tuple.h"
#include "trivia/util.h" /* NOINLINE */
#include <math.h>
-#include "coll_def.h"
+#include <coll.h>
/* {{{ tuple_compare */
@@ -295,8 +295,7 @@ mp_compare_str(const char *field_a, const char *field_b)
}
static inline int
-mp_compare_str_coll(const char *field_a, const char *field_b,
- struct coll *coll)
+mp_compare_str_coll(const char *field_a, const char *field_b, struct coll *coll)
{
uint32_t size_a = mp_decode_strl(&field_a);
uint32_t size_b = mp_decode_strl(&field_b);
diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
index 0fa8ea561..a2a237b4a 100644
--- a/src/box/tuple_hash.cc
+++ b/src/box/tuple_hash.cc
@@ -28,11 +28,9 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
#include "tuple_hash.h"
-
+#include <coll.h>
#include "third_party/PMurHash.h"
-#include "coll.h"
/* Tuple and key hasher */
namespace {
diff --git a/src/coll.c b/src/coll.c
new file mode 100644
index 000000000..eacb643f2
--- /dev/null
+++ b/src/coll.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "coll.h"
+#include "third_party/PMurHash.h"
+#include "diag.h"
+#include <unicode/ucol.h>
+#include <trivia/config.h>
+
+/** Sizes of the fixed on-stack buffers used in this file. */
+enum {
+ /** Size of the sort key chunk buffer in coll_icu_hash(). */
+ MAX_HASH_BUFFER = 1024,
+ /** Size of the locale buffer, terminating zero included. */
+ MAX_LOCALE = 1024,
+};
+
+/**
+ * Collate two strings with the ICU collator of a collation.
+ * @param s First string.
+ * @param slen Length of @a s.
+ * @param t Second string.
+ * @param tlen Length of @a t.
+ * @param coll Collation, its ICU collator must be set.
+ * @retval The ICU collation result converted to int.
+ */
+static int
+coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
+	     const struct coll *coll)
+{
+	const struct UCollator *collator = coll->icu.collator;
+	assert(collator != NULL);
+
+	UErrorCode err = U_ZERO_ERROR;
+	UCollationResult order;
+
+#ifdef HAVE_ICU_STRCOLLUTF8
+	order = ucol_strcollUTF8(collator, s, slen, t, tlen, &err);
+#else
+	/* No UTF-8 entry point in this ICU: go through iterators. */
+	UCharIterator lhs, rhs;
+	uiter_setUTF8(&lhs, s, slen);
+	uiter_setUTF8(&rhs, t, tlen);
+	order = ucol_strcollIter(collator, &lhs, &rhs, &err);
+#endif
+	assert(!U_FAILURE(err));
+	return (int)order;
+}
+
+/**
+ * Feed the ICU sort key of a string into an incremental
+ * PMurHash32 state. The key is produced and hashed in chunks of
+ * at most MAX_HASH_BUFFER bytes.
+ * @param s String to hash.
+ * @param s_len Length of @a s.
+ * @param ph Hash state passed to PMurHash32_Process().
+ * @param pcarry Carry state passed to PMurHash32_Process().
+ * @param coll Collation whose ICU collator builds the key.
+ * @retval Total number of sort key bytes that were hashed.
+ */
+static uint32_t
+coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
+	      struct coll *coll)
+{
+	UCharIterator it;
+	uiter_setUTF8(&it, s, s_len);
+	uint8_t chunk[MAX_HASH_BUFFER];
+	uint32_t key_state[2] = {0, 0};
+	UErrorCode err = U_ZERO_ERROR;
+	uint32_t hashed = 0;
+	for (;;) {
+		int32_t len = ucol_nextSortKeyPart(coll->icu.collator, &it,
+						   key_state, chunk,
+						   MAX_HASH_BUFFER, &err);
+		PMurHash32_Process(ph, pcarry, chunk, len);
+		hashed += len;
+		/* Stop as soon as a chunk comes back not full. */
+		if (len != MAX_HASH_BUFFER)
+			break;
+	}
+	return hashed;
+}
+
+/** Map a generic on/off option to the ICU attribute value. */
+static UColAttributeValue
+coll_icu_on_off_value(enum coll_icu_on_off opt)
+{
+	return opt == COLL_ICU_ON ? UCOL_ON :
+	       opt == COLL_ICU_OFF ? UCOL_OFF : UCOL_DEFAULT;
+}
+
+/**
+ * Set a single attribute of an ICU collator.
+ * @param collator ICU collator to configure.
+ * @param attr ICU attribute to set.
+ * @param value New value of the attribute.
+ * @param name Option name to put into the error message.
+ * @retval 0 Success.
+ * @retval -1 ICU reported a failure, diag is set.
+ */
+static int
+coll_icu_set_attribute(struct UCollator *collator, UColAttribute attr,
+		       UColAttributeValue value, const char *name)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	ucol_setAttribute(collator, attr, value, &status);
+	if (U_FAILURE(status)) {
+		diag_set(IllegalParams, tt_sprintf("failed to set %s: %s",
+			 name, u_errorName(status)));
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Apply every non-default ICU option to a collator, in the order
+ * the options are declared in struct coll_icu_def. Options left
+ * at their DEFAULT value are not touched.
+ * @param collator ICU collator to configure.
+ * @param icu ICU options of a collation definition.
+ * @retval 0 Success.
+ * @retval -1 An option was rejected by ICU, diag is set.
+ */
+static int
+coll_icu_apply_options(struct UCollator *collator,
+		       const struct coll_icu_def *icu)
+{
+	if (icu->french_collation != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attribute(collator, UCOL_FRENCH_COLLATION,
+			coll_icu_on_off_value(icu->french_collation),
+			"french_collation") != 0)
+		return -1;
+	if (icu->alternate_handling != COLL_ICU_AH_DEFAULT) {
+		enum coll_icu_alternate_handling w = icu->alternate_handling;
+		UColAttributeValue v =
+			w == COLL_ICU_AH_NON_IGNORABLE ? UCOL_NON_IGNORABLE :
+			w == COLL_ICU_AH_SHIFTED ? UCOL_SHIFTED : UCOL_DEFAULT;
+		if (coll_icu_set_attribute(collator, UCOL_ALTERNATE_HANDLING,
+					   v, "alternate_handling") != 0)
+			return -1;
+	}
+	if (icu->case_first != COLL_ICU_CF_DEFAULT) {
+		enum coll_icu_case_first w = icu->case_first;
+		UColAttributeValue v = w == COLL_ICU_CF_OFF ? UCOL_OFF :
+			w == COLL_ICU_CF_UPPER_FIRST ? UCOL_UPPER_FIRST :
+			w == COLL_ICU_CF_LOWER_FIRST ? UCOL_LOWER_FIRST :
+			UCOL_DEFAULT;
+		if (coll_icu_set_attribute(collator, UCOL_CASE_FIRST, v,
+					   "case_first") != 0)
+			return -1;
+	}
+	if (icu->case_level != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attribute(collator, UCOL_CASE_LEVEL,
+			coll_icu_on_off_value(icu->case_level),
+			"case_level") != 0)
+		return -1;
+	if (icu->normalization_mode != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attribute(collator, UCOL_NORMALIZATION_MODE,
+			coll_icu_on_off_value(icu->normalization_mode),
+			"normalization_mode") != 0)
+		return -1;
+	if (icu->strength != COLL_ICU_STRENGTH_DEFAULT) {
+		enum coll_icu_strength w = icu->strength;
+		UColAttributeValue v =
+			w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
+			w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
+			w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
+			w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
+			w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
+			UCOL_DEFAULT;
+		if (coll_icu_set_attribute(collator, UCOL_STRENGTH, v,
+					   "strength") != 0)
+			return -1;
+	}
+	if (icu->numeric_collation != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attribute(collator, UCOL_NUMERIC_COLLATION,
+			coll_icu_on_off_value(icu->numeric_collation),
+			"numeric_collation") != 0)
+		return -1;
+	return 0;
+}
+
+/**
+ * Set up ICU collator and init cmp and hash members of collation.
+ * The collator is stored into @a coll only when the whole setup
+ * succeeds. If an option is rejected after the collator has been
+ * opened, the collator is closed here and @a coll is left
+ * untouched, so nothing leaks when the caller frees @a coll.
+ * @param coll Collation to set up.
+ * @param def Collation definition.
+ * @retval 0 Success.
+ * @retval -1 Illegal parameters or memory error.
+ */
+static int
+coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
+{
+	if (def->locale_len >= MAX_LOCALE) {
+		diag_set(IllegalParams, "too long locale");
+		return -1;
+	}
+	/* The locale is length-bound: make a zero-terminated copy. */
+	char locale[MAX_LOCALE];
+	memcpy(locale, def->locale, def->locale_len);
+	locale[def->locale_len] = '\0';
+	UErrorCode status = U_ZERO_ERROR;
+	struct UCollator *collator = ucol_open(locale, &status);
+	if (U_FAILURE(status)) {
+		diag_set(IllegalParams, u_errorName(status));
+		return -1;
+	}
+	if (coll_icu_apply_options(collator, &def->icu) != 0) {
+		/* Diag is already set, only release the collator. */
+		ucol_close(collator);
+		return -1;
+	}
+	coll->icu.collator = collator;
+	coll->cmp = coll_icu_cmp;
+	coll->hash = coll_icu_hash;
+	return 0;
+}
+
+/**
+ * Allocate a core collation and set it up from a definition.
+ * @param def Collation definition. Only COLL_TYPE_ICU is
+ *        supported, which is asserted.
+ * @retval NULL Illegal parameters or memory error, diag is set.
+ * @retval not NULL New collation holding one reference.
+ */
+struct coll *
+coll_new(const struct coll_def *def)
+{
+	assert(def->type == COLL_TYPE_ICU);
+	struct coll *coll = (struct coll *) malloc(sizeof(*coll));
+	if (coll == NULL) {
+		diag_set(OutOfMemory, sizeof(*coll), "malloc", "coll");
+		return NULL;
+	}
+	coll->refs = 1;
+	coll->type = def->type;
+	/*
+	 * Start from a known state, so that the failure path below
+	 * can tell whether a collator was left in the struct.
+	 */
+	coll->icu.collator = NULL;
+	coll->cmp = NULL;
+	coll->hash = NULL;
+	if (coll_icu_init_cmp(coll, def) != 0) {
+		/*
+		 * Release a collator the initializer left in the
+		 * struct, if any: otherwise it would leak together
+		 * with the freed struct.
+		 */
+		if (coll->icu.collator != NULL)
+			ucol_close(coll->icu.collator);
+		free(coll);
+		return NULL;
+	}
+	return coll;
+}
+
+/**
+ * Drop one reference. Dropping the last one closes the ICU
+ * collator and frees the collation.
+ */
+void
+coll_unref(struct coll *coll)
+{
+	assert(coll->refs > 0);
+	coll->refs--;
+	if (coll->refs != 0)
+		return;
+	ucol_close(coll->icu.collator);
+	free(coll);
+}
diff --git a/src/coll.h b/src/coll.h
new file mode 100644
index 000000000..8798d9491
--- /dev/null
+++ b/src/coll.h
@@ -0,0 +1,98 @@
+#ifndef TARANTOOL_COLL_H_INCLUDED
+#define TARANTOOL_COLL_H_INCLUDED
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "coll_def.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct coll;
+
+/**
+ * String comparator: compares string @a s of length @a s_len
+ * with string @a t of length @a t_len using collation @a coll.
+ */
+typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
+ size_t t_len, const struct coll *coll);
+
+/**
+ * String hasher. The ICU implementation in coll.c feeds the sort
+ * key of string @a s into the PMurHash32 state (@a ph,
+ * @a pcarry) and returns the number of hashed bytes.
+ */
+typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
+ uint32_t *pcarry, struct coll *coll);
+
+/** Forward declaration of the ICU collator. */
+struct UCollator;
+
+/** ICU collation specific data. */
+struct coll_icu {
+ /** ICU collator, closed in coll_unref(). */
+ struct UCollator *collator;
+};
+
+/**
+ * A core collation. It has no unique user-visible attributes
+ * such as name, id or owner - only the functional part: the
+ * comparator, the hash function and the type specific data
+ * built from a collation definition.
+ */
+struct coll {
+ /** Collation type. */
+ enum coll_type type;
+ /** Type specific data. */
+ struct coll_icu icu;
+ /** String comparator. */
+ coll_cmp_f cmp;
+ /** String hash function. */
+ coll_hash_f hash;
+ /** Reference counter. */
+ int refs;
+};
+
+/**
+ * Create a core collation by definition. The new collation
+ * holds one reference, to be dropped with coll_unref().
+ * @param def Core collation definition.
+ * @retval NULL Illegal parameters or memory error.
+ * @retval not NULL Collation.
+ */
+struct coll *
+coll_new(const struct coll_def *def);
+
+/** Take one more reference to a collation. */
+static inline void
+coll_ref(struct coll *coll)
+{
+	coll->refs += 1;
+}
+
+/**
+ * Decrement reference counter. When it reaches 0, close the ICU
+ * collator and free the collation.
+ */
+void
+coll_unref(struct coll *coll);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_COLL_H_INCLUDED */
diff --git a/src/coll_def.c b/src/coll_def.c
new file mode 100644
index 000000000..df58caca8
--- /dev/null
+++ b/src/coll_def.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "coll_def.h"
+
+/** Names of enum coll_type values, in the order of the enum. */
+const char *coll_type_strs[] = {
+ "ICU"
+};
+
+/** Names of enum coll_icu_on_off values, in the order of the enum. */
+const char *coll_icu_on_off_strs[] = {
+ "DEFAULT",
+ "ON",
+ "OFF"
+};
+
+/**
+ * Names of enum coll_icu_alternate_handling values, in the order
+ * of the enum.
+ */
+const char *coll_icu_alternate_handling_strs[] = {
+ "DEFAULT",
+ "NON_IGNORABLE",
+ "SHIFTED"
+};
+
+/**
+ * Names of enum coll_icu_case_first values, in the order of the
+ * enum.
+ */
+const char *coll_icu_case_first_strs[] = {
+ "DEFAULT",
+ "OFF",
+ "UPPER_FIRST",
+ "LOWER_FIRST"
+};
+
+/**
+ * Names of enum coll_icu_strength values, in the order of the
+ * enum.
+ */
+const char *coll_icu_strength_strs[] = {
+ "DEFAULT",
+ "PRIMARY",
+ "SECONDARY",
+ "TERTIARY",
+ "QUATERNARY",
+ "IDENTICAL"
+};
diff --git a/src/coll_def.h b/src/coll_def.h
new file mode 100644
index 000000000..c8921b41a
--- /dev/null
+++ b/src/coll_def.h
@@ -0,0 +1,115 @@
+#ifndef TARANTOOL_COLL_DEF_H_INCLUDED
+#define TARANTOOL_COLL_DEF_H_INCLUDED
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * The supported collation types. coll_type_MAX is not a type: it
+ * follows the last one and so equals the number of types.
+ */
+enum coll_type {
+ COLL_TYPE_ICU = 0,
+ coll_type_MAX,
+};
+
+/** Names of the collation types, in the order of the enum. */
+extern const char *coll_type_strs[];
+
+/*
+ * ICU collation options. See
+ * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
+ */
+
+/**
+ * Settings for simple ICU on/off options. With DEFAULT the
+ * corresponding ICU attribute is not set at all. The _MAX member
+ * equals the number of settings.
+ */
+enum coll_icu_on_off {
+ COLL_ICU_DEFAULT = 0,
+ COLL_ICU_ON,
+ COLL_ICU_OFF,
+ coll_icu_on_off_MAX
+};
+
+/** Names of the on/off settings, in the order of the enum. */
+extern const char *coll_icu_on_off_strs[];
+
+/**
+ * Alternate handling ICU settings. With DEFAULT the ICU
+ * attribute is not set at all. The _MAX member equals the number
+ * of settings.
+ */
+enum coll_icu_alternate_handling {
+ COLL_ICU_AH_DEFAULT = 0,
+ COLL_ICU_AH_NON_IGNORABLE,
+ COLL_ICU_AH_SHIFTED,
+ coll_icu_alternate_handling_MAX
+};
+
+/** Names of the settings above, in the order of the enum. */
+extern const char *coll_icu_alternate_handling_strs[];
+
+/**
+ * Case first ICU settings. With DEFAULT the ICU attribute is not
+ * set at all. The _MAX member equals the number of settings.
+ */
+enum coll_icu_case_first {
+ COLL_ICU_CF_DEFAULT = 0,
+ COLL_ICU_CF_OFF,
+ COLL_ICU_CF_UPPER_FIRST,
+ COLL_ICU_CF_LOWER_FIRST,
+ coll_icu_case_first_MAX
+};
+
+/** Names of the settings above, in the order of the enum. */
+extern const char *coll_icu_case_first_strs[];
+
+/**
+ * Strength ICU settings. With DEFAULT the ICU attribute is not
+ * set at all. The _MAX member equals the number of settings.
+ */
+enum coll_icu_strength {
+ COLL_ICU_STRENGTH_DEFAULT = 0,
+ COLL_ICU_STRENGTH_PRIMARY,
+ COLL_ICU_STRENGTH_SECONDARY,
+ COLL_ICU_STRENGTH_TERTIARY,
+ COLL_ICU_STRENGTH_QUATERNARY,
+ COLL_ICU_STRENGTH_IDENTICAL,
+ coll_icu_strength_MAX
+};
+
+/** Names of the settings above, in the order of the enum. */
+extern const char *coll_icu_strength_strs[];
+
+/**
+ * Collection of ICU settings. Each member selects the value of
+ * one ICU collator attribute; members left at DEFAULT are not
+ * applied to the collator.
+ */
+struct coll_icu_def {
+ /** Value for UCOL_FRENCH_COLLATION. */
+ enum coll_icu_on_off french_collation;
+ /** Value for UCOL_ALTERNATE_HANDLING. */
+ enum coll_icu_alternate_handling alternate_handling;
+ /** Value for UCOL_CASE_FIRST. */
+ enum coll_icu_case_first case_first;
+ /** Value for UCOL_CASE_LEVEL. */
+ enum coll_icu_on_off case_level;
+ /** Value for UCOL_NORMALIZATION_MODE. */
+ enum coll_icu_on_off normalization_mode;
+ /** Value for UCOL_STRENGTH. */
+ enum coll_icu_strength strength;
+ /** Value for UCOL_NUMERIC_COLLATION. */
+ enum coll_icu_on_off numeric_collation;
+};
+
+/**
+ * Core collation definition: everything coll_new() needs to
+ * build a collation.
+ */
+struct coll_def {
+ /**
+ * Locale. @a locale_len is the length of @a locale without a
+ * terminating zero; coll.c copies exactly that many bytes and
+ * terminates the copy itself.
+ */
+ size_t locale_len;
+ const char *locale;
+ /** Collation type. */
+ enum coll_type type;
+ /** Type specific options. */
+ struct coll_icu_def icu;
+};
+
+#endif /* TARANTOOL_COLL_DEF_H_INCLUDED */
diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
index d77959606..17f26ea07 100644
--- a/test/unit/coll.cpp
+++ b/test/unit/coll.cpp
@@ -1,9 +1,9 @@
-#include "box/coll.h"
#include <iostream>
#include <vector>
#include <algorithm>
#include <string.h>
-#include <box/coll_def.h>
+#include <coll_def.h>
+#include <coll.h>
#include <assert.h>
#include <msgpuck.h>
#include <diag.h>
@@ -51,8 +51,6 @@ manual_test()
def.locale = "ru_RU";
def.locale_len = strlen(def.locale);
def.type = COLL_TYPE_ICU;
- def.name = "test";
- def.name_len = strlen(def.name);
struct coll *coll;
cout << " -- default ru_RU -- " << endl;
@@ -136,8 +134,6 @@ hash_test()
def.locale = "ru_RU";
def.locale_len = strlen(def.locale);
def.type = COLL_TYPE_ICU;
- def.name = "test";
- def.name_len = strlen(def.name);
struct coll *coll;
/* Case sensitive */
--
2.15.1 (Apple Git-101)
More information about the Tarantool-patches
mailing list