Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org
Cc: kostja@tarantool.org
Subject: [tarantool-patches] [PATCH v3 2/4] collation: split collation into core and box objects
Date: Tue, 15 May 2018 22:54:06 +0300	[thread overview]
Message-ID: <b008cb67706a1f246b9058fa1de3fb90bc04603c.1526414017.git.v.shpilevoy@tarantool.org> (raw)
In-Reply-To: <cover.1526414017.git.v.shpilevoy@tarantool.org>
In-Reply-To: <cover.1526414017.git.v.shpilevoy@tarantool.org>

Issue #3290 exposed an important problem: Tarantool cannot
create purely internal collations that have no ID, name or
owner and exist only for internal use.

The original struct coll cannot be used for this because
* it has fields that are not needed internally;
* a collation name is a public thing, and the collation cache
  uses it, so users would have to be forbidden from using some
  system names;
* when multiple collations have the same comparator and only
  their names/owners/IDs differ, separate UCollator objects are
  created, but it would be good to be able to reference a
  single one.

This patch renames coll to box_coll and coll_def to box_coll_def,
and introduces coll - a pure collation object without any
user-defined attributes.

Needed for #3290.
---
 src/CMakeLists.txt       |   2 +
 src/box/alter.cc         |  72 +++++++-------
 src/box/coll.c           | 247 ++++-------------------------------------------
 src/box/coll.h           |  59 +++--------
 src/box/coll_cache.c     |  44 +++++----
 src/box/coll_cache.h     |  17 ++--
 src/box/coll_def.c       |  32 ------
 src/box/coll_def.h       |  86 +----------------
 src/box/key_def.cc       |  22 +++--
 src/box/key_def.h        |   5 +-
 src/box/lua/space.cc     |   8 +-
 src/box/schema.cc        |   8 +-
 src/box/tuple.c          |   4 +-
 src/box/tuple_compare.cc |   5 +-
 src/box/tuple_hash.cc    |   4 +-
 src/coll.c               | 234 ++++++++++++++++++++++++++++++++++++++++++++
 src/coll.h               |  98 +++++++++++++++++++
 src/coll_def.c           |  63 ++++++++++++
 src/coll_def.h           | 115 ++++++++++++++++++++++
 test/unit/coll.cpp       |   8 +-
 20 files changed, 653 insertions(+), 480 deletions(-)
 create mode 100644 src/coll.c
 create mode 100644 src/coll.h
 create mode 100644 src/coll_def.c
 create mode 100644 src/coll_def.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8ab09e968..5bf17614b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -94,6 +94,8 @@ set (core_sources
      random.c
      trigger.cc
      http_parser.c
+     coll.c
+     coll_def.c
  )
 
 if (TARGET_OS_NETBSD)
diff --git a/src/box/alter.cc b/src/box/alter.cc
index 8766c8171..d72b9a3bb 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -35,6 +35,7 @@
 #include "index.h"
 #include "func.h"
 #include "coll_cache.h"
+#include "coll_def.h"
 #include "txn.h"
 #include "tuple.h"
 #include "fiber.h" /* for gc_pool */
@@ -2286,7 +2287,7 @@ on_replace_dd_func(struct trigger * /* trigger */, void *event)
 
 /** Create a collation definition from tuple. */
 void
-coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
+box_coll_def_new_from_tuple(const struct tuple *tuple, struct box_coll_def *def)
 {
 	memset(def, 0, sizeof(*def));
 	uint32_t name_len, locale_len, type_len;
@@ -2294,15 +2295,16 @@ coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
 	def->name = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_NAME, &name_len);
 	def->name_len = name_len;
 	def->owner_id = tuple_field_u32_xc(tuple, BOX_COLLATION_FIELD_UID);
+	struct coll_def *base = &def->base;
 	const char *type = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_TYPE,
 					      &type_len);
-	def->type = STRN2ENUM(coll_type, type, type_len);
-	if (def->type == coll_type_MAX)
+	base->type = STRN2ENUM(coll_type, type, type_len);
+	if (base->type == coll_type_MAX)
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "unknown collation type");
-	def->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
-					 &locale_len);
-	def->locale_len = locale_len;
+	base->locale = tuple_field_str_xc(tuple, BOX_COLLATION_FIELD_LOCALE,
+					  &locale_len);
+	base->locale_len = locale_len;
 	const char *options =
 		tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
 					 MP_MAP);
@@ -2315,53 +2317,53 @@ coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
 			  "collation locale is too long");
 	/* Locale is an optional argument and can be NULL. */
 	if (locale_len > 0)
-		identifier_check_xc(def->locale, locale_len);
+		identifier_check_xc(base->locale, locale_len);
 	identifier_check_xc(def->name, name_len);
 
-	assert(def->type == COLL_TYPE_ICU); /* no more defined now */
-	if (opts_decode(&def->icu, coll_icu_opts_reg, &options,
+	assert(base->type == COLL_TYPE_ICU);
+	if (opts_decode(&base->icu, coll_icu_opts_reg, &options,
 			ER_WRONG_COLLATION_OPTIONS,
 			BOX_COLLATION_FIELD_OPTIONS, NULL) != 0)
 		diag_raise();
 
-	if (def->icu.french_collation == coll_icu_on_off_MAX) {
+	if (base->icu.french_collation == coll_icu_on_off_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong french_collation option setting, "
 				  "expected ON | OFF");
 	}
 
-	if (def->icu.alternate_handling == coll_icu_alternate_handling_MAX) {
+	if (base->icu.alternate_handling == coll_icu_alternate_handling_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong alternate_handling option setting, "
 				  "expected NON_IGNORABLE | SHIFTED");
 	}
 
-	if (def->icu.case_first == coll_icu_case_first_MAX) {
+	if (base->icu.case_first == coll_icu_case_first_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong case_first option setting, "
 				  "expected OFF | UPPER_FIRST | LOWER_FIRST");
 	}
 
-	if (def->icu.case_level == coll_icu_on_off_MAX) {
+	if (base->icu.case_level == coll_icu_on_off_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong case_level option setting, "
 				  "expected ON | OFF");
 	}
 
-	if (def->icu.normalization_mode == coll_icu_on_off_MAX) {
+	if (base->icu.normalization_mode == coll_icu_on_off_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong normalization_mode option setting, "
 				  "expected ON | OFF");
 	}
 
-	if (def->icu.strength == coll_icu_strength_MAX) {
+	if (base->icu.strength == coll_icu_strength_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong strength option setting, "
 				  "expected PRIMARY | SECONDARY | "
 				  "TERTIARY | QUATERNARY | IDENTICAL");
 	}
 
-	if (def->icu.numeric_collation == coll_icu_on_off_MAX) {
+	if (base->icu.numeric_collation == coll_icu_on_off_MAX) {
 		tnt_raise(ClientError, ER_CANT_CREATE_COLLATION,
 			  "ICU wrong numeric_collation option setting, "
 				  "expected ON | OFF");
@@ -2373,16 +2375,16 @@ coll_def_new_from_tuple(const struct tuple *tuple, struct coll_def *def)
  * A change is only INSERT or DELETE, UPDATE is not supported.
  */
 static void
-coll_cache_rollback(struct trigger *trigger, void *event)
+box_coll_cache_rollback(struct trigger *trigger, void *event)
 {
-	struct coll *coll = (struct coll *) trigger->data;
+	struct box_coll *coll = (struct box_coll *) trigger->data;
 	struct txn_stmt *stmt = txn_last_stmt((struct txn*) event);
 
 	if (stmt->new_tuple == NULL) {
 		/*  Rollback DELETE: put the collation back. */
 		assert(stmt->old_tuple != NULL);
-		struct coll *replaced;
-		if (coll_cache_replace(coll, &replaced) != 0) {
+		struct box_coll *replaced;
+		if (box_coll_cache_replace(coll, &replaced) != 0) {
 			panic("Out of memory on insertion into collation "\
 			      "cache");
 		}
@@ -2390,19 +2392,19 @@ coll_cache_rollback(struct trigger *trigger, void *event)
 	} else {
 		/* INSERT: remove and free the new collation */
 		assert(stmt->old_tuple == NULL);
-		coll_cache_delete(coll);
-		coll_unref(coll);
+		box_coll_cache_delete(coll);
+		box_coll_delete(coll);
 	}
 }
 
 
 /** Dereference a deleted collation on commit. */
 static void
-coll_cache_commit(struct trigger *trigger, void *event)
+box_coll_cache_commit(struct trigger *trigger, void *event)
 {
 	(void) event;
-	struct coll *coll = (struct coll *) trigger->data;
-	coll_unref(coll);
+	struct box_coll *coll = (struct box_coll *) trigger->data;
+	box_coll_delete(coll);
 }
 
 /**
@@ -2418,15 +2420,15 @@ on_replace_dd_collation(struct trigger * /* trigger */, void *event)
 	struct tuple *new_tuple = stmt->new_tuple;
 	txn_check_singlestatement_xc(txn, "Space _collation");
 	struct trigger *on_rollback =
-		txn_alter_trigger_new(coll_cache_rollback, NULL);
+		txn_alter_trigger_new(box_coll_cache_rollback, NULL);
 	struct trigger *on_commit =
-		txn_alter_trigger_new(coll_cache_commit, NULL);
+		txn_alter_trigger_new(box_coll_cache_commit, NULL);
 	if (new_tuple == NULL && old_tuple != NULL) {
 		/* DELETE */
 		/* TODO: Check that no index uses the collation */
 		int32_t old_id = tuple_field_u32_xc(old_tuple,
 						    BOX_COLLATION_FIELD_ID);
-		struct coll *old_coll = coll_by_id(old_id);
+		struct box_coll *old_coll = box_coll_by_id(old_id);
 		assert(old_coll != NULL);
 		access_check_ddl(old_coll->name, old_coll->owner_id,
 				 SC_COLLATION, PRIV_D, false);
@@ -2435,23 +2437,23 @@ on_replace_dd_collation(struct trigger * /* trigger */, void *event)
 		 * deletion from the cache to make trigger logic
 		 * simple..
 		 */
-		coll_cache_delete(old_coll);
+		box_coll_cache_delete(old_coll);
 		on_rollback->data = old_coll;
 		on_commit->data = old_coll;
 		txn_on_rollback(txn, on_rollback);
 		txn_on_commit(txn, on_commit);
 	} else if (new_tuple != NULL && old_tuple == NULL) {
 		/* INSERT */
-		struct coll_def new_def;
-		coll_def_new_from_tuple(new_tuple, &new_def);
+		struct box_coll_def new_def;
+		box_coll_def_new_from_tuple(new_tuple, &new_def);
 		access_check_ddl(new_def.name, new_def.owner_id, SC_COLLATION,
 				 PRIV_C, false);
-		struct coll *new_coll = coll_new(&new_def);
+		struct box_coll *new_coll = box_coll_new(&new_def);
 		if (new_coll == NULL)
 			diag_raise();
-		struct coll *replaced;
-		if (coll_cache_replace(new_coll, &replaced) != 0) {
-			coll_unref(new_coll);
+		struct box_coll *replaced;
+		if (box_coll_cache_replace(new_coll, &replaced) != 0) {
+			box_coll_delete(new_coll);
 			diag_raise();
 		}
 		assert(replaced == NULL);
diff --git a/src/box/coll.c b/src/box/coll.c
index 436d8d127..3bf3aff3c 100644
--- a/src/box/coll.c
+++ b/src/box/coll.c
@@ -28,252 +28,39 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
 #include "coll.h"
-#include "third_party/PMurHash.h"
+#include <coll.h>
+#include "coll_def.h"
 #include "error.h"
 #include "diag.h"
-#include <unicode/ucol.h>
-#include <trivia/config.h>
-
-enum {
-	MAX_HASH_BUFFER = 1024,
-	MAX_LOCALE = 1024,
-};
-
-/**
- * Compare two string using ICU collation.
- */
-static int
-coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
-	     const struct coll *coll)
-{
-	assert(coll->icu.collator != NULL);
-
-	UErrorCode status = U_ZERO_ERROR;
-
-#ifdef HAVE_ICU_STRCOLLUTF8
-	UCollationResult result = ucol_strcollUTF8(coll->icu.collator,
-						   s, slen, t, tlen, &status);
-#else
-	UCharIterator s_iter, t_iter;
-	uiter_setUTF8(&s_iter, s, slen);
-	uiter_setUTF8(&t_iter, t, tlen);
-	UCollationResult result = ucol_strcollIter(coll->icu.collator,
-						   &s_iter, &t_iter, &status);
-#endif
-	assert(!U_FAILURE(status));
-	return (int)result;
-}
-
-/**
- * Get a hash of a string using ICU collation.
- */
-static uint32_t
-coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
-	      struct coll *coll)
-{
-	uint32_t total_size = 0;
-	UCharIterator itr;
-	uiter_setUTF8(&itr, s, s_len);
-	uint8_t buf[MAX_HASH_BUFFER];
-	uint32_t state[2] = {0, 0};
-	UErrorCode status = U_ZERO_ERROR;
-	while (true) {
-		int32_t got = ucol_nextSortKeyPart(coll->icu.collator,
-						   &itr, state, buf,
-						   MAX_HASH_BUFFER, &status);
-		PMurHash32_Process(ph, pcarry, buf, got);
-		total_size += got;
-		if (got < MAX_HASH_BUFFER)
-			break;
-	}
-	return total_size;
-}
 
-/**
- * Set up ICU collator and init cmp and hash members of collation.
- * @param coll - collation to set up.
- * @param def - collation definition.
- * @return 0 on success, -1 on error.
- */
-static int
-coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
+struct box_coll *
+box_coll_new(const struct box_coll_def *def)
 {
-	if (coll->icu.collator != NULL) {
-		ucol_close(coll->icu.collator);
-		coll->icu.collator = NULL;
-	}
-
-	if (def->locale_len >= MAX_LOCALE) {
-		diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-			 "too long locale");
-		return -1;
-	}
-	char locale[MAX_LOCALE];
-	memcpy(locale, def->locale, def->locale_len);
-	locale[def->locale_len] = '\0';
-	UErrorCode status = U_ZERO_ERROR;
-	struct UCollator *collator = ucol_open(locale, &status);
-	if (U_FAILURE(status)) {
-		diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-			 u_errorName(status));
-		return -1;
-	}
-	coll->icu.collator = collator;
-
-	if (def->icu.french_collation != COLL_ICU_DEFAULT) {
-		enum coll_icu_on_off w = def->icu.french_collation;
-		UColAttributeValue v =
-			w == COLL_ICU_ON ? UCOL_ON :
-			w == COLL_ICU_OFF ? UCOL_OFF :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set french_collation");
-			return -1;
-		}
-	}
-	if (def->icu.alternate_handling != COLL_ICU_AH_DEFAULT) {
-		enum coll_icu_alternate_handling w = def->icu.alternate_handling;
-		UColAttributeValue v =
-			w == COLL_ICU_AH_NON_IGNORABLE ? UCOL_NON_IGNORABLE :
-			w == COLL_ICU_AH_SHIFTED ? UCOL_SHIFTED :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set alternate_handling");
-			return -1;
-		}
-	}
-	if (def->icu.case_first != COLL_ICU_CF_DEFAULT) {
-		enum coll_icu_case_first w = def->icu.case_first;
-		UColAttributeValue v =
-			w == COLL_ICU_CF_OFF ? UCOL_OFF :
-			w == COLL_ICU_CF_UPPER_FIRST ? UCOL_UPPER_FIRST :
-			w == COLL_ICU_CF_LOWER_FIRST ? UCOL_LOWER_FIRST :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_CASE_FIRST, v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set case_first");
-			return -1;
-		}
-	}
-	if (def->icu.case_level != COLL_ICU_DEFAULT) {
-		enum coll_icu_on_off w = def->icu.case_level;
-		UColAttributeValue v =
-			w == COLL_ICU_ON ? UCOL_ON :
-			w == COLL_ICU_OFF ? UCOL_OFF :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_CASE_LEVEL , v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set case_level");
-			return -1;
-		}
-	}
-	if (def->icu.normalization_mode != COLL_ICU_DEFAULT) {
-		enum coll_icu_on_off w = def->icu.normalization_mode;
-		UColAttributeValue v =
-			w == COLL_ICU_ON ? UCOL_ON :
-			w == COLL_ICU_OFF ? UCOL_OFF :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set normalization_mode");
-			return -1;
-		}
-	}
-	if (def->icu.strength != COLL_ICU_STRENGTH_DEFAULT) {
-		enum coll_icu_strength w = def->icu.strength;
-		UColAttributeValue v =
-			w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
-			w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
-			w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
-			w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
-			w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_STRENGTH, v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set strength");
-			return -1;
-		}
-	}
-	if (def->icu.numeric_collation != COLL_ICU_DEFAULT) {
-		enum coll_icu_on_off w = def->icu.numeric_collation;
-		UColAttributeValue v =
-			w == COLL_ICU_ON ? UCOL_ON :
-			w == COLL_ICU_OFF ? UCOL_OFF :
-			UCOL_DEFAULT;
-		ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, v, &status);
-		if (U_FAILURE(status)) {
-			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
-				 "failed to set numeric_collation");
-			return -1;
-		}
-	}
-
-	coll->cmp = coll_icu_cmp;
-	coll->hash = coll_icu_hash;
-	return 0;
-}
-
-/**
- * Destroy ICU collation.
- */
-static void
-coll_icu_destroy(struct coll *coll)
-{
-	if (coll->icu.collator != NULL)
-		ucol_close(coll->icu.collator);
-}
-
-/**
- * Create a collation by definition.
- * @param def - collation definition.
- * @return - the collation OR NULL on memory error (diag is set).
- */
-struct coll *
-coll_new(const struct coll_def *def)
-{
-	assert(def->type == COLL_TYPE_ICU); /* no more types are implemented yet */
-
-	size_t total_len = sizeof(struct coll) + def->name_len + 1;
-	struct coll *coll = (struct coll *)calloc(1, total_len);
+	assert(def->base.type == COLL_TYPE_ICU);
+	size_t total_len = sizeof(struct box_coll) + def->name_len + 1;
+	struct box_coll *coll = (struct box_coll *) malloc(total_len);
 	if (coll == NULL) {
-		diag_set(OutOfMemory, total_len, "malloc", "struct coll");
+		diag_set(OutOfMemory, total_len, "malloc", "coll");
+		return NULL;
+	}
+	coll->base = coll_new(&def->base);
+	if (coll->base == NULL) {
+		diag_reset(ClientError, ER_CANT_CREATE_COLLATION);
+		free(coll);
 		return NULL;
 	}
-
-	coll->refs = 1;
 	coll->id = def->id;
 	coll->owner_id = def->owner_id;
-	coll->type = def->type;
 	coll->name_len = def->name_len;
 	memcpy(coll->name, def->name, def->name_len);
 	coll->name[coll->name_len] = 0;
-
-	if (coll_icu_init_cmp(coll, def) != 0) {
-		free(coll);
-		return NULL;
-	}
-
 	return coll;
 }
 
 void
-coll_unref(struct coll *coll)
+box_coll_delete(struct box_coll *coll)
 {
-	/* No more types are implemented yet. */
-	assert(coll->type == COLL_TYPE_ICU);
-	assert(coll->refs > 0);
-	if (--coll->refs == 0) {
-		coll_icu_destroy(coll);
-		free(coll);
-	}
+	coll_unref(coll->base);
+	free(coll);
 }
diff --git a/src/box/coll.h b/src/box/coll.h
index 248500ab4..dd91f2c4c 100644
--- a/src/box/coll.h
+++ b/src/box/coll.h
@@ -30,8 +30,6 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
-#include "coll_def.h"
 #include <stddef.h>
 #include <stdint.h>
 
@@ -39,65 +37,40 @@
 extern "C" {
 #endif /* defined(__cplusplus) */
 
+struct box_coll_def;
 struct coll;
 
-typedef int (*coll_cmp_f)(const char *s, size_t s_len,
-			  const char *t, size_t t_len,
-			  const struct coll *coll);
-
-typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len,
-				uint32_t *ph, uint32_t *pcarry,
-				struct coll *coll);
-
 /**
- * ICU collation specific data.
+ * A box collation. Box collation is not the same as core one. Box
+ * collation has name, owner and identifier, and each user defined
+ * collation has its own box_coll object. Multiple box_coll can
+ * reference the same core collation if their functional parts
+ * match.
  */
-struct UCollator;
-
-struct coll_icu {
-	struct UCollator *collator;
-};
-
-/**
- * A collation.
- */
-struct coll {
+struct box_coll {
 	/** Personal ID */
 	uint32_t id;
 	/** Owner ID */
 	uint32_t owner_id;
-	/** Collation type. */
-	enum coll_type type;
-	/** Type specific data. */
-	struct coll_icu icu;
-	/** String comparator. */
-	coll_cmp_f cmp;
-	coll_hash_f hash;
-	/** Reference counter. */
-	int refs;
+	/** Core collation. */
+	struct coll *base;
 	/** Collation name. */
 	size_t name_len;
 	char name[0];
 };
 
 /**
- * Create a collation by definition.
- * @param def - collation definition.
- * @return - the collation OR NULL on memory error (diag is set).
+ * Create a box collation by definition.
+ * @param def Collation definition.
+ * @retval NULL Illegal parameters or memory error.
+ * @retval not NULL Collation.
  */
-struct coll *
-coll_new(const struct coll_def *def);
+struct box_coll *
+box_coll_new(const struct box_coll_def *def);
 
 /** Increment reference counter. */
-static inline void
-coll_ref(struct coll *coll)
-{
-	++coll->refs;
-}
-
-/** Decrement reference counter. Delete when 0. */
 void
-coll_unref(struct coll *coll);
+box_coll_delete(struct box_coll *coll);
 
 #if defined(__cplusplus)
 } /* extern "C" */
diff --git a/src/box/coll_cache.c b/src/box/coll_cache.c
index b7eb3edb9..6695dad22 100644
--- a/src/box/coll_cache.c
+++ b/src/box/coll_cache.c
@@ -29,20 +29,21 @@
  * SUCH DAMAGE.
  */
 #include "coll_cache.h"
+#include "coll.h"
 #include "diag.h"
 #include "assoc.h"
 
 /** mhash table (id -> collation) */
-static struct mh_i32ptr_t *coll_cache_id = NULL;
+static struct mh_i32ptr_t *box_coll_cache_id = NULL;
 
 /** Create global hash tables if necessary. */
 int
-coll_cache_init()
+box_coll_cache_init()
 {
-	coll_cache_id = mh_i32ptr_new();
-	if (coll_cache_id == NULL) {
-		diag_set(OutOfMemory, sizeof(*coll_cache_id), "malloc",
-			 "coll_cache_id");
+	box_coll_cache_id = mh_i32ptr_new();
+	if (box_coll_cache_id == NULL) {
+		diag_set(OutOfMemory, sizeof(*box_coll_cache_id), "malloc",
+			 "box_coll_cache_id");
 		return -1;
 	}
 	return 0;
@@ -50,9 +51,9 @@ coll_cache_init()
 
 /** Delete global hash tables. */
 void
-coll_cache_destroy()
+box_coll_cache_destroy()
 {
-	mh_i32ptr_delete(coll_cache_id);
+	mh_i32ptr_delete(box_coll_cache_id);
 }
 
 /**
@@ -61,14 +62,15 @@ coll_cache_destroy()
  * @return - NULL if inserted, replaced collation if replaced.
  */
 int
-coll_cache_replace(struct coll *coll, struct coll **replaced)
+box_coll_cache_replace(struct box_coll *coll, struct box_coll **replaced)
 {
 	const struct mh_i32ptr_node_t id_node = {coll->id, coll};
 	struct mh_i32ptr_node_t repl_id_node = {0, NULL};
 	struct mh_i32ptr_node_t *prepl_id_node = &repl_id_node;
-	if (mh_i32ptr_put(coll_cache_id, &id_node, &prepl_id_node, NULL) ==
-	    mh_end(coll_cache_id)) {
-		diag_set(OutOfMemory, sizeof(id_node), "malloc", "coll_cache_id");
+	if (mh_i32ptr_put(box_coll_cache_id, &id_node, &prepl_id_node, NULL) ==
+	    mh_end(box_coll_cache_id)) {
+		diag_set(OutOfMemory, sizeof(id_node), "malloc",
+			 "box_coll_cache_id");
 		return -1;
 	}
 	assert(repl_id_node.val == NULL);
@@ -81,22 +83,22 @@ coll_cache_replace(struct coll *coll, struct coll **replaced)
  * @param coll - collation to delete.
  */
 void
-coll_cache_delete(const struct coll *coll)
+box_coll_cache_delete(const struct box_coll *coll)
 {
-	mh_int_t i = mh_i32ptr_find(coll_cache_id, coll->id, NULL);
-	if (i == mh_end(coll_cache_id))
+	mh_int_t i = mh_i32ptr_find(box_coll_cache_id, coll->id, NULL);
+	if (i == mh_end(box_coll_cache_id))
 		return;
-	mh_i32ptr_del(coll_cache_id, i, NULL);
+	mh_i32ptr_del(box_coll_cache_id, i, NULL);
 }
 
 /**
  * Find a collation object by its id.
  */
-struct coll *
-coll_by_id(uint32_t id)
+struct box_coll *
+box_coll_by_id(uint32_t id)
 {
-	mh_int_t pos = mh_i32ptr_find(coll_cache_id, id, NULL);
-	if (pos == mh_end(coll_cache_id))
+	mh_int_t pos = mh_i32ptr_find(box_coll_cache_id, id, NULL);
+	if (pos == mh_end(box_coll_cache_id))
 		return NULL;
-	return mh_i32ptr_node(coll_cache_id, pos)->val;
+	return mh_i32ptr_node(box_coll_cache_id, pos)->val;
 }
diff --git a/src/box/coll_cache.h b/src/box/coll_cache.h
index 418de4e35..21bf22701 100644
--- a/src/box/coll_cache.h
+++ b/src/box/coll_cache.h
@@ -30,23 +30,24 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
-#include "coll.h"
+#include <stdint.h>
 
 #if defined(__cplusplus)
 extern "C" {
 #endif /* defined(__cplusplus) */
 
+struct box_coll;
+
 /**
  * Create global hash tables.
  * @return - 0 on success, -1 on memory error.
  */
 int
-coll_cache_init();
+box_coll_cache_init();
 
 /** Delete global hash tables. */
 void
-coll_cache_destroy();
+box_coll_cache_destroy();
 
 /**
  * Insert or replace a collation into collation cache.
@@ -55,20 +56,20 @@ coll_cache_destroy();
  * @return - 0 on success, -1 on memory error.
  */
 int
-coll_cache_replace(struct coll *coll, struct coll **replaced);
+box_coll_cache_replace(struct box_coll *coll, struct box_coll **replaced);
 
 /**
  * Delete a collation from collation cache.
  * @param coll - collation to delete.
  */
 void
-coll_cache_delete(const struct coll *coll);
+box_coll_cache_delete(const struct box_coll *coll);
 
 /**
  * Find a collation object by its id.
  */
-struct coll *
-coll_by_id(uint32_t id);
+struct box_coll *
+box_coll_by_id(uint32_t id);
 
 #if defined(__cplusplus)
 } /* extern "C" */
diff --git a/src/box/coll_def.c b/src/box/coll_def.c
index f849845b3..fa003bc63 100644
--- a/src/box/coll_def.c
+++ b/src/box/coll_def.c
@@ -31,38 +31,6 @@
 
 #include "coll_def.h"
 
-const char *coll_type_strs[] = {
-	"ICU"
-};
-
-const char *coll_icu_on_off_strs[] = {
-	"DEFAULT",
-	"ON",
-	"OFF"
-};
-
-const char *coll_icu_alternate_handling_strs[] = {
-	"DEFAULT",
-	"NON_IGNORABLE",
-	"SHIFTED"
-};
-
-const char *coll_icu_case_first_strs[] = {
-	"DEFAULT",
-	"OFF",
-	"UPPER_FIRST",
-	"LOWER_FIRST"
-};
-
-const char *coll_icu_strength_strs[] = {
-	"DEFAULT",
-	"PRIMARY",
-	"SECONDARY",
-	"TERTIARY",
-	"QUATERNARY",
-	"IDENTICAL"
-};
-
 static int64_t
 icu_on_off_from_str(const char *str, uint32_t len)
 {
diff --git a/src/box/coll_def.h b/src/box/coll_def.h
index 7a1027a1e..4d475fab5 100644
--- a/src/box/coll_def.h
+++ b/src/box/coll_def.h
@@ -33,86 +33,15 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <coll_def.h>
 #include "opt_def.h"
 
 #if defined(__cplusplus)
 extern "C" {
 #endif /* defined(__cplusplus) */
 
-/**
- * The supported collation types
- */
-enum coll_type {
-	COLL_TYPE_ICU = 0,
-	coll_type_MAX,
-};
-
-extern const char *coll_type_strs[];
-
-/*
- * ICU collation options. See
- * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
- */
-
-/** Settings for simple ICU on/off options */
-enum coll_icu_on_off {
-	COLL_ICU_DEFAULT = 0,
-	COLL_ICU_ON,
-	COLL_ICU_OFF,
-	coll_icu_on_off_MAX
-};
-
-extern const char *coll_icu_on_off_strs[];
-
-/** Alternate handling ICU settings */
-enum coll_icu_alternate_handling {
-	COLL_ICU_AH_DEFAULT = 0,
-	COLL_ICU_AH_NON_IGNORABLE,
-	COLL_ICU_AH_SHIFTED,
-	coll_icu_alternate_handling_MAX
-};
-
-extern const char *coll_icu_alternate_handling_strs[];
-
-/** Case first ICU settings */
-enum coll_icu_case_first {
-	COLL_ICU_CF_DEFAULT = 0,
-	COLL_ICU_CF_OFF,
-	COLL_ICU_CF_UPPER_FIRST,
-	COLL_ICU_CF_LOWER_FIRST,
-	coll_icu_case_first_MAX
-};
-
-extern const char *coll_icu_case_first_strs[];
-
-/** Strength ICU settings */
-enum coll_icu_strength {
-	COLL_ICU_STRENGTH_DEFAULT = 0,
-	COLL_ICU_STRENGTH_PRIMARY,
-	COLL_ICU_STRENGTH_SECONDARY,
-	COLL_ICU_STRENGTH_TERTIARY,
-	COLL_ICU_STRENGTH_QUATERNARY,
-	COLL_ICU_STRENGTH_IDENTICAL,
-	coll_icu_strength_MAX
-};
-
-extern const char *coll_icu_strength_strs[];
-
-/** Collection of ICU settings */
-struct coll_icu_def {
-	enum coll_icu_on_off french_collation;
-	enum coll_icu_alternate_handling alternate_handling;
-	enum coll_icu_case_first case_first;
-	enum coll_icu_on_off case_level;
-	enum coll_icu_on_off normalization_mode;
-	enum coll_icu_strength strength;
-	enum coll_icu_on_off numeric_collation;
-};
-
-/**
- * Definition of a collation.
- */
-struct coll_def {
+/** Box collation definition. */
+struct box_coll_def {
 	/** Perconal ID */
 	uint32_t id;
 	/** Owner ID */
@@ -120,13 +49,8 @@ struct coll_def {
 	/** Collation name. */
 	size_t name_len;
 	const char *name;
-	/** Locale. */
-	size_t locale_len;
-	const char *locale;
-	/** Collation type. */
-	enum coll_type type;
-	/** Type specific options. */
-	struct coll_icu_def icu;
+	/** Core collation definition. */
+	struct coll_def base;
 };
 
 extern const struct opt_def coll_icu_opts_reg[];
diff --git a/src/box/key_def.cc b/src/box/key_def.cc
index 45997ae83..8f08cfd22 100644
--- a/src/box/key_def.cc
+++ b/src/box/key_def.cc
@@ -156,16 +156,18 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count)
 		struct key_part_def *part = &parts[i];
 		struct coll *coll = NULL;
 		if (part->coll_id != COLL_NONE) {
-			coll = coll_by_id(part->coll_id);
-			if (coll == NULL) {
+			struct box_coll *box_coll =
+				box_coll_by_id(part->coll_id);
+			if (box_coll == NULL) {
 				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
 					 i + 1, "collation was not found by ID");
 				key_def_delete(def);
 				return NULL;
 			}
+			coll = box_coll->base;
 		}
 		key_def_set_part(def, i, part->fieldno, part->type,
-				 part->is_nullable, coll);
+				 part->is_nullable, coll, part->coll_id);
 	}
 	return def;
 }
@@ -179,8 +181,7 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts)
 		part_def->fieldno = part->fieldno;
 		part_def->type = part->type;
 		part_def->is_nullable = part->is_nullable;
-		part_def->coll_id = (part->coll != NULL ?
-				     part->coll->id : COLL_NONE);
+		part_def->coll_id = part->coll_id;
 	}
 }
 
@@ -194,7 +195,8 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 	for (uint32_t item = 0; item < part_count; ++item) {
 		key_def_set_part(key_def, item, fields[item],
 				 (enum field_type)types[item],
-				 key_part_def_default.is_nullable, NULL);
+				 key_part_def_default.is_nullable, NULL,
+				 COLL_NONE);
 	}
 	return key_def;
 }
@@ -246,7 +248,8 @@ key_part_cmp(const struct key_part *parts1, uint32_t part_count1,
 
 void
 key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
-		 enum field_type type, bool is_nullable, struct coll *coll)
+		 enum field_type type, bool is_nullable, struct coll *coll,
+		 uint32_t coll_id)
 {
 	assert(part_no < def->part_count);
 	assert(type < field_type_MAX);
@@ -255,6 +258,7 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
 	def->parts[part_no].fieldno = fieldno;
 	def->parts[part_no].type = type;
 	def->parts[part_no].coll = coll;
+	def->parts[part_no].coll_id = coll_id;
 	column_mask_set_fieldno(&def->column_mask, fieldno);
 	/**
 	 * When all parts are set, initialize the tuple
@@ -554,7 +558,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 	end = part + first->part_count;
 	for (; part != end; part++) {
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
-				 part->is_nullable, part->coll);
+				 part->is_nullable, part->coll, part->coll_id);
 	}
 
 	/* Set-append second key def's part to the new key def. */
@@ -564,7 +568,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 		if (key_def_find(first, part->fieldno))
 			continue;
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
-				 part->is_nullable, part->coll);
+				 part->is_nullable, part->coll, part->coll_id);
 	}
 	return new_def;
 }
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 12016a51a..0e9b5f5f3 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -68,6 +68,8 @@ struct key_part {
 	uint32_t fieldno;
 	/** Type of the tuple field */
 	enum field_type type;
+	/** Collation ID for string comparison. */
+	uint32_t coll_id;
 	/** Collation definition for string comparison */
 	struct coll *coll;
 	/** True if a part can store NULLs. */
@@ -249,7 +251,8 @@ key_def_dump_parts(const struct key_def *def, struct key_part_def *parts);
  */
 void
 key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
-		 enum field_type type, bool is_nullable, struct coll *coll);
+		 enum field_type type, bool is_nullable, struct coll *coll,
+		 uint32_t coll_id);
 
 /**
  * Update 'has_optional_parts' of @a key_def with correspondence
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 333b6370f..385c2374a 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -46,6 +46,7 @@ extern "C" {
 #include "box/txn.h"
 #include "box/vclock.h" /* VCLOCK_MAX */
 #include "box/sequence.h"
+#include "box/coll_cache.h"
 
 /**
  * Trigger function for all spaces
@@ -291,8 +292,11 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
 			lua_pushboolean(L, part->is_nullable);
 			lua_setfield(L, -2, "is_nullable");
 
-			if (part->coll != NULL) {
-				lua_pushstring(L, part->coll->name);
+			if (part->coll_id != COLL_NONE) {
+				struct box_coll *coll =
+					box_coll_by_id(part->coll_id);
+				assert(coll != NULL);
+				lua_pushstring(L, coll->name);
 				lua_setfield(L, -2, "collation");
 			}
 
diff --git a/src/box/schema.cc b/src/box/schema.cc
index 1b96f978c..8df4aa73b 100644
--- a/src/box/schema.cc
+++ b/src/box/schema.cc
@@ -281,13 +281,13 @@ schema_init()
 	auto key_def_guard = make_scoped_guard([&] { key_def_delete(key_def); });
 
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
-			 FIELD_TYPE_STRING, false, NULL);
+			 FIELD_TYPE_STRING, false, NULL, COLL_NONE);
 	sc_space_new(BOX_SCHEMA_ID, "_schema", key_def, &on_replace_schema,
 		     NULL);
 
 	/* _space - home for all spaces. */
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
-			 FIELD_TYPE_UNSIGNED, false, NULL);
+			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
 
 	/* _collation - collation description. */
 	sc_space_new(BOX_COLLATION_ID, "_collation", key_def,
@@ -335,10 +335,10 @@ schema_init()
 		diag_raise();
 	/* space no */
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */,
-			 FIELD_TYPE_UNSIGNED, false, NULL);
+			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
 	/* index no */
 	key_def_set_part(key_def, 1 /* part no */, 1 /* field no */,
-			 FIELD_TYPE_UNSIGNED, false, NULL);
+			 FIELD_TYPE_UNSIGNED, false, NULL, COLL_NONE);
 	sc_space_new(BOX_INDEX_ID, "_index", key_def,
 		     &alter_space_on_replace_index, &on_stmt_begin_index);
 }
diff --git a/src/box/tuple.c b/src/box/tuple.c
index d4760f3b1..665af2ba9 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -207,7 +207,7 @@ tuple_init(field_name_hash_f hash)
 
 	box_tuple_last = NULL;
 
-	if (coll_cache_init() != 0)
+	if (box_coll_cache_init() != 0)
 		return -1;
 
 	return 0;
@@ -260,7 +260,7 @@ tuple_free(void)
 
 	tuple_format_free();
 
-	coll_cache_destroy();
+	box_coll_cache_destroy();
 }
 
 box_tuple_format_t *
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index cfee00496..c82995d1a 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -32,7 +32,7 @@
 #include "tuple.h"
 #include "trivia/util.h" /* NOINLINE */
 #include <math.h>
-#include "coll_def.h"
+#include <coll.h>
 
 /* {{{ tuple_compare */
 
@@ -295,8 +295,7 @@ mp_compare_str(const char *field_a, const char *field_b)
 }
 
 static inline int
-mp_compare_str_coll(const char *field_a, const char *field_b,
-		    struct coll *coll)
+mp_compare_str_coll(const char *field_a, const char *field_b, struct coll *coll)
 {
 	uint32_t size_a = mp_decode_strl(&field_a);
 	uint32_t size_b = mp_decode_strl(&field_b);
diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
index 0fa8ea561..a2a237b4a 100644
--- a/src/box/tuple_hash.cc
+++ b/src/box/tuple_hash.cc
@@ -28,11 +28,9 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
 #include "tuple_hash.h"
-
+#include <coll.h>
 #include "third_party/PMurHash.h"
-#include "coll.h"
 
 /* Tuple and key hasher */
 namespace {
diff --git a/src/coll.c b/src/coll.c
new file mode 100644
index 000000000..eacb643f2
--- /dev/null
+++ b/src/coll.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "coll.h"
+#include "third_party/PMurHash.h"
+#include "diag.h"
+#include <unicode/ucol.h>
+#include <trivia/config.h>
+
+enum {
+	MAX_HASH_BUFFER = 1024, /* Size of one sort key chunk to hash. */
+	MAX_LOCALE = 1024, /* Size of a locale buffer, with final NUL. */
+};
+
+/** Compare two strings using the collator of an ICU collation. */
+static int
+coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
+	     const struct coll *coll)
+{
+	assert(coll->icu.collator != NULL);
+	UErrorCode status = U_ZERO_ERROR;
+	UCollationResult result;
+#ifdef HAVE_ICU_STRCOLLUTF8
+	result = ucol_strcollUTF8(coll->icu.collator, s, slen, t, tlen,
+				  &status);
+#else
+	/* No direct UTF-8 comparison: go through character iterators. */
+	UCharIterator s_iter, t_iter;
+	uiter_setUTF8(&s_iter, s, slen);
+	uiter_setUTF8(&t_iter, t, tlen);
+	result = ucol_strcollIter(coll->icu.collator, &s_iter, &t_iter,
+				  &status);
+#endif
+	assert(!U_FAILURE(status));
+	return (int)result;
+}
+
+/** Hash a string by feeding its ICU sort key to PMurHash in chunks. */
+static uint32_t
+coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
+	      struct coll *coll)
+{
+	uint8_t chunk[MAX_HASH_BUFFER];
+	uint32_t key_state[2] = {0, 0};
+	uint32_t hashed = 0;
+	UErrorCode status = U_ZERO_ERROR;
+	UCharIterator it;
+	uiter_setUTF8(&it, s, s_len);
+	/* The loop stops at the first chunk that does not fill the buffer. */
+	for (int32_t len = MAX_HASH_BUFFER; len == MAX_HASH_BUFFER; ) {
+		len = ucol_nextSortKeyPart(coll->icu.collator, &it, key_state,
+					   chunk, MAX_HASH_BUFFER, &status);
+		PMurHash32_Process(ph, pcarry, chunk, len);
+		hashed += len;
+	}
+	return hashed;
+}
+
+/** Translate a simple on/off option into an ICU attribute value. */
+static UColAttributeValue
+coll_icu_on_off_value(enum coll_icu_on_off w)
+{
+	return w == COLL_ICU_ON ? UCOL_ON :
+	       w == COLL_ICU_OFF ? UCOL_OFF : UCOL_DEFAULT;
+}
+
+/**
+ * Set one attribute of an ICU collator.
+ * @param collator Collator to tune.
+ * @param attr ICU attribute to set.
+ * @param value New value of the attribute.
+ * @param name Option name to put into the error message.
+ * @retval  0 Success.
+ * @retval -1 ICU reported a failure, diag is set.
+ */
+static int
+coll_icu_set_attr(struct UCollator *collator, UColAttribute attr,
+		  UColAttributeValue value, const char *name)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	ucol_setAttribute(collator, attr, value, &status);
+	if (U_FAILURE(status)) {
+		diag_set(IllegalParams, tt_sprintf("failed to set %s: %s",
+			 name, u_errorName(status)));
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Set up ICU collator and init cmp and hash members of collation.
+ * The collator is stored in @a coll only on success; on failure
+ * it is closed here, so a failed call does not leak it.
+ * @param coll Collation to set up.
+ * @param def Collation definition.
+ * @retval  0 Success.
+ * @retval -1 Illegal parameters or memory error.
+ */
+static int
+coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
+{
+	if (def->locale_len >= MAX_LOCALE) {
+		diag_set(IllegalParams, "too long locale");
+		return -1;
+	}
+	/* ucol_open() is given a NUL-terminated copy of the locale. */
+	char locale[MAX_LOCALE];
+	memcpy(locale, def->locale, def->locale_len);
+	locale[def->locale_len] = '\0';
+	UErrorCode status = U_ZERO_ERROR;
+	struct UCollator *collator = ucol_open(locale, &status);
+	if (U_FAILURE(status)) {
+		diag_set(IllegalParams, u_errorName(status));
+		return -1;
+	}
+	const struct coll_icu_def *icu = &def->icu;
+	/* Options left at their DEFAULT value are not touched. */
+	if (icu->french_collation != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attr(collator, UCOL_FRENCH_COLLATION,
+			      coll_icu_on_off_value(icu->french_collation),
+			      "french_collation") != 0)
+		goto error;
+	if (icu->alternate_handling != COLL_ICU_AH_DEFAULT) {
+		enum coll_icu_alternate_handling w = icu->alternate_handling;
+		UColAttributeValue v =
+			w == COLL_ICU_AH_NON_IGNORABLE ? UCOL_NON_IGNORABLE :
+			w == COLL_ICU_AH_SHIFTED ? UCOL_SHIFTED : UCOL_DEFAULT;
+		if (coll_icu_set_attr(collator, UCOL_ALTERNATE_HANDLING, v,
+				      "alternate_handling") != 0)
+			goto error;
+	}
+	if (icu->case_first != COLL_ICU_CF_DEFAULT) {
+		enum coll_icu_case_first w = icu->case_first;
+		UColAttributeValue v = w == COLL_ICU_CF_OFF ? UCOL_OFF :
+			w == COLL_ICU_CF_UPPER_FIRST ? UCOL_UPPER_FIRST :
+			w == COLL_ICU_CF_LOWER_FIRST ? UCOL_LOWER_FIRST :
+			UCOL_DEFAULT;
+		if (coll_icu_set_attr(collator, UCOL_CASE_FIRST, v,
+				      "case_first") != 0)
+			goto error;
+	}
+	if (icu->case_level != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attr(collator, UCOL_CASE_LEVEL,
+			      coll_icu_on_off_value(icu->case_level),
+			      "case_level") != 0)
+		goto error;
+	if (icu->normalization_mode != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attr(collator, UCOL_NORMALIZATION_MODE,
+			      coll_icu_on_off_value(icu->normalization_mode),
+			      "normalization_mode") != 0)
+		goto error;
+	if (icu->strength != COLL_ICU_STRENGTH_DEFAULT) {
+		enum coll_icu_strength w = icu->strength;
+		UColAttributeValue v =
+			w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
+			w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
+			w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
+			w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
+			w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
+			UCOL_DEFAULT;
+		if (coll_icu_set_attr(collator, UCOL_STRENGTH, v,
+				      "strength") != 0)
+			goto error;
+	}
+	if (icu->numeric_collation != COLL_ICU_DEFAULT &&
+	    coll_icu_set_attr(collator, UCOL_NUMERIC_COLLATION,
+			      coll_icu_on_off_value(icu->numeric_collation),
+			      "numeric_collation") != 0)
+		goto error;
+	/* Publish the collator only when it is fully configured. */
+	coll->icu.collator = collator;
+	coll->cmp = coll_icu_cmp;
+	coll->hash = coll_icu_hash;
+	return 0;
+error:
+	ucol_close(collator);
+	return -1;
+}
+
+struct coll *
+coll_new(const struct coll_def *def)
+{
+	assert(def->type == COLL_TYPE_ICU);
+	struct coll *new_coll = (struct coll *) malloc(sizeof(*new_coll));
+	if (new_coll == NULL) {
+		diag_set(OutOfMemory, sizeof(*new_coll), "malloc", "coll");
+		return NULL;
+	}
+	new_coll->type = def->type;
+	new_coll->refs = 1;
+	if (coll_icu_init_cmp(new_coll, def) == 0)
+		return new_coll;
+	/* Setup failed and has already set diag. */
+	free(new_coll);
+	return NULL;
+}
+
+void
+coll_unref(struct coll *coll)
+{
+	assert(coll->refs > 0);
+	if (--coll->refs != 0)
+		return;
+	ucol_close(coll->icu.collator);
+	free(coll);
+}
diff --git a/src/coll.h b/src/coll.h
new file mode 100644
index 000000000..8798d9491
--- /dev/null
+++ b/src/coll.h
@@ -0,0 +1,98 @@
+#ifndef TARANTOOL_COLL_H_INCLUDED
+#define TARANTOOL_COLL_H_INCLUDED
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "coll_def.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct coll;
+/** Compare strings s and t; implemented by coll_icu_cmp(). */
+typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
+			  size_t t_len, const struct coll *coll);
+/** Hash string s into ph/pcarry; implemented by coll_icu_hash(). */
+typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
+				uint32_t *pcarry, struct coll *coll);
+
+struct UCollator;
+
+/** ICU collation specific data. */
+struct coll_icu {
+	struct UCollator *collator; /* Used by the comparator and hash. */
+};
+
+/**
+ * A core collation. It has no identity of its own (name, id or
+ * owner), only the functional part made from locale and ICU settings.
+ */
+struct coll {
+	/** Collation type. */
+	enum coll_type type;
+	/** Type specific data. */
+	struct coll_icu icu;
+	/** String comparator. */
+	coll_cmp_f cmp;
+	/** String hash function. */
+	coll_hash_f hash;
+	/** Reference counter. */
+	int refs;
+};
+
+/**
+ * Create a core collation from a definition.
+ * @param def Core collation definition.
+ * @retval NULL Illegal parameters or memory error, diag is set.
+ * @retval not NULL Collation with reference counter equal to 1.
+ */
+struct coll *
+coll_new(const struct coll_def *def);
+
+/** Take one more reference to a collation. */
+static inline void
+coll_ref(struct coll *coll)
+{
+	coll->refs += 1;
+}
+
+/** Decrement reference counter. Delete the collation at 0. */
+void
+coll_unref(struct coll *coll);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_COLL_H_INCLUDED */
diff --git a/src/coll_def.c b/src/coll_def.c
new file mode 100644
index 000000000..df58caca8
--- /dev/null
+++ b/src/coll_def.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "coll_def.h"
+
+const char *coll_type_strs[] = {
+	"ICU" /* COLL_TYPE_ICU */
+};
+
+const char *coll_icu_on_off_strs[] = {
+	"DEFAULT", /* COLL_ICU_DEFAULT */
+	"ON", /* COLL_ICU_ON */
+	"OFF" /* COLL_ICU_OFF */
+};
+
+const char *coll_icu_alternate_handling_strs[] = {
+	"DEFAULT", /* COLL_ICU_AH_DEFAULT */
+	"NON_IGNORABLE", /* COLL_ICU_AH_NON_IGNORABLE */
+	"SHIFTED" /* COLL_ICU_AH_SHIFTED */
+};
+
+const char *coll_icu_case_first_strs[] = {
+	"DEFAULT", /* COLL_ICU_CF_DEFAULT */
+	"OFF", /* COLL_ICU_CF_OFF */
+	"UPPER_FIRST", /* COLL_ICU_CF_UPPER_FIRST */
+	"LOWER_FIRST" /* COLL_ICU_CF_LOWER_FIRST */
+};
+
+const char *coll_icu_strength_strs[] = {
+	"DEFAULT", /* COLL_ICU_STRENGTH_DEFAULT */
+	"PRIMARY", /* COLL_ICU_STRENGTH_PRIMARY */
+	"SECONDARY", /* COLL_ICU_STRENGTH_SECONDARY */
+	"TERTIARY", /* COLL_ICU_STRENGTH_TERTIARY */
+	"QUATERNARY", /* COLL_ICU_STRENGTH_QUATERNARY */
+	"IDENTICAL" /* COLL_ICU_STRENGTH_IDENTICAL */
+};
diff --git a/src/coll_def.h b/src/coll_def.h
new file mode 100644
index 000000000..c8921b41a
--- /dev/null
+++ b/src/coll_def.h
@@ -0,0 +1,115 @@
+#ifndef TARANTOOL_COLL_DEF_H_INCLUDED
+#define TARANTOOL_COLL_DEF_H_INCLUDED
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <stddef.h>
+#include <stdint.h>
+
+/** The supported collation types. */
+enum coll_type {
+	COLL_TYPE_ICU = 0, /* Named "ICU" in coll_type_strs. */
+	coll_type_MAX, /* Number of the types above. */
+};
+
+extern const char *coll_type_strs[];
+
+/*
+ * ICU collation options. See
+ * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
+ */
+
+/** Settings for simple ICU on/off options. */
+enum coll_icu_on_off {
+	COLL_ICU_DEFAULT = 0, /* The attribute is not set at all. */
+	COLL_ICU_ON, /* Set the attribute to UCOL_ON. */
+	COLL_ICU_OFF, /* Set the attribute to UCOL_OFF. */
+	coll_icu_on_off_MAX
+};
+
+extern const char *coll_icu_on_off_strs[];
+
+/** Alternate handling ICU settings (UCOL_ALTERNATE_HANDLING). */
+enum coll_icu_alternate_handling {
+	COLL_ICU_AH_DEFAULT = 0, /* The attribute is not set at all. */
+	COLL_ICU_AH_NON_IGNORABLE, /* UCOL_NON_IGNORABLE */
+	COLL_ICU_AH_SHIFTED, /* UCOL_SHIFTED */
+	coll_icu_alternate_handling_MAX
+};
+
+extern const char *coll_icu_alternate_handling_strs[];
+
+/** Case first ICU settings (UCOL_CASE_FIRST). */
+enum coll_icu_case_first {
+	COLL_ICU_CF_DEFAULT = 0, /* The attribute is not set at all. */
+	COLL_ICU_CF_OFF, /* UCOL_OFF */
+	COLL_ICU_CF_UPPER_FIRST, /* UCOL_UPPER_FIRST */
+	COLL_ICU_CF_LOWER_FIRST, /* UCOL_LOWER_FIRST */
+	coll_icu_case_first_MAX
+};
+
+extern const char *coll_icu_case_first_strs[];
+
+/** Strength ICU settings (UCOL_STRENGTH). */
+enum coll_icu_strength {
+	COLL_ICU_STRENGTH_DEFAULT = 0, /* The attribute is not set. */
+	COLL_ICU_STRENGTH_PRIMARY, /* UCOL_PRIMARY */
+	COLL_ICU_STRENGTH_SECONDARY, /* UCOL_SECONDARY */
+	COLL_ICU_STRENGTH_TERTIARY, /* UCOL_TERTIARY */
+	COLL_ICU_STRENGTH_QUATERNARY, /* UCOL_QUATERNARY */
+	COLL_ICU_STRENGTH_IDENTICAL, /* UCOL_IDENTICAL */
+	coll_icu_strength_MAX
+};
+
+extern const char *coll_icu_strength_strs[];
+
+/** Collection of ICU settings; DEFAULT leaves an attribute unset. */
+struct coll_icu_def {
+	enum coll_icu_on_off french_collation;
+	enum coll_icu_alternate_handling alternate_handling;
+	enum coll_icu_case_first case_first;
+	enum coll_icu_on_off case_level;
+	enum coll_icu_on_off normalization_mode;
+	enum coll_icu_strength strength;
+	enum coll_icu_on_off numeric_collation;
+};
+
+/** Core collation definition. */
+struct coll_def {
+	/** Locale name and its length, not counting a terminator. */
+	size_t locale_len;
+	const char *locale;
+	/** Collation type. */
+	enum coll_type type;
+	/** Type specific options. */
+	struct coll_icu_def icu;
+};
+
+#endif /* TARANTOOL_COLL_DEF_H_INCLUDED */
diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
index d77959606..17f26ea07 100644
--- a/test/unit/coll.cpp
+++ b/test/unit/coll.cpp
@@ -1,9 +1,9 @@
-#include "box/coll.h"
 #include <iostream>
 #include <vector>
 #include <algorithm>
 #include <string.h>
-#include <box/coll_def.h>
+#include <coll_def.h>
+#include <coll.h>
 #include <assert.h>
 #include <msgpuck.h>
 #include <diag.h>
@@ -51,8 +51,6 @@ manual_test()
 	def.locale = "ru_RU";
 	def.locale_len = strlen(def.locale);
 	def.type = COLL_TYPE_ICU;
-	def.name = "test";
-	def.name_len = strlen(def.name);
 	struct coll *coll;
 
 	cout << " -- default ru_RU -- " << endl;
@@ -136,8 +134,6 @@ hash_test()
 	def.locale = "ru_RU";
 	def.locale_len = strlen(def.locale);
 	def.type = COLL_TYPE_ICU;
-	def.name = "test";
-	def.name_len = strlen(def.name);
 	struct coll *coll;
 
 	/* Case sensitive */
-- 
2.15.1 (Apple Git-101)

  parent reply	other threads:[~2018-05-15 19:54 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-15 19:54 [tarantool-patches] [PATCH v3 0/4] Lua utf8 module Vladislav Shpilevoy
2018-05-15 19:54 ` [tarantool-patches] [PATCH v3 1/4] error: introduce error rebulding API Vladislav Shpilevoy
2018-05-16 17:06   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-15 19:54 ` Vladislav Shpilevoy [this message]
2018-05-16 17:07   ` [tarantool-patches] Re: [PATCH v3 2/4] collation: split collation into core and box objects Vladislav Shpilevoy
2018-05-16 17:17     ` Konstantin Osipov
2018-05-16 17:19       ` Vladislav Shpilevoy
2018-05-17 19:23   ` Vladislav Shpilevoy
2018-05-15 19:54 ` [tarantool-patches] [PATCH v3 3/4] collation: introduce collation fingerprint Vladislav Shpilevoy
2018-05-17 19:24   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-15 19:54 ` [tarantool-patches] [PATCH v3 4/4] lua: introduce utf8 built-in globaly visible module Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b008cb67706a1f246b9058fa1de3fb90bc04603c.1526414017.git.v.shpilevoy@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH v3 2/4] collation: split collation into core and box objects' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox