[tarantool-patches] Re: [PATCH v2 1/1] box: update collation strength option.

Imeev Mergen imeevma at tarantool.org
Mon Oct 15 21:30:27 MSK 2018


Hello! Thank you for review! After discussion decided to left as
it is now.

Nikita, can you do next review? Links and diff below.

On 10/13/2018 03:17 PM, Vladislav Shpilevoy wrote:
> Hi!
>
>>      Replace couldn't be used in space box.space._collation:
>>      tarantool> function update_collation_strength_field()
>>               >     local _collation =
>>      box.space[box.schema.COLLATION_ID]
>>               >     for _, collation in ipairs(_collation:select()) do
>>               >         if (collation.opts.strength == nil) then
>>               >             local new_collation =
>>      _collation:get{collation.id}:totable()
>>               >             new_collation[6].strength = 'identical'
>>               >             _collation:replace(new_collation)
>>               >         end
>>               >     end
>>               > end
>>      ---
>>      ...
>>      tarantool> update_collation_strength_field()
>>      ---
>>      - error: collation does not support alter
>>      ...
>
> I just found that it is not normal. Triggers on system
> spaces are turned off before upgrade. But for _collation
> space they are not. Please, turn off the triggers and use
> replace. See function set_system_triggers in upgrade.lua.
>
>>  >>+        end
>>  >>        +    end
>>  >>        +end
>>  >>        +
>>  >>        +local function upgrade_to_2_1_1()
>>  >>        +    update_collation_strength_field()
>>  >>        +end
>>  >>        +
>>  >>        +
>>
*Issue:* https://github.com/tarantool/tarantool/issues/3573
*Branch:* 
https://github.com/tarantool/tarantool/tree/imeevma/gh-3573-fix-collation-strength
*
Patch:*

commit e55064f796d9320035db5fb2e3d5030601ee7333
Author: Mergen Imeev <imeevma at gmail.com>
Date:   Tue Sep 25 21:30:36 2018 +0300

     box: update collation strength option.

     At the moment all collations that don't have their "strength"
     option set works the same way as ones with this option set to
     "identical". It is not efficient, according to ICU Comparison
     Levels. This patch updates this option of old collations and set
     its default value to "primary" for new ones.

     Closes #3573

diff --git a/src/box/bootstrap.snap b/src/box/bootstrap.snap
index 6573938..3d3232f 100644
Binary files a/src/box/bootstrap.snap and b/src/box/bootstrap.snap differ
diff --git a/src/box/coll_id_def.c b/src/box/coll_id_def.c
index 9fe0cda..43f3b63 100644
--- a/src/box/coll_id_def.c
+++ b/src/box/coll_id_def.c
@@ -55,8 +55,8 @@ icu_case_first_from_str(const char *str, uint32_t len)
  static int64_t
  icu_strength_from_str(const char *str, uint32_t len)
  {
-    return strnindex(coll_icu_strength_strs + 1, str, len,
-             coll_icu_strength_MAX - 1) + 1;
+    return strnindex(coll_icu_strength_strs, str, len,
+             coll_icu_strength_MAX);
  }

  const struct opt_def coll_icu_opts_reg[] = {
diff --git a/src/box/lua/upgrade.lua b/src/box/lua/upgrade.lua
index d9c2ae4..66dcef4 100644
--- a/src/box/lua/upgrade.lua
+++ b/src/box/lua/upgrade.lua
@@ -578,6 +578,27 @@ local function upgrade_to_2_1_0()
      box.space._schema:format(format)
  end

+--------------------------------------------------------------------------------
+-- Tarantool 2.1.1
+--------------------------------------------------------------------------------
+
+function update_collation_strength_field()
+    local _collation = box.space[box.schema.COLLATION_ID]
+    for _, collation in ipairs(_collation:select()) do
+        if (collation.opts.strength == nil) then
+            local new_collation = _collation:get{collation.id}:totable()
+            new_collation[6].strength = 'identical'
+            _collation:delete{collation.id}
+            _collation:insert(new_collation)
+        end
+    end
+end
+
+local function upgrade_to_2_1_1()
+    update_collation_strength_field()
+end
+
+
  local function get_version()
      local version = box.space._schema:get{'version'}
      if version == nil then
@@ -605,7 +626,8 @@ local function upgrade(options)
          {version = mkversion(1, 7, 7), func = upgrade_to_1_7_7, auto = 
true},
          {version = mkversion(1, 10, 0), func = upgrade_to_1_10_0, auto 
= true},
          {version = mkversion(1, 10, 2), func = upgrade_to_1_10_2, auto 
= true},
-        {version = mkversion(2, 1, 0), func = upgrade_to_2_1_0, auto = 
true}
+        {version = mkversion(2, 1, 0), func = upgrade_to_2_1_0, auto = 
true},
+        {version = mkversion(2, 1, 1), func = upgrade_to_2_1_1, auto = 
true}
      }

      for _, handler in ipairs(handlers) do
diff --git a/src/coll.c b/src/coll.c
index 6a76f1f..9b719f2 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -193,22 +193,25 @@ coll_icu_init_cmp(struct coll *coll, const struct 
coll_def *def)
              return -1;
          }
      }
-    if (def->icu.strength != COLL_ICU_STRENGTH_DEFAULT) {
-        enum coll_icu_strength w = def->icu.strength;
-        UColAttributeValue v =
-            w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
-            w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
-            w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
-            w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
-            w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
-            UCOL_DEFAULT;
-        ucol_setAttribute(collator, UCOL_STRENGTH, v, &status);
-        if (U_FAILURE(status)) {
-            diag_set(CollationError, "failed to set strength: %s",
-                 u_errorName(status));
-            return -1;
-        }
+    /*
+     * Set "strength" option of collation. Default values is
+     * "primary".
+     */
+    enum coll_icu_strength w = def->icu.strength;
+    UColAttributeValue v =
+        w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
+        w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
+        w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
+        w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
+        w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
+        UCOL_DEFAULT;
+    ucol_setAttribute(collator, UCOL_STRENGTH, v, &status);
+    if (U_FAILURE(status)) {
+        diag_set(CollationError, "failed to set strength: %s",
+             u_errorName(status));
+        return -1;
      }
+
      if (def->icu.numeric_collation != COLL_ICU_DEFAULT) {
          enum coll_icu_on_off w = def->icu.numeric_collation;
          UColAttributeValue v = w == COLL_ICU_ON ? UCOL_ON :
diff --git a/src/coll_def.c b/src/coll_def.c
index df58cac..1323643 100644
--- a/src/coll_def.c
+++ b/src/coll_def.c
@@ -54,7 +54,6 @@ const char *coll_icu_case_first_strs[] = {
  };

  const char *coll_icu_strength_strs[] = {
-    "DEFAULT",
      "PRIMARY",
      "SECONDARY",
      "TERTIARY",
diff --git a/src/coll_def.h b/src/coll_def.h
index 7c20abf..2a1cb68 100644
--- a/src/coll_def.h
+++ b/src/coll_def.h
@@ -84,8 +84,7 @@ extern const char *coll_icu_case_first_strs[];

  /** Strength ICU settings */
  enum coll_icu_strength {
-    COLL_ICU_STRENGTH_DEFAULT = 0,
-    COLL_ICU_STRENGTH_PRIMARY,
+    COLL_ICU_STRENGTH_PRIMARY = 0,
      COLL_ICU_STRENGTH_SECONDARY,
      COLL_ICU_STRENGTH_TERTIARY,
      COLL_ICU_STRENGTH_QUATERNARY,
diff --git a/test/box-py/bootstrap.result b/test/box-py/bootstrap.result
index 506aca3..2532b70 100644
--- a/test/box-py/bootstrap.result
+++ b/test/box-py/bootstrap.result
@@ -5,7 +5,7 @@ box.space._schema:select{}
  ---
  - - ['cluster', '<cluster uuid>']
    - ['max_id', 511]
-  - ['version', 2, 1, 0]
+  - ['version', 2, 1, 1]
  ...
  box.space._cluster:select{}
  ---
diff --git a/test/box/ddl.result b/test/box/ddl.result
index c9a8e96..396318f 100644
--- a/test/box/ddl.result
+++ b/test/box/ddl.result
@@ -484,14 +484,14 @@ box.space._collation:auto_increment{'test', 0, 
'ICU', 'ru_RU', setmap{}}
  ...
  box.space._collation:select{}
  ---
-- - [1, 'unicode', 1, 'ICU', '', {}]
+- - [1, 'unicode', 1, 'ICU', '', {'strength': 'identical'}]
    - [2, 'unicode_ci', 1, 'ICU', '', {'strength': 'primary'}]
    - [3, 'test', 0, 'ICU', 'ru_RU', {}]
  ...
  test_run:cmd('restart server default')
  box.space._collation:select{}
  ---
-- - [1, 'unicode', 1, 'ICU', '', {}]
+- - [1, 'unicode', 1, 'ICU', '', {'strength': 'identical'}]
    - [2, 'unicode_ci', 1, 'ICU', '', {'strength': 'primary'}]
    - [3, 'test', 0, 'ICU', 'ru_RU', {}]
  ...
@@ -512,7 +512,7 @@ utf8.cmp('abc', 'def')
  ...
  box.space._collation:replace(t)
  ---
-- [1, 'unicode', 1, 'ICU', '', {}]
+- [1, 'unicode', 1, 'ICU', '', {'strength': 'identical'}]
  ...
  --
  -- gh-2839: allow to store custom fields in field definition.
diff --git a/test/box/tree_pk.result b/test/box/tree_pk.result
index df3c78b..7366ed8 100644
--- a/test/box/tree_pk.result
+++ b/test/box/tree_pk.result
@@ -690,7 +690,7 @@ s:drop()
  ...
  --https://github.com/tarantool/tarantool/issues/2649
  -- create standart index and alter it to collation index
-box.internal.collation.create('test', 'ICU', 'ru-RU')
+box.internal.collation.create('test', 'ICU', 'ru-RU', {strength = 
'identical'})
  ---
  ...
  box.internal.collation.create('test-ci', 'ICU', 'ru-RU', {strength = 
'secondary'})
diff --git a/test/box/tree_pk.test.lua b/test/box/tree_pk.test.lua
index 1190ab4..b4ee65c 100644
--- a/test/box/tree_pk.test.lua
+++ b/test/box/tree_pk.test.lua
@@ -238,7 +238,7 @@ s:drop()

  --https://github.com/tarantool/tarantool/issues/2649
  -- create standart index and alter it to collation index
-box.internal.collation.create('test', 'ICU', 'ru-RU')
+box.internal.collation.create('test', 'ICU', 'ru-RU', {strength = 
'identical'})
  box.internal.collation.create('test-ci', 'ICU', 'ru-RU', {strength = 
'secondary'})
  s = box.schema.space.create('test')
  i1 = s:create_index('i1', { type = 'tree', parts = {{1, 'str'}}, 
unique = true })
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 79ba9ab..5fdb8ee 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -110,3 +110,77 @@ cn:close()
  box.schema.user.revoke('guest', 'read,write,execute', 'universe')
  ---
  ...
+--
+-- gh-3573: Strength in the _collation space
+-- Collation without 'strength' option set now works as one with
+-- 'strength' set to 'primary'.
+--
+box.internal.collation.create('c0', 'ICU', 'unicode')
+---
+...
+box.internal.collation.create('c1', 'ICU', 'unicode', {strength='primary'})
+---
+...
+box.internal.collation.create('c2', 'ICU', 'unicode', 
{strength='secondary'})
+---
+...
+box.internal.collation.create('c5', 'ICU', 'unicode', 
{strength='identical'})
+---
+...
+box.sql.execute([[create table tc (id int primary key autoincrement, s0 
string collate "c0", s1 string collate "c1", s2 string collate "c2", s5 
string collate "c5")]])
+---
+...
+box.sql.execute([[insert into tc values (null, 'a', 'a', 'a', 'a')]])
+---
+...
+box.sql.execute([[insert into tc values (null, 'A', 'A', 'A', 'A')]])
+---
+...
+box.sql.execute([[insert into tc values (null, 'á', 'á', 'á', 'á')]])
+---
+...
+box.sql.execute([[insert into tc values (null, 'â', 'â', 'â', 'â')]])
+---
+...
+-- Should be same as the next one.
+box.sql.execute([[select * from tc where s0 = 'a']])
+---
+- - [1, 'a', 'a', 'a', 'a']
+  - [2, 'A', 'A', 'A', 'A']
+  - [3, 'á', 'á', 'á', 'á']
+  - [4, 'â', 'â', 'â', 'â']
+...
+-- Should return all records.
+box.sql.execute([[select * from tc where s1 = 'a']])
+---
+- - [1, 'a', 'a', 'a', 'a']
+  - [2, 'A', 'A', 'A', 'A']
+  - [3, 'á', 'á', 'á', 'á']
+  - [4, 'â', 'â', 'â', 'â']
+...
+-- Should return two records.
+box.sql.execute([[select * from tc where s2 = 'a']])
+---
+- - [1, 'a', 'a', 'a', 'a']
+  - [2, 'A', 'A', 'A', 'A']
+...
+-- Should return one record.
+box.sql.execute([[select * from tc where s5 = 'a']])
+---
+- - [1, 'a', 'a', 'a', 'a']
+...
+box.sql.execute([[drop table tc]])
+---
+...
+box.internal.collation.drop('c0')
+---
+...
+box.internal.collation.drop('c1')
+---
+...
+box.internal.collation.drop('c2')
+---
+...
+box.internal.collation.drop('c5')
+---
+...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index 935dea8..130245b 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -43,3 +43,32 @@ cn:execute('select 1 limit ? collate not_exist', {1})

  cn:close()
  box.schema.user.revoke('guest', 'read,write,execute', 'universe')
+
+--
+-- gh-3573: Strength in the _collation space
+-- Collation without 'strength' option set now works as one with
+-- 'strength' set to 'primary'.
+--
+box.internal.collation.create('c0', 'ICU', 'unicode')
+box.internal.collation.create('c1', 'ICU', 'unicode', {strength='primary'})
+box.internal.collation.create('c2', 'ICU', 'unicode', 
{strength='secondary'})
+box.internal.collation.create('c5', 'ICU', 'unicode', 
{strength='identical'})
+box.sql.execute([[create table tc (id int primary key autoincrement, s0 
string collate "c0", s1 string collate "c1", s2 string collate "c2", s5 
string collate "c5")]])
+box.sql.execute([[insert into tc values (null, 'a', 'a', 'a', 'a')]])
+box.sql.execute([[insert into tc values (null, 'A', 'A', 'A', 'A')]])
+box.sql.execute([[insert into tc values (null, 'á', 'á', 'á', 'á')]])
+box.sql.execute([[insert into tc values (null, 'â', 'â', 'â', 'â')]])
+-- Should be same as the next one.
+box.sql.execute([[select * from tc where s0 = 'a']])
+-- Should return all records.
+box.sql.execute([[select * from tc where s1 = 'a']])
+-- Should return two records.
+box.sql.execute([[select * from tc where s2 = 'a']])
+-- Should return one record.
+box.sql.execute([[select * from tc where s5 = 'a']])
+
+box.sql.execute([[drop table tc]])
+box.internal.collation.drop('c0')
+box.internal.collation.drop('c1')
+box.internal.collation.drop('c2')
+box.internal.collation.drop('c5')
diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
index 94374a7..acbc7f4 100644
--- a/test/unit/coll.cpp
+++ b/test/unit/coll.cpp
@@ -51,6 +51,7 @@ manual_test()
      memset(&def, 0, sizeof(def));
      snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU");
      def.type = COLL_TYPE_ICU;
+    def.icu.strength = COLL_ICU_STRENGTH_IDENTICAL;
      struct coll *coll;

      cout << " -- default ru_RU -- " << endl;
@@ -134,6 +135,7 @@ hash_test()
      struct coll *coll;

      /* Case sensitive */
+    def.icu.strength = COLL_ICU_STRENGTH_IDENTICAL;
      coll = coll_new(&def);
      assert(coll != NULL);
      cout << "Case sensitive" << endl;

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20181015/b68f951a/attachment.html>


More information about the Tarantool-patches mailing list