[tarantool-patches] Re: [PATCH v2 1/1] box: update collation strength option.

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Oct 10 14:18:10 MSK 2018



On 06/10/2018 12:04, imeevma at tarantool.org wrote:
> At the moment all collations that don't have their "strength"
> option set works the same way as ones with this option set to
> "identical". It is not efficient, according to ICU Comparison
> Levels. This patch updates this option of old collations and set
> its default value to "primary" for new ones.
> 
> Closes #3573
> ---
> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3573-fix-collation-strength
> Issue: https://github.com/tarantool/tarantool/issues/3573
> 
>   src/box/bootstrap.snap       | Bin 1888 -> 1886 bytes
>   src/box/coll_id_def.c        |   4 +-
>   src/box/lua/upgrade.lua      |  29 ++++++++++++++-
>   src/coll.c                   |  33 ++++++++--------
>   src/coll_def.c               |   1 -
>   src/coll_def.h               |   3 +-
>   test/box-py/bootstrap.result |   2 +-
>   test/box/ddl.result          |   6 +--
>   test/box/tree_pk.result      |   2 +-
>   test/box/tree_pk.test.lua    |   2 +-
>   test/sql/collation.result    |  87 +++++++++++++++++++++++++++++++++++++++++++
>   test/sql/collation.test.lua  |  32 ++++++++++++++++
>   test/unit/coll.cpp           |   2 +
>   13 files changed, 176 insertions(+), 27 deletions(-)
> 
> diff --git a/src/box/bootstrap.snap b/src/box/bootstrap.snap

1. Please, remove binary diff before sending an email.

> index 65739384a66d6ba4a538553ccf4677536ba15280..84b27c0d8b9d6d683692e2ef55034ffcc68d11d0 100644
> GIT binary patch
> delta 1884
> zcmV-i2c!7l4&Dxs7k at P^HZUz`Ff(N|W at R=CNp5p=VQyn(Iv_J<IA&xwVKpsbHZx@{
> zG-feoEjVOmHZ3+eGhs9}W;HQnWj6{|Lu_wjYdRo%eF_TIx(m9^2Ic_Hx7lmzr2qf`
> z001bpFZ}>e?KS}FOGFS!z*dm~D2k#jilV6AqMxq(Vp{_Az<(~`=k4N^J9QNuj=Z`N
> z=~K!63-eO4{Xa?goO8~~I%_^CBZ1<YnJQf6Sh at N@2|aY1nNsKg<N)~qVZO0ZrY}vD
> zy%3s9<K4NfXf{L+b=bqk3;S2c?QOCMxO?C6GZ^+!^Gx$zyD--w?m=0vdDwn;L5_Ra
> z)wQCd>0t|o-+y-Dh~($*C8WEVmHYnrJ?s76-+IqxnZ_~OWX>t2h{+{DrBamb3&R_{
> z;m#^i0F_EH-wt>E9?reGOaJa1M|9-I2^O^Z-q&Fr-e!`irB*7%IjR0I-ygdXsZ@$r
> zrW6m`p$oIkN~NgMQRS>Um8$bR)M at pqdwLxd&gtfHSbwLRQvC`i)w)n+f9GNDZZi|h
> zlyVA75fPvxtQad)dZGZJvM8eKGmha6=5D?nx8-c_J_zeu`v1DV+jWcK4cc+893buI
> zVX8r;QhfFBBJJm)9VdzbkxqxLn6=FsmP(}<_UA-89k%Q^X%-~K>}{G9><f;VmNfL9
> z#@04x5`P$pS*0n1pkL1_nK1_{m7>0%zmJtW*}uBmjeXzF$Dyk+jJDtPUE>@0o!5-$
> diff --git a/src/box/lua/upgrade.lua b/src/box/lua/upgrade.lua
> index d9c2ae4..07cace0 100644
> --- a/src/box/lua/upgrade.lua
> +++ b/src/box/lua/upgrade.lua
> @@ -578,6 +578,32 @@ local function upgrade_to_2_1_0()
>       box.space._schema:format(format)
>   end
>   
> +--------------------------------------------------------------------------------
> +-- Tarantool 2.1.1
> +--------------------------------------------------------------------------------
> +
> +function update_collation_strength_field()
> +    local _collation = box.space[box.schema.COLLATION_ID]
> +    local _format = _collation:format()
> +    for _, collation in ipairs(_collation:select()) do
> +        if (collation.opts.strength == nil) then
> +            local old_collation = _collation:get{collation.id}
> +            local new_collation = {}
> +            for _,field in ipairs(_format) do
> +                new_collation[field.name] = old_collation[field.name]
> +            end
> +            new_collation.opts.strength = 'identical'
> +            _collation:delete{old_collation.id}
> +            _collation:insert(_collation:frommap(new_collation))

2. This looks much simpler:

@@ -584,17 +584,11 @@ end
  
  function update_collation_strength_field()
      local _collation = box.space[box.schema.COLLATION_ID]
-    local _format = _collation:format()
      for _, collation in ipairs(_collation:select()) do
-        if (collation.opts.strength == nil) then
-            local old_collation = _collation:get{collation.id}
-            local new_collation = {}
-            for _,field in ipairs(_format) do
-                new_collation[field.name] = old_collation[field.name]
-            end
-            new_collation.opts.strength = 'identical'
-            _collation:delete{old_collation.id}
-            _collation:insert(_collation:frommap(new_collation))
+        if collation.opts.strength == nil then
+            local collation = _collation:get{collation.id}:totable()
+            collation[6].strength = 'identical'
+            _collation:replace(collation)
          end
      end
  end


I did not test it though. Please, apply and debug if
necessary.

> +        end
> +    end
> +end
> diff --git a/test/sql/collation.result b/test/sql/collation.result
> index 79ba9ab..3c53be1 100644
> --- a/test/sql/collation.result
> +++ b/test/sql/collation.result
> @@ -110,3 +110,90 @@ cn:close()
>   box.schema.user.revoke('guest', 'read,write,execute', 'universe')
>   ---
>   ...
> +--
> +-- gh-3573: Strength in the _collation space
> +-- Collation without 'strength' option set now works as one with
> +-- 'strength' set to 'primary'.
> +--
> +box.internal.collation.create('c0', 'ICU', 'unicode')
> +---
> +...
> +box.internal.collation.create('c1', 'ICU', 'unicode', {strength='primary'})
> +---
> +...
> +box.internal.collation.create('c2', 'ICU', 'unicode', {strength='secondary'})
> +---
> +...
> +box.internal.collation.create('c5', 'ICU', 'unicode', {strength='identical'})
> +---
> +...
> +box.sql.execute([[create table tc (id int primary key autoincrement, s0 string collate "c0", s1 string collate "c1", s2 string collate "c2", s5 string collate "c5")]])
> +---
> +...
> +box.sql.execute([[insert into tc values (null, 'a', 'a', 'a', 'a')]])
> +---
> +...
> +box.sql.execute([[insert into tc values (null, 'A', 'A', 'A', 'A')]])
> +---
> +...
> +box.sql.execute([[insert into tc values (null, 'á', 'á', 'á', 'á')]])
> +---
> +...
> +box.sql.execute([[insert into tc values (null, 'â', 'â', 'â', 'â')]])
> +---
> +...
> +box.sql.execute([[select * from tc where s0 = 'a']])
> +---
> +- - [1, 'a', 'a', 'a', 'a']
> +  - [2, 'A', 'A', 'A', 'A']
> +  - [3, 'á', 'á', 'á', 'á']
> +  - [4, 'â', 'â', 'â', 'â']
> +...
> +box.sql.execute([[select * from tc where s1 = 'a']])
> +---
> +- - [1, 'a', 'a', 'a', 'a']
> +  - [2, 'A', 'A', 'A', 'A']
> +  - [3, 'á', 'á', 'á', 'á']
> +  - [4, 'â', 'â', 'â', 'â']
> +...
> +box.sql.execute([[select * from tc where s2 = 'a']])
> +---
> +- - [1, 'a', 'a', 'a', 'a']
> +  - [2, 'A', 'A', 'A', 'A']
> +...
> +box.sql.execute([[select * from tc where s5 = 'a']])
> +---
> +- - [1, 'a', 'a', 'a', 'a']
> +...
> +a = box.sql.execute([[select id from tc where s0 = 'a']])
> +---
> +...
> +b = box.sql.execute([[select id from tc where s1 = 'a']])
> +---
> +...
> +count = 0
> +---
> +...
> +for k,v in pairs(a) do if (a[k][1] ~= b[k][1]) then count = count + 1 end end

3. You printed both a and b whole above and they are the same. Why do you
need this additional check in a cycle?




More information about the Tarantool-patches mailing list