From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 7F5D22C0CF for ; Sat, 6 Oct 2018 05:04:41 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id OUjlZrTSYP5C for ; Sat, 6 Oct 2018 05:04:41 -0400 (EDT) Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 8A90728697 for ; Sat, 6 Oct 2018 05:04:40 -0400 (EDT) From: imeevma@tarantool.org Subject: [tarantool-patches] [PATCH v2 1/1] box: update collation strength option. Date: Sat, 6 Oct 2018 12:04:36 +0300 Message-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: v.shpilevoy@tarantool.org, tarantool-patches@freelists.org At the moment all collations that don't have their "strength" option set works the same way as ones with this option set to "identical". It is not efficient, according to ICU Comparison Levels. This patch updates this option of old collations and set its default value to "primary" for new ones. Closes #3573 --- Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3573-fix-collation-strength Issue: https://github.com/tarantool/tarantool/issues/3573 src/box/bootstrap.snap | Bin 1888 -> 1886 bytes src/box/coll_id_def.c | 4 +- src/box/lua/upgrade.lua | 29 ++++++++++++++- src/coll.c | 33 ++++++++-------- src/coll_def.c | 1 - src/coll_def.h | 3 +- test/box-py/bootstrap.result | 2 +- test/box/ddl.result | 6 +-- test/box/tree_pk.result | 2 +- test/box/tree_pk.test.lua | 2 +- test/sql/collation.result | 87 +++++++++++++++++++++++++++++++++++++++++++ test/sql/collation.test.lua | 32 ++++++++++++++++ test/unit/coll.cpp | 2 + 13 files changed, 176 insertions(+), 27 deletions(-) diff --git a/src/box/bootstrap.snap b/src/box/bootstrap.snap index 65739384a66d6ba4a538553ccf4677536ba15280..84b27c0d8b9d6d683692e2ef55034ffcc68d11d0 100644 GIT binary patch delta 1884 zcmV-i2c!7l4&Dxs7k@P^HZUz`Ff(N|W@R=CNp5p=VQyn(Iv_Je?KS}FOGFS!z*dm~D2k#jilV6AqMxq(Vp{_Az<(~`=k4N^J9QNuj=Z`N z=~K!63-eO4{Xa?goO8~~I%_^CBZ10t|o-+y-Dh~($*C8WEVmHYnrJ?s76-+IqxnZ_~OWX>t2h{+{DrBamb3&R_{ z;m#^i0F_EH-wt>E9?reGOaJa1M|9-I2^O^Z-q&Fr-e!`irB*7%IjR0I-ygdXsZ@$r zrW6m`p$oIkN~NgMQRS>Um8$bR)M@pqdwLxd>fHSbwLRQvC`i)w)n+f9GNDZZi|h zlyVA75fPvxtQad)dZGZJvM8eKGmha6=5D?nx8-c_J_zeu`v1DV+jWcK4cc+893buI zVX8r;QhfFBBJJm)9VdzbkxqxLn6=FsmP(}<_UA-89k%Q^X%-~K>}{G9>0%zmJtW*}uBmjeXzF$Dyk+jJDtPUE>@0o!5-$ z4}+fD+=MY8^f<<4{1&sf`BI=?*#25J*Uhc5>+tKWmmjy5G}Vw-6(R*9wS!urR4ARAoEjXXnj4xAG^t9ZXhefDd6=1OW=^x24l*2MHptYx_!i$BM3Y5B-=FI5HbeVCXFlUQQrBZB(kuxJG9|mC^+KB#luSu3ECCM}c zQ-7%xm-5#sT@MWPmwR>n!bXml^zRGiyo_I+k*@LGo$If&LRwbI^d?j)McCywPuH@h z1-+&v;Q&eKyf)-vT4r@xs^89S4&qj6`grla-~IYsmg6K#M@yy+mrA8L&c5GHhr8R{ z{E1Mh6pvgkyt~Cs9|)C7F`QZ4kkdKoBjSgq<_tLC0K7W6^ z*5S+wLEEG8TwdE=0q~lyqKPgN~P$v2rxSW z`kDD)ftx-yR{W@#J}G=qOr8@wCVzH%0)c9%@myr621F150D+(Y(FF%F=!ymsfS^E* zqcDzv5C}sMi~<`+KmZIv3N(mXFfRv>`7UJYp1i{(QOM}CqHfDQ31JSX2QkI0 zFz(7DZ02BFK@9^aL6^*vN`D9nV8$&ZPVH@YPR5hTz$&*j00@>q(!xy8L%eS2Ti(}v zCy($m6x6LF`5PlWr_3U zKo;S)^$NT4|F_kwzw8DqCZ>VsKhs>sR4o$?O?gWxbS|?>T?mj1a({fK4eGD+$}Y&i z^uSiPc@4BZ1;wsj$*T2|KY_(@nu?|$nj8u+jI?b}zbUm~_9sQDs6Q^@$4#t-<)#mv z-wt|>7&wB<$*!t=Q+2TaReg=MD3SiRnlW5~#3HywfCIkPp) zV-YvAM2k~ImI^wGbJ!hewU))G`{kEzao)T*^uv@*j8^r(ktY8$;OY~MJ=~b(WY)So2?xu8jML$8M zgg42p=UB6 zEi*7;Vl6Z_W-~1~FgZ0XI5lH1H#s$9F*ac{3RXjGZ)0mZAbWiZ3e~y`y3GdT0L~dp z)=i}V00000D77#B08rI706I%F5K6#UQ2{85q9}?YD$b&eu79{-3N}O7l2mpR;KZ{2 zn+{@4)QCi;q$nk^F5UY}lDbGJHYRv&HT-RauFbizhI29aQ_<+yD7;K51?d3d0QUge z@{IH&cw?nZUm7X||^4l~~$ zyA|o`B$McrFI%7sv(c_j^4i?AS~#iHcgndb*|2O@rGL-H=B9Kq-J4QQ3x#n|IW`!q z?C*R8@ona@&^f8F2r)^*5u~qF*^vUEPN7|$r0c;P%UjG{@M3Pu*^XZbD@6JayT03X zi{&lalBygO?dRjEL02dF>fPN{jX}`wXO~b42f8{*eIE=%7Kmm4!WWl)-_A>*t22zY z-}PPRoA{mAj_4nKp4%LSG4S&_#%25#vp4!upkLSyTQ=7PTV>Z{i0ub{ZXxOH(r+_j zZPbN8S0^c67>#!^tX&*}Hc{61>z}YpVx+^|FMkMz*dEcA0bQMBlm}TC1jj3#Rf(Cc zsz^-Lh;Bxt5^)M~0&!ZIRHhsg4o!;YW{3tgV>5H=>LiC|Ox}l?S0huJ$#{tA5W^vc z=Ed)e!VJs?7!5EPUNF2|c(G8ci$`ZeP^gQHO9e_>V!ArX77BAV6y-;stVdhW|L(QP zLVu?ic``6vog@+%;-mYCqYiT~+%K%;8A<=XV(tU;3m@q^-`%+mu^rN~OXxSDtCNIX zZu@i%)HI*hG^3jz5<0H|eVGQbI_=bN=N9zg_K5m;`M%%%`dyZ1B}ha}s128{PO_YR zzgUmE+Z^qQ(A7y2xmN}(`Vp)*g1RpHVgI-RXOKvq9aK4^M)JYsgx7ad)lWNo2S%9?}f zFn6rS7Ov7FPlY2XDTxZb(bY-1%;)=KS&ujyU7aL=I%$!os0Gm)k2N(wA*%_cMc&K{`nl-pB)=8` zW<)=FWj)4#eLlBBW8%IC@3_=Pth*~go48;SwHJFH2Fd%F?2kk;VXcww=-~-00nO2(4 z7#_C9Kn&JPXB`m05Sd^o^n)x9k}%ou6eKwIauTj!EYZT3wXZg0T(d*RYkzg?LUB|# zW0r(#8`yKErQ-?y^2rxLfPl3%zvXk}o-2LCX{@RGC#bmUq8G=dT?n=Lxht&Ltv5XM zKq^G>sVm1;VNq3wzfvSZ&zbt0ErQxX{}LL1Ts3TVav_oK;iE_r^^ZOib+@*&8Jb6T z{24#RZ0XOiX`b+=wiC=jmw(uke4NxWLj+RR*0wq)@TAqaja#AVtR8b(h~aq%?uI^M z`prRRPT(uuXYNcJ+VJy!Hkn6@ z`rP@=q}MzHN3)#x+LLco2l`9ZcTS5Eseh}}gCB-3f?K0?yRSvk7El%CEOMj@g~-HV zYP|oo3c>daiHd?`w|_NfFDEk*ne8ai;u4WnDMz7QaYtIMXL{bqEKk~Rq!Oegci z?|E^eRScX$shsKmIpci>6g3mx`LKW?cWdQ*5}9z8djSBq#m8;I;y?8oaX|qI z!}@5+->Dg+0MUDNxS*Xd7KijMg=l18w|Yfa#Q*iIdK8gK^jFHBq=_p2AoZQJo5JB~ z{)9_u-DI}n!v|vG5UoYj4MPyhwO7m|jicu.strength != COLL_ICU_STRENGTH_DEFAULT) { - enum coll_icu_strength w = def->icu.strength; - UColAttributeValue v = - w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY : - w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY : - w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY : - w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY : - w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL : - UCOL_DEFAULT; - ucol_setAttribute(collator, UCOL_STRENGTH, v, &status); - if (U_FAILURE(status)) { - diag_set(CollationError, "failed to set strength: %s", - u_errorName(status)); - return -1; - } + /* + * Set "strength" option of collation. Default values is + * "primary". + */ + enum coll_icu_strength w = def->icu.strength; + UColAttributeValue v = + w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY : + w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY : + w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY : + w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY : + w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL : + UCOL_DEFAULT; + ucol_setAttribute(collator, UCOL_STRENGTH, v, &status); + if (U_FAILURE(status)) { + diag_set(CollationError, "failed to set strength: %s", + u_errorName(status)); + return -1; } + if (def->icu.numeric_collation != COLL_ICU_DEFAULT) { enum coll_icu_on_off w = def->icu.numeric_collation; UColAttributeValue v = w == COLL_ICU_ON ? UCOL_ON : diff --git a/src/coll_def.c b/src/coll_def.c index df58cac..1323643 100644 --- a/src/coll_def.c +++ b/src/coll_def.c @@ -54,7 +54,6 @@ const char *coll_icu_case_first_strs[] = { }; const char *coll_icu_strength_strs[] = { - "DEFAULT", "PRIMARY", "SECONDARY", "TERTIARY", diff --git a/src/coll_def.h b/src/coll_def.h index 7c20abf..2a1cb68 100644 --- a/src/coll_def.h +++ b/src/coll_def.h @@ -84,8 +84,7 @@ extern const char *coll_icu_case_first_strs[]; /** Strength ICU settings */ enum coll_icu_strength { - COLL_ICU_STRENGTH_DEFAULT = 0, - COLL_ICU_STRENGTH_PRIMARY, + COLL_ICU_STRENGTH_PRIMARY = 0, COLL_ICU_STRENGTH_SECONDARY, COLL_ICU_STRENGTH_TERTIARY, COLL_ICU_STRENGTH_QUATERNARY, diff --git a/test/box-py/bootstrap.result b/test/box-py/bootstrap.result index 506aca3..2532b70 100644 --- a/test/box-py/bootstrap.result +++ b/test/box-py/bootstrap.result @@ -5,7 +5,7 @@ box.space._schema:select{} --- - - ['cluster', ''] - ['max_id', 511] - - ['version', 2, 1, 0] + - ['version', 2, 1, 1] ... box.space._cluster:select{} --- diff --git a/test/box/ddl.result b/test/box/ddl.result index c9a8e96..396318f 100644 --- a/test/box/ddl.result +++ b/test/box/ddl.result @@ -484,14 +484,14 @@ box.space._collation:auto_increment{'test', 0, 'ICU', 'ru_RU', setmap{}} ... box.space._collation:select{} --- -- - [1, 'unicode', 1, 'ICU', '', {}] +- - [1, 'unicode', 1, 'ICU', '', {'strength': 'identical'}] - [2, 'unicode_ci', 1, 'ICU', '', {'strength': 'primary'}] - [3, 'test', 0, 'ICU', 'ru_RU', {}] ... test_run:cmd('restart server default') box.space._collation:select{} --- -- - [1, 'unicode', 1, 'ICU', '', {}] +- - [1, 'unicode', 1, 'ICU', '', {'strength': 'identical'}] - [2, 'unicode_ci', 1, 'ICU', '', {'strength': 'primary'}] - [3, 'test', 0, 'ICU', 'ru_RU', {}] ... @@ -512,7 +512,7 @@ utf8.cmp('abc', 'def') ... box.space._collation:replace(t) --- -- [1, 'unicode', 1, 'ICU', '', {}] +- [1, 'unicode', 1, 'ICU', '', {'strength': 'identical'}] ... -- -- gh-2839: allow to store custom fields in field definition. diff --git a/test/box/tree_pk.result b/test/box/tree_pk.result index df3c78b..7366ed8 100644 --- a/test/box/tree_pk.result +++ b/test/box/tree_pk.result @@ -690,7 +690,7 @@ s:drop() ... --https://github.com/tarantool/tarantool/issues/2649 -- create standart index and alter it to collation index -box.internal.collation.create('test', 'ICU', 'ru-RU') +box.internal.collation.create('test', 'ICU', 'ru-RU', {strength = 'identical'}) --- ... box.internal.collation.create('test-ci', 'ICU', 'ru-RU', {strength = 'secondary'}) diff --git a/test/box/tree_pk.test.lua b/test/box/tree_pk.test.lua index 1190ab4..b4ee65c 100644 --- a/test/box/tree_pk.test.lua +++ b/test/box/tree_pk.test.lua @@ -238,7 +238,7 @@ s:drop() --https://github.com/tarantool/tarantool/issues/2649 -- create standart index and alter it to collation index -box.internal.collation.create('test', 'ICU', 'ru-RU') +box.internal.collation.create('test', 'ICU', 'ru-RU', {strength = 'identical'}) box.internal.collation.create('test-ci', 'ICU', 'ru-RU', {strength = 'secondary'}) s = box.schema.space.create('test') i1 = s:create_index('i1', { type = 'tree', parts = {{1, 'str'}}, unique = true }) diff --git a/test/sql/collation.result b/test/sql/collation.result index 79ba9ab..3c53be1 100644 --- a/test/sql/collation.result +++ b/test/sql/collation.result @@ -110,3 +110,90 @@ cn:close() box.schema.user.revoke('guest', 'read,write,execute', 'universe') --- ... +-- +-- gh-3573: Strength in the _collation space +-- Collation without 'strength' option set now works as one with +-- 'strength' set to 'primary'. +-- +box.internal.collation.create('c0', 'ICU', 'unicode') +--- +... +box.internal.collation.create('c1', 'ICU', 'unicode', {strength='primary'}) +--- +... +box.internal.collation.create('c2', 'ICU', 'unicode', {strength='secondary'}) +--- +... +box.internal.collation.create('c5', 'ICU', 'unicode', {strength='identical'}) +--- +... +box.sql.execute([[create table tc (id int primary key autoincrement, s0 string collate "c0", s1 string collate "c1", s2 string collate "c2", s5 string collate "c5")]]) +--- +... +box.sql.execute([[insert into tc values (null, 'a', 'a', 'a', 'a')]]) +--- +... +box.sql.execute([[insert into tc values (null, 'A', 'A', 'A', 'A')]]) +--- +... +box.sql.execute([[insert into tc values (null, 'á', 'á', 'á', 'á')]]) +--- +... +box.sql.execute([[insert into tc values (null, 'â', 'â', 'â', 'â')]]) +--- +... +box.sql.execute([[select * from tc where s0 = 'a']]) +--- +- - [1, 'a', 'a', 'a', 'a'] + - [2, 'A', 'A', 'A', 'A'] + - [3, 'á', 'á', 'á', 'á'] + - [4, 'â', 'â', 'â', 'â'] +... +box.sql.execute([[select * from tc where s1 = 'a']]) +--- +- - [1, 'a', 'a', 'a', 'a'] + - [2, 'A', 'A', 'A', 'A'] + - [3, 'á', 'á', 'á', 'á'] + - [4, 'â', 'â', 'â', 'â'] +... +box.sql.execute([[select * from tc where s2 = 'a']]) +--- +- - [1, 'a', 'a', 'a', 'a'] + - [2, 'A', 'A', 'A', 'A'] +... +box.sql.execute([[select * from tc where s5 = 'a']]) +--- +- - [1, 'a', 'a', 'a', 'a'] +... +a = box.sql.execute([[select id from tc where s0 = 'a']]) +--- +... +b = box.sql.execute([[select id from tc where s1 = 'a']]) +--- +... +count = 0 +--- +... +for k,v in pairs(a) do if (a[k][1] ~= b[k][1]) then count = count + 1 end end +--- +... +-- Should be true as default value is 'primary'. +count == 0 +--- +- true +... +box.sql.execute([[drop table tc]]) +--- +... +box.internal.collation.drop('c0') +--- +... +box.internal.collation.drop('c1') +--- +... +box.internal.collation.drop('c2') +--- +... +box.internal.collation.drop('c5') +--- +... diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua index 935dea8..1923503 100644 --- a/test/sql/collation.test.lua +++ b/test/sql/collation.test.lua @@ -43,3 +43,35 @@ cn:execute('select 1 limit ? collate not_exist', {1}) cn:close() box.schema.user.revoke('guest', 'read,write,execute', 'universe') + +-- +-- gh-3573: Strength in the _collation space +-- Collation without 'strength' option set now works as one with +-- 'strength' set to 'primary'. +-- +box.internal.collation.create('c0', 'ICU', 'unicode') +box.internal.collation.create('c1', 'ICU', 'unicode', {strength='primary'}) +box.internal.collation.create('c2', 'ICU', 'unicode', {strength='secondary'}) +box.internal.collation.create('c5', 'ICU', 'unicode', {strength='identical'}) +box.sql.execute([[create table tc (id int primary key autoincrement, s0 string collate "c0", s1 string collate "c1", s2 string collate "c2", s5 string collate "c5")]]) +box.sql.execute([[insert into tc values (null, 'a', 'a', 'a', 'a')]]) +box.sql.execute([[insert into tc values (null, 'A', 'A', 'A', 'A')]]) +box.sql.execute([[insert into tc values (null, 'á', 'á', 'á', 'á')]]) +box.sql.execute([[insert into tc values (null, 'â', 'â', 'â', 'â')]]) +box.sql.execute([[select * from tc where s0 = 'a']]) +box.sql.execute([[select * from tc where s1 = 'a']]) +box.sql.execute([[select * from tc where s2 = 'a']]) +box.sql.execute([[select * from tc where s5 = 'a']]) +a = box.sql.execute([[select id from tc where s0 = 'a']]) +b = box.sql.execute([[select id from tc where s1 = 'a']]) +count = 0 +for k,v in pairs(a) do if (a[k][1] ~= b[k][1]) then count = count + 1 end end + +-- Should be true as default value is 'primary'. +count == 0 + +box.sql.execute([[drop table tc]]) +box.internal.collation.drop('c0') +box.internal.collation.drop('c1') +box.internal.collation.drop('c2') +box.internal.collation.drop('c5') diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp index 94374a7..acbc7f4 100644 --- a/test/unit/coll.cpp +++ b/test/unit/coll.cpp @@ -51,6 +51,7 @@ manual_test() memset(&def, 0, sizeof(def)); snprintf(def.locale, sizeof(def.locale), "%s", "ru_RU"); def.type = COLL_TYPE_ICU; + def.icu.strength = COLL_ICU_STRENGTH_IDENTICAL; struct coll *coll; cout << " -- default ru_RU -- " << endl; @@ -134,6 +135,7 @@ hash_test() struct coll *coll; /* Case sensitive */ + def.icu.strength = COLL_ICU_STRENGTH_IDENTICAL; coll = coll_new(&def); assert(coll != NULL); cout << "Case sensitive" << endl; -- 2.7.4