[tarantool-patches] [PATCH v2 2/3] Introduce "none" and "binary" collations

Nikita Pettik korablev at tarantool.org
Tue Nov 13 03:07:25 MSK 2018


This patch introduces two new collation sequences: "none" and "binary".
Although they use the same comparison algorithm (simple byte-by-byte
comparison), they do not mean the same thing. The "binary" collation
gets into the format only if the user specifies it explicitly: either
by setting this collation in the space format manually or by adding a
<COLLATE BINARY> clause to the column definition within a CREATE TABLE
statement. The "none" collation is used when the user doesn't specify
any collation at all. The "none" collation always comes with id == 0
and it can't be changed (since its id is widely used under the hood as
an indicator of the absence of a collation).
The difference between these collations is vital for ANSI SQL: mixing
"binary" with other collations is prohibited, whereas the "none"
collation can be used alongside others.
In this respect the current patch extends the list of available
collations: now not only ICU collations are allowed, but also BINARY.

Note that some SQL queries have changed their query plans. This
happened because our parser allows using the <COLLATE> clause
with numeric fields:

CREATE TABLE t (id INT PRIMARY KEY);
SELECT id COLLATE "binary" ...

In this example the collation of the LHS (the id column) is NULL, but
the collation of the RHS is "binary". Before this patch both collations
were NULL. Hence, the query planner may no longer allow the use of
certain indexes. On the other hand, this feature is obviously broken,
so that doesn't seem to be a big deal.

Needed for #3185
---
 src/box/alter.cc                |  13 ++++++++--
 src/box/bootstrap.snap          | Bin 1888 -> 1911 bytes
 src/box/coll_id.c               |   1 -
 src/box/errcode.h               |   1 +
 src/box/field_def.c             |   2 +-
 src/box/key_def.c               |  18 +++++++-------
 src/box/key_def.h               |   6 -----
 src/box/lua/space.cc            |   2 +-
 src/box/lua/upgrade.lua         |   2 ++
 src/box/sql.c                   |  10 ++++----
 src/box/sql/build.c             |   2 +-
 src/box/sql/callback.c          |  19 +++++++++++----
 src/box/sql/expr.c              |   2 +-
 src/box/sql/func.c              |   2 +-
 src/box/sql/select.c            |   8 +++----
 src/box/sql/where.c             |   2 +-
 src/box/tuple_format.c          |   4 ++--
 src/coll.c                      |  52 +++++++++++++++++++++++++++++++++-------
 src/coll_def.c                  |   2 +-
 src/coll_def.h                  |   1 +
 test/box/ddl.result             |  20 +++++++++-------
 test/box/misc.result            |   1 +
 test/box/net.box.result         |   2 +-
 test/sql-tap/collation.test.lua |  12 ++++++----
 test/sql-tap/distinct.test.lua  |   2 +-
 test/sql-tap/in3.test.lua       |   2 +-
 test/sql-tap/index3.test.lua    |   2 +-
 test/sql-tap/where2.test.lua    |   2 +-
 test/sql/collation.result       |  27 +++++++++++++++++++++
 test/sql/collation.test.lua     |  14 +++++++++++
 30 files changed, 166 insertions(+), 67 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 6d2c59bbc..2eb9f53e8 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -403,7 +403,7 @@ field_def_decode(struct field_def *field, const char **data,
 				     "nullable action properties", fieldno +
 				     TUPLE_INDEX_BASE));
 	}
-	if (field->coll_id != COLL_NONE &&
+	if (field->coll_id != 0 &&
 	    field->type != FIELD_TYPE_STRING &&
 	    field->type != FIELD_TYPE_SCALAR &&
 	    field->type != FIELD_TYPE_ANY) {
@@ -2560,7 +2560,6 @@ coll_id_def_new_from_tuple(const struct tuple *tuple, struct coll_id_def *def)
 		tuple_field_with_type_xc(tuple, BOX_COLLATION_FIELD_OPTIONS,
 					 MP_MAP);
 
-	assert(base->type == COLL_TYPE_ICU);
 	if (opts_decode(&base->icu, coll_icu_opts_reg, &options,
 			ER_WRONG_COLLATION_OPTIONS,
 			BOX_COLLATION_FIELD_OPTIONS, NULL) != 0)
@@ -2671,6 +2670,16 @@ on_replace_dd_collation(struct trigger * /* trigger */, void *event)
 		 */
 		int32_t old_id = tuple_field_u32_xc(old_tuple,
 						    BOX_COLLATION_FIELD_ID);
+		/*
+		 * Don't allow user to drop "none" collation
+		 * since it is very special and vastly used
+		 * under the hood. Hence, we can rely on the
+		 * fact that "none" collation features id == 0.
+		 */
+		if (old_id == 0) {
+			tnt_raise(ClientError, ER_DROP_COLLATION, "none",
+				  "system collation");
+		}
 		struct coll_id *old_coll_id = coll_by_id(old_id);
 		assert(old_coll_id != NULL);
 		access_check_ddl(old_coll_id->name, old_coll_id->id,
diff --git a/src/box/bootstrap.snap b/src/box/bootstrap.snap
index 65739384a66d6ba4a538553ccf4677536ba15280..5e136d147036760cb67df68e81aab5744be783e4 100644
GIT binary patch
delta 1909
zcmV-*2a5RM4)+d_7k at P^GdL}0GiGFFH!?UkWC}@cb97;DV`VxZIb=CFWHw=8Eip7@
zHZ3$|Wn?X3VP<75WjSRrW;tVGG-6_A3RXjGZ)0mZAbWiZ3e~y`y3Gdt0M5D=y5Xe&
z00000D77#B08phj0J=<Y5J}KjO#vWYm=|2a2zw054KzTf;(v`no5cMPX0Ik5Fk}sC
zZHY`tO-kA|Nr?QCPAdtQm=p%^DbL*=mX%*pI<rd`svcw`Pi0Ce?*Qij`T#~|EuDAg
zwISJ&8Ps3 at D(~xEm|GitA#nG;<%8nbOU=QV_qvt29&azkI?c=W at fG>)UsuzHil~>Z
zIEK4*6Pgc(E`KBVf-I!(558x;-}}G!EEv-nW}{F_my&cb17iE?%&Kt$xH`$$zOTGR
zo2$3<@6K;R8QY{9z-I-F8GSFrS&udffUA={vcw0D9U3rH)adI_)8<hJ!*O+zlU5yO
zz8_{o!qrK3p-W!2LHAXoT%9CJby|h+KGnq+gy~DASbwG!%1fD8DG=9`laqtOp9*8Z
zvcK~Y#5P&?Lg%#dGL&UV%N8FlS9YoZsB<Vt7rdC)kZi|agcU0Nhh5*@vcd8eZAn!R
zjP~;p1;f=zy83vH_Vdw}^yB~u*5e1WHG0#4tCJl2b0k=gpDn2kgCm%=(e7nmvCOlh
zp!amPHGf*OKnctyQ5psPde#Z0kbtX`)c3(KWPv#LE_`v>_wBq4x;npR`&-|2wu#?4
zO^N>T=eb3qGYEh_!+4C_VAe)u4)pul;a6*2 at GH9>Lu at _p^QWY9OMfkit<e|*T%DwN
zUo+mtkalqh*2GxfuYtmn$&n6ozaSW5n?zv>aDR1 at R32nq5FEATS?Ndc!pfMwGcxu8
zRi!gkDL7e?3I*x}$^@#!iNtAjQr(QW5kYhl!X{;t!_`R=*?{O!vhGy#K~{r9W1{gu
z)4|2Bivow4n;Dy#4Kf;JGRR=4HRb}95lx{n7GgR|VQRQKNtg;#MnvVupRC84(EskV
z`F}!}j=CBeu1-=33~|!+#8HPxF5K^{<XK7oz9NzbW*1h{b-ufE9b#+bvrec-fvb~z
zU0(Av4b-!r*R!LPkeSYDL0+DLtUg2a+qngQxJ{x#CV1cPe*G=Wa}%YaCsc<7u1>O)
zeZN?byIUmMBg5567PVS=cYC8eF<hPGD1WmzsuKa0x4449)k(G<%&@%0AB&Y@;nj!w
z)tmmi*W~@DOd>voGTke}Q?0xe1}s$8>39_|W$AFY_7GWF)Q}q?T at V@|ROSa)Cs|?W
zQdxaa9p;Yp_}wZs>Qp_tbm?M+^5E(uW9IYyu&l at 14z5lTK%La6Q~B%{M_$qjI)88m
z#otPl%kf~iX*h$dQLNEw)SYnX{PJ!|=?$(<@<i)%i7^iD#BhN--1zZ8F&h0O#sGSl
z3nm41UECyI7e*4=&qrG4KekFbjk=Q!u1<2tN5+hXpUyb@`G|uP6tbXDYSh`(pdJmb
zPSTqZKvpC~hAc>o8|~2nqlbs`yno<vL3LW_uvo2FsaWmEY5{?9sPSB6s02n30003<
z0MP{pG3bg05 at 4`MjH4)yffxuw5R4)lM?e4yLJBm9S}-&d%%{4wpNvvCz%hP|9;3(T
zF?zHEA24jqw9<6O0<kp)YS~^M3_}1zWP+j450X47k(ks|7U0<bm|XF(>VJwCyFxW&
zvSx>h*Xrupa#T-amV|4!+4G#G at C5t>lh2DV*jpCAP2 at 0w?0v*%tf^HhsNvQ{7l)g>
z5CZc%DXe^PZ}cV*RfytKSHf2D at T#r3MhHgUaO#{b0JA~=3XQU^`W(%7AyoI+1(QS-
zqc0cr3D2Ph*<hTN1NY20D}Nub*|X;doDFCMT{2G^FEqytSV)}uyY`&Glh#2|ZZU<a
zzUb1zX4XR_Zpg*&3z9osX4aT3gYK*`P1`?bGjG4U5)1 at z%(1$?)d+h(Wr^9R`$E!f
ziSYdK3?HKdKvkBAFH^DzhpkH3f`7TKX8p--!lH5-?E5pZWK6YXqJPOLZ|Q}nl-W`j
zILKukUul5)<NRY6Bwu<4t3$j7+MW_(S6|7ho030)#c`U7rV*MP3NVaBZBM^cYTN9O
ziBi!&?!b?8tb at U(@15Tay+#-~n&rgTr+ia&_}^81l(p#5`)@np*g}UzaEkyp_}YhV
z!BtU?NRF(bYDJt+V}JRtEd<3cBuX5T-PT>QLz$n5K}W9^T8SL-I*Q|CJJQu!#!+|M
zH=_fq+5ko|oy_gO$HeItV&D`Qr?+iTcBTvGOe_s3$|U5 at f+vRDt(EhWRKkg}6aa8j
ze83bezNpuT3z|n5)<;Wzkj^-TyWZmAiaL=OI__QSp=Dq<d2Y3b*#G#fQWSAX^v0ef
zxhhH|_1n4|(%~2VL`z9-(zl|(2RqRZtyz>JLomv9uig;>T;qt+2cQ^PqXJ1aA^JZK
vL55PXLzk)lO8W>&phrR$u~5(Z*LkIYMVy at l-;OZk>+zl>-;fT~5UuSBltZC&

delta 1886
zcmV-k2ch`)4&V-u7k at M@I5#mZXEQS}VKFskVq;}xWC}@cb97;DV`VxZGc`3ZV>UB6
zEi*7;Vl6Z_W-~1~FgZ0XI5lH1H#s$9F*ac{3RXjGZ)0mZAbWiZ3e~y`y3GdT0L~dp
z)=i}V00000D77#B08rI706I%F5K6#UQ2{85q9}?YD$b&eu79{-3N}O7l2mpR;KZ{2
zn+{@4)QCi;q$nk^F5UY}lDbGJHYRv&HT-RauFbizhI29aQ_<+yD7;K51?d3d0QUge
z@{IH&cw?nZUm7X<AvBlHyK|e+jK~D)u!ofw_Akusjj{;1d*AXwaqOk$AnkkY%3P1Q
z7iGQnW&8Mw9Dnz)t7}3;^vhNp!|lQa%?HER5PU&a?)!uP+3)xM)_)e1=^V38D5X=3
zcrF3DI!W2Su)M|FtGo2?&T&B*+oKAgtCJ*x<i-gWH2PnNvmS5M$ZP4kI>||^4l~~$
zyA|o`B$McrFI%7sv(c_j^4i?AS~#iHcgndb*|2O at rGL-H=B9Kq-J4QQ3x#n|IW`!q
z?C*R8 at ona@&^f8F2r)^*5u~qF*^vUEPN7|$r0c;P%UjG{@M3Pu*^XZbD at 6JayT03X
zi{&lalBygO?dRjEL02dF>f<fi&qrHQ6N4gHkFA)sQ5u%6PIBzei(oyrY)NMp7{%<3
zdJ6Ux%YRHW0{Ty9Yojs=j>PN{jX}`wXO~b42f8{*eIE=%7Kmm4!WWl)-_A>*t22zY
z-}PPRoA{mAj_4nKp4%LSG4S&_#%25#vp4!upkLSyTQ=7PTV>Z{i0ub{ZXxOH(r+_j
zZPbN8S0^c67>#!^tX&*}Hc{61>z}YpVx+^|FMkMz*dEcA0bQMBlm}TC1jj3#Rf(Cc
zsz^-Lh;Bxt5^)M~0&!ZIRHhsg4o!;YW{3tgV>5H=>LiC|Ox}l?S0huJ$#{tA5W^vc
z=Ed)e!VJs?7!5EPUNF2|c(G8ci$`ZeP^gQHO9e_>V!ArX77BAV6y-;stVdhW|L(QP
zLVu?ic``6vog at +%;-mYCqYiT~+%K%;8A<=XV(tU;3m at q^-`%+mu^rN~OXxSDtCNIX
zZu at i%)HI*hG^3jz5<0H|eVGQbI_=bN=N9zg_K5m;`M%%%`dyZ1B}ha}s128{PO_YR
zzgUmE+Z^qQ(A7y2xm<a7i=#Xcx;n{mW`A*1=RquQan%y2i*v%MRgMX#RHf9)q$;6W
zI5#C0%Y-VeP&n9>N}(`Vp)*g1RpHVgI-RXOKvq9aK4^M)JYsgx7ad)lWNo2S%9?}f
zFn6rS7Ov7FPlY2XDTxZb(bY-1%;)=KS&ujyU7aL=I%$!os<B at jc}XkKz!?;OBY)9u
z#)ILm;S91y(MD^LXOf{a?7JnUF}ga*5w6xtlyPvUg$mqZ29N)d!RRKj#m~oHu_ved
z;wACEa4u*+A8Vce=ql+f@=P$gI?0_IE@(7t>0Gm)k2N(wA*%_cMc&K{`nl-pB)=8`
zW<)=FW<FTpD36U5Jt`<q3LX?x=YNEbiPcVlYpC&HWT*s05C8xHNC43V2Qlc11`=Sf
zK#Zd>j)4#eLlBBW8%IC at 3_=Pth*~go48;SwHJFH2Fd%F?2kk;VXcww=-~-00nO2(4
z7#_C9Kn&JPXB`m05Sd^o^n)x9k}%ou6eKwIauTj!EYZT3wXZg0T(d*RYkzg?LUB|#
zW0r(#8`yKErQ-?y^2rxLfPl3%zvXk}o-2LCX{@RGC#bmUq8G=dT?n=Lxht&Ltv5XM
zKq^G>sVm1;VNq3wzfvSZ&zbt0ErQxX{}LL1Ts3TVav_oK;iE_r^^ZOib+@*&8Jb6T
z{24#RZ0XOiX`b+=wiC=jmw(uke4NxWLj+RR*0wq)@TAqaja#AVtR8b(h~aq%?uI^M
z`prRRPT(uuXYNcJ+VJy!Hk<p!R)T;J at vKg_H;wT38<xm^m(NJmmI#uUo&7P|2}oqg
z_tGMZ at S$}GTlD{LtGR))1F&c at 4JrSb3^FEkG11VJx0FKTGHdEW3V*pE$5+~-{vxmL
zf*4EBeRas!LD at 4G?DCbYS}*y)SR8><QJU1qp&G+T()NPsQhQ*3fG8CW<4FAYY>n6@
z`rP@=q}MzHN3)#x+LLco2l`9ZcTS5Eseh}}gCB-3f?K0?yRSvk7El%CEOMj at g~-HV
zYP|oo3c>daiHd?`w|_NfFDEk*ne8ai;u4WnDMz7QaYtIM<u>XL{bqEKk~Rq!Oegci
z?|E^eRScX$<Mg@>shsKmIpci>6g3mx`LKW?cWdQ*5}9z8djSBq#m8;I;y?8oaX|qI
z!}@5+->Dg+0MUDNxS*Xd7KijMg=l18w|Yfa#Q*iIdK8gK^jFHBq=_p2AoZQJo5JB~
z{)9_u-DI}n!v|vG5UoYj4MPyhwO7m|j<QA`KTd1ZBMB3t<;Mk&Gr6RupO)4s0af}3
YejQs<I4l at DLYl8<_9QWfbkz{8?dNiiMF0Q*

diff --git a/src/box/coll_id.c b/src/box/coll_id.c
index 2d5f8a09a..b56c74961 100644
--- a/src/box/coll_id.c
+++ b/src/box/coll_id.c
@@ -37,7 +37,6 @@
 struct coll_id *
 coll_id_new(const struct coll_id_def *def)
 {
-	assert(def->base.type == COLL_TYPE_ICU);
 	size_t total_len = sizeof(struct coll_id) + def->name_len + 1;
 	struct coll_id *coll_id = (struct coll_id *) malloc(total_len);
 	if (coll_id == NULL) {
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 4eb7fced5..18ffdf3d5 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -224,6 +224,7 @@ struct errcode_record {
 	/*169 */_(ER_NO_SUCH_CONSTRAINT,	"Constraint %s does not exist") \
 	/*170 */_(ER_CONSTRAINT_EXISTS,		"Constraint %s already exists") \
 	/*171 */_(ER_SQL_TYPE_MISMATCH,		"Type mismatch: can not convert %s to %s") \
+	/*172 */_(ER_DROP_COLLATION,		"Can't drop collation %s : %s") \
 
 /*
  * !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/field_def.c b/src/box/field_def.c
index 3a9ff3703..3e63e12a3 100644
--- a/src/box/field_def.c
+++ b/src/box/field_def.c
@@ -123,7 +123,7 @@ const struct field_def field_def_default = {
 	.name = NULL,
 	.is_nullable = false,
 	.nullable_action = ON_CONFLICT_ACTION_DEFAULT,
-	.coll_id = COLL_NONE,
+	.coll_id = 0,
 	.default_value = NULL,
 	.default_value_expr = NULL
 };
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 3a560bb06..6802489f1 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -41,7 +41,7 @@ const char *sort_order_strs[] = { "asc", "desc", "undef" };
 const struct key_part_def key_part_def_default = {
 	0,
 	field_type_MAX,
-	COLL_NONE,
+	0,
 	false,
 	ON_CONFLICT_ACTION_DEFAULT,
 	SORT_ORDER_ASC
@@ -174,7 +174,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
 	for (uint32_t i = 0; i < part_count; i++) {
 		const struct key_part_def *part = &parts[i];
 		struct coll *coll = NULL;
-		if (part->coll_id != COLL_NONE) {
+		if (part->coll_id != 0) {
 			struct coll_id *coll_id = coll_by_id(part->coll_id);
 			if (coll_id == NULL) {
 				diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
@@ -223,7 +223,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
 		key_def_set_part(key_def, item, fields[item],
 				 (enum field_type)types[item],
 				 ON_CONFLICT_ACTION_DEFAULT,
-				 NULL, COLL_NONE, SORT_ORDER_ASC);
+				 NULL, 0, SORT_ORDER_ASC);
 	}
 	key_def_set_cmp(key_def);
 	return key_def;
@@ -319,7 +319,7 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
 	for (uint32_t i = 0; i < part_count; i++) {
 		const struct key_part_def *part = &parts[i];
 		int count = 2;
-		if (part->coll_id != COLL_NONE)
+		if (part->coll_id != 0)
 			count++;
 		if (part->is_nullable)
 			count++;
@@ -329,7 +329,7 @@ key_def_sizeof_parts(const struct key_part_def *parts, uint32_t part_count)
 		assert(part->type < field_type_MAX);
 		size += mp_sizeof_str(strlen(PART_OPT_TYPE));
 		size += mp_sizeof_str(strlen(field_type_strs[part->type]));
-		if (part->coll_id != COLL_NONE) {
+		if (part->coll_id != 0) {
 			size += mp_sizeof_str(strlen(PART_OPT_COLLATION));
 			size += mp_sizeof_uint(part->coll_id);
 		}
@@ -348,7 +348,7 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
 	for (uint32_t i = 0; i < part_count; i++) {
 		const struct key_part_def *part = &parts[i];
 		int count = 2;
-		if (part->coll_id != COLL_NONE)
+		if (part->coll_id != 0)
 			count++;
 		if (part->is_nullable)
 			count++;
@@ -361,7 +361,7 @@ key_def_encode_parts(char *data, const struct key_part_def *parts,
 		assert(part->type < field_type_MAX);
 		const char *type_str = field_type_strs[part->type];
 		data = mp_encode_str(data, type_str, strlen(type_str));
-		if (part->coll_id != COLL_NONE) {
+		if (part->coll_id != 0) {
 			data = mp_encode_str(data, PART_OPT_COLLATION,
 					     strlen(PART_OPT_COLLATION));
 			data = mp_encode_uint(data, part->coll_id);
@@ -431,7 +431,7 @@ key_def_decode_parts_166(struct key_part_def *parts, uint32_t part_count,
 		part->is_nullable = (part->fieldno < field_count ?
 				     fields[part->fieldno].is_nullable :
 				     key_part_def_default.is_nullable);
-		part->coll_id = COLL_NONE;
+		part->coll_id = 0;
 	}
 	return 0;
 }
@@ -488,7 +488,7 @@ key_def_decode_parts(struct key_part_def *parts, uint32_t part_count,
 				 "index part: unknown field type");
 			return -1;
 		}
-		if (part->coll_id != COLL_NONE &&
+		if (part->coll_id != 0 &&
 		    part->type != FIELD_TYPE_STRING &&
 		    part->type != FIELD_TYPE_SCALAR) {
 			diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 20e79f9fe..684e9cf75 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -72,12 +72,6 @@ struct key_part_def {
 
 extern const struct key_part_def key_part_def_default;
 
-/**
- * Set key_part_def.coll_id to COLL_NONE if
- * the field does not have a collation.
- */
-#define COLL_NONE UINT32_MAX
-
 /** Descriptor of a single part in a multipart key. */
 struct key_part {
 	/** Tuple field index for this part */
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 7cae436f1..e5e09b042 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -299,7 +299,7 @@ lbox_fillspace(struct lua_State *L, struct space *space, int i)
 			lua_pushboolean(L, key_part_is_nullable(part));
 			lua_setfield(L, -2, "is_nullable");
 
-			if (part->coll_id != COLL_NONE) {
+			if (part->coll_id != 0) {
 				struct coll_id *coll_id =
 					coll_by_id(part->coll_id);
 				assert(coll_id != NULL);
diff --git a/src/box/lua/upgrade.lua b/src/box/lua/upgrade.lua
index 64f74f9d3..a9525058b 100644
--- a/src/box/lua/upgrade.lua
+++ b/src/box/lua/upgrade.lua
@@ -396,8 +396,10 @@ local function create_collation_space()
     box.space._index:insert{_collation.id, 1, 'name', 'tree', {unique = true}, {{1, 'string'}}}
 
     log.info("create predefined collations")
+    box.space._collation:replace{0, "none", ADMIN, "BINARY", "", setmap{}}
     box.space._collation:replace{1, "unicode", ADMIN, "ICU", "", setmap{}}
     box.space._collation:replace{2, "unicode_ci", ADMIN, "ICU", "", {strength='primary'}}
+    box.space._collation:replace{3, "binary", ADMIN, "BINARY", "", setmap{}}
 
     local _priv = box.space[box.schema.PRIV_ID]
     _priv:insert{ADMIN, PUBLIC, 'space', _collation.id, box.priv.W}
diff --git a/src/box/sql.c b/src/box/sql.c
index caa66144f..686d32335 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -379,7 +379,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 		if (def != NULL && i < def->part_count)
 			part->coll_id = def->parts[i].coll_id;
 		else
-			part->coll_id = COLL_NONE;
+			part->coll_id = 0;
 	}
 	struct key_def *ephemer_key_def = key_def_new(ephemer_key_parts,
 						      field_count);
@@ -1139,7 +1139,7 @@ sql_encode_table(struct region *region, struct Table *table, uint32_t *size)
 		struct field_def *field = &def->fields[i];
 		const char *default_str = field->default_value;
 		int base_len = 5;
-		if (cid != COLL_NONE)
+		if (cid != 0)
 			base_len += 1;
 		if (default_str != NULL)
 			base_len += 1;
@@ -1160,7 +1160,7 @@ sql_encode_table(struct region *region, struct Table *table, uint32_t *size)
 		const char *action =
 			on_conflict_action_strs[def->fields[i].nullable_action];
 		mpstream_encode_str(&stream, action);
-		if (cid != COLL_NONE) {
+		if (cid != 0) {
 			mpstream_encode_str(&stream, "collation");
 			mpstream_encode_uint(&stream, cid);
 		}
@@ -1285,13 +1285,13 @@ sql_encode_index_parts(struct region *region, const struct field_def *fields,
 		       action_is_nullable(fields[col].nullable_action));
 		/* Do not decode default collation. */
 		uint32_t cid = part->coll_id;
-		mpstream_encode_map(&stream, 5 + (cid != COLL_NONE));
+		mpstream_encode_map(&stream, 5 + (cid != 0));
 		mpstream_encode_str(&stream, "type");
 		mpstream_encode_str(&stream, field_type_strs[fields[col].type]);
 		mpstream_encode_str(&stream, "field");
 		mpstream_encode_uint(&stream, col);
 
-		if (cid != COLL_NONE) {
+		if (cid != 0) {
 			mpstream_encode_str(&stream, "collation");
 			mpstream_encode_uint(&stream, cid);
 		}
diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index 5b3348bd2..929d20dbf 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -2399,7 +2399,7 @@ index_fill_def(struct Parse *parse, struct index *index,
 		uint32_t coll_id;
 		if (expr->op == TK_COLLATE) {
 			sql_get_coll_seq(parse, expr->u.zToken, &coll_id);
-			if (coll_id == COLL_NONE &&
+			if (coll_id == 0 &&
 			    strcasecmp(expr->u.zToken, "binary") != 0) {
 				diag_set(ClientError, ER_NO_SUCH_COLLATION,
 					 expr->u.zToken);
diff --git a/src/box/sql/callback.c b/src/box/sql/callback.c
index 3cf3a835d..352745e0e 100644
--- a/src/box/sql/callback.c
+++ b/src/box/sql/callback.c
@@ -42,13 +42,22 @@
 struct coll *
 sql_get_coll_seq(Parse *parser, const char *name, uint32_t *coll_id)
 {
-	if (name == NULL || strcasecmp(name, "binary") == 0) {
-		*coll_id = COLL_NONE;
-		return NULL;
+	if (name == NULL) {
+		*coll_id = 0;
+		return coll_by_id(0)->coll;
 	}
-	struct coll_id *p = coll_by_name(name, strlen(name));
+	struct coll_id *p;
+	/*
+	 * In SQL all identifiers should be uppercased, so
+	 * to avoid mess lets simple search binary (since it is
+	 * sort of "special" collation) ignoring case at all.
+	 */
+	if (strcasecmp(name, "binary") == 0)
+		p = coll_by_name("binary", strlen("binary"));
+	else
+		p = coll_by_name(name, strlen(name));
 	if (p == NULL) {
-		*coll_id = COLL_NONE;
+		*coll_id = 0;
 		sqlite3ErrorMsg(parser, "no such collation sequence: %s",
 				name);
 		return NULL;
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 4d1c1a634..e52cd6407 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -194,7 +194,7 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_found, uint32_t *coll_id)
 {
 	struct coll *coll = NULL;
 	*is_found = false;
-	*coll_id = COLL_NONE;
+	*coll_id = 0;
 	while (p != NULL) {
 		int op = p->op;
 		if (p->flags & EP_Generic)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 8c34cbb3d..580cf1e60 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -506,7 +506,7 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv)   \
 	UErrorCode status = U_ZERO_ERROR;                                      \
 	struct coll *coll = sqlite3GetFuncCollSeq(context);                    \
 	const char *locale = NULL;                                             \
-	if (coll != NULL) {                                                    \
+	if (coll != NULL && coll->collator != NULL) {                          \
 		locale = ucol_getLocaleByType(coll->collator,                  \
 					      ULOC_VALID_LOCALE, &status);     \
 	}                                                                      \
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index dfa6ed8e0..cea453f08 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -1345,7 +1345,7 @@ sql_key_info_new(sqlite3 *db, uint32_t part_count)
 		struct key_part_def *part = &key_info->parts[i];
 		part->fieldno = i;
 		part->type = FIELD_TYPE_SCALAR;
-		part->coll_id = COLL_NONE;
+		part->coll_id = 0;
 		part->is_nullable = false;
 		part->nullable_action = ON_CONFLICT_ACTION_ABORT;
 		part->sort_order = SORT_ORDER_ASC;
@@ -1961,7 +1961,7 @@ sqlite3SelectAddColumnTypeAndCollation(Parse * pParse,		/* Parsing contexts */
 		pTab->def->fields[i].type = sql_affinity_to_field_type(affinity);
 		bool is_found;
 		uint32_t coll_id;
-		if (pTab->def->fields[i].coll_id == COLL_NONE &&
+		if (pTab->def->fields[i].coll_id == 0 &&
 		    sql_expr_coll(pParse, p, &is_found, &coll_id) && is_found)
 			pTab->def->fields[i].coll_id = coll_id;
 	}
@@ -2160,7 +2160,7 @@ multi_select_coll_seq_r(Parse *parser, Select *p, int n, bool *is_found,
 					       coll_id);
 	} else {
 		coll = NULL;
-		*coll_id = COLL_NONE;
+		*coll_id = 0;
 	}
 	assert(n >= 0);
 	/* iCol must be less than p->pEList->nExpr.  Otherwise an error would
@@ -2233,7 +2233,7 @@ sql_multiselect_orderby_to_key_info(struct Parse *parse, struct Select *s,
 		} else {
 			multi_select_coll_seq(parse, s,
 					      item->u.x.iOrderByCol - 1, &id);
-			if (id != COLL_NONE) {
+			if (id != 0) {
 				const char *name = coll_by_id(id)->name;
 				order_by->a[i].pExpr =
 					sqlite3ExprAddCollateString(parse, term,
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index 8c78c0c9b..1db4db874 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -2806,7 +2806,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder,	/* WHERE clause information */
 		part.nullable_action = ON_CONFLICT_ACTION_ABORT;
 		part.is_nullable = false;
 		part.sort_order = SORT_ORDER_ASC;
-		part.coll_id = COLL_NONE;
+		part.coll_id = 0;
 
 		struct key_def *key_def = key_def_new(&part, 1);
 		if (key_def == NULL) {
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 5bdf102e7..8e5aea51b 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -40,7 +40,7 @@ static uint32_t formats_size = 0, formats_capacity = 0;
 
 static const struct tuple_field tuple_field_default = {
 	FIELD_TYPE_ANY, TUPLE_OFFSET_SLOT_NIL, false,
-	ON_CONFLICT_ACTION_NONE, NULL, COLL_NONE,
+	ON_CONFLICT_ACTION_DEFAULT, NULL, 0,
 };
 
 /**
@@ -67,7 +67,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
 		format->fields[i].nullable_action = fields[i].nullable_action;
 		struct coll *coll = NULL;
 		uint32_t cid = fields[i].coll_id;
-		if (cid != COLL_NONE) {
+		if (cid != 0) {
 			struct coll_id *coll_id = coll_by_id(cid);
 			if (coll_id == NULL) {
 				diag_set(ClientError,ER_WRONG_COLLATION_OPTIONS,
diff --git a/src/coll.c b/src/coll.c
index 6a76f1f0b..6d9c44dbf 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -92,6 +92,17 @@ coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
 	return (int)result;
 }
 
+/** Compare raw bytes; a proper prefix sorts before the longer string. */
+static int
+coll_bin_cmp(const char *s, size_t slen, const char *t, size_t tlen,
+	     const struct coll *coll)
+{
+	(void) coll;
+	int res = memcmp(s, t, slen < tlen ? slen : tlen);
+	/* Not slen - tlen: narrowing size_t to int can flip the sign. */
+	return res != 0 ? res : (slen > tlen) - (slen < tlen);
+}
+
 /** Get a hash of a string using ICU collation. */
 static uint32_t
 coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
@@ -113,6 +124,15 @@ coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
 	return total_size;
 }
 
+static uint32_t
+coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
+	      struct coll *coll)
+{
+	(void) coll; /* Unused: the bytes of s are fed to the hash as-is. */
+	PMurHash32_Process(ph, pcarry, s, s_len);
+	return s_len; /* Number of bytes passed to PMurHash32_Process(). */
+}
+
 /**
  * Set up ICU collator and init cmp and hash members of collation.
  * @param coll Collation to set up.
@@ -262,17 +282,22 @@ static int
 coll_def_snfingerprint(char *buffer, int size, const struct coll_def *def)
 {
 	int total = 0;
-	SNPRINT(total, snprintf, buffer, size, "{locale: %s, type = %d, "\
-	        "icu: ", def->locale, (int) def->type);
-	SNPRINT(total, coll_icu_def_snfingerprint, buffer, size, &def->icu);
-	SNPRINT(total, snprintf, buffer, size, "}");
+	if (def->type == COLL_TYPE_ICU) {
+		SNPRINT(total, snprintf, buffer, size, "{locale: %s,"\
+			"type = %d, icu: ", def->locale, (int) def->type);
+		SNPRINT(total, coll_icu_def_snfingerprint, buffer,
+			size, &def->icu);
+		SNPRINT(total, snprintf, buffer, size, "}");
+	} else {
+		assert(def->type == COLL_TYPE_BINARY);
+		SNPRINT(total, snprintf, buffer, size, "{type = binary}");
+	}
 	return total;
 }
 
 struct coll *
 coll_new(const struct coll_def *def)
 {
-	assert(def->type == COLL_TYPE_ICU);
 	int fingerprint_len = coll_def_snfingerprint(NULL, 0, def);
 	assert(fingerprint_len <= TT_STATIC_BUF_LEN);
 	char *fingerprint = tt_static_buf();
@@ -296,9 +321,20 @@ coll_new(const struct coll_def *def)
 	memcpy((char *) coll->fingerprint, fingerprint, fingerprint_len + 1);
 	coll->refs = 1;
 	coll->type = def->type;
-	if (coll_icu_init_cmp(coll, def) != 0) {
-		free(coll);
-		return NULL;
+	switch (coll->type) {
+	case COLL_TYPE_ICU:
+		if (coll_icu_init_cmp(coll, def) != 0) {
+			free(coll);
+			return NULL;
+		}
+		break;
+	case COLL_TYPE_BINARY:
+		coll->collator = NULL;
+		coll->cmp = coll_bin_cmp;
+		coll->hash = coll_bin_hash;
+		break;
+	default:
+		unreachable();
 	}
 
 	struct mh_coll_node_t node = { fingerprint_len, hash, coll };
diff --git a/src/coll_def.c b/src/coll_def.c
index df58caca8..3a4fc2b87 100644
--- a/src/coll_def.c
+++ b/src/coll_def.c
@@ -31,7 +31,7 @@
 #include "coll_def.h"
 
 const char *coll_type_strs[] = {
-	"ICU"
+	"ICU", "BINARY"
 };
 
 const char *coll_icu_on_off_strs[] = {
diff --git a/src/coll_def.h b/src/coll_def.h
index 7c20abf66..d3af89802 100644
--- a/src/coll_def.h
+++ b/src/coll_def.h
@@ -36,6 +36,7 @@
 /** The supported collation types */
 enum coll_type {
 	COLL_TYPE_ICU = 0,
+	COLL_TYPE_BINARY = 1,
 	coll_type_MAX,
 };
 
diff --git a/test/box/ddl.result b/test/box/ddl.result
index c9a8e96ae..d3b0d1e0e 100644
--- a/test/box/ddl.result
+++ b/test/box/ddl.result
@@ -356,7 +356,7 @@ box.space._collation:auto_increment{'test', 0, 'ICU', 42}
 ...
 box.space._collation:auto_increment{'test', 0, 'ICU', 'ru_RU', setmap{}} --ok
 ---
-- [3, 'test', 0, 'ICU', 'ru_RU', {}]
+- [4, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 box.space._collation:auto_increment{'test', 0, 'ICU', 'ru_RU', setmap{}}
 ---
@@ -364,7 +364,7 @@ box.space._collation:auto_increment{'test', 0, 'ICU', 'ru_RU', setmap{}}
 ...
 box.space._collation.index.name:delete{'test'} -- ok
 ---
-- [3, 'test', 0, 'ICU', 'ru_RU', {}]
+- [4, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 box.space._collation.index.name:delete{'nothing'} -- allowed
 ---
@@ -480,24 +480,28 @@ _ = box.space._collation.index.name:delete{'test'} -- ok
 ...
 box.space._collation:auto_increment{'test', 0, 'ICU', 'ru_RU', setmap{}}
 ---
-- [3, 'test', 0, 'ICU', 'ru_RU', {}]
+- [4, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 box.space._collation:select{}
 ---
-- - [1, 'unicode', 1, 'ICU', '', {}]
+- - [0, 'none', 1, 'BINARY', '', {}]
+  - [1, 'unicode', 1, 'ICU', '', {}]
   - [2, 'unicode_ci', 1, 'ICU', '', {'strength': 'primary'}]
-  - [3, 'test', 0, 'ICU', 'ru_RU', {}]
+  - [3, 'binary', 1, 'BINARY', '', {}]
+  - [4, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 test_run:cmd('restart server default')
 box.space._collation:select{}
 ---
-- - [1, 'unicode', 1, 'ICU', '', {}]
+- - [0, 'none', 1, 'BINARY', '', {}]
+  - [1, 'unicode', 1, 'ICU', '', {}]
   - [2, 'unicode_ci', 1, 'ICU', '', {'strength': 'primary'}]
-  - [3, 'test', 0, 'ICU', 'ru_RU', {}]
+  - [3, 'binary', 1, 'BINARY', '', {}]
+  - [4, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 box.space._collation.index.name:delete{'test'}
 ---
-- [3, 'test', 0, 'ICU', 'ru_RU', {}]
+- [4, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 --
 -- gh-3290: expose ICU into Lua. It uses built-in collations, that
diff --git a/test/box/misc.result b/test/box/misc.result
index 3ada82fb7..ea3cd8805 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -498,6 +498,7 @@ t;
   169: box.error.NO_SUCH_CONSTRAINT
   170: box.error.CONSTRAINT_EXISTS
   171: box.error.SQL_TYPE_MISMATCH
+  172: box.error.DROP_COLLATION
 ...
 test_run:cmd("setopt delimiter ''");
 ---
diff --git a/test/box/net.box.result b/test/box/net.box.result
index 57eebfbfc..4f6979d5b 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -2654,7 +2654,7 @@ c.space.test.index.sk.parts
 ---
 - - type: string
     is_nullable: false
-    collation_id: 3
+    collation_id: 4
     fieldno: 1
 ...
 c:close()
diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
index eb4f43a90..a4684b956 100755
--- a/test/sql-tap/collation.test.lua
+++ b/test/sql-tap/collation.test.lua
@@ -17,11 +17,13 @@ test:do_execsql_test(
     prefix.."0.2",
     "pragma collation_list",
     {
-        0,"unicode",
-        1,"unicode_ci",
-        2,"unicode_numeric",
-        3,"unicode_numeric_s2",
-        4,"unicode_tur_s2"
+        0,"none",
+        1,"unicode",
+        2,"unicode_ci",
+        3,"binary",
+        4,"unicode_numeric",
+        5,"unicode_numeric_s2",
+        6,"unicode_tur_s2"
     }
 )
 
diff --git a/test/sql-tap/distinct.test.lua b/test/sql-tap/distinct.test.lua
index 26a4aace2..a83b37829 100755
--- a/test/sql-tap/distinct.test.lua
+++ b/test/sql-tap/distinct.test.lua
@@ -126,7 +126,7 @@ local data = {
     {"14.2", 1, "SELECT DISTINCT a, d COLLATE \"unicode_ci\" FROM t4"},
     {"15 ", 0, "SELECT DISTINCT a, d COLLATE binary FROM t1"},
     {"16.1", 0, "SELECT DISTINCT a, b, c COLLATE binary FROM t1"},
-    {"16.2", 1, "SELECT DISTINCT a, b, c COLLATE binary FROM t4"},
+    {"16.2", 0, "SELECT DISTINCT a, b, c COLLATE binary FROM t4"},
     {"17",  0,   --{ \/* Technically, it would be possible to detect that DISTINCT\n            ** is a no-op in cases like the following. But SQLite does not\n            ** do so. *\/\n
     "SELECT DISTINCT t1.id FROM t1, t2 WHERE t1.id=t2.x" },
     {"18 ", 1, "SELECT DISTINCT c1, c2 FROM t3"},
diff --git a/test/sql-tap/in3.test.lua b/test/sql-tap/in3.test.lua
index 83139a3e4..ad4f4fc2c 100755
--- a/test/sql-tap/in3.test.lua
+++ b/test/sql-tap/in3.test.lua
@@ -209,7 +209,7 @@ test:do_test(
         return exec_neph(" SELECT a FROM t1 WHERE a COLLATE binary IN (SELECT a FROM t1) ")
     end, {
         -- <in3-1.15>
-        0, 1, 3, 5
+        1, 1, 3, 5
         -- </in3-1.15>
     })
 
diff --git a/test/sql-tap/index3.test.lua b/test/sql-tap/index3.test.lua
index 4f950be09..6a9911ada 100755
--- a/test/sql-tap/index3.test.lua
+++ b/test/sql-tap/index3.test.lua
@@ -59,7 +59,7 @@ test:do_execsql_test(
         CREATE TABLE t1(a INT , b TEXT , c INT , d INT , e INT ,
                         PRIMARY KEY(a), UNIQUE(b COLLATE "unicode_ci" DESC));
         CREATE INDEX t1c ON t1(c);
-        CREATE INDEX t1d ON t1(d COLLATE binary ASC);
+        CREATE INDEX t1d ON t1(d);
         WITH RECURSIVE c(x) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<30)
           INSERT INTO t1(a,b,c,d,e) 
             SELECT x, printf('ab%03xxy',x), x, x, x FROM c;
diff --git a/test/sql-tap/where2.test.lua b/test/sql-tap/where2.test.lua
index a2b60e347..9089c97f9 100755
--- a/test/sql-tap/where2.test.lua
+++ b/test/sql-tap/where2.test.lua
@@ -183,7 +183,7 @@ test:do_test(
   ]])
     end, {
         -- <where2-2.1>
-        85, 6, 7396, 7402, "nosort", "T1", "*"
+        85, 6, 7396, 7402, "sort", "T1", "*"
         -- </where2-2.1>
     })
 
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 419e469f7..3df4cfa70 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -107,6 +107,33 @@ cn:execute('select 1 limit ? collate not_exist', {1})
 cn:close()
 ---
 ...
+-- Explicitly set BINARY collation is predifined and has ID.
+--
+box.sql.execute("CREATE TABLE t (id INT PRIMARY KEY, a TEXT, b TEXT COLLATE \"binary\");")
+---
+...
+box.space.T:format()[2]['collation']
+---
+- null
+...
+box.space.T:format()[3]['collation']
+---
+- 3
+...
+box.sql.execute("DROP TABLE t;")
+---
+...
+-- Collation with id == 0 is "none". It used to unify interaction
+-- with collation interface. It also can't be dropped.
+--
+box.space._collation:select{0}
+---
+- - [0, 'none', 1, 'BINARY', '', {}]
+...
+box.space._collation:delete{0}
+---
+- error: 'Can''t drop collation none : system collation'
+...
 box.schema.user.revoke('guest', 'read,write,execute', 'universe')
 ---
 ...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index da577c910..61df33a95 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -42,4 +42,18 @@ cn = remote.connect(box.cfg.listen)
 cn:execute('select 1 limit ? collate not_exist', {1})
 
 cn:close()
+
+-- Explicitly set BINARY collation is predifined and has ID.
+--
+box.sql.execute("CREATE TABLE t (id INT PRIMARY KEY, a TEXT, b TEXT COLLATE \"binary\");")
+box.space.T:format()[2]['collation']
+box.space.T:format()[3]['collation']
+box.sql.execute("DROP TABLE t;")
+
+-- Collation with id == 0 is "none". It used to unify interaction
+-- with collation interface. It also can't be dropped.
+--
+box.space._collation:select{0}
+box.space._collation:delete{0}
+
 box.schema.user.revoke('guest', 'read,write,execute', 'universe')
-- 
2.15.1





More information about the Tarantool-patches mailing list