From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 99D0826E99 for ; Mon, 30 Jul 2018 16:27:15 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id oXBFpHAcdDcc for ; Mon, 30 Jul 2018 16:27:15 -0400 (EDT) Received: from smtp58.i.mail.ru (smtp58.i.mail.ru [217.69.128.38]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id B94F522A84 for ; Mon, 30 Jul 2018 16:27:14 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE References: <2c8027b8d79da5b909cfbb4e9d0688615ff71cee.1532964363.git.imeevma@gmail.com> From: Vladislav Shpilevoy Message-ID: <6a583615-a4cd-cff5-784f-a9cc64ca67d8@tarantool.org> Date: Mon, 30 Jul 2018 23:27:12 +0300 MIME-Version: 1.0 In-Reply-To: <2c8027b8d79da5b909cfbb4e9d0688615ff71cee.1532964363.git.imeevma@gmail.com> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Language: en-US Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: imeevma@tarantool.org, tarantool-patches@freelists.org, Kirill Yukhin Thanks for the fixes! LGTM. On 30/07/2018 18:30, imeevma@tarantool.org wrote: > SQL functions UPPER and LOWER now works > with COLLATE as they should according to > ANSI Standart. > > Closes #3052. > --- > Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower > Issue: https://github.com/tarantool/tarantool/issues/3052 > > src/box/sql/func.c | 22 +++++++++++----- > test/sql/collation.result | 62 +++++++++++++++++++++++++++++++++++++++++++++ > test/sql/collation.test.lua | 23 +++++++++++++++++ > 3 files changed, 100 insertions(+), 7 deletions(-) > > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index e211de1..45056a7 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -37,12 +37,13 @@ > #include "sqliteInt.h" > #include "vdbeInt.h" > #include "version.h" > +#include "coll.h" > #include > #include > #include > #include > +#include > > -static UCaseMap *pUCaseMap; > static UConverter* pUtf8conv; > > /* > @@ -503,7 +504,15 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv) \ > return; \ > } \ > UErrorCode status = U_ZERO_ERROR; \ > - int len = ucasemap_utf8To##case_type(pUCaseMap, z1, n, z2, n, &status);\ > + struct coll *coll = sqlite3GetFuncCollSeq(context); \ > + const char *locale = NULL; \ > + if (coll != NULL) { \ > + locale = ucol_getLocaleByType(coll->collator, \ > + ULOC_VALID_LOCALE, &status); \ > + } \ > + UCaseMap *case_map = ucasemap_open(locale, 0, &status); \ > + assert(case_map != NULL); \ > + int len = ucasemap_utf8To##case_type(case_map, z1, n, z2, n, &status); \ > if (len > n) { \ > status = U_ZERO_ERROR; \ > sqlite3_free(z1); \ > @@ -512,8 +521,9 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv) \ > sqlite3_result_error_nomem(context); \ > return; \ > } \ > - ucasemap_utf8To##case_type(pUCaseMap, z1, len, z2, n, &status);\ > + ucasemap_utf8To##case_type(case_map, z1, len, z2, n, &status); \ > } \ > + ucasemap_close(case_map); \ > sqlite3_result_text(context, z1, len, sqlite3_free); \ > } \ > > @@ -1789,8 +1799,6 @@ sqlite3RegisterBuiltinFunctions(void) > */ > UErrorCode status = U_ZERO_ERROR; > > - pUCaseMap = ucasemap_open(NULL, 0, &status); > - assert(pUCaseMap); > pUtf8conv = ucnv_open("utf8", &status); > assert(pUtf8conv); > /* > @@ -1835,8 +1843,8 @@ sqlite3RegisterBuiltinFunctions(void) > FUNCTION(round, 1, 0, 0, roundFunc), > FUNCTION(round, 2, 0, 0, roundFunc), > #endif > - FUNCTION(upper, 1, 0, 0, UpperICUFunc), > - FUNCTION(lower, 1, 0, 0, LowerICUFunc), > + FUNCTION(upper, 1, 0, 1, UpperICUFunc), > + FUNCTION(lower, 1, 0, 1, LowerICUFunc), > FUNCTION(hex, 1, 0, 0, hexFunc), > FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQLITE_FUNC_COALESCE), > VFUNCTION(random, 0, 0, 0, randomFunc), > diff --git a/test/sql/collation.result b/test/sql/collation.result > index 7fec96d..79ba9ab 100644 > --- a/test/sql/collation.result > +++ b/test/sql/collation.result > @@ -32,6 +32,68 @@ box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;") > --- > - error: 'near "COLLATE": syntax error' > ... > +-- gh-3052: upper/lower support only default locale > +-- For tr-TR result depends on collation > +box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]); > +--- > +... > +box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'}); > +--- > +... > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]]) > +--- > +... > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]]) > +--- > +... > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]]) > +--- > +... > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]]) > +--- > +... > +-- Without collation > +box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]]) > +--- > +- - ['Latin Capital Letter I U+0049', 'I', 'i'] > + - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i̇'] > + - ['Latin Small Letter Dotless I U+0131', 'I', 'ı'] > + - ['Latin Small Letter I U+0069', 'I', 'i'] > +... > +-- With collation > +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]]) > +--- > +- - ['Latin Capital Letter I U+0049', 'I', 'ı'] > + - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i'] > + - ['Latin Small Letter Dotless I U+0131', 'I', 'ı'] > + - ['Latin Small Letter I U+0069', 'İ', 'i'] > +... > +box.internal.collation.drop('TURKISH') > +--- > +... > +-- For de-DE result is actually the same > +box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'}); > +--- > +... > +box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]]) > +--- > +... > +-- Without collation > +box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]]) > +--- > +- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß'] > +... > +-- With collation > +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]]) > +--- > +- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß'] > +... > +box.internal.collation.drop('GERMAN') > +--- > +... > +box.sql.execute(([[DROP TABLE tu]])) > +--- > +... > box.schema.user.grant('guest','read,write,execute', 'universe') > --- > ... > diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua > index ff2c5b2..935dea8 100644 > --- a/test/sql/collation.test.lua > +++ b/test/sql/collation.test.lua > @@ -12,6 +12,29 @@ box.sql.execute("SELECT 1 LIMIT 1 OFFSET 1 COLLATE BINARY;") > box.sql.execute("SELECT 1 LIMIT 1, 1 COLLATE BINARY;") > box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;") > > +-- gh-3052: upper/lower support only default locale > +-- For tr-TR result depends on collation > +box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]); > +box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'}); > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]]) > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]]) > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]]) > +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]]) > +-- Without collation > +box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]]) > +-- With collation > +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]]) > +box.internal.collation.drop('TURKISH') > + > +-- For de-DE result is actually the same > +box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'}); > +box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]]) > +-- Without collation > +box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]]) > +-- With collation > +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]]) > +box.internal.collation.drop('GERMAN') > +box.sql.execute(([[DROP TABLE tu]])) > > box.schema.user.grant('guest','read,write,execute', 'universe') > cn = remote.connect(box.cfg.listen) >