Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: imeevma@tarantool.org, tarantool-patches@freelists.org,
	Kirill Yukhin <kyukhin@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE
Date: Mon, 30 Jul 2018 23:27:12 +0300	[thread overview]
Message-ID: <6a583615-a4cd-cff5-784f-a9cc64ca67d8@tarantool.org> (raw)
In-Reply-To: <2c8027b8d79da5b909cfbb4e9d0688615ff71cee.1532964363.git.imeevma@gmail.com>

Thanks for the fixes! LGTM.

On 30/07/2018 18:30, imeevma@tarantool.org wrote:
> SQL functions UPPER and LOWER now works
> with COLLATE as they should according to
> ANSI Standart.
> 
> Closes #3052.
> ---
> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower
> Issue: https://github.com/tarantool/tarantool/issues/3052
> 
>   src/box/sql/func.c          | 22 +++++++++++-----
>   test/sql/collation.result   | 62 +++++++++++++++++++++++++++++++++++++++++++++
>   test/sql/collation.test.lua | 23 +++++++++++++++++
>   3 files changed, 100 insertions(+), 7 deletions(-)
> 
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index e211de1..45056a7 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -37,12 +37,13 @@
>   #include "sqliteInt.h"
>   #include "vdbeInt.h"
>   #include "version.h"
> +#include "coll.h"
>   #include <unicode/ustring.h>
>   #include <unicode/ucasemap.h>
>   #include <unicode/ucnv.h>
>   #include <unicode/uchar.h>
> +#include <unicode/ucol.h>
>   
> -static UCaseMap *pUCaseMap;
>   static UConverter* pUtf8conv;
>   
>   /*
> @@ -503,7 +504,15 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv)   \
>   		return;                                                        \
>   	}                                                                      \
>   	UErrorCode status = U_ZERO_ERROR;                                      \
> -	int len = ucasemap_utf8To##case_type(pUCaseMap, z1, n, z2, n, &status);\
> +	struct coll *coll = sqlite3GetFuncCollSeq(context);                    \
> +	const char *locale = NULL;                                             \
> +	if (coll != NULL) {                                                    \
> +		locale = ucol_getLocaleByType(coll->collator,                  \
> +					      ULOC_VALID_LOCALE, &status);     \
> +	}                                                                      \
> +	UCaseMap *case_map = ucasemap_open(locale, 0, &status);                \
> +	assert(case_map != NULL);                                              \
> +	int len = ucasemap_utf8To##case_type(case_map, z1, n, z2, n, &status); \
>   	if (len > n) {                                                         \
>   		status = U_ZERO_ERROR;                                         \
>   		sqlite3_free(z1);                                              \
> @@ -512,8 +521,9 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv)   \
>   			sqlite3_result_error_nomem(context);                   \
>   			return;                                                \
>   		}                                                              \
> -		ucasemap_utf8To##case_type(pUCaseMap, z1, len, z2, n, &status);\
> +		ucasemap_utf8To##case_type(case_map, z1, len, z2, n, &status); \
>   	}                                                                      \
> +	ucasemap_close(case_map);                                              \
>   	sqlite3_result_text(context, z1, len, sqlite3_free);                   \
>   }                                                                              \
>   
> @@ -1789,8 +1799,6 @@ sqlite3RegisterBuiltinFunctions(void)
>   	 */
>   	UErrorCode status = U_ZERO_ERROR;
>   
> -	pUCaseMap = ucasemap_open(NULL, 0, &status);
> -	assert(pUCaseMap);
>   	pUtf8conv = ucnv_open("utf8", &status);
>   	assert(pUtf8conv);
>   	/*
> @@ -1835,8 +1843,8 @@ sqlite3RegisterBuiltinFunctions(void)
>   		FUNCTION(round, 1, 0, 0, roundFunc),
>   		FUNCTION(round, 2, 0, 0, roundFunc),
>   #endif
> -		FUNCTION(upper, 1, 0, 0, UpperICUFunc),
> -		FUNCTION(lower, 1, 0, 0, LowerICUFunc),
> +		FUNCTION(upper, 1, 0, 1, UpperICUFunc),
> +		FUNCTION(lower, 1, 0, 1, LowerICUFunc),
>   		FUNCTION(hex, 1, 0, 0, hexFunc),
>   		FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQLITE_FUNC_COALESCE),
>   		VFUNCTION(random, 0, 0, 0, randomFunc),
> diff --git a/test/sql/collation.result b/test/sql/collation.result
> index 7fec96d..79ba9ab 100644
> --- a/test/sql/collation.result
> +++ b/test/sql/collation.result
> @@ -32,6 +32,68 @@ box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
>   ---
>   - error: 'near "COLLATE": syntax error'
>   ...
> +-- gh-3052: upper/lower support only default locale
> +-- For tr-TR result depends on collation
> +box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
> +---
> +...
> +box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
> +---
> +...
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
> +---
> +- - ['Latin Capital Letter I U+0049', 'I', 'i']
> +  - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i̇']
> +  - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
> +  - ['Latin Small Letter I U+0069', 'I', 'i']
> +...
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
> +---
> +- - ['Latin Capital Letter I U+0049', 'I', 'ı']
> +  - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i']
> +  - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
> +  - ['Latin Small Letter I U+0069', 'İ', 'i']
> +...
> +box.internal.collation.drop('TURKISH')
> +---
> +...
> +-- For de-DE result is actually the same
> +box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
> +---
> +...
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
> +---
> +- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
> +...
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
> +---
> +- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
> +...
> +box.internal.collation.drop('GERMAN')
> +---
> +...
> +box.sql.execute(([[DROP TABLE tu]]))
> +---
> +...
>   box.schema.user.grant('guest','read,write,execute', 'universe')
>   ---
>   ...
> diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
> index ff2c5b2..935dea8 100644
> --- a/test/sql/collation.test.lua
> +++ b/test/sql/collation.test.lua
> @@ -12,6 +12,29 @@ box.sql.execute("SELECT 1 LIMIT 1 OFFSET 1 COLLATE BINARY;")
>   box.sql.execute("SELECT 1 LIMIT 1, 1 COLLATE BINARY;")
>   box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
>   
> +-- gh-3052: upper/lower support only default locale
> +-- For tr-TR result depends on collation
> +box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
> +box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
> +box.internal.collation.drop('TURKISH')
> +
> +-- For de-DE result is actually the same
> +box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
> +box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
> +box.internal.collation.drop('GERMAN')
> +box.sql.execute(([[DROP TABLE tu]]))
>   
>   box.schema.user.grant('guest','read,write,execute', 'universe')
>   cn = remote.connect(box.cfg.listen)
> 

  reply	other threads:[~2018-07-30 20:27 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-30 15:30 [tarantool-patches] " imeevma
2018-07-30 20:27 ` Vladislav Shpilevoy [this message]
2018-07-31  7:16 ` [tarantool-patches] " Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6a583615-a4cd-cff5-784f-a9cc64ca67d8@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=imeevma@tarantool.org \
    --cc=kyukhin@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='[tarantool-patches] Re: [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox