* [tarantool-patches] [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE
@ 2018-07-30 15:30 imeevma
2018-07-30 20:27 ` [tarantool-patches] " Vladislav Shpilevoy
2018-07-31 7:16 ` Kirill Yukhin
0 siblings, 2 replies; 3+ messages in thread
From: imeevma @ 2018-07-30 15:30 UTC (permalink / raw)
To: tarantool-patches; +Cc: v.shpilevoy
SQL functions UPPER and LOWER now works
with COLLATE as they should according to
ANSI Standart.
Closes #3052.
---
Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower
Issue: https://github.com/tarantool/tarantool/issues/3052
src/box/sql/func.c | 22 +++++++++++-----
test/sql/collation.result | 62 +++++++++++++++++++++++++++++++++++++++++++++
test/sql/collation.test.lua | 23 +++++++++++++++++
3 files changed, 100 insertions(+), 7 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e211de1..45056a7 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -37,12 +37,13 @@
#include "sqliteInt.h"
#include "vdbeInt.h"
#include "version.h"
+#include "coll.h"
#include <unicode/ustring.h>
#include <unicode/ucasemap.h>
#include <unicode/ucnv.h>
#include <unicode/uchar.h>
+#include <unicode/ucol.h>
-static UCaseMap *pUCaseMap;
static UConverter* pUtf8conv;
/*
@@ -503,7 +504,15 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv) \
return; \
} \
UErrorCode status = U_ZERO_ERROR; \
- int len = ucasemap_utf8To##case_type(pUCaseMap, z1, n, z2, n, &status);\
+ struct coll *coll = sqlite3GetFuncCollSeq(context); \
+ const char *locale = NULL; \
+ if (coll != NULL) { \
+ locale = ucol_getLocaleByType(coll->collator, \
+ ULOC_VALID_LOCALE, &status); \
+ } \
+ UCaseMap *case_map = ucasemap_open(locale, 0, &status); \
+ assert(case_map != NULL); \
+ int len = ucasemap_utf8To##case_type(case_map, z1, n, z2, n, &status); \
if (len > n) { \
status = U_ZERO_ERROR; \
sqlite3_free(z1); \
@@ -512,8 +521,9 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv) \
sqlite3_result_error_nomem(context); \
return; \
} \
- ucasemap_utf8To##case_type(pUCaseMap, z1, len, z2, n, &status);\
+ ucasemap_utf8To##case_type(case_map, z1, len, z2, n, &status); \
} \
+ ucasemap_close(case_map); \
sqlite3_result_text(context, z1, len, sqlite3_free); \
} \
@@ -1789,8 +1799,6 @@ sqlite3RegisterBuiltinFunctions(void)
*/
UErrorCode status = U_ZERO_ERROR;
- pUCaseMap = ucasemap_open(NULL, 0, &status);
- assert(pUCaseMap);
pUtf8conv = ucnv_open("utf8", &status);
assert(pUtf8conv);
/*
@@ -1835,8 +1843,8 @@ sqlite3RegisterBuiltinFunctions(void)
FUNCTION(round, 1, 0, 0, roundFunc),
FUNCTION(round, 2, 0, 0, roundFunc),
#endif
- FUNCTION(upper, 1, 0, 0, UpperICUFunc),
- FUNCTION(lower, 1, 0, 0, LowerICUFunc),
+ FUNCTION(upper, 1, 0, 1, UpperICUFunc),
+ FUNCTION(lower, 1, 0, 1, LowerICUFunc),
FUNCTION(hex, 1, 0, 0, hexFunc),
FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQLITE_FUNC_COALESCE),
VFUNCTION(random, 0, 0, 0, randomFunc),
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 7fec96d..79ba9ab 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -32,6 +32,68 @@ box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
---
- error: 'near "COLLATE": syntax error'
...
+-- gh-3052: upper/lower support only default locale
+-- For tr-TR result depends on collation
+box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
+---
+...
+box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
+---
+...
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
+---
+- - ['Latin Capital Letter I U+0049', 'I', 'i']
+ - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i̇']
+ - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
+ - ['Latin Small Letter I U+0069', 'I', 'i']
+...
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
+---
+- - ['Latin Capital Letter I U+0049', 'I', 'ı']
+ - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i']
+ - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
+ - ['Latin Small Letter I U+0069', 'İ', 'i']
+...
+box.internal.collation.drop('TURKISH')
+---
+...
+-- For de-DE result is actually the same
+box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
+---
+...
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
+---
+- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
+...
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
+---
+- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
+...
+box.internal.collation.drop('GERMAN')
+---
+...
+box.sql.execute(([[DROP TABLE tu]]))
+---
+...
box.schema.user.grant('guest','read,write,execute', 'universe')
---
...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index ff2c5b2..935dea8 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -12,6 +12,29 @@ box.sql.execute("SELECT 1 LIMIT 1 OFFSET 1 COLLATE BINARY;")
box.sql.execute("SELECT 1 LIMIT 1, 1 COLLATE BINARY;")
box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
+-- gh-3052: upper/lower support only default locale
+-- For tr-TR result depends on collation
+box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
+box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
+box.internal.collation.drop('TURKISH')
+
+-- For de-DE result is actually the same
+box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
+box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
+box.internal.collation.drop('GERMAN')
+box.sql.execute(([[DROP TABLE tu]]))
box.schema.user.grant('guest','read,write,execute', 'universe')
cn = remote.connect(box.cfg.listen)
--
2.7.4
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tarantool-patches] Re: [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE
2018-07-30 15:30 [tarantool-patches] [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE imeevma
@ 2018-07-30 20:27 ` Vladislav Shpilevoy
2018-07-31 7:16 ` Kirill Yukhin
1 sibling, 0 replies; 3+ messages in thread
From: Vladislav Shpilevoy @ 2018-07-30 20:27 UTC (permalink / raw)
To: imeevma, tarantool-patches, Kirill Yukhin
Thanks for the fixes! LGTM.
On 30/07/2018 18:30, imeevma@tarantool.org wrote:
> SQL functions UPPER and LOWER now works
> with COLLATE as they should according to
> ANSI Standart.
>
> Closes #3052.
> ---
> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower
> Issue: https://github.com/tarantool/tarantool/issues/3052
>
> src/box/sql/func.c | 22 +++++++++++-----
> test/sql/collation.result | 62 +++++++++++++++++++++++++++++++++++++++++++++
> test/sql/collation.test.lua | 23 +++++++++++++++++
> 3 files changed, 100 insertions(+), 7 deletions(-)
>
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index e211de1..45056a7 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -37,12 +37,13 @@
> #include "sqliteInt.h"
> #include "vdbeInt.h"
> #include "version.h"
> +#include "coll.h"
> #include <unicode/ustring.h>
> #include <unicode/ucasemap.h>
> #include <unicode/ucnv.h>
> #include <unicode/uchar.h>
> +#include <unicode/ucol.h>
>
> -static UCaseMap *pUCaseMap;
> static UConverter* pUtf8conv;
>
> /*
> @@ -503,7 +504,15 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv) \
> return; \
> } \
> UErrorCode status = U_ZERO_ERROR; \
> - int len = ucasemap_utf8To##case_type(pUCaseMap, z1, n, z2, n, &status);\
> + struct coll *coll = sqlite3GetFuncCollSeq(context); \
> + const char *locale = NULL; \
> + if (coll != NULL) { \
> + locale = ucol_getLocaleByType(coll->collator, \
> + ULOC_VALID_LOCALE, &status); \
> + } \
> + UCaseMap *case_map = ucasemap_open(locale, 0, &status); \
> + assert(case_map != NULL); \
> + int len = ucasemap_utf8To##case_type(case_map, z1, n, z2, n, &status); \
> if (len > n) { \
> status = U_ZERO_ERROR; \
> sqlite3_free(z1); \
> @@ -512,8 +521,9 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv) \
> sqlite3_result_error_nomem(context); \
> return; \
> } \
> - ucasemap_utf8To##case_type(pUCaseMap, z1, len, z2, n, &status);\
> + ucasemap_utf8To##case_type(case_map, z1, len, z2, n, &status); \
> } \
> + ucasemap_close(case_map); \
> sqlite3_result_text(context, z1, len, sqlite3_free); \
> } \
>
> @@ -1789,8 +1799,6 @@ sqlite3RegisterBuiltinFunctions(void)
> */
> UErrorCode status = U_ZERO_ERROR;
>
> - pUCaseMap = ucasemap_open(NULL, 0, &status);
> - assert(pUCaseMap);
> pUtf8conv = ucnv_open("utf8", &status);
> assert(pUtf8conv);
> /*
> @@ -1835,8 +1843,8 @@ sqlite3RegisterBuiltinFunctions(void)
> FUNCTION(round, 1, 0, 0, roundFunc),
> FUNCTION(round, 2, 0, 0, roundFunc),
> #endif
> - FUNCTION(upper, 1, 0, 0, UpperICUFunc),
> - FUNCTION(lower, 1, 0, 0, LowerICUFunc),
> + FUNCTION(upper, 1, 0, 1, UpperICUFunc),
> + FUNCTION(lower, 1, 0, 1, LowerICUFunc),
> FUNCTION(hex, 1, 0, 0, hexFunc),
> FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQLITE_FUNC_COALESCE),
> VFUNCTION(random, 0, 0, 0, randomFunc),
> diff --git a/test/sql/collation.result b/test/sql/collation.result
> index 7fec96d..79ba9ab 100644
> --- a/test/sql/collation.result
> +++ b/test/sql/collation.result
> @@ -32,6 +32,68 @@ box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
> ---
> - error: 'near "COLLATE": syntax error'
> ...
> +-- gh-3052: upper/lower support only default locale
> +-- For tr-TR result depends on collation
> +box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
> +---
> +...
> +box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
> +---
> +...
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
> +---
> +- - ['Latin Capital Letter I U+0049', 'I', 'i']
> + - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i̇']
> + - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
> + - ['Latin Small Letter I U+0069', 'I', 'i']
> +...
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
> +---
> +- - ['Latin Capital Letter I U+0049', 'I', 'ı']
> + - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i']
> + - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
> + - ['Latin Small Letter I U+0069', 'İ', 'i']
> +...
> +box.internal.collation.drop('TURKISH')
> +---
> +...
> +-- For de-DE result is actually the same
> +box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
> +---
> +...
> +box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
> +---
> +...
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
> +---
> +- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
> +...
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
> +---
> +- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
> +...
> +box.internal.collation.drop('GERMAN')
> +---
> +...
> +box.sql.execute(([[DROP TABLE tu]]))
> +---
> +...
> box.schema.user.grant('guest','read,write,execute', 'universe')
> ---
> ...
> diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
> index ff2c5b2..935dea8 100644
> --- a/test/sql/collation.test.lua
> +++ b/test/sql/collation.test.lua
> @@ -12,6 +12,29 @@ box.sql.execute("SELECT 1 LIMIT 1 OFFSET 1 COLLATE BINARY;")
> box.sql.execute("SELECT 1 LIMIT 1, 1 COLLATE BINARY;")
> box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
>
> +-- gh-3052: upper/lower support only default locale
> +-- For tr-TR result depends on collation
> +box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
> +box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
> +box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
> +box.internal.collation.drop('TURKISH')
> +
> +-- For de-DE result is actually the same
> +box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
> +box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
> +-- Without collation
> +box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
> +-- With collation
> +box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
> +box.internal.collation.drop('GERMAN')
> +box.sql.execute(([[DROP TABLE tu]]))
>
> box.schema.user.grant('guest','read,write,execute', 'universe')
> cn = remote.connect(box.cfg.listen)
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tarantool-patches] Re: [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE
2018-07-30 15:30 [tarantool-patches] [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE imeevma
2018-07-30 20:27 ` [tarantool-patches] " Vladislav Shpilevoy
@ 2018-07-31 7:16 ` Kirill Yukhin
1 sibling, 0 replies; 3+ messages in thread
From: Kirill Yukhin @ 2018-07-31 7:16 UTC (permalink / raw)
To: tarantool-patches; +Cc: v.shpilevoy
Hello,
On 30 июл 18:30, imeevma@tarantool.org wrote:
> SQL functions UPPER and LOWER now works
> with COLLATE as they should according to
> ANSI Standart.
>
> Closes #3052.
> ---
> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower
> Issue: https://github.com/tarantool/tarantool/issues/3052
I've checked in your patch into 2.0 branch.
--
Regards, Kirill Yukhin
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-07-31 7:16 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-30 15:30 [tarantool-patches] [PATCH v2 1/1] sql: UPPER and LOWER support COLLATE imeevma
2018-07-30 20:27 ` [tarantool-patches] " Vladislav Shpilevoy
2018-07-31 7:16 ` Kirill Yukhin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox