Tarantool development patches archive
 help / color / mirror / Atom feed
From: imeevma@tarantool.org
To: tarantool-patches@freelists.org
Subject: [tarantool-patches] [PATCH v1 1/1] sql: UPPER and LOWER support COLLATE
Date: Fri, 27 Jul 2018 18:40:09 +0300	[thread overview]
Message-ID: <76c1c2215de0e9d35e9501158efb1eea8ecfa52b.1532705640.git.imeevma@gmail.com> (raw)

SQL functions UPPER and LOWER now work
with COLLATE, as they should according to
the ANSI standard.

Closes #3052.
---
Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3052-collate-for-upper-lower
Issue: https://github.com/tarantool/tarantool/issues/3052

 src/box/sql/func.c          | 18 ++++++++++-----
 test/sql/collation.result   | 53 +++++++++++++++++++++++++++++++++++++++++++++
 test/sql/collation.test.lua | 20 +++++++++++++++++
 3 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e211de1..637121c 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -37,12 +37,13 @@
 #include "sqliteInt.h"
 #include "vdbeInt.h"
 #include "version.h"
+#include "coll.h"
 #include <unicode/ustring.h>
 #include <unicode/ucasemap.h>
 #include <unicode/ucnv.h>
 #include <unicode/uchar.h>
+#include <unicode/ucol.h>
 
-static UCaseMap *pUCaseMap;
 static UConverter* pUtf8conv;
 
 /*
@@ -503,6 +504,14 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv)   \
 		return;                                                        \
 	}                                                                      \
 	UErrorCode status = U_ZERO_ERROR;                                      \
+	struct coll *pColl = sqlite3GetFuncCollSeq(context);                   \
+	const char *locale = NULL;                                             \
+	if (pColl != NULL) {                                                   \
+		locale = ucol_getLocaleByType(pColl->collator,                 \
+					      ULOC_VALID_LOCALE, &status);     \
+	}                                                                      \
+	UCaseMap *pUCaseMap = ucasemap_open(locale, 0, &status);               \
+	assert(pUCaseMap);                                                     \
 	int len = ucasemap_utf8To##case_type(pUCaseMap, z1, n, z2, n, &status);\
 	if (len > n) {                                                         \
 		status = U_ZERO_ERROR;                                         \
@@ -514,6 +523,7 @@ case_type##ICUFunc(sqlite3_context *context, int argc, sqlite3_value **argv)   \
 		}                                                              \
 		ucasemap_utf8To##case_type(pUCaseMap, z1, len, z2, n, &status);\
 	}                                                                      \
+	ucasemap_close(pUCaseMap);                                             \
 	sqlite3_result_text(context, z1, len, sqlite3_free);                   \
 }                                                                              \
 
@@ -1789,8 +1799,6 @@ sqlite3RegisterBuiltinFunctions(void)
 	 */
 	UErrorCode status = U_ZERO_ERROR;
 
-	pUCaseMap = ucasemap_open(NULL, 0, &status);
-	assert(pUCaseMap);
 	pUtf8conv = ucnv_open("utf8", &status);
 	assert(pUtf8conv);
 	/*
@@ -1835,8 +1843,8 @@ sqlite3RegisterBuiltinFunctions(void)
 		FUNCTION(round, 1, 0, 0, roundFunc),
 		FUNCTION(round, 2, 0, 0, roundFunc),
 #endif
-		FUNCTION(upper, 1, 0, 0, UpperICUFunc),
-		FUNCTION(lower, 1, 0, 0, LowerICUFunc),
+		FUNCTION(upper, 1, 0, 1, UpperICUFunc),
+		FUNCTION(lower, 1, 0, 1, LowerICUFunc),
 		FUNCTION(hex, 1, 0, 0, hexFunc),
 		FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQLITE_FUNC_COALESCE),
 		VFUNCTION(random, 0, 0, 0, randomFunc),
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 7fec96d..e057bb6 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -32,6 +32,59 @@ box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
 ---
 - error: 'near "COLLATE": syntax error'
 ...
+-- gh-3052: sql: upper/lower support only default locale
+-- For tr-TR result depends on collation
+box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
+---
+...
+box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
+---
+...
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
+---
+- - ['Latin Capital Letter I U+0049', 'I', 'i']
+  - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i̇']
+  - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
+  - ['Latin Small Letter I U+0069', 'I', 'i']
+...
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
+---
+- - ['Latin Capital Letter I U+0049', 'I', 'ı']
+  - ['Latin Capital Letter I With Dot Above U+0130', 'İ', 'i']
+  - ['Latin Small Letter Dotless I U+0131', 'I', 'ı']
+  - ['Latin Small Letter I U+0069', 'İ', 'i']
+...
+-- For de-DE result is actually the same
+box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
+---
+...
+box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
+---
+...
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
+---
+- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
+...
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
+---
+- - ['German Small Letter Sharp S U+00DF', 'SS', 'ß']
+...
 box.schema.user.grant('guest','read,write,execute', 'universe')
 ---
 ...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index ff2c5b2..bb21999 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -12,6 +12,26 @@ box.sql.execute("SELECT 1 LIMIT 1 OFFSET 1 COLLATE BINARY;")
 box.sql.execute("SELECT 1 LIMIT 1, 1 COLLATE BINARY;")
 box.sql.execute("SELECT 1 LIMIT 1 COLLATE BINARY, 1;")
 
+-- gh-3052: sql: upper/lower support only default locale
+-- For tr-TR result depends on collation
+box.internal.collation.create('TURKISH', 'ICU', 'tr-TR', {strength='primary'});
+box.sql.execute([[CREATE TABLE tu (descriptor CHAR(50) PRIMARY KEY, letter CHAR)]]);
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I U+0049','I');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter I U+0069','i');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Capital Letter I With Dot Above U+0130','İ');]])
+box.sql.execute([[INSERT INTO tu VALUES ('Latin Small Letter Dotless I U+0131','ı');]])
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter) AS upper,lower(letter) AS lower FROM tu;]])
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "TURKISH") AS upper,lower(letter COLLATE "TURKISH") AS lower FROM tu;]])
+
+-- For de-DE result is actually the same
+box.internal.collation.create('GERMAN', 'ICU', 'de-DE', {strength='primary'});
+box.sql.execute([[INSERT INTO tu VALUES ('German Small Letter Sharp S U+00DF','ß');]])
+-- Without collation
+box.sql.execute([[SELECT descriptor, upper(letter), letter FROM tu where UPPER(letter) = 'SS';]])
+-- With collation
+box.sql.execute([[SELECT descriptor, upper(letter COLLATE "GERMAN"), letter FROM tu where UPPER(letter COLLATE "GERMAN") = 'SS';]])
 
 box.schema.user.grant('guest','read,write,execute', 'universe')
 cn = remote.connect(box.cfg.listen)
-- 
2.7.4

             reply	other threads:[~2018-07-27 15:40 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-27 15:40 imeevma [this message]
2018-07-30 10:35 ` [tarantool-patches] " Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=76c1c2215de0e9d35e9501158efb1eea8ecfa52b.1532705640.git.imeevma@gmail.com \
    --to=imeevma@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH v1 1/1] sql: UPPER and LOWER support COLLATE' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox