[tarantool-patches] Re: [PATCH v4] Feature request for a new collation

Stanislav Zudin szudin at tarantool.org
Wed Mar 20 18:53:05 MSK 2019

The latest version of the patch sets COLL_LOCALE_LEN_MAX to 30 and adds
tests for the following collations:
- Afrikaans
- Amharic
- Assamese
- Azerbaijani
- Belarusian
- Kyrgyz
- German (phonebook)
- Hebrew
- Japanese
The source of test sequences is the following:

Issue: https://github.com/tarantool/tarantool/issues/4007

On 06.03.2019 12:48, Konstantin Osipov wrote:
> * Stanislav Zudin <szudin at tarantool.org> [19/03/06 11:26]:
>> On 06.03.2019 11:11, Konstantin Osipov wrote:
>>> * Stanislav Zudin <szudin at tarantool.org> [19/03/06 10:24]:
>>>> On 05.03.2019 18:41, Konstantin Osipov wrote:
>>>>> * Stanislav Zudin <szudin at tarantool.org> [19/03/05 14:45]:
>>>>>>     /** Maximal length of locale name. */
>>>>>>     enum {
>>>>>> -	COLL_LOCALE_LEN_MAX = 16,
>>>>>> +	COLL_LOCALE_LEN_MAX = 1024,
>>>>>>     };
>>>>> Why?
>>>> Because locale may include numerous options such as collation, numbers,
>>>> calendar etc. e.g.: "fr at collation=phonebook;calendar=islamic-civil".
>>>> The max length is not specified.
>>> *Nobody* except us can add a new collation. The names of the
>>> collations you added don't exceed 16 bytes. Please keep the old
>>> value.
>> strlen("de_DE_u_co_phonebk") == 18.
> + "unicode_", then, increase to a relevant value please.

  src/coll_def.h                    |   2 +-
  test/sql-tap/collation-2.test.lua | 143 ++++++++++++++++++++++++++++++
  2 files changed, 144 insertions(+), 1 deletion(-)
  create mode 100755 test/sql-tap/collation-2.test.lua

diff --git a/src/coll_def.h b/src/coll_def.h
index 18ed606ad..fd91f942a 100644
--- a/src/coll_def.h
+++ b/src/coll_def.h
@@ -44,7 +44,7 @@ extern const char *coll_type_strs[];

  /** Maximal length of locale name. */
  enum {

diff --git a/test/sql-tap/collation-2.test.lua b/test/sql-tap/collation-2.test.lua
new file mode 100755
index 000000000..58b6a1ef1
--- /dev/null
+++ b/test/sql-tap/collation-2.test.lua
@@ -0,0 +1,143 @@
+#!/usr/bin/env tarantool
+-- SQL TAP test harness. Declared local so the script does not create a
+-- global named `test`.
+local test = require("sqltester")
+-- Ten collations are exercised, four checks each
+-- (create table, insert values, ordered select, drop table).
+test:plan(10 * 4)
+-- Common prefix of every test label produced by this script.
+local prefix = "unicode-collation-"
+--- Insert every string of `data` into `tbl_name`, one row per statement.
+-- @param tbl_name name of the target table (interpolated into the SQL as is)
+-- @param data     array of strings; each becomes a single-column row
+local function insert_into_table(tbl_name, data)
+    local sql = string.format("INSERT INTO %s VALUES ", tbl_name)
+    for _, item in ipairs(data) do
+        -- Double embedded single quotes so the value remains a valid SQL
+        -- string literal. Assigning to a local keeps only the first
+        -- result of gsub (the substitution count is discarded).
+        local escaped = item:gsub("'", "''")
+        box.sql.execute(sql .. "('" .. escaped .. "')")
+    end
+end
+-- Test fixtures. Each entry is a pair:
+--   [1] the collation name used in the CREATE TABLE statement,
+--   [2] the strings to insert, listed in the order the ordered SELECT
+--       is expected to return them.
+local collation_entries =
+{
+    {   -- Afrikaans case sensitive
+        "unicode_af_s3",
+        {"a","A","á","Á","â","Â","b","B","c","C","d","D","e","E","é","É",
+            "ï","Ï","j","J","k","K","l","L","m","M","n","N","ʼn","o","O",
+            "û","Û","v","V","w","W","x","X","y","Y","z","Z"}},
+    {
+        -- Amharic
+        "unicode_am_s3",
+        {"ሀ","ሁ","ሂ","ሃ","ሄ","ህ","ሆ","ለ","ሉ","ሊ","ላ","ሌ","ል","ሎ","ሏ","ሐ",
+            "ሑ","ሒ","ሓ","ሔ","ሕ","ሖ","ሗ","መ","ሙ","ሚ","ማ","ሜ","ም","ሞ",
+            "ሟ","ሠ","ሡ","ሢ","ሣ","ሤ","ሥ","ሦ","ሧ","ረ","ሩ","ሪ","ራ","ሬ","ር",
+            "ሾ","ሿ","ቀ","ቁ","ቂ","ቃ","ቄ","ቅ","ቆ","ቈ","ቊ","ቋ","ቌ","ቍ","በ",
+            "ቡ","ቢ","ባ","ቤ","ብ","ቦ","ቧ","ቨ","ቩ","ቪ","ቫ","ቬ","ቭ","ቮ","ቯ",
+            "ተ","ቱ","ቲ","ታ","ቴ","ት","ቶ","ቷ","ቸ","ቹ","ቺ","ቻ","ቼ","ች","ቾ",
+            "ቿ","ኀ","ኁ","ኂ","ኃ","ኄ","ኅ","ኆ","ኈ","ኊ","ኋ","ኌ","ኍ","ነ","ኑ",
+            "ኢ","ኣ","ኤ","እ","ኦ","ኧ","ከ","ኩ","ኪ","ካ","ኬ","ክ","ኮ","ኰ","ኲ",
+            "ኳ","ኴ","ኵ","ኸ","ኹ","ኺ","ኻ","ኼ","ኽ","ኾ","ወ","ዉ","ዊ","ዋ","ዌ",
+            "ው","ዎ","ዐ","ዑ","ዒ","ዓ","ዔ","ዕ","ዖ","ዘ","ዙ","ዚ","ዛ","ዜ","ዝ",
+            "ዞ","ዟ","ዠ","ዡ","ዢ","ዣ","ዤ","ዥ","ዦ","ዧ","የ","ዩ","ዪ","ያ",
+            "ዬ","ይ","ዮ","ደ","ዱ","ዲ","ዳ","ዴ","ድ","ዶ","ዷ","ጀ","ጁ","ጂ","ጃ",
+            "ጄ","ጅ","ጆ","ጇ","ገ","ጉ","ጊ","ጋ","ጌ","ግ","ጎ","ጐ","ጒ","ጓ","ጔ",
+            "ጕ","ጠ","ጡ","ጢ","ጣ","ጤ","ጥ","ጦ","ጧ","ጨ","ጩ","ጪ","ጫ","ጬ",
+            "ጭ","ጮ","ጯ","ጰ","ጱ","ጲ","ጳ","ጴ","ጵ","ጶ","ጷ","ጸ","ጹ","ጺ","ጻ",
+            "ፍ","ፎ","ፏ","ፐ","ፑ","ፒ","ፓ","ፔ","ፕ","ፖ","ፗ"}},
+    {
+        -- Assamese
+        "unicode_as_s3",
+        {"়","অ","আ","ই","ঈ","উ","ঊ","ঋ","এ","ঐ","ও","ঔ","ং ","ঁ ","ঃ ",
+            "ণ","ৎ ","ত","থ","দ","ধ","ন","প","ফ","ব","ভ","ম","য","য়","ৰ",
+            "ল","ৱ","শ","ষ","স","হ","ক্ষ ","া","ি","ী","ু","ূ","ৃ","ে","ৈ",
+            "ো","ৌ","্"}},
+    {
+        -- Azerbaijani
+        "unicode_az_s3",
+        {"a ","A ","b ","B ","c ","C ","ç ","Ç ","ḉ ","Ḉ ","d ","D ","e ",
+            "E ","ə ","Ə ","f ","F ","g ","G ","ğ ","Ğ ","ģ̆ ","Ģ̆ ","h ",
+            "H ","x ","X ","ẍ ","Ẍ ","ẋ ","Ẋ ","ı ","I ","Í ","Ì ","Ĭ ",
+            "Î ","Ǐ ","Ï ","Ḯ ","Ĩ ","Į ","Ī ","Ỉ ","Ȉ ","Ȋ ","Ị ","Ḭ ",
+            "i ","İ ","Į̇ ","Ị̇ ","Ḭ̇ ","j ","J ","k ","K ","q ","Q ","l ",
+            "L ","m ","M ","n ","N ","o ","O ","ö ","Ö ","ǫ̈ ","Ǫ̈ ","ȫ ",
+            "Ȫ ","ơ̈ ","Ơ̈ ","ợ̈ ","Ợ̈ ","ọ̈ ","Ọ̈ ","p ","P ","r ","R ","s ",
+            "S ","ş ","Ş ","t ","T ","u ","U ","ü ","Ü ","ǘ ","Ǘ ","ǜ ",
+            "Ǜ ","ǚ ","Ǚ ","ų̈ ","Ų̈ ","ǖ ","Ǖ ","ư̈ ","Ư̈ ","ự̈ ","Ự̈ ","ụ̈ ",
+            "Ụ̈ ","ṳ̈ ","Ṳ̈ ","ṷ̈ ","Ṷ̈ ","ṵ̈ ","Ṵ̈ ","v ","V ","y ","Y ","z ",
+            "Z ","Ẉ","w ","W ","ẃ ","Ẃ ","ẁ ","Ẁ ","ŵ ","Ŵ ","ẘ ","ẅ ","Ẅ ",
+            "ẇ ","Ẇ ","ẉ "}},
+    {
+        -- Belarusian
+        "unicode_be_s3",
+        {"а","А","б","Б","в","ᲀ","В","г","Г","д","ᲁ","Д","дж","дз","е",
+            "Е","ё","Ё","ж","Ж","з","З","і","І","й","Й","к","К","л",
+            "Л","м","М","н","Н","о","ᲂ","О","п","П","р","Р","с","ᲃ",
+            "С","т","Т","у","У","ў","Ў","ф","Ф","х","Х","ц",
+            "Ц","ч","Ч","ш","Ш","ы","Ы","ь","Ь","э","Э","ю","Ю","я","Я"}},
+    {
+        -- Kyrgyz
+        "unicode_ky_s3",
+        {"а","А","б","Б","г","Г","д","ᲁ","Д","е","Е","ё","Ё","ж","Ж",
+            "о","ᲂ","О","ө","Ө","п","П","р","Р","с","ᲃ","С","т","ᲄ",
+            "э","Э","ю","Ю","я","Я"}},
+    {
+        -- Kyrgyz (russian codepage)
+        "unicode_ky_s3",
+        {"а","А","б","Б","в","В","г","Г","д","Д","е","Е","ё","Ё","ж","Ж",
+            "з","З","и","И","й","Й","к","К","л","Л","м","М","н","Н",
+            "о","О","п","П","р","Р","с","С","т","Т","у","У","ф","Ф",
+            "х","Х","ц","Ц","ч","Ч","ш","Ш","щ","Щ","ъ","Ъ","ы","Ы",
+            "ь","Ь","э","Э","ю","Ю","я","Я"}},
+    {
+        -- German (umlaut as 'ae', 'oe', 'ue')
+        "unicode_de__phonebook_s3",
+        {"a","A","ä","ǟ","Ǟ","ą̈","Ą̈","ạ̈","Ạ̈","ḁ̈","Ḁ̈","Ä ","b","B","c","C",
+            "d","D","e","E","f","F","g","G","h","H","i","I","j","J",
+            "k","K","l","L","m","M","n","N","o","O","ȫ","Ȫ","ǫ̈","Ǫ̈",
+            "ơ̈","Ơ̈","ợ̈","Ợ̈","ọ̈","Ọ̈","ö ","Ö ","p","P","q","Q","r","R",
+            "s","S","ss","ß","t","T","u","U","ǘ","Ǘ","ǜ","Ǜ","ǚ","Ǚ",
+            "ǖ","Ǖ","ų̈","Ų̈","ư̈","Ư̈","ự̈","Ự̈","ụ̈","Ụ̈","ṳ̈","Ṳ̈","ṷ̈","Ṷ̈",
+            "ṵ̈","Ṵ̈","ü ","Ü ","v","V","w","W","x","X","y","Y","z","Z"}},
+    {
+        -- Hebrew
+        "unicode_he_s3",
+        {"׳","״","א","ב","ג","ד","ה","ו","ז","ח","ט","י","כ",
+            "ך","ל","מ","ם","נ","ן","ס","ע","פ","ף","צ","ץ",
+            "ק","ר","ש","ת"} },
+    {
+        -- Japanese
+        "unicode_ja_s3",
+        {"幸","広","庚","康","弘","恒","慌","抗","拘","控","攻","港",
+            "溝","甲","皇","硬","稿"}}
+}
+-- For every collation: create a table keyed by a column using that
+-- collation, insert the sample strings, check that an ordered select
+-- returns them in the listed order, then drop the table.
+for _, test_entry in ipairs(collation_entries) do
+    -- Label prefix shared by the four checks run for this collation.
+    local extended_prefix = string.format("%s1.%s.", prefix, test_entry[1])
+    test:do_execsql_test(
+        extended_prefix.."create_table",
+        string.format(
+            "create table t1(a char(5) collate \"%s\" primary key);",
+            test_entry[1]),
+        {})
+    test:do_test(
+        extended_prefix.."insert_values",
+        function()
+            return insert_into_table("t1", test_entry[2])
+        end, {})
+    -- The expected result is the input list itself: the fixture strings
+    -- are already listed in the order the collation should produce.
+    test:do_execsql_test(
+        extended_prefix.."select",
+        "select a from t1 order by a",
+        test_entry[2])
+    test:do_execsql_test(
+        extended_prefix.."drop_table",
+        "drop table t1",
+        {})
+end

More information about the Tarantool-patches mailing list