From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 2B7132A5C2 for ; Wed, 20 Mar 2019 11:53:10 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id jUk5XkEzhALP for ; Wed, 20 Mar 2019 11:53:10 -0400 (EDT) Received: from smtp63.i.mail.ru (smtp63.i.mail.ru [217.69.128.43]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id A90A02A5C4 for ; Wed, 20 Mar 2019 11:53:08 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v4] Feature request for a new collation References: <20190305114431.31311-1-szudin@tarantool.org> <20190305154146.GA6860@chai> <35d9a188-03ec-b177-a039-650f15da1fa0@tarantool.org> <20190306081152.GF17432@chai> <20190306094802.GH17432@chai> From: Stanislav Zudin Message-ID: Date: Wed, 20 Mar 2019 18:53:05 +0300 MIME-Version: 1.0 In-Reply-To: <20190306094802.GH17432@chai> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Language: en-US Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org, Konstantin Osipov The recent patch sets COLL_LOCALE_LEN_MAX to 30 and introduces tests for the following collations: - Afrikaans - Amharic - Assamese - Azerbaijani - Belarusian - Kyrgyz - German (phonebook) - Hebrew - Japanese The source of the test sequences is the following: https://www.unicode.org/cldr/charts/34/collation/index.html Issue: https://github.com/tarantool/tarantool/issues/4007 Branch:
https://github.com/tarantool/tarantool/tree/stanztt/gh-4007-new-default-collation-2.1 On 06.03.2019 12:48, Konstantin Osipov wrote: > * Stanislav Zudin [19/03/06 11:26]: >> >> >> On 06.03.2019 11:11, Konstantin Osipov wrote: >>> * Stanislav Zudin [19/03/06 10:24]: >>>> >>>> >>>> On 05.03.2019 18:41, Konstantin Osipov wrote: >>>>> * Stanislav Zudin [19/03/05 14:45]: >>>>>> /** Maximal length of locale name. */ >>>>>> enum { >>>>>> - COLL_LOCALE_LEN_MAX = 16, >>>>>> + COLL_LOCALE_LEN_MAX = 1024, >>>>>> }; >>>>> >>>>> Why? >>>>> >>>> >>>> Because locale may include numerous options such as collation, numbers, >>>> calendar etc. e.g.: "fr@collation=phonebook;calendar=islamic-civil". >>>> The max length is not specified. >>> >>> *Nobody* except us can add a new collation. The names of the >>> collations you added don't exceed 16 bytes. Please keep the old >>> value. >>> >> strlen("de_DE_u_co_phonebk") == 18. > > + "unicode_", then, increase to a relevant value please. > Done. --- src/coll_def.h | 2 +- test/sql-tap/collation-2.test.lua | 143 ++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100755 test/sql-tap/collation-2.test.lua diff --git a/src/coll_def.h b/src/coll_def.h index 18ed606ad..fd91f942a 100644 --- a/src/coll_def.h +++ b/src/coll_def.h @@ -44,7 +44,7 @@ extern const char *coll_type_strs[]; /** Maximal length of locale name. 
*/ enum { - COLL_LOCALE_LEN_MAX = 1024, + COLL_LOCALE_LEN_MAX = 30, }; /* diff --git a/test/sql-tap/collation-2.test.lua b/test/sql-tap/collation-2.test.lua new file mode 100755 index 000000000..58b6a1ef1 --- /dev/null +++ b/test/sql-tap/collation-2.test.lua @@ -0,0 +1,143 @@ +#!/usr/bin/env tarantool +test = require("sqltester") +test:plan(10 * 4) + +local prefix = "unicode-collation-" + +local function insert_into_table(tbl_name, data) + local sql = string.format("INSERT INTO %s VALUES ", tbl_name) + --local values = {} + for _, item in ipairs(data) do + local value = "('"..item.."')" + local e = sql .. value + box.sql.execute(e) + end +end + + +local collation_entries = +{ + { -- Afrikaans case sensitive + "unicode_af_s3", + {"a","A","á","Á","â","Â","b","B","c","C","d","D","e","E","é","É", + "è","È","ê","Ê","ë","Ë","f","F","g","G","h","H","i","I","î","Î", + "ï","Ï","j","J","k","K","l","L","m","M","n","N","ʼn","o","O", + "ô","Ô","ö","Ö","p","P","q","Q","r","R","s","S","t","T","u","U", + "û","Û","v","V","w","W","x","X","y","Y","z","Z"}}, + { + -- Amharic + "unicode_am_s3", + {"ሀ","ሁ","ሂ","ሃ","ሄ","ህ","ሆ","ለ","ሉ","ሊ","ላ","ሌ","ል","ሎ","ሏ","ሐ", + "ሑ","ሒ","ሓ","ሔ","ሕ","ሖ","ሗ","መ","ሙ","ሚ","ማ","ሜ","ም","ሞ", + "ሟ","ሠ","ሡ","ሢ","ሣ","ሤ","ሥ","ሦ","ሧ","ረ","ሩ","ሪ","ራ","ሬ","ር", + "ሮ","ሯ","ሰ","ሱ","ሲ","ሳ","ሴ","ስ","ሶ","ሷ","ሸ","ሹ","ሺ","ሻ","ሼ","ሽ", + "ሾ","ሿ","ቀ","ቁ","ቂ","ቃ","ቄ","ቅ","ቆ","ቈ","ቊ","ቋ","ቌ","ቍ","በ", + "ቡ","ቢ","ባ","ቤ","ብ","ቦ","ቧ","ቨ","ቩ","ቪ","ቫ","ቬ","ቭ","ቮ","ቯ", + "ተ","ቱ","ቲ","ታ","ቴ","ት","ቶ","ቷ","ቸ","ቹ","ቺ","ቻ","ቼ","ች","ቾ", + "ቿ","ኀ","ኁ","ኂ","ኃ","ኄ","ኅ","ኆ","ኈ","ኊ","ኋ","ኌ","ኍ","ነ","ኑ", + "ኒ","ና","ኔ","ን","ኖ","ኗ","ኘ","ኙ","ኚ","ኛ","ኜ","ኝ","ኞ","ኟ","አ","ኡ", + "ኢ","ኣ","ኤ","እ","ኦ","ኧ","ከ","ኩ","ኪ","ካ","ኬ","ክ","ኮ","ኰ","ኲ", + "ኳ","ኴ","ኵ","ኸ","ኹ","ኺ","ኻ","ኼ","ኽ","ኾ","ወ","ዉ","ዊ","ዋ","ዌ", + "ው","ዎ","ዐ","ዑ","ዒ","ዓ","ዔ","ዕ","ዖ","ዘ","ዙ","ዚ","ዛ","ዜ","ዝ", + "ዞ","ዟ","ዠ","ዡ","ዢ","ዣ","ዤ","ዥ","ዦ","ዧ","የ","ዩ","ዪ","ያ", + "ዬ","ይ","ዮ","ደ","ዱ","ዲ","ዳ","ዴ","ድ","ዶ","ዷ","ጀ","ጁ","ጂ","ጃ", + 
"ጄ","ጅ","ጆ","ጇ","ገ","ጉ","ጊ","ጋ","ጌ","ግ","ጎ","ጐ","ጒ","ጓ","ጔ", + "ጕ","ጠ","ጡ","ጢ","ጣ","ጤ","ጥ","ጦ","ጧ","ጨ","ጩ","ጪ","ጫ","ጬ", + "ጭ","ጮ","ጯ","ጰ","ጱ","ጲ","ጳ","ጴ","ጵ","ጶ","ጷ","ጸ","ጹ","ጺ","ጻ", + "ጼ","ጽ","ጾ","ጿ","ፀ","ፁ","ፂ","ፃ","ፄ","ፅ","ፆ","ፈ","ፉ","ፊ","ፋ","ፌ", + "ፍ","ፎ","ፏ","ፐ","ፑ","ፒ","ፓ","ፔ","ፕ","ፖ","ፗ"}}, + { + -- Assamese + "unicode_as_s3", + {"়","অ","আ","ই","ঈ","উ","ঊ","ঋ","এ","ঐ","ও","ঔ","ং ","ঁ ","ঃ ", + "ক","খ","গ","ঘ","ঙ","চ","ছ","জ","ঝ","ঞ","ট","ঠ","ড","ড়","ঢ","ঢ়", + "ণ","ৎ ","ত","থ","দ","ধ","ন","প","ফ","ব","ভ","ম","য","য়","ৰ", + "ল","ৱ","শ","ষ","স","হ","ক্ষ ","া","ি","ী","ু","ূ","ৃ","ে","ৈ", + "ো","ৌ","্"}}, + + { + -- Azerbaijani + "unicode_az_s3", + {"a ","A ","b ","B ","c ","C ","ç ","Ç ","ḉ ","Ḉ ","d ","D ","e ", + "E ","ə ","Ə ","f ","F ","g ","G ","ğ ","Ğ ","ģ̆ ","Ģ̆ ","h ", + "H ","x ","X ","ẍ ","Ẍ ","ẋ ","Ẋ ","ı ","I ","Í ","Ì ","Ĭ ", + "Î ","Ǐ ","Ï ","Ḯ ","Ĩ ","Į ","Ī ","Ỉ ","Ȉ ","Ȋ ","Ị ","Ḭ ", + "i ","İ ","Į̇ ","Ị̇ ","Ḭ̇ ","j ","J ","k ","K ","q ","Q ","l ", + "L ","m ","M ","n ","N ","o ","O ","ö ","Ö ","ǫ̈ ","Ǫ̈ ","ȫ ", + "Ȫ ","ơ̈ ","Ơ̈ ","ợ̈ ","Ợ̈ ","ọ̈ ","Ọ̈ ","p ","P ","r ","R ","s ", + "S ","ş ","Ş ","t ","T ","u ","U ","ü ","Ü ","ǘ ","Ǘ ","ǜ ", + "Ǜ ","ǚ ","Ǚ ","ų̈ ","Ų̈ ","ǖ ","Ǖ ","ư̈ ","Ư̈ ","ự̈ ","Ự̈ ","ụ̈ ", + "Ụ̈ ","ṳ̈ ","Ṳ̈ ","ṷ̈ ","Ṷ̈ ","ṵ̈ ","Ṵ̈ ","v ","V ","y ","Y ","z ", + "Z ","Ẉ","w ","W ","ẃ ","Ẃ ","ẁ ","Ẁ ","ŵ ","Ŵ ","ẘ ","ẅ ","Ẅ ", + "ẇ ","Ẇ ","ẉ "}}, + { + -- Belarusian + "unicode_be_s3", + {"а","А","б","Б","в","ᲀ","В","г","Г","д","ᲁ","Д","дж","дз","е", + "Е","ё","Ё","ж","Ж","з","З","і","І","й","Й","к","К","л", + "Л","м","М","н","Н","о","ᲂ","О","п","П","р","Р","с","ᲃ", + "С","т","Т","у","У","ў","Ў","ф","Ф","х","Х","ц", + "Ц","ч","Ч","ш","Ш","ы","Ы","ь","Ь","э","Э","ю","Ю","я","Я"}}, + { + -- Kyrgyz + "unicode_ky_s3", + {"а","А","б","Б","г","Г","д","ᲁ","Д","е","Е","ё","Ё","ж","Ж", + "з","З","и","И","й","Й","к","К","л","Л","м","М","н","Н","ң","Ң", + "о","ᲂ","О","ө","Ө","п","П","р","Р","с","ᲃ","С","т","ᲄ", + 
"Т","у","У","ү","Ү","х","Х","ч","Ч","ш","Ш","ъ","ᲆ","Ъ","ы","Ы", + "э","Э","ю","Ю","я","Я"}}, + { + -- Kyrgyz (russian codepage) + "unicode_ky_s3", + {"а","А","б","Б","в","В","г","Г","д","Д","е","Е","ё","Ё","ж","Ж", + "з","З","и","И","й","Й","к","К","л","Л","м","М","н","Н", + "о","О","п","П","р","Р","с","С","т","Т","у","У","ф","Ф", + "х","Х","ц","Ц","ч","Ч","ш","Ш","щ","Щ","ъ","Ъ","ы","Ы", + "ь","Ь","э","Э","ю","Ю","я","Я"}}, + { + -- German (umlaut as 'ae', 'oe', 'ue') + "unicode_de__phonebook_s3", + {"a","A","ä","ǟ","Ǟ","ą̈","Ą̈","ạ̈","Ạ̈","ḁ̈","Ḁ̈","Ä ","b","B","c","C", + "d","D","e","E","f","F","g","G","h","H","i","I","j","J", + "k","K","l","L","m","M","n","N","o","O","ȫ","Ȫ","ǫ̈","Ǫ̈", + "ơ̈","Ơ̈","ợ̈","Ợ̈","ọ̈","Ọ̈","ö ","Ö ","p","P","q","Q","r","R", + "s","S","ss","ß","t","T","u","U","ǘ","Ǘ","ǜ","Ǜ","ǚ","Ǚ", + "ǖ","Ǖ","ų̈","Ų̈","ư̈","Ư̈","ự̈","Ự̈","ụ̈","Ụ̈","ṳ̈","Ṳ̈","ṷ̈","Ṷ̈", + "ṵ̈","Ṵ̈","ü ","Ü ","v","V","w","W","x","X","y","Y","z","Z"}}, + { + -- Hebrew + "unicode_he_s3", + {"׳","״","א","ב","ג","ד","ה","ו","ז","ח","ט","י","כ", + "ך","ל","מ","ם","נ","ן","ס","ע","פ","ף","צ","ץ", + "ק","ר","ש","ת"} }, + { + -- Japanese + "unicode_ja_s3", + {"幸","広","庚","康","弘","恒","慌","抗","拘","控","攻","港", + "溝","甲","皇","硬","稿"}} +} + +for _, test_entry in ipairs(collation_entries) do + -- create title + local extendex_prefix = string.format("%s1.%s.", prefix, test_entry[1]) + + test:do_execsql_test( + extendex_prefix.."create_table", + string.format("create table t1(a char(5) collate \"%s\" primary key);", test_entry[1]), + {}) + test:do_test( + extendex_prefix.."insert_values", + function() + return insert_into_table("t1", test_entry[2]) + end, {}) + test:do_execsql_test( + extendex_prefix.."select", + string.format("select a from t1 order by a"), + test_entry[2]) + test:do_execsql_test( + extendex_prefix.."drop_table", + "drop table t1", + {}) +end + +test:finish_test() -- 2.17.1