From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 4529C22FF3 for ; Wed, 25 Apr 2018 19:29:15 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id zO67ZmHdM4HK for ; Wed, 25 Apr 2018 19:29:15 -0400 (EDT) Received: from smtp49.i.mail.ru (smtp49.i.mail.ru [94.100.177.109]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 0106E22F60 for ; Wed, 25 Apr 2018 19:29:15 -0400 (EDT) From: Vladislav Shpilevoy Subject: [tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua Date: Thu, 26 Apr 2018 02:29:07 +0300 Message-Id: <44bf51af12117fba11a558b94bc4a50b37cfbfcf.1524698920.git.v.shpilevoy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 In-Reply-To: References: Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Lua has no built-in way to correctly compare unicode strings. But Tarantool links with ICU, so let's expose its collators into Lua. They are now available out of the box, and can be used in common libraries. 
Follow up #3290 --- extra/exports | 2 ++ src/CMakeLists.txt | 2 +- src/lua/string.lua | 35 +++++++++++++++++++++++++++++++++++ src/util.c | 31 +++++++++++++++++++++++++++++++ test/app-tap/string.test.lua | 18 +++++++++++++++++- test/box/ddl.result | 15 +++++++++++++++ test/box/ddl.test.lua | 8 ++++++++ 7 files changed, 109 insertions(+), 2 deletions(-) diff --git a/extra/exports b/extra/exports index b0480fe79..efcc3011c 100644 --- a/extra/exports +++ b/extra/exports @@ -41,6 +41,8 @@ title_get_status exception_get_string exception_get_int u_count +u_compare +u_icompare tarantool_lua_ibuf uuid_nil diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1032edc57..0ca41cfaf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -114,7 +114,7 @@ endif () add_library(core STATIC ${core_sources}) target_link_libraries(core - salad small pthread + salad small pthread misc ${LIBEV_LIBRARIES} ${LIBEIO_LIBRARIES} ${LIBCORO_LIBRARIES} diff --git a/src/lua/string.lua b/src/lua/string.lua index 6c566cb54..ce12c3f5d 100644 --- a/src/lua/string.lua +++ b/src/lua/string.lua @@ -32,6 +32,12 @@ ffi.cdef[[ int u_count(const char *s, int bsize, uint8_t flags); + + int + u_compare(const char *s1, size_t len1, const char *s2, size_t len2); + + int + u_icompare(const char *s1, size_t len1, const char *s2, size_t len2); ]] local c_char_ptr = ffi.typeof('const char *') @@ -503,6 +509,33 @@ local function string_u_count(inp, opts) end end +-- +-- Compare two UTF8 strings. +-- @param inp1 First string. +-- @param inp2 Second string. +-- @param func Comparator - case sensitive or insensitive. +-- @param usage Error on incorrect usage. 
+-- @retval <0 inp1 < inp2 +-- @retval >0 inp1 > inp2 +-- @retval ==0 inp1 == inp2 +-- +local function string_u_compare_impl(inp1, inp2, func, usage) + if type(inp1) ~= 'string' or type(inp2) ~= 'string' then + error(usage) + end + return func(c_char_ptr(inp1), #inp1, c_char_ptr(inp2), #inp2) +end + +local function string_u_compare(inp1, inp2) + return string_u_compare_impl(inp1, inp2, ffi.C.u_compare, + 'Usage: string.u_compare(, )') +end + +local function string_u_icompare(inp1, inp2) + return string_u_compare_impl(inp1, inp2, ffi.C.u_icompare, + 'Usage: string.u_icompare(, )') +end + -- It'll automatically set string methods, too. local string = require('string') string.split = string_split @@ -518,3 +551,5 @@ string.rstrip = string_rstrip string.u_upper = string_u_upper string.u_lower = string_u_lower string.u_count = string_u_count +string.u_compare = string_u_compare +string.u_icompare = string_u_icompare diff --git a/src/util.c b/src/util.c index c117dee05..0f4d89b71 100644 --- a/src/util.c +++ b/src/util.c @@ -45,6 +45,7 @@ #include /* mp_char2escape[] table */ #include "say.h" +#include "coll_cache.h" /** Find a string in an array of strings. * @@ -367,3 +368,33 @@ u_count(const char *s, int bsize, uint8_t flags) } return len; } + +/** + * Compare two UTF8 strings. + * @param s1 First string. + * @param len1 Binary size of @a s1. + * @param s2 Second string. + * @param len2 Binary size of @a s2. + * @retval Same as strcmp. + */ +int +u_compare(const char *s1, size_t len1, const char *s2, size_t len2) +{ + struct coll *coll = coll_by_id(COLLATION_ID_UNICODE); + return coll->cmp(s1, len1, s2, len2, coll); +} + +/** + * Case insensitive compare two UTF8 strings. + * @param s1 First string. + * @param len1 Binary size of @a s1. + * @param s2 Second string. + * @param len2 Binary size of @a s2. + * @retval Same as strcmp. 
+ */ +int +u_icompare(const char *s1, size_t len1, const char *s2, size_t len2) +{ + struct coll *coll = coll_by_id(COLLATION_ID_UNICODE_CI); + return coll->cmp(s1, len1, s2, len2, coll); +} diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua index 650a5982d..f357304a0 100755 --- a/test/app-tap/string.test.lua +++ b/test/app-tap/string.test.lua @@ -115,7 +115,7 @@ test:test("hex", function(test) end) test:test("unicode", function(test) - test:plan(24) + test:plan(37) local str = 'хеЛлоу вОрЛд ё Ё я Я э Э ъ Ъ hElLo WorLd 1234 i I İ 勺#☢༺' local upper_res = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 I I İ 勺#☢༺' local upper_turkish = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 İ I İ 勺#☢༺' @@ -164,6 +164,22 @@ test:test("unicode", function(test) test:is(string.u_count(str, {digit = true}), 4, 'option digit') test:is(string.u_count(str, {digit = true, upper = true}), 17, 'options digit and upper') + -- Test compare. + local s1 = '☢' + local s2 = 'İ' + test:is(s1 < s2, false, 'test binary cmp') + test:is(string.u_compare(s1, s2) < 0, true, 'test unicode <') + test:is(string.u_compare(s1, s1) == 0, true, 'test unicode eq') + test:is(string.u_compare(s2, s1) > 0, true, 'test unicode >') + test:is(string.u_icompare('a', 'A') == 0, true, 'test icase ==') + test:is(string.u_icompare('b', 'A') > 0, true, 'test icase >, first') + test:is(string.u_icompare('B', 'a') > 0, true, 'test icase >, second >') + test:is(string.u_compare('', '') == 0, true, 'test empty compare') + test:is(string.u_compare('', 'a') < 0, true, 'test left empty compare') + test:is(string.u_compare('a', '') > 0, true, 'test right empty compare') + test:is(string.u_icompare('', '') == 0, true, 'test empty icompare') + test:is(string.u_icompare('', 'a') < 0, true, 'test left empty icompare') + test:is(string.u_icompare('a', '') > 0, true, 'test right empty icompare') end) test:test("strip", function(test) diff --git a/test/box/ddl.result b/test/box/ddl.result index 
87b9581c6..a5e3d7206 100644 --- a/test/box/ddl.result +++ b/test/box/ddl.result @@ -500,6 +500,21 @@ box.space._collation.index.name:delete{'test'} - [3, 'test', 0, 'ICU', 'ru_RU', {}] ... -- +-- gh-3290: expose ICU into Lua. It uses built-in collations, that +-- must work even if a collation is deleted from _collation. +-- +t = box.space._collation:delete{1} +--- +... +string.u_compare('abc', 'def') +--- +- -1 +... +box.space._collation:replace(t) +--- +- [1, 'unicode', 1, 'ICU', '', {}] +... +-- -- gh-2839: allow to store custom fields in field definition. -- format = {} diff --git a/test/box/ddl.test.lua b/test/box/ddl.test.lua index a1502ae13..9e4577069 100644 --- a/test/box/ddl.test.lua +++ b/test/box/ddl.test.lua @@ -191,6 +191,14 @@ test_run:cmd('restart server default') box.space._collation:select{} box.space._collation.index.name:delete{'test'} +-- +-- gh-3290: expose ICU into Lua. It uses built-in collations, that +-- must work even if a collation is deleted from _collation. +-- +t = box.space._collation:delete{1} +string.u_compare('abc', 'def') +box.space._collation:replace(t) + -- -- gh-2839: allow to store custom fields in field definition. -- -- 2.15.1 (Apple Git-101)