From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org
Cc: kostja@tarantool.org
Subject: [tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua
Date: Thu, 26 Apr 2018 02:29:07 +0300 [thread overview]
Message-ID: <44bf51af12117fba11a558b94bc4a50b37cfbfcf.1524698920.git.v.shpilevoy@tarantool.org> (raw)
In-Reply-To: <cover.1524698920.git.v.shpilevoy@tarantool.org>
Lua has no built-in way to correctly compare Unicode strings. But
Tarantool links with ICU, so let's expose its collators into Lua.
They now work out of the box and can be used in common libraries.
Follow up #3290
---
extra/exports | 2 ++
src/CMakeLists.txt | 2 +-
src/lua/string.lua | 35 +++++++++++++++++++++++++++++++++++
src/util.c | 31 +++++++++++++++++++++++++++++++
test/app-tap/string.test.lua | 18 +++++++++++++++++-
test/box/ddl.result | 15 +++++++++++++++
test/box/ddl.test.lua | 8 ++++++++
7 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/extra/exports b/extra/exports
index b0480fe79..efcc3011c 100644
--- a/extra/exports
+++ b/extra/exports
@@ -41,6 +41,8 @@ title_get_status
exception_get_string
exception_get_int
u_count
+u_compare
+u_icompare
tarantool_lua_ibuf
uuid_nil
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1032edc57..0ca41cfaf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -114,7 +114,7 @@ endif ()
add_library(core STATIC ${core_sources})
target_link_libraries(core
- salad small pthread
+ salad small pthread misc
${LIBEV_LIBRARIES}
${LIBEIO_LIBRARIES}
${LIBCORO_LIBRARIES}
diff --git a/src/lua/string.lua b/src/lua/string.lua
index 6c566cb54..ce12c3f5d 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -32,6 +32,12 @@ ffi.cdef[[
int
u_count(const char *s, int bsize, uint8_t flags);
+
+ int
+ u_compare(const char *s1, size_t len1, const char *s2, size_t len2);
+
+ int
+ u_icompare(const char *s1, size_t len1, const char *s2, size_t len2);
]]
local c_char_ptr = ffi.typeof('const char *')
@@ -503,6 +509,33 @@ local function string_u_count(inp, opts)
end
end
+--
+-- Compare two UTF-8 strings with the given comparator.
+-- @param inp1 First string.
+-- @param inp2 Second string.
+-- @param func Comparator called as func(ptr1, #inp1, ptr2, #inp2).
+-- @param usage Message raised via error() if an input is not a string.
+-- @retval <0 inp1 < inp2
+-- @retval >0 inp1 > inp2
+-- @retval ==0 inp1 == inp2
+--
+local function string_u_compare_impl(inp1, inp2, func, usage)
+ if type(inp1) ~= 'string' or type(inp2) ~= 'string' then
+ error(usage)
+ end
+ return func(c_char_ptr(inp1), #inp1, c_char_ptr(inp2), #inp2)
+end
+
+local function string_u_compare(inp1, inp2)
+ return string_u_compare_impl(inp1, inp2, ffi.C.u_compare,
+ 'Usage: string.u_compare(<string>, <string>)')
+end
+
+local function string_u_icompare(inp1, inp2)
+ return string_u_compare_impl(inp1, inp2, ffi.C.u_icompare,
+ 'Usage: string.u_icompare(<string>, <string>)')
+end
+
-- It'll automatically set string methods, too.
local string = require('string')
string.split = string_split
@@ -518,3 +551,5 @@ string.rstrip = string_rstrip
string.u_upper = string_u_upper
string.u_lower = string_u_lower
string.u_count = string_u_count
+string.u_compare = string_u_compare
+string.u_icompare = string_u_icompare
diff --git a/src/util.c b/src/util.c
index c117dee05..0f4d89b71 100644
--- a/src/util.c
+++ b/src/util.c
@@ -45,6 +45,7 @@
#include <msgpuck/msgpuck.h> /* mp_char2escape[] table */
#include "say.h"
+#include "coll_cache.h"
/** Find a string in an array of strings.
*
@@ -367,3 +368,33 @@ u_count(const char *s, int bsize, uint8_t flags)
}
return len;
}
+
+/**
+ * Compare two UTF-8 strings using the COLLATION_ID_UNICODE collation.
+ * @param s1 First string.
+ * @param len1 Size of @a s1 in bytes.
+ * @param s2 Second string.
+ * @param len2 Size of @a s2 in bytes.
+ * @retval Same as strcmp.
+ */
+int
+u_compare(const char *s1, size_t len1, const char *s2, size_t len2)
+{
+ struct coll *coll = coll_by_id(COLLATION_ID_UNICODE);
+ return coll->cmp(s1, len1, s2, len2, coll);
+}
+
+/**
+ * Case-insensitively compare two UTF-8 strings (COLLATION_ID_UNICODE_CI).
+ * @param s1 First string.
+ * @param len1 Size of @a s1 in bytes.
+ * @param s2 Second string.
+ * @param len2 Size of @a s2 in bytes.
+ * @retval Same as strcmp.
+ */
+int
+u_icompare(const char *s1, size_t len1, const char *s2, size_t len2)
+{
+ struct coll *coll = coll_by_id(COLLATION_ID_UNICODE_CI);
+ return coll->cmp(s1, len1, s2, len2, coll);
+}
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 650a5982d..f357304a0 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -115,7 +115,7 @@ test:test("hex", function(test)
end)
test:test("unicode", function(test)
- test:plan(24)
+ test:plan(37)
local str = 'хеЛлоу вОрЛд ё Ё я Я э Э ъ Ъ hElLo WorLd 1234 i I İ 勺#☢༺'
local upper_res = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 I I İ 勺#☢༺'
local upper_turkish = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 İ I İ 勺#☢༺'
@@ -164,6 +164,22 @@ test:test("unicode", function(test)
test:is(string.u_count(str, {digit = true}), 4, 'option digit')
test:is(string.u_count(str, {digit = true, upper = true}), 17,
'options digit and upper')
+ -- Test compare.
+ local s1 = '☢'
+ local s2 = 'İ'
+ test:is(s1 < s2, false, 'test binary cmp')
+ test:is(string.u_compare(s1, s2) < 0, true, 'test unicode <')
+ test:is(string.u_compare(s1, s1) == 0, true, 'test unicode eq')
+ test:is(string.u_compare(s2, s1) > 0, true, 'test unicode >')
+ test:is(string.u_icompare('a', 'A') == 0, true, 'test icase ==')
+ test:is(string.u_icompare('b', 'A') > 0, true, 'test icase >, first')
+ test:is(string.u_icompare('B', 'a') > 0, true, 'test icase >, second >')
+ test:is(string.u_compare('', '') == 0, true, 'test empty compare')
+ test:is(string.u_compare('', 'a') < 0, true, 'test left empty compare')
+ test:is(string.u_compare('a', '') > 0, true, 'test right empty compare')
+ test:is(string.u_icompare('', '') == 0, true, 'test empty icompare')
+ test:is(string.u_icompare('', 'a') < 0, true, 'test left empty icompare')
+ test:is(string.u_icompare('a', '') > 0, true, 'test right empty icompare')
end)
test:test("strip", function(test)
diff --git a/test/box/ddl.result b/test/box/ddl.result
index 87b9581c6..a5e3d7206 100644
--- a/test/box/ddl.result
+++ b/test/box/ddl.result
@@ -500,6 +500,21 @@ box.space._collation.index.name:delete{'test'}
- [3, 'test', 0, 'ICU', 'ru_RU', {}]
...
--
+-- gh-3290: expose ICU into Lua. It uses built-in collations, that
+-- must work even if a collation is deleted from _collation.
+--
+t = box.space._collation:delete{1}
+---
+...
+string.u_compare('abc', 'def')
+---
+- -1
+...
+box.space._collation:replace(t)
+---
+- [1, 'unicode', 1, 'ICU', '', {}]
+...
+--
-- gh-2839: allow to store custom fields in field definition.
--
format = {}
diff --git a/test/box/ddl.test.lua b/test/box/ddl.test.lua
index a1502ae13..9e4577069 100644
--- a/test/box/ddl.test.lua
+++ b/test/box/ddl.test.lua
@@ -191,6 +191,14 @@ test_run:cmd('restart server default')
box.space._collation:select{}
box.space._collation.index.name:delete{'test'}
+--
+-- gh-3290: expose ICU into Lua. It uses built-in collations, that
+-- must work even if a collation is deleted from _collation.
+--
+t = box.space._collation:delete{1}
+string.u_compare('abc', 'def')
+box.space._collation:replace(t)
+
--
-- gh-2839: allow to store custom fields in field definition.
--
--
2.15.1 (Apple Git-101)
next prev parent reply other threads:[~2018-04-25 23:29 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-25 23:29 [tarantool-patches] [PATCH 0/7] Expose ICU " Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 1/7] lua: expose ICU upper/lower functions to Lua Vladislav Shpilevoy
2018-04-28 0:56 ` [tarantool-patches] " Alexander Turenko
2018-04-25 23:29 ` [tarantool-patches] [PATCH 2/7] lua: implement string.u_count Vladislav Shpilevoy
2018-04-26 10:36 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-26 16:07 ` Vladislav Shpilevoy
2018-04-26 23:57 ` Vladislav Shpilevoy
2018-04-28 1:10 ` Alexander Turenko
2018-04-25 23:29 ` [tarantool-patches] [PATCH 3/7] alter: fix assertion in collations alter Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 4/7] Move struct on_access_denied_ctx into error.h Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 5/7] Merge box_error, stat and collations into core library Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 6/7] Always store built-in collations in the cache Vladislav Shpilevoy
2018-04-25 23:29 ` Vladislav Shpilevoy [this message]
2018-04-28 1:55 ` [tarantool-patches] Re: [PATCH 0/7] Expose ICU into Lua Alexander Turenko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=44bf51af12117fba11a558b94bc4a50b37cfbfcf.1524698920.git.v.shpilevoy@tarantool.org \
--to=v.shpilevoy@tarantool.org \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='Re: [tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox