[tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Apr 26 02:29:07 MSK 2018


Lua has no built-in way to compare Unicode strings correctly. But
Tarantool links with ICU, so let's expose its collators to Lua.
They are now available out of the box and can be used in common
libraries.

Follow up #3290
---
 extra/exports                |  2 ++
 src/CMakeLists.txt           |  2 +-
 src/lua/string.lua           | 35 +++++++++++++++++++++++++++++++++++
 src/util.c                   | 31 +++++++++++++++++++++++++++++++
 test/app-tap/string.test.lua | 18 +++++++++++++++++-
 test/box/ddl.result          | 15 +++++++++++++++
 test/box/ddl.test.lua        |  8 ++++++++
 7 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/extra/exports b/extra/exports
index b0480fe79..efcc3011c 100644
--- a/extra/exports
+++ b/extra/exports
@@ -41,6 +41,8 @@ title_get_status
 exception_get_string
 exception_get_int
 u_count
+u_compare
+u_icompare
 
 tarantool_lua_ibuf
 uuid_nil
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1032edc57..0ca41cfaf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -114,7 +114,7 @@ endif ()
 
 add_library(core STATIC ${core_sources})
 target_link_libraries(core
-    salad small pthread
+    salad small pthread misc
     ${LIBEV_LIBRARIES}
     ${LIBEIO_LIBRARIES}
     ${LIBCORO_LIBRARIES}
diff --git a/src/lua/string.lua b/src/lua/string.lua
index 6c566cb54..ce12c3f5d 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -32,6 +32,12 @@ ffi.cdef[[
 
     int
     u_count(const char *s, int bsize, uint8_t flags);
+
+    int
+    u_compare(const char *s1, size_t len1, const char *s2, size_t len2);
+
+    int
+    u_icompare(const char *s1, size_t len1, const char *s2, size_t len2);
 ]]
 
 local c_char_ptr = ffi.typeof('const char *')
@@ -503,6 +509,33 @@ local function string_u_count(inp, opts)
     end
 end
 
+--
+-- Compare two UTF-8 strings using the given comparator function.
+-- @param inp1 First string.
+-- @param inp2 Second string.
+-- @param func Comparator - case sensitive or insensitive.
+-- @param usage Message of the error raised on incorrect usage.
+-- @retval  <0 inp1 < inp2
+-- @retval  >0 inp1 > inp2
+-- @retval ==0 inp1 == inp2
+--
+local function string_u_compare_impl(inp1, inp2, func, usage)
+    if type(inp1) ~= 'string' or type(inp2) ~= 'string' then
+        error(usage)
+    end
+    return func(c_char_ptr(inp1), #inp1, c_char_ptr(inp2), #inp2)
+end
+
+local function string_u_compare(inp1, inp2)
+    local usage = 'Usage: string.u_compare(<string>, <string>)'
+    return string_u_compare_impl(inp1, inp2, ffi.C.u_compare, usage)
+end
+
+local function string_u_icompare(inp1, inp2)
+    local usage = 'Usage: string.u_icompare(<string>, <string>)'
+    return string_u_compare_impl(inp1, inp2, ffi.C.u_icompare, usage)
+end
+
 -- It'll automatically set string methods, too.
 local string = require('string')
 string.split      = string_split
@@ -518,3 +551,5 @@ string.rstrip      = string_rstrip
 string.u_upper    = string_u_upper
 string.u_lower    = string_u_lower
 string.u_count    = string_u_count
+string.u_compare  = string_u_compare
+string.u_icompare = string_u_icompare
diff --git a/src/util.c b/src/util.c
index c117dee05..0f4d89b71 100644
--- a/src/util.c
+++ b/src/util.c
@@ -45,6 +45,7 @@
 #include <msgpuck/msgpuck.h> /* mp_char2escape[] table */
 
 #include "say.h"
+#include "coll_cache.h"
 
 /** Find a string in an array of strings.
  *
@@ -367,3 +368,33 @@ u_count(const char *s, int bsize, uint8_t flags)
 	}
 	return len;
 }
+
+/**
+ * Compare two UTF-8 strings with the COLLATION_ID_UNICODE collation.
+ * @param s1 First string.
+ * @param len1 Size of @a s1 in bytes.
+ * @param s2 Second string.
+ * @param len2 Size of @a s2 in bytes.
+ * @retval Same as strcmp. NOTE(review): coll is assumed non-NULL.
+ */
+int
+u_compare(const char *s1, size_t len1, const char *s2, size_t len2)
+{
+	struct coll *coll = coll_by_id(COLLATION_ID_UNICODE);
+	return coll->cmp(s1, len1, s2, len2, coll);
+}
+
+/**
+ * Compare two UTF-8 strings with the COLLATION_ID_UNICODE_CI collation.
+ * @param s1 First string.
+ * @param len1 Size of @a s1 in bytes.
+ * @param s2 Second string.
+ * @param len2 Size of @a s2 in bytes.
+ * @retval Same as strcmp. NOTE(review): coll is assumed non-NULL.
+ */
+int
+u_icompare(const char *s1, size_t len1, const char *s2, size_t len2)
+{
+	struct coll *coll = coll_by_id(COLLATION_ID_UNICODE_CI);
+	return coll->cmp(s1, len1, s2, len2, coll);
+}
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 650a5982d..f357304a0 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -115,7 +115,7 @@ test:test("hex", function(test)
 end)
 
 test:test("unicode", function(test)
-    test:plan(24)
+    test:plan(37)
     local str = 'хеЛлоу вОрЛд ё Ё я Я э Э ъ Ъ hElLo WorLd 1234 i I İ 勺#☢༺'
     local upper_res = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 I I İ 勺#☢༺'
     local upper_turkish = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 İ I İ 勺#☢༺'
@@ -164,6 +164,22 @@ test:test("unicode", function(test)
     test:is(string.u_count(str, {digit = true}), 4, 'option digit')
     test:is(string.u_count(str, {digit = true, upper = true}), 17,
             'options digit and upper')
+    -- Test compare.
+    local s1 = '☢'
+    local s2 = 'İ'
+    test:is(s1 < s2, false, 'test binary cmp')
+    test:is(string.u_compare(s1, s2) < 0, true, 'test unicode <')
+    test:is(string.u_compare(s1, s1) == 0, true, 'test unicode eq')
+    test:is(string.u_compare(s2, s1) > 0, true, 'test unicode >')
+    test:is(string.u_icompare('a', 'A') == 0, true, 'test icase ==')
+    test:is(string.u_icompare('b', 'A') > 0, true, 'test icase >, first')
+    test:is(string.u_icompare('B', 'a') > 0, true, 'test icase >, second >')
+    test:is(string.u_compare('', '') == 0, true, 'test empty compare')
+    test:is(string.u_compare('', 'a') < 0, true, 'test left empty compare')
+    test:is(string.u_compare('a', '') > 0, true, 'test right empty compare')
+    test:is(string.u_icompare('', '') == 0, true, 'test empty icompare')
+    test:is(string.u_icompare('', 'a') < 0, true, 'test left empty icompare')
+    test:is(string.u_icompare('a', '') > 0, true, 'test right empty icompare')
 end)
 
 test:test("strip", function(test)
diff --git a/test/box/ddl.result b/test/box/ddl.result
index 87b9581c6..a5e3d7206 100644
--- a/test/box/ddl.result
+++ b/test/box/ddl.result
@@ -500,6 +500,21 @@ box.space._collation.index.name:delete{'test'}
 - [3, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 --
+-- gh-3290: expose ICU into Lua. It uses built-in collations, that
+-- must work even if a collation is deleted from _collation.
+--
+t = box.space._collation:delete{1}
+---
+...
+string.u_compare('abc', 'def')
+---
+- -1
+...
+box.space._collation:replace(t)
+---
+- [1, 'unicode', 1, 'ICU', '', {}]
+...
+--
 -- gh-2839: allow to store custom fields in field definition.
 --
 format = {}
diff --git a/test/box/ddl.test.lua b/test/box/ddl.test.lua
index a1502ae13..9e4577069 100644
--- a/test/box/ddl.test.lua
+++ b/test/box/ddl.test.lua
@@ -191,6 +191,14 @@ test_run:cmd('restart server default')
 box.space._collation:select{}
 box.space._collation.index.name:delete{'test'}
 
+--
+-- gh-3290: expose ICU into Lua. It uses built-in collations, that
+-- must work even if a collation is deleted from _collation.
+--
+t = box.space._collation:delete{1}
+string.u_compare('abc', 'def')
+box.space._collation:replace(t)
+
 --
 -- gh-2839: allow to store custom fields in field definition.
 --
-- 
2.15.1 (Apple Git-101)





More information about the Tarantool-patches mailing list