Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org
Cc: kostja@tarantool.org
Subject: [tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua
Date: Thu, 26 Apr 2018 02:29:07 +0300	[thread overview]
Message-ID: <44bf51af12117fba11a558b94bc4a50b37cfbfcf.1524698920.git.v.shpilevoy@tarantool.org> (raw)
In-Reply-To: <cover.1524698920.git.v.shpilevoy@tarantool.org>
In-Reply-To: <cover.1524698920.git.v.shpilevoy@tarantool.org>

Lua has no built-in way to correctly compare Unicode strings. But
Tarantool links with ICU, so let's expose its collators to Lua. They
are now available out of the box and can be used in common libraries.

Follow up #3290
---
 extra/exports                |  2 ++
 src/CMakeLists.txt           |  2 +-
 src/lua/string.lua           | 35 +++++++++++++++++++++++++++++++++++
 src/util.c                   | 31 +++++++++++++++++++++++++++++++
 test/app-tap/string.test.lua | 18 +++++++++++++++++-
 test/box/ddl.result          | 15 +++++++++++++++
 test/box/ddl.test.lua        |  8 ++++++++
 7 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/extra/exports b/extra/exports
index b0480fe79..efcc3011c 100644
--- a/extra/exports
+++ b/extra/exports
@@ -41,6 +41,8 @@ title_get_status
 exception_get_string
 exception_get_int
 u_count
+u_compare
+u_icompare
 
 tarantool_lua_ibuf
 uuid_nil
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1032edc57..0ca41cfaf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -114,7 +114,7 @@ endif ()
 
 add_library(core STATIC ${core_sources})
 target_link_libraries(core
-    salad small pthread
+    salad small pthread misc
     ${LIBEV_LIBRARIES}
     ${LIBEIO_LIBRARIES}
     ${LIBCORO_LIBRARIES}
diff --git a/src/lua/string.lua b/src/lua/string.lua
index 6c566cb54..ce12c3f5d 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -32,6 +32,12 @@ ffi.cdef[[
 
     int
     u_count(const char *s, int bsize, uint8_t flags);
+
+    int
+    u_compare(const char *s1, size_t len1, const char *s2, size_t len2);
+
+    int
+    u_icompare(const char *s1, size_t len1, const char *s2, size_t len2);
 ]]
 
 local c_char_ptr = ffi.typeof('const char *')
@@ -503,6 +509,33 @@ local function string_u_count(inp, opts)
     end
 end
 
+--
+-- Compare two UTF8 strings.
+-- @param inp1 First string.
+-- @param inp2 Second string.
+-- @param func Comparator - case sensitive or insensitive.
+-- @param usage Error on incorrect usage.
+-- @retval  <0 inp1 < inp2
+-- @retval  >0 inp1 > inp2
+-- @retval ==0 inp1 == inp2
+--
+local function string_u_compare_impl(inp1, inp2, func, usage)
+    if type(inp1) ~= 'string' or type(inp2) ~= 'string' then
+        error(usage)
+    end
+    return func(c_char_ptr(inp1), #inp1, c_char_ptr(inp2), #inp2)
+end
+
+local function string_u_compare(inp1, inp2)
+    return string_u_compare_impl(inp1, inp2, ffi.C.u_compare,
+                                 'Usage: string.u_compare(<string>, <string>)')
+end
+
+local function string_u_icompare(inp1, inp2)
+    return string_u_compare_impl(inp1, inp2, ffi.C.u_icompare,
+                                 'Usage: string.u_icompare(<string>, <string>)')
+end
+
 -- It'll automatically set string methods, too.
 local string = require('string')
 string.split      = string_split
@@ -518,3 +551,5 @@ string.rstrip      = string_rstrip
 string.u_upper    = string_u_upper
 string.u_lower    = string_u_lower
 string.u_count    = string_u_count
+string.u_compare  = string_u_compare
+string.u_icompare = string_u_icompare
diff --git a/src/util.c b/src/util.c
index c117dee05..0f4d89b71 100644
--- a/src/util.c
+++ b/src/util.c
@@ -45,6 +45,7 @@
 #include <msgpuck/msgpuck.h> /* mp_char2escape[] table */
 
 #include "say.h"
+#include "coll_cache.h"
 
 /** Find a string in an array of strings.
  *
@@ -367,3 +368,33 @@ u_count(const char *s, int bsize, uint8_t flags)
 	}
 	return len;
 }
+
+/**
+ * Compare two UTF8 strings.
+ * @param s1 First string.
+ * @param len1 Binary size of @a s1.
+ * @param s2 Second string.
+ * @param len2 Binary size of @a s2.
+ * @retval Same as strcmp.
+ */
+int
+u_compare(const char *s1, size_t len1, const char *s2, size_t len2)
+{
+	struct coll *coll = coll_by_id(COLLATION_ID_UNICODE);
+	return coll->cmp(s1, len1, s2, len2, coll);
+}
+
+/**
+ * Case insensitive compare two UTF8 strings.
+ * @param s1 First string.
+ * @param len1 Binary size of @a s1.
+ * @param s2 Second string.
+ * @param len2 Binary size of @a s2.
+ * @retval Same as strcmp.
+ */
+int
+u_icompare(const char *s1, size_t len1, const char *s2, size_t len2)
+{
+	struct coll *coll = coll_by_id(COLLATION_ID_UNICODE_CI);
+	return coll->cmp(s1, len1, s2, len2, coll);
+}
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 650a5982d..f357304a0 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -115,7 +115,7 @@ test:test("hex", function(test)
 end)
 
 test:test("unicode", function(test)
-    test:plan(24)
+    test:plan(37)
     local str = 'хеЛлоу вОрЛд ё Ё я Я э Э ъ Ъ hElLo WorLd 1234 i I İ 勺#☢༺'
     local upper_res = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 I I İ 勺#☢༺'
     local upper_turkish = 'ХЕЛЛОУ ВОРЛД Ё Ё Я Я Э Э Ъ Ъ HELLO WORLD 1234 İ I İ 勺#☢༺'
@@ -164,6 +164,22 @@ test:test("unicode", function(test)
     test:is(string.u_count(str, {digit = true}), 4, 'option digit')
     test:is(string.u_count(str, {digit = true, upper = true}), 17,
             'options digit and upper')
+    -- Test compare.
+    local s1 = '☢'
+    local s2 = 'İ'
+    test:is(s1 < s2, false, 'test binary cmp')
+    test:is(string.u_compare(s1, s2) < 0, true, 'test unicode <')
+    test:is(string.u_compare(s1, s1) == 0, true, 'test unicode eq')
+    test:is(string.u_compare(s2, s1) > 0, true, 'test unicode >')
+    test:is(string.u_icompare('a', 'A') == 0, true, 'test icase ==')
+    test:is(string.u_icompare('b', 'A') > 0, true, 'test icase >, first')
+    test:is(string.u_icompare('B', 'a') > 0, true, 'test icase >, second >')
+    test:is(string.u_compare('', '') == 0, true, 'test empty compare')
+    test:is(string.u_compare('', 'a') < 0, true, 'test left empty compare')
+    test:is(string.u_compare('a', '') > 0, true, 'test right empty compare')
+    test:is(string.u_icompare('', '') == 0, true, 'test empty icompare')
+    test:is(string.u_icompare('', 'a') < 0, true, 'test left empty icompare')
+    test:is(string.u_icompare('a', '') > 0, true, 'test right empty icompare')
 end)
 
 test:test("strip", function(test)
diff --git a/test/box/ddl.result b/test/box/ddl.result
index 87b9581c6..a5e3d7206 100644
--- a/test/box/ddl.result
+++ b/test/box/ddl.result
@@ -500,6 +500,21 @@ box.space._collation.index.name:delete{'test'}
 - [3, 'test', 0, 'ICU', 'ru_RU', {}]
 ...
 --
+-- gh-3290: expose ICU into Lua. It uses built-in collations, that
+-- must work even if a collation is deleted from _collation.
+--
+t = box.space._collation:delete{1}
+---
+...
+string.u_compare('abc', 'def')
+---
+- -1
+...
+box.space._collation:replace(t)
+---
+- [1, 'unicode', 1, 'ICU', '', {}]
+...
+--
 -- gh-2839: allow to store custom fields in field definition.
 --
 format = {}
diff --git a/test/box/ddl.test.lua b/test/box/ddl.test.lua
index a1502ae13..9e4577069 100644
--- a/test/box/ddl.test.lua
+++ b/test/box/ddl.test.lua
@@ -191,6 +191,14 @@ test_run:cmd('restart server default')
 box.space._collation:select{}
 box.space._collation.index.name:delete{'test'}
 
+--
+-- gh-3290: expose ICU into Lua. It uses built-in collations, that
+-- must work even if a collation is deleted from _collation.
+--
+t = box.space._collation:delete{1}
+string.u_compare('abc', 'def')
+box.space._collation:replace(t)
+
 --
 -- gh-2839: allow to store custom fields in field definition.
 --
-- 
2.15.1 (Apple Git-101)

  parent reply	other threads:[~2018-04-25 23:29 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-25 23:29 [tarantool-patches] [PATCH 0/7] Expose ICU " Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 1/7] lua: expose ICU upper/lower functions to Lua Vladislav Shpilevoy
2018-04-28  0:56   ` [tarantool-patches] " Alexander Turenko
2018-04-25 23:29 ` [tarantool-patches] [PATCH 2/7] lua: implement string.u_count Vladislav Shpilevoy
2018-04-26 10:36   ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-26 16:07   ` Vladislav Shpilevoy
2018-04-26 23:57   ` Vladislav Shpilevoy
2018-04-28  1:10   ` Alexander Turenko
2018-04-25 23:29 ` [tarantool-patches] [PATCH 3/7] alter: fix assertion in collations alter Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 4/7] Move struct on_access_denied_ctx into error.h Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 5/7] Merge box_error, stat and collations into core library Vladislav Shpilevoy
2018-04-25 23:29 ` [tarantool-patches] [PATCH 6/7] Always store built-in collations in the cache Vladislav Shpilevoy
2018-04-25 23:29 ` Vladislav Shpilevoy [this message]
2018-04-28  1:55 ` [tarantool-patches] Re: [PATCH 0/7] Expose ICU into Lua Alexander Turenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44bf51af12117fba11a558b94bc4a50b37cfbfcf.1524698920.git.v.shpilevoy@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox