Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions
@ 2019-01-17 21:35 Michał Durak
  2019-01-21 18:14 ` [tarantool-patches] " Alexander Turenko
  0 siblings, 1 reply; 2+ messages in thread
From: Michał Durak @ 2019-01-17 21:35 UTC (permalink / raw)
  To: tarantool-patches

Add optional 'chars' parameter to string.strip, string.lstrip
and string.rstrip for specifying the unwanted characters.
Behavior modeled after the equivalent Python built-ins.

Needed for: #2977
---

FreeLists seems to redact the email addresses of senders using ProtonMail.
Please direct any replies to gdrbyko1[at]protonmail[dot]com
branch: https://github.com/gdrbyKo1/tarantool/tree/gdrbyko1/gh-2977
issue:  https://github.com/tarantool/tarantool/issues/2977

 extra/exports                |  1 +
 src/CMakeLists.txt           |  2 ++
 src/lua/string.lua           | 64 +++++++++++++++++++++++++++++++----
 src/tt_string.c              | 80 ++++++++++++++++++++++++++++++++++++++++++++
 src/tt_string.h              | 60 +++++++++++++++++++++++++++++++++
 test/app-tap/string.test.lua | 68 ++++++++++++++++++++++++++++++++-----
 6 files changed, 259 insertions(+), 16 deletions(-)
 create mode 100644 src/tt_string.c
 create mode 100644 src/tt_string.h

diff --git a/extra/exports b/extra/exports
index 5f69e0730..0b304e4b1 100644
--- a/extra/exports
+++ b/extra/exports
@@ -209,6 +209,7 @@ clock_realtime64
 clock_monotonic64
 clock_process64
 clock_thread64
+string_strip_helper

 # Lua / LuaJIT

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 04de5ad04..5ff4094ee 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -109,6 +109,7 @@ set (core_sources
      coll_def.c
      mpstream.c
      port.c
+     tt_string.c
  )

 if (TARGET_OS_NETBSD)
@@ -215,6 +216,7 @@ set(api_headers
     ${CMAKE_SOURCE_DIR}/src/box/lua/tuple.h
     ${CMAKE_SOURCE_DIR}/src/latch.h
     ${CMAKE_SOURCE_DIR}/src/clock.h
+    ${CMAKE_SOURCE_DIR}/src/tt_string.h
 )
 rebuild_module_api(${api_headers})

diff --git a/src/lua/string.lua b/src/lua/string.lua
index cbce26b35..35bd95343 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -6,15 +6,23 @@ ffi.cdef[[
            const char *needle,   size_t needle_len);
     int memcmp(const char *mem1, const char *mem2, size_t num);
     int isspace(int c);
+    void
+    string_strip_helper(const char *inp, unsigned long inp_len,
+                        const char *chars, unsigned long chars_len,
+                        bool lstrip, bool rstrip,
+                        unsigned long *newstart, unsigned long *newlen);
 ]]

-local c_char_ptr = ffi.typeof('const char *')
+local c_char_ptr     = ffi.typeof('const char *')
+local strip_newstart = ffi.new("unsigned long[1]")
+local strip_newlen   = ffi.new("unsigned long[1]")

 local memcmp  = ffi.C.memcmp
 local memmem  = ffi.C.memmem
 local isspace = ffi.C.isspace

 local err_string_arg = "bad argument #%d to '%s' (%s expected, got %s)"
+local space_chars    = ' \t\n\v\f\r'

 local function string_split_empty(inp, maxsplit)
     local p = c_char_ptr(inp)
@@ -339,25 +347,67 @@ local function string_fromhex(inp)
     return ffi.string(res, len)
 end

-local function string_strip(inp)
+local function string_strip(inp, chars)
     if type(inp) ~= 'string' then
         error(err_string_arg:format(1, "string.strip", 'string', type(inp)), 2)
     end
-    return (string.gsub(inp, "^%s*(.-)%s*$", "%1"))
+    if inp == '' then
+        return inp
+    end
+    if chars == nil then
+        chars = space_chars
+    elseif type(chars) ~= 'string' then
+        error(err_string_arg:format(2, "string.strip", 'string', type(chars)), 2)
+    elseif chars == '' then
+        return inp
+    end
+
+    local casted_inp = c_char_ptr(inp)
+    ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, true,
+                              strip_newstart, strip_newlen)
+    return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
 end

-local function string_lstrip(inp)
+local function string_lstrip(inp, chars)
     if type(inp) ~= 'string' then
         error(err_string_arg:format(1, "string.lstrip", 'string', type(inp)), 2)
     end
-    return (string.gsub(inp, "^%s*(.-)", "%1"))
+    if inp == '' then
+        return inp
+    end
+    if chars == nil then
+        chars = space_chars
+    elseif type(chars) ~= 'string' then
+        error(err_string_arg:format(2, "string.lstrip", 'string', type(chars)), 2)
+    elseif chars == '' then
+        return inp
+    end
+
+    local casted_inp = c_char_ptr(inp)
+    ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, false,
+                              strip_newstart, strip_newlen)
+    return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
 end

-local function string_rstrip(inp)
+local function string_rstrip(inp, chars)
     if type(inp) ~= 'string' then
         error(err_string_arg:format(1, "string.rstrip", 'string', type(inp)), 2)
     end
-    return (string.gsub(inp, "(.-)%s*$", "%1"))
+    if inp == '' then
+        return inp
+    end
+    if chars == nil then
+        chars = space_chars
+    elseif type(chars) ~= 'string' then
+        error(err_string_arg:format(2, "string.rstrip", 'string', type(chars)), 2)
+    elseif chars == '' then
+        return inp
+    end
+
+    local casted_inp = c_char_ptr(inp)
+    ffi.C.string_strip_helper(inp, #inp, chars, #chars, false, true,
+                              strip_newstart, strip_newlen)
+    return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
 end


diff --git a/src/tt_string.c b/src/tt_string.c
new file mode 100644
index 000000000..a3cd4ff94
--- /dev/null
+++ b/src/tt_string.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "tt_string.h"
+
+#include <stdint.h>
+#include <stdbool.h>
+
+unsigned long
+string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
+{
+        unsigned long i;
+        for (i = 0; i < inp_len; ++i) {
+                unsigned c = inp[i];
+                if (arr[c] == 0)
+                        break;
+        }
+        return i;
+}
+
+unsigned long
+string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
+{
+        unsigned long i;
+        for (i = inp_len - 1; i != (unsigned long)(-1); --i) {
+                unsigned c = inp[i];
+                if (arr[c] == 0)
+                        break;
+        }
+        return inp_len - i - 1;
+}
+
+void
+string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
+		    unsigned long chars_len, bool lstrip, bool rstrip,
+                    unsigned long *newstart, unsigned long *newlen)
+{
+        unsigned long skipped_from_left = 0;
+        unsigned long skipped_from_right = 0;
+        uint8_t arr[256] = {0};
+        for (unsigned long i = 0; i < chars_len; ++i) {
+                unsigned c = chars[i];
+                arr[c] = 1;
+        }
+
+        if (lstrip)
+                skipped_from_left = string_lstrip_helper(inp, inp_len, arr);
+        if (rstrip && skipped_from_left < inp_len - 1)
+                skipped_from_right = string_rstrip_helper(inp, inp_len, arr);
+
+        *newstart = skipped_from_left;
+        *newlen = inp_len - skipped_from_left - skipped_from_right;
+}
diff --git a/src/tt_string.h b/src/tt_string.h
new file mode 100644
index 000000000..f82337c46
--- /dev/null
+++ b/src/tt_string.h
@@ -0,0 +1,60 @@
+#ifndef TARANTOOL_STRING_H_INCLUDED
+#define TARANTOOL_STRING_H_INCLUDED
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+unsigned long
+string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
+
+unsigned long
+string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
+
+/** \cond public */
+
+void
+string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
+		    unsigned long chars_len, bool lstrip, bool rstrip,
+                    unsigned long *newstart, unsigned long *newlen);
+
+/** \endcond public */
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_STRING_H_INCLUDED */
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 7203fcd36..f79d4fb62 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -134,18 +134,68 @@ test:test("fromhex", function(test)
 end)

 test:test("strip", function(test)
-    test:plan(6)
-    local str = "  hello hello "
-    test:is(string.len(string.strip(str)), 11, "strip")
-    test:is(string.len(string.lstrip(str)), 12, "lstrip")
-    test:is(string.len(string.rstrip(str)), 13, "rstrip")
+    test:plan(39)
+    local str = "  Hello world! "
+    test:is(string.strip(str), "Hello world!", "strip (without chars)")
+    test:is(string.lstrip(str), "Hello world! ", "lstrip (without chars)")
+    test:is(string.rstrip(str), "  Hello world!", "rstrip (without chars)")
+    str = ""
+    test:is(string.strip(str), str, "strip (0-len inp without chars)")
+    test:is(string.lstrip(str), str, "lstrip (0-len inp without chars)")
+    test:is(string.rstrip(str), str, "rstrip (0-len inp without chars)")
+    str = "\t\v"
+    test:is(string.strip(str), "", "strip (strip everything without chars)")
+    test:is(string.lstrip(str), "", "lstrip (strip everything without chars)")
+    test:is(string.rstrip(str), "", "rstrip (strip everything without chars)")
+    str = "hello"
+    test:is(string.strip(str), str, "strip (strip nothing without chars)")
+    test:is(string.lstrip(str), str, "lstrip (strip nothing without chars)")
+    test:is(string.rstrip(str), str, "rstrip (strip nothing without chars)")
+    str = " \t\n\v\f\rTEST \t\n\v\f\r"
+    test:is(string.strip(str), "TEST", "strip (all space characters without chars)")
+    test:is(string.lstrip(str), "TEST \t\n\v\f\r", "lstrip (all space characters without chars)")
+    test:is(string.rstrip(str), " \t\n\v\f\rTEST", "rstrip (all space characters without chars)")
+
+    local chars = "#\0"
+    str = "##Hello world!#"
+    test:is(string.strip(str, chars), "Hello world!", "strip (with chars)")
+    test:is(string.lstrip(str, chars), "Hello world!#", "lstrip (with chars)")
+    test:is(string.rstrip(str, chars), "##Hello world!", "rstrip (with chars)")
+    str = ""
+    test:is(string.strip(str, chars), str, "strip (0-len inp with chars)")
+    test:is(string.lstrip(str, chars), str, "lstrip (0-len inp with chars)")
+    test:is(string.rstrip(str, chars), str, "rstrip (0-len inp with chars)")
+    str = "##"
+    test:is(string.strip(str, chars), "", "strip (strip everything with chars)")
+    test:is(string.lstrip(str, chars), "", "lstrip (strip everything with chars)")
+    test:is(string.rstrip(str, chars), "", "rstrip (strip everything with chars)")
+    str = "hello"
+    test:is(string.strip(str, chars), str, "strip (strip nothing with chars)")
+    test:is(string.lstrip(str, chars), str, "lstrip (strip nothing with chars)")
+    test:is(string.rstrip(str, chars), str, "rstrip (strip nothing with chars)")
+    str = "\0\0\0TEST\0"
+    test:is(string.strip(str, chars), "TEST", "strip (embedded 0s with chars)")
+    test:is(string.lstrip(str, chars), "TEST\0", "lstrip (embedded 0s with chars)")
+    test:is(string.rstrip(str, chars), "\0\0\0TEST", "rstrip (embedded 0s with chars)")
+    chars = ""
+    test:is(string.strip(str, chars), str, "strip (0-len chars)")
+    test:is(string.lstrip(str, chars), str, "lstrip (0-len chars)")
+    test:is(string.rstrip(str, chars), str, "rstrip (0-len chars)")
+
     local _, err = pcall(string.strip, 12)
-    test:ok(err and err:match("%(string expected, got number%)"))
+    test:ok(err and err:match("#1 to '.-%.strip' %(string expected, got number%)"), "strip err 1")
     _, err = pcall(string.lstrip, 12)
-    test:ok(err and err:match("%(string expected, got number%)"))
+    test:ok(err and err:match("#1 to '.-%.lstrip' %(string expected, got number%)"), "lstrip err 1")
     _, err = pcall(string.rstrip, 12)
-    test:ok(err and err:match("%(string expected, got number%)"))
-end )
+    test:ok(err and err:match("#1 to '.-%.rstrip' %(string expected, got number%)"), "rstrip err 1")
+
+    _, err = pcall(string.strip, "foo", 12)
+    test:ok(err and err:match("#2 to '.-%.strip' %(string expected, got number%)"), "strip err 2")
+    _, err = pcall(string.lstrip, "foo", 12)
+    test:ok(err and err:match("#2 to '.-%.lstrip' %(string expected, got number%)"), "lstrip err 2")
+    _, err = pcall(string.rstrip, "foo", 12)
+    test:ok(err and err:match("#2 to '.-%.rstrip' %(string expected, got number%)"), "rstrip err 2")
+end)

 test:test("unicode", function(test)
     test:plan(104)
--
2.11.0

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-01-21 18:14 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-17 21:35 [tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions Michał Durak
2019-01-21 18:14 ` [tarantool-patches] " Alexander Turenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox