[tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions
Michał Durak
dmarc-noreply at freelists.org
Fri Jan 18 00:35:20 MSK 2019
Add optional 'chars' parameter to string.strip, string.lstrip
and string.rstrip for specifying the unwanted characters.
Behavior modeled after the equivalent Python built-ins.
Needed for: #2977
---
FreeLists seems to redact the email addresses of senders using ProtonMail.
Please direct any replies to gdrbyko1[at]protonmail[dot]com
branch: https://github.com/gdrbyKo1/tarantool/tree/gdrbyko1/gh-2977
issue: https://github.com/tarantool/tarantool/issues/2977
extra/exports | 1 +
src/CMakeLists.txt | 2 ++
src/lua/string.lua | 64 +++++++++++++++++++++++++++++++----
src/tt_string.c | 80 ++++++++++++++++++++++++++++++++++++++++++++
src/tt_string.h | 60 +++++++++++++++++++++++++++++++++
test/app-tap/string.test.lua | 68 ++++++++++++++++++++++++++++++++-----
6 files changed, 259 insertions(+), 16 deletions(-)
create mode 100644 src/tt_string.c
create mode 100644 src/tt_string.h
diff --git a/extra/exports b/extra/exports
index 5f69e0730..0b304e4b1 100644
--- a/extra/exports
+++ b/extra/exports
@@ -209,6 +209,7 @@ clock_realtime64
clock_monotonic64
clock_process64
clock_thread64
+string_strip_helper
# Lua / LuaJIT
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 04de5ad04..5ff4094ee 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -109,6 +109,7 @@ set (core_sources
coll_def.c
mpstream.c
port.c
+ tt_string.c
)
if (TARGET_OS_NETBSD)
@@ -215,6 +216,7 @@ set(api_headers
${CMAKE_SOURCE_DIR}/src/box/lua/tuple.h
${CMAKE_SOURCE_DIR}/src/latch.h
${CMAKE_SOURCE_DIR}/src/clock.h
+ ${CMAKE_SOURCE_DIR}/src/tt_string.h
)
rebuild_module_api(${api_headers})
diff --git a/src/lua/string.lua b/src/lua/string.lua
index cbce26b35..35bd95343 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -6,15 +6,23 @@ ffi.cdef[[
const char *needle, size_t needle_len);
int memcmp(const char *mem1, const char *mem2, size_t num);
int isspace(int c);
+ void
+ string_strip_helper(const char *inp, unsigned long inp_len,
+ const char *chars, unsigned long chars_len,
+ bool lstrip, bool rstrip,
+ unsigned long *newstart, unsigned long *newlen);
]]
-local c_char_ptr = ffi.typeof('const char *')
+local c_char_ptr = ffi.typeof('const char *')
+local strip_newstart = ffi.new("unsigned long[1]")
+local strip_newlen = ffi.new("unsigned long[1]")
local memcmp = ffi.C.memcmp
local memmem = ffi.C.memmem
local isspace = ffi.C.isspace
local err_string_arg = "bad argument #%d to '%s' (%s expected, got %s)"
+local space_chars = ' \t\n\v\f\r'
local function string_split_empty(inp, maxsplit)
local p = c_char_ptr(inp)
@@ -339,25 +347,67 @@ local function string_fromhex(inp)
return ffi.string(res, len)
end
-local function string_strip(inp)
+--
+-- Remove unwanted bytes from both ends of a string.
+-- @param inp   string to strip; any other type raises an error
+--              attributed to the caller (error level 2)
+-- @param chars optional string listing the bytes to remove;
+--              nil selects space_chars, '' returns inp unchanged
+-- @return the stripped string
+--
+local function string_strip(inp, chars)
if type(inp) ~= 'string' then
error(err_string_arg:format(1, "string.strip", 'string', type(inp)), 2)
end
- return (string.gsub(inp, "^%s*(.-)%s*$", "%1"))
+ if inp == '' then
+ return inp
+ end
+ if chars == nil then
+ chars = space_chars
+ elseif type(chars) ~= 'string' then
+ error(err_string_arg:format(2, "string.strip", 'string', type(chars)), 2)
+ elseif chars == '' then
+ return inp
+ end
+
+ -- const char * view of inp, needed for the pointer arithmetic below.
+ local casted_inp = c_char_ptr(inp)
+ -- lstrip = true, rstrip = true. The helper writes the offset and
+ -- length of the kept slice into strip_newstart[0] / strip_newlen[0].
+ ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, true,
+ strip_newstart, strip_newlen)
+ return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
end
-local function string_lstrip(inp)
+--
+-- Remove unwanted bytes from the beginning of a string.
+-- @param inp   string to strip; any other type raises an error
+--              attributed to the caller (error level 2)
+-- @param chars optional string listing the bytes to remove;
+--              nil selects space_chars, '' returns inp unchanged
+-- @return the stripped string
+--
+local function string_lstrip(inp, chars)
if type(inp) ~= 'string' then
error(err_string_arg:format(1, "string.lstrip", 'string', type(inp)), 2)
end
- return (string.gsub(inp, "^%s*(.-)", "%1"))
+ if inp == '' then
+ return inp
+ end
+ if chars == nil then
+ chars = space_chars
+ elseif type(chars) ~= 'string' then
+ error(err_string_arg:format(2, "string.lstrip", 'string', type(chars)), 2)
+ elseif chars == '' then
+ return inp
+ end
+
+ -- const char * view of inp, needed for the pointer arithmetic below.
+ local casted_inp = c_char_ptr(inp)
+ -- lstrip = true, rstrip = false. The helper writes the offset and
+ -- length of the kept slice into strip_newstart[0] / strip_newlen[0].
+ ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, false,
+ strip_newstart, strip_newlen)
+ return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
end
-local function string_rstrip(inp)
+--
+-- Remove unwanted bytes from the end of a string.
+-- @param inp   string to strip; any other type raises an error
+--              attributed to the caller (error level 2)
+-- @param chars optional string listing the bytes to remove;
+--              nil selects space_chars, '' returns inp unchanged
+-- @return the stripped string
+--
+local function string_rstrip(inp, chars)
if type(inp) ~= 'string' then
error(err_string_arg:format(1, "string.rstrip", 'string', type(inp)), 2)
end
- return (string.gsub(inp, "(.-)%s*$", "%1"))
+ if inp == '' then
+ return inp
+ end
+ if chars == nil then
+ chars = space_chars
+ elseif type(chars) ~= 'string' then
+ error(err_string_arg:format(2, "string.rstrip", 'string', type(chars)), 2)
+ elseif chars == '' then
+ return inp
+ end
+
+ -- const char * view of inp, needed for the pointer arithmetic below.
+ local casted_inp = c_char_ptr(inp)
+ -- lstrip = false, rstrip = true. The helper writes the offset and
+ -- length of the kept slice into strip_newstart[0] / strip_newlen[0].
+ ffi.C.string_strip_helper(inp, #inp, chars, #chars, false, true,
+ strip_newstart, strip_newlen)
+ return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
end
diff --git a/src/tt_string.c b/src/tt_string.c
new file mode 100644
index 000000000..a3cd4ff94
--- /dev/null
+++ b/src/tt_string.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "tt_string.h"
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/**
+ * Count the leading bytes of @a inp that are marked in @a arr.
+ *
+ * @param inp     input buffer; need not be NUL-terminated
+ * @param inp_len number of bytes in @a inp
+ * @param arr     256-entry lookup table indexed by byte value;
+ *                a non-zero entry marks a byte to strip
+ *
+ * @return number of bytes to skip from the left, in [0, inp_len]
+ */
+unsigned long
+string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
+{
+	unsigned long i;
+	for (i = 0; i < inp_len; ++i) {
+		/*
+		 * Plain char may be signed: a byte >= 0x80 would become
+		 * a huge index once widened to unsigned. Go through
+		 * unsigned char so the index always stays in [0, 255].
+		 */
+		unsigned char c = (unsigned char) inp[i];
+		if (arr[c] == 0)
+			break;
+	}
+	return i;
+}
+
+/**
+ * Count the trailing bytes of @a inp that are marked in @a arr.
+ *
+ * @param inp     input buffer; need not be NUL-terminated
+ * @param inp_len number of bytes in @a inp
+ * @param arr     256-entry lookup table indexed by byte value;
+ *                a non-zero entry marks a byte to strip
+ *
+ * @return number of bytes to skip from the right, in [0, inp_len]
+ */
+unsigned long
+string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
+{
+	/* One past the last byte that is kept. */
+	unsigned long end = inp_len;
+	while (end > 0) {
+		/*
+		 * Plain char may be signed: a byte >= 0x80 would become
+		 * a huge index once widened to unsigned. Go through
+		 * unsigned char so the index always stays in [0, 255].
+		 */
+		unsigned char c = (unsigned char) inp[end - 1];
+		if (arr[c] == 0)
+			break;
+		--end;
+	}
+	return inp_len - end;
+}
+
+/**
+ * Compute which slice of @a inp remains after stripping the bytes
+ * listed in @a chars from its left and/or right end.
+ *
+ * @param inp       input buffer; need not be NUL-terminated
+ * @param inp_len   number of bytes in @a inp
+ * @param chars     bytes to strip; may contain NUL bytes
+ * @param chars_len number of bytes in @a chars
+ * @param lstrip    strip from the left end when true
+ * @param rstrip    strip from the right end when true
+ * @param[out] newstart offset of the first kept byte
+ * @param[out] newlen   number of kept bytes
+ */
+void
+string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
+		    unsigned long chars_len, bool lstrip, bool rstrip,
+		    unsigned long *newstart, unsigned long *newlen)
+{
+	unsigned long skipped_from_left = 0;
+	unsigned long skipped_from_right = 0;
+	uint8_t arr[256] = {0};
+	for (unsigned long i = 0; i < chars_len; ++i) {
+		/*
+		 * Plain char may be signed: index through unsigned char
+		 * so bytes >= 0x80 do not write outside the table.
+		 */
+		unsigned char c = (unsigned char) chars[i];
+		arr[c] = 1;
+	}
+
+	if (lstrip)
+		skipped_from_left = string_lstrip_helper(inp, inp_len, arr);
+	/*
+	 * Scan from the right unless the left scan already consumed the
+	 * whole input. Comparing against inp_len (not inp_len - 1) keeps
+	 * one-byte inputs strippable from the right and cannot underflow
+	 * when inp_len is 0. If the left scan stopped early, it stopped
+	 * on a byte the right scan will stop on too, so the two counts
+	 * never overlap.
+	 */
+	if (rstrip && skipped_from_left < inp_len)
+		skipped_from_right = string_rstrip_helper(inp, inp_len, arr);
+
+	*newstart = skipped_from_left;
+	*newlen = inp_len - skipped_from_left - skipped_from_right;
+}
diff --git a/src/tt_string.h b/src/tt_string.h
new file mode 100644
index 000000000..f82337c46
--- /dev/null
+++ b/src/tt_string.h
@@ -0,0 +1,60 @@
+#ifndef TARANTOOL_STRING_H_INCLUDED
+#define TARANTOOL_STRING_H_INCLUDED
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * Return how many leading bytes of inp (inp_len bytes long) have a
+ * non-zero entry in arr, a lookup table indexed by byte value.
+ */
+unsigned long
+string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
+
+/**
+ * Return how many trailing bytes of inp (inp_len bytes long) have a
+ * non-zero entry in arr, a lookup table indexed by byte value.
+ */
+unsigned long
+string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
+
+/** \cond public */
+
+/**
+ * Compute the slice of inp left after stripping the bytes listed in
+ * chars. lstrip / rstrip select which ends are scanned. On return,
+ * *newstart holds the number of bytes skipped from the left and
+ * *newlen holds inp_len minus the bytes skipped from both ends.
+ */
+void
+string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
+ unsigned long chars_len, bool lstrip, bool rstrip,
+ unsigned long *newstart, unsigned long *newlen);
+
+/** \endcond public */
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_STRING_H_INCLUDED */
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 7203fcd36..f79d4fb62 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -134,18 +134,68 @@ test:test("fromhex", function(test)
end)
test:test("strip", function(test)
- test:plan(6)
- local str = " hello hello "
- test:is(string.len(string.strip(str)), 11, "strip")
- test:is(string.len(string.lstrip(str)), 12, "lstrip")
- test:is(string.len(string.rstrip(str)), 13, "rstrip")
+ -- 15 checks without 'chars', 18 with 'chars', 6 argument-type errors.
+ test:plan(39)
+ -- No 'chars' argument: only whitespace is stripped.
+ local str = " Hello world! "
+ test:is(string.strip(str), "Hello world!", "strip (without chars)")
+ test:is(string.lstrip(str), "Hello world! ", "lstrip (without chars)")
+ test:is(string.rstrip(str), " Hello world!", "rstrip (without chars)")
+ str = ""
+ test:is(string.strip(str), str, "strip (0-len inp without chars)")
+ test:is(string.lstrip(str), str, "lstrip (0-len inp without chars)")
+ test:is(string.rstrip(str), str, "rstrip (0-len inp without chars)")
+ str = "\t\v"
+ test:is(string.strip(str), "", "strip (strip everything without chars)")
+ test:is(string.lstrip(str), "", "lstrip (strip everything without chars)")
+ test:is(string.rstrip(str), "", "rstrip (strip everything without chars)")
+ str = "hello"
+ test:is(string.strip(str), str, "strip (strip nothing without chars)")
+ test:is(string.lstrip(str), str, "lstrip (strip nothing without chars)")
+ test:is(string.rstrip(str), str, "rstrip (strip nothing without chars)")
+ str = " \t\n\v\f\rTEST \t\n\v\f\r"
+ test:is(string.strip(str), "TEST", "strip (all space characters without chars)")
+ test:is(string.lstrip(str), "TEST \t\n\v\f\r", "lstrip (all space characters without chars)")
+ test:is(string.rstrip(str), " \t\n\v\f\rTEST", "rstrip (all space characters without chars)")
+
+ -- Explicit strip set: '#' plus an embedded NUL byte.
+ local chars = "#\0"
+ str = "##Hello world!#"
+ test:is(string.strip(str, chars), "Hello world!", "strip (with chars)")
+ test:is(string.lstrip(str, chars), "Hello world!#", "lstrip (with chars)")
+ test:is(string.rstrip(str, chars), "##Hello world!", "rstrip (with chars)")
+ str = ""
+ test:is(string.strip(str, chars), str, "strip (0-len inp with chars)")
+ test:is(string.lstrip(str, chars), str, "lstrip (0-len inp with chars)")
+ test:is(string.rstrip(str, chars), str, "rstrip (0-len inp with chars)")
+ str = "##"
+ test:is(string.strip(str, chars), "", "strip (strip everything with chars)")
+ test:is(string.lstrip(str, chars), "", "lstrip (strip everything with chars)")
+ test:is(string.rstrip(str, chars), "", "rstrip (strip everything with chars)")
+ str = "hello"
+ test:is(string.strip(str, chars), str, "strip (strip nothing with chars)")
+ test:is(string.lstrip(str, chars), str, "lstrip (strip nothing with chars)")
+ test:is(string.rstrip(str, chars), str, "rstrip (strip nothing with chars)")
+ str = "\0\0\0TEST\0"
+ test:is(string.strip(str, chars), "TEST", "strip (embedded 0s with chars)")
+ test:is(string.lstrip(str, chars), "TEST\0", "lstrip (embedded 0s with chars)")
+ test:is(string.rstrip(str, chars), "\0\0\0TEST", "rstrip (embedded 0s with chars)")
+ -- An empty strip set must leave the input untouched.
+ chars = ""
+ test:is(string.strip(str, chars), str, "strip (0-len chars)")
+ test:is(string.lstrip(str, chars), str, "lstrip (0-len chars)")
+ test:is(string.rstrip(str, chars), str, "rstrip (0-len chars)")
+
+ -- Argument #1 must be a string.
local _, err = pcall(string.strip, 12)
- test:ok(err and err:match("%(string expected, got number%)"))
+ test:ok(err and err:match("#1 to '.-%.strip' %(string expected, got number%)"), "strip err 1")
_, err = pcall(string.lstrip, 12)
- test:ok(err and err:match("%(string expected, got number%)"))
+ test:ok(err and err:match("#1 to '.-%.lstrip' %(string expected, got number%)"), "lstrip err 1")
_, err = pcall(string.rstrip, 12)
- test:ok(err and err:match("%(string expected, got number%)"))
-end )
+ test:ok(err and err:match("#1 to '.-%.rstrip' %(string expected, got number%)"), "rstrip err 1")
+
+ -- Argument #2, when given, must be a string as well.
+ _, err = pcall(string.strip, "foo", 12)
+ test:ok(err and err:match("#2 to '.-%.strip' %(string expected, got number%)"), "strip err 2")
+ _, err = pcall(string.lstrip, "foo", 12)
+ test:ok(err and err:match("#2 to '.-%.lstrip' %(string expected, got number%)"), "lstrip err 2")
+ _, err = pcall(string.rstrip, "foo", 12)
+ test:ok(err and err:match("#2 to '.-%.rstrip' %(string expected, got number%)"), "rstrip err 2")
+end)
test:test("unicode", function(test)
test:plan(104)
--
2.11.0
More information about the Tarantool-patches
mailing list