* [tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions
@ 2019-01-17 21:35 Michał Durak
2019-01-21 18:14 ` [tarantool-patches] " Alexander Turenko
0 siblings, 1 reply; 2+ messages in thread
From: Michał Durak @ 2019-01-17 21:35 UTC (permalink / raw)
To: tarantool-patches
Add optional 'chars' parameter to string.strip, string.lstrip
and string.rstrip for specifying the unwanted characters.
Behavior modeled after the equivalent Python built-ins.
Needed for: #2977
---
FreeLists seems to redact the email addresses of senders using ProtonMail.
Please direct any replies to gdrbyko1[at]protonmail[dot]com
branch: https://github.com/gdrbyKo1/tarantool/tree/gdrbyko1/gh-2977
issue: https://github.com/tarantool/tarantool/issues/2977
| 1 +
src/CMakeLists.txt | 2 ++
src/lua/string.lua | 64 +++++++++++++++++++++++++++++++----
src/tt_string.c | 80 ++++++++++++++++++++++++++++++++++++++++++++
src/tt_string.h | 60 +++++++++++++++++++++++++++++++++
test/app-tap/string.test.lua | 68 ++++++++++++++++++++++++++++++++-----
6 files changed, 259 insertions(+), 16 deletions(-)
create mode 100644 src/tt_string.c
create mode 100644 src/tt_string.h
--git a/extra/exports b/extra/exports
index 5f69e0730..0b304e4b1 100644
--- a/extra/exports
+++ b/extra/exports
@@ -209,6 +209,7 @@ clock_realtime64
clock_monotonic64
clock_process64
clock_thread64
+string_strip_helper
# Lua / LuaJIT
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 04de5ad04..5ff4094ee 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -109,6 +109,7 @@ set (core_sources
coll_def.c
mpstream.c
port.c
+ tt_string.c
)
if (TARGET_OS_NETBSD)
@@ -215,6 +216,7 @@ set(api_headers
${CMAKE_SOURCE_DIR}/src/box/lua/tuple.h
${CMAKE_SOURCE_DIR}/src/latch.h
${CMAKE_SOURCE_DIR}/src/clock.h
+ ${CMAKE_SOURCE_DIR}/src/tt_string.h
)
rebuild_module_api(${api_headers})
diff --git a/src/lua/string.lua b/src/lua/string.lua
index cbce26b35..35bd95343 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -6,15 +6,23 @@ ffi.cdef[[
const char *needle, size_t needle_len);
int memcmp(const char *mem1, const char *mem2, size_t num);
int isspace(int c);
+ void
+ string_strip_helper(const char *inp, unsigned long inp_len,
+ const char *chars, unsigned long chars_len,
+ bool lstrip, bool rstrip,
+ unsigned long *newstart, unsigned long *newlen);
]]
-local c_char_ptr = ffi.typeof('const char *')
+local c_char_ptr = ffi.typeof('const char *')
+local strip_newstart = ffi.new("unsigned long[1]")
+local strip_newlen = ffi.new("unsigned long[1]")
local memcmp = ffi.C.memcmp
local memmem = ffi.C.memmem
local isspace = ffi.C.isspace
local err_string_arg = "bad argument #%d to '%s' (%s expected, got %s)"
+local space_chars = ' \t\n\v\f\r'
local function string_split_empty(inp, maxsplit)
local p = c_char_ptr(inp)
@@ -339,25 +347,67 @@ local function string_fromhex(inp)
return ffi.string(res, len)
end
-local function string_strip(inp)
+local function string_strip(inp, chars)
if type(inp) ~= 'string' then
error(err_string_arg:format(1, "string.strip", 'string', type(inp)), 2)
end
- return (string.gsub(inp, "^%s*(.-)%s*$", "%1"))
+ if inp == '' then
+ return inp
+ end
+ if chars == nil then
+ chars = space_chars
+ elseif type(chars) ~= 'string' then
+ error(err_string_arg:format(2, "string.strip", 'string', type(chars)), 2)
+ elseif chars == '' then
+ return inp
+ end
+
+ local casted_inp = c_char_ptr(inp)
+ ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, true,
+ strip_newstart, strip_newlen)
+ return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
end
-local function string_lstrip(inp)
+local function string_lstrip(inp, chars)
if type(inp) ~= 'string' then
error(err_string_arg:format(1, "string.lstrip", 'string', type(inp)), 2)
end
- return (string.gsub(inp, "^%s*(.-)", "%1"))
+ if inp == '' then
+ return inp
+ end
+ if chars == nil then
+ chars = space_chars
+ elseif type(chars) ~= 'string' then
+ error(err_string_arg:format(2, "string.lstrip", 'string', type(chars)), 2)
+ elseif chars == '' then
+ return inp
+ end
+
+ local casted_inp = c_char_ptr(inp)
+ ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, false,
+ strip_newstart, strip_newlen)
+ return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
end
-local function string_rstrip(inp)
+local function string_rstrip(inp, chars)
if type(inp) ~= 'string' then
error(err_string_arg:format(1, "string.rstrip", 'string', type(inp)), 2)
end
- return (string.gsub(inp, "(.-)%s*$", "%1"))
+ if inp == '' then
+ return inp
+ end
+ if chars == nil then
+ chars = space_chars
+ elseif type(chars) ~= 'string' then
+ error(err_string_arg:format(2, "string.rstrip", 'string', type(chars)), 2)
+ elseif chars == '' then
+ return inp
+ end
+
+ local casted_inp = c_char_ptr(inp)
+ ffi.C.string_strip_helper(inp, #inp, chars, #chars, false, true,
+ strip_newstart, strip_newlen)
+ return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0])
end
diff --git a/src/tt_string.c b/src/tt_string.c
new file mode 100644
index 000000000..a3cd4ff94
--- /dev/null
+++ b/src/tt_string.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "tt_string.h"
+
+#include <stdint.h>
+#include <stdbool.h>
+
+unsigned long
+string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
+{
+ unsigned long i;
+ for (i = 0; i < inp_len; ++i) {
+ unsigned c = inp[i];
+ if (arr[c] == 0)
+ break;
+ }
+ return i;
+}
+
+unsigned long
+string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
+{
+ unsigned long i;
+ for (i = inp_len - 1; i != (unsigned long)(-1); --i) {
+ unsigned c = inp[i];
+ if (arr[c] == 0)
+ break;
+ }
+ return inp_len - i - 1;
+}
+
+void
+string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
+ unsigned long chars_len, bool lstrip, bool rstrip,
+ unsigned long *newstart, unsigned long *newlen)
+{
+ unsigned long skipped_from_left = 0;
+ unsigned long skipped_from_right = 0;
+ uint8_t arr[256] = {0};
+ for (unsigned long i = 0; i < chars_len; ++i) {
+ unsigned c = chars[i];
+ arr[c] = 1;
+ }
+
+ if (lstrip)
+ skipped_from_left = string_lstrip_helper(inp, inp_len, arr);
+ if (rstrip && skipped_from_left < inp_len - 1)
+ skipped_from_right = string_rstrip_helper(inp, inp_len, arr);
+
+ *newstart = skipped_from_left;
+ *newlen = inp_len - skipped_from_left - skipped_from_right;
+}
diff --git a/src/tt_string.h b/src/tt_string.h
new file mode 100644
index 000000000..f82337c46
--- /dev/null
+++ b/src/tt_string.h
@@ -0,0 +1,60 @@
+#ifndef TARANTOOL_STRING_H_INCLUDED
+#define TARANTOOL_STRING_H_INCLUDED
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+unsigned long
+string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
+
+unsigned long
+string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
+
+/** \cond public */
+
+void
+string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
+ unsigned long chars_len, bool lstrip, bool rstrip,
+ unsigned long *newstart, unsigned long *newlen);
+
+/** \endcond public */
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_STRING_H_INCLUDED */
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 7203fcd36..f79d4fb62 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -134,18 +134,68 @@ test:test("fromhex", function(test)
end)
test:test("strip", function(test)
- test:plan(6)
- local str = " hello hello "
- test:is(string.len(string.strip(str)), 11, "strip")
- test:is(string.len(string.lstrip(str)), 12, "lstrip")
- test:is(string.len(string.rstrip(str)), 13, "rstrip")
+ test:plan(39)
+ local str = " Hello world! "
+ test:is(string.strip(str), "Hello world!", "strip (without chars)")
+ test:is(string.lstrip(str), "Hello world! ", "lstrip (without chars)")
+ test:is(string.rstrip(str), " Hello world!", "rstrip (without chars)")
+ str = ""
+ test:is(string.strip(str), str, "strip (0-len inp without chars)")
+ test:is(string.lstrip(str), str, "lstrip (0-len inp without chars)")
+ test:is(string.rstrip(str), str, "rstrip (0-len inp without chars)")
+ str = "\t\v"
+ test:is(string.strip(str), "", "strip (strip everything without chars)")
+ test:is(string.lstrip(str), "", "lstrip (strip everything without chars)")
+ test:is(string.rstrip(str), "", "rstrip (strip everything without chars)")
+ str = "hello"
+ test:is(string.strip(str), str, "strip (strip nothing without chars)")
+ test:is(string.lstrip(str), str, "lstrip (strip nothing without chars)")
+ test:is(string.rstrip(str), str, "rstrip (strip nothing without chars)")
+ str = " \t\n\v\f\rTEST \t\n\v\f\r"
+ test:is(string.strip(str), "TEST", "strip (all space characters without chars)")
+ test:is(string.lstrip(str), "TEST \t\n\v\f\r", "lstrip (all space characters without chars)")
+ test:is(string.rstrip(str), " \t\n\v\f\rTEST", "rstrip (all space characters without chars)")
+
+ local chars = "#\0"
+ str = "##Hello world!#"
+ test:is(string.strip(str, chars), "Hello world!", "strip (with chars)")
+ test:is(string.lstrip(str, chars), "Hello world!#", "lstrip (with chars)")
+ test:is(string.rstrip(str, chars), "##Hello world!", "rstrip (with chars)")
+ str = ""
+ test:is(string.strip(str, chars), str, "strip (0-len inp with chars)")
+ test:is(string.lstrip(str, chars), str, "lstrip (0-len inp with chars)")
+ test:is(string.rstrip(str, chars), str, "rstrip (0-len inp with chars)")
+ str = "##"
+ test:is(string.strip(str, chars), "", "strip (strip everything with chars)")
+ test:is(string.lstrip(str, chars), "", "lstrip (strip everything with chars)")
+ test:is(string.rstrip(str, chars), "", "rstrip (strip everything with chars)")
+ str = "hello"
+ test:is(string.strip(str, chars), str, "strip (strip nothing with chars)")
+ test:is(string.lstrip(str, chars), str, "lstrip (strip nothing with chars)")
+ test:is(string.rstrip(str, chars), str, "rstrip (strip nothing with chars)")
+ str = "\0\0\0TEST\0"
+ test:is(string.strip(str, chars), "TEST", "strip (embedded 0s with chars)")
+ test:is(string.lstrip(str, chars), "TEST\0", "lstrip (embedded 0s with chars)")
+ test:is(string.rstrip(str, chars), "\0\0\0TEST", "rstrip (embedded 0s with chars)")
+ chars = ""
+ test:is(string.strip(str, chars), str, "strip (0-len chars)")
+ test:is(string.lstrip(str, chars), str, "lstrip (0-len chars)")
+ test:is(string.rstrip(str, chars), str, "rstrip (0-len chars)")
+
local _, err = pcall(string.strip, 12)
- test:ok(err and err:match("%(string expected, got number%)"))
+ test:ok(err and err:match("#1 to '.-%.strip' %(string expected, got number%)"), "strip err 1")
_, err = pcall(string.lstrip, 12)
- test:ok(err and err:match("%(string expected, got number%)"))
+ test:ok(err and err:match("#1 to '.-%.lstrip' %(string expected, got number%)"), "lstrip err 1")
_, err = pcall(string.rstrip, 12)
- test:ok(err and err:match("%(string expected, got number%)"))
-end )
+ test:ok(err and err:match("#1 to '.-%.rstrip' %(string expected, got number%)"), "rstrip err 1")
+
+ _, err = pcall(string.strip, "foo", 12)
+ test:ok(err and err:match("#2 to '.-%.strip' %(string expected, got number%)"), "strip err 2")
+ _, err = pcall(string.lstrip, "foo", 12)
+ test:ok(err and err:match("#2 to '.-%.lstrip' %(string expected, got number%)"), "lstrip err 2")
+ _, err = pcall(string.rstrip, "foo", 12)
+ test:ok(err and err:match("#2 to '.-%.rstrip' %(string expected, got number%)"), "rstrip err 2")
+end)
test:test("unicode", function(test)
test:plan(104)
--
2.11.0
^ permalink raw reply [flat|nested] 2+ messages in thread
* [tarantool-patches] Re: [PATCH v2] add optional 'chars' param to string.strip functions
2019-01-17 21:35 [tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions Michał Durak
@ 2019-01-21 18:14 ` Alexander Turenko
0 siblings, 0 replies; 2+ messages in thread
From: Alexander Turenko @ 2019-01-21 18:14 UTC (permalink / raw)
To: Michał Durak; +Cc: tarantool-patches
Hi!
Add the 'lua: ' prefix to the commit message header and try to fit it in
50 symbols, maybe like so:
> lua: add 'chars' param to string.strip functions
The Lua part looks okay, the test looks okay. See comments re C part
below.
WBR, Alexander Turenko.
On Thu, Jan 17, 2019 at 09:35:20PM +0000, Michał Durak wrote:
> Add optional 'chars' parameter to string.strip, string.lstrip
> and string.rstrip for specifying the unwanted characters.
> Behavior modeled after the equivalent Python built-ins.
>
> Needed for: #2977
Please, change to 'Fixes #2977' or 'Closes #2977' to auto-close the
commit.
Also it would be great if you would write docbot comment to allow the
tarantool documentation team to catch the new task when your commit will
land into 2.1 branch. Docs and example:
https://github.com/tarantool/docbot/blob/master/README.md
https://github.com/tarantool/tarantool/commit/dfa7a61d53fd8ad1d5aabb8138028eb121a40f61
> ---
>
> FreeLists seems to redact the email addresses of senders using ProtonMail.
> Please direct any replies to gdrbyko1[at]protonmail[dot]com
> branch: https://github.com/gdrbyKo1/tarantool/tree/gdrbyko1/gh-2977
> issue: https://github.com/tarantool/tarantool/issues/2977
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index 04de5ad04..5ff4094ee 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -109,6 +109,7 @@ set (core_sources
> coll_def.c
> mpstream.c
> port.c
> + tt_string.c
Move it to src/lua/string.[ch].
> diff --git a/src/lua/string.lua b/src/lua/string.lua
> index cbce26b35..35bd95343 100644
> --- a/src/lua/string.lua
> +++ b/src/lua/string.lua
> @@ -6,15 +6,23 @@ ffi.cdef[[
> const char *needle, size_t needle_len);
> int memcmp(const char *mem1, const char *mem2, size_t num);
> int isspace(int c);
> + void
> + string_strip_helper(const char *inp, unsigned long inp_len,
> + const char *chars, unsigned long chars_len,
> + bool lstrip, bool rstrip,
> + unsigned long *newstart, unsigned long *newlen);
> ]]
Don't forget to change definition here after size_t changes (see below).
> diff --git a/src/tt_string.c b/src/tt_string.c
> new file mode 100644
> index 000000000..a3cd4ff94
> --- /dev/null
> +++ b/src/tt_string.c
Please, use tabs for indent. Consider out C style guide:
https://www.tarantool.io/en/doc/1.10/dev_guide/c_style_guide/
> +unsigned long
> +string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
> +{
1. It seems that it is usual to use size_t for a string length. At least
lua_tolstring() and strlen() use them. It seems that we should use it
too.
2. We can make this and the following function static (and also remove
them from a header).
3. I think a comment would be helpful, at least what is a return value.
4. I think we can name it like lstrip_helper() to highlight the fact
that it a) is file-level (static); b) has different API from
string_strip_helper().
> + unsigned long i;
> + for (i = 0; i < inp_len; ++i) {
> + unsigned c = inp[i];
Maybe this should be unsigned char?
> + if (arr[c] == 0)
> + break;
> + }
> + return i;
> +}
> +
> +unsigned long
> +string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)
> +{
> + unsigned long i;
> + for (i = inp_len - 1; i != (unsigned long)(-1); --i) {
> + unsigned c = inp[i];
> + if (arr[c] == 0)
> + break;
> + }
> + return inp_len - i - 1;
> +}
The same questions as for string_lstrip_helper are applicable here.
> +void
> +string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
> + unsigned long chars_len, bool lstrip, bool rstrip,
> + unsigned long *newstart, unsigned long *newlen)
> +{
> + unsigned long skipped_from_left = 0;
> + unsigned long skipped_from_right = 0;
> + uint8_t arr[256] = {0};
> + for (unsigned long i = 0; i < chars_len; ++i) {
> + unsigned c = chars[i];
> + arr[c] = 1;
> + }
> +
> + if (lstrip)
> + skipped_from_left = string_lstrip_helper(inp, inp_len, arr);
> + if (rstrip && skipped_from_left < inp_len - 1)
This condition is not quite obvious. Can you please comment it?
> + skipped_from_right = string_rstrip_helper(inp, inp_len, arr);
> +
> + *newstart = skipped_from_left;
> + *newlen = inp_len - skipped_from_left - skipped_from_right;
> +}
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +
> +unsigned long
> +string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
> +
> +unsigned long
> +string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr);
> +
These function can be removed.
> +/** \cond public */
> +
> +void
> +string_strip_helper(const char *inp, unsigned long inp_len, const char *chars,
> + unsigned long chars_len, bool lstrip, bool rstrip,
> + unsigned long *newstart, unsigned long *newlen);
Tabs correctly used at line 51, but spaces are at line 52. Please,
follow our C style guide (it is very similar to Linux Kernel style).
Please add doxygen-style comments (what a function do, what are
parameters and return values). Note: comments should fit into 66
symbols.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-01-21 18:14 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-17 21:35 [tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions Michał Durak
2019-01-21 18:14 ` [tarantool-patches] " Alexander Turenko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox