From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id D59C12630F for ; Thu, 17 Jan 2019 16:35:28 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id hClUag0cI0th for ; Thu, 17 Jan 2019 16:35:28 -0500 (EST) Received: from mail-40133.protonmail.ch (mail-40133.protonmail.ch [185.70.40.133]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 11FC82630D for ; Thu, 17 Jan 2019 16:35:27 -0500 (EST) Date: Thu, 17 Jan 2019 21:35:20 +0000 From: "=?UTF-8?Q?Micha=C5=82_Durak?=" (Redacted sender "gdrbyko1" for DMARC) Subject: [tarantool-patches] [PATCH v2] add optional 'chars' param to string.strip functions Message-ID: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: "tarantool-patches@freelists.org" Add optional 'chars' parameter to string.strip, string.lstrip and string.rstrip for specifying the unwanted characters. Behavior modeled after the equivalent Python built-ins. Needed for: #2977 --- FreeLists seems to redact the email addresses of senders using ProtonMail. Please direct any replies to gdrbyko1[at]protonmail[dot]com branch: https://github.com/gdrbyKo1/tarantool/tree/gdrbyko1/gh-2977 issue: https://github.com/tarantool/tarantool/issues/2977 extra/exports | 1 + src/CMakeLists.txt | 2 ++ src/lua/string.lua | 64 +++++++++++++++++++++++++++++++---- src/tt_string.c | 80 ++++++++++++++++++++++++++++++++++++++++= ++++ src/tt_string.h | 60 +++++++++++++++++++++++++++++++++ test/app-tap/string.test.lua | 68 ++++++++++++++++++++++++++++++++----- 6 files changed, 259 insertions(+), 16 deletions(-) create mode 100644 src/tt_string.c create mode 100644 src/tt_string.h diff --git a/extra/exports b/extra/exports index 5f69e0730..0b304e4b1 100644 --- a/extra/exports +++ b/extra/exports @@ -209,6 +209,7 @@ clock_realtime64 clock_monotonic64 clock_process64 clock_thread64 +string_strip_helper # Lua / LuaJIT diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 04de5ad04..5ff4094ee 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -109,6 +109,7 @@ set (core_sources coll_def.c mpstream.c port.c + tt_string.c ) if (TARGET_OS_NETBSD) @@ -215,6 +216,7 @@ set(api_headers ${CMAKE_SOURCE_DIR}/src/box/lua/tuple.h ${CMAKE_SOURCE_DIR}/src/latch.h ${CMAKE_SOURCE_DIR}/src/clock.h + ${CMAKE_SOURCE_DIR}/src/tt_string.h ) rebuild_module_api(${api_headers}) diff --git a/src/lua/string.lua b/src/lua/string.lua index cbce26b35..35bd95343 100644 --- a/src/lua/string.lua +++ b/src/lua/string.lua @@ -6,15 +6,23 @@ ffi.cdef[[ const char *needle, size_t needle_len); int memcmp(const char *mem1, const char *mem2, size_t num); int isspace(int c); + void + string_strip_helper(const char *inp, unsigned long inp_len, + const char *chars, unsigned long chars_len, + bool lstrip, bool rstrip, + unsigned long *newstart, unsigned long *newlen); ]] -local c_char_ptr =3D ffi.typeof('const char *') +local c_char_ptr =3D ffi.typeof('const char *') +local strip_newstart =3D ffi.new("unsigned long[1]") +local strip_newlen =3D ffi.new("unsigned long[1]") local memcmp =3D ffi.C.memcmp local memmem =3D ffi.C.memmem local isspace =3D ffi.C.isspace local err_string_arg =3D "bad argument #%d to '%s' (%s expected, got %s)" +local space_chars =3D ' \t\n\v\f\r' local function string_split_empty(inp, maxsplit) local p =3D c_char_ptr(inp) @@ -339,25 +347,67 @@ local function string_fromhex(inp) return ffi.string(res, len) end -local function string_strip(inp) +local function string_strip(inp, chars) if type(inp) ~=3D 'string' then error(err_string_arg:format(1, "string.strip", 'string', type(inp)= ), 2) end - return (string.gsub(inp, "^%s*(.-)%s*$", "%1")) + if inp =3D=3D '' then + return inp + end + if chars =3D=3D nil then + chars =3D space_chars + elseif type(chars) ~=3D 'string' then + error(err_string_arg:format(2, "string.strip", 'string', type(char= s)), 2) + elseif chars =3D=3D '' then + return inp + end + + local casted_inp =3D c_char_ptr(inp) + ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, true, + strip_newstart, strip_newlen) + return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0]) end -local function string_lstrip(inp) +local function string_lstrip(inp, chars) if type(inp) ~=3D 'string' then error(err_string_arg:format(1, "string.lstrip", 'string', type(inp= )), 2) end - return (string.gsub(inp, "^%s*(.-)", "%1")) + if inp =3D=3D '' then + return inp + end + if chars =3D=3D nil then + chars =3D space_chars + elseif type(chars) ~=3D 'string' then + error(err_string_arg:format(2, "string.lstrip", 'string', type(cha= rs)), 2) + elseif chars =3D=3D '' then + return inp + end + + local casted_inp =3D c_char_ptr(inp) + ffi.C.string_strip_helper(inp, #inp, chars, #chars, true, false, + strip_newstart, strip_newlen) + return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0]) end -local function string_rstrip(inp) +local function string_rstrip(inp, chars) if type(inp) ~=3D 'string' then error(err_string_arg:format(1, "string.rstrip", 'string', type(inp= )), 2) end - return (string.gsub(inp, "(.-)%s*$", "%1")) + if inp =3D=3D '' then + return inp + end + if chars =3D=3D nil then + chars =3D space_chars + elseif type(chars) ~=3D 'string' then + error(err_string_arg:format(2, "string.rstrip", 'string', type(cha= rs)), 2) + elseif chars =3D=3D '' then + return inp + end + + local casted_inp =3D c_char_ptr(inp) + ffi.C.string_strip_helper(inp, #inp, chars, #chars, false, true, + strip_newstart, strip_newlen) + return ffi.string(casted_inp + strip_newstart[0], strip_newlen[0]) end diff --git a/src/tt_string.c b/src/tt_string.c new file mode 100644 index 000000000..a3cd4ff94 --- /dev/null +++ b/src/tt_string.c @@ -0,0 +1,80 @@ +/* + * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "tt_string.h" + +#include +#include + +unsigned long +string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr) +{ + unsigned long i; + for (i =3D 0; i < inp_len; ++i) { + unsigned c =3D inp[i]; + if (arr[c] =3D=3D 0) + break; + } + return i; +} + +unsigned long +string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr) +{ + unsigned long i; + for (i =3D inp_len - 1; i !=3D (unsigned long)(-1); --i) { + unsigned c =3D inp[i]; + if (arr[c] =3D=3D 0) + break; + } + return inp_len - i - 1; +} + +void +string_strip_helper(const char *inp, unsigned long inp_len, const char *ch= ars, +=09=09 unsigned long chars_len, bool lstrip, bool rstrip, + unsigned long *newstart, unsigned long *newlen) +{ + unsigned long skipped_from_left =3D 0; + unsigned long skipped_from_right =3D 0; + uint8_t arr[256] =3D {0}; + for (unsigned long i =3D 0; i < chars_len; ++i) { + unsigned c =3D chars[i]; + arr[c] =3D 1; + } + + if (lstrip) + skipped_from_left =3D string_lstrip_helper(inp, inp_len, a= rr); + if (rstrip && skipped_from_left < inp_len - 1) + skipped_from_right =3D string_rstrip_helper(inp, inp_len, = arr); + + *newstart =3D skipped_from_left; + *newlen =3D inp_len - skipped_from_left - skipped_from_right; +} diff --git a/src/tt_string.h b/src/tt_string.h new file mode 100644 index 000000000..f82337c46 --- /dev/null +++ b/src/tt_string.h @@ -0,0 +1,60 @@ +#ifndef TARANTOOL_STRING_H_INCLUDED +#define TARANTOOL_STRING_H_INCLUDED +/* + * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +unsigned long +string_lstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)= ; + +unsigned long +string_rstrip_helper(const char *inp, unsigned long inp_len, uint8_t *arr)= ; + +/** \cond public */ + +void +string_strip_helper(const char *inp, unsigned long inp_len, const char *ch= ars, +=09=09 unsigned long chars_len, bool lstrip, bool rstrip, + unsigned long *newstart, unsigned long *newlen); + +/** \endcond public */ + +#if defined(__cplusplus) +} /* extern "C" */ +#endif /* defined(__cplusplus) */ + +#endif /* TARANTOOL_STRING_H_INCLUDED */ diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua index 7203fcd36..f79d4fb62 100755 --- a/test/app-tap/string.test.lua +++ b/test/app-tap/string.test.lua @@ -134,18 +134,68 @@ test:test("fromhex", function(test) end) test:test("strip", function(test) - test:plan(6) - local str =3D " hello hello " - test:is(string.len(string.strip(str)), 11, "strip") - test:is(string.len(string.lstrip(str)), 12, "lstrip") - test:is(string.len(string.rstrip(str)), 13, "rstrip") + test:plan(39) + local str =3D " Hello world! " + test:is(string.strip(str), "Hello world!", "strip (without chars)") + test:is(string.lstrip(str), "Hello world! ", "lstrip (without chars)") + test:is(string.rstrip(str), " Hello world!", "rstrip (without chars)"= ) + str =3D "" + test:is(string.strip(str), str, "strip (0-len inp without chars)") + test:is(string.lstrip(str), str, "lstrip (0-len inp without chars)") + test:is(string.rstrip(str), str, "rstrip (0-len inp without chars)") + str =3D "\t\v" + test:is(string.strip(str), "", "strip (strip everything without chars)= ") + test:is(string.lstrip(str), "", "lstrip (strip everything without char= s)") + test:is(string.rstrip(str), "", "rstrip (strip everything without char= s)") + str =3D "hello" + test:is(string.strip(str), str, "strip (strip nothing without chars)") + test:is(string.lstrip(str), str, "lstrip (strip nothing without chars)= ") + test:is(string.rstrip(str), str, "rstrip (strip nothing without chars)= ") + str =3D " \t\n\v\f\rTEST \t\n\v\f\r" + test:is(string.strip(str), "TEST", "strip (all space characters withou= t chars)") + test:is(string.lstrip(str), "TEST \t\n\v\f\r", "lstrip (all space char= acters without chars)") + test:is(string.rstrip(str), " \t\n\v\f\rTEST", "rstrip (all space char= acters without chars)") + + local chars =3D "#\0" + str =3D "##Hello world!#" + test:is(string.strip(str, chars), "Hello world!", "strip (with chars)"= ) + test:is(string.lstrip(str, chars), "Hello world!#", "lstrip (with char= s)") + test:is(string.rstrip(str, chars), "##Hello world!", "rstrip (with cha= rs)") + str =3D "" + test:is(string.strip(str, chars), str, "strip (0-len inp with chars)") + test:is(string.lstrip(str, chars), str, "lstrip (0-len inp with chars)= ") + test:is(string.rstrip(str, chars), str, "rstrip (0-len inp with chars)= ") + str =3D "##" + test:is(string.strip(str, chars), "", "strip (strip everything with ch= ars)") + test:is(string.lstrip(str, chars), "", "lstrip (strip everything with = chars)") + test:is(string.rstrip(str, chars), "", "rstrip (strip everything with = chars)") + str =3D "hello" + test:is(string.strip(str, chars), str, "strip (strip nothing with char= s)") + test:is(string.lstrip(str, chars), str, "lstrip (strip nothing with ch= ars)") + test:is(string.rstrip(str, chars), str, "rstrip (strip nothing with ch= ars)") + str =3D "\0\0\0TEST\0" + test:is(string.strip(str, chars), "TEST", "strip (embedded 0s with cha= rs)") + test:is(string.lstrip(str, chars), "TEST\0", "lstrip (embedded 0s with= chars)") + test:is(string.rstrip(str, chars), "\0\0\0TEST", "rstrip (embedded 0s = with chars)") + chars =3D "" + test:is(string.strip(str, chars), str, "strip (0-len chars)") + test:is(string.lstrip(str, chars), str, "lstrip (0-len chars)") + test:is(string.rstrip(str, chars), str, "rstrip (0-len chars)") + local _, err =3D pcall(string.strip, 12) - test:ok(err and err:match("%(string expected, got number%)")) + test:ok(err and err:match("#1 to '.-%.strip' %(string expected, got nu= mber%)"), "strip err 1") _, err =3D pcall(string.lstrip, 12) - test:ok(err and err:match("%(string expected, got number%)")) + test:ok(err and err:match("#1 to '.-%.lstrip' %(string expected, got n= umber%)"), "lstrip err 1") _, err =3D pcall(string.rstrip, 12) - test:ok(err and err:match("%(string expected, got number%)")) -end ) + test:ok(err and err:match("#1 to '.-%.rstrip' %(string expected, got n= umber%)"), "rstrip err 1") + + _, err =3D pcall(string.strip, "foo", 12) + test:ok(err and err:match("#2 to '.-%.strip' %(string expected, got nu= mber%)"), "strip err 2") + _, err =3D pcall(string.lstrip, "foo", 12) + test:ok(err and err:match("#2 to '.-%.lstrip' %(string expected, got n= umber%)"), "lstrip err 2") + _, err =3D pcall(string.rstrip, "foo", 12) + test:ok(err and err:match("#2 to '.-%.rstrip' %(string expected, got n= umber%)"), "rstrip err 2") +end) test:test("unicode", function(test) test:plan(104) -- 2.11.0