From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 22737217DD for ; Thu, 26 Apr 2018 19:58:03 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Mt9WSZgIMxAR for ; Thu, 26 Apr 2018 19:58:03 -0400 (EDT) Received: from smtp47.i.mail.ru (smtp47.i.mail.ru [94.100.177.107]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 46289217D1 for ; Thu, 26 Apr 2018 19:58:01 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH 2/7] lua: implement string.u_count From: Vladislav Shpilevoy References: Message-ID: <179729e4-f665-bdf7-e1d3-6f645caf9272@tarantool.org> Date: Fri, 27 Apr 2018 02:57:56 +0300 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Language: en-US Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Review fixes after discussion with Alexander. Remove the TITLE option and introduce a separate LETTER option. This is needed because Unicode has more letter classes than just upper/lower/title, and even title is not needed in our API. Let's just check u_isalpha() when a letter is needed, and remove title. 
diff --git a/src/lua/string.lua b/src/lua/string.lua index 8e3935963..2b6f5b3d9 100644 --- a/src/lua/string.lua +++ b/src/lua/string.lua @@ -464,18 +464,14 @@ end local U_COUNT_CLASS_ALL = 0 local U_COUNT_CLASS_UPPER_LETTER = 1 local U_COUNT_CLASS_LOWER_LETTER = 2 -local U_COUNT_CLASS_TITLE_LETTER = 4 +local U_COUNT_CLASS_LETTER = 4 local U_COUNT_CLASS_DIGIT = 8 -local U_COUNT_LETTER = bit.bor(U_COUNT_CLASS_UPPER_LETTER, - U_COUNT_CLASS_LOWER_LETTER, - U_COUNT_CLASS_TITLE_LETTER) - -- -- Calculate count of symbols matching the needed classes. -- @param inp Input UTF8 string. -- @param opts Options with needed classes. It supports 'all', --- 'upper', 'lower', 'title', 'digit'. Opts is a table, +-- 'upper', 'lower', 'letter', 'digit'. Opts is a table, -- where needed class key is set to true. By default all -- classes are needed, and count works like strlen (not -- bsize, like Lua operator '#'). @@ -500,11 +496,8 @@ local function string_u_count(inp, opts) if opts.lower then flags = bit.bor(flags, U_COUNT_CLASS_LOWER_LETTER) end - if opts.title then - flags = bit.bor(flags, U_COUNT_CLASS_TITLE_LETTER) - end else - flags = bit.bor(flags, U_COUNT_LETTER) + flags = bit.bor(flags, U_COUNT_CLASS_LETTER) end if opts.digit then flags = bit.bor(flags, U_COUNT_CLASS_DIGIT) diff --git a/src/util.c b/src/util.c index a7a1a35ac..c9eae25f8 100644 --- a/src/util.c +++ b/src/util.c @@ -328,7 +328,7 @@ enum u_count_class { U_COUNT_CLASS_ALL = 0, U_COUNT_CLASS_UPPER_LETTER = 1, U_COUNT_CLASS_LOWER_LETTER = 2, - U_COUNT_CLASS_TITLE_LETTER = 4, + U_COUNT_CLASS_LETTER = 4, U_COUNT_CLASS_DIGIT = 8, }; @@ -364,7 +364,7 @@ u_count(const char *s, int bsize, uint8_t flags) uint8_t f = 0; f |= (flags & U_COUNT_CLASS_UPPER_LETTER) != 0 && u_isupper(c); f |= (flags & U_COUNT_CLASS_LOWER_LETTER) != 0 && u_islower(c); - f |= (flags & U_COUNT_CLASS_TITLE_LETTER) != 0 && u_istitle(c); + f |= (flags & U_COUNT_CLASS_LETTER) != 0 && u_isalpha(c); f |= (flags & U_COUNT_CLASS_DIGIT) != 0 && 
u_isdigit(c); len += f != 0 ? 1 : 0; } diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua index 1b154298f..bbec0c974 100755 --- a/test/app-tap/string.test.lua +++ b/test/app-tap/string.test.lua @@ -165,10 +165,12 @@ test:test("unicode", function(test) test:is(string.u_count(str, {digit = true}), 4, 'option digit') test:is(string.u_count(str, {digit = true, upper = true}), 17, 'options digit and upper') - test:is(string.u_count('Dž', {title = true}), 1, 'option title') - test:is(string.u_count('Dž', {upper = true, lower = true}), 0, - 'title is not the same as upper or lower') - test:is(string.u_count(str..'Dž', {letter = true}), 33, 'option letter') + test:is(string.u_count('꜁Dž', {letter = true}), 1, + 'option letter for title and modifier symbols') + test:is(string.u_count('勺', {letter = true}), 1, + 'option letter for non-case symbols') + test:is(string.u_count('勺', {upper = true, lower = true}), 0, + 'non-case symbols are not visible for upper/lower') -- Test compare. local s1 = '☢' local s2 = 'İ'