From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Subject: [tarantool-patches] Re: [PATCH 2/7] lua: implement string.u_count Date: Fri, 27 Apr 2018 02:57:56 +0300 [thread overview] Message-ID: <179729e4-f665-bdf7-e1d3-6f645caf9272@tarantool.org> (raw) In-Reply-To: <a89c3b4d0946d32673fa3a6a4570a3002de4f81b.1524698920.git.v.shpilevoy@tarantool.org> Review fixes after discussion with Alexander. Remove the TITLE option and introduce a separate LETTER option. It is needed because Unicode has more letter classes than upper/lower/title, but even title is not needed in our API. Let's just check u_isalpha() when a letter is needed, and remove title. diff --git a/src/lua/string.lua b/src/lua/string.lua index 8e3935963..2b6f5b3d9 100644 --- a/src/lua/string.lua +++ b/src/lua/string.lua @@ -464,18 +464,14 @@ end local U_COUNT_CLASS_ALL = 0 local U_COUNT_CLASS_UPPER_LETTER = 1 local U_COUNT_CLASS_LOWER_LETTER = 2 -local U_COUNT_CLASS_TITLE_LETTER = 4 +local U_COUNT_CLASS_LETTER = 4 local U_COUNT_CLASS_DIGIT = 8 -local U_COUNT_LETTER = bit.bor(U_COUNT_CLASS_UPPER_LETTER, - U_COUNT_CLASS_LOWER_LETTER, - U_COUNT_CLASS_TITLE_LETTER) - -- -- Calculate count of symbols matching the needed classes. -- @param inp Input UTF8 string. -- @param opts Options with needed classes. It supports 'all', --- 'upper', 'lower', 'title', 'digit'. Opts is a table, +-- 'upper', 'lower', 'letter', 'digit'. Opts is a table, -- where needed class key is set to true. By default all -- classes are needed, and count works like strlen (not -- bsize, like Lua operator '#'). 
@@ -500,11 +496,8 @@ local function string_u_count(inp, opts) if opts.lower then flags = bit.bor(flags, U_COUNT_CLASS_LOWER_LETTER) end - if opts.title then - flags = bit.bor(flags, U_COUNT_CLASS_TITLE_LETTER) - end else - flags = bit.bor(flags, U_COUNT_LETTER) + flags = bit.bor(flags, U_COUNT_CLASS_LETTER) end if opts.digit then flags = bit.bor(flags, U_COUNT_CLASS_DIGIT) diff --git a/src/util.c b/src/util.c index a7a1a35ac..c9eae25f8 100644 --- a/src/util.c +++ b/src/util.c @@ -328,7 +328,7 @@ enum u_count_class { U_COUNT_CLASS_ALL = 0, U_COUNT_CLASS_UPPER_LETTER = 1, U_COUNT_CLASS_LOWER_LETTER = 2, - U_COUNT_CLASS_TITLE_LETTER = 4, + U_COUNT_CLASS_LETTER = 4, U_COUNT_CLASS_DIGIT = 8, }; @@ -364,7 +364,7 @@ u_count(const char *s, int bsize, uint8_t flags) uint8_t f = 0; f |= (flags & U_COUNT_CLASS_UPPER_LETTER) != 0 && u_isupper(c); f |= (flags & U_COUNT_CLASS_LOWER_LETTER) != 0 && u_islower(c); - f |= (flags & U_COUNT_CLASS_TITLE_LETTER) != 0 && u_istitle(c); + f |= (flags & U_COUNT_CLASS_LETTER) != 0 && u_isalpha(c); f |= (flags & U_COUNT_CLASS_DIGIT) != 0 && u_isdigit(c); len += f != 0 ? 
1 : 0; } diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua index 1b154298f..bbec0c974 100755 --- a/test/app-tap/string.test.lua +++ b/test/app-tap/string.test.lua @@ -165,10 +165,12 @@ test:test("unicode", function(test) test:is(string.u_count(str, {digit = true}), 4, 'option digit') test:is(string.u_count(str, {digit = true, upper = true}), 17, 'options digit and upper') - test:is(string.u_count('Dž', {title = true}), 1, 'option title') - test:is(string.u_count('Dž', {upper = true, lower = true}), 0, - 'title is not the same as upper or lower') - test:is(string.u_count(str..'Dž', {letter = true}), 33, 'option letter') + test:is(string.u_count('꜁Dž', {letter = true}), 1, + 'option letter for title and modifier symbols') + test:is(string.u_count('勺', {letter = true}), 1, + 'option letter for non-case symbols') + test:is(string.u_count('勺', {upper = true, lower = true}), 0, + 'non-case symbols are not visible for upper/lower') -- Test compare. local s1 = '☢' local s2 = 'İ'
next prev parent reply other threads:[~2018-04-26 23:58 UTC|newest] Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-04-25 23:29 [tarantool-patches] [PATCH 0/7] Expose ICU into Lua Vladislav Shpilevoy 2018-04-25 23:29 ` [tarantool-patches] [PATCH 1/7] lua: expose ICU upper/lower functions to Lua Vladislav Shpilevoy 2018-04-28 0:56 ` [tarantool-patches] " Alexander Turenko 2018-04-25 23:29 ` [tarantool-patches] [PATCH 2/7] lua: implement string.u_count Vladislav Shpilevoy 2018-04-26 10:36 ` [tarantool-patches] " Vladislav Shpilevoy 2018-04-26 16:07 ` Vladislav Shpilevoy 2018-04-26 23:57 ` Vladislav Shpilevoy [this message] 2018-04-28 1:10 ` Alexander Turenko 2018-04-25 23:29 ` [tarantool-patches] [PATCH 3/7] alter: fix assertion in collations alter Vladislav Shpilevoy 2018-04-25 23:29 ` [tarantool-patches] [PATCH 4/7] Move struct on_access_denied_ctx into error.h Vladislav Shpilevoy 2018-04-25 23:29 ` [tarantool-patches] [PATCH 5/7] Merge box_error, stat and collations into core library Vladislav Shpilevoy 2018-04-25 23:29 ` [tarantool-patches] [PATCH 6/7] Always store built-in collations in the cache Vladislav Shpilevoy 2018-04-25 23:29 ` [tarantool-patches] [PATCH 7/7] lua: expose u_compare/u_icompare into Lua Vladislav Shpilevoy 2018-04-28 1:55 ` [tarantool-patches] Re: [PATCH 0/7] Expose ICU " Alexander Turenko
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=179729e4-f665-bdf7-e1d3-6f645caf9272@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='[tarantool-patches] Re: [PATCH 2/7] lua: implement string.u_count' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.