[tarantool-patches] Re: [PATCH 2/7] lua: implement string.u_count

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri Apr 27 02:57:56 MSK 2018


Review fixes after discussion with Alexander.

Remove the TITLE option and introduce a separate LETTER option.

This is needed because Unicode has more letter classes than just
upper/lower/title, and even title is not needed in our API. Let's
just check u_isalpha() when a letter is needed, and remove title.


diff --git a/src/lua/string.lua b/src/lua/string.lua
index 8e3935963..2b6f5b3d9 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -464,18 +464,14 @@ end
  local U_COUNT_CLASS_ALL = 0
  local U_COUNT_CLASS_UPPER_LETTER = 1
  local U_COUNT_CLASS_LOWER_LETTER = 2
-local U_COUNT_CLASS_TITLE_LETTER = 4
+local U_COUNT_CLASS_LETTER = 4
  local U_COUNT_CLASS_DIGIT = 8
  
-local U_COUNT_LETTER = bit.bor(U_COUNT_CLASS_UPPER_LETTER,
-                               U_COUNT_CLASS_LOWER_LETTER,
-                               U_COUNT_CLASS_TITLE_LETTER)
-
  --
  -- Calculate count of symbols matching the needed classes.
  -- @param inp Input UTF8 string.
  -- @param opts Options with needed classes. It supports 'all',
---        'upper', 'lower', 'title', 'digit'. Opts is a table,
+--        'upper', 'lower', 'letter', 'digit'. Opts is a table,
  --        where needed class key is set to true. By default all
  --        classes are needed, and count works like strlen (not
  --        bsize, like Lua operator '#').
@@ -500,11 +496,8 @@ local function string_u_count(inp, opts)
                  if opts.lower then
                      flags = bit.bor(flags, U_COUNT_CLASS_LOWER_LETTER)
                  end
-                if opts.title then
-                    flags = bit.bor(flags, U_COUNT_CLASS_TITLE_LETTER)
-                end
              else
-                flags = bit.bor(flags, U_COUNT_LETTER)
+                flags = bit.bor(flags, U_COUNT_CLASS_LETTER)
              end
              if opts.digit then
                  flags = bit.bor(flags, U_COUNT_CLASS_DIGIT)
diff --git a/src/util.c b/src/util.c
index a7a1a35ac..c9eae25f8 100644
--- a/src/util.c
+++ b/src/util.c
@@ -328,7 +328,7 @@ enum u_count_class {
  	U_COUNT_CLASS_ALL = 0,
  	U_COUNT_CLASS_UPPER_LETTER = 1,
  	U_COUNT_CLASS_LOWER_LETTER = 2,
-	U_COUNT_CLASS_TITLE_LETTER = 4,
+	U_COUNT_CLASS_LETTER = 4,
  	U_COUNT_CLASS_DIGIT = 8,
  };
  
@@ -364,7 +364,7 @@ u_count(const char *s, int bsize, uint8_t flags)
  		uint8_t f = 0;
  		f |= (flags & U_COUNT_CLASS_UPPER_LETTER) != 0 && u_isupper(c);
  		f |= (flags & U_COUNT_CLASS_LOWER_LETTER) != 0 && u_islower(c);
-		f |= (flags & U_COUNT_CLASS_TITLE_LETTER) != 0 && u_istitle(c);
+		f |= (flags & U_COUNT_CLASS_LETTER) != 0 && u_isalpha(c);
  		f |= (flags & U_COUNT_CLASS_DIGIT) != 0 && u_isdigit(c);
  		len += f != 0 ? 1 : 0;
  	}
diff --git a/test/app-tap/string.test.lua b/test/app-tap/string.test.lua
index 1b154298f..bbec0c974 100755
--- a/test/app-tap/string.test.lua
+++ b/test/app-tap/string.test.lua
@@ -165,10 +165,12 @@ test:test("unicode", function(test)
      test:is(string.u_count(str, {digit = true}), 4, 'option digit')
      test:is(string.u_count(str, {digit = true, upper = true}), 17,
              'options digit and upper')
-    test:is(string.u_count('Dž', {title = true}), 1, 'option title')
-    test:is(string.u_count('Dž', {upper = true, lower = true}), 0,
-                           'title is not the same as upper or lower')
-    test:is(string.u_count(str..'Dž', {letter = true}), 33, 'option letter')
+    test:is(string.u_count('꜁Dž', {letter = true}), 1,
+            'option letter for title and modifier symbols')
+    test:is(string.u_count('勺', {letter = true}), 1,
+            'option letter for non-case symbols')
+    test:is(string.u_count('勺', {upper = true, lower = true}), 0,
+                           'non-case symbols are not visible for upper/lower')
      -- Test compare.
      local s1 = '☢'
      local s2 = 'İ'





More information about the Tarantool-patches mailing list