On 29/01/2019 19:35, n.pettik wrote:

Fixes LIKE and LENGTH functions. '\0' now treated as

Nit: is treated.

Fixed.

a usual symbol. Strings with '\0' are now processed
entirely. Consider examples:

LENGTH(CHAR(65,00,65)) == 3
LIKE(CHAR(65,00,65), CHAR(65,00,66)) == False

Also, I see that something is wrong with the text in this mail again.

I hope now the mail text is ok.

Not quite. It is still highlighted in some way. I have no idea why.

 src/box/sql/func.c         |  88 +++++++++++++-----
 src/box/sql/vdbeInt.h      |   2 +-
 test/sql-tap/func.test.lua | 220 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 284 insertions(+), 26 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e46b162d9..2978af983 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -128,6 +128,30 @@ typeofFunc(sqlite3_context * context, int NotUsed, sqlite3_value ** argv)
 	sqlite3_result_text(context, z, -1, SQLITE_STATIC);
 }
 
+/**
+ * Return number of chars in the given string.
+ *
+ * Number of chars != byte size of string because some characters
+ * are encoded with more than one byte. Also note that all
+ * characters from 'str' to 'str + byte_len' would be counted,
+ * even if there is a '\0' somewhere between them.
+ * @param str String to be counted.
+ * @param byte_len Byte length of given string.
+ * @return

Return what?

+ */
+static int
+count_chars(const unsigned char *str, size_t byte_len)

The naming is quite poor. I would call it utf8_str_len or

something else with a utf8 prefix. Maybe it is worth moving it to a utils source file.

Also, consider using native U8_NEXT function from utf8.c,

instead of the custom SQLITE_SKIP_UTF8. It may not be as fast,

but it is safer, I suppose. I don't insist, though.

+{

What if str is NULL? Add at least an assertion.

+	int n_chars = 0;
+	const unsigned char *prev_z;
+	for (size_t cnt = 0; cnt < byte_len; cnt += (str - prev_z)) {
+		n_chars++;
+		prev_z = str;
+		SQLITE_SKIP_UTF8(str);
+	}
+	return n_chars;
+}

You can rewrite this function in a simpler way without using SQLITE macros.

Read this topic: https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80/3911566#3911566

It is quite useful. You may borrow implementation from there.

+
 /*
  * Implementation of the length() function
  */
@@ -150,11 +174,7 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 			const unsigned char *z = sqlite3_value_text(argv[0]);
 			if (z == 0)
 				return;
-			len = 0;
-			while (*z) {
-				len++;
-				SQLITE_SKIP_UTF8(z);
-			}
+			len = count_chars(z, sqlite3_value_bytes(argv[0]));
 			sqlite3_result_int(context, len);
 			break;
 		}
@@ -340,11 +360,8 @@ substrFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 		if (z == 0)
 			return;
 		len = 0;
-		if (p1 < 0) {
-			for (z2 = z; *z2; len++) {
-				SQLITE_SKIP_UTF8(z2);
-			}
-		}
+		if (p1 < 0)
+			len = count_chars(z, sqlite3_value_bytes(argv[0]));
 	}
 #ifdef SQLITE_SUBSTR_COMPATIBILITY
 	/* If SUBSTR_COMPATIBILITY is defined then substr(X,0,N) work the same as
@@ -388,12 +405,21 @@ substrFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 	}
 	assert(p1 >= 0 && p2 >= 0);
 	if (p0type != SQLITE_BLOB) {
-		while (*z && p1) {
+		/*
+		 * In the code below 'cnt' and 'n_chars' is
+		 * used because '\0' is not supposed to be
+		 * end-of-string symbol.
+		 */
+		int n_chars = count_chars(z, sqlite3_value_bytes(argv[0]));

I’d rather call it char_count or symbol_count.

diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index b7de1d955..8c712bd5e 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
+-- REPLACE
+test:do_execsql_test(
+    "func-62",
+    "SELECT REPLACE(CHAR(00,65,00,65), CHAR(00), CHAR(65)) LIKE 'AAAA';",
+    {1})
+
+test:do_execsql_test(
+    "func-63",
+    "SELECT REPLACE(CHAR(00,65,00,65), CHAR(65), CHAR(00)) \
+    LIKE CHAR(00,00,00,00);",
+    {1})
+
+-- SUBSTR
+test:do_execsql_test(
+    "func-64",
+    "SELECT SUBSTR(CHAR(65,00,66,67), 3, 2) LIKE CHAR(66, 67);",
+    {1})
+
+test:do_execsql_test(
+    "func-65",
+    "SELECT SUBSTR(CHAR(00,00,00,65), 1, 4) LIKE CHAR(00,00,00,65);",
+    {1})
+