[tarantool-patches] [PATCH v2 3/3] Multibyte characters support

Kirill Shcherbatov kshcherbatov at tarantool.org
Thu Mar 29 17:22:04 MSK 2018


---
 src/box/lua/tuple.c        |  1 -
 src/lib/json/path.c        | 19 +++++++++++++++++--
 test/engine/tuple.result   | 20 ++++++++++++++++++--
 test/engine/tuple.test.lua |  6 +++++-
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 99b9ff2..c3a435b 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -413,7 +413,6 @@ lbox_tuple_transform(struct lua_State *L)
 static inline int
 tuple_field_go_to_index(const char **field, uint64_t index)
 {
-	assert(index >= 0);
 	enum mp_type type = mp_typeof(**field);
 	if (type == MP_ARRAY) {
 		if (index == 0)
diff --git a/src/lib/json/path.c b/src/lib/json/path.c
index 4a6174e..3e1bb80 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/path.c
@@ -31,6 +31,8 @@
 
 #include "path.h"
 #include <ctype.h>
+#include <wchar.h>
+#include <wctype.h>
 #include "trivia/util.h"
 
 /** Same as strtoull(), but with limited length. */
@@ -44,6 +46,19 @@ strntoull(const char *src, int len) {
 	return value;
 }
 
+/** Nonzero iff the multibyte character starting at str is alphabetic. */
+static inline int
+ismbaswcalpha(const char *str, size_t str_len_max)
+{
+	wchar_t wc;
+	mbstate_t ps;
+	memset(&ps, 0, sizeof(ps));
+	/* Decode one character, reading at most str_len_max bytes. */
+	size_t rc = mbrtowc(&wc, str, str_len_max, &ps);
+	/* 0 is NUL; (size_t)-1 / (size_t)-2 are invalid / truncated input. */
+	return (rc == 0 || rc >= (size_t)-2) ? 0 : iswalpha((wint_t)wc);
+}
+
 /**
  * Parse string identifier in quotes. Parser either stops right
  * after the closing quote, or returns an error position.
@@ -126,10 +141,10 @@ json_parse_identifier(struct json_path_parser *parser,
 	const char *str = pos;
 	char c = *pos;
 	/* First symbol can not be digit. */
-	if (!isalpha(c) && c != '_')
+	if (!ismbaswcalpha(pos, end - pos) && c != '_')
 		return pos - parser->src + 1;
 	int len = 1;
-	for (c = *++pos; pos < end && (isalpha(c) || c == '_' || isdigit(c));
+	for (c = *++pos; pos < end && (ismbaswcalpha(pos, end - pos) || c == '_' || isdigit(c));
 	     c = *++pos)
 		++len;
 	assert(len > 0);
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 2d7367a..d6eb4fa 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -611,7 +611,7 @@ s = box.schema.space.create('test', {format = format})
 pk = s:create_index('pk')
 ---
 ...
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1", hello中国world = {中国 = 'test'}}}
 ---
 ...
 field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
@@ -626,7 +626,7 @@ t[1]
 ...
 t[2]
 ---
-- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
+- [1, 2, 3, '4', [5, 6, 7], {'hello中国world': {'中国': 'test'}, 'key': 'key1', 'value': 'value1'}]
 ...
 t[3]
 ---
@@ -665,6 +665,10 @@ t["[2][5][3]"]
 ---
 - 7
 ...
+t["[2][5][3]['hello中国world'].中"]
+---
+- null
+...
 t["[2][6].key"]
 ---
 - key1
@@ -673,6 +677,10 @@ t["[2][6].value"]
 ---
 - value1
 ...
+t["[2][6].hello中国world"]
+---
+- {'中国': 'test'}
+...
 t["[2][6]['key']"]
 ---
 - key1
@@ -681,10 +689,18 @@ t["[2][6]['value']"]
 ---
 - value1
 ...
+t["[2][6]['hello中国world']"]
+---
+- {'中国': 'test'}
+...
 t["[3].k3[2].c"]
 ---
 - 3
 ...
+t["[2][6]['hello中国world'].中国"]
+---
+- test
+...
 t["[4]"]
 ---
 - '123456'
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index ba3482d..5a3bcfa 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -207,7 +207,7 @@ format[3] = {name = 'field3', type = 'map'}
 format[4] = {name = 'field4', type = 'string'}
 s = box.schema.space.create('test', {format = format})
 pk = s:create_index('pk')
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1", hello中国world = {中国 = 'test'}}}
 field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
 t = s:replace{1, field2, field3, "123456"}
 t[1]
@@ -221,11 +221,15 @@ t["[2][5]"]
 t["[2][5][1]"]
 t["[2][5][2]"]
 t["[2][5][3]"]
+t["[2][5][3]['hello中国world'].中"]
 t["[2][6].key"]
 t["[2][6].value"]
+t["[2][6].hello中国world"]
 t["[2][6]['key']"]
 t["[2][6]['value']"]
+t["[2][6]['hello中国world']"]
 t["[3].k3[2].c"]
+t["[2][6]['hello中国world'].中国"]
 t["[4]"]
 t.field1
 t.field2[5]
-- 
2.7.4





More information about the Tarantool-patches mailing list