29 марта 2018 г., в 21:04, Kirill Shcherbatov <kshcherbatov@tarantool.org> написал(а):From eb16ee6ea2d2ca5801b6b09c403c6e2f23984bd9 Mon Sep 17 00:00:00 2001
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Date: Thu, 29 Mar 2018 21:03:21 +0300
Subject: [PATCH] Multibyte characters support
---
src/box/lua/tuple.c | 1 -
src/lib/json/path.c | 35 ++++++++++++++++++++++++++++++-----
src/lib/json/path.h | 2 ++
test/engine/tuple.result | 16 ++++++++++++++--
test/engine/tuple.test.lua | 5 ++++-
5 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 99b9ff2..c3a435b 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -44,6 +46,26 @@ strntoull(const char *src, int len) {
return value;
}
+static inline size_t
+mbsize(const char *str, size_t str_size)
+{
+ mbstate_t ps;
+ memset(&ps, 0, sizeof(ps));
+ mbrlen(NULL,0,&ps);
+ return mbrlen(str, str_size, &ps);
+}
+
+static inline int
+mb2wcisalpha(char *mb_char, size_t mb_char_size)
+{
+ assert(mb_char_size < 5);
+ wchar_t buff[2];
+ mbstate_t ps;
+ memset(&ps, 0, sizeof(ps));
+ mbsrtowcs(buff, (const char **)&mb_char, mb_char_size + 1, &ps);
+ return iswalpha((wint_t)buff[0]);
+}
+
/**
* Parse string identifier in quotes. Parser either stops right
* after the closing quote, or returns an error position.
@@ -126,17 +148,20 @@ json_parse_identifier(struct json_path_parser *parser,
const char *str = pos;
char c = *pos;
/* First symbol can not be digit. */
- if (!isalpha(c) && c != '_')
- return pos - parser->src + 1;
+ size_t mb_size = 0;
+ if ((mb_size = mbsize(pos, end - pos)) && !mb2wcisalpha((char *)pos, mb_size) && c != '_')
+ return pos - parser->src + mb_size;
int len = 1;
- for (c = *++pos; pos < end && (isalpha(c) || c == '_' || isdigit(c));
- c = *++pos)
+ for (c = *(pos += mb_size);
+ pos < end && (((mb_size = mbsize(pos, end - pos))
+ && mb2wcisalpha((char *)pos, mb_size)) || c == '_' || isdigit(c));
+ c = *(pos += mb_size))
++len;
assert(len > 0);
parser->pos = pos;
node->type = JSON_PATH_STR;
node->str = str;
- node->len = len;
+ node->len = (int)(pos - str);
return 0;
}
diff --git a/src/lib/json/path.h b/src/lib/json/path.h
index 6e8db4c..b1028f6 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/path.h
@@ -33,6 +33,8 @@
#include <stdbool.h>
#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
#ifdef __cplusplus
extern "C" {
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 2d7367a..c4b361a 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -611,7 +611,7 @@ s = box.schema.space.create('test', {format = format})
pk = s:create_index('pk')
---
...
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1", hello中国world = {中国 = 'test'}}}