[tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU
Kirill Shcherbatov
kshcherbatov at tarantool.org
Wed Apr 4 13:37:24 MSK 2018
ICU Implementation
From 8703e465382ba05817e7703550694c3790972e54 Mon Sep 17 00:00:00 2001
Message-Id:
<8703e465382ba05817e7703550694c3790972e54.1522838002.git.kshcherbatov at tarantool.org>
In-Reply-To: <cover.1522838002.git.kshcherbatov at tarantool.org>
References: <cover.1522838002.git.kshcherbatov at tarantool.org>
From: Kirill Shcherbatov <kshcherbatov at tarantool.org>
Date: Wed, 4 Apr 2018 13:06:22 +0300
Subject: [PATCH v2 3/3] ICU Unicode parsing implementation
---
src/box/lua/tuple.c | 31 +++++--
src/lib/json/path.c | 211
+++++++++++++++++++++++++++++++++------------
src/lib/json/path.h | 30 ++++---
test/engine/tuple.result | 43 +++++++--
test/engine/tuple.test.lua | 13 ++-
test/unit/CMakeLists.txt | 2 +-
test/unit/json_path.c | 4 +-
7 files changed, 247 insertions(+), 87 deletions(-)
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 99b9ff2..b89e1f9 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -413,7 +413,6 @@ lbox_tuple_transform(struct lua_State *L)
static inline int
tuple_field_go_to_index(const char **field, uint64_t index)
{
- assert(index >= 0);
enum mp_type type = mp_typeof(**field);
if (type == MP_ARRAY) {
if (index == 0)
@@ -497,6 +496,12 @@ tuple_field_go_to_key(const char **field, const
char *key, int len)
static int
lbox_tuple_field_by_path(struct lua_State *L)
{
+ int err_pos = 0;
+ struct json_path_parser parser;
+ /* Zero the parser so that json_path_parser_deinit() is safe
+ * on the lua_isnumber path, where it is never initialized. */
+ memset(&parser, 0, sizeof(parser));
+ const char *path = NULL;
const char *field;
struct tuple *tuple = luaT_istuple(L, 1);
/* Is checked in Lua wrapper. */
@@ -506,6 +511,18 @@ lbox_tuple_field_by_path(struct lua_State *L)
index -= TUPLE_INDEX_BASE;
if (index < 0) {
not_found:
+ if (!path)
+ goto exit_not_found;
+ uint32_t path_len = strlen(path);
+ uint32_t path_hash = lua_hash(path, path_len);
+ field = tuple_field_by_name(tuple, path,
+ path_len, path_hash);
+ if (field)
+ goto push_value;
+ if (err_pos || path_len == 0)
+ luaL_error(L, "Error in path on position %d", err_pos);
+exit_not_found:
+ json_path_parser_deinit(&parser);
lua_pushinteger(L, -1);
lua_pushnil(L);
return 2;
@@ -514,19 +531,21 @@ not_found:
if (field == NULL)
goto not_found;
push_value:
+ json_path_parser_deinit(&parser);
lua_pushinteger(L, 0);
luamp_decode(L, luaL_msgpack_default, &field);
return 2;
}
assert(lua_isstring(L, 2));
size_t path_len;
- const char *path = lua_tolstring(L, 2, &path_len);
- struct json_path_parser parser;
+ path = lua_tolstring(L, 2, &path_len);
struct json_path_node node;
- json_path_parser_create(&parser, path, path_len);
+ json_path_parser_init(&parser, path, path_len);
int rc = json_path_next(&parser, &node);
- if (rc != 0 || node.type == JSON_PATH_END)
- luaL_error(L, "Error in path on position %d", rc);
+ if (rc != 0 || node.type == JSON_PATH_END) {
+ err_pos = rc;
+ goto not_found;
+ }
if (node.type == JSON_PATH_NUM) {
int index = node.num;
if (index == 0)
diff --git a/src/lib/json/path.c b/src/lib/json/path.c
index 4a6174e..4aadb3a 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/path.c
@@ -31,8 +31,11 @@
#include "path.h"
#include <ctype.h>
+#include <unicode/uchar.h>
#include "trivia/util.h"
+#define REPLACEMENT_CHARACTER (0xFFFD)
+
/** Same as strtoull(), but with limited length. */
static inline uint64_t
strntoull(const char *src, int len) {
@@ -45,6 +48,51 @@ strntoull(const char *src, int len) {
}
/**
+ * Read the next UTF-8 character and update the parser's state.
+ * @param[out] parser JSON path parser. Updates pos, invalid_sign_off.
+ * @param[out] out UChar32 to store the decoded character.
+ *
+ * @retval 1 Success.
+ * @retval 0 End of string.
+ * @retval -1 Parse error.
+ */
+static inline int
+parser_read_sign(struct json_path_parser *parser, UChar32 *out)
+{
+ int rc;
+ UErrorCode status = U_ZERO_ERROR;
+ if (parser->pos == parser->end)
+ return 0;
+ *out = ucnv_getNextUChar(parser->utf8conv, &parser->pos, parser->end,
&status);
+ parser->invalid_sign_off += (rc = U_SUCCESS(status));
+ return rc ? 1 : -1;
+}
+
+/**
+ * Roll the parser back to a previously saved position.
+ * @param[out] parser JSON path parser. Updates pos, invalid_sign_off.
+ * @param old_pos Read position to restore.
+ * @param signs Number of signs to subtract from invalid_sign_off.
+ */
+static inline void
+parser_reset_pos(struct json_path_parser *parser, const char *old_pos,
int signs)
+{
+ parser->pos = old_pos;
+ parser->invalid_sign_off -= signs;
+}
+
+static inline bool
+string_valid_sign(UChar32 c)
+{
+ int8_t type = u_charType(c);
+ return !(c == REPLACEMENT_CHARACTER ||
+ type == U_UNASSIGNED ||
+ type == U_LINE_SEPARATOR ||
+ type == U_CONTROL_CHAR ||
+ type == U_PARAGRAPH_SEPARATOR);
+}
+
+/**
* Parse string identifier in quotes. Parser either stops right
* after the closing quote, or returns an error position.
* @param parser JSON path parser.
@@ -56,22 +104,24 @@ strntoull(const char *src, int len) {
static inline int
json_parse_string(struct json_path_parser *parser, struct
json_path_node *node)
{
- const char *end = parser->src + parser->src_len;
- const char *pos = parser->pos;
- assert(pos < end);
- char quote_type = *pos;
- assert(quote_type == '\'' || quote_type == '"');
- /* Skip first quote. */
- int len = 0;
- ++pos;
- const char *str = pos;
- for (char c = *pos; pos < end && quote_type != c; c = *++pos)
- ++len;
- /* A string must be terminated with quote. */
- if (*pos != quote_type || len == 0)
- return pos - parser->src + 1;
- /* Skip the closing quote. */
- parser->pos = pos + 1;
+ assert(parser->pos < parser->end);
+ UChar32 quote_type;
+ (void)parser_read_sign(parser, &quote_type);
+ assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"');
+ const char *str = parser->pos;
+ UChar32 c = 0;
+ int rc = 0;
+
+ while (((rc = parser_read_sign(parser, &c)) > 0)
+ && string_valid_sign(c) && c != quote_type);
+ int len = (int)(parser->pos - str - 1);
+ if (rc < 0 || len == 0)
+ return -1;
+ if (c != (UChar32)quote_type) {
+ parser->invalid_sign_off++;
+ return -1;
+ }
+
node->type = JSON_PATH_STR;
node->str = str;
node->len = len;
@@ -81,7 +131,7 @@ json_parse_string(struct json_path_parser *parser,
struct json_path_node *node)
/**
* Parse digit sequence into integer until non-digit is met.
* Parser stops right after the last digit.
- * @param parser JSON parser.
+ * @param[out] parser JSON parser. Updates pos, invalid_sign_off.
* @param[out] node JSON node to store result.
*
* @retval 0 Success.
@@ -90,27 +140,40 @@ json_parse_string(struct json_path_parser *parser,
struct json_path_node *node)
static inline int
json_parse_integer(struct json_path_parser *parser, struct
json_path_node *node)
{
- const char *end = parser->src + parser->src_len;
- const char *pos = parser->pos;
- assert(pos < end);
- const char *str = pos;
- int len = 0;
- for (char c = *pos; pos < end && isdigit(c); c = *++pos)
- ++len;
- if (len == 0)
- return pos - parser->src + 1;
- parser->pos = pos;
+ assert(parser->pos < parser->end);
+ const char *str = parser->pos;
+ const char *last_pos = parser->pos;
+ int len = 0, rc = 0;
+ UChar32 c = 0;
+
+ while (((rc = parser_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
+ last_pos = parser->pos;
+ len++;
+ }
+ if (rc > 0 && len > 0 && !u_isdigit(c))
+ parser_reset_pos(parser, last_pos, 1);
+ if (rc < 0 || len == 0)
+ return -1;
+
node->type = JSON_PATH_NUM;
node->num = strntoull(str, len);
return 0;
}
+static inline bool
+identifier_valid_sign(UChar32 c)
+{
+ return u_isUAlphabetic(c)
+ || c == (UChar32)'_'
+ || u_isdigit(c);
+}
+
/**
* Parse identifier out of quotes. It can contain only alphas,
* digits and underscores. And can not contain digit at the first
* position. Parser is stoped right after the last non-digit,
* non-alpha and non-underscore symbol.
- * @param parser JSON parser.
+ * @param[out] parser JSON parser. Updates pos, invalid_sign_off.
* @param[out] node JSON node to store result.
*
* @retval 0 Success.
@@ -120,68 +183,102 @@ static inline int
json_parse_identifier(struct json_path_parser *parser,
struct json_path_node *node)
{
- const char *end = parser->src + parser->src_len;
- const char *pos = parser->pos;
- assert(pos < end);
- const char *str = pos;
- char c = *pos;
+ assert(parser->pos < parser->end);
+ const char *str = parser->pos;
+ UChar32 c;
+ int rc = 0;
+ if (parser_read_sign(parser, &c) < 0)
+ return -1;
/* First symbol can not be digit. */
- if (!isalpha(c) && c != '_')
- return pos - parser->src + 1;
- int len = 1;
- for (c = *++pos; pos < end && (isalpha(c) || c == '_' || isdigit(c));
- c = *++pos)
- ++len;
- assert(len > 0);
- parser->pos = pos;
+ if (!u_isalpha(c) && c != (UChar32)'_')
+ return -1;
+
+ const char *last_pos = parser->pos;
+ while ((rc = parser_read_sign(parser, &c)) > 0 &&
identifier_valid_sign(c))
+ last_pos = parser->pos;
+ if (rc > 0 && !identifier_valid_sign(c))
+ parser_reset_pos(parser, last_pos, 1);
+ if (rc < 0)
+ return -1;
+
node->type = JSON_PATH_STR;
node->str = str;
- node->len = len;
+ node->len = (int)(parser->pos - str);
return 0;
}
int
+json_path_parser_init(struct json_path_parser *parser, const char *src,
+ int src_len)
+{
+ UErrorCode status = U_ZERO_ERROR ;
+ parser->utf8conv = ucnv_open("utf8", &status);
+ if (U_FAILURE(status))
+ return -1;
+ assert(parser->utf8conv);
+ parser->src = src;
+ parser->end = src + src_len;
+ parser->pos = src;
+ parser->invalid_sign_off = 0;
+ return 0;
+}
+
+void
+json_path_parser_deinit(struct json_path_parser *parser)
+{
+ if (parser->utf8conv)
+ ucnv_close(parser->utf8conv);
+}
+
+static inline int
+error_sign_offset(struct json_path_parser *parser)
+{
+ return parser->invalid_sign_off;
+}
+
+int
json_path_next(struct json_path_parser *parser, struct json_path_node
*node)
{
- const char *end = parser->src + parser->src_len;
+ assert(parser->utf8conv);
+ const char *end = parser->end;
if (end == parser->pos) {
node->type = JSON_PATH_END;
return 0;
}
- char c = *parser->pos;
+ UChar32 c = 0;
+ const char *last_pos = parser->pos;
+ if (parser_read_sign(parser, &c) < 0)
+ return error_sign_offset(parser);
int rc;
switch(c) {
- case '[':
- ++parser->pos;
+ case (UChar32)'[':
/* Error for []. */
if (parser->pos == end)
- return parser->pos - parser->src + 1;
+ return parser->invalid_sign_off;
c = *parser->pos;
if (c == '"' || c == '\'')
rc = json_parse_string(parser, node);
else
rc = json_parse_integer(parser, node);
if (rc != 0)
- return rc;
+ return parser->invalid_sign_off;
/*
* Expression, started from [ must be finished
* with ] regardless of its type.
*/
if (parser->pos == end || *parser->pos != ']')
- return parser->pos - parser->src + 1;
+ return parser->invalid_sign_off + 1;
/* Skip ]. */
- ++parser->pos;
+ (void)parser_read_sign(parser, &c);
break;
- case '.':
- /* Skip dot. */
- ++parser->pos;
- if (parser->pos == end)
- return parser->pos - parser->src + 1;
- FALLTHROUGH
default:
+ if (c != (UChar32)'.')
+ parser_reset_pos(parser, last_pos, 1);
+ else if (parser->pos == end)
+ return parser->invalid_sign_off + 1;
rc = json_parse_identifier(parser, node);
if (rc != 0)
- return rc;
+ return parser->invalid_sign_off;
break;
}
return 0;
diff --git a/src/lib/json/path.h b/src/lib/json/path.h
index 6e8db4c..0ff68c4 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/path.h
@@ -33,6 +33,9 @@
#include <stdbool.h>
#include <stdint.h>
+#include <unicode/ucnv_err.h>
+#include <unicode/ucnv.h>
+#include <assert.h>
#ifdef __cplusplus
extern "C" {
@@ -45,10 +48,12 @@ extern "C" {
struct json_path_parser {
/** Source string. */
const char *src;
- /** Length of src. */
- int src_len;
+ /** End of string. */
+ const char *end;
/** Current parser's position. */
const char *pos;
+ int invalid_sign_off;
+ UConverter* utf8conv;
};
enum json_path_type {
@@ -78,19 +83,22 @@ struct json_path_node {
};
/**
- * Create @a parser.
+ * Init @a parser.
* @param[out] parser Parser to create.
* @param src Source string.
* @param src_len Length of @a src.
+ * @retval 0 Success.
+ * @retval -1 Init error.
*/
-static inline void
-json_path_parser_create(struct json_path_parser *parser, const char *src,
- int src_len)
-{
- parser->src = src;
- parser->src_len = src_len;
- parser->pos = src;
-}
+int
+json_path_parser_init(struct json_path_parser *parser, const char *src,
+ int src_len);
+/**
+ * Deinit @a parser.
+ * @param[out] parser instance to deinit.
+ */
+void
+json_path_parser_deinit(struct json_path_parser *parser);
/**
* Get a next path node.
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 2d7367a..6b597d6 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'}
format[3] = {name = 'field3', type = 'map'}
---
...
-format[4] = {name = 'field4', type = 'string'}
+format[4] = {name = 'field4', type = 'string' }
+---
+...
+format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
---
...
s = box.schema.space.create('test', {format = format})
@@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format = format})
pk = s:create_index('pk')
---
...
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"},
key="value1", value="key1"}}
---
...
field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3,
d=4} }, [-1] = 200}
---
...
-t = s:replace{1, field2, field3, "123456"}
+t = s:replace{1, field2, field3, "123456", "yes, this"}
---
...
t[1]
@@ -626,7 +629,7 @@ t[1]
...
t[2]
---
-- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
+- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 'key':
'value1', 'value': 'key1'}]
...
t[3]
---
@@ -667,19 +670,43 @@ t["[2][5][3]"]
...
t["[2][6].key"]
---
-- key1
+- value1
...
t["[2][6].value"]
---
-- value1
+- key1
...
t["[2][6]['key']"]
---
-- key1
+- value1
...
t["[2][6]['value']"]
---
-- value1
+- key1
+...
+t[2][6].привет中国world.中国
+---
+- привет
+...
+t["[2][6].привет中国world"].中国
+---
+- привет
+...
+t["[2][6].привет中国world.中国"]
+---
+- привет
+...
+t["[2][6]['привет中国world']"]["中国"]
+---
+- привет
+...
+t["[2][6]['привет中国world']['中国']"]
+---
+- привет
+...
+t["[2][6]['привет中国world']['中国a']"]
+---
+- yes, this
...
t["[3].k3[2].c"]
---
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index ba3482d..90da8b2 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -204,12 +204,13 @@ format = {}
format[1] = {name = 'field1', type = 'unsigned'}
format[2] = {name = 'field2', type = 'array'}
format[3] = {name = 'field3', type = 'map'}
-format[4] = {name = 'field4', type = 'string'}
+format[4] = {name = 'field4', type = 'string' }
+format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
s = box.schema.space.create('test', {format = format})
pk = s:create_index('pk')
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"},
key="value1", value="key1"}}
field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3,
d=4} }, [-1] = 200}
-t = s:replace{1, field2, field3, "123456"}
+t = s:replace{1, field2, field3, "123456", "yes, this"}
t[1]
t[2]
t[3]
@@ -225,6 +226,12 @@ t["[2][6].key"]
t["[2][6].value"]
t["[2][6]['key']"]
t["[2][6]['value']"]
+t[2][6].привет中国world.中国
+t["[2][6].привет中国world"].中国
+t["[2][6].привет中国world.中国"]
+t["[2][6]['привет中国world']"]["中国"]
+t["[2][6]['привет中国world']['中国']"]
+t["[2][6]['привет中国world']['中国a']"]
t["[3].k3[2].c"]
t["[4]"]
t.field1
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index fe8b2d2..667194c 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -130,7 +130,7 @@ add_executable(csv.test csv.c)
target_link_libraries(csv.test csv)
add_executable(json_path.test json_path.c)
-target_link_libraries(json_path.test json_path unit)
+target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES})
add_executable(rmean.test rmean.cc)
target_link_libraries(rmean.test stat unit)
diff --git a/test/unit/json_path.c b/test/unit/json_path.c
index 599658b..b62afd2 100644
--- a/test/unit/json_path.c
+++ b/test/unit/json_path.c
@@ -6,7 +6,7 @@
#define reset_to_new_path(value) \
path = value; \
len = strlen(value); \
- json_path_parser_create(&parser, path, len);
+ (void)json_path_parser_init(&parser, path, len);
#define is_next_index(value_len, value) \
path = parser.pos; \
@@ -30,6 +30,7 @@ test_basic()
const char *path;
int len;
struct json_path_parser parser;
+ memset(&parser, 0, sizeof(parser));
struct json_path_node node;
reset_to_new_path("[0].field1.field2['field3'][5]");
@@ -89,6 +90,7 @@ test_errors()
const char *path;
int len;
struct json_path_parser parser;
+ memset(&parser, 0, sizeof(parser));
const struct path_and_errpos errors[] = {
/* Double [[. */
{"[[", 2},
--
2.7.4
On 29.03.2018 17:22, Kirill Shcherbatov wrote:
> From: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
>
> In progress ...
>
> Closes #1285
> ---
> src/box/CMakeLists.txt | 2 +-
> src/box/lua/tuple.c | 176
+++++++++++++++++++++++++++++++++++-----
> src/box/lua/tuple.lua | 45 +++--------
> test/engine/tuple.result | 198
+++++++++++++++++++++++++++++++++++++++++++++
> test/engine/tuple.test.lua | 59 ++++++++++++++
> 5 files changed, 428 insertions(+), 52 deletions(-)
>
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index e420fe3..add0ff9 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -130,5 +130,5 @@ add_library(box STATIC
> ${bin_sources})
>
> target_link_libraries(box box_error tuple stat xrow xlog vclock
crc32 scramble
> - ${common_libraries})
> + json_path ${common_libraries})
> add_dependencies(box build_bundled_libs)
> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 7ca4299..99b9ff2 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c
> @@ -41,6 +41,7 @@
> #include "box/tuple_convert.h"
> #include "box/errcode.h"
> #include "box/memtx_tuple.h"
> +#include "json/path.h"
>
> /** {{{ box.tuple Lua library
> *
> @@ -402,36 +403,175 @@ lbox_tuple_transform(struct lua_State *L)
> }
>
> /**
> - * Find a tuple field using its name.
> + * Propagate @a field to MessagePack(field)[index].
> + * @param[in][out] field Field to propagate.
> + * @param index 1-based index to propagate to.
> + *
> + * @retval 0 Success, the index was found.
> + * @retval -1 Not found.
> + */
> +static inline int
> +tuple_field_go_to_index(const char **field, uint64_t index)
> +{
> + assert(index >= 0);
> + enum mp_type type = mp_typeof(**field);
> + if (type == MP_ARRAY) {
> + if (index == 0)
> + return -1;
> + /* Make index 0-based. */
> + index -= TUPLE_INDEX_BASE;
> + uint32_t count = mp_decode_array(field);
> + if (index >= count)
> + return -1;
> + for (; index > 0; --index)
> + mp_next(field);
> + return 0;
> + } else if (type == MP_MAP) {
> + uint64_t count = mp_decode_map(field);
> + for (; count > 0; --count) {
> + type = mp_typeof(**field);
> + if (type == MP_UINT) {
> + uint64_t value = mp_decode_uint(field);
> + if (value == index)
> + return 0;
> + } else if (type == MP_INT) {
> + int64_t value = mp_decode_int(field);
> + if (value >= 0 && (uint64_t)value == index)
> + return 0;
> + } else {
> + /* Skip key. */
> + mp_next(field);
> + }
> + /* Skip value. */
> + mp_next(field);
> + }
> + }
> + return -1;
> +}
> +
> +/**
> + * Propagate @a field to MessagePack(field)[key].
> + * @param[in][out] field Field to propagate.
> + * @param key Key to propagate to.
> + * @param len Length of @a key.
> + *
> + * @retval 0 Success, the index was found.
> + * @retval -1 Not found.
> + */
> +static inline int
> +tuple_field_go_to_key(const char **field, const char *key, int len)
> +{
> + enum mp_type type = mp_typeof(**field);
> + if (type != MP_MAP)
> + return -1;
> + uint64_t count = mp_decode_map(field);
> + for (; count > 0; --count) {
> + type = mp_typeof(**field);
> + if (type == MP_STR) {
> + uint32_t value_len;
> + const char *value = mp_decode_str(field, &value_len);
> + if (value_len == (uint)len &&
> + memcmp(value, key, len) == 0)
> + return 0;
> + } else {
> + /* Skip key. */
> + mp_next(field);
> + }
> + /* Skip value. */
> + mp_next(field);
> + }
> + return -1;
> +}
> +
> +/**
> + * Find a tuple field by JSON path.
> * @param L Lua state.
> - * @param tuple 1-th argument on lua stack, tuple to get field
> + * @param tuple 1-th argument on a lua stack, tuple to get field
> * from.
> - * @param field_name 2-th argument on lua stack, field name to
> - * get.
> + * @param path 2-th argument on lua stack. Can be field name,
> + * JSON path to a field or a field number.
> *
> * @retval If a field was not found, return -1 and nil to lua else
> * return 0 and decoded field.
> */
> static int
> -lbox_tuple_field_by_name(struct lua_State *L)
> +lbox_tuple_field_by_path(struct lua_State *L)
> {
> + const char *field;
> struct tuple *tuple = luaT_istuple(L, 1);
> /* Is checked in Lua wrapper. */
> assert(tuple != NULL);
> - assert(lua_isstring(L, 2));
> - size_t name_len;
> - const char *name = lua_tolstring(L, 2, &name_len);
> - uint32_t name_hash = lua_hashstring(L, 2);
> - const char *field =
> - tuple_field_by_name(tuple, name, name_len, name_hash);
> - if (field == NULL) {
> - lua_pushinteger(L, -1);
> - lua_pushnil(L);
> + if (lua_isnumber(L, 2)) {
> + int index = lua_tointeger(L, 2);
> + index -= TUPLE_INDEX_BASE;
> + if (index < 0) {
> +not_found:
> + lua_pushinteger(L, -1);
> + lua_pushnil(L);
> + return 2;
> + }
> + field = tuple_field(tuple, index);
> + if (field == NULL)
> + goto not_found;
> +push_value:
> + lua_pushinteger(L, 0);
> + luamp_decode(L, luaL_msgpack_default, &field);
> return 2;
> }
> - lua_pushinteger(L, 0);
> - luamp_decode(L, luaL_msgpack_default, &field);
> - return 2;
> + assert(lua_isstring(L, 2));
> + size_t path_len;
> + const char *path = lua_tolstring(L, 2, &path_len);
> + struct json_path_parser parser;
> + struct json_path_node node;
> + json_path_parser_create(&parser, path, path_len);
> + int rc = json_path_next(&parser, &node);
> + if (rc != 0 || node.type == JSON_PATH_END)
> + luaL_error(L, "Error in path on position %d", rc);
> + if (node.type == JSON_PATH_NUM) {
> + int index = node.num;
> + if (index == 0)
> + goto not_found;
> + index -= TUPLE_INDEX_BASE;
> + field = tuple_field(tuple, index);
> + if (field == NULL)
> + goto not_found;
> + } else {
> + assert(node.type == JSON_PATH_STR);
> + /* First part of a path is a field name. */
> + const char *name = node.str;
> + uint32_t name_len = node.len;
> + uint32_t name_hash;
> + if (path_len == name_len) {
> + name_hash = lua_hashstring(L, 2);
> + } else {
> + /*
> + * If a string is "field....", then its
> + * precalculated juajit hash can not be
> + * used. A tuple dictionary hashes only
> + * name, not path.
> + */
> + name_hash = lua_hash(name, name_len);
> + }
> + field = tuple_field_by_name(tuple, name, name_len, name_hash);
> + if (field == NULL)
> + goto not_found;
> + }
> + while ((rc = json_path_next(&parser, &node)) == 0 &&
> + node.type != JSON_PATH_END) {
> + if (node.type == JSON_PATH_NUM) {
> + rc = tuple_field_go_to_index(&field, node.num);
> + } else {
> + assert(node.type == JSON_PATH_STR);
> + rc = tuple_field_go_to_key(&field, node.str, node.len);
> + }
> + if (rc != 0)
> + goto not_found;
> + }
> + if (rc == 0)
> + goto push_value;
> + luaL_error(L, "Error in path on position %d", rc);
> + unreachable();
> + goto not_found;
> }
>
> static int
> @@ -470,8 +610,8 @@ static const struct luaL_Reg lbox_tuple_meta[] = {
> {"tostring", lbox_tuple_to_string},
> {"slice", lbox_tuple_slice},
> {"transform", lbox_tuple_transform},
> - {"tuple_field_by_name", lbox_tuple_field_by_name},
> {"tuple_to_map", lbox_tuple_to_map},
> + {"tuple_field_by_path", lbox_tuple_field_by_path},
> {NULL, NULL}
> };
>
> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
> index 001971a..b51b4df 100644
> --- a/src/box/lua/tuple.lua
> +++ b/src/box/lua/tuple.lua
> @@ -9,16 +9,9 @@ local internal = require('box.internal')
>
> ffi.cdef[[
> /** \cond public */
> -typedef struct tuple_format box_tuple_format_t;
> -
> -box_tuple_format_t *
> -box_tuple_format_default(void);
>
> typedef struct tuple box_tuple_t;
>
> -box_tuple_t *
> -box_tuple_new(box_tuple_format_t *format, const char *data, const
char *end);
> -
> int
> box_tuple_ref(box_tuple_t *tuple);
>
> @@ -34,9 +27,6 @@ box_tuple_bsize(const box_tuple_t *tuple);
> ssize_t
> box_tuple_to_buf(const box_tuple_t *tuple, char *buf, size_t size);
>
> -box_tuple_format_t *
> -box_tuple_format(const box_tuple_t *tuple);
> -
> const char *
> box_tuple_field(const box_tuple_t *tuple, uint32_t i);
>
> @@ -278,9 +268,9 @@ end
>
> msgpackffi.on_encode(const_tuple_ref_t, tuple_to_msgpack)
>
> -local function tuple_field_by_name(tuple, name)
> +local function tuple_field_by_path(tuple, path)
> tuple_check(tuple, "tuple['field_name']");
> - return internal.tuple.tuple_field_by_name(tuple, name)
> + return internal.tuple.tuple_field_by_path(tuple, path)
> end
>
> local methods = {
> @@ -306,33 +296,22 @@ end
>
> methods["__serialize"] = tuple_totable -- encode hook for
msgpack/yaml/json
>
> -local tuple_field = function(tuple, field_n)
> - local field = builtin.box_tuple_field(tuple, field_n - 1)
> - if field == nil then
> - return nil
> - end
> - -- Use () to shrink stack to the first return value
> - return (msgpackffi.decode_unchecked(field))
> -end
> -
> -
> ffi.metatype(tuple_t, {
> __len = function(tuple)
> return builtin.box_tuple_field_count(tuple)
> end;
> __tostring = internal.tuple.tostring;
> __index = function(tuple, key)
> - if type(key) == "number" then
> - return tuple_field(tuple, key)
> - elseif type(key) == "string" then
> - -- Try to get a field with a name = key. If it was not
> - -- found (rc ~= 0) then return a method from the
> - -- vtable. If a collision occurred, then fields have
> - -- higher priority. For example, if a tuple T has a
> - -- field with name 'bsize', then T.bsize returns field
> - -- value, not tuple_bsize function. To access hidden
> - -- methods use 'box.tuple.<method_name>(T, [args...])'.
> - local rc, field = tuple_field_by_name(tuple, key)
> + if type(key) == "string" or type(key) == "number" then
> + -- Try to get a field by json path or by [index]. If
> + -- it was not found (rc ~= 0) then return a method
> + -- from the vtable. If a collision occurred, then
> + -- fields have higher priority. For example, if a
> + -- tuple T has a field with name 'bsize', then T.bsize
> + -- returns field value, not tuple_bsize function. To
> + -- access hidden methods use
> + -- 'box.tuple.<method_name>(T, [args...])'.
> + local rc, field = tuple_field_by_path(tuple, key)
> if rc == 0 then
> return field
> end
> diff --git a/test/engine/tuple.result b/test/engine/tuple.result
> index b3b23b2..2d7367a 100644
> --- a/test/engine/tuple.result
> +++ b/test/engine/tuple.result
> @@ -590,6 +590,204 @@ maplen(t1map), t1map[1], t1map[2], t1map[3]
> s:drop()
> ---
> ...
> +format = {}
> +---
> +...
> +format[1] = {name = 'field1', type = 'unsigned'}
> +---
> +...
> +format[2] = {name = 'field2', type = 'array'}
> +---
> +...
> +format[3] = {name = 'field3', type = 'map'}
> +---
> +...
> +format[4] = {name = 'field4', type = 'string'}
> +---
> +...
> +s = box.schema.space.create('test', {format = format})
> +---
> +...
> +pk = s:create_index('pk')
> +---
> +...
> +field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
> +---
> +...
> +field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2},
{c=3, d=4} }, [-1] = 200}
> +---
> +...
> +t = s:replace{1, field2, field3, "123456"}
> +---
> +...
> +t[1]
> +---
> +- 1
> +...
> +t[2]
> +---
> +- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
> +...
> +t[3]
> +---
> +- {'k1': 100, 'k3': [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}], -1: 200,
10: 100, 'k2': [
> + 1, 2, 3]}
> +...
> +t[4]
> +---
> +- '123456'
> +...
> +t[2][1]
> +---
> +- 1
> +...
> +t["[2][1]"]
> +---
> +- 1
> +...
> +t[2][5]
> +---
> +- [5, 6, 7]
> +...
> +t["[2][5]"]
> +---
> +- [5, 6, 7]
> +...
> +t["[2][5][1]"]
> +---
> +- 5
> +...
> +t["[2][5][2]"]
> +---
> +- 6
> +...
> +t["[2][5][3]"]
> +---
> +- 7
> +...
> +t["[2][6].key"]
> +---
> +- key1
> +...
> +t["[2][6].value"]
> +---
> +- value1
> +...
> +t["[2][6]['key']"]
> +---
> +- key1
> +...
> +t["[2][6]['value']"]
> +---
> +- value1
> +...
> +t["[3].k3[2].c"]
> +---
> +- 3
> +...
> +t["[4]"]
> +---
> +- '123456'
> +...
> +t.field1
> +---
> +- 1
> +...
> +t.field2[5]
> +---
> +- [5, 6, 7]
> +...
> +t[".field1"]
> +---
> +- 1
> +...
> +t["field1"]
> +---
> +- 1
> +...
> +t["[3][10]"]
> +---
> +- 100
> +...
> +-- Not found.
> +t[0]
> +---
> +- null
> +...
> +t["[0]"]
> +---
> +- null
> +...
> +t["[1000]"]
> +---
> +- null
> +...
> +t.field1000
> +---
> +- null
> +...
> +t["not_found"]
> +---
> +- null
> +...
> +t["[2][5][10]"]
> +---
> +- null
> +...
> +t["[2][6].key100"]
> +---
> +- null
> +...
> +t["[2][0]"] -- 0-based index in array.
> +---
> +- null
> +...
> +t["[4][3]"] -- Can not index string.
> +---
> +- null
> +...
> +t["[4]['key']"]
> +---
> +- null
> +...
> +-- Not found 'a'. Return 'null' despite of syntax error on a
> +-- next position.
> +t["a.b.c d.e.f"]
> +---
> +- null
> +...
> +-- Sytax errors.
> +t[""]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 0'
> +...
> +t["[2].[5]"]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 5'
> +...
> +t["[-1]"]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
> +...
> +t[".."]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
> +...
> +t["[["]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
> +...
> +t["]]"]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
> +...
> +t["{"]
> +---
> +- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
> +...
> +s:drop()
> +---
> +...
> engine = nil
> ---
> ...
> diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
> index 6d7d254..ba3482d 100644
> --- a/test/engine/tuple.test.lua
> +++ b/test/engine/tuple.test.lua
> @@ -200,5 +200,64 @@ t1map = t1:tomap()
> maplen(t1map), t1map[1], t1map[2], t1map[3]
> s:drop()
>
> +format = {}
> +format[1] = {name = 'field1', type = 'unsigned'}
> +format[2] = {name = 'field2', type = 'array'}
> +format[3] = {name = 'field3', type = 'map'}
> +format[4] = {name = 'field4', type = 'string'}
> +s = box.schema.space.create('test', {format = format})
> +pk = s:create_index('pk')
> +field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
> +field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2},
{c=3, d=4} }, [-1] = 200}
> +t = s:replace{1, field2, field3, "123456"}
> +t[1]
> +t[2]
> +t[3]
> +t[4]
> +t[2][1]
> +t["[2][1]"]
> +t[2][5]
> +t["[2][5]"]
> +t["[2][5][1]"]
> +t["[2][5][2]"]
> +t["[2][5][3]"]
> +t["[2][6].key"]
> +t["[2][6].value"]
> +t["[2][6]['key']"]
> +t["[2][6]['value']"]
> +t["[3].k3[2].c"]
> +t["[4]"]
> +t.field1
> +t.field2[5]
> +t[".field1"]
> +t["field1"]
> +t["[3][10]"]
> +
> +-- Not found.
> +t[0]
> +t["[0]"]
> +t["[1000]"]
> +t.field1000
> +t["not_found"]
> +t["[2][5][10]"]
> +t["[2][6].key100"]
> +t["[2][0]"] -- 0-based index in array.
> +t["[4][3]"] -- Can not index string.
> +t["[4]['key']"]
> +-- Not found 'a'. Return 'null' despite of syntax error on a
> +-- next position.
> +t["a.b.c d.e.f"]
> +
> +-- Sytax errors.
> +t[""]
> +t["[2].[5]"]
> +t["[-1]"]
> +t[".."]
> +t["[["]
> +t["]]"]
> +t["{"]
> +
> +s:drop()
> +
> engine = nil
> test_run = nil
>
More information about the Tarantool-patches
mailing list