From: Kirill Shcherbatov <kshcherbatov@tarantool.org> To: tarantool-patches@freelists.org Cc: "v.shpilevoy@tarantool.org" <v.shpilevoy@tarantool.org> Subject: [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU Date: Wed, 4 Apr 2018 13:37:24 +0300 [thread overview] Message-ID: <01a0181a-19f9-c38c-94ea-65ab549a4517@tarantool.org> (raw) In-Reply-To: <cover.1522333265.git.kshcherbatov@tarantool.org> ICU Implementation From 8703e465382ba05817e7703550694c3790972e54 Mon Sep 17 00:00:00 2001 Message-Id: <8703e465382ba05817e7703550694c3790972e54.1522838002.git.kshcherbatov@tarantool.org> In-Reply-To: <cover.1522838002.git.kshcherbatov@tarantool.org> References: <cover.1522838002.git.kshcherbatov@tarantool.org> From: Kirill Shcherbatov <kshcherbatov@tarantool.org> Date: Wed, 4 Apr 2018 13:06:22 +0300 Subject: [PATCH v2 3/3] ICU Unicode parsing implementation --- src/box/lua/tuple.c | 31 +++++-- src/lib/json/path.c | 211 +++++++++++++++++++++++++++++++++------------ src/lib/json/path.h | 30 ++++--- test/engine/tuple.result | 43 +++++++-- test/engine/tuple.test.lua | 13 ++- test/unit/CMakeLists.txt | 2 +- test/unit/json_path.c | 4 +- 7 files changed, 247 insertions(+), 87 deletions(-) diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c index 99b9ff2..b89e1f9 100644 --- a/src/box/lua/tuple.c +++ b/src/box/lua/tuple.c @@ -413,7 +413,6 @@ lbox_tuple_transform(struct lua_State *L) static inline int tuple_field_go_to_index(const char **field, uint64_t index) { - assert(index >= 0); enum mp_type type = mp_typeof(**field); if (type == MP_ARRAY) { if (index == 0) @@ -497,6 +496,12 @@ tuple_field_go_to_key(const char **field, const char *key, int len) static int lbox_tuple_field_by_path(struct lua_State *L) { + int err_pos = 0; + struct json_path_parser parser; + /* Need uninitialized structure to + * json_path_parser_deinit on lua_isnumber */ + memset(&parser, 0, sizeof(parser)); + const char *path = NULL; const char *field; struct tuple *tuple = luaT_istuple(L, 
1); /* Is checked in Lua wrapper. */ @@ -506,6 +511,18 @@ lbox_tuple_field_by_path(struct lua_State *L) index -= TUPLE_INDEX_BASE; if (index < 0) { not_found: + if (!path) + goto exit_not_found; + uint32_t path_len = strlen(path); + uint32_t path_hash = lua_hash(path, path_len); + field = tuple_field_by_name(tuple, path, + path_len, path_hash); + if (field) + goto push_value; + if (err_pos || path_len == 0) + luaL_error(L, "Error in path on position %d", err_pos); +exit_not_found: + json_path_parser_deinit(&parser); lua_pushinteger(L, -1); lua_pushnil(L); return 2; @@ -514,19 +531,21 @@ not_found: if (field == NULL) goto not_found; push_value: + json_path_parser_deinit(&parser); lua_pushinteger(L, 0); luamp_decode(L, luaL_msgpack_default, &field); return 2; } assert(lua_isstring(L, 2)); size_t path_len; - const char *path = lua_tolstring(L, 2, &path_len); - struct json_path_parser parser; + path = lua_tolstring(L, 2, &path_len); struct json_path_node node; - json_path_parser_create(&parser, path, path_len); + json_path_parser_init(&parser, path, path_len); int rc = json_path_next(&parser, &node); - if (rc != 0 || node.type == JSON_PATH_END) - luaL_error(L, "Error in path on position %d", rc); + if (rc != 0 || node.type == JSON_PATH_END) { + err_pos = rc; + goto not_found; + } if (node.type == JSON_PATH_NUM) { int index = node.num; if (index == 0) diff --git a/src/lib/json/path.c b/src/lib/json/path.c index 4a6174e..4aadb3a 100644 --- a/src/lib/json/path.c +++ b/src/lib/json/path.c @@ -31,8 +31,11 @@ #include "path.h" #include <ctype.h> +#include <unicode/uchar.h> #include "trivia/util.h" +#define REPLACEMENT_CHARACTER (0xFFFD) + /** Same as strtoull(), but with limited length. */ static inline uint64_t strntoull(const char *src, int len) { @@ -45,6 +48,51 @@ strntoull(const char *src, int len) { } /** + * Parse string and update parser's state. + * @param[out] parser JSON path parser. Upates pos, signs_read. + * @param[out] UChar32 to store result. 
+ * + * @retval 1 Success. + * @retval 0 End of string. + * @retval -1 Parse error. + */ +static inline int +parser_read_sign(struct json_path_parser *parser, UChar32 *out) +{ + int rc; + UErrorCode status = U_ZERO_ERROR; + if (parser->pos == parser->end) + return 0; + *out = ucnv_getNextUChar(parser->utf8conv, &parser->pos, parser->end, &status); + parser->invalid_sign_off += (rc = U_SUCCESS(status)); + return rc ? 1 : -1; +} + +/** + * Parse string and update parser's state. + * @param[out] parser JSON path parser. Upates pos, signs_read. + * @param old parser read offset. + * @param signs to drop. + */ +static inline void +parser_reset_pos(struct json_path_parser *parser, const char *old_pos, int signs) +{ + parser->pos = old_pos; + parser->invalid_sign_off -= signs; +} + +static inline bool +string_valid_sign(UChar32 c) +{ + int8_t type = u_charType(c); + return !(c == REPLACEMENT_CHARACTER || + type == U_UNASSIGNED || + type == U_LINE_SEPARATOR || + type == U_CONTROL_CHAR || + type == U_PARAGRAPH_SEPARATOR); +} + +/** * Parse string identifier in quotes. Parser either stops right * after the closing quote, or returns an error position. * @param parser JSON path parser. @@ -56,22 +104,24 @@ strntoull(const char *src, int len) { static inline int json_parse_string(struct json_path_parser *parser, struct json_path_node *node) { - const char *end = parser->src + parser->src_len; - const char *pos = parser->pos; - assert(pos < end); - char quote_type = *pos; - assert(quote_type == '\'' || quote_type == '"'); - /* Skip first quote. */ - int len = 0; - ++pos; - const char *str = pos; - for (char c = *pos; pos < end && quote_type != c; c = *++pos) - ++len; - /* A string must be terminated with quote. */ - if (*pos != quote_type || len == 0) - return pos - parser->src + 1; - /* Skip the closing quote. 
*/ - parser->pos = pos + 1; + assert(parser->pos < parser->end); + UChar32 quote_type; + (void)parser_read_sign(parser, "e_type); + assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"'); + const char *str = parser->pos; + UChar32 c = 0; + int rc = 0; + + while (((rc = parser_read_sign(parser, &c)) > 0) + && string_valid_sign(c) && c != quote_type); + int len = (int)(parser->pos - str - 1); + if (rc < 0 || len == 0) + return -1; + if (c != (UChar32)quote_type) { + parser->invalid_sign_off++; + return -1; + } + node->type = JSON_PATH_STR; node->str = str; node->len = len; @@ -81,7 +131,7 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node) /** * Parse digit sequence into integer until non-digit is met. * Parser stops right after the last digit. - * @param parser JSON parser. + * @param[out] parser JSON parser. Updates signs_read field. * @param[out] node JSON node to store result. * * @retval 0 Success. @@ -90,27 +140,40 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node) static inline int json_parse_integer(struct json_path_parser *parser, struct json_path_node *node) { - const char *end = parser->src + parser->src_len; - const char *pos = parser->pos; - assert(pos < end); - const char *str = pos; - int len = 0; - for (char c = *pos; pos < end && isdigit(c); c = *++pos) - ++len; - if (len == 0) - return pos - parser->src + 1; - parser->pos = pos; + assert(parser->pos < parser->end); + const char *str = parser->pos; + const char *last_pos = parser->pos; + int len = 0, rc = 0; + UChar32 c = 0; + + while (((rc = parser_read_sign(parser, &c)) > 0) && u_isdigit(c)) { + last_pos = parser->pos; + len++; + } + if (rc > 0 && len > 0 && !u_isdigit(c)) + parser_reset_pos(parser, last_pos, 1); + if (rc < 0 || len == 0) + return -1; + node->type = JSON_PATH_NUM; node->num = strntoull(str, len); return 0; } +static inline bool +identifier_valid_sign(UChar32 c) +{ + return u_isUAlphabetic(c) + || c == 
(UChar32)'_' + || u_isdigit(c); +} + /** * Parse identifier out of quotes. It can contain only alphas, * digits and underscores. And can not contain digit at the first * position. Parser is stoped right after the last non-digit, * non-alpha and non-underscore symbol. - * @param parser JSON parser. + * @param[out] parser JSON parser. Updates signs_read field. * @param[out] node JSON node to store result. * * @retval 0 Success. @@ -120,68 +183,102 @@ static inline int json_parse_identifier(struct json_path_parser *parser, struct json_path_node *node) { - const char *end = parser->src + parser->src_len; - const char *pos = parser->pos; - assert(pos < end); - const char *str = pos; - char c = *pos; + assert(parser->pos < parser->end); + const char *str = parser->pos; + UChar32 c; + int rc = 0; + if (parser_read_sign(parser, &c) < 0) + return -1; /* First symbol can not be digit. */ - if (!isalpha(c) && c != '_') - return pos - parser->src + 1; - int len = 1; - for (c = *++pos; pos < end && (isalpha(c) || c == '_' || isdigit(c)); - c = *++pos) - ++len; - assert(len > 0); - parser->pos = pos; + if (!u_isalpha(c) && c != (UChar32)'_') + return -1; + + const char *last_pos = parser->pos; + while ((rc = parser_read_sign(parser, &c)) > 0 && identifier_valid_sign(c)) + last_pos = parser->pos; + if (rc > 0 && !identifier_valid_sign(c)) + parser_reset_pos(parser, last_pos, 1); + if (rc < 0) + return -1; + node->type = JSON_PATH_STR; node->str = str; - node->len = len; + node->len = (int)(parser->pos - str); return 0; } int +json_path_parser_init(struct json_path_parser *parser, const char *src, + int src_len) +{ + UErrorCode status = U_ZERO_ERROR ; + parser->utf8conv = ucnv_open("utf8", &status); + if (U_FAILURE(status)) + return -1; + assert(parser->utf8conv); + parser->src = src; + parser->end = src + src_len; + parser->pos = src; + parser->invalid_sign_off = 0; + return 0; +} + +void +json_path_parser_deinit(struct json_path_parser *parser) +{ + if (parser->utf8conv) + 
ucnv_close(parser->utf8conv); +} + +static inline int +error_sign_offset(struct json_path_parser *parser) +{ + return parser->invalid_sign_off; +} + +int json_path_next(struct json_path_parser *parser, struct json_path_node *node) { - const char *end = parser->src + parser->src_len; + assert(parser->utf8conv); + const char *end = parser->end; if (end == parser->pos) { node->type = JSON_PATH_END; return 0; } - char c = *parser->pos; + UChar32 c = 0; + const char *last_pos = parser->pos; + if (parser_read_sign(parser, &c) < 0) + return error_sign_offset(parser); int rc; switch(c) { - case '[': - ++parser->pos; + case (UChar32)'[': /* Error for []. */ if (parser->pos == end) - return parser->pos - parser->src + 1; + return parser->invalid_sign_off; c = *parser->pos; if (c == '"' || c == '\'') rc = json_parse_string(parser, node); else rc = json_parse_integer(parser, node); if (rc != 0) - return rc; + return parser->invalid_sign_off; /* * Expression, started from [ must be finished * with ] regardless of its type. */ if (parser->pos == end || *parser->pos != ']') - return parser->pos - parser->src + 1; + return parser->invalid_sign_off + 1; /* Skip ]. */ - ++parser->pos; + (void)parser_read_sign(parser, &c); break; - case '.': - /* Skip dot. 
*/ - ++parser->pos; - if (parser->pos == end) - return parser->pos - parser->src + 1; - FALLTHROUGH default: + if (c != (UChar32)'.') + parser_reset_pos(parser, last_pos, 1); + else if (parser->pos == end) + return parser->invalid_sign_off + 1; rc = json_parse_identifier(parser, node); if (rc != 0) - return rc; + return parser->invalid_sign_off; break; } return 0; diff --git a/src/lib/json/path.h b/src/lib/json/path.h index 6e8db4c..0ff68c4 100644 --- a/src/lib/json/path.h +++ b/src/lib/json/path.h @@ -33,6 +33,9 @@ #include <stdbool.h> #include <stdint.h> +#include <unicode/ucnv_err.h> +#include <unicode/ucnv.h> +#include <assert.h> #ifdef __cplusplus extern "C" { @@ -45,10 +48,12 @@ extern "C" { struct json_path_parser { /** Source string. */ const char *src; - /** Length of src. */ - int src_len; + /** End of string. */ + const char *end; /** Current parser's position. */ const char *pos; + int invalid_sign_off; + UConverter* utf8conv; }; enum json_path_type { @@ -78,19 +83,22 @@ struct json_path_node { }; /** - * Create @a parser. + * Init @a parser. * @param[out] parser Parser to create. * @param src Source string. * @param src_len Length of @a src. + * @retval 0 Success. + * @retval -1 Init error. */ -static inline void -json_path_parser_create(struct json_path_parser *parser, const char *src, - int src_len) -{ - parser->src = src; - parser->src_len = src_len; - parser->pos = src; -} +int +json_path_parser_init(struct json_path_parser *parser, const char *src, + int src_len); +/** + * Deinit @a parser. + * @param[out] parser instance to deinit. + */ +void +json_path_parser_deinit(struct json_path_parser *parser); /** * Get a next path node. diff --git a/test/engine/tuple.result b/test/engine/tuple.result index 2d7367a..6b597d6 100644 --- a/test/engine/tuple.result +++ b/test/engine/tuple.result @@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'} format[3] = {name = 'field3', type = 'map'} --- ... 
-format[4] = {name = 'field4', type = 'string'} +format[4] = {name = 'field4', type = 'string' } +--- +... +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'} --- ... s = box.schema.space.create('test', {format = format}) @@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format = format}) pk = s:create_index('pk') --- ... -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}} +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}} --- ... field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200} --- ... -t = s:replace{1, field2, field3, "123456"} +t = s:replace{1, field2, field3, "123456", "yes, this"} --- ... t[1] @@ -626,7 +629,7 @@ t[1] ... t[2] --- -- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}] +- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 'key': 'value1', 'value': 'key1'}] ... t[3] --- @@ -667,19 +670,43 @@ t["[2][5][3]"] ... t["[2][6].key"] --- -- key1 +- value1 ... t["[2][6].value"] --- -- value1 +- key1 ... t["[2][6]['key']"] --- -- key1 +- value1 ... t["[2][6]['value']"] --- -- value1 +- key1 +... +t[2][6].привет中国world.中国 +--- +- привет +... +t["[2][6].привет中国world"].中国 +--- +- привет +... +t["[2][6].привет中国world.中国"] +--- +- привет +... +t["[2][6]['привет中国world']"]["中国"] +--- +- привет +... +t["[2][6]['привет中国world']['中国']"] +--- +- привет +... +t["[2][6]['привет中国world']['中国a']"] +--- +- yes, this ... 
t["[3].k3[2].c"] --- diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua index ba3482d..90da8b2 100644 --- a/test/engine/tuple.test.lua +++ b/test/engine/tuple.test.lua @@ -204,12 +204,13 @@ format = {} format[1] = {name = 'field1', type = 'unsigned'} format[2] = {name = 'field2', type = 'array'} format[3] = {name = 'field3', type = 'map'} -format[4] = {name = 'field4', type = 'string'} +format[4] = {name = 'field4', type = 'string' } +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'} s = box.schema.space.create('test', {format = format}) pk = s:create_index('pk') -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}} +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}} field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200} -t = s:replace{1, field2, field3, "123456"} +t = s:replace{1, field2, field3, "123456", "yes, this"} t[1] t[2] t[3] @@ -225,6 +226,12 @@ t["[2][6].key"] t["[2][6].value"] t["[2][6]['key']"] t["[2][6]['value']"] +t[2][6].привет中国world.中国 +t["[2][6].привет中国world"].中国 +t["[2][6].привет中国world.中国"] +t["[2][6]['привет中国world']"]["中国"] +t["[2][6]['привет中国world']['中国']"] +t["[2][6]['привет中国world']['中国a']"] t["[3].k3[2].c"] t["[4]"] t.field1 diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index fe8b2d2..667194c 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -130,7 +130,7 @@ add_executable(csv.test csv.c) target_link_libraries(csv.test csv) add_executable(json_path.test json_path.c) -target_link_libraries(json_path.test json_path unit) +target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES}) add_executable(rmean.test rmean.cc) target_link_libraries(rmean.test stat unit) diff --git a/test/unit/json_path.c b/test/unit/json_path.c index 599658b..b62afd2 100644 --- a/test/unit/json_path.c +++ b/test/unit/json_path.c @@ -6,7 +6,7 @@ #define reset_to_new_path(value) \ path = value; 
\ len = strlen(value); \ - json_path_parser_create(&parser, path, len); + (void)json_path_parser_init(&parser, path, len); #define is_next_index(value_len, value) \ path = parser.pos; \ @@ -30,6 +30,7 @@ test_basic() const char *path; int len; struct json_path_parser parser; + memset(&parser, 0, sizeof(parser)); struct json_path_node node; reset_to_new_path("[0].field1.field2['field3'][5]"); @@ -89,6 +90,7 @@ test_errors() const char *path; int len; struct json_path_parser parser; + memset(&parser, 0, sizeof(parser)); const struct path_and_errpos errors[] = { /* Double [[. */ {"[[", 2}, -- 2.7.4 On 29.03.2018 17:22, Kirill Shcherbatov wrote: > From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> > > In progress ... > > Closes #1285 > --- > src/box/CMakeLists.txt | 2 +- > src/box/lua/tuple.c | 176 +++++++++++++++++++++++++++++++++++----- > src/box/lua/tuple.lua | 45 +++-------- > test/engine/tuple.result | 198 +++++++++++++++++++++++++++++++++++++++++++++ > test/engine/tuple.test.lua | 59 ++++++++++++++ > 5 files changed, 428 insertions(+), 52 deletions(-) > > diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt > index e420fe3..add0ff9 100644 > --- a/src/box/CMakeLists.txt > +++ b/src/box/CMakeLists.txt > @@ -130,5 +130,5 @@ add_library(box STATIC > ${bin_sources}) > > target_link_libraries(box box_error tuple stat xrow xlog vclock crc32 scramble > - ${common_libraries}) > + json_path ${common_libraries}) > add_dependencies(box build_bundled_libs) > diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c > index 7ca4299..99b9ff2 100644 > --- a/src/box/lua/tuple.c > +++ b/src/box/lua/tuple.c > @@ -41,6 +41,7 @@ > #include "box/tuple_convert.h" > #include "box/errcode.h" > #include "box/memtx_tuple.h" > +#include "json/path.h" > > /** {{{ box.tuple Lua library > * > @@ -402,36 +403,175 @@ lbox_tuple_transform(struct lua_State *L) > } > > /** > - * Find a tuple field using its name. > + * Propagate @a field to MessagePack(field)[index]. 
> + * @param[in][out] field Field to propagate. > + * @param index 1-based index to propagate to. > + * > + * @retval 0 Success, the index was found. > + * @retval -1 Not found. > + */ > +static inline int > +tuple_field_go_to_index(const char **field, uint64_t index) > +{ > + assert(index >= 0); > + enum mp_type type = mp_typeof(**field); > + if (type == MP_ARRAY) { > + if (index == 0) > + return -1; > + /* Make index 0-based. */ > + index -= TUPLE_INDEX_BASE; > + uint32_t count = mp_decode_array(field); > + if (index >= count) > + return -1; > + for (; index > 0; --index) > + mp_next(field); > + return 0; > + } else if (type == MP_MAP) { > + uint64_t count = mp_decode_map(field); > + for (; count > 0; --count) { > + type = mp_typeof(**field); > + if (type == MP_UINT) { > + uint64_t value = mp_decode_uint(field); > + if (value == index) > + return 0; > + } else if (type == MP_INT) { > + int64_t value = mp_decode_int(field); > + if (value >= 0 && (uint64_t)value == index) > + return 0; > + } else { > + /* Skip key. */ > + mp_next(field); > + } > + /* Skip value. */ > + mp_next(field); > + } > + } > + return -1; > +} > + > +/** > + * Propagate @a field to MessagePack(field)[key]. > + * @param[in][out] field Field to propagate. > + * @param key Key to propagate to. > + * @param len Length of @a key. > + * > + * @retval 0 Success, the index was found. > + * @retval -1 Not found. > + */ > +static inline int > +tuple_field_go_to_key(const char **field, const char *key, int len) > +{ > + enum mp_type type = mp_typeof(**field); > + if (type != MP_MAP) > + return -1; > + uint64_t count = mp_decode_map(field); > + for (; count > 0; --count) { > + type = mp_typeof(**field); > + if (type == MP_STR) { > + uint32_t value_len; > + const char *value = mp_decode_str(field, &value_len); > + if (value_len == (uint)len && > + memcmp(value, key, len) == 0) > + return 0; > + } else { > + /* Skip key. */ > + mp_next(field); > + } > + /* Skip value. 
*/ > + mp_next(field); > + } > + return -1; > +} > + > +/** > + * Find a tuple field by JSON path. > * @param L Lua state. > - * @param tuple 1-th argument on lua stack, tuple to get field > + * @param tuple 1-th argument on a lua stack, tuple to get field > * from. > - * @param field_name 2-th argument on lua stack, field name to > - * get. > + * @param path 2-th argument on lua stack. Can be field name, > + * JSON path to a field or a field number. > * > * @retval If a field was not found, return -1 and nil to lua else > * return 0 and decoded field. > */ > static int > -lbox_tuple_field_by_name(struct lua_State *L) > +lbox_tuple_field_by_path(struct lua_State *L) > { > + const char *field; > struct tuple *tuple = luaT_istuple(L, 1); > /* Is checked in Lua wrapper. */ > assert(tuple != NULL); > - assert(lua_isstring(L, 2)); > - size_t name_len; > - const char *name = lua_tolstring(L, 2, &name_len); > - uint32_t name_hash = lua_hashstring(L, 2); > - const char *field = > - tuple_field_by_name(tuple, name, name_len, name_hash); > - if (field == NULL) { > - lua_pushinteger(L, -1); > - lua_pushnil(L); > + if (lua_isnumber(L, 2)) { > + int index = lua_tointeger(L, 2); > + index -= TUPLE_INDEX_BASE; > + if (index < 0) { > +not_found: > + lua_pushinteger(L, -1); > + lua_pushnil(L); > + return 2; > + } > + field = tuple_field(tuple, index); > + if (field == NULL) > + goto not_found; > +push_value: > + lua_pushinteger(L, 0); > + luamp_decode(L, luaL_msgpack_default, &field); > return 2; > } > - lua_pushinteger(L, 0); > - luamp_decode(L, luaL_msgpack_default, &field); > - return 2; > + assert(lua_isstring(L, 2)); > + size_t path_len; > + const char *path = lua_tolstring(L, 2, &path_len); > + struct json_path_parser parser; > + struct json_path_node node; > + json_path_parser_create(&parser, path, path_len); > + int rc = json_path_next(&parser, &node); > + if (rc != 0 || node.type == JSON_PATH_END) > + luaL_error(L, "Error in path on position %d", rc); > + if (node.type == 
JSON_PATH_NUM) { > + int index = node.num; > + if (index == 0) > + goto not_found; > + index -= TUPLE_INDEX_BASE; > + field = tuple_field(tuple, index); > + if (field == NULL) > + goto not_found; > + } else { > + assert(node.type == JSON_PATH_STR); > + /* First part of a path is a field name. */ > + const char *name = node.str; > + uint32_t name_len = node.len; > + uint32_t name_hash; > + if (path_len == name_len) { > + name_hash = lua_hashstring(L, 2); > + } else { > + /* > + * If a string is "field....", then its > + * precalculated juajit hash can not be > + * used. A tuple dictionary hashes only > + * name, not path. > + */ > + name_hash = lua_hash(name, name_len); > + } > + field = tuple_field_by_name(tuple, name, name_len, name_hash); > + if (field == NULL) > + goto not_found; > + } > + while ((rc = json_path_next(&parser, &node)) == 0 && > + node.type != JSON_PATH_END) { > + if (node.type == JSON_PATH_NUM) { > + rc = tuple_field_go_to_index(&field, node.num); > + } else { > + assert(node.type == JSON_PATH_STR); > + rc = tuple_field_go_to_key(&field, node.str, node.len); > + } > + if (rc != 0) > + goto not_found; > + } > + if (rc == 0) > + goto push_value; > + luaL_error(L, "Error in path on position %d", rc); > + unreachable(); > + goto not_found; > } > > static int > @@ -470,8 +610,8 @@ static const struct luaL_Reg lbox_tuple_meta[] = { > {"tostring", lbox_tuple_to_string}, > {"slice", lbox_tuple_slice}, > {"transform", lbox_tuple_transform}, > - {"tuple_field_by_name", lbox_tuple_field_by_name}, > {"tuple_to_map", lbox_tuple_to_map}, > + {"tuple_field_by_path", lbox_tuple_field_by_path}, > {NULL, NULL} > }; > > diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua > index 001971a..b51b4df 100644 > --- a/src/box/lua/tuple.lua > +++ b/src/box/lua/tuple.lua > @@ -9,16 +9,9 @@ local internal = require('box.internal') > > ffi.cdef[[ > /** \cond public */ > -typedef struct tuple_format box_tuple_format_t; > - > -box_tuple_format_t * > 
-box_tuple_format_default(void); > > typedef struct tuple box_tuple_t; > > -box_tuple_t * > -box_tuple_new(box_tuple_format_t *format, const char *data, const char *end); > - > int > box_tuple_ref(box_tuple_t *tuple); > > @@ -34,9 +27,6 @@ box_tuple_bsize(const box_tuple_t *tuple); > ssize_t > box_tuple_to_buf(const box_tuple_t *tuple, char *buf, size_t size); > > -box_tuple_format_t * > -box_tuple_format(const box_tuple_t *tuple); > - > const char * > box_tuple_field(const box_tuple_t *tuple, uint32_t i); > > @@ -278,9 +268,9 @@ end > > msgpackffi.on_encode(const_tuple_ref_t, tuple_to_msgpack) > > -local function tuple_field_by_name(tuple, name) > +local function tuple_field_by_path(tuple, path) > tuple_check(tuple, "tuple['field_name']"); > - return internal.tuple.tuple_field_by_name(tuple, name) > + return internal.tuple.tuple_field_by_path(tuple, path) > end > > local methods = { > @@ -306,33 +296,22 @@ end > > methods["__serialize"] = tuple_totable -- encode hook for msgpack/yaml/json > > -local tuple_field = function(tuple, field_n) > - local field = builtin.box_tuple_field(tuple, field_n - 1) > - if field == nil then > - return nil > - end > - -- Use () to shrink stack to the first return value > - return (msgpackffi.decode_unchecked(field)) > -end > - > - > ffi.metatype(tuple_t, { > __len = function(tuple) > return builtin.box_tuple_field_count(tuple) > end; > __tostring = internal.tuple.tostring; > __index = function(tuple, key) > - if type(key) == "number" then > - return tuple_field(tuple, key) > - elseif type(key) == "string" then > - -- Try to get a field with a name = key. If it was not > - -- found (rc ~= 0) then return a method from the > - -- vtable. If a collision occurred, then fields have > - -- higher priority. For example, if a tuple T has a > - -- field with name 'bsize', then T.bsize returns field > - -- value, not tuple_bsize function. To access hidden > - -- methods use 'box.tuple.<method_name>(T, [args...])'. 
> - local rc, field = tuple_field_by_name(tuple, key) > + if type(key) == "string" or type(key) == "number" then > + -- Try to get a field by json path or by [index]. If > + -- it was not found (rc ~= 0) then return a method > + -- from the vtable. If a collision occurred, then > + -- fields have higher priority. For example, if a > + -- tuple T has a field with name 'bsize', then T.bsize > + -- returns field value, not tuple_bsize function. To > + -- access hidden methods use > + -- 'box.tuple.<method_name>(T, [args...])'. > + local rc, field = tuple_field_by_path(tuple, key) > if rc == 0 then > return field > end > diff --git a/test/engine/tuple.result b/test/engine/tuple.result > index b3b23b2..2d7367a 100644 > --- a/test/engine/tuple.result > +++ b/test/engine/tuple.result > @@ -590,6 +590,204 @@ maplen(t1map), t1map[1], t1map[2], t1map[3] > s:drop() > --- > ... > +format = {} > +--- > +... > +format[1] = {name = 'field1', type = 'unsigned'} > +--- > +... > +format[2] = {name = 'field2', type = 'array'} > +--- > +... > +format[3] = {name = 'field3', type = 'map'} > +--- > +... > +format[4] = {name = 'field4', type = 'string'} > +--- > +... > +s = box.schema.space.create('test', {format = format}) > +--- > +... > +pk = s:create_index('pk') > +--- > +... > +field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}} > +--- > +... > +field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200} > +--- > +... > +t = s:replace{1, field2, field3, "123456"} > +--- > +... > +t[1] > +--- > +- 1 > +... > +t[2] > +--- > +- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}] > +... > +t[3] > +--- > +- {'k1': 100, 'k3': [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}], -1: 200, 10: 100, 'k2': [ > + 1, 2, 3]} > +... > +t[4] > +--- > +- '123456' > +... > +t[2][1] > +--- > +- 1 > +... > +t["[2][1]"] > +--- > +- 1 > +... > +t[2][5] > +--- > +- [5, 6, 7] > +... > +t["[2][5]"] > +--- > +- [5, 6, 7] > +... > +t["[2][5][1]"] > +--- > +- 5 > +... 
> +t["[2][5][2]"] > +--- > +- 6 > +... > +t["[2][5][3]"] > +--- > +- 7 > +... > +t["[2][6].key"] > +--- > +- key1 > +... > +t["[2][6].value"] > +--- > +- value1 > +... > +t["[2][6]['key']"] > +--- > +- key1 > +... > +t["[2][6]['value']"] > +--- > +- value1 > +... > +t["[3].k3[2].c"] > +--- > +- 3 > +... > +t["[4]"] > +--- > +- '123456' > +... > +t.field1 > +--- > +- 1 > +... > +t.field2[5] > +--- > +- [5, 6, 7] > +... > +t[".field1"] > +--- > +- 1 > +... > +t["field1"] > +--- > +- 1 > +... > +t["[3][10]"] > +--- > +- 100 > +... > +-- Not found. > +t[0] > +--- > +- null > +... > +t["[0]"] > +--- > +- null > +... > +t["[1000]"] > +--- > +- null > +... > +t.field1000 > +--- > +- null > +... > +t["not_found"] > +--- > +- null > +... > +t["[2][5][10]"] > +--- > +- null > +... > +t["[2][6].key100"] > +--- > +- null > +... > +t["[2][0]"] -- 0-based index in array. > +--- > +- null > +... > +t["[4][3]"] -- Can not index string. > +--- > +- null > +... > +t["[4]['key']"] > +--- > +- null > +... > +-- Not found 'a'. Return 'null' despite of syntax error on a > +-- next position. > +t["a.b.c d.e.f"] > +--- > +- null > +... > +-- Sytax errors. > +t[""] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 0' > +... > +t["[2].[5]"] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 5' > +... > +t["[-1]"] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 2' > +... > +t[".."] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 2' > +... > +t["[["] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 2' > +... > +t["]]"] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 1' > +... > +t["{"] > +--- > +- error: 'builtin/box/tuple.lua:314: Error in path on position 1' > +... > +s:drop() > +--- > +... > engine = nil > --- > ... 
> diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua > index 6d7d254..ba3482d 100644 > --- a/test/engine/tuple.test.lua > +++ b/test/engine/tuple.test.lua > @@ -200,5 +200,64 @@ t1map = t1:tomap() > maplen(t1map), t1map[1], t1map[2], t1map[3] > s:drop() > > +format = {} > +format[1] = {name = 'field1', type = 'unsigned'} > +format[2] = {name = 'field2', type = 'array'} > +format[3] = {name = 'field3', type = 'map'} > +format[4] = {name = 'field4', type = 'string'} > +s = box.schema.space.create('test', {format = format}) > +pk = s:create_index('pk') > +field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}} > +field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200} > +t = s:replace{1, field2, field3, "123456"} > +t[1] > +t[2] > +t[3] > +t[4] > +t[2][1] > +t["[2][1]"] > +t[2][5] > +t["[2][5]"] > +t["[2][5][1]"] > +t["[2][5][2]"] > +t["[2][5][3]"] > +t["[2][6].key"] > +t["[2][6].value"] > +t["[2][6]['key']"] > +t["[2][6]['value']"] > +t["[3].k3[2].c"] > +t["[4]"] > +t.field1 > +t.field2[5] > +t[".field1"] > +t["field1"] > +t["[3][10]"] > + > +-- Not found. > +t[0] > +t["[0]"] > +t["[1000]"] > +t.field1000 > +t["not_found"] > +t["[2][5][10]"] > +t["[2][6].key100"] > +t["[2][0]"] -- 0-based index in array. > +t["[4][3]"] -- Can not index string. > +t["[4]['key']"] > +-- Not found 'a'. Return 'null' despite of syntax error on a > +-- next position. > +t["a.b.c d.e.f"] > + > +-- Sytax errors. > +t[""] > +t["[2].[5]"] > +t["[-1]"] > +t[".."] > +t["[["] > +t["]]"] > +t["{"] > + > +s:drop() > + > engine = nil > test_run = nil >
next prev parent reply other threads:[~2018-04-04 10:37 UTC|newest] Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-03-29 14:22 [tarantool-patches] [PATCH v2 0/3] tuple field access via a json path Kirill Shcherbatov 2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 1/3] Introduce json_path_parser Kirill Shcherbatov 2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields Kirill Shcherbatov 2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support Kirill Shcherbatov 2018-03-29 18:04 ` [tarantool-patches] " Kirill Shcherbatov 2018-03-30 10:24 ` v.shpilevoy 2018-03-30 10:25 ` v.shpilevoy 2018-04-02 19:19 ` Kirill Shcherbatov 2018-04-03 10:20 ` Vladislav Shpilevoy 2018-04-05 14:09 ` [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser Kirill Shcherbatov 2018-04-05 18:00 ` [tarantool-patches] " Kirill Shcherbatov 2018-04-05 23:32 ` Vladislav Shpilevoy 2018-04-04 10:37 ` Kirill Shcherbatov [this message] 2018-04-04 11:30 ` [tarantool-patches] Re: [PATCH v2 3/3] Multibyte characters support ICU Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=01a0181a-19f9-c38c-94ea-65ab549a4517@tarantool.org \ --to=kshcherbatov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.