From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org,
Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser.
Date: Fri, 6 Apr 2018 02:32:35 +0300 [thread overview]
Message-ID: <da8d7619-210d-a285-ebe1-6afc47df6c2e@tarantool.org> (raw)
In-Reply-To: <75b15d0f-0c35-dd26-4e30-4f085208d8d7@tarantool.org>
See my review fixes on branch in a separate commit. Please, bench the
branch with my review fixes, and without.
05.04.2018 21:00, Kirill Shcherbatov пишет:
>
> From 8d97afd7ac7a90e5dfb1e756fd9d389bafdadd56 Mon Sep 17 00:00:00 2001
> From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
> Date: Thu, 5 Apr 2018 20:35:25 +0300
> Subject: [PATCH] Accounted Vlad's comments
>
> ---
> src/box/CMakeLists.txt | 2 +-
> src/box/lua/tuple.c | 23 +++++++++++------------
> src/box/lua/tuple.lua | 11 +++++------
> src/box/tuple.h | 20 ++++++++++++++++++++
> src/box/tuple_format.c | 13 ++++++-------
> src/box/tuple_format.h | 4 +++-
> src/lib/json/path.c | 30 ++++++++++++++----------------
> src/lib/json/path.h | 1 +
> 8 files changed, 61 insertions(+), 43 deletions(-)
>
> >Четверг, 5 апреля 2018, 17:48 +03:00 от Vladislav Shpilevoy
> <v.shpilevoy@tarantool.org>:
> >
> >Hello. See 18 comments below.
> >
> >05.04.2018 17:06, Kirill Shcherbatov пишет:
> >> This is an automated email from the git hooks/post-receive script.
> >>
> >> kshcherbatov pushed a commit to branch gh-1285-tuple-field-by-json-icu
> >> in repository tarantool.
> >>
> >> commit ab52c7a18441037c19d0b2759558a7eb93aabb0d
> >> Author: Kirill Shcherbatov < kshcherbatov@tarantool.org >
> >> AuthorDate: Thu Apr 5 17:04:15 2018 +0300
> >>
> >> ICU Unicode support for JSON parser.
> >> ---
> >> src/box/CMakeLists.txt | 2 +-
> >> src/box/lua/tuple.c | 172 ++++++----------------------------------
> >> src/box/lua/tuple.lua | 7 +-
> >> src/box/tuple_format.c | 155 ++++++++++++++++++++++++++++++++++++
> >> src/box/tuple_format.h | 15 ++++
> >> src/lib/json/path.c | 191
> ++++++++++++++++++++++++++++++---------------
> >> src/lib/json/path.h | 25 ++++--
> >> test/engine/tuple.result | 57 ++++++++++----
> >> test/engine/tuple.test.lua | 13 ++-
> >> test/unit/CMakeLists.txt | 2 +-
> >> test/unit/json_path.c | 2 +-
> >> 11 files changed, 398 insertions(+), 243 deletions(-)
> >>
> >> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> >> index add0ff9..0f5b197 100644
> >> --- a/src/box/CMakeLists.txt
> >> +++ b/src/box/CMakeLists.txt
> >> @@ -44,7 +44,7 @@ add_library(tuple STATIC
> >> field_def.c
> >> opt_def.c
> >> )
> >> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES}
> ${ICU_LIBRARIES} misc bit)
> >> +target_link_libraries(tuple json_path box_error core
> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
> >1. Remove ${common_libraries} please, the tuple lib must not depend on
> >Lua. See how to do it in the next comments.
>
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index e5a0c21..88c2c60 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -45,7 +45,7 @@ add_library(tuple STATIC
> field_def.c
> opt_def.c
> )
> -target_link_libraries(tuple json_path box_error core
> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
> +target_link_libraries(tuple json_path box_error core
> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
> add_library(xlog STATIC xlog.c)
> target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
>
> >> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field,
> const char *key, int len)
> >> static int
> >> lbox_tuple_field_by_path(struct lua_State *L)
> >> {
> >> -const char *field;
> >> struct tuple *tuple = luaT_istuple(L, 1);
> >> /* Is checked in Lua wrapper. */
> >> assert(tuple != NULL);
> >> -if (lua_isnumber(L, 2)) {
> >> +const char *field = NULL;
> >> +if (!lua_isnumber(L, 2)) {
> >> +/* string */
> >> +field = tuple_field_raw_by_path(tuple_format(tuple),
> tuple_data(tuple),
> >> + tuple_field_map(tuple),
> >> + lua_tostring(L, 2), (int)lua_strlen(L, 2),
> >> + lua_hashstring(L, 2));
> >2. Add a tuple_field_by_path wrapper for it, as I told you verbally.
> >Moreover
> >this code is out of 80 symbols width.
> >> +/* error code or message */
> >> +struct error *e = diag_last_error(diag_get());
> >3. Please, move all of this error checking into !lua_isnumber branch.
> >Tuple_field never returns
> >error, because sometimes absent field is ok.
> >> +if (field == NULL && e != NULL)
> >> +lua_pushstring(L, e->errmsg);
> >> +else
> >> +lua_pushinteger(L, -1*(field == NULL));
> >> +if (field)
> >4. Use explicit != NULL, please.
>
> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 3b89cdd..ab62663 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c
> @@ -422,28 +422,27 @@ lbox_tuple_field_by_path(struct lua_State *L)
> const char *field = NULL;
> if (!lua_isnumber(L, 2)) {
> /* string */
> - field = tuple_field_raw_by_path(tuple_format(tuple),
> tuple_data(tuple),
> - tuple_field_map(tuple),
> - lua_tostring(L, 2), (int)lua_strlen(L, 2),
> - lua_hashstring(L, 2));
> + field = tuple_field_by_path(tuple, lua_tostring(L, 2),
> + (int)lua_strlen(L, 2),
> + lua_hashstring(L, 2));
> + struct error *e = diag_last_error(diag_get());
> + if (field == NULL && e) {
> + lua_pushnil(L);
> + lua_pushstring(L, e->errmsg);
> + return 2;
> + }
> } else {
> int index = lua_tointeger(L, 2);
> index -= TUPLE_INDEX_BASE;
> if (index >= 0)
> field = tuple_field(tuple, index);
> }
> - /* error code or message */
> - struct error *e = diag_last_error(diag_get());
> - if (field == NULL && e != NULL)
> - lua_pushstring(L, e->errmsg);
> - else
> - lua_pushinteger(L, -1*(field == NULL));
> - if (field)
> + if (field != NULL)
> luamp_decode(L, luaL_msgpack_default, &field);
> else
> lua_pushnil(L);
> diag_clear(diag_get());
> - return 2;
> + return 1;
> }
> static int
>
> >> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
> >> index b51b4df..3785568 100644
> >> --- a/src/box/lua/tuple.lua
> >> +++ b/src/box/lua/tuple.lua
> >> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
> >> local fun = require('fun')
> >> local buffer = require('buffer')
> >> local internal = require('box.internal')
> >> +local log = require('log')
> >5. Unsed module.
>
> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
> index 3785568..1c7e2a5 100644
> --- a/src/box/lua/tuple.lua
> +++ b/src/box/lua/tuple.lua
> @@ -6,7 +6,6 @@ local msgpackffi = require('msgpackffi')
> local fun = require('fun')
> local buffer = require('buffer')
> local internal = require('box.internal')
> -local log = require('log')
> ffi.cdef[[
> /** \cond public */
>
> >>
> >> ffi.cdef[[
> >> /** \cond public */
> >> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
> >> end;
> >> __tostring = internal.tuple.tostring;
> >> __index = function(tuple, key)
> >> + local rc, field
> >> if type(key) == "string" or type(key) == "number" then
> >> -- Try to get a field by json path or by [index]. If
> >> -- it was not found (rc ~= 0) then return a method
> >> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
> >> -- returns field value, not tuple_bsize function. To
> >> -- access hidden methods use
> >> -- 'box.tuple.<method_name>(T, [args...])'.
> >> - local rc, field = tuple_field_by_path(tuple, key)
> >> + rc, field = tuple_field_by_path(tuple, key)
> >> if rc == 0 then
> >> return field
> >> end
> >> end
> >> - return methods[key]
> >> + local method = methods[key]
> >> + return method == box.NULL and rc ~= -1 and rc or method
> >6. You must not return -1. Error must be nil + error message/object.
>
> @@ -303,7 +302,7 @@ ffi.metatype(tuple_t, {
> end;
> __tostring = internal.tuple.tostring;
> __index = function(tuple, key)
> - local rc, field
> + local res, err
> if type(key) == "string" or type(key) == "number" then
> -- Try to get a field by json path or by [index]. If
> -- it was not found (rc ~= 0) then return a method
> @@ -313,13 +312,13 @@ ffi.metatype(tuple_t, {
> -- returns field value, not tuple_bsize function. To
> -- access hidden methods use
> -- 'box.tuple.<method_name>(T, [args...])'.
> - rc, field = tuple_field_by_path(tuple, key)
> - if rc == 0 then
> - return field
> + res, err = tuple_field_by_path(tuple, key)
> + if res ~= box.NULL then
> + return res
> end
> end
> local method = methods[key]
> - return method == box.NULL and rc ~= -1 and rc or method
> + return method == box.NULL and err or method
> end;
> __eq = function(tuple_a, tuple_b)
> -- Two tuple are considered equal if they have same memory
> address
>
> >> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> >> index 3e2c8bf..f2dffaf 100644
> >> --- a/src/box/tuple_format.c
> >> +++ b/src/box/tuple_format.c
> >> @@ -28,6 +28,8 @@
> >> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> >> * SUCH DAMAGE.
> >> */
> >> +#include <lib/json/path.h>
> >> +#include <third_party/luajit/src/lua.h>
> >7. Do not include lua headers in files, that are not in lua/ dir.
> >Tarantool core must not
> >depend on lua. As I told you earlier, you must use field_name_hash
> >function, declared in
> >tuple_dictionary.h. It allows to remove lua dependency and remove
> >${common_libraries}
> >from cmake file.
>
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index 6ebedf5..f084c9d 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -514,6 +514,26 @@ tuple_field(const struct tuple *tuple, uint32_t
> fieldno)
> }
> /**
> + * Get tuple field by its path.
> + * @param tuple.
> + * @param path Field path.
> + * @param path_len Length of @a path.
> + * @param path_hash Hash of @a path.
> + *
> + * @retval not NULL on field found.
> + * @retval NULL No field by @a path.
> + */
> +static inline const char *
> +tuple_field_by_path(struct tuple *tuple, const char *path,
> + uint32_t path_len, uint32_t path_hash)
> +{
> + return tuple_field_raw_by_path(tuple_format(tuple),
> + tuple_data(tuple),
> + tuple_field_map(tuple),
> + path, path_len, path_hash);
> +}
> +
> +/**
> * Get tuple field by its name.
> * @param tuple Tuple to get field from.
> * @param name Field name.
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 9c6e3da..8ee7948 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -29,8 +29,8 @@
> * SUCH DAMAGE.
> */
> #include <lib/json/path.h>
> -#include <third_party/luajit/src/lua.h>
> #include "tuple_format.h"
> +#include "tuple_dictionary.h"
> /** Global table of tuple formats */
> struct tuple_format **tuple_formats;
>
> >> +const char *
> >> +tuple_field_raw_by_path(struct tuple_format *format, const char
> *tuple,
> >> + const uint32_t *field_map,
> >> + const char *path,uint32_t path_len, uint32_t path_hash)
> >8. Path argument can be placed on the previous line.
> >
> >9. After ',' put a space please.
> >> +{
> >> +int rc = 0;
> >> +const char *field;
> >> +struct json_path_parser parser;
> >> +struct json_path_node node;
> >> +json_path_parser_create(&parser, path, path_len);
> >> +rc = json_path_next(&parser, &node);
> >> +if (rc != 0 || node.type == JSON_PATH_END)
> >> +goto not_found;
> >> +if (node.type == JSON_PATH_NUM) {
> >> +int index = node.num;
> >> +if (index == 0)
> >> +goto not_found;
> >> +index -= TUPLE_INDEX_BASE;
> >> +field = tuple_field_raw(format, tuple,
> >> + field_map, index);
> >> +if (field == NULL)
> >> +goto not_found;
> >> +} else {
> >> +assert(node.type == JSON_PATH_STR);
> >> +/* First part of a path is a field name. */
> >> +const char *name = node.str;
> >> +uint32_t name_len = node.len;
> >> +uint32_t name_hash;
> >> +if (path_len == name_len) {
> >> +name_hash = path_hash;
> >> +} else {
> >> +/*
> >> + * If a string is "field....", then its
> >> + * precalculated juajit hash can not be
> >> + * used. A tuple dictionary hashes only
> >> + * name, not path.
> >> + */
> >> +name_hash = lua_hash(name, name_len);
> >10. Good comment! One remark - use field_name_hash instead of explicit
> >lua_hash.
> >> +}
> >> +field = tuple_field_raw_by_name(format, tuple,
> >> + field_map,
> >> + name, name_len, name_hash);
> >> +if (field == NULL)
> >> +goto not_found;
> >> +}
> >> +while ((rc = json_path_next(&parser, &node)) == 0 &&
> >> + node.type != JSON_PATH_END) {
> >> +if (node.type == JSON_PATH_NUM) {
> >> +rc = tuple_field_go_to_index(&field, node.num);
> >> +} else {
> >> +assert(node.type == JSON_PATH_STR);
> >> +rc = tuple_field_go_to_key(&field, node.str, node.len);
> >> +}
> >> +if (rc != 0) {
> >> +rc = 0; /* prevent error raise */
> >> +goto not_found;
> >> +}
> >> +}
> >> +if (rc == 0)
> >> +return field;
> >> +not_found:
> >11. Please, put labels to the line beginning.
>
> @@ -563,8 +563,8 @@ tuple_field_go_to_key(const char **field, const
> char *key, int len)
> const char *
> tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> - const uint32_t *field_map,
> - const char *path,uint32_t path_len, uint32_t path_hash)
> + const uint32_t *field_map, const char *path,
> + uint32_t path_len, uint32_t path_hash)
> {
> int rc = 0;
> const char *field;
> @@ -598,7 +598,7 @@ tuple_field_raw_by_path(struct tuple_format
> *format, const char *tuple,
> * used. A tuple dictionary hashes only
> * name, not path.
> */
> - name_hash = lua_hash(name, name_len);
> + name_hash = field_name_hash(name, name_len);
> }
> field = tuple_field_raw_by_name(format, tuple,
> field_map,
> @@ -621,7 +621,7 @@ tuple_field_raw_by_path(struct tuple_format
> *format, const char *tuple,
> }
> if (rc == 0)
> return field;
> - not_found:
> +not_found:
> /* try to use the whole string as identifier */
> field = tuple_field_raw_by_name(format, tuple,
> field_map,
> @@ -629,7 +629,6 @@ tuple_field_raw_by_path(struct tuple_format
> *format, const char *tuple,
> if (field)
> return field;
> if (rc || path_len == 0)
> - diag_set(IllegalParams, "Error in path on "
> - "position %d", rc);
> + diag_set(IllegalParams, "Error in path on position %d", rc);
> return NULL;
> }
>
> >> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> >> index d33c77a..3e7e595 100644
> >> --- a/src/box/tuple_format.h
> >> +++ b/src/box/tuple_format.h
> >> @@ -343,6 +343,21 @@ tuple_field_raw_by_name(struct tuple_format
> *format, const char *tuple,
> >> return tuple_field_raw(format, tuple, field_map, fieldno);
> >> }
> >>
> >> +/**
> >> + * Get tuple field by its path.
> >> + * @param Tuple.
> >> + * @param path Field path.
> >> + * @param path_len Length of @a path.
> >> + * @param path_hash Hash of @a path.
> >> + *
> >> + * @retval not NULL on field found.
> >> + * @retval NULL No field by @a path.
> >> + */
> >> +const char *
> >> +tuple_field_raw_by_path(struct tuple_format *format, const char
> *tuple,
> >> + const uint32_t *field_map,
> >> + const char *path,uint32_t path_len, uint32_t path_hash);
> >12. Please, add tuple_field_by_path as well to tuple.h, that calls this
> >function. It allows you
> >to call tuple_field_by_path in lua/tuple.c.
>
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index ce17085..ffa3d16 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -379,7 +379,9 @@ tuple_field_raw_by_name(struct tuple_format
> *format, const char *tuple,
> /**
> * Get tuple field by its path.
> - * @param Tuple.
> + * @param format Tuple format.
> + * @param tuple MessagePack tuple's body.
> + * @param field_map Tuple field map.
> * @param path Field path.
> * @param path_len Length of @a path.
> * @param path_hash Hash of @a path.
>
> >> +
> >> #if defined(__cplusplus)
> >> } /* extern "C" */
> >> #endif /* defined(__cplusplus) */
> >> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
> >> index 4a6174e..f3fcc46 100644
> >> --- a/src/lib/json/path.c
> >> +++ b/src/lib/json/path.c
> >> @@ -31,8 +31,10 @@
> >>
> >> #include "path.h"
> >> #include <ctype.h>
> >> +#include <unicode/uchar.h>
> >> #include "trivia/util.h"
> >>
> >> +
> >13. Garbage diff.
> >> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser
> *parser, struct json_path_node *node)
> >> static inline int
> >> json_parse_integer(struct json_path_parser *parser, struct
> json_path_node *node)
> >> {
> >> -const char *end = parser->src + parser->src_len;
> >> -const char *pos = parser->pos;
> >> -assert(pos < end);
> >> -const char *str = pos;
> >> -int len = 0;
> >> -for (char c = *pos; pos < end && isdigit(c); c = *++pos)
> >> -++len;
> >> -if (len == 0)
> >> -return pos - parser->src + 1;
> >> -parser->pos = pos;
> >> +assert(parser->offset < parser->src_len);
> >> +int str_offset = parser->offset;
> >> +int last_offset = str_offset;
> >> +int len = 0, rc = 0;
> >> +UChar32 c = 0;
> >> +
> >> +while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
> >14. I remember, that you told me, that a digit can be read using simple
> >char. Can you please implement
> >this cycle with no UChar?
>
> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
> index f3fcc46..e253ec4 100644
> --- a/src/lib/json/path.c
> +++ b/src/lib/json/path.c
> @@ -104,7 +104,7 @@ json_parse_string(struct json_path_parser *parser,
> struct json_path_node *node)
> while (((rc = json_read_sign(parser, &c)) > 0)
> && c != quote_type);
> - int len = (int)(parser->offset - str_offset - 1);
> + int len = parser->offset - str_offset - 1;
> if (rc < 0 || len == 0)
> return -1;
> if (c != (UChar32)quote_type) {
> @@ -130,23 +130,21 @@ json_parse_string(struct json_path_parser
> *parser, struct json_path_node *node)
> static inline int
> json_parse_integer(struct json_path_parser *parser, struct
> json_path_node *node)
> {
> - assert(parser->offset < parser->src_len);
> - int str_offset = parser->offset;
> - int last_offset = str_offset;
> - int len = 0, rc = 0;
> - UChar32 c = 0;
> -
> - while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
> - last_offset = parser->offset;
> - len++;
> - }
> - if (rc > 0 && len > 0 && !u_isdigit(c))
> - json_reset_pos(parser, last_offset, 1);
> - if (rc < 0 || len == 0)
> + const char *end = parser->src + parser->src_len;
> + const char *pos = parser->src + parser->offset;
> + assert(pos < end);
> + const char *str = pos;
> + int len = 0;
> + for (char c = *pos; pos < end && isdigit(c); c = *++pos)
> + ++len;
> + if (len == 0) {
> + parser->invalid_sign_off++;
> return -1;
> -
> + }
> + parser->offset += len;
> + parser->invalid_sign_off += len;
> node->type = JSON_PATH_NUM;
> - node->num = strntoull(parser->src + str_offset, len);
> + node->num = strntoull(str, len);
> return 0;
> }
> >> >> +static inline char
> >> +json_curr_char(struct json_path_parser *parser)
> >> +{
> >> +return *json_path_curr_substring(parser);
> >> +}
> >15. I do not like that on each char you must do string + offset. Please,
> >find a way to
> >store this position. Why did you delete parser->pos?
>
> I have no good ideas. This code is also not hot: just on each new
> lexem, believe nothing critical.
>
> >> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
> >> index 6e8db4c..15292fb 100644
> >> --- a/src/lib/json/path.h
> >> +++ b/src/lib/json/path.h
> >> @@ -45,10 +45,11 @@ extern "C" {
> >> struct json_path_parser {
> >> /** Source string. */
> >> const char *src;
> >> -/** Length of src. */
> >> +/** Length of string. */
> >> int src_len;
> >> -/** Current parser's position. */
> >> -const char *pos;
> >> +/** Current parser's offset. */
> >> +int offset;
> >> +int invalid_sign_off;
> >16. Please, add a comment.
>
> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
> index 15292fb..09f2e6f 100644
> --- a/src/lib/json/path.h
> +++ b/src/lib/json/path.h
> @@ -49,6 +49,7 @@ struct json_path_parser {
> int src_len;
> /** Current parser's offset. */
> int offset;
> + /** Successfully parsed signs count. */
> int invalid_sign_off;
> };
> -- 2.7.4
>
> On 05.04.2018 17:09, Kirill Shcherbatov wrote:
>> ---
>> src/box/CMakeLists.txt | 2 +-
>> src/box/lua/tuple.c | 172
>> ++++++----------------------------------
>> src/box/lua/tuple.lua | 7 +-
>> src/box/tuple_format.c | 155 ++++++++++++++++++++++++++++++++++++
>> src/box/tuple_format.h | 15 ++++
>> src/lib/json/path.c | 191
>> ++++++++++++++++++++++++++++++---------------
>> src/lib/json/path.h | 25 ++++--
>> test/engine/tuple.result | 57 ++++++++++----
>> test/engine/tuple.test.lua | 13 ++-
>> test/unit/CMakeLists.txt | 2 +-
>> test/unit/json_path.c | 2 +-
>> 11 files changed, 398 insertions(+), 243 deletions(-)
>>
>> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
>> index f053ae4..e5a0c21 100644
>> --- a/src/box/CMakeLists.txt
>> +++ b/src/box/CMakeLists.txt
>> @@ -45,7 +45,7 @@ add_library(tuple STATIC
>> field_def.c
>> opt_def.c
>> )
>> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES}
>> ${ICU_LIBRARIES} misc bit)
>> +target_link_libraries(tuple json_path box_error core
>> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
>> add_library(xlog STATIC xlog.c)
>> target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
>> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
>> index 99b9ff2..3b89cdd 100644
>> --- a/src/box/lua/tuple.c
>> +++ b/src/box/lua/tuple.c
>> @@ -403,87 +403,6 @@ lbox_tuple_transform(struct lua_State *L)
>> }
>> /**
>> - * Propagate @a field to MessagePack(field)[index].
>> - * @param[in][out] field Field to propagate.
>> - * @param index 1-based index to propagate to.
>> - *
>> - * @retval 0 Success, the index was found.
>> - * @retval -1 Not found.
>> - */
>> -static inline int
>> -tuple_field_go_to_index(const char **field, uint64_t index)
>> -{
>> - assert(index >= 0);
>> - enum mp_type type = mp_typeof(**field);
>> - if (type == MP_ARRAY) {
>> - if (index == 0)
>> - return -1;
>> - /* Make index 0-based. */
>> - index -= TUPLE_INDEX_BASE;
>> - uint32_t count = mp_decode_array(field);
>> - if (index >= count)
>> - return -1;
>> - for (; index > 0; --index)
>> - mp_next(field);
>> - return 0;
>> - } else if (type == MP_MAP) {
>> - uint64_t count = mp_decode_map(field);
>> - for (; count > 0; --count) {
>> - type = mp_typeof(**field);
>> - if (type == MP_UINT) {
>> - uint64_t value = mp_decode_uint(field);
>> - if (value == index)
>> - return 0;
>> - } else if (type == MP_INT) {
>> - int64_t value = mp_decode_int(field);
>> - if (value >= 0 && (uint64_t)value == index)
>> - return 0;
>> - } else {
>> - /* Skip key. */
>> - mp_next(field);
>> - }
>> - /* Skip value. */
>> - mp_next(field);
>> - }
>> - }
>> - return -1;
>> -}
>> -
>> -/**
>> - * Propagate @a field to MessagePack(field)[key].
>> - * @param[in][out] field Field to propagate.
>> - * @param key Key to propagate to.
>> - * @param len Length of @a key.
>> - *
>> - * @retval 0 Success, the index was found.
>> - * @retval -1 Not found.
>> - */
>> -static inline int
>> -tuple_field_go_to_key(const char **field, const char *key, int len)
>> -{
>> - enum mp_type type = mp_typeof(**field);
>> - if (type != MP_MAP)
>> - return -1;
>> - uint64_t count = mp_decode_map(field);
>> - for (; count > 0; --count) {
>> - type = mp_typeof(**field);
>> - if (type == MP_STR) {
>> - uint32_t value_len;
>> - const char *value = mp_decode_str(field, &value_len);
>> - if (value_len == (uint)len &&
>> - memcmp(value, key, len) == 0)
>> - return 0;
>> - } else {
>> - /* Skip key. */
>> - mp_next(field);
>> - }
>> - /* Skip value. */
>> - mp_next(field);
>> - }
>> - return -1;
>> -}
>> -
>> -/**
>> * Find a tuple field by JSON path.
>> * @param L Lua state.
>> * @param tuple 1-th argument on a lua stack, tuple to get field
>> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, const
>> char *key, int len)
>> static int
>> lbox_tuple_field_by_path(struct lua_State *L)
>> {
>> - const char *field;
>> struct tuple *tuple = luaT_istuple(L, 1);
>> /* Is checked in Lua wrapper. */
>> assert(tuple != NULL);
>> - if (lua_isnumber(L, 2)) {
>> + const char *field = NULL;
>> + if (!lua_isnumber(L, 2)) {
>> + /* string */
>> + field = tuple_field_raw_by_path(tuple_format(tuple),
>> tuple_data(tuple),
>> + tuple_field_map(tuple),
>> + lua_tostring(L, 2),
>> (int)lua_strlen(L, 2),
>> + lua_hashstring(L, 2));
>> + } else {
>> int index = lua_tointeger(L, 2);
>> index -= TUPLE_INDEX_BASE;
>> - if (index < 0) {
>> -not_found:
>> - lua_pushinteger(L, -1);
>> - lua_pushnil(L);
>> - return 2;
>> - }
>> - field = tuple_field(tuple, index);
>> - if (field == NULL)
>> - goto not_found;
>> -push_value:
>> - lua_pushinteger(L, 0);
>> - luamp_decode(L, luaL_msgpack_default, &field);
>> - return 2;
>> + if (index >= 0)
>> + field = tuple_field(tuple, index);
>> }
>> - assert(lua_isstring(L, 2));
>> - size_t path_len;
>> - const char *path = lua_tolstring(L, 2, &path_len);
>> - struct json_path_parser parser;
>> - struct json_path_node node;
>> - json_path_parser_create(&parser, path, path_len);
>> - int rc = json_path_next(&parser, &node);
>> - if (rc != 0 || node.type == JSON_PATH_END)
>> - luaL_error(L, "Error in path on position %d", rc);
>> - if (node.type == JSON_PATH_NUM) {
>> - int index = node.num;
>> - if (index == 0)
>> - goto not_found;
>> - index -= TUPLE_INDEX_BASE;
>> - field = tuple_field(tuple, index);
>> - if (field == NULL)
>> - goto not_found;
>> - } else {
>> - assert(node.type == JSON_PATH_STR);
>> - /* First part of a path is a field name. */
>> - const char *name = node.str;
>> - uint32_t name_len = node.len;
>> - uint32_t name_hash;
>> - if (path_len == name_len) {
>> - name_hash = lua_hashstring(L, 2);
>> - } else {
>> - /*
>> - * If a string is "field....", then its
>> - * precalculated juajit hash can not be
>> - * used. A tuple dictionary hashes only
>> - * name, not path.
>> - */
>> - name_hash = lua_hash(name, name_len);
>> - }
>> - field = tuple_field_by_name(tuple, name, name_len, name_hash);
>> - if (field == NULL)
>> - goto not_found;
>> - }
>> - while ((rc = json_path_next(&parser, &node)) == 0 &&
>> - node.type != JSON_PATH_END) {
>> - if (node.type == JSON_PATH_NUM) {
>> - rc = tuple_field_go_to_index(&field, node.num);
>> - } else {
>> - assert(node.type == JSON_PATH_STR);
>> - rc = tuple_field_go_to_key(&field, node.str, node.len);
>> - }
>> - if (rc != 0)
>> - goto not_found;
>> - }
>> - if (rc == 0)
>> - goto push_value;
>> - luaL_error(L, "Error in path on position %d", rc);
>> - unreachable();
>> - goto not_found;
>> + /* error code or message */
>> + struct error *e = diag_last_error(diag_get());
>> + if (field == NULL && e != NULL)
>> + lua_pushstring(L, e->errmsg);
>> + else
>> + lua_pushinteger(L, -1*(field == NULL));
>> + if (field)
>> + luamp_decode(L, luaL_msgpack_default, &field);
>> + else
>> + lua_pushnil(L);
>> + diag_clear(diag_get());
>> + return 2;
>> }
>> static int
>> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
>> index b51b4df..3785568 100644
>> --- a/src/box/lua/tuple.lua
>> +++ b/src/box/lua/tuple.lua
>> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
>> local fun = require('fun')
>> local buffer = require('buffer')
>> local internal = require('box.internal')
>> +local log = require('log')
>> ffi.cdef[[
>> /** \cond public */
>> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
>> end;
>> __tostring = internal.tuple.tostring;
>> __index = function(tuple, key)
>> + local rc, field
>> if type(key) == "string" or type(key) == "number" then
>> -- Try to get a field by json path or by [index]. If
>> -- it was not found (rc ~= 0) then return a method
>> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
>> -- returns field value, not tuple_bsize function. To
>> -- access hidden methods use
>> -- 'box.tuple.<method_name>(T, [args...])'.
>> - local rc, field = tuple_field_by_path(tuple, key)
>> + rc, field = tuple_field_by_path(tuple, key)
>> if rc == 0 then
>> return field
>> end
>> end
>> - return methods[key]
>> + local method = methods[key]
>> + return method == box.NULL and rc ~= -1 and rc or method
>> end;
>> __eq = function(tuple_a, tuple_b)
>> -- Two tuple are considered equal if they have same memory
>> address
>> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
>> index e458f49..9c6e3da 100644
>> --- a/src/box/tuple_format.c
>> +++ b/src/box/tuple_format.c
>> @@ -28,6 +28,8 @@
>> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> * SUCH DAMAGE.
>> */
>> +#include <lib/json/path.h>
>> +#include <third_party/luajit/src/lua.h>
>> #include "tuple_format.h"
>> /** Global table of tuple formats */
>> @@ -478,3 +480,156 @@ box_tuple_format_unref(box_tuple_format_t *format)
>> {
>> tuple_format_unref(format);
>> }
>> +
>> +/**
>> + * Propagate @a field to MessagePack(field)[index].
>> + * @param[in][out] field Field to propagate.
>> + * @param index 1-based index to propagate to.
>> + *
>> + * @retval 0 Success, the index was found.
>> + * @retval -1 Not found.
>> + */
>> +static inline int
>> +tuple_field_go_to_index(const char **field, uint64_t index)
>> +{
>> + enum mp_type type = mp_typeof(**field);
>> + if (type == MP_ARRAY) {
>> + if (index == 0)
>> + return -1;
>> + /* Make index 0-based. */
>> + index -= TUPLE_INDEX_BASE;
>> + uint32_t count = mp_decode_array(field);
>> + if (index >= count)
>> + return -1;
>> + for (; index > 0; --index)
>> + mp_next(field);
>> + return 0;
>> + } else if (type == MP_MAP) {
>> + uint64_t count = mp_decode_map(field);
>> + for (; count > 0; --count) {
>> + type = mp_typeof(**field);
>> + if (type == MP_UINT) {
>> + uint64_t value = mp_decode_uint(field);
>> + if (value == index)
>> + return 0;
>> + } else if (type == MP_INT) {
>> + int64_t value = mp_decode_int(field);
>> + if (value >= 0 && (uint64_t)value == index)
>> + return 0;
>> + } else {
>> + /* Skip key. */
>> + mp_next(field);
>> + }
>> + /* Skip value. */
>> + mp_next(field);
>> + }
>> + }
>> + return -1;
>> +}
>> +
>> +/**
>> + * Propagate @a field to MessagePack(field)[key].
>> + * @param[in][out] field Field to propagate.
>> + * @param key Key to propagate to.
>> + * @param len Length of @a key.
>> + *
>> + * @retval 0 Success, the index was found.
>> + * @retval -1 Not found.
>> + */
>> +static inline int
>> +tuple_field_go_to_key(const char **field, const char *key, int len)
>> +{
>> + enum mp_type type = mp_typeof(**field);
>> + if (type != MP_MAP)
>> + return -1;
>> + uint64_t count = mp_decode_map(field);
>> + for (; count > 0; --count) {
>> + type = mp_typeof(**field);
>> + if (type == MP_STR) {
>> + uint32_t value_len;
>> + const char *value = mp_decode_str(field, &value_len);
>> + if (value_len == (uint)len &&
>> + memcmp(value, key, len) == 0)
>> + return 0;
>> + } else {
>> + /* Skip key. */
>> + mp_next(field);
>> + }
>> + /* Skip value. */
>> + mp_next(field);
>> + }
>> + return -1;
>> +}
>> +
>> +const char *
>> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>> + const uint32_t *field_map,
>> + const char *path,uint32_t path_len, uint32_t
>> path_hash)
>> +{
>> + int rc = 0;
>> + const char *field;
>> + struct json_path_parser parser;
>> + struct json_path_node node;
>> + json_path_parser_create(&parser, path, path_len);
>> + rc = json_path_next(&parser, &node);
>> + if (rc != 0 || node.type == JSON_PATH_END)
>> + goto not_found;
>> + if (node.type == JSON_PATH_NUM) {
>> + int index = node.num;
>> + if (index == 0)
>> + goto not_found;
>> + index -= TUPLE_INDEX_BASE;
>> + field = tuple_field_raw(format, tuple,
>> + field_map, index);
>> + if (field == NULL)
>> + goto not_found;
>> + } else {
>> + assert(node.type == JSON_PATH_STR);
>> + /* First part of a path is a field name. */
>> + const char *name = node.str;
>> + uint32_t name_len = node.len;
>> + uint32_t name_hash;
>> + if (path_len == name_len) {
>> + name_hash = path_hash;
>> + } else {
>> + /*
>> + * If a string is "field....", then its
>> + * precalculated juajit hash can not be
>> + * used. A tuple dictionary hashes only
>> + * name, not path.
>> + */
>> + name_hash = lua_hash(name, name_len);
>> + }
>> + field = tuple_field_raw_by_name(format, tuple,
>> + field_map,
>> + name, name_len, name_hash);
>> + if (field == NULL)
>> + goto not_found;
>> + }
>> + while ((rc = json_path_next(&parser, &node)) == 0 &&
>> + node.type != JSON_PATH_END) {
>> + if (node.type == JSON_PATH_NUM) {
>> + rc = tuple_field_go_to_index(&field, node.num);
>> + } else {
>> + assert(node.type == JSON_PATH_STR);
>> + rc = tuple_field_go_to_key(&field, node.str, node.len);
>> + }
>> + if (rc != 0) {
>> + rc = 0; /* prevent error raise */
>> + goto not_found;
>> + }
>> + }
>> + if (rc == 0)
>> + return field;
>> + not_found:
>> + /* try to use the whole string as identifier */
>> + field = tuple_field_raw_by_name(format, tuple,
>> + field_map,
>> + path, path_len, path_hash);
>> + if (field)
>> + return field;
>> + if (rc || path_len == 0)
>> + diag_set(IllegalParams, "Error in path on "
>> + "position %d", rc);
>> + return NULL;
>> +}
>> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
>> index d35182d..ce17085 100644
>> --- a/src/box/tuple_format.h
>> +++ b/src/box/tuple_format.h
>> @@ -377,6 +377,21 @@ tuple_field_raw_by_name(struct tuple_format
>> *format, const char *tuple,
>> return tuple_field_raw(format, tuple, field_map, fieldno);
>> }
>> +/**
>> + * Get tuple field by its path.
>> + * @param Tuple.
>> + * @param path Field path.
>> + * @param path_len Length of @a path.
>> + * @param path_hash Hash of @a path.
>> + *
>> + * @retval not NULL on field found.
>> + * @retval NULL No field by @a path.
>> + */
>> +const char *
>> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>> + const uint32_t *field_map,
>> + const char *path,uint32_t path_len, uint32_t
>> path_hash);
>> +
>> #if defined(__cplusplus)
>> } /* extern "C" */
>> #endif /* defined(__cplusplus) */
>> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
>> index 4a6174e..f3fcc46 100644
>> --- a/src/lib/json/path.c
>> +++ b/src/lib/json/path.c
>> @@ -31,8 +31,10 @@
>> #include "path.h"
>> #include <ctype.h>
>> +#include <unicode/uchar.h>
>> #include "trivia/util.h"
>> +
>> /** Same as strtoull(), but with limited length. */
>> static inline uint64_t
>> strntoull(const char *src, int len) {
>> @@ -45,6 +47,42 @@ strntoull(const char *src, int len) {
>> }
>> /**
>> + * Parse string and update parser's state.
>> + * @param parser JSON path parser. Upates pos, signs_read.
>> + * @param[out] UChar32 to store result.
>> + *
>> + * @retval 1 Success.
>> + * @retval 0 End of string.
>> + * @retval -1 Parse error.
>> + */
>> +static inline int
>> +json_read_sign(struct json_path_parser *parser, UChar32 *out)
>> +{
>> + if (unlikely(parser->offset == parser->src_len))
>> + return 0;
>> + UChar32 c;
>> + U8_NEXT_OR_FFFD(parser->src, parser->offset, parser->src_len, c)
>> + if (c == 0xFFFD)
>> + return -1;
>> + *out = c;
>> + parser->invalid_sign_off += 1;
>> + return 1;
>> +}
>> +
>> +/**
>> + * Reset parser state to previous one.
>> + * @param parser JSON path parser.
>> + * @param old parser read offset.
>> + * @param signs to drop in signs_read counter.
>> + */
>> +static inline void
>> +json_reset_pos(struct json_path_parser *parser, int old_offset, int
>> signs)
>> +{
>> + parser->offset = old_offset;
>> + parser->invalid_sign_off -= signs;
>> +}
>> +
>> +/**
>> * Parse string identifier in quotes. Parser either stops right
>> * after the closing quote, or returns an error position.
>> * @param parser JSON path parser.
>> @@ -56,24 +94,26 @@ strntoull(const char *src, int len) {
>> static inline int
>> json_parse_string(struct json_path_parser *parser, struct
>> json_path_node *node)
>> {
>> - const char *end = parser->src + parser->src_len;
>> - const char *pos = parser->pos;
>> - assert(pos < end);
>> - char quote_type = *pos;
>> - assert(quote_type == '\'' || quote_type == '"');
>> - /* Skip first quote. */
>> - int len = 0;
>> - ++pos;
>> - const char *str = pos;
>> - for (char c = *pos; pos < end && quote_type != c; c = *++pos)
>> - ++len;
>> - /* A string must be terminated with quote. */
>> - if (*pos != quote_type || len == 0)
>> - return pos - parser->src + 1;
>> - /* Skip the closing quote. */
>> - parser->pos = pos + 1;
>> + assert(parser->offset < parser->src_len);
>> + UChar32 quote_type;
>> + (void) json_read_sign(parser, "e_type);
>> + assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"');
>> + int str_offset = parser->offset;
>> + UChar32 c = 0;
>> + int rc = 0;
>> +
>> + while (((rc = json_read_sign(parser, &c)) > 0)
>> + && c != quote_type);
>> + int len = (int)(parser->offset - str_offset - 1);
>> + if (rc < 0 || len == 0)
>> + return -1;
>> + if (c != (UChar32)quote_type) {
>> + parser->invalid_sign_off++;
>> + return -1;
>> + }
>> +
>> node->type = JSON_PATH_STR;
>> - node->str = str;
>> + node->str = parser->src + str_offset;
>> node->len = len;
>> return 0;
>> }
>> @@ -81,7 +121,7 @@ json_parse_string(struct json_path_parser *parser,
>> struct json_path_node *node)
>> /**
>> * Parse digit sequence into integer until non-digit is met.
>> * Parser stops right after the last digit.
>> - * @param parser JSON parser.
>> + * @param parser JSON parser. Updates signs_read field.
>> * @param[out] node JSON node to store result.
>> *
>> * @retval 0 Success.
>> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser
>> *parser, struct json_path_node *node)
>> static inline int
>> json_parse_integer(struct json_path_parser *parser, struct
>> json_path_node *node)
>> {
>> - const char *end = parser->src + parser->src_len;
>> - const char *pos = parser->pos;
>> - assert(pos < end);
>> - const char *str = pos;
>> - int len = 0;
>> - for (char c = *pos; pos < end && isdigit(c); c = *++pos)
>> - ++len;
>> - if (len == 0)
>> - return pos - parser->src + 1;
>> - parser->pos = pos;
>> + assert(parser->offset < parser->src_len);
>> + int str_offset = parser->offset;
>> + int last_offset = str_offset;
>> + int len = 0, rc = 0;
>> + UChar32 c = 0;
>> +
>> + while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
>> + last_offset = parser->offset;
>> + len++;
>> + }
>> + if (rc > 0 && len > 0 && !u_isdigit(c))
>> + json_reset_pos(parser, last_offset, 1);
>> + if (rc < 0 || len == 0)
>> + return -1;
>> +
>> node->type = JSON_PATH_NUM;
>> - node->num = strntoull(str, len);
>> + node->num = strntoull(parser->src + str_offset, len);
>> return 0;
>> }
>> +static inline bool
>> +identifier_valid_sign(UChar32 c)
>> +{
>> + return u_isUAlphabetic(c)
>> + || c == (UChar32)'_'
>> + || u_isdigit(c);
>> +}
>> +
>> /**
>> * Parse identifier out of quotes. It can contain only alphas,
>> * digits and underscores. And can not contain digit at the first
>> * position. Parser is stoped right after the last non-digit,
>> * non-alpha and non-underscore symbol.
>> - * @param parser JSON parser.
>> + * @param parser JSON parser. Updates signs_read field.
>> * @param[out] node JSON node to store result.
>> *
>> * @retval 0 Success.
>> @@ -120,68 +173,78 @@ static inline int
>> json_parse_identifier(struct json_path_parser *parser,
>> struct json_path_node *node)
>> {
>> - const char *end = parser->src + parser->src_len;
>> - const char *pos = parser->pos;
>> - assert(pos < end);
>> - const char *str = pos;
>> - char c = *pos;
>> + assert(parser->offset < parser->src_len);
>> + int str_offset = parser->offset;
>> + UChar32 c;
>> + int rc = 0;
>> + if (json_read_sign(parser, &c) < 0)
>> + return -1;
>> /* First symbol can not be digit. */
>> - if (!isalpha(c) && c != '_')
>> - return pos - parser->src + 1;
>> - int len = 1;
>> - for (c = *++pos; pos < end && (isalpha(c) || c == '_' ||
>> isdigit(c));
>> - c = *++pos)
>> - ++len;
>> - assert(len > 0);
>> - parser->pos = pos;
>> + if (!u_isalpha(c) && c != (UChar32)'_')
>> + return -1;
>> +
>> + int last_offset = parser->offset;
>> + while ((rc = json_read_sign(parser, &c)) > 0 &&
>> identifier_valid_sign(c))
>> + last_offset = parser->offset;
>> + if (rc > 0 && !identifier_valid_sign(c))
>> + json_reset_pos(parser, last_offset, 1);
>> + if (rc < 0)
>> + return -1;
>> +
>> node->type = JSON_PATH_STR;
>> - node->str = str;
>> - node->len = len;
>> + node->str = parser->src + str_offset;
>> + node->len = parser->offset - str_offset;
>> return 0;
>> }
>> +static inline char
>> +json_curr_char(struct json_path_parser *parser)
>> +{
>> + return *json_path_curr_substring(parser);
>> +}
>> +
>> int
>> json_path_next(struct json_path_parser *parser, struct
>> json_path_node *node)
>> {
>> - const char *end = parser->src + parser->src_len;
>> - if (end == parser->pos) {
>> + int end_offset = parser->src_len;
>> + if (end_offset == parser->offset) {
>> node->type = JSON_PATH_END;
>> return 0;
>> }
>> - char c = *parser->pos;
>> + UChar32 c = 0;
>> + int last_offset = parser->offset;
>> + if (json_read_sign(parser, &c) < 0)
>> + return parser->invalid_sign_off;
>> int rc;
>> switch(c) {
>> - case '[':
>> - ++parser->pos;
>> + case (UChar32)'[':
>> /* Error for []. */
>> - if (parser->pos == end)
>> - return parser->pos - parser->src + 1;
>> - c = *parser->pos;
>> + if (parser->offset == end_offset)
>> + return parser->invalid_sign_off;
>> + c = json_curr_char(parser);
>> if (c == '"' || c == '\'')
>> rc = json_parse_string(parser, node);
>> else
>> rc = json_parse_integer(parser, node);
>> if (rc != 0)
>> - return rc;
>> + return parser->invalid_sign_off;
>> /*
>> * Expression, started from [ must be finished
>> * with ] regardless of its type.
>> */
>> - if (parser->pos == end || *parser->pos != ']')
>> - return parser->pos - parser->src + 1;
>> + if (parser->offset == end_offset || json_curr_char(parser)
>> != ']')
>> + return parser->invalid_sign_off + 1;
>> /* Skip ]. */
>> - ++parser->pos;
>> + (void) json_read_sign(parser, &c);
>> break;
>> - case '.':
>> - /* Skip dot. */
>> - ++parser->pos;
>> - if (parser->pos == end)
>> - return parser->pos - parser->src + 1;
>> - FALLTHROUGH
>> default:
>> + if (c != (UChar32)'.')
>> + json_reset_pos(parser, last_offset, 1);
>> + else if (parser->offset == end_offset)
>> + return parser->invalid_sign_off + 1;
>> rc = json_parse_identifier(parser, node);
>> if (rc != 0)
>> - return rc;
>> + return parser->invalid_sign_off;
>> break;
>> }
>> return 0;
>> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
>> index 6e8db4c..15292fb 100644
>> --- a/src/lib/json/path.h
>> +++ b/src/lib/json/path.h
>> @@ -45,10 +45,11 @@ extern "C" {
>> struct json_path_parser {
>> /** Source string. */
>> const char *src;
>> - /** Length of src. */
>> + /** Length of string. */
>> int src_len;
>> - /** Current parser's position. */
>> - const char *pos;
>> + /** Current parser's offset. */
>> + int offset;
>> + int invalid_sign_off;
>> };
>> enum json_path_type {
>> @@ -78,18 +79,30 @@ struct json_path_node {
>> };
>> /**
>> - * Create @a parser.
>> + * Init @a parser.
>> * @param[out] parser Parser to create.
>> * @param src Source string.
>> * @param src_len Length of @a src.
>> */
>> static inline void
>> json_path_parser_create(struct json_path_parser *parser, const char
>> *src,
>> - int src_len)
>> + int src_len)
>> {
>> parser->src = src;
>> parser->src_len = src_len;
>> - parser->pos = src;
>> + parser->offset = 0;
>> + parser->invalid_sign_off = 0;
>> +}
>> +
>> +/**
>> + * Get current substring of parser.
>> + * @param parser Parser.
>> + * @retval ptr to substring
>> + */
>> +static inline const char *
>> +json_path_curr_substring(struct json_path_parser *parser)
>> +{
>> + return parser->src + parser->offset;
>> }
>> /**
>> diff --git a/test/engine/tuple.result b/test/engine/tuple.result
>> index 2d7367a..3a8e828 100644
>> --- a/test/engine/tuple.result
>> +++ b/test/engine/tuple.result
>> @@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'}
>> format[3] = {name = 'field3', type = 'map'}
>> ---
>> ...
>> -format[4] = {name = 'field4', type = 'string'}
>> +format[4] = {name = 'field4', type = 'string' }
>> +---
>> +...
>> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type =
>> 'string'}
>> ---
>> ...
>> s = box.schema.space.create('test', {format = format})
>> @@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format =
>> format})
>> pk = s:create_index('pk')
>> ---
>> ...
>> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
>> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"},
>> key="value1", value="key1"}}
>> ---
>> ...
>> field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2},
>> {c=3, d=4} }, [-1] = 200}
>> ---
>> ...
>> -t = s:replace{1, field2, field3, "123456"}
>> +t = s:replace{1, field2, field3, "123456", "yes, this"}
>> ---
>> ...
>> t[1]
>> @@ -626,7 +629,7 @@ t[1]
>> ...
>> t[2]
>> ---
>> -- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
>> +- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'},
>> 'key': 'value1', 'value': 'key1'}]
>> ...
>> t[3]
>> ---
>> @@ -667,19 +670,43 @@ t["[2][5][3]"]
>> ...
>> t["[2][6].key"]
>> ---
>> -- key1
>> +- value1
>> ...
>> t["[2][6].value"]
>> ---
>> -- value1
>> +- key1
>> ...
>> t["[2][6]['key']"]
>> ---
>> -- key1
>> +- value1
>> ...
>> t["[2][6]['value']"]
>> ---
>> -- value1
>> +- key1
>> +...
>> +t[2][6].привет中国world.中国
>> +---
>> +- привет
>> +...
>> +t["[2][6].привет中国world"].中国
>> +---
>> +- привет
>> +...
>> +t["[2][6].привет中国world.中国"]
>> +---
>> +- привет
>> +...
>> +t["[2][6]['привет中国world']"]["中国"]
>> +---
>> +- привет
>> +...
>> +t["[2][6]['привет中国world']['中国']"]
>> +---
>> +- привет
>> +...
>> +t["[2][6]['привет中国world']['中国a']"]
>> +---
>> +- yes, this
>> ...
>> t["[3].k3[2].c"]
>> ---
>> @@ -759,31 +786,31 @@ t["a.b.c d.e.f"]
>> -- Sytax errors.
>> t[""]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 0'
>> +- Error in path on position 0
>> ...
>> t["[2].[5]"]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 5'
>> +- Error in path on position 5
>> ...
>> t["[-1]"]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
>> +- Error in path on position 2
>> ...
>> t[".."]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
>> +- Error in path on position 2
>> ...
>> t["[["]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
>> +- Error in path on position 2
>> ...
>> t["]]"]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
>> +- Error in path on position 1
>> ...
>> t["{"]
>> ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
>> +- Error in path on position 1
>> ...
>> s:drop()
>> ---
>> diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
>> index ba3482d..90da8b2 100644
>> --- a/test/engine/tuple.test.lua
>> +++ b/test/engine/tuple.test.lua
>> @@ -204,12 +204,13 @@ format = {}
>> format[1] = {name = 'field1', type = 'unsigned'}
>> format[2] = {name = 'field2', type = 'array'}
>> format[3] = {name = 'field3', type = 'map'}
>> -format[4] = {name = 'field4', type = 'string'}
>> +format[4] = {name = 'field4', type = 'string' }
>> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type =
>> 'string'}
>> s = box.schema.space.create('test', {format = format})
>> pk = s:create_index('pk')
>> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
>> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"},
>> key="value1", value="key1"}}
>> field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2},
>> {c=3, d=4} }, [-1] = 200}
>> -t = s:replace{1, field2, field3, "123456"}
>> +t = s:replace{1, field2, field3, "123456", "yes, this"}
>> t[1]
>> t[2]
>> t[3]
>> @@ -225,6 +226,12 @@ t["[2][6].key"]
>> t["[2][6].value"]
>> t["[2][6]['key']"]
>> t["[2][6]['value']"]
>> +t[2][6].привет中国world.中国
>> +t["[2][6].привет中国world"].中国
>> +t["[2][6].привет中国world.中国"]
>> +t["[2][6]['привет中国world']"]["中国"]
>> +t["[2][6]['привет中国world']['中国']"]
>> +t["[2][6]['привет中国world']['中国a']"]
>> t["[3].k3[2].c"]
>> t["[4]"]
>> t.field1
>> diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
>> index fe8b2d2..667194c 100644
>> --- a/test/unit/CMakeLists.txt
>> +++ b/test/unit/CMakeLists.txt
>> @@ -130,7 +130,7 @@ add_executable(csv.test csv.c)
>> target_link_libraries(csv.test csv)
>> add_executable(json_path.test json_path.c)
>> -target_link_libraries(json_path.test json_path unit)
>> +target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES})
>> add_executable(rmean.test rmean.cc)
>> target_link_libraries(rmean.test stat unit)
>> diff --git a/test/unit/json_path.c b/test/unit/json_path.c
>> index 599658b..81ef7fc 100644
>> --- a/test/unit/json_path.c
>> +++ b/test/unit/json_path.c
>> @@ -9,7 +9,7 @@
>> json_path_parser_create(&parser, path, len);
>> #define is_next_index(value_len, value) \
>> - path = parser.pos; \
>> + path = json_path_curr_substring(&parser); \
>> is(json_path_next(&parser, &node), 0, "parse <%." #value_len
>> "s>", \
>> path); \
>> is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num",
>> path); \
>>
>
>
>
next prev parent reply other threads:[~2018-04-05 23:32 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-29 14:22 [tarantool-patches] [PATCH v2 0/3] tuple field access via a json path Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 1/3] Introduce json_path_parser Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support Kirill Shcherbatov
2018-03-29 18:04 ` [tarantool-patches] " Kirill Shcherbatov
2018-03-30 10:24 ` v.shpilevoy
2018-03-30 10:25 ` v.shpilevoy
2018-04-02 19:19 ` Kirill Shcherbatov
2018-04-03 10:20 ` Vladislav Shpilevoy
2018-04-05 14:09 ` [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser Kirill Shcherbatov
2018-04-05 18:00 ` [tarantool-patches] " Kirill Shcherbatov
2018-04-05 23:32 ` Vladislav Shpilevoy [this message]
2018-04-04 10:37 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU Kirill Shcherbatov
2018-04-04 11:30 ` [tarantool-patches] " Vladislav Shpilevoy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=da8d7619-210d-a285-ebe1-6afc47df6c2e@tarantool.org \
--to=v.shpilevoy@tarantool.org \
--cc=kshcherbatov@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='[tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser.' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox