Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org,
	Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser.
Date: Fri, 6 Apr 2018 02:32:35 +0300	[thread overview]
Message-ID: <da8d7619-210d-a285-ebe1-6afc47df6c2e@tarantool.org> (raw)
In-Reply-To: <75b15d0f-0c35-dd26-4e30-4f085208d8d7@tarantool.org>

See my review fixes on branch in a separate commit. Please, bench the 
branch with my review fixes, and without.

05.04.2018 21:00, Kirill Shcherbatov пишет:
>
> From 8d97afd7ac7a90e5dfb1e756fd9d389bafdadd56 Mon Sep 17 00:00:00 2001
> From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
> Date: Thu, 5 Apr 2018 20:35:25 +0300
> Subject: [PATCH] Accounted Vlad's comments
>
> ---
>  src/box/CMakeLists.txt | 2 +-
>  src/box/lua/tuple.c | 23 +++++++++++------------
>  src/box/lua/tuple.lua | 11 +++++------
>  src/box/tuple.h | 20 ++++++++++++++++++++
>  src/box/tuple_format.c | 13 ++++++-------
>  src/box/tuple_format.h | 4 +++-
>  src/lib/json/path.c | 30 ++++++++++++++----------------
>  src/lib/json/path.h | 1 +
>  8 files changed, 61 insertions(+), 43 deletions(-)
>
> >Четверг, 5 апреля 2018, 17:48 +03:00 от Vladislav Shpilevoy 
> <v.shpilevoy@tarantool.org>:
> >
> >Hello. See 18 comments below.
> >
> >05.04.2018 17:06, Kirill Shcherbatov пишет:
> >> This is an automated email from the git hooks/post-receive script.
> >>
> >> kshcherbatov pushed a commit to branch gh-1285-tuple-field-by-json-icu
> >> in repository tarantool.
> >>
> >> commit ab52c7a18441037c19d0b2759558a7eb93aabb0d
> >> Author: Kirill Shcherbatov < kshcherbatov@tarantool.org >
> >> AuthorDate: Thu Apr 5 17:04:15 2018 +0300
> >>
> >> ICU Unicode support for JSON parser.
> >> ---
> >> src/box/CMakeLists.txt | 2 +-
> >> src/box/lua/tuple.c | 172 ++++++----------------------------------
> >> src/box/lua/tuple.lua | 7 +-
> >> src/box/tuple_format.c | 155 ++++++++++++++++++++++++++++++++++++
> >> src/box/tuple_format.h | 15 ++++
> >> src/lib/json/path.c | 191 
> ++++++++++++++++++++++++++++++---------------
> >> src/lib/json/path.h | 25 ++++--
> >> test/engine/tuple.result | 57 ++++++++++----
> >> test/engine/tuple.test.lua | 13 ++-
> >> test/unit/CMakeLists.txt | 2 +-
> >> test/unit/json_path.c | 2 +-
> >> 11 files changed, 398 insertions(+), 243 deletions(-)
> >>
> >> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> >> index add0ff9..0f5b197 100644
> >> --- a/src/box/CMakeLists.txt
> >> +++ b/src/box/CMakeLists.txt
> >> @@ -44,7 +44,7 @@ add_library(tuple STATIC
> >> field_def.c
> >> opt_def.c
> >> )
> >> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} 
> ${ICU_LIBRARIES} misc bit)
> >> +target_link_libraries(tuple json_path box_error core 
> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
> >1. Remove ${common_libraries} please, the tuple lib must not depend on
> >Lua. See how to do it in the next comments.
>
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index e5a0c21..88c2c60 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -45,7 +45,7 @@ add_library(tuple STATIC
>      field_def.c
>      opt_def.c
>  )
> -target_link_libraries(tuple json_path box_error core 
> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
> +target_link_libraries(tuple json_path box_error core 
> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
>   add_library(xlog STATIC xlog.c)
>  target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
>
> >> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, 
> const char *key, int len)
> >> static int
> >> lbox_tuple_field_by_path(struct lua_State *L)
> >> {
> >> -const char *field;
> >> struct tuple *tuple = luaT_istuple(L, 1);
> >> /* Is checked in Lua wrapper. */
> >> assert(tuple != NULL);
> >> -if (lua_isnumber(L, 2)) {
> >> +const char *field = NULL;
> >> +if (!lua_isnumber(L, 2)) {
> >> +/* string */
> >> +field = tuple_field_raw_by_path(tuple_format(tuple), 
> tuple_data(tuple),
> >> + tuple_field_map(tuple),
> >> + lua_tostring(L, 2), (int)lua_strlen(L, 2),
> >> + lua_hashstring(L, 2));
> >2. Add a tuple_field_by_path wrapper for it, as I told you verbally.
> >Moreover
> >this code is out of 80 symbols width.
> >> +/* error code or message */
> >> +struct error *e = diag_last_error(diag_get());
> >3. Please, move all of this error checking into !lua_isnumber branch.
> >Tuple_field never returns
> >error, because sometimes absent field is ok.
> >> +if (field == NULL && e != NULL)
> >> +lua_pushstring(L, e->errmsg);
> >> +else
> >> +lua_pushinteger(L, -1*(field == NULL));
> >> +if (field)
> >4. Use explicit != NULL, please.
>
> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 3b89cdd..ab62663 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c
> @@ -422,28 +422,27 @@ lbox_tuple_field_by_path(struct lua_State *L)
>      const char *field = NULL;
>      if (!lua_isnumber(L, 2)) {
>      /* string */
> -    field = tuple_field_raw_by_path(tuple_format(tuple), 
> tuple_data(tuple),
> -    tuple_field_map(tuple),
> -    lua_tostring(L, 2), (int)lua_strlen(L, 2),
> -    lua_hashstring(L, 2));
> +    field = tuple_field_by_path(tuple, lua_tostring(L, 2),
> +    (int)lua_strlen(L, 2),
> +    lua_hashstring(L, 2));
> +    struct error *e = diag_last_error(diag_get());
> +    if (field == NULL && e) {
> +    lua_pushnil(L);
> +    lua_pushstring(L, e->errmsg);
> +    return 2;
> +    }
>      } else {
>      int index = lua_tointeger(L, 2);
>      index -= TUPLE_INDEX_BASE;
>      if (index >= 0)
>      field = tuple_field(tuple, index);
>      }
> -    /* error code or message */
> -    struct error *e = diag_last_error(diag_get());
> -    if (field == NULL && e != NULL)
> -    lua_pushstring(L, e->errmsg);
> -    else
> -    lua_pushinteger(L, -1*(field == NULL));
> -    if (field)
> +    if (field != NULL)
>      luamp_decode(L, luaL_msgpack_default, &field);
>      else
>      lua_pushnil(L);
>      diag_clear(diag_get());
> -    return 2;
> +    return 1;
>  }
>   static int
>
> >> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
> >> index b51b4df..3785568 100644
> >> --- a/src/box/lua/tuple.lua
> >> +++ b/src/box/lua/tuple.lua
> >> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
> >> local fun = require('fun')
> >> local buffer = require('buffer')
> >> local internal = require('box.internal')
> >> +local log = require('log')
> >5. Unsed module.
>
> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
> index 3785568..1c7e2a5 100644
> --- a/src/box/lua/tuple.lua
> +++ b/src/box/lua/tuple.lua
> @@ -6,7 +6,6 @@ local msgpackffi = require('msgpackffi')
>  local fun = require('fun')
>  local buffer = require('buffer')
>  local internal = require('box.internal')
> -local log = require('log')
>   ffi.cdef[[
>  /** \cond public */
>
> >>
> >> ffi.cdef[[
> >> /** \cond public */
> >> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
> >> end;
> >> __tostring = internal.tuple.tostring;
> >> __index = function(tuple, key)
> >> + local rc, field
> >> if type(key) == "string" or type(key) == "number" then
> >> -- Try to get a field by json path or by [index]. If
> >> -- it was not found (rc ~= 0) then return a method
> >> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
> >> -- returns field value, not tuple_bsize function. To
> >> -- access hidden methods use
> >> -- 'box.tuple.<method_name>(T, [args...])'.
> >> - local rc, field = tuple_field_by_path(tuple, key)
> >> + rc, field = tuple_field_by_path(tuple, key)
> >> if rc == 0 then
> >> return field
> >> end
> >> end
> >> - return methods[key]
> >> + local method = methods[key]
> >> + return method == box.NULL and rc ~= -1 and rc or method
> >6. You must not return -1. Error must be nil + error message/object.
>
> @@ -303,7 +302,7 @@ ffi.metatype(tuple_t, {
>      end;
>      __tostring = internal.tuple.tostring;
>      __index = function(tuple, key)
> - local rc, field
> + local res, err
>          if type(key) == "string" or type(key) == "number" then
>              -- Try to get a field by json path or by [index]. If
>              -- it was not found (rc ~= 0) then return a method
> @@ -313,13 +312,13 @@ ffi.metatype(tuple_t, {
>              -- returns field value, not tuple_bsize function. To
>              -- access hidden methods use
>              -- 'box.tuple.<method_name>(T, [args...])'.
> - rc, field = tuple_field_by_path(tuple, key)
> - if rc == 0 then
> - return field
> + res, err = tuple_field_by_path(tuple, key)
> + if res ~= box.NULL then
> + return res
>              end
>          end
>          local method = methods[key]
> - return method == box.NULL and rc ~= -1 and rc or method
> + return method == box.NULL and err or method
>      end;
>      __eq = function(tuple_a, tuple_b)
>          -- Two tuple are considered equal if they have same memory 
> address
>
> >> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> >> index 3e2c8bf..f2dffaf 100644
> >> --- a/src/box/tuple_format.c
> >> +++ b/src/box/tuple_format.c
> >> @@ -28,6 +28,8 @@
> >> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> >> * SUCH DAMAGE.
> >> */
> >> +#include <lib/json/path.h>
> >> +#include <third_party/luajit/src/lua.h>
> >7. Do not include lua headers in files, that are not in lua/ dir.
> >Tarantool core must not
> >depend on lua. As I told you earlier, you must use field_name_hash
> >function, declared in
> >tuple_dictionary.h. It allows to remove lua dependency and remove
> >${common_libraries}
> >from cmake file.
>
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index 6ebedf5..f084c9d 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -514,6 +514,26 @@ tuple_field(const struct tuple *tuple, uint32_t 
> fieldno)
>  }
>   /**
> + * Get tuple field by its path.
> + * @param tuple.
> + * @param path Field path.
> + * @param path_len Length of @a path.
> + * @param path_hash Hash of @a path.
> + *
> + * @retval not NULL on field found.
> + * @retval NULL No field by @a path.
> + */
> +static inline const char *
> +tuple_field_by_path(struct tuple *tuple, const char *path,
> + uint32_t path_len, uint32_t path_hash)
> +{
> +    return tuple_field_raw_by_path(tuple_format(tuple),
> +    tuple_data(tuple),
> +    tuple_field_map(tuple),
> +    path, path_len, path_hash);
> +}
> +
> +/**
>   * Get tuple field by its name.
>   * @param tuple Tuple to get field from.
>   * @param name Field name.
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 9c6e3da..8ee7948 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -29,8 +29,8 @@
>   * SUCH DAMAGE.
>   */
>  #include <lib/json/path.h>
> -#include <third_party/luajit/src/lua.h>
>  #include "tuple_format.h"
> +#include "tuple_dictionary.h"
>   /** Global table of tuple formats */
>  struct tuple_format **tuple_formats;
>
> >> +const char *
> >> +tuple_field_raw_by_path(struct tuple_format *format, const char 
> *tuple,
> >> + const uint32_t *field_map,
> >> + const char *path,uint32_t path_len, uint32_t path_hash)
> >8. Path argument can be placed on the previous line.
> >
> >9. After ',' put a space please.
> >> +{
> >> +int rc = 0;
> >> +const char *field;
> >> +struct json_path_parser parser;
> >> +struct json_path_node node;
> >> +json_path_parser_create(&parser, path, path_len);
> >> +rc = json_path_next(&parser, &node);
> >> +if (rc != 0 || node.type == JSON_PATH_END)
> >> +goto not_found;
> >> +if (node.type == JSON_PATH_NUM) {
> >> +int index = node.num;
> >> +if (index == 0)
> >> +goto not_found;
> >> +index -= TUPLE_INDEX_BASE;
> >> +field = tuple_field_raw(format, tuple,
> >> + field_map, index);
> >> +if (field == NULL)
> >> +goto not_found;
> >> +} else {
> >> +assert(node.type == JSON_PATH_STR);
> >> +/* First part of a path is a field name. */
> >> +const char *name = node.str;
> >> +uint32_t name_len = node.len;
> >> +uint32_t name_hash;
> >> +if (path_len == name_len) {
> >> +name_hash = path_hash;
> >> +} else {
> >> +/*
> >> + * If a string is "field....", then its
> >> + * precalculated juajit hash can not be
> >> + * used. A tuple dictionary hashes only
> >> + * name, not path.
> >> + */
> >> +name_hash = lua_hash(name, name_len);
> >10. Good comment! One remark - use field_name_hash instead of explicit
> >lua_hash.
> >> +}
> >> +field = tuple_field_raw_by_name(format, tuple,
> >> + field_map,
> >> + name, name_len, name_hash);
> >> +if (field == NULL)
> >> +goto not_found;
> >> +}
> >> +while ((rc = json_path_next(&parser, &node)) == 0 &&
> >> + node.type != JSON_PATH_END) {
> >> +if (node.type == JSON_PATH_NUM) {
> >> +rc = tuple_field_go_to_index(&field, node.num);
> >> +} else {
> >> +assert(node.type == JSON_PATH_STR);
> >> +rc = tuple_field_go_to_key(&field, node.str, node.len);
> >> +}
> >> +if (rc != 0) {
> >> +rc = 0; /* prevent error raise */
> >> +goto not_found;
> >> +}
> >> +}
> >> +if (rc == 0)
> >> +return field;
> >> +not_found:
> >11. Please, put labels to the line beginning.
>
> @@ -563,8 +563,8 @@ tuple_field_go_to_key(const char **field, const 
> char *key, int len)
>   const char *
>  tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> - const uint32_t *field_map,
> - const char *path,uint32_t path_len, uint32_t path_hash)
> + const uint32_t *field_map, const char *path,
> + uint32_t path_len, uint32_t path_hash)
>  {
>      int rc = 0;
>      const char *field;
> @@ -598,7 +598,7 @@ tuple_field_raw_by_path(struct tuple_format 
> *format, const char *tuple,
>      * used. A tuple dictionary hashes only
>      * name, not path.
>      */
> -    name_hash = lua_hash(name, name_len);
> +    name_hash = field_name_hash(name, name_len);
>      }
>      field = tuple_field_raw_by_name(format, tuple,
>      field_map,
> @@ -621,7 +621,7 @@ tuple_field_raw_by_path(struct tuple_format 
> *format, const char *tuple,
>      }
>      if (rc == 0)
>      return field;
> -    not_found:
> +not_found:
>      /* try to use the whole string as identifier */
>      field = tuple_field_raw_by_name(format, tuple,
>      field_map,
> @@ -629,7 +629,6 @@ tuple_field_raw_by_path(struct tuple_format 
> *format, const char *tuple,
>      if (field)
>      return field;
>      if (rc || path_len == 0)
> -    diag_set(IllegalParams, "Error in path on "
> -    "position %d", rc);
> +    diag_set(IllegalParams, "Error in path on position %d", rc);
>      return NULL;
>  }
>
> >> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> >> index d33c77a..3e7e595 100644
> >> --- a/src/box/tuple_format.h
> >> +++ b/src/box/tuple_format.h
> >> @@ -343,6 +343,21 @@ tuple_field_raw_by_name(struct tuple_format 
> *format, const char *tuple,
> >> return tuple_field_raw(format, tuple, field_map, fieldno);
> >> }
> >>
> >> +/**
> >> + * Get tuple field by its path.
> >> + * @param Tuple.
> >> + * @param path Field path.
> >> + * @param path_len Length of @a path.
> >> + * @param path_hash Hash of @a path.
> >> + *
> >> + * @retval not NULL on field found.
> >> + * @retval NULL No field by @a path.
> >> + */
> >> +const char *
> >> +tuple_field_raw_by_path(struct tuple_format *format, const char 
> *tuple,
> >> + const uint32_t *field_map,
> >> + const char *path,uint32_t path_len, uint32_t path_hash);
> >12. Please, add tuple_field_by_path as well to tuple.h, that calls this
> >function. It allows you
> >to call tuple_field_by_path in lua/tuple.c.
>
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index ce17085..ffa3d16 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -379,7 +379,9 @@ tuple_field_raw_by_name(struct tuple_format 
> *format, const char *tuple,
>   /**
>   * Get tuple field by its path.
> - * @param Tuple.
> + * @param format Tuple format.
> + * @param tuple MessagePack tuple's body.
> + * @param field_map Tuple field map.
>   * @param path Field path.
>   * @param path_len Length of @a path.
>   * @param path_hash Hash of @a path.
>
> >> +
> >> #if defined(__cplusplus)
> >> } /* extern "C" */
> >> #endif /* defined(__cplusplus) */
> >> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
> >> index 4a6174e..f3fcc46 100644
> >> --- a/src/lib/json/path.c
> >> +++ b/src/lib/json/path.c
> >> @@ -31,8 +31,10 @@
> >>
> >> #include "path.h"
> >> #include <ctype.h>
> >> +#include <unicode/uchar.h>
> >> #include "trivia/util.h"
> >>
> >> +
> >13. Garbage diff.
> >> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser 
> *parser, struct json_path_node *node)
> >> static inline int
> >> json_parse_integer(struct json_path_parser *parser, struct 
> json_path_node *node)
> >> {
> >> -const char *end = parser->src + parser->src_len;
> >> -const char *pos = parser->pos;
> >> -assert(pos < end);
> >> -const char *str = pos;
> >> -int len = 0;
> >> -for (char c = *pos; pos < end && isdigit(c); c = *++pos)
> >> -++len;
> >> -if (len == 0)
> >> -return pos - parser->src + 1;
> >> -parser->pos = pos;
> >> +assert(parser->offset < parser->src_len);
> >> +int str_offset = parser->offset;
> >> +int last_offset = str_offset;
> >> +int len = 0, rc = 0;
> >> +UChar32 c = 0;
> >> +
> >> +while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
> >14. I remember, that you told me, that a digit can be read using simple
> >char. Can you please implement
> >this cycle with no UChar?
>
> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
> index f3fcc46..e253ec4 100644
> --- a/src/lib/json/path.c
> +++ b/src/lib/json/path.c
> @@ -104,7 +104,7 @@ json_parse_string(struct json_path_parser *parser, 
> struct json_path_node *node)
>       while (((rc = json_read_sign(parser, &c)) > 0)
>      && c != quote_type);
> -    int len = (int)(parser->offset - str_offset - 1);
> +    int len = parser->offset - str_offset - 1;
>      if (rc < 0 || len == 0)
>      return -1;
>      if (c != (UChar32)quote_type) {
> @@ -130,23 +130,21 @@ json_parse_string(struct json_path_parser 
> *parser, struct json_path_node *node)
>  static inline int
>  json_parse_integer(struct json_path_parser *parser, struct 
> json_path_node *node)
>  {
> -    assert(parser->offset < parser->src_len);
> -    int str_offset = parser->offset;
> -    int last_offset = str_offset;
> -    int len = 0, rc = 0;
> -    UChar32 c = 0;
> -
> -    while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
> -    last_offset = parser->offset;
> -    len++;
> -    }
> -    if (rc > 0 && len > 0 && !u_isdigit(c))
> -    json_reset_pos(parser, last_offset, 1);
> -    if (rc < 0 || len == 0)
> +    const char *end = parser->src + parser->src_len;
> +    const char *pos = parser->src + parser->offset;
> +    assert(pos < end);
> +    const char *str = pos;
> +    int len = 0;
> +    for (char c = *pos; pos < end && isdigit(c); c = *++pos)
> +    ++len;
> +    if (len == 0) {
> +    parser->invalid_sign_off++;
>      return -1;
> -
> +    }
> +    parser->offset += len;
> +    parser->invalid_sign_off += len;
>      node->type = JSON_PATH_NUM;
> -    node->num = strntoull(parser->src + str_offset, len);
> +    node->num = strntoull(str, len);
>      return 0;
>  }
>  >> >> +static inline char
> >> +json_curr_char(struct json_path_parser *parser)
> >> +{
> >> +return *json_path_curr_substring(parser);
> >> +}
> >15. I do not like that on each char you must do string + offset. Please,
> >find a way to
> >store this position. Why did you delete parser->pos?
>
> I have no good ideas. This code is also not hot: just on each new 
> lexem, believe nothing critical.
>
> >> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
> >> index 6e8db4c..15292fb 100644
> >> --- a/src/lib/json/path.h
> >> +++ b/src/lib/json/path.h
> >> @@ -45,10 +45,11 @@ extern "C" {
> >> struct json_path_parser {
> >> /** Source string. */
> >> const char *src;
> >> -/** Length of src. */
> >> +/** Length of string. */
> >> int src_len;
> >> -/** Current parser's position. */
> >> -const char *pos;
> >> +/** Current parser's offset. */
> >> +int offset;
> >> +int invalid_sign_off;
> >16. Please, add a comment.
>
> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
> index 15292fb..09f2e6f 100644
> --- a/src/lib/json/path.h
> +++ b/src/lib/json/path.h
> @@ -49,6 +49,7 @@ struct json_path_parser {
>      int src_len;
>      /** Current parser's offset. */
>      int offset;
> +    /** Successfully parsed signs count. */
>      int invalid_sign_off;
>  };
>  -- 2.7.4
>
> On 05.04.2018 17:09, Kirill Shcherbatov wrote:
>> ---
>>   src/box/CMakeLists.txt     |   2 +-
>>   src/box/lua/tuple.c        | 172 
>> ++++++----------------------------------
>>   src/box/lua/tuple.lua      |   7 +-
>>   src/box/tuple_format.c     | 155 ++++++++++++++++++++++++++++++++++++
>>   src/box/tuple_format.h     |  15 ++++
>>   src/lib/json/path.c        | 191 
>> ++++++++++++++++++++++++++++++---------------
>>   src/lib/json/path.h        |  25 ++++--
>>   test/engine/tuple.result   |  57 ++++++++++----
>>   test/engine/tuple.test.lua |  13 ++-
>>   test/unit/CMakeLists.txt   |   2 +-
>>   test/unit/json_path.c      |   2 +-
>>   11 files changed, 398 insertions(+), 243 deletions(-)
>>
>> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
>> index f053ae4..e5a0c21 100644
>> --- a/src/box/CMakeLists.txt
>> +++ b/src/box/CMakeLists.txt
>> @@ -45,7 +45,7 @@ add_library(tuple STATIC
>>       field_def.c
>>       opt_def.c
>>   )
>> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} 
>> ${ICU_LIBRARIES} misc bit)
>> +target_link_libraries(tuple json_path box_error core 
>> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
>>     add_library(xlog STATIC xlog.c)
>>   target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
>> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
>> index 99b9ff2..3b89cdd 100644
>> --- a/src/box/lua/tuple.c
>> +++ b/src/box/lua/tuple.c
>> @@ -403,87 +403,6 @@ lbox_tuple_transform(struct lua_State *L)
>>   }
>>     /**
>> - * Propagate @a field to MessagePack(field)[index].
>> - * @param[in][out] field Field to propagate.
>> - * @param index 1-based index to propagate to.
>> - *
>> - * @retval  0 Success, the index was found.
>> - * @retval -1 Not found.
>> - */
>> -static inline int
>> -tuple_field_go_to_index(const char **field, uint64_t index)
>> -{
>> -    assert(index >= 0);
>> -    enum mp_type type = mp_typeof(**field);
>> -    if (type == MP_ARRAY) {
>> -        if (index == 0)
>> -            return -1;
>> -        /* Make index 0-based. */
>> -        index -= TUPLE_INDEX_BASE;
>> -        uint32_t count = mp_decode_array(field);
>> -        if (index >= count)
>> -            return -1;
>> -        for (; index > 0; --index)
>> -            mp_next(field);
>> -        return 0;
>> -    } else if (type == MP_MAP) {
>> -        uint64_t count = mp_decode_map(field);
>> -        for (; count > 0; --count) {
>> -            type = mp_typeof(**field);
>> -            if (type == MP_UINT) {
>> -                uint64_t value = mp_decode_uint(field);
>> -                if (value == index)
>> -                    return 0;
>> -            } else if (type == MP_INT) {
>> -                int64_t value = mp_decode_int(field);
>> -                if (value >= 0 && (uint64_t)value == index)
>> -                    return 0;
>> -            } else {
>> -                /* Skip key. */
>> -                mp_next(field);
>> -            }
>> -            /* Skip value. */
>> -            mp_next(field);
>> -        }
>> -    }
>> -    return -1;
>> -}
>> -
>> -/**
>> - * Propagate @a field to MessagePack(field)[key].
>> - * @param[in][out] field Field to propagate.
>> - * @param key Key to propagate to.
>> - * @param len Length of @a key.
>> - *
>> - * @retval  0 Success, the index was found.
>> - * @retval -1 Not found.
>> - */
>> -static inline int
>> -tuple_field_go_to_key(const char **field, const char *key, int len)
>> -{
>> -    enum mp_type type = mp_typeof(**field);
>> -    if (type != MP_MAP)
>> -        return -1;
>> -    uint64_t count = mp_decode_map(field);
>> -    for (; count > 0; --count) {
>> -        type = mp_typeof(**field);
>> -        if (type == MP_STR) {
>> -            uint32_t value_len;
>> -            const char *value = mp_decode_str(field, &value_len);
>> -            if (value_len == (uint)len &&
>> -                memcmp(value, key, len) == 0)
>> -                return 0;
>> -        } else {
>> -            /* Skip key. */
>> -            mp_next(field);
>> -        }
>> -        /* Skip value. */
>> -        mp_next(field);
>> -    }
>> -    return -1;
>> -}
>> -
>> -/**
>>    * Find a tuple field by JSON path.
>>    * @param L Lua state.
>>    * @param tuple 1-th argument on a lua stack, tuple to get field
>> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, const 
>> char *key, int len)
>>   static int
>>   lbox_tuple_field_by_path(struct lua_State *L)
>>   {
>> -    const char *field;
>>       struct tuple *tuple = luaT_istuple(L, 1);
>>       /* Is checked in Lua wrapper. */
>>       assert(tuple != NULL);
>> -    if (lua_isnumber(L, 2)) {
>> +    const char *field = NULL;
>> +    if (!lua_isnumber(L, 2)) {
>> +        /* string */
>> +        field = tuple_field_raw_by_path(tuple_format(tuple), 
>> tuple_data(tuple),
>> +                                        tuple_field_map(tuple),
>> +                                        lua_tostring(L, 2), 
>> (int)lua_strlen(L, 2),
>> +                                        lua_hashstring(L, 2));
>> +    } else {
>>           int index = lua_tointeger(L, 2);
>>           index -= TUPLE_INDEX_BASE;
>> -        if (index < 0) {
>> -not_found:
>> -            lua_pushinteger(L, -1);
>> -            lua_pushnil(L);
>> -            return 2;
>> -        }
>> -        field = tuple_field(tuple, index);
>> -        if (field == NULL)
>> -            goto not_found;
>> -push_value:
>> -        lua_pushinteger(L, 0);
>> -        luamp_decode(L, luaL_msgpack_default, &field);
>> -        return 2;
>> +        if (index >= 0)
>> +            field = tuple_field(tuple, index);
>>       }
>> -    assert(lua_isstring(L, 2));
>> -    size_t path_len;
>> -    const char *path = lua_tolstring(L, 2, &path_len);
>> -    struct json_path_parser parser;
>> -    struct json_path_node node;
>> -    json_path_parser_create(&parser, path, path_len);
>> -    int rc = json_path_next(&parser, &node);
>> -    if (rc != 0 || node.type == JSON_PATH_END)
>> -        luaL_error(L, "Error in path on position %d", rc);
>> -    if (node.type == JSON_PATH_NUM) {
>> -        int index = node.num;
>> -        if (index == 0)
>> -            goto not_found;
>> -        index -= TUPLE_INDEX_BASE;
>> -        field = tuple_field(tuple, index);
>> -        if (field == NULL)
>> -            goto not_found;
>> -    } else {
>> -        assert(node.type == JSON_PATH_STR);
>> -        /* First part of a path is a field name. */
>> -        const char *name = node.str;
>> -        uint32_t name_len = node.len;
>> -        uint32_t name_hash;
>> -        if (path_len == name_len) {
>> -            name_hash = lua_hashstring(L, 2);
>> -        } else {
>> -            /*
>> -             * If a string is "field....", then its
>> -             * precalculated juajit hash can not be
>> -             * used. A tuple dictionary hashes only
>> -             * name, not path.
>> -             */
>> -            name_hash = lua_hash(name, name_len);
>> -        }
>> -        field = tuple_field_by_name(tuple, name, name_len, name_hash);
>> -        if (field == NULL)
>> -            goto not_found;
>> -    }
>> -    while ((rc = json_path_next(&parser, &node)) == 0 &&
>> -           node.type != JSON_PATH_END) {
>> -        if (node.type == JSON_PATH_NUM) {
>> -            rc = tuple_field_go_to_index(&field, node.num);
>> -        } else {
>> -            assert(node.type == JSON_PATH_STR);
>> -            rc = tuple_field_go_to_key(&field, node.str, node.len);
>> -        }
>> -        if (rc != 0)
>> -            goto not_found;
>> -    }
>> -    if (rc == 0)
>> -        goto push_value;
>> -    luaL_error(L, "Error in path on position %d", rc);
>> -    unreachable();
>> -    goto not_found;
>> +    /* error code or message */
>> +    struct error *e = diag_last_error(diag_get());
>> +    if (field == NULL && e != NULL)
>> +        lua_pushstring(L, e->errmsg);
>> +    else
>> +        lua_pushinteger(L, -1*(field == NULL));
>> +    if (field)
>> +        luamp_decode(L, luaL_msgpack_default, &field);
>> +    else
>> +        lua_pushnil(L);
>> +    diag_clear(diag_get());
>> +    return 2;
>>   }
>>     static int
>> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
>> index b51b4df..3785568 100644
>> --- a/src/box/lua/tuple.lua
>> +++ b/src/box/lua/tuple.lua
>> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
>>   local fun = require('fun')
>>   local buffer = require('buffer')
>>   local internal = require('box.internal')
>> +local log = require('log')
>>     ffi.cdef[[
>>   /** \cond public */
>> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
>>       end;
>>       __tostring = internal.tuple.tostring;
>>       __index = function(tuple, key)
>> +        local rc, field
>>           if type(key) == "string" or type(key) == "number" then
>>               -- Try to get a field by json path or by [index]. If
>>               -- it was not found (rc ~= 0) then return a method
>> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
>>               -- returns field value, not tuple_bsize function. To
>>               -- access hidden methods use
>>               -- 'box.tuple.<method_name>(T, [args...])'.
>> -            local rc, field = tuple_field_by_path(tuple, key)
>> +            rc, field = tuple_field_by_path(tuple, key)
>>               if rc == 0 then
>>                   return field
>>               end
>>           end
>> -        return methods[key]
>> +        local method = methods[key]
>> +        return method == box.NULL and rc ~= -1 and rc or method
>>       end;
>>       __eq = function(tuple_a, tuple_b)
>>           -- Two tuple are considered equal if they have same memory 
>> address
>> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
>> index e458f49..9c6e3da 100644
>> --- a/src/box/tuple_format.c
>> +++ b/src/box/tuple_format.c
>> @@ -28,6 +28,8 @@
>>    * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>>    * SUCH DAMAGE.
>>    */
>> +#include <lib/json/path.h>
>> +#include <third_party/luajit/src/lua.h>
>>   #include "tuple_format.h"
>>     /** Global table of tuple formats */
>> @@ -478,3 +480,156 @@ box_tuple_format_unref(box_tuple_format_t *format)
>>   {
>>       tuple_format_unref(format);
>>   }
>> +
>> +/**
>> + * Propagate @a field to MessagePack(field)[index].
>> + * @param[in][out] field Field to propagate.
>> + * @param index 1-based index to propagate to.
>> + *
>> + * @retval  0 Success, the index was found.
>> + * @retval -1 Not found.
>> + */
>> +static inline int
>> +tuple_field_go_to_index(const char **field, uint64_t index)
>> +{
>> +    enum mp_type type = mp_typeof(**field);
>> +    if (type == MP_ARRAY) {
>> +        if (index == 0)
>> +            return -1;
>> +        /* Make index 0-based. */
>> +        index -= TUPLE_INDEX_BASE;
>> +        uint32_t count = mp_decode_array(field);
>> +        if (index >= count)
>> +            return -1;
>> +        for (; index > 0; --index)
>> +            mp_next(field);
>> +        return 0;
>> +    } else if (type == MP_MAP) {
>> +        uint64_t count = mp_decode_map(field);
>> +        for (; count > 0; --count) {
>> +            type = mp_typeof(**field);
>> +            if (type == MP_UINT) {
>> +                uint64_t value = mp_decode_uint(field);
>> +                if (value == index)
>> +                    return 0;
>> +            } else if (type == MP_INT) {
>> +                int64_t value = mp_decode_int(field);
>> +                if (value >= 0 && (uint64_t)value == index)
>> +                    return 0;
>> +            } else {
>> +                /* Skip key. */
>> +                mp_next(field);
>> +            }
>> +            /* Skip value. */
>> +            mp_next(field);
>> +        }
>> +    }
>> +    return -1;
>> +}
>> +
>> +/**
>> + * Propagate @a field to MessagePack(field)[key].
>> + * @param[in][out] field Field to propagate.
>> + * @param key Key to propagate to.
>> + * @param len Length of @a key.
>> + *
>> + * @retval  0 Success, the index was found.
>> + * @retval -1 Not found.
>> + */
>> +static inline int
>> +tuple_field_go_to_key(const char **field, const char *key, int len)
>> +{
>> +    enum mp_type type = mp_typeof(**field);
>> +    if (type != MP_MAP)
>> +        return -1;
>> +    uint64_t count = mp_decode_map(field);
>> +    for (; count > 0; --count) {
>> +        type = mp_typeof(**field);
>> +        if (type == MP_STR) {
>> +            uint32_t value_len;
>> +            const char *value = mp_decode_str(field, &value_len);
>> +            if (value_len == (uint)len &&
>> +                memcmp(value, key, len) == 0)
>> +                return 0;
>> +        } else {
>> +            /* Skip key. */
>> +            mp_next(field);
>> +        }
>> +        /* Skip value. */
>> +        mp_next(field);
>> +    }
>> +    return -1;
>> +}
>> +
>> +const char *
>> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>> +                        const uint32_t *field_map,
>> +                        const char *path,uint32_t path_len, uint32_t 
>> path_hash)
>> +{
>> +    int rc = 0;
>> +    const char *field;
>> +    struct json_path_parser parser;
>> +    struct json_path_node node;
>> +    json_path_parser_create(&parser, path, path_len);
>> +    rc = json_path_next(&parser, &node);
>> +    if (rc != 0 || node.type == JSON_PATH_END)
>> +        goto not_found;
>> +    if (node.type == JSON_PATH_NUM) {
>> +        int index = node.num;
>> +        if (index == 0)
>> +            goto not_found;
>> +        index -= TUPLE_INDEX_BASE;
>> +        field = tuple_field_raw(format, tuple,
>> +                                field_map, index);
>> +        if (field == NULL)
>> +            goto not_found;
>> +    } else {
>> +        assert(node.type == JSON_PATH_STR);
>> +        /* First part of a path is a field name. */
>> +        const char *name = node.str;
>> +        uint32_t name_len = node.len;
>> +        uint32_t name_hash;
>> +        if (path_len == name_len) {
>> +            name_hash = path_hash;
>> +        } else {
>> +            /*
>> +             * If a string is "field....", then its
>> +             * precalculated juajit hash can not be
>> +             * used. A tuple dictionary hashes only
>> +             * name, not path.
>> +             */
>> +            name_hash = lua_hash(name, name_len);
>> +        }
>> +        field = tuple_field_raw_by_name(format, tuple,
>> +                                        field_map,
>> +                                        name, name_len, name_hash);
>> +        if (field == NULL)
>> +            goto not_found;
>> +    }
>> +    while ((rc = json_path_next(&parser, &node)) == 0 &&
>> +           node.type != JSON_PATH_END) {
>> +        if (node.type == JSON_PATH_NUM) {
>> +            rc = tuple_field_go_to_index(&field, node.num);
>> +        } else {
>> +            assert(node.type == JSON_PATH_STR);
>> +            rc = tuple_field_go_to_key(&field, node.str, node.len);
>> +        }
>> +        if (rc != 0) {
>> +            rc = 0; /* prevent error raise */
>> +            goto not_found;
>> +        }
>> +    }
>> +    if (rc == 0)
>> +        return field;
>> +    not_found:
>> +    /* try to use the whole string as identifier */
>> +    field = tuple_field_raw_by_name(format, tuple,
>> +                                    field_map,
>> +                                    path, path_len, path_hash);
>> +    if (field)
>> +        return field;
>> +    if (rc || path_len == 0)
>> +        diag_set(IllegalParams, "Error in path on "
>> +                                "position %d", rc);
>> +    return NULL;
>> +}
>> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
>> index d35182d..ce17085 100644
>> --- a/src/box/tuple_format.h
>> +++ b/src/box/tuple_format.h
>> @@ -377,6 +377,21 @@ tuple_field_raw_by_name(struct tuple_format 
>> *format, const char *tuple,
>>       return tuple_field_raw(format, tuple, field_map, fieldno);
>>   }
>>   +/**
>> + * Get tuple field by its path.
>> + * @param Tuple.
>> + * @param path Field path.
>> + * @param path_len Length of @a path.
>> + * @param path_hash Hash of @a path.
>> + *
>> + * @retval not NULL on field found.
>> + * @retval     NULL No field by @a path.
>> + */
>> +const char *
>> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>> +                        const uint32_t *field_map,
>> +                        const char *path,uint32_t path_len, uint32_t 
>> path_hash);
>> +
>>   #if defined(__cplusplus)
>>   } /* extern "C" */
>>   #endif /* defined(__cplusplus) */
>> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
>> index 4a6174e..f3fcc46 100644
>> --- a/src/lib/json/path.c
>> +++ b/src/lib/json/path.c
>> @@ -31,8 +31,10 @@
>>     #include "path.h"
>>   #include <ctype.h>
>> +#include <unicode/uchar.h>
>>   #include "trivia/util.h"
>>   +
>>   /** Same as strtoull(), but with limited length. */
>>   static inline uint64_t
>>   strntoull(const char *src, int len) {
>> @@ -45,6 +47,42 @@ strntoull(const char *src, int len) {
>>   }
>>     /**
>> + * Parse string and update parser's state.
>> + * @param parser JSON path parser. Upates pos, signs_read.
>> + * @param[out] UChar32 to store result.
>> + *
>> + * @retval 1 Success.
>> + * @retval 0 End of string.
>> + * @retval -1 Parse error.
>> + */
>> +static inline int
>> +json_read_sign(struct json_path_parser *parser, UChar32 *out)
>> +{
>> +    if (unlikely(parser->offset == parser->src_len))
>> +        return 0;
>> +    UChar32 c;
>> +    U8_NEXT_OR_FFFD(parser->src, parser->offset, parser->src_len, c)
>> +    if (c == 0xFFFD)
>> +        return -1;
>> +    *out = c;
>> +    parser->invalid_sign_off += 1;
>> +    return 1;
>> +}
>> +
>> +/**
>> + * Reset parser state to previous one.
>> + * @param parser JSON path parser.
>> + * @param old parser read offset.
>> + * @param signs to drop in signs_read counter.
>> + */
>> +static inline void
>> +json_reset_pos(struct json_path_parser *parser, int old_offset, int 
>> signs)
>> +{
>> +    parser->offset = old_offset;
>> +    parser->invalid_sign_off -= signs;
>> +}
>> +
>> +/**
>>    * Parse string identifier in quotes. Parser either stops right
>>    * after the closing quote, or returns an error position.
>>    * @param parser JSON path parser.
>> @@ -56,24 +94,26 @@ strntoull(const char *src, int len) {
>>   static inline int
>>   json_parse_string(struct json_path_parser *parser, struct 
>> json_path_node *node)
>>   {
>> -    const char *end = parser->src + parser->src_len;
>> -    const char *pos = parser->pos;
>> -    assert(pos < end);
>> -    char quote_type = *pos;
>> -    assert(quote_type == '\'' || quote_type == '"');
>> -    /* Skip first quote. */
>> -    int len = 0;
>> -    ++pos;
>> -    const char *str = pos;
>> -    for (char c = *pos; pos < end && quote_type != c; c = *++pos)
>> -        ++len;
>> -    /* A string must be terminated with quote. */
>> -    if (*pos != quote_type || len == 0)
>> -        return pos - parser->src + 1;
>> -    /* Skip the closing quote. */
>> -    parser->pos = pos + 1;
>> +    assert(parser->offset < parser->src_len);
>> +    UChar32 quote_type;
>> +    (void) json_read_sign(parser, &quote_type);
>> +    assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"');
>> +    int str_offset = parser->offset;
>> +    UChar32 c = 0;
>> +    int rc = 0;
>> +
>> +    while (((rc = json_read_sign(parser, &c)) > 0)
>> +           && c != quote_type);
>> +    int len = (int)(parser->offset - str_offset - 1);
>> +    if (rc < 0 || len == 0)
>> +        return -1;
>> +    if (c != (UChar32)quote_type) {
>> +        parser->invalid_sign_off++;
>> +        return -1;
>> +    }
>> +
>>       node->type = JSON_PATH_STR;
>> -    node->str = str;
>> +    node->str = parser->src + str_offset;
>>       node->len = len;
>>       return 0;
>>   }
>> @@ -81,7 +121,7 @@ json_parse_string(struct json_path_parser *parser, 
>> struct json_path_node *node)
>>   /**
>>    * Parse digit sequence into integer until non-digit is met.
>>    * Parser stops right after the last digit.
>> - * @param parser JSON parser.
>> + * @param parser JSON parser. Updates signs_read field.
>>    * @param[out] node JSON node to store result.
>>    *
>>    * @retval     0 Success.
>> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser 
>> *parser, struct json_path_node *node)
>>   static inline int
>>   json_parse_integer(struct json_path_parser *parser, struct 
>> json_path_node *node)
>>   {
>> -    const char *end = parser->src + parser->src_len;
>> -    const char *pos = parser->pos;
>> -    assert(pos < end);
>> -    const char *str = pos;
>> -    int len = 0;
>> -    for (char c = *pos; pos < end && isdigit(c); c = *++pos)
>> -        ++len;
>> -    if (len == 0)
>> -        return pos - parser->src + 1;
>> -    parser->pos = pos;
>> +    assert(parser->offset < parser->src_len);
>> +    int str_offset = parser->offset;
>> +    int last_offset = str_offset;
>> +    int len = 0, rc = 0;
>> +    UChar32 c = 0;
>> +
>> +    while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
>> +        last_offset = parser->offset;
>> +        len++;
>> +    }
>> +    if (rc > 0 && len > 0 && !u_isdigit(c))
>> +        json_reset_pos(parser, last_offset, 1);
>> +    if (rc < 0 || len == 0)
>> +        return -1;
>> +
>>       node->type = JSON_PATH_NUM;
>> -    node->num = strntoull(str, len);
>> +    node->num = strntoull(parser->src + str_offset, len);
>>       return 0;
>>   }
>>   +static inline bool
>> +identifier_valid_sign(UChar32 c)
>> +{
>> +    return u_isUAlphabetic(c)
>> +           || c == (UChar32)'_'
>> +           || u_isdigit(c);
>> +}
>> +
>>   /**
>>    * Parse identifier out of quotes. It can contain only alphas,
>>    * digits and underscores. And can not contain digit at the first
>>    * position. Parser is stoped right after the last non-digit,
>>    * non-alpha and non-underscore symbol.
>> - * @param parser JSON parser.
>> + * @param parser JSON parser. Updates signs_read field.
>>    * @param[out] node JSON node to store result.
>>    *
>>    * @retval     0 Success.
>> @@ -120,68 +173,78 @@ static inline int
>>   json_parse_identifier(struct json_path_parser *parser,
>>                 struct json_path_node *node)
>>   {
>> -    const char *end = parser->src + parser->src_len;
>> -    const char *pos = parser->pos;
>> -    assert(pos < end);
>> -    const char *str = pos;
>> -    char c = *pos;
>> +    assert(parser->offset < parser->src_len);
>> +    int str_offset = parser->offset;
>> +    UChar32 c;
>> +    int rc = 0;
>> +    if (json_read_sign(parser, &c) < 0)
>> +        return -1;
>>       /* First symbol can not be digit. */
>> -    if (!isalpha(c) && c != '_')
>> -        return pos - parser->src + 1;
>> -    int len = 1;
>> -    for (c = *++pos; pos < end && (isalpha(c) || c == '_' || 
>> isdigit(c));
>> -         c = *++pos)
>> -        ++len;
>> -    assert(len > 0);
>> -    parser->pos = pos;
>> +    if (!u_isalpha(c) && c != (UChar32)'_')
>> +        return -1;
>> +
>> +    int last_offset = parser->offset;
>> +    while ((rc = json_read_sign(parser, &c)) > 0 && 
>> identifier_valid_sign(c))
>> +        last_offset = parser->offset;
>> +    if (rc > 0 && !identifier_valid_sign(c))
>> +        json_reset_pos(parser, last_offset, 1);
>> +    if (rc < 0)
>> +        return -1;
>> +
>>       node->type = JSON_PATH_STR;
>> -    node->str = str;
>> -    node->len = len;
>> +    node->str = parser->src + str_offset;
>> +    node->len = parser->offset - str_offset;
>>       return 0;
>>   }
>>   +static inline char
>> +json_curr_char(struct json_path_parser *parser)
>> +{
>> +    return *json_path_curr_substring(parser);
>> +}
>> +
>>   int
>>   json_path_next(struct json_path_parser *parser, struct 
>> json_path_node *node)
>>   {
>> -    const char *end = parser->src + parser->src_len;
>> -    if (end == parser->pos) {
>> +    int end_offset = parser->src_len;
>> +    if (end_offset == parser->offset) {
>>           node->type = JSON_PATH_END;
>>           return 0;
>>       }
>> -    char c = *parser->pos;
>> +    UChar32 c = 0;
>> +    int last_offset = parser->offset;
>> +    if (json_read_sign(parser, &c) < 0)
>> +        return parser->invalid_sign_off;
>>       int rc;
>>       switch(c) {
>> -    case '[':
>> -        ++parser->pos;
>> +    case (UChar32)'[':
>>           /* Error for []. */
>> -        if (parser->pos == end)
>> -            return parser->pos - parser->src + 1;
>> -        c = *parser->pos;
>> +        if (parser->offset == end_offset)
>> +            return parser->invalid_sign_off;
>> +        c = json_curr_char(parser);
>>           if (c == '"' || c == '\'')
>>               rc = json_parse_string(parser, node);
>>           else
>>               rc = json_parse_integer(parser, node);
>>           if (rc != 0)
>> -            return rc;
>> +            return parser->invalid_sign_off;
>>           /*
>>            * Expression, started from [ must be finished
>>            * with ] regardless of its type.
>>            */
>> -        if (parser->pos == end || *parser->pos != ']')
>> -            return parser->pos - parser->src + 1;
>> +        if (parser->offset == end_offset || json_curr_char(parser) 
>> != ']')
>> +            return parser->invalid_sign_off + 1;
>>           /* Skip ]. */
>> -        ++parser->pos;
>> +        (void) json_read_sign(parser, &c);
>>           break;
>> -    case '.':
>> -        /* Skip dot. */
>> -        ++parser->pos;
>> -        if (parser->pos == end)
>> -            return parser->pos - parser->src + 1;
>> -        FALLTHROUGH
>>       default:
>> +        if (c != (UChar32)'.')
>> +            json_reset_pos(parser, last_offset, 1);
>> +        else if (parser->offset == end_offset)
>> +            return parser->invalid_sign_off + 1;
>>           rc = json_parse_identifier(parser, node);
>>           if (rc != 0)
>> -            return rc;
>> +            return parser->invalid_sign_off;
>>           break;
>>       }
>>       return 0;
>> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
>> index 6e8db4c..15292fb 100644
>> --- a/src/lib/json/path.h
>> +++ b/src/lib/json/path.h
>> @@ -45,10 +45,11 @@ extern "C" {
>>   struct json_path_parser {
>>       /** Source string. */
>>       const char *src;
>> -    /** Length of src. */
>> +    /** Length of string. */
>>       int src_len;
>> -    /** Current parser's position. */
>> -    const char *pos;
>> +    /** Current parser's offset. */
>> +    int offset;
>> +    int invalid_sign_off;
>>   };
>>     enum json_path_type {
>> @@ -78,18 +79,30 @@ struct json_path_node {
>>   };
>>     /**
>> - * Create @a parser.
>> + * Init @a parser.
>>    * @param[out] parser Parser to create.
>>    * @param src Source string.
>>    * @param src_len Length of @a src.
>>    */
>>   static inline void
>>   json_path_parser_create(struct json_path_parser *parser, const char 
>> *src,
>> -            int src_len)
>> +                        int src_len)
>>   {
>>       parser->src = src;
>>       parser->src_len = src_len;
>> -    parser->pos = src;
>> +    parser->offset = 0;
>> +    parser->invalid_sign_off = 0;
>> +}
>> +
>> +/**
>> + * Get current substring of parser.
>> + * @param parser Parser.
>> + * @retval ptr to substring
>> + */
>> +static inline const char *
>> +json_path_curr_substring(struct json_path_parser *parser)
>> +{
>> +    return parser->src + parser->offset;
>>   }
>>     /**
>> diff --git a/test/engine/tuple.result b/test/engine/tuple.result
>> index 2d7367a..3a8e828 100644
>> --- a/test/engine/tuple.result
>> +++ b/test/engine/tuple.result
>> @@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'}
>>   format[3] = {name = 'field3', type = 'map'}
>>   ---
>>   ...
>> -format[4] = {name = 'field4', type = 'string'}
>> +format[4] = {name = 'field4', type = 'string' }
>> +---
>> +...
>> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 
>> 'string'}
>>   ---
>>   ...
>>   s = box.schema.space.create('test', {format = format})
>> @@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format = 
>> format})
>>   pk = s:create_index('pk')
>>   ---
>>   ...
>> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
>> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, 
>> key="value1", value="key1"}}
>>   ---
>>   ...
>>   field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, 
>> {c=3, d=4} }, [-1] = 200}
>>   ---
>>   ...
>> -t = s:replace{1, field2, field3, "123456"}
>> +t = s:replace{1, field2, field3, "123456", "yes, this"}
>>   ---
>>   ...
>>   t[1]
>> @@ -626,7 +629,7 @@ t[1]
>>   ...
>>   t[2]
>>   ---
>> -- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
>> +- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 
>> 'key': 'value1', 'value': 'key1'}]
>>   ...
>>   t[3]
>>   ---
>> @@ -667,19 +670,43 @@ t["[2][5][3]"]
>>   ...
>>   t["[2][6].key"]
>>   ---
>> -- key1
>> +- value1
>>   ...
>>   t["[2][6].value"]
>>   ---
>> -- value1
>> +- key1
>>   ...
>>   t["[2][6]['key']"]
>>   ---
>> -- key1
>> +- value1
>>   ...
>>   t["[2][6]['value']"]
>>   ---
>> -- value1
>> +- key1
>> +...
>> +t[2][6].привет中国world.中国
>> +---
>> +- привет
>> +...
>> +t["[2][6].привет中国world"].中国
>> +---
>> +- привет
>> +...
>> +t["[2][6].привет中国world.中国"]
>> +---
>> +- привет
>> +...
>> +t["[2][6]['привет中国world']"]["中国"]
>> +---
>> +- привет
>> +...
>> +t["[2][6]['привет中国world']['中国']"]
>> +---
>> +- привет
>> +...
>> +t["[2][6]['привет中国world']['中国a']"]
>> +---
>> +- yes, this
>>   ...
>>   t["[3].k3[2].c"]
>>   ---
>> @@ -759,31 +786,31 @@ t["a.b.c d.e.f"]
>>   -- Sytax errors.
>>   t[""]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 0'
>> +- Error in path on position 0
>>   ...
>>   t["[2].[5]"]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 5'
>> +- Error in path on position 5
>>   ...
>>   t["[-1]"]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
>> +- Error in path on position 2
>>   ...
>>   t[".."]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
>> +- Error in path on position 2
>>   ...
>>   t["[["]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
>> +- Error in path on position 2
>>   ...
>>   t["]]"]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
>> +- Error in path on position 1
>>   ...
>>   t["{"]
>>   ---
>> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
>> +- Error in path on position 1
>>   ...
>>   s:drop()
>>   ---
>> diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
>> index ba3482d..90da8b2 100644
>> --- a/test/engine/tuple.test.lua
>> +++ b/test/engine/tuple.test.lua
>> @@ -204,12 +204,13 @@ format = {}
>>   format[1] = {name = 'field1', type = 'unsigned'}
>>   format[2] = {name = 'field2', type = 'array'}
>>   format[3] = {name = 'field3', type = 'map'}
>> -format[4] = {name = 'field4', type = 'string'}
>> +format[4] = {name = 'field4', type = 'string' }
>> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 
>> 'string'}
>>   s = box.schema.space.create('test', {format = format})
>>   pk = s:create_index('pk')
>> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
>> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, 
>> key="value1", value="key1"}}
>>   field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, 
>> {c=3, d=4} }, [-1] = 200}
>> -t = s:replace{1, field2, field3, "123456"}
>> +t = s:replace{1, field2, field3, "123456", "yes, this"}
>>   t[1]
>>   t[2]
>>   t[3]
>> @@ -225,6 +226,12 @@ t["[2][6].key"]
>>   t["[2][6].value"]
>>   t["[2][6]['key']"]
>>   t["[2][6]['value']"]
>> +t[2][6].привет中国world.中国
>> +t["[2][6].привет中国world"].中国
>> +t["[2][6].привет中国world.中国"]
>> +t["[2][6]['привет中国world']"]["中国"]
>> +t["[2][6]['привет中国world']['中国']"]
>> +t["[2][6]['привет中国world']['中国a']"]
>>   t["[3].k3[2].c"]
>>   t["[4]"]
>>   t.field1
>> diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
>> index fe8b2d2..667194c 100644
>> --- a/test/unit/CMakeLists.txt
>> +++ b/test/unit/CMakeLists.txt
>> @@ -130,7 +130,7 @@ add_executable(csv.test csv.c)
>>   target_link_libraries(csv.test csv)
>>     add_executable(json_path.test json_path.c)
>> -target_link_libraries(json_path.test json_path unit)
>> +target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES})
>>     add_executable(rmean.test rmean.cc)
>>   target_link_libraries(rmean.test stat unit)
>> diff --git a/test/unit/json_path.c b/test/unit/json_path.c
>> index 599658b..81ef7fc 100644
>> --- a/test/unit/json_path.c
>> +++ b/test/unit/json_path.c
>> @@ -9,7 +9,7 @@
>>       json_path_parser_create(&parser, path, len);
>>     #define is_next_index(value_len, value) \
>> -    path = parser.pos; \
>> +    path = json_path_curr_substring(&parser); \
>>       is(json_path_next(&parser, &node), 0, "parse <%." #value_len 
>> "s>", \
>>          path); \
>>       is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", 
>> path); \
>>
>
>
>

  reply	other threads:[~2018-04-05 23:32 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-29 14:22 [tarantool-patches] [PATCH v2 0/3] tuple field access via a json path Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 1/3] Introduce json_path_parser Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support Kirill Shcherbatov
2018-03-29 18:04   ` [tarantool-patches] " Kirill Shcherbatov
2018-03-30 10:24     ` v.shpilevoy
2018-03-30 10:25       ` v.shpilevoy
2018-04-02 19:19       ` Kirill Shcherbatov
2018-04-03 10:20         ` Vladislav Shpilevoy
2018-04-05 14:09           ` [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser Kirill Shcherbatov
2018-04-05 18:00             ` [tarantool-patches] " Kirill Shcherbatov
2018-04-05 23:32               ` Vladislav Shpilevoy [this message]
2018-04-04 10:37 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU Kirill Shcherbatov
2018-04-04 11:30   ` [tarantool-patches] " Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=da8d7619-210d-a285-ebe1-6afc47df6c2e@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='[tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox