From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 95B4A2D722 for ; Thu, 5 Apr 2018 19:32:38 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id LeGGKgdyFYu3 for ; Thu, 5 Apr 2018 19:32:38 -0400 (EDT) Received: from smtp50.i.mail.ru (smtp50.i.mail.ru [94.100.177.110]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 7E6052D50C for ; Thu, 5 Apr 2018 19:32:37 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser. References: <08b2862f-7741-8d62-c2e7-293964e69376@tarantool.org> <75b15d0f-0c35-dd26-4e30-4f085208d8d7@tarantool.org> From: Vladislav Shpilevoy Message-ID: Date: Fri, 6 Apr 2018 02:32:35 +0300 MIME-Version: 1.0 In-Reply-To: <75b15d0f-0c35-dd26-4e30-4f085208d8d7@tarantool.org> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: en-US Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org, Kirill Shcherbatov See my review fixes on branch in a separate commit. Please, bench the branch with my review fixes, and without. 05.04.2018 21:00, Kirill Shcherbatov пишет: > > From 8d97afd7ac7a90e5dfb1e756fd9d389bafdadd56 Mon Sep 17 00:00:00 2001 > From: Kirill Shcherbatov > Date: Thu, 5 Apr 2018 20:35:25 +0300 > Subject: [PATCH] Accounted Vlad's comments > > --- >  src/box/CMakeLists.txt | 2 +- >  src/box/lua/tuple.c | 23 +++++++++++------------ >  src/box/lua/tuple.lua | 11 +++++------ >  src/box/tuple.h | 20 ++++++++++++++++++++ >  src/box/tuple_format.c | 13 ++++++------- >  src/box/tuple_format.h | 4 +++- >  src/lib/json/path.c | 30 ++++++++++++++---------------- >  src/lib/json/path.h | 1 + >  8 files changed, 61 insertions(+), 43 deletions(-) > > >Четверг, 5 апреля 2018, 17:48 +03:00 от Vladislav Shpilevoy > : > > > >Hello. See 18 comments below. > > > >05.04.2018 17:06, Kirill Shcherbatov пишет: > >> This is an automated email from the git hooks/post-receive script. > >> > >> kshcherbatov pushed a commit to branch gh-1285-tuple-field-by-json-icu > >> in repository tarantool. > >> > >> commit ab52c7a18441037c19d0b2759558a7eb93aabb0d > >> Author: Kirill Shcherbatov < kshcherbatov@tarantool.org > > >> AuthorDate: Thu Apr 5 17:04:15 2018 +0300 > >> > >> ICU Unicode support for JSON parser. > >> --- > >> src/box/CMakeLists.txt | 2 +- > >> src/box/lua/tuple.c | 172 ++++++---------------------------------- > >> src/box/lua/tuple.lua | 7 +- > >> src/box/tuple_format.c | 155 ++++++++++++++++++++++++++++++++++++ > >> src/box/tuple_format.h | 15 ++++ > >> src/lib/json/path.c | 191 > ++++++++++++++++++++++++++++++--------------- > >> src/lib/json/path.h | 25 ++++-- > >> test/engine/tuple.result | 57 ++++++++++---- > >> test/engine/tuple.test.lua | 13 ++- > >> test/unit/CMakeLists.txt | 2 +- > >> test/unit/json_path.c | 2 +- > >> 11 files changed, 398 insertions(+), 243 deletions(-) > >> > >> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt > >> index add0ff9..0f5b197 100644 > >> --- a/src/box/CMakeLists.txt > >> +++ b/src/box/CMakeLists.txt > >> @@ -44,7 +44,7 @@ add_library(tuple STATIC > >> field_def.c > >> opt_def.c > >> ) > >> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} > ${ICU_LIBRARIES} misc bit) > >> +target_link_libraries(tuple json_path box_error core > ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries}) > >1. Remove ${common_libraries} please, the tuple lib must not depend on > >Lua. See how to do it in the next comments. > > diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt > index e5a0c21..88c2c60 100644 > --- a/src/box/CMakeLists.txt > +++ b/src/box/CMakeLists.txt > @@ -45,7 +45,7 @@ add_library(tuple STATIC >      field_def.c >      opt_def.c >  ) > -target_link_libraries(tuple json_path box_error core > ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries}) > +target_link_libraries(tuple json_path box_error core > ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit) >   add_library(xlog STATIC xlog.c) >  target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES}) > > >> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, > const char *key, int len) > >> static int > >> lbox_tuple_field_by_path(struct lua_State *L) > >> { > >> -const char *field; > >> struct tuple *tuple = luaT_istuple(L, 1); > >> /* Is checked in Lua wrapper. */ > >> assert(tuple != NULL); > >> -if (lua_isnumber(L, 2)) { > >> +const char *field = NULL; > >> +if (!lua_isnumber(L, 2)) { > >> +/* string */ > >> +field = tuple_field_raw_by_path(tuple_format(tuple), > tuple_data(tuple), > >> + tuple_field_map(tuple), > >> + lua_tostring(L, 2), (int)lua_strlen(L, 2), > >> + lua_hashstring(L, 2)); > >2. Add a tuple_field_by_path wrapper for it, as I told you verbally. > >Moreover > >this code is out of 80 symbols width. > >> +/* error code or message */ > >> +struct error *e = diag_last_error(diag_get()); > >3. Please, move all of this error checking into !lua_isnumber branch. > >Tuple_field never returns > >error, because sometimes absent field is ok. > >> +if (field == NULL && e != NULL) > >> +lua_pushstring(L, e->errmsg); > >> +else > >> +lua_pushinteger(L, -1*(field == NULL)); > >> +if (field) > >4. Use explicit != NULL, please. > > diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c > index 3b89cdd..ab62663 100644 > --- a/src/box/lua/tuple.c > +++ b/src/box/lua/tuple.c > @@ -422,28 +422,27 @@ lbox_tuple_field_by_path(struct lua_State *L) >      const char *field = NULL; >      if (!lua_isnumber(L, 2)) { >      /* string */ > -    field = tuple_field_raw_by_path(tuple_format(tuple), > tuple_data(tuple), > -    tuple_field_map(tuple), > -    lua_tostring(L, 2), (int)lua_strlen(L, 2), > -    lua_hashstring(L, 2)); > +    field = tuple_field_by_path(tuple, lua_tostring(L, 2), > +    (int)lua_strlen(L, 2), > +    lua_hashstring(L, 2)); > +    struct error *e = diag_last_error(diag_get()); > +    if (field == NULL && e) { > +    lua_pushnil(L); > +    lua_pushstring(L, e->errmsg); > +    return 2; > +    } >      } else { >      int index = lua_tointeger(L, 2); >      index -= TUPLE_INDEX_BASE; >      if (index >= 0) >      field = tuple_field(tuple, index); >      } > -    /* error code or message */ > -    struct error *e = diag_last_error(diag_get()); > -    if (field == NULL && e != NULL) > -    lua_pushstring(L, e->errmsg); > -    else > -    lua_pushinteger(L, -1*(field == NULL)); > -    if (field) > +    if (field != NULL) >      luamp_decode(L, luaL_msgpack_default, &field); >      else >      lua_pushnil(L); >      diag_clear(diag_get()); > -    return 2; > +    return 1; >  } >   static int > > >> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua > >> index b51b4df..3785568 100644 > >> --- a/src/box/lua/tuple.lua > >> +++ b/src/box/lua/tuple.lua > >> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi') > >> local fun = require('fun') > >> local buffer = require('buffer') > >> local internal = require('box.internal') > >> +local log = require('log') > >5. Unsed module. > > diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua > index 3785568..1c7e2a5 100644 > --- a/src/box/lua/tuple.lua > +++ b/src/box/lua/tuple.lua > @@ -6,7 +6,6 @@ local msgpackffi = require('msgpackffi') >  local fun = require('fun') >  local buffer = require('buffer') >  local internal = require('box.internal') > -local log = require('log') >   ffi.cdef[[ >  /** \cond public */ > > >> > >> ffi.cdef[[ > >> /** \cond public */ > >> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, { > >> end; > >> __tostring = internal.tuple.tostring; > >> __index = function(tuple, key) > >> + local rc, field > >> if type(key) == "string" or type(key) == "number" then > >> -- Try to get a field by json path or by [index]. If > >> -- it was not found (rc ~= 0) then return a method > >> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, { > >> -- returns field value, not tuple_bsize function. To > >> -- access hidden methods use > >> -- 'box.tuple.(T, [args...])'. > >> - local rc, field = tuple_field_by_path(tuple, key) > >> + rc, field = tuple_field_by_path(tuple, key) > >> if rc == 0 then > >> return field > >> end > >> end > >> - return methods[key] > >> + local method = methods[key] > >> + return method == box.NULL and rc ~= -1 and rc or method > >6. You must not return -1. Error must be nil + error message/object. > > @@ -303,7 +302,7 @@ ffi.metatype(tuple_t, { >      end; >      __tostring = internal.tuple.tostring; >      __index = function(tuple, key) > - local rc, field > + local res, err >          if type(key) == "string" or type(key) == "number" then >              -- Try to get a field by json path or by [index]. If >              -- it was not found (rc ~= 0) then return a method > @@ -313,13 +312,13 @@ ffi.metatype(tuple_t, { >              -- returns field value, not tuple_bsize function. To >              -- access hidden methods use >              -- 'box.tuple.(T, [args...])'. > - rc, field = tuple_field_by_path(tuple, key) > - if rc == 0 then > - return field > + res, err = tuple_field_by_path(tuple, key) > + if res ~= box.NULL then > + return res >              end >          end >          local method = methods[key] > - return method == box.NULL and rc ~= -1 and rc or method > + return method == box.NULL and err or method >      end; >      __eq = function(tuple_a, tuple_b) >          -- Two tuple are considered equal if they have same memory > address > > >> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c > >> index 3e2c8bf..f2dffaf 100644 > >> --- a/src/box/tuple_format.c > >> +++ b/src/box/tuple_format.c > >> @@ -28,6 +28,8 @@ > >> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > >> * SUCH DAMAGE. > >> */ > >> +#include > >> +#include > >7. Do not include lua headers in files, that are not in lua/ dir. > >Tarantool core must not > >depend on lua. As I told you earlier, you must use field_name_hash > >function, declared in > >tuple_dictionary.h. It allows to remove lua dependency and remove > >${common_libraries} > >from cmake file. > > diff --git a/src/box/tuple.h b/src/box/tuple.h > index 6ebedf5..f084c9d 100644 > --- a/src/box/tuple.h > +++ b/src/box/tuple.h > @@ -514,6 +514,26 @@ tuple_field(const struct tuple *tuple, uint32_t > fieldno) >  } >   /** > + * Get tuple field by its path. > + * @param tuple. > + * @param path Field path. > + * @param path_len Length of @a path. > + * @param path_hash Hash of @a path. > + * > + * @retval not NULL on field found. > + * @retval NULL No field by @a path. > + */ > +static inline const char * > +tuple_field_by_path(struct tuple *tuple, const char *path, > + uint32_t path_len, uint32_t path_hash) > +{ > +    return tuple_field_raw_by_path(tuple_format(tuple), > +    tuple_data(tuple), > +    tuple_field_map(tuple), > +    path, path_len, path_hash); > +} > + > +/** >   * Get tuple field by its name. >   * @param tuple Tuple to get field from. >   * @param name Field name. > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c > index 9c6e3da..8ee7948 100644 > --- a/src/box/tuple_format.c > +++ b/src/box/tuple_format.c > @@ -29,8 +29,8 @@ >   * SUCH DAMAGE. >   */ >  #include > -#include >  #include "tuple_format.h" > +#include "tuple_dictionary.h" >   /** Global table of tuple formats */ >  struct tuple_format **tuple_formats; > > >> +const char * > >> +tuple_field_raw_by_path(struct tuple_format *format, const char > *tuple, > >> + const uint32_t *field_map, > >> + const char *path,uint32_t path_len, uint32_t path_hash) > >8. Path argument can be placed on the previous line. > > > >9. After ',' put a space please. > >> +{ > >> +int rc = 0; > >> +const char *field; > >> +struct json_path_parser parser; > >> +struct json_path_node node; > >> +json_path_parser_create(&parser, path, path_len); > >> +rc = json_path_next(&parser, &node); > >> +if (rc != 0 || node.type == JSON_PATH_END) > >> +goto not_found; > >> +if (node.type == JSON_PATH_NUM) { > >> +int index = node.num; > >> +if (index == 0) > >> +goto not_found; > >> +index -= TUPLE_INDEX_BASE; > >> +field = tuple_field_raw(format, tuple, > >> + field_map, index); > >> +if (field == NULL) > >> +goto not_found; > >> +} else { > >> +assert(node.type == JSON_PATH_STR); > >> +/* First part of a path is a field name. */ > >> +const char *name = node.str; > >> +uint32_t name_len = node.len; > >> +uint32_t name_hash; > >> +if (path_len == name_len) { > >> +name_hash = path_hash; > >> +} else { > >> +/* > >> + * If a string is "field....", then its > >> + * precalculated juajit hash can not be > >> + * used. A tuple dictionary hashes only > >> + * name, not path. > >> + */ > >> +name_hash = lua_hash(name, name_len); > >10. Good comment! One remark - use field_name_hash instead of explicit > >lua_hash. > >> +} > >> +field = tuple_field_raw_by_name(format, tuple, > >> + field_map, > >> + name, name_len, name_hash); > >> +if (field == NULL) > >> +goto not_found; > >> +} > >> +while ((rc = json_path_next(&parser, &node)) == 0 && > >> + node.type != JSON_PATH_END) { > >> +if (node.type == JSON_PATH_NUM) { > >> +rc = tuple_field_go_to_index(&field, node.num); > >> +} else { > >> +assert(node.type == JSON_PATH_STR); > >> +rc = tuple_field_go_to_key(&field, node.str, node.len); > >> +} > >> +if (rc != 0) { > >> +rc = 0; /* prevent error raise */ > >> +goto not_found; > >> +} > >> +} > >> +if (rc == 0) > >> +return field; > >> +not_found: > >11. Please, put labels to the line beginning. > > @@ -563,8 +563,8 @@ tuple_field_go_to_key(const char **field, const > char *key, int len) >   const char * >  tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, > - const uint32_t *field_map, > - const char *path,uint32_t path_len, uint32_t path_hash) > + const uint32_t *field_map, const char *path, > + uint32_t path_len, uint32_t path_hash) >  { >      int rc = 0; >      const char *field; > @@ -598,7 +598,7 @@ tuple_field_raw_by_path(struct tuple_format > *format, const char *tuple, >      * used. A tuple dictionary hashes only >      * name, not path. >      */ > -    name_hash = lua_hash(name, name_len); > +    name_hash = field_name_hash(name, name_len); >      } >      field = tuple_field_raw_by_name(format, tuple, >      field_map, > @@ -621,7 +621,7 @@ tuple_field_raw_by_path(struct tuple_format > *format, const char *tuple, >      } >      if (rc == 0) >      return field; > -    not_found: > +not_found: >      /* try to use the whole string as identifier */ >      field = tuple_field_raw_by_name(format, tuple, >      field_map, > @@ -629,7 +629,6 @@ tuple_field_raw_by_path(struct tuple_format > *format, const char *tuple, >      if (field) >      return field; >      if (rc || path_len == 0) > -    diag_set(IllegalParams, "Error in path on " > -    "position %d", rc); > +    diag_set(IllegalParams, "Error in path on position %d", rc); >      return NULL; >  } > > >> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h > >> index d33c77a..3e7e595 100644 > >> --- a/src/box/tuple_format.h > >> +++ b/src/box/tuple_format.h > >> @@ -343,6 +343,21 @@ tuple_field_raw_by_name(struct tuple_format > *format, const char *tuple, > >> return tuple_field_raw(format, tuple, field_map, fieldno); > >> } > >> > >> +/** > >> + * Get tuple field by its path. > >> + * @param Tuple. > >> + * @param path Field path. > >> + * @param path_len Length of @a path. > >> + * @param path_hash Hash of @a path. > >> + * > >> + * @retval not NULL on field found. > >> + * @retval NULL No field by @a path. > >> + */ > >> +const char * > >> +tuple_field_raw_by_path(struct tuple_format *format, const char > *tuple, > >> + const uint32_t *field_map, > >> + const char *path,uint32_t path_len, uint32_t path_hash); > >12. Please, add tuple_field_by_path as well to tuple.h, that calls this > >function. It allows you > >to call tuple_field_by_path in lua/tuple.c. > > diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h > index ce17085..ffa3d16 100644 > --- a/src/box/tuple_format.h > +++ b/src/box/tuple_format.h > @@ -379,7 +379,9 @@ tuple_field_raw_by_name(struct tuple_format > *format, const char *tuple, >   /** >   * Get tuple field by its path. > - * @param Tuple. > + * @param format Tuple format. > + * @param tuple MessagePack tuple's body. > + * @param field_map Tuple field map. >   * @param path Field path. >   * @param path_len Length of @a path. >   * @param path_hash Hash of @a path. > > >> + > >> #if defined(__cplusplus) > >> } /* extern "C" */ > >> #endif /* defined(__cplusplus) */ > >> diff --git a/src/lib/json/path.c b/src/lib/json/path.c > >> index 4a6174e..f3fcc46 100644 > >> --- a/src/lib/json/path.c > >> +++ b/src/lib/json/path.c > >> @@ -31,8 +31,10 @@ > >> > >> #include "path.h" > >> #include > >> +#include > >> #include "trivia/util.h" > >> > >> + > >13. Garbage diff. > >> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser > *parser, struct json_path_node *node) > >> static inline int > >> json_parse_integer(struct json_path_parser *parser, struct > json_path_node *node) > >> { > >> -const char *end = parser->src + parser->src_len; > >> -const char *pos = parser->pos; > >> -assert(pos < end); > >> -const char *str = pos; > >> -int len = 0; > >> -for (char c = *pos; pos < end && isdigit(c); c = *++pos) > >> -++len; > >> -if (len == 0) > >> -return pos - parser->src + 1; > >> -parser->pos = pos; > >> +assert(parser->offset < parser->src_len); > >> +int str_offset = parser->offset; > >> +int last_offset = str_offset; > >> +int len = 0, rc = 0; > >> +UChar32 c = 0; > >> + > >> +while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) { > >14. I remember, that you told me, that a digit can be read using simple > >char. Can you please implement > >this cycle with no UChar? > > diff --git a/src/lib/json/path.c b/src/lib/json/path.c > index f3fcc46..e253ec4 100644 > --- a/src/lib/json/path.c > +++ b/src/lib/json/path.c > @@ -104,7 +104,7 @@ json_parse_string(struct json_path_parser *parser, > struct json_path_node *node) >       while (((rc = json_read_sign(parser, &c)) > 0) >      && c != quote_type); > -    int len = (int)(parser->offset - str_offset - 1); > +    int len = parser->offset - str_offset - 1; >      if (rc < 0 || len == 0) >      return -1; >      if (c != (UChar32)quote_type) { > @@ -130,23 +130,21 @@ json_parse_string(struct json_path_parser > *parser, struct json_path_node *node) >  static inline int >  json_parse_integer(struct json_path_parser *parser, struct > json_path_node *node) >  { > -    assert(parser->offset < parser->src_len); > -    int str_offset = parser->offset; > -    int last_offset = str_offset; > -    int len = 0, rc = 0; > -    UChar32 c = 0; > - > -    while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) { > -    last_offset = parser->offset; > -    len++; > -    } > -    if (rc > 0 && len > 0 && !u_isdigit(c)) > -    json_reset_pos(parser, last_offset, 1); > -    if (rc < 0 || len == 0) > +    const char *end = parser->src + parser->src_len; > +    const char *pos = parser->src + parser->offset; > +    assert(pos < end); > +    const char *str = pos; > +    int len = 0; > +    for (char c = *pos; pos < end && isdigit(c); c = *++pos) > +    ++len; > +    if (len == 0) { > +    parser->invalid_sign_off++; >      return -1; > - > +    } > +    parser->offset += len; > +    parser->invalid_sign_off += len; >      node->type = JSON_PATH_NUM; > -    node->num = strntoull(parser->src + str_offset, len); > +    node->num = strntoull(str, len); >      return 0; >  } >  >> >> +static inline char > >> +json_curr_char(struct json_path_parser *parser) > >> +{ > >> +return *json_path_curr_substring(parser); > >> +} > >15. I do not like that on each char you must do string + offset. Please, > >find a way to > >store this position. Why did you delete parser->pos? > > I have no good ideas. This code is also not hot: just on each new > lexem, believe nothing critical. > > >> diff --git a/src/lib/json/path.h b/src/lib/json/path.h > >> index 6e8db4c..15292fb 100644 > >> --- a/src/lib/json/path.h > >> +++ b/src/lib/json/path.h > >> @@ -45,10 +45,11 @@ extern "C" { > >> struct json_path_parser { > >> /** Source string. */ > >> const char *src; > >> -/** Length of src. */ > >> +/** Length of string. */ > >> int src_len; > >> -/** Current parser's position. */ > >> -const char *pos; > >> +/** Current parser's offset. */ > >> +int offset; > >> +int invalid_sign_off; > >16. Please, add a comment. > > diff --git a/src/lib/json/path.h b/src/lib/json/path.h > index 15292fb..09f2e6f 100644 > --- a/src/lib/json/path.h > +++ b/src/lib/json/path.h > @@ -49,6 +49,7 @@ struct json_path_parser { >      int src_len; >      /** Current parser's offset. */ >      int offset; > +    /** Successfully parsed signs count. */ >      int invalid_sign_off; >  }; >  -- 2.7.4 > > On 05.04.2018 17:09, Kirill Shcherbatov wrote: >> --- >>   src/box/CMakeLists.txt     |   2 +- >>   src/box/lua/tuple.c        | 172 >> ++++++---------------------------------- >>   src/box/lua/tuple.lua      |   7 +- >>   src/box/tuple_format.c     | 155 ++++++++++++++++++++++++++++++++++++ >>   src/box/tuple_format.h     |  15 ++++ >>   src/lib/json/path.c        | 191 >> ++++++++++++++++++++++++++++++--------------- >>   src/lib/json/path.h        |  25 ++++-- >>   test/engine/tuple.result   |  57 ++++++++++---- >>   test/engine/tuple.test.lua |  13 ++- >>   test/unit/CMakeLists.txt   |   2 +- >>   test/unit/json_path.c      |   2 +- >>   11 files changed, 398 insertions(+), 243 deletions(-) >> >> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt >> index f053ae4..e5a0c21 100644 >> --- a/src/box/CMakeLists.txt >> +++ b/src/box/CMakeLists.txt >> @@ -45,7 +45,7 @@ add_library(tuple STATIC >>       field_def.c >>       opt_def.c >>   ) >> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} >> ${ICU_LIBRARIES} misc bit) >> +target_link_libraries(tuple json_path box_error core >> ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries}) >>     add_library(xlog STATIC xlog.c) >>   target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES}) >> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c >> index 99b9ff2..3b89cdd 100644 >> --- a/src/box/lua/tuple.c >> +++ b/src/box/lua/tuple.c >> @@ -403,87 +403,6 @@ lbox_tuple_transform(struct lua_State *L) >>   } >>     /** >> - * Propagate @a field to MessagePack(field)[index]. >> - * @param[in][out] field Field to propagate. >> - * @param index 1-based index to propagate to. >> - * >> - * @retval  0 Success, the index was found. >> - * @retval -1 Not found. >> - */ >> -static inline int >> -tuple_field_go_to_index(const char **field, uint64_t index) >> -{ >> -    assert(index >= 0); >> -    enum mp_type type = mp_typeof(**field); >> -    if (type == MP_ARRAY) { >> -        if (index == 0) >> -            return -1; >> -        /* Make index 0-based. */ >> -        index -= TUPLE_INDEX_BASE; >> -        uint32_t count = mp_decode_array(field); >> -        if (index >= count) >> -            return -1; >> -        for (; index > 0; --index) >> -            mp_next(field); >> -        return 0; >> -    } else if (type == MP_MAP) { >> -        uint64_t count = mp_decode_map(field); >> -        for (; count > 0; --count) { >> -            type = mp_typeof(**field); >> -            if (type == MP_UINT) { >> -                uint64_t value = mp_decode_uint(field); >> -                if (value == index) >> -                    return 0; >> -            } else if (type == MP_INT) { >> -                int64_t value = mp_decode_int(field); >> -                if (value >= 0 && (uint64_t)value == index) >> -                    return 0; >> -            } else { >> -                /* Skip key. */ >> -                mp_next(field); >> -            } >> -            /* Skip value. */ >> -            mp_next(field); >> -        } >> -    } >> -    return -1; >> -} >> - >> -/** >> - * Propagate @a field to MessagePack(field)[key]. >> - * @param[in][out] field Field to propagate. >> - * @param key Key to propagate to. >> - * @param len Length of @a key. >> - * >> - * @retval  0 Success, the index was found. >> - * @retval -1 Not found. >> - */ >> -static inline int >> -tuple_field_go_to_key(const char **field, const char *key, int len) >> -{ >> -    enum mp_type type = mp_typeof(**field); >> -    if (type != MP_MAP) >> -        return -1; >> -    uint64_t count = mp_decode_map(field); >> -    for (; count > 0; --count) { >> -        type = mp_typeof(**field); >> -        if (type == MP_STR) { >> -            uint32_t value_len; >> -            const char *value = mp_decode_str(field, &value_len); >> -            if (value_len == (uint)len && >> -                memcmp(value, key, len) == 0) >> -                return 0; >> -        } else { >> -            /* Skip key. */ >> -            mp_next(field); >> -        } >> -        /* Skip value. */ >> -        mp_next(field); >> -    } >> -    return -1; >> -} >> - >> -/** >>    * Find a tuple field by JSON path. >>    * @param L Lua state. >>    * @param tuple 1-th argument on a lua stack, tuple to get field >> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, const >> char *key, int len) >>   static int >>   lbox_tuple_field_by_path(struct lua_State *L) >>   { >> -    const char *field; >>       struct tuple *tuple = luaT_istuple(L, 1); >>       /* Is checked in Lua wrapper. */ >>       assert(tuple != NULL); >> -    if (lua_isnumber(L, 2)) { >> +    const char *field = NULL; >> +    if (!lua_isnumber(L, 2)) { >> +        /* string */ >> +        field = tuple_field_raw_by_path(tuple_format(tuple), >> tuple_data(tuple), >> +                                        tuple_field_map(tuple), >> +                                        lua_tostring(L, 2), >> (int)lua_strlen(L, 2), >> +                                        lua_hashstring(L, 2)); >> +    } else { >>           int index = lua_tointeger(L, 2); >>           index -= TUPLE_INDEX_BASE; >> -        if (index < 0) { >> -not_found: >> -            lua_pushinteger(L, -1); >> -            lua_pushnil(L); >> -            return 2; >> -        } >> -        field = tuple_field(tuple, index); >> -        if (field == NULL) >> -            goto not_found; >> -push_value: >> -        lua_pushinteger(L, 0); >> -        luamp_decode(L, luaL_msgpack_default, &field); >> -        return 2; >> +        if (index >= 0) >> +            field = tuple_field(tuple, index); >>       } >> -    assert(lua_isstring(L, 2)); >> -    size_t path_len; >> -    const char *path = lua_tolstring(L, 2, &path_len); >> -    struct json_path_parser parser; >> -    struct json_path_node node; >> -    json_path_parser_create(&parser, path, path_len); >> -    int rc = json_path_next(&parser, &node); >> -    if (rc != 0 || node.type == JSON_PATH_END) >> -        luaL_error(L, "Error in path on position %d", rc); >> -    if (node.type == JSON_PATH_NUM) { >> -        int index = node.num; >> -        if (index == 0) >> -            goto not_found; >> -        index -= TUPLE_INDEX_BASE; >> -        field = tuple_field(tuple, index); >> -        if (field == NULL) >> -            goto not_found; >> -    } else { >> -        assert(node.type == JSON_PATH_STR); >> -        /* First part of a path is a field name. */ >> -        const char *name = node.str; >> -        uint32_t name_len = node.len; >> -        uint32_t name_hash; >> -        if (path_len == name_len) { >> -            name_hash = lua_hashstring(L, 2); >> -        } else { >> -            /* >> -             * If a string is "field....", then its >> -             * precalculated juajit hash can not be >> -             * used. A tuple dictionary hashes only >> -             * name, not path. >> -             */ >> -            name_hash = lua_hash(name, name_len); >> -        } >> -        field = tuple_field_by_name(tuple, name, name_len, name_hash); >> -        if (field == NULL) >> -            goto not_found; >> -    } >> -    while ((rc = json_path_next(&parser, &node)) == 0 && >> -           node.type != JSON_PATH_END) { >> -        if (node.type == JSON_PATH_NUM) { >> -            rc = tuple_field_go_to_index(&field, node.num); >> -        } else { >> -            assert(node.type == JSON_PATH_STR); >> -            rc = tuple_field_go_to_key(&field, node.str, node.len); >> -        } >> -        if (rc != 0) >> -            goto not_found; >> -    } >> -    if (rc == 0) >> -        goto push_value; >> -    luaL_error(L, "Error in path on position %d", rc); >> -    unreachable(); >> -    goto not_found; >> +    /* error code or message */ >> +    struct error *e = diag_last_error(diag_get()); >> +    if (field == NULL && e != NULL) >> +        lua_pushstring(L, e->errmsg); >> +    else >> +        lua_pushinteger(L, -1*(field == NULL)); >> +    if (field) >> +        luamp_decode(L, luaL_msgpack_default, &field); >> +    else >> +        lua_pushnil(L); >> +    diag_clear(diag_get()); >> +    return 2; >>   } >>     static int >> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua >> index b51b4df..3785568 100644 >> --- a/src/box/lua/tuple.lua >> +++ b/src/box/lua/tuple.lua >> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi') >>   local fun = require('fun') >>   local buffer = require('buffer') >>   local internal = require('box.internal') >> +local log = require('log') >>     ffi.cdef[[ >>   /** \cond public */ >> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, { >>       end; >>       __tostring = internal.tuple.tostring; >>       __index = function(tuple, key) >> +        local rc, field >>           if type(key) == "string" or type(key) == "number" then >>               -- Try to get a field by json path or by [index]. If >>               -- it was not found (rc ~= 0) then return a method >> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, { >>               -- returns field value, not tuple_bsize function. To >>               -- access hidden methods use >>               -- 'box.tuple.(T, [args...])'. >> -            local rc, field = tuple_field_by_path(tuple, key) >> +            rc, field = tuple_field_by_path(tuple, key) >>               if rc == 0 then >>                   return field >>               end >>           end >> -        return methods[key] >> +        local method = methods[key] >> +        return method == box.NULL and rc ~= -1 and rc or method >>       end; >>       __eq = function(tuple_a, tuple_b) >>           -- Two tuple are considered equal if they have same memory >> address >> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c >> index e458f49..9c6e3da 100644 >> --- a/src/box/tuple_format.c >> +++ b/src/box/tuple_format.c >> @@ -28,6 +28,8 @@ >>    * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >>    * SUCH DAMAGE. >>    */ >> +#include >> +#include >>   #include "tuple_format.h" >>     /** Global table of tuple formats */ >> @@ -478,3 +480,156 @@ box_tuple_format_unref(box_tuple_format_t *format) >>   { >>       tuple_format_unref(format); >>   } >> + >> +/** >> + * Propagate @a field to MessagePack(field)[index]. >> + * @param[in][out] field Field to propagate. >> + * @param index 1-based index to propagate to. >> + * >> + * @retval  0 Success, the index was found. >> + * @retval -1 Not found. >> + */ >> +static inline int >> +tuple_field_go_to_index(const char **field, uint64_t index) >> +{ >> +    enum mp_type type = mp_typeof(**field); >> +    if (type == MP_ARRAY) { >> +        if (index == 0) >> +            return -1; >> +        /* Make index 0-based. */ >> +        index -= TUPLE_INDEX_BASE; >> +        uint32_t count = mp_decode_array(field); >> +        if (index >= count) >> +            return -1; >> +        for (; index > 0; --index) >> +            mp_next(field); >> +        return 0; >> +    } else if (type == MP_MAP) { >> +        uint64_t count = mp_decode_map(field); >> +        for (; count > 0; --count) { >> +            type = mp_typeof(**field); >> +            if (type == MP_UINT) { >> +                uint64_t value = mp_decode_uint(field); >> +                if (value == index) >> +                    return 0; >> +            } else if (type == MP_INT) { >> +                int64_t value = mp_decode_int(field); >> +                if (value >= 0 && (uint64_t)value == index) >> +                    return 0; >> +            } else { >> +                /* Skip key. */ >> +                mp_next(field); >> +            } >> +            /* Skip value. */ >> +            mp_next(field); >> +        } >> +    } >> +    return -1; >> +} >> + >> +/** >> + * Propagate @a field to MessagePack(field)[key]. >> + * @param[in][out] field Field to propagate. >> + * @param key Key to propagate to. >> + * @param len Length of @a key. >> + * >> + * @retval  0 Success, the index was found. >> + * @retval -1 Not found. >> + */ >> +static inline int >> +tuple_field_go_to_key(const char **field, const char *key, int len) >> +{ >> +    enum mp_type type = mp_typeof(**field); >> +    if (type != MP_MAP) >> +        return -1; >> +    uint64_t count = mp_decode_map(field); >> +    for (; count > 0; --count) { >> +        type = mp_typeof(**field); >> +        if (type == MP_STR) { >> +            uint32_t value_len; >> +            const char *value = mp_decode_str(field, &value_len); >> +            if (value_len == (uint)len && >> +                memcmp(value, key, len) == 0) >> +                return 0; >> +        } else { >> +            /* Skip key. */ >> +            mp_next(field); >> +        } >> +        /* Skip value. */ >> +        mp_next(field); >> +    } >> +    return -1; >> +} >> + >> +const char * >> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, >> +                        const uint32_t *field_map, >> +                        const char *path,uint32_t path_len, uint32_t >> path_hash) >> +{ >> +    int rc = 0; >> +    const char *field; >> +    struct json_path_parser parser; >> +    struct json_path_node node; >> +    json_path_parser_create(&parser, path, path_len); >> +    rc = json_path_next(&parser, &node); >> +    if (rc != 0 || node.type == JSON_PATH_END) >> +        goto not_found; >> +    if (node.type == JSON_PATH_NUM) { >> +        int index = node.num; >> +        if (index == 0) >> +            goto not_found; >> +        index -= TUPLE_INDEX_BASE; >> +        field = tuple_field_raw(format, tuple, >> +                                field_map, index); >> +        if (field == NULL) >> +            goto not_found; >> +    } else { >> +        assert(node.type == JSON_PATH_STR); >> +        /* First part of a path is a field name. */ >> +        const char *name = node.str; >> +        uint32_t name_len = node.len; >> +        uint32_t name_hash; >> +        if (path_len == name_len) { >> +            name_hash = path_hash; >> +        } else { >> +            /* >> +             * If a string is "field....", then its >> +             * precalculated juajit hash can not be >> +             * used. A tuple dictionary hashes only >> +             * name, not path. >> +             */ >> +            name_hash = lua_hash(name, name_len); >> +        } >> +        field = tuple_field_raw_by_name(format, tuple, >> +                                        field_map, >> +                                        name, name_len, name_hash); >> +        if (field == NULL) >> +            goto not_found; >> +    } >> +    while ((rc = json_path_next(&parser, &node)) == 0 && >> +           node.type != JSON_PATH_END) { >> +        if (node.type == JSON_PATH_NUM) { >> +            rc = tuple_field_go_to_index(&field, node.num); >> +        } else { >> +            assert(node.type == JSON_PATH_STR); >> +            rc = tuple_field_go_to_key(&field, node.str, node.len); >> +        } >> +        if (rc != 0) { >> +            rc = 0; /* prevent error raise */ >> +            goto not_found; >> +        } >> +    } >> +    if (rc == 0) >> +        return field; >> +    not_found: >> +    /* try to use the whole string as identifier */ >> +    field = tuple_field_raw_by_name(format, tuple, >> +                                    field_map, >> +                                    path, path_len, path_hash); >> +    if (field) >> +        return field; >> +    if (rc || path_len == 0) >> +        diag_set(IllegalParams, "Error in path on " >> +                                "position %d", rc); >> +    return NULL; >> +} >> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h >> index d35182d..ce17085 100644 >> --- a/src/box/tuple_format.h >> +++ b/src/box/tuple_format.h >> @@ -377,6 +377,21 @@ tuple_field_raw_by_name(struct tuple_format >> *format, const char *tuple, >>       return tuple_field_raw(format, tuple, field_map, fieldno); >>   } >>   +/** >> + * Get tuple field by its path. >> + * @param Tuple. >> + * @param path Field path. >> + * @param path_len Length of @a path. >> + * @param path_hash Hash of @a path. >> + * >> + * @retval not NULL on field found. >> + * @retval     NULL No field by @a path. >> + */ >> +const char * >> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, >> +                        const uint32_t *field_map, >> +                        const char *path,uint32_t path_len, uint32_t >> path_hash); >> + >>   #if defined(__cplusplus) >>   } /* extern "C" */ >>   #endif /* defined(__cplusplus) */ >> diff --git a/src/lib/json/path.c b/src/lib/json/path.c >> index 4a6174e..f3fcc46 100644 >> --- a/src/lib/json/path.c >> +++ b/src/lib/json/path.c >> @@ -31,8 +31,10 @@ >>     #include "path.h" >>   #include >> +#include >>   #include "trivia/util.h" >>   + >>   /** Same as strtoull(), but with limited length. */ >>   static inline uint64_t >>   strntoull(const char *src, int len) { >> @@ -45,6 +47,42 @@ strntoull(const char *src, int len) { >>   } >>     /** >> + * Parse string and update parser's state. >> + * @param parser JSON path parser. Upates pos, signs_read. >> + * @param[out] UChar32 to store result. >> + * >> + * @retval 1 Success. >> + * @retval 0 End of string. >> + * @retval -1 Parse error. >> + */ >> +static inline int >> +json_read_sign(struct json_path_parser *parser, UChar32 *out) >> +{ >> +    if (unlikely(parser->offset == parser->src_len)) >> +        return 0; >> +    UChar32 c; >> +    U8_NEXT_OR_FFFD(parser->src, parser->offset, parser->src_len, c) >> +    if (c == 0xFFFD) >> +        return -1; >> +    *out = c; >> +    parser->invalid_sign_off += 1; >> +    return 1; >> +} >> + >> +/** >> + * Reset parser state to previous one. >> + * @param parser JSON path parser. >> + * @param old parser read offset. >> + * @param signs to drop in signs_read counter. >> + */ >> +static inline void >> +json_reset_pos(struct json_path_parser *parser, int old_offset, int >> signs) >> +{ >> +    parser->offset = old_offset; >> +    parser->invalid_sign_off -= signs; >> +} >> + >> +/** >>    * Parse string identifier in quotes. Parser either stops right >>    * after the closing quote, or returns an error position. >>    * @param parser JSON path parser. >> @@ -56,24 +94,26 @@ strntoull(const char *src, int len) { >>   static inline int >>   json_parse_string(struct json_path_parser *parser, struct >> json_path_node *node) >>   { >> -    const char *end = parser->src + parser->src_len; >> -    const char *pos = parser->pos; >> -    assert(pos < end); >> -    char quote_type = *pos; >> -    assert(quote_type == '\'' || quote_type == '"'); >> -    /* Skip first quote. */ >> -    int len = 0; >> -    ++pos; >> -    const char *str = pos; >> -    for (char c = *pos; pos < end && quote_type != c; c = *++pos) >> -        ++len; >> -    /* A string must be terminated with quote. */ >> -    if (*pos != quote_type || len == 0) >> -        return pos - parser->src + 1; >> -    /* Skip the closing quote. */ >> -    parser->pos = pos + 1; >> +    assert(parser->offset < parser->src_len); >> +    UChar32 quote_type; >> +    (void) json_read_sign(parser, "e_type); >> +    assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"'); >> +    int str_offset = parser->offset; >> +    UChar32 c = 0; >> +    int rc = 0; >> + >> +    while (((rc = json_read_sign(parser, &c)) > 0) >> +           && c != quote_type); >> +    int len = (int)(parser->offset - str_offset - 1); >> +    if (rc < 0 || len == 0) >> +        return -1; >> +    if (c != (UChar32)quote_type) { >> +        parser->invalid_sign_off++; >> +        return -1; >> +    } >> + >>       node->type = JSON_PATH_STR; >> -    node->str = str; >> +    node->str = parser->src + str_offset; >>       node->len = len; >>       return 0; >>   } >> @@ -81,7 +121,7 @@ json_parse_string(struct json_path_parser *parser, >> struct json_path_node *node) >>   /** >>    * Parse digit sequence into integer until non-digit is met. >>    * Parser stops right after the last digit. >> - * @param parser JSON parser. >> + * @param parser JSON parser. Updates signs_read field. >>    * @param[out] node JSON node to store result. >>    * >>    * @retval     0 Success. >> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser >> *parser, struct json_path_node *node) >>   static inline int >>   json_parse_integer(struct json_path_parser *parser, struct >> json_path_node *node) >>   { >> -    const char *end = parser->src + parser->src_len; >> -    const char *pos = parser->pos; >> -    assert(pos < end); >> -    const char *str = pos; >> -    int len = 0; >> -    for (char c = *pos; pos < end && isdigit(c); c = *++pos) >> -        ++len; >> -    if (len == 0) >> -        return pos - parser->src + 1; >> -    parser->pos = pos; >> +    assert(parser->offset < parser->src_len); >> +    int str_offset = parser->offset; >> +    int last_offset = str_offset; >> +    int len = 0, rc = 0; >> +    UChar32 c = 0; >> + >> +    while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) { >> +        last_offset = parser->offset; >> +        len++; >> +    } >> +    if (rc > 0 && len > 0 && !u_isdigit(c)) >> +        json_reset_pos(parser, last_offset, 1); >> +    if (rc < 0 || len == 0) >> +        return -1; >> + >>       node->type = JSON_PATH_NUM; >> -    node->num = strntoull(str, len); >> +    node->num = strntoull(parser->src + str_offset, len); >>       return 0; >>   } >>   +static inline bool >> +identifier_valid_sign(UChar32 c) >> +{ >> +    return u_isUAlphabetic(c) >> +           || c == (UChar32)'_' >> +           || u_isdigit(c); >> +} >> + >>   /** >>    * Parse identifier out of quotes. It can contain only alphas, >>    * digits and underscores. And can not contain digit at the first >>    * position. Parser is stoped right after the last non-digit, >>    * non-alpha and non-underscore symbol. >> - * @param parser JSON parser. >> + * @param parser JSON parser. Updates signs_read field. >>    * @param[out] node JSON node to store result. >>    * >>    * @retval     0 Success. >> @@ -120,68 +173,78 @@ static inline int >>   json_parse_identifier(struct json_path_parser *parser, >>                 struct json_path_node *node) >>   { >> -    const char *end = parser->src + parser->src_len; >> -    const char *pos = parser->pos; >> -    assert(pos < end); >> -    const char *str = pos; >> -    char c = *pos; >> +    assert(parser->offset < parser->src_len); >> +    int str_offset = parser->offset; >> +    UChar32 c; >> +    int rc = 0; >> +    if (json_read_sign(parser, &c) < 0) >> +        return -1; >>       /* First symbol can not be digit. */ >> -    if (!isalpha(c) && c != '_') >> -        return pos - parser->src + 1; >> -    int len = 1; >> -    for (c = *++pos; pos < end && (isalpha(c) || c == '_' || >> isdigit(c)); >> -         c = *++pos) >> -        ++len; >> -    assert(len > 0); >> -    parser->pos = pos; >> +    if (!u_isalpha(c) && c != (UChar32)'_') >> +        return -1; >> + >> +    int last_offset = parser->offset; >> +    while ((rc = json_read_sign(parser, &c)) > 0 && >> identifier_valid_sign(c)) >> +        last_offset = parser->offset; >> +    if (rc > 0 && !identifier_valid_sign(c)) >> +        json_reset_pos(parser, last_offset, 1); >> +    if (rc < 0) >> +        return -1; >> + >>       node->type = JSON_PATH_STR; >> -    node->str = str; >> -    node->len = len; >> +    node->str = parser->src + str_offset; >> +    node->len = parser->offset - str_offset; >>       return 0; >>   } >>   +static inline char >> +json_curr_char(struct json_path_parser *parser) >> +{ >> +    return *json_path_curr_substring(parser); >> +} >> + >>   int >>   json_path_next(struct json_path_parser *parser, struct >> json_path_node *node) >>   { >> -    const char *end = parser->src + parser->src_len; >> -    if (end == parser->pos) { >> +    int end_offset = parser->src_len; >> +    if (end_offset == parser->offset) { >>           node->type = JSON_PATH_END; >>           return 0; >>       } >> -    char c = *parser->pos; >> +    UChar32 c = 0; >> +    int last_offset = parser->offset; >> +    if (json_read_sign(parser, &c) < 0) >> +        return parser->invalid_sign_off; >>       int rc; >>       switch(c) { >> -    case '[': >> -        ++parser->pos; >> +    case (UChar32)'[': >>           /* Error for []. */ >> -        if (parser->pos == end) >> -            return parser->pos - parser->src + 1; >> -        c = *parser->pos; >> +        if (parser->offset == end_offset) >> +            return parser->invalid_sign_off; >> +        c = json_curr_char(parser); >>           if (c == '"' || c == '\'') >>               rc = json_parse_string(parser, node); >>           else >>               rc = json_parse_integer(parser, node); >>           if (rc != 0) >> -            return rc; >> +            return parser->invalid_sign_off; >>           /* >>            * Expression, started from [ must be finished >>            * with ] regardless of its type. >>            */ >> -        if (parser->pos == end || *parser->pos != ']') >> -            return parser->pos - parser->src + 1; >> +        if (parser->offset == end_offset || json_curr_char(parser) >> != ']') >> +            return parser->invalid_sign_off + 1; >>           /* Skip ]. */ >> -        ++parser->pos; >> +        (void) json_read_sign(parser, &c); >>           break; >> -    case '.': >> -        /* Skip dot. */ >> -        ++parser->pos; >> -        if (parser->pos == end) >> -            return parser->pos - parser->src + 1; >> -        FALLTHROUGH >>       default: >> +        if (c != (UChar32)'.') >> +            json_reset_pos(parser, last_offset, 1); >> +        else if (parser->offset == end_offset) >> +            return parser->invalid_sign_off + 1; >>           rc = json_parse_identifier(parser, node); >>           if (rc != 0) >> -            return rc; >> +            return parser->invalid_sign_off; >>           break; >>       } >>       return 0; >> diff --git a/src/lib/json/path.h b/src/lib/json/path.h >> index 6e8db4c..15292fb 100644 >> --- a/src/lib/json/path.h >> +++ b/src/lib/json/path.h >> @@ -45,10 +45,11 @@ extern "C" { >>   struct json_path_parser { >>       /** Source string. */ >>       const char *src; >> -    /** Length of src. */ >> +    /** Length of string. */ >>       int src_len; >> -    /** Current parser's position. */ >> -    const char *pos; >> +    /** Current parser's offset. */ >> +    int offset; >> +    int invalid_sign_off; >>   }; >>     enum json_path_type { >> @@ -78,18 +79,30 @@ struct json_path_node { >>   }; >>     /** >> - * Create @a parser. >> + * Init @a parser. >>    * @param[out] parser Parser to create. >>    * @param src Source string. >>    * @param src_len Length of @a src. >>    */ >>   static inline void >>   json_path_parser_create(struct json_path_parser *parser, const char >> *src, >> -            int src_len) >> +                        int src_len) >>   { >>       parser->src = src; >>       parser->src_len = src_len; >> -    parser->pos = src; >> +    parser->offset = 0; >> +    parser->invalid_sign_off = 0; >> +} >> + >> +/** >> + * Get current substring of parser. >> + * @param parser Parser. >> + * @retval ptr to substring >> + */ >> +static inline const char * >> +json_path_curr_substring(struct json_path_parser *parser) >> +{ >> +    return parser->src + parser->offset; >>   } >>     /** >> diff --git a/test/engine/tuple.result b/test/engine/tuple.result >> index 2d7367a..3a8e828 100644 >> --- a/test/engine/tuple.result >> +++ b/test/engine/tuple.result >> @@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'} >>   format[3] = {name = 'field3', type = 'map'} >>   --- >>   ... >> -format[4] = {name = 'field4', type = 'string'} >> +format[4] = {name = 'field4', type = 'string' } >> +--- >> +... >> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = >> 'string'} >>   --- >>   ... >>   s = box.schema.space.create('test', {format = format}) >> @@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format = >> format}) >>   pk = s:create_index('pk') >>   --- >>   ... >> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}} >> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, >> key="value1", value="key1"}} >>   --- >>   ... >>   field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, >> {c=3, d=4} }, [-1] = 200} >>   --- >>   ... >> -t = s:replace{1, field2, field3, "123456"} >> +t = s:replace{1, field2, field3, "123456", "yes, this"} >>   --- >>   ... >>   t[1] >> @@ -626,7 +629,7 @@ t[1] >>   ... >>   t[2] >>   --- >> -- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}] >> +- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, >> 'key': 'value1', 'value': 'key1'}] >>   ... >>   t[3] >>   --- >> @@ -667,19 +670,43 @@ t["[2][5][3]"] >>   ... >>   t["[2][6].key"] >>   --- >> -- key1 >> +- value1 >>   ... >>   t["[2][6].value"] >>   --- >> -- value1 >> +- key1 >>   ... >>   t["[2][6]['key']"] >>   --- >> -- key1 >> +- value1 >>   ... >>   t["[2][6]['value']"] >>   --- >> -- value1 >> +- key1 >> +... >> +t[2][6].привет中国world.中国 >> +--- >> +- привет >> +... >> +t["[2][6].привет中国world"].中国 >> +--- >> +- привет >> +... >> +t["[2][6].привет中国world.中国"] >> +--- >> +- привет >> +... >> +t["[2][6]['привет中国world']"]["中国"] >> +--- >> +- привет >> +... >> +t["[2][6]['привет中国world']['中国']"] >> +--- >> +- привет >> +... >> +t["[2][6]['привет中国world']['中国a']"] >> +--- >> +- yes, this >>   ... >>   t["[3].k3[2].c"] >>   --- >> @@ -759,31 +786,31 @@ t["a.b.c d.e.f"] >>   -- Sytax errors. >>   t[""] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 0' >> +- Error in path on position 0 >>   ... >>   t["[2].[5]"] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 5' >> +- Error in path on position 5 >>   ... >>   t["[-1]"] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2' >> +- Error in path on position 2 >>   ... >>   t[".."] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2' >> +- Error in path on position 2 >>   ... >>   t["[["] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2' >> +- Error in path on position 2 >>   ... >>   t["]]"] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1' >> +- Error in path on position 1 >>   ... >>   t["{"] >>   --- >> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1' >> +- Error in path on position 1 >>   ... >>   s:drop() >>   --- >> diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua >> index ba3482d..90da8b2 100644 >> --- a/test/engine/tuple.test.lua >> +++ b/test/engine/tuple.test.lua >> @@ -204,12 +204,13 @@ format = {} >>   format[1] = {name = 'field1', type = 'unsigned'} >>   format[2] = {name = 'field2', type = 'array'} >>   format[3] = {name = 'field3', type = 'map'} >> -format[4] = {name = 'field4', type = 'string'} >> +format[4] = {name = 'field4', type = 'string' } >> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = >> 'string'} >>   s = box.schema.space.create('test', {format = format}) >>   pk = s:create_index('pk') >> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}} >> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, >> key="value1", value="key1"}} >>   field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, >> {c=3, d=4} }, [-1] = 200} >> -t = s:replace{1, field2, field3, "123456"} >> +t = s:replace{1, field2, field3, "123456", "yes, this"} >>   t[1] >>   t[2] >>   t[3] >> @@ -225,6 +226,12 @@ t["[2][6].key"] >>   t["[2][6].value"] >>   t["[2][6]['key']"] >>   t["[2][6]['value']"] >> +t[2][6].привет中国world.中国 >> +t["[2][6].привет中国world"].中国 >> +t["[2][6].привет中国world.中国"] >> +t["[2][6]['привет中国world']"]["中国"] >> +t["[2][6]['привет中国world']['中国']"] >> +t["[2][6]['привет中国world']['中国a']"] >>   t["[3].k3[2].c"] >>   t["[4]"] >>   t.field1 >> diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt >> index fe8b2d2..667194c 100644 >> --- a/test/unit/CMakeLists.txt >> +++ b/test/unit/CMakeLists.txt >> @@ -130,7 +130,7 @@ add_executable(csv.test csv.c) >>   target_link_libraries(csv.test csv) >>     add_executable(json_path.test json_path.c) >> -target_link_libraries(json_path.test json_path unit) >> +target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES}) >>     add_executable(rmean.test rmean.cc) >>   target_link_libraries(rmean.test stat unit) >> diff --git a/test/unit/json_path.c b/test/unit/json_path.c >> index 599658b..81ef7fc 100644 >> --- a/test/unit/json_path.c >> +++ b/test/unit/json_path.c >> @@ -9,7 +9,7 @@ >>       json_path_parser_create(&parser, path, len); >>     #define is_next_index(value_len, value) \ >> -    path = parser.pos; \ >> +    path = json_path_curr_substring(&parser); \ >>       is(json_path_next(&parser, &node), 0, "parse <%." #value_len >> "s>", \ >>          path); \ >>       is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", >> path); \ >> > > >