Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org
Cc: "v.shpilevoy@tarantool.org" <v.shpilevoy@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser.
Date: Thu, 5 Apr 2018 21:00:46 +0300	[thread overview]
Message-ID: <75b15d0f-0c35-dd26-4e30-4f085208d8d7@tarantool.org> (raw)
In-Reply-To: <e35b03d5e6f0f78e38ee9db9b18e88bf9fef5f70.1522937317.git.kshcherbatov@tarantool.org>


 From 8d97afd7ac7a90e5dfb1e756fd9d389bafdadd56 Mon Sep 17 00:00:00 2001
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Date: Thu, 5 Apr 2018 20:35:25 +0300
Subject: [PATCH] Accounted Vlad's comments

---
  src/box/CMakeLists.txt | 2 +-
  src/box/lua/tuple.c | 23 +++++++++++------------
  src/box/lua/tuple.lua | 11 +++++------
  src/box/tuple.h | 20 ++++++++++++++++++++
  src/box/tuple_format.c | 13 ++++++-------
  src/box/tuple_format.h | 4 +++-
  src/lib/json/path.c | 30 ++++++++++++++----------------
  src/lib/json/path.h | 1 +
  8 files changed, 61 insertions(+), 43 deletions(-)

 >Четверг, 5 апреля 2018, 17:48 +03:00 от Vladislav Shpilevoy 
<v.shpilevoy@tarantool.org>:
 >
 >Hello. See 18 comments below.
 >
 >05.04.2018 17:06, Kirill Shcherbatov пишет:
 >> This is an automated email from the git hooks/post-receive script.
 >>
 >> kshcherbatov pushed a commit to branch gh-1285-tuple-field-by-json-icu
 >> in repository tarantool.
 >>
 >> commit ab52c7a18441037c19d0b2759558a7eb93aabb0d
 >> Author: Kirill Shcherbatov < kshcherbatov@tarantool.org >
 >> AuthorDate: Thu Apr 5 17:04:15 2018 +0300
 >>
 >> ICU Unicode support for JSON parser.
 >> ---
 >> src/box/CMakeLists.txt | 2 +-
 >> src/box/lua/tuple.c | 172 ++++++----------------------------------
 >> src/box/lua/tuple.lua | 7 +-
 >> src/box/tuple_format.c | 155 ++++++++++++++++++++++++++++++++++++
 >> src/box/tuple_format.h | 15 ++++
 >> src/lib/json/path.c | 191 ++++++++++++++++++++++++++++++---------------
 >> src/lib/json/path.h | 25 ++++--
 >> test/engine/tuple.result | 57 ++++++++++----
 >> test/engine/tuple.test.lua | 13 ++-
 >> test/unit/CMakeLists.txt | 2 +-
 >> test/unit/json_path.c | 2 +-
 >> 11 files changed, 398 insertions(+), 243 deletions(-)
 >>
 >> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
 >> index add0ff9..0f5b197 100644
 >> --- a/src/box/CMakeLists.txt
 >> +++ b/src/box/CMakeLists.txt
 >> @@ -44,7 +44,7 @@ add_library(tuple STATIC
 >> field_def.c
 >> opt_def.c
 >> )
 >> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} 
${ICU_LIBRARIES} misc bit)
 >> +target_link_libraries(tuple json_path box_error core 
${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
 >1. Remove ${common_libraries} please, the tuple lib must not depend on
 >Lua. See how to do it in the next comments.

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index e5a0c21..88c2c60 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -45,7 +45,7 @@ add_library(tuple STATIC
      field_def.c
      opt_def.c
  )
-target_link_libraries(tuple json_path box_error core 
${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
+target_link_libraries(tuple json_path box_error core 
${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
   add_library(xlog STATIC xlog.c)
  target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})

 >> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, 
const char *key, int len)
 >> static int
 >> lbox_tuple_field_by_path(struct lua_State *L)
 >> {
 >> -const char *field;
 >> struct tuple *tuple = luaT_istuple(L, 1);
 >> /* Is checked in Lua wrapper. */
 >> assert(tuple != NULL);
 >> -if (lua_isnumber(L, 2)) {
 >> +const char *field = NULL;
 >> +if (!lua_isnumber(L, 2)) {
 >> +/* string */
 >> +field = tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
 >> + tuple_field_map(tuple),
 >> + lua_tostring(L, 2), (int)lua_strlen(L, 2),
 >> + lua_hashstring(L, 2));
 >2. Add a tuple_field_by_path wrapper for it, as I told you verbally.
 >Moreover
 >this code is out of 80 symbols width.
 >> +/* error code or message */
 >> +struct error *e = diag_last_error(diag_get());
 >3. Please, move all of this error checking into !lua_isnumber branch.
 >Tuple_field never returns
 >error, because sometimes absent field is ok.
 >> +if (field == NULL && e != NULL)
 >> +lua_pushstring(L, e->errmsg);
 >> +else
 >> +lua_pushinteger(L, -1*(field == NULL));
 >> +if (field)
 >4. Use explicit != NULL, please.

diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 3b89cdd..ab62663 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -422,28 +422,27 @@ lbox_tuple_field_by_path(struct lua_State *L)
  	const char *field = NULL;
  	if (!lua_isnumber(L, 2)) {
  	/* string */
-	field = tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
-	tuple_field_map(tuple),
-	lua_tostring(L, 2), (int)lua_strlen(L, 2),
-	lua_hashstring(L, 2));
+	field = tuple_field_by_path(tuple, lua_tostring(L, 2),
+	(int)lua_strlen(L, 2),
+	lua_hashstring(L, 2));
+	struct error *e = diag_last_error(diag_get());
+	if (field == NULL && e) {
+	lua_pushnil(L);
+	lua_pushstring(L, e->errmsg);
+	return 2;
+	}
  	} else {
  	int index = lua_tointeger(L, 2);
  	index -= TUPLE_INDEX_BASE;
  	if (index >= 0)
  	field = tuple_field(tuple, index);
  	}
-	/* error code or message */
-	struct error *e = diag_last_error(diag_get());
-	if (field == NULL && e != NULL)
-	lua_pushstring(L, e->errmsg);
-	else
-	lua_pushinteger(L, -1*(field == NULL));
-	if (field)
+	if (field != NULL)
  	luamp_decode(L, luaL_msgpack_default, &field);
  	else
  	lua_pushnil(L);
  	diag_clear(diag_get());
-	return 2;
+	return 1;
  }
   static int

 >> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
 >> index b51b4df..3785568 100644
 >> --- a/src/box/lua/tuple.lua
 >> +++ b/src/box/lua/tuple.lua
 >> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
 >> local fun = require('fun')
 >> local buffer = require('buffer')
 >> local internal = require('box.internal')
 >> +local log = require('log')
 >5. Unsed module.

diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
index 3785568..1c7e2a5 100644
--- a/src/box/lua/tuple.lua
+++ b/src/box/lua/tuple.lua
@@ -6,7 +6,6 @@ local msgpackffi = require('msgpackffi')
  local fun = require('fun')
  local buffer = require('buffer')
  local internal = require('box.internal')
-local log = require('log')
   ffi.cdef[[
  /** \cond public */

 >>
 >> ffi.cdef[[
 >> /** \cond public */
 >> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
 >> end;
 >> __tostring = internal.tuple.tostring;
 >> __index = function(tuple, key)
 >> + local rc, field
 >> if type(key) == "string" or type(key) == "number" then
 >> -- Try to get a field by json path or by [index]. If
 >> -- it was not found (rc ~= 0) then return a method
 >> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
 >> -- returns field value, not tuple_bsize function. To
 >> -- access hidden methods use
 >> -- 'box.tuple.<method_name>(T, [args...])'.
 >> - local rc, field = tuple_field_by_path(tuple, key)
 >> + rc, field = tuple_field_by_path(tuple, key)
 >> if rc == 0 then
 >> return field
 >> end
 >> end
 >> - return methods[key]
 >> + local method = methods[key]
 >> + return method == box.NULL and rc ~= -1 and rc or method
 >6. You must not return -1. Error must be nil + error message/object.

@@ -303,7 +302,7 @@ ffi.metatype(tuple_t, {
      end;
      __tostring = internal.tuple.tostring;
      __index = function(tuple, key)
- local rc, field
+ local res, err
          if type(key) == "string" or type(key) == "number" then
              -- Try to get a field by json path or by [index]. If
              -- it was not found (rc ~= 0) then return a method
@@ -313,13 +312,13 @@ ffi.metatype(tuple_t, {
              -- returns field value, not tuple_bsize function. To
              -- access hidden methods use
              -- 'box.tuple.<method_name>(T, [args...])'.
- rc, field = tuple_field_by_path(tuple, key)
- if rc == 0 then
- return field
+ res, err = tuple_field_by_path(tuple, key)
+ if res ~= box.NULL then
+ return res
              end
          end
          local method = methods[key]
- return method == box.NULL and rc ~= -1 and rc or method
+ return method == box.NULL and err or method
      end;
      __eq = function(tuple_a, tuple_b)
          -- Two tuple are considered equal if they have same memory address

 >> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
 >> index 3e2c8bf..f2dffaf 100644
 >> --- a/src/box/tuple_format.c
 >> +++ b/src/box/tuple_format.c
 >> @@ -28,6 +28,8 @@
 >> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 >> * SUCH DAMAGE.
 >> */
 >> +#include <lib/json/path.h>
 >> +#include <third_party/luajit/src/lua.h>
 >7. Do not include lua headers in files, that are not in lua/ dir.
 >Tarantool core must not
 >depend on lua. As I told you earlier, you must use field_name_hash
 >function, declared in
 >tuple_dictionary.h. It allows to remove lua dependency and remove
 >${common_libraries}
 >from cmake file.

diff --git a/src/box/tuple.h b/src/box/tuple.h
index 6ebedf5..f084c9d 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -514,6 +514,26 @@ tuple_field(const struct tuple *tuple, uint32_t 
fieldno)
  }
   /**
+ * Get tuple field by its path.
+ * @param tuple.
+ * @param path Field path.
+ * @param path_len Length of @a path.
+ * @param path_hash Hash of @a path.
+ *
+ * @retval not NULL on field found.
+ * @retval NULL No field by @a path.
+ */
+static inline const char *
+tuple_field_by_path(struct tuple *tuple, const char *path,
+ uint32_t path_len, uint32_t path_hash)
+{
+	return tuple_field_raw_by_path(tuple_format(tuple),
+	tuple_data(tuple),
+	tuple_field_map(tuple),
+	path, path_len, path_hash);
+}
+
+/**
   * Get tuple field by its name.
   * @param tuple Tuple to get field from.
   * @param name Field name.
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 9c6e3da..8ee7948 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -29,8 +29,8 @@
   * SUCH DAMAGE.
   */
  #include <lib/json/path.h>
-#include <third_party/luajit/src/lua.h>
  #include "tuple_format.h"
+#include "tuple_dictionary.h"
   /** Global table of tuple formats */
  struct tuple_format **tuple_formats;

 >> +const char *
 >> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 >> + const uint32_t *field_map,
 >> + const char *path,uint32_t path_len, uint32_t path_hash)
 >8. Path argument can be placed on the previous line.
 >
 >9. After ',' put a space please.
 >> +{
 >> +int rc = 0;
 >> +const char *field;
 >> +struct json_path_parser parser;
 >> +struct json_path_node node;
 >> +json_path_parser_create(&parser, path, path_len);
 >> +rc = json_path_next(&parser, &node);
 >> +if (rc != 0 || node.type == JSON_PATH_END)
 >> +goto not_found;
 >> +if (node.type == JSON_PATH_NUM) {
 >> +int index = node.num;
 >> +if (index == 0)
 >> +goto not_found;
 >> +index -= TUPLE_INDEX_BASE;
 >> +field = tuple_field_raw(format, tuple,
 >> + field_map, index);
 >> +if (field == NULL)
 >> +goto not_found;
 >> +} else {
 >> +assert(node.type == JSON_PATH_STR);
 >> +/* First part of a path is a field name. */
 >> +const char *name = node.str;
 >> +uint32_t name_len = node.len;
 >> +uint32_t name_hash;
 >> +if (path_len == name_len) {
 >> +name_hash = path_hash;
 >> +} else {
 >> +/*
 >> + * If a string is "field....", then its
 >> + * precalculated juajit hash can not be
 >> + * used. A tuple dictionary hashes only
 >> + * name, not path.
 >> + */
 >> +name_hash = lua_hash(name, name_len);
 >10. Good comment! One remark - use field_name_hash instead of explicit
 >lua_hash.
 >> +}
 >> +field = tuple_field_raw_by_name(format, tuple,
 >> + field_map,
 >> + name, name_len, name_hash);
 >> +if (field == NULL)
 >> +goto not_found;
 >> +}
 >> +while ((rc = json_path_next(&parser, &node)) == 0 &&
 >> + node.type != JSON_PATH_END) {
 >> +if (node.type == JSON_PATH_NUM) {
 >> +rc = tuple_field_go_to_index(&field, node.num);
 >> +} else {
 >> +assert(node.type == JSON_PATH_STR);
 >> +rc = tuple_field_go_to_key(&field, node.str, node.len);
 >> +}
 >> +if (rc != 0) {
 >> +rc = 0; /* prevent error raise */
 >> +goto not_found;
 >> +}
 >> +}
 >> +if (rc == 0)
 >> +return field;
 >> +not_found:
 >11. Please, put labels to the line beginning.

@@ -563,8 +563,8 @@ tuple_field_go_to_key(const char **field, const char 
*key, int len)
   const char *
  tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
- const uint32_t *field_map,
- const char *path,uint32_t path_len, uint32_t path_hash)
+ const uint32_t *field_map, const char *path,
+ uint32_t path_len, uint32_t path_hash)
  {
  	int rc = 0;
  	const char *field;
@@ -598,7 +598,7 @@ tuple_field_raw_by_path(struct tuple_format *format, 
const char *tuple,
  	* used. A tuple dictionary hashes only
  	* name, not path.
  	*/
-	name_hash = lua_hash(name, name_len);
+	name_hash = field_name_hash(name, name_len);
  	}
  	field = tuple_field_raw_by_name(format, tuple,
  	field_map,
@@ -621,7 +621,7 @@ tuple_field_raw_by_path(struct tuple_format *format, 
const char *tuple,
  	}
  	if (rc == 0)
  	return field;
-	not_found:
+not_found:
  	/* try to use the whole string as identifier */
  	field = tuple_field_raw_by_name(format, tuple,
  	field_map,
@@ -629,7 +629,6 @@ tuple_field_raw_by_path(struct tuple_format *format, 
const char *tuple,
  	if (field)
  	return field;
  	if (rc || path_len == 0)
-	diag_set(IllegalParams, "Error in path on "
-	"position %d", rc);
+	diag_set(IllegalParams, "Error in path on position %d", rc);
  	return NULL;
  }

 >> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
 >> index d33c77a..3e7e595 100644
 >> --- a/src/box/tuple_format.h
 >> +++ b/src/box/tuple_format.h
 >> @@ -343,6 +343,21 @@ tuple_field_raw_by_name(struct tuple_format 
*format, const char *tuple,
 >> return tuple_field_raw(format, tuple, field_map, fieldno);
 >> }
 >>
 >> +/**
 >> + * Get tuple field by its path.
 >> + * @param Tuple.
 >> + * @param path Field path.
 >> + * @param path_len Length of @a path.
 >> + * @param path_hash Hash of @a path.
 >> + *
 >> + * @retval not NULL on field found.
 >> + * @retval NULL No field by @a path.
 >> + */
 >> +const char *
 >> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 >> + const uint32_t *field_map,
 >> + const char *path,uint32_t path_len, uint32_t path_hash);
 >12. Please, add tuple_field_by_path as well to tuple.h, that calls this
 >function. It allows you
 >to call tuple_field_by_path in lua/tuple.c.

diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index ce17085..ffa3d16 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -379,7 +379,9 @@ tuple_field_raw_by_name(struct tuple_format *format, 
const char *tuple,
   /**
   * Get tuple field by its path.
- * @param Tuple.
+ * @param format Tuple format.
+ * @param tuple MessagePack tuple's body.
+ * @param field_map Tuple field map.
   * @param path Field path.
   * @param path_len Length of @a path.
   * @param path_hash Hash of @a path.

 >> +
 >> #if defined(__cplusplus)
 >> } /* extern "C" */
 >> #endif /* defined(__cplusplus) */
 >> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
 >> index 4a6174e..f3fcc46 100644
 >> --- a/src/lib/json/path.c
 >> +++ b/src/lib/json/path.c
 >> @@ -31,8 +31,10 @@
 >>
 >> #include "path.h"
 >> #include <ctype.h>
 >> +#include <unicode/uchar.h>
 >> #include "trivia/util.h"
 >>
 >> +
 >13. Garbage diff.
 >> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser 
*parser, struct json_path_node *node)
 >> static inline int
 >> json_parse_integer(struct json_path_parser *parser, struct 
json_path_node *node)
 >> {
 >> -const char *end = parser->src + parser->src_len;
 >> -const char *pos = parser->pos;
 >> -assert(pos < end);
 >> -const char *str = pos;
 >> -int len = 0;
 >> -for (char c = *pos; pos < end && isdigit(c); c = *++pos)
 >> -++len;
 >> -if (len == 0)
 >> -return pos - parser->src + 1;
 >> -parser->pos = pos;
 >> +assert(parser->offset < parser->src_len);
 >> +int str_offset = parser->offset;
 >> +int last_offset = str_offset;
 >> +int len = 0, rc = 0;
 >> +UChar32 c = 0;
 >> +
 >> +while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
 >14. I remember, that you told me, that a digit can be read using simple
 >char. Can you please implement
 >this cycle with no UChar?

diff --git a/src/lib/json/path.c b/src/lib/json/path.c
index f3fcc46..e253ec4 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/path.c
@@ -104,7 +104,7 @@ json_parse_string(struct json_path_parser *parser, 
struct json_path_node *node)
   	while (((rc = json_read_sign(parser, &c)) > 0)
  	&& c != quote_type);
-	int len = (int)(parser->offset - str_offset - 1);
+	int len = parser->offset - str_offset - 1;
  	if (rc < 0 || len == 0)
  	return -1;
  	if (c != (UChar32)quote_type) {
@@ -130,23 +130,21 @@ json_parse_string(struct json_path_parser *parser, 
struct json_path_node *node)
  static inline int
  json_parse_integer(struct json_path_parser *parser, struct 
json_path_node *node)
  {
-	assert(parser->offset < parser->src_len);
-	int str_offset = parser->offset;
-	int last_offset = str_offset;
-	int len = 0, rc = 0;
-	UChar32 c = 0;
-
-	while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
-	last_offset = parser->offset;
-	len++;
-	}
-	if (rc > 0 && len > 0 && !u_isdigit(c))
-	json_reset_pos(parser, last_offset, 1);
-	if (rc < 0 || len == 0)
+	const char *end = parser->src + parser->src_len;
+	const char *pos = parser->src + parser->offset;
+	assert(pos < end);
+	const char *str = pos;
+	int len = 0;
+	for (char c = *pos; pos < end && isdigit(c); c = *++pos)
+	++len;
+	if (len == 0) {
+	parser->invalid_sign_off++;
  	return -1;
-
+	}
+	parser->offset += len;
+	parser->invalid_sign_off += len;
  	node->type = JSON_PATH_NUM;
-	node->num = strntoull(parser->src + str_offset, len);
+	node->num = strntoull(str, len);
  	return 0;
  }
  >> >> +static inline char
 >> +json_curr_char(struct json_path_parser *parser)
 >> +{
 >> +return *json_path_curr_substring(parser);
 >> +}
 >15. I do not like that on each char you must do string + offset. Please,
 >find a way to
 >store this position. Why did you delete parser->pos?

I have no good ideas. This code is also not hot: just on each new lexem, 
believe nothing critical.

 >> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
 >> index 6e8db4c..15292fb 100644
 >> --- a/src/lib/json/path.h
 >> +++ b/src/lib/json/path.h
 >> @@ -45,10 +45,11 @@ extern "C" {
 >> struct json_path_parser {
 >> /** Source string. */
 >> const char *src;
 >> -/** Length of src. */
 >> +/** Length of string. */
 >> int src_len;
 >> -/** Current parser's position. */
 >> -const char *pos;
 >> +/** Current parser's offset. */
 >> +int offset;
 >> +int invalid_sign_off;
 >16. Please, add a comment.

diff --git a/src/lib/json/path.h b/src/lib/json/path.h
index 15292fb..09f2e6f 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/path.h
@@ -49,6 +49,7 @@ struct json_path_parser {
  	int src_len;
  	/** Current parser's offset. */
  	int offset;
+	/** Successfully parsed signs count. */
  	int invalid_sign_off;
  };
  -- 2.7.4

On 05.04.2018 17:09, Kirill Shcherbatov wrote:
> ---
>   src/box/CMakeLists.txt     |   2 +-
>   src/box/lua/tuple.c        | 172 ++++++----------------------------------
>   src/box/lua/tuple.lua      |   7 +-
>   src/box/tuple_format.c     | 155 ++++++++++++++++++++++++++++++++++++
>   src/box/tuple_format.h     |  15 ++++
>   src/lib/json/path.c        | 191 ++++++++++++++++++++++++++++++---------------
>   src/lib/json/path.h        |  25 ++++--
>   test/engine/tuple.result   |  57 ++++++++++----
>   test/engine/tuple.test.lua |  13 ++-
>   test/unit/CMakeLists.txt   |   2 +-
>   test/unit/json_path.c      |   2 +-
>   11 files changed, 398 insertions(+), 243 deletions(-)
> 
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index f053ae4..e5a0c21 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -45,7 +45,7 @@ add_library(tuple STATIC
>       field_def.c
>       opt_def.c
>   )
> -target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
> +target_link_libraries(tuple json_path box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
>   
>   add_library(xlog STATIC xlog.c)
>   target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 99b9ff2..3b89cdd 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c
> @@ -403,87 +403,6 @@ lbox_tuple_transform(struct lua_State *L)
>   }
>   
>   /**
> - * Propagate @a field to MessagePack(field)[index].
> - * @param[in][out] field Field to propagate.
> - * @param index 1-based index to propagate to.
> - *
> - * @retval  0 Success, the index was found.
> - * @retval -1 Not found.
> - */
> -static inline int
> -tuple_field_go_to_index(const char **field, uint64_t index)
> -{
> -	assert(index >= 0);
> -	enum mp_type type = mp_typeof(**field);
> -	if (type == MP_ARRAY) {
> -		if (index == 0)
> -			return -1;
> -		/* Make index 0-based. */
> -		index -= TUPLE_INDEX_BASE;
> -		uint32_t count = mp_decode_array(field);
> -		if (index >= count)
> -			return -1;
> -		for (; index > 0; --index)
> -			mp_next(field);
> -		return 0;
> -	} else if (type == MP_MAP) {
> -		uint64_t count = mp_decode_map(field);
> -		for (; count > 0; --count) {
> -			type = mp_typeof(**field);
> -			if (type == MP_UINT) {
> -				uint64_t value = mp_decode_uint(field);
> -				if (value == index)
> -					return 0;
> -			} else if (type == MP_INT) {
> -				int64_t value = mp_decode_int(field);
> -				if (value >= 0 && (uint64_t)value == index)
> -					return 0;
> -			} else {
> -				/* Skip key. */
> -				mp_next(field);
> -			}
> -			/* Skip value. */
> -			mp_next(field);
> -		}
> -	}
> -	return -1;
> -}
> -
> -/**
> - * Propagate @a field to MessagePack(field)[key].
> - * @param[in][out] field Field to propagate.
> - * @param key Key to propagate to.
> - * @param len Length of @a key.
> - *
> - * @retval  0 Success, the index was found.
> - * @retval -1 Not found.
> - */
> -static inline int
> -tuple_field_go_to_key(const char **field, const char *key, int len)
> -{
> -	enum mp_type type = mp_typeof(**field);
> -	if (type != MP_MAP)
> -		return -1;
> -	uint64_t count = mp_decode_map(field);
> -	for (; count > 0; --count) {
> -		type = mp_typeof(**field);
> -		if (type == MP_STR) {
> -			uint32_t value_len;
> -			const char *value = mp_decode_str(field, &value_len);
> -			if (value_len == (uint)len &&
> -			    memcmp(value, key, len) == 0)
> -				return 0;
> -		} else {
> -			/* Skip key. */
> -			mp_next(field);
> -		}
> -		/* Skip value. */
> -		mp_next(field);
> -	}
> -	return -1;
> -}
> -
> -/**
>    * Find a tuple field by JSON path.
>    * @param L Lua state.
>    * @param tuple 1-th argument on a lua stack, tuple to get field
> @@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
>   static int
>   lbox_tuple_field_by_path(struct lua_State *L)
>   {
> -	const char *field;
>   	struct tuple *tuple = luaT_istuple(L, 1);
>   	/* Is checked in Lua wrapper. */
>   	assert(tuple != NULL);
> -	if (lua_isnumber(L, 2)) {
> +	const char *field = NULL;
> +	if (!lua_isnumber(L, 2)) {
> +		/* string */
> +		field = tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
> +		                                tuple_field_map(tuple),
> +		                                lua_tostring(L, 2), (int)lua_strlen(L, 2),
> +		                                lua_hashstring(L, 2));
> +	} else {
>   		int index = lua_tointeger(L, 2);
>   		index -= TUPLE_INDEX_BASE;
> -		if (index < 0) {
> -not_found:
> -			lua_pushinteger(L, -1);
> -			lua_pushnil(L);
> -			return 2;
> -		}
> -		field = tuple_field(tuple, index);
> -		if (field == NULL)
> -			goto not_found;
> -push_value:
> -		lua_pushinteger(L, 0);
> -		luamp_decode(L, luaL_msgpack_default, &field);
> -		return 2;
> +		if (index >= 0)
> +			field = tuple_field(tuple, index);
>   	}
> -	assert(lua_isstring(L, 2));
> -	size_t path_len;
> -	const char *path = lua_tolstring(L, 2, &path_len);
> -	struct json_path_parser parser;
> -	struct json_path_node node;
> -	json_path_parser_create(&parser, path, path_len);
> -	int rc = json_path_next(&parser, &node);
> -	if (rc != 0 || node.type == JSON_PATH_END)
> -		luaL_error(L, "Error in path on position %d", rc);
> -	if (node.type == JSON_PATH_NUM) {
> -		int index = node.num;
> -		if (index == 0)
> -			goto not_found;
> -		index -= TUPLE_INDEX_BASE;
> -		field = tuple_field(tuple, index);
> -		if (field == NULL)
> -			goto not_found;
> -	} else {
> -		assert(node.type == JSON_PATH_STR);
> -		/* First part of a path is a field name. */
> -		const char *name = node.str;
> -		uint32_t name_len = node.len;
> -		uint32_t name_hash;
> -		if (path_len == name_len) {
> -			name_hash = lua_hashstring(L, 2);
> -		} else {
> -			/*
> -			 * If a string is "field....", then its
> -			 * precalculated juajit hash can not be
> -			 * used. A tuple dictionary hashes only
> -			 * name, not path.
> -			 */
> -			name_hash = lua_hash(name, name_len);
> -		}
> -		field = tuple_field_by_name(tuple, name, name_len, name_hash);
> -		if (field == NULL)
> -			goto not_found;
> -	}
> -	while ((rc = json_path_next(&parser, &node)) == 0 &&
> -	       node.type != JSON_PATH_END) {
> -		if (node.type == JSON_PATH_NUM) {
> -			rc = tuple_field_go_to_index(&field, node.num);
> -		} else {
> -			assert(node.type == JSON_PATH_STR);
> -			rc = tuple_field_go_to_key(&field, node.str, node.len);
> -		}
> -		if (rc != 0)
> -			goto not_found;
> -	}
> -	if (rc == 0)
> -		goto push_value;
> -	luaL_error(L, "Error in path on position %d", rc);
> -	unreachable();
> -	goto not_found;
> +	/* error code or message */
> +	struct error *e = diag_last_error(diag_get());
> +	if (field == NULL && e != NULL)
> +		lua_pushstring(L, e->errmsg);
> +	else
> +		lua_pushinteger(L, -1*(field == NULL));
> +	if (field)
> +		luamp_decode(L, luaL_msgpack_default, &field);
> +	else
> +		lua_pushnil(L);
> +	diag_clear(diag_get());
> +	return 2;
>   }
>   
>   static int
> diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
> index b51b4df..3785568 100644
> --- a/src/box/lua/tuple.lua
> +++ b/src/box/lua/tuple.lua
> @@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
>   local fun = require('fun')
>   local buffer = require('buffer')
>   local internal = require('box.internal')
> +local log = require('log')
>   
>   ffi.cdef[[
>   /** \cond public */
> @@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
>       end;
>       __tostring = internal.tuple.tostring;
>       __index = function(tuple, key)
> +        local rc, field
>           if type(key) == "string" or type(key) == "number" then
>               -- Try to get a field by json path or by [index]. If
>               -- it was not found (rc ~= 0) then return a method
> @@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
>               -- returns field value, not tuple_bsize function. To
>               -- access hidden methods use
>               -- 'box.tuple.<method_name>(T, [args...])'.
> -            local rc, field = tuple_field_by_path(tuple, key)
> +            rc, field = tuple_field_by_path(tuple, key)
>               if rc == 0 then
>                   return field
>               end
>           end
> -        return methods[key]
> +        local method = methods[key]
> +        return method == box.NULL and rc ~= -1 and rc or method
>       end;
>       __eq = function(tuple_a, tuple_b)
>           -- Two tuple are considered equal if they have same memory address
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index e458f49..9c6e3da 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -28,6 +28,8 @@
>    * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>    * SUCH DAMAGE.
>    */
> +#include <lib/json/path.h>
> +#include <third_party/luajit/src/lua.h>
>   #include "tuple_format.h"
>   
>   /** Global table of tuple formats */
> @@ -478,3 +480,156 @@ box_tuple_format_unref(box_tuple_format_t *format)
>   {
>   	tuple_format_unref(format);
>   }
> +
> +/**
> + * Propagate @a field to MessagePack(field)[index].
> + * @param[in][out] field Field to propagate.
> + * @param index 1-based index to propagate to.
> + *
> + * @retval  0 Success, the index was found.
> + * @retval -1 Not found.
> + */
> +static inline int
> +tuple_field_go_to_index(const char **field, uint64_t index)
> +{
> +	enum mp_type type = mp_typeof(**field);
> +	if (type == MP_ARRAY) {
> +		if (index == 0)
> +			return -1;
> +		/* Make index 0-based. */
> +		index -= TUPLE_INDEX_BASE;
> +		uint32_t count = mp_decode_array(field);
> +		if (index >= count)
> +			return -1;
> +		for (; index > 0; --index)
> +			mp_next(field);
> +		return 0;
> +	} else if (type == MP_MAP) {
> +		uint64_t count = mp_decode_map(field);
> +		for (; count > 0; --count) {
> +			type = mp_typeof(**field);
> +			if (type == MP_UINT) {
> +				uint64_t value = mp_decode_uint(field);
> +				if (value == index)
> +					return 0;
> +			} else if (type == MP_INT) {
> +				int64_t value = mp_decode_int(field);
> +				if (value >= 0 && (uint64_t)value == index)
> +					return 0;
> +			} else {
> +				/* Skip key. */
> +				mp_next(field);
> +			}
> +			/* Skip value. */
> +			mp_next(field);
> +		}
> +	}
> +	return -1;
> +}
> +
> +/**
> + * Propagate @a field to MessagePack(field)[key].
> + * @param[in][out] field Field to propagate.
> + * @param key Key to propagate to.
> + * @param len Length of @a key.
> + *
> + * @retval  0 Success, the index was found.
> + * @retval -1 Not found.
> + */
> +static inline int
> +tuple_field_go_to_key(const char **field, const char *key, int len)
> +{
> +	enum mp_type type = mp_typeof(**field);
> +	if (type != MP_MAP)
> +		return -1;
> +	uint64_t count = mp_decode_map(field);
> +	for (; count > 0; --count) {
> +		type = mp_typeof(**field);
> +		if (type == MP_STR) {
> +			uint32_t value_len;
> +			const char *value = mp_decode_str(field, &value_len);
> +			if (value_len == (uint)len &&
> +			    memcmp(value, key, len) == 0)
> +				return 0;
> +		} else {
> +			/* Skip key. */
> +			mp_next(field);
> +		}
> +		/* Skip value. */
> +		mp_next(field);
> +	}
> +	return -1;
> +}
> +
> +const char *
> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> +                        const uint32_t *field_map,
> +                        const char *path,uint32_t path_len, uint32_t path_hash)
> +{
> +	int rc = 0;
> +	const char *field;
> +	struct json_path_parser parser;
> +	struct json_path_node node;
> +	json_path_parser_create(&parser, path, path_len);
> +	rc = json_path_next(&parser, &node);
> +	if (rc != 0 || node.type == JSON_PATH_END)
> +		goto not_found;
> +	if (node.type == JSON_PATH_NUM) {
> +		int index = node.num;
> +		if (index == 0)
> +			goto not_found;
> +		index -= TUPLE_INDEX_BASE;
> +		field = tuple_field_raw(format, tuple,
> +		                        field_map, index);
> +		if (field == NULL)
> +			goto not_found;
> +	} else {
> +		assert(node.type == JSON_PATH_STR);
> +		/* First part of a path is a field name. */
> +		const char *name = node.str;
> +		uint32_t name_len = node.len;
> +		uint32_t name_hash;
> +		if (path_len == name_len) {
> +			name_hash = path_hash;
> +		} else {
> +			/*
> +			 * If a string is "field....", then its
> +			 * precalculated juajit hash can not be
> +			 * used. A tuple dictionary hashes only
> +			 * name, not path.
> +			 */
> +			name_hash = lua_hash(name, name_len);
> +		}
> +		field = tuple_field_raw_by_name(format, tuple,
> +		                                field_map,
> +		                                name, name_len, name_hash);
> +		if (field == NULL)
> +			goto not_found;
> +	}
> +	while ((rc = json_path_next(&parser, &node)) == 0 &&
> +	       node.type != JSON_PATH_END) {
> +		if (node.type == JSON_PATH_NUM) {
> +			rc = tuple_field_go_to_index(&field, node.num);
> +		} else {
> +			assert(node.type == JSON_PATH_STR);
> +			rc = tuple_field_go_to_key(&field, node.str, node.len);
> +		}
> +		if (rc != 0) {
> +			rc = 0; /* prevent error raise */
> +			goto not_found;
> +		}
> +	}
> +	if (rc == 0)
> +		return field;
> +	not_found:
> +	/* try to use the whole string as identifier */
> +	field = tuple_field_raw_by_name(format, tuple,
> +	                                field_map,
> +	                                path, path_len, path_hash);
> +	if (field)
> +		return field;
> +	if (rc || path_len == 0)
> +		diag_set(IllegalParams, "Error in path on "
> +		                        "position %d", rc);
> +	return NULL;
> +}
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index d35182d..ce17085 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -377,6 +377,21 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple,
>   	return tuple_field_raw(format, tuple, field_map, fieldno);
>   }
>   
> +/**
> + * Get tuple field by its path.
> + * @param Tuple.
> + * @param path Field path.
> + * @param path_len Length of @a path.
> + * @param path_hash Hash of @a path.
> + *
> + * @retval not NULL on field found.
> + * @retval     NULL No field by @a path.
> + */
> +const char *
> +tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
> +                        const uint32_t *field_map,
> +                        const char *path,uint32_t path_len, uint32_t path_hash);
> +
>   #if defined(__cplusplus)
>   } /* extern "C" */
>   #endif /* defined(__cplusplus) */
> diff --git a/src/lib/json/path.c b/src/lib/json/path.c
> index 4a6174e..f3fcc46 100644
> --- a/src/lib/json/path.c
> +++ b/src/lib/json/path.c
> @@ -31,8 +31,10 @@
>   
>   #include "path.h"
>   #include <ctype.h>
> +#include <unicode/uchar.h>
>   #include "trivia/util.h"
>   
> +
>   /** Same as strtoull(), but with limited length. */
>   static inline uint64_t
>   strntoull(const char *src, int len) {
> @@ -45,6 +47,42 @@ strntoull(const char *src, int len) {
>   }
>   
>   /**
> + * Parse string and update parser's state.
> + * @param parser JSON path parser. Upates pos, signs_read.
> + * @param[out] UChar32 to store result.
> + *
> + * @retval 1 Success.
> + * @retval 0 End of string.
> + * @retval -1 Parse error.
> + */
> +static inline int
> +json_read_sign(struct json_path_parser *parser, UChar32 *out)
> +{
> +	if (unlikely(parser->offset == parser->src_len))
> +		return 0;
> +	UChar32 c;
> +	U8_NEXT_OR_FFFD(parser->src, parser->offset, parser->src_len, c)
> +	if (c == 0xFFFD)
> +		return -1;
> +	*out = c;
> +	parser->invalid_sign_off += 1;
> +	return 1;
> +}
> +
> +/**
> + * Reset parser state to previous one.
> + * @param parser JSON path parser.
> + * @param old parser read offset.
> + * @param signs to drop in signs_read counter.
> + */
> +static inline void
> +json_reset_pos(struct json_path_parser *parser, int old_offset, int signs)
> +{
> +	parser->offset = old_offset;
> +	parser->invalid_sign_off -= signs;
> +}
> +
> +/**
>    * Parse string identifier in quotes. Parser either stops right
>    * after the closing quote, or returns an error position.
>    * @param parser JSON path parser.
> @@ -56,24 +94,26 @@ strntoull(const char *src, int len) {
>   static inline int
>   json_parse_string(struct json_path_parser *parser, struct json_path_node *node)
>   {
> -	const char *end = parser->src + parser->src_len;
> -	const char *pos = parser->pos;
> -	assert(pos < end);
> -	char quote_type = *pos;
> -	assert(quote_type == '\'' || quote_type == '"');
> -	/* Skip first quote. */
> -	int len = 0;
> -	++pos;
> -	const char *str = pos;
> -	for (char c = *pos; pos < end && quote_type != c; c = *++pos)
> -		++len;
> -	/* A string must be terminated with quote. */
> -	if (*pos != quote_type || len == 0)
> -		return pos - parser->src + 1;
> -	/* Skip the closing quote. */
> -	parser->pos = pos + 1;
> +	assert(parser->offset < parser->src_len);
> +	UChar32 quote_type;
> +	(void) json_read_sign(parser, &quote_type);
> +	assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"');
> +	int str_offset = parser->offset;
> +	UChar32 c = 0;
> +	int rc = 0;
> +
> +	while (((rc = json_read_sign(parser, &c)) > 0)
> +	       && c != quote_type);
> +	int len = (int)(parser->offset - str_offset - 1);
> +	if (rc < 0 || len == 0)
> +		return -1;
> +	if (c != (UChar32)quote_type) {
> +		parser->invalid_sign_off++;
> +		return -1;
> +	}
> +
>   	node->type = JSON_PATH_STR;
> -	node->str = str;
> +	node->str = parser->src + str_offset;
>   	node->len = len;
>   	return 0;
>   }
> @@ -81,7 +121,7 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node)
>   /**
>    * Parse digit sequence into integer until non-digit is met.
>    * Parser stops right after the last digit.
> - * @param parser JSON parser.
> + * @param parser JSON parser. Updates signs_read field.
>    * @param[out] node JSON node to store result.
>    *
>    * @retval     0 Success.
> @@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node)
>   static inline int
>   json_parse_integer(struct json_path_parser *parser, struct json_path_node *node)
>   {
> -	const char *end = parser->src + parser->src_len;
> -	const char *pos = parser->pos;
> -	assert(pos < end);
> -	const char *str = pos;
> -	int len = 0;
> -	for (char c = *pos; pos < end && isdigit(c); c = *++pos)
> -		++len;
> -	if (len == 0)
> -		return pos - parser->src + 1;
> -	parser->pos = pos;
> +	assert(parser->offset < parser->src_len);
> +	int str_offset = parser->offset;
> +	int last_offset = str_offset;
> +	int len = 0, rc = 0;
> +	UChar32 c = 0;
> +
> +	while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
> +		last_offset = parser->offset;
> +		len++;
> +	}
> +	if (rc > 0 && len > 0 && !u_isdigit(c))
> +		json_reset_pos(parser, last_offset, 1);
> +	if (rc < 0 || len == 0)
> +		return -1;
> +
>   	node->type = JSON_PATH_NUM;
> -	node->num = strntoull(str, len);
> +	node->num = strntoull(parser->src + str_offset, len);
>   	return 0;
>   }
>   
> +static inline bool
> +identifier_valid_sign(UChar32 c)
> +{
> +	return u_isUAlphabetic(c)
> +	       || c == (UChar32)'_'
> +	       || u_isdigit(c);
> +}
> +
>   /**
>    * Parse identifier out of quotes. It can contain only alphas,
>    * digits and underscores. And can not contain digit at the first
>    * position. Parser is stoped right after the last non-digit,
>    * non-alpha and non-underscore symbol.
> - * @param parser JSON parser.
> + * @param parser JSON parser. Updates signs_read field.
>    * @param[out] node JSON node to store result.
>    *
>    * @retval     0 Success.
> @@ -120,68 +173,78 @@ static inline int
>   json_parse_identifier(struct json_path_parser *parser,
>   		      struct json_path_node *node)
>   {
> -	const char *end = parser->src + parser->src_len;
> -	const char *pos = parser->pos;
> -	assert(pos < end);
> -	const char *str = pos;
> -	char c = *pos;
> +	assert(parser->offset < parser->src_len);
> +	int str_offset = parser->offset;
> +	UChar32 c;
> +	int rc = 0;
> +	if (json_read_sign(parser, &c) < 0)
> +		return -1;
>   	/* First symbol can not be digit. */
> -	if (!isalpha(c) && c != '_')
> -		return pos - parser->src + 1;
> -	int len = 1;
> -	for (c = *++pos; pos < end && (isalpha(c) || c == '_' || isdigit(c));
> -	     c = *++pos)
> -		++len;
> -	assert(len > 0);
> -	parser->pos = pos;
> +	if (!u_isalpha(c) && c != (UChar32)'_')
> +		return -1;
> +
> +	int last_offset = parser->offset;
> +	while ((rc = json_read_sign(parser, &c)) > 0 && identifier_valid_sign(c))
> +		last_offset = parser->offset;
> +	if (rc > 0 && !identifier_valid_sign(c))
> +		json_reset_pos(parser, last_offset, 1);
> +	if (rc < 0)
> +		return -1;
> +
>   	node->type = JSON_PATH_STR;
> -	node->str = str;
> -	node->len = len;
> +	node->str = parser->src + str_offset;
> +	node->len = parser->offset - str_offset;
>   	return 0;
>   }
>   
> +static inline char
> +json_curr_char(struct json_path_parser *parser)
> +{
> +	return *json_path_curr_substring(parser);
> +}
> +
>   int
>   json_path_next(struct json_path_parser *parser, struct json_path_node *node)
>   {
> -	const char *end = parser->src + parser->src_len;
> -	if (end == parser->pos) {
> +	int end_offset = parser->src_len;
> +	if (end_offset == parser->offset) {
>   		node->type = JSON_PATH_END;
>   		return 0;
>   	}
> -	char c = *parser->pos;
> +	UChar32 c = 0;
> +	int last_offset = parser->offset;
> +	if (json_read_sign(parser, &c) < 0)
> +		return parser->invalid_sign_off;
>   	int rc;
>   	switch(c) {
> -	case '[':
> -		++parser->pos;
> +	case (UChar32)'[':
>   		/* Error for []. */
> -		if (parser->pos == end)
> -			return parser->pos - parser->src + 1;
> -		c = *parser->pos;
> +		if (parser->offset == end_offset)
> +			return parser->invalid_sign_off;
> +		c = json_curr_char(parser);
>   		if (c == '"' || c == '\'')
>   			rc = json_parse_string(parser, node);
>   		else
>   			rc = json_parse_integer(parser, node);
>   		if (rc != 0)
> -			return rc;
> +			return parser->invalid_sign_off;
>   		/*
>   		 * Expression, started from [ must be finished
>   		 * with ] regardless of its type.
>   		 */
> -		if (parser->pos == end || *parser->pos != ']')
> -			return parser->pos - parser->src + 1;
> +		if (parser->offset == end_offset || json_curr_char(parser) != ']')
> +			return parser->invalid_sign_off + 1;
>   		/* Skip ]. */
> -		++parser->pos;
> +		(void) json_read_sign(parser, &c);
>   		break;
> -	case '.':
> -		/* Skip dot. */
> -		++parser->pos;
> -		if (parser->pos == end)
> -			return parser->pos - parser->src + 1;
> -		FALLTHROUGH
>   	default:
> +		if (c != (UChar32)'.')
> +			json_reset_pos(parser, last_offset, 1);
> +		else if (parser->offset == end_offset)
> +			return parser->invalid_sign_off + 1;
>   		rc = json_parse_identifier(parser, node);
>   		if (rc != 0)
> -			return rc;
> +			return parser->invalid_sign_off;
>   		break;
>   	}
>   	return 0;
> diff --git a/src/lib/json/path.h b/src/lib/json/path.h
> index 6e8db4c..15292fb 100644
> --- a/src/lib/json/path.h
> +++ b/src/lib/json/path.h
> @@ -45,10 +45,11 @@ extern "C" {
>   struct json_path_parser {
>   	/** Source string. */
>   	const char *src;
> -	/** Length of src. */
> +	/** Length of string. */
>   	int src_len;
> -	/** Current parser's position. */
> -	const char *pos;
> +	/** Current parser's offset. */
> +	int offset;
> +	int invalid_sign_off;
>   };
>   
>   enum json_path_type {
> @@ -78,18 +79,30 @@ struct json_path_node {
>   };
>   
>   /**
> - * Create @a parser.
> + * Init @a parser.
>    * @param[out] parser Parser to create.
>    * @param src Source string.
>    * @param src_len Length of @a src.
>    */
>   static inline void
>   json_path_parser_create(struct json_path_parser *parser, const char *src,
> -			int src_len)
> +                        int src_len)
>   {
>   	parser->src = src;
>   	parser->src_len = src_len;
> -	parser->pos = src;
> +	parser->offset = 0;
> +	parser->invalid_sign_off = 0;
> +}
> +
> +/**
> + * Get current substring of parser.
> + * @param parser Parser.
> + * @retval ptr to substring
> + */
> +static inline const char *
> +json_path_curr_substring(struct json_path_parser *parser)
> +{
> +	return parser->src + parser->offset;
>   }
>   
>   /**
> diff --git a/test/engine/tuple.result b/test/engine/tuple.result
> index 2d7367a..3a8e828 100644
> --- a/test/engine/tuple.result
> +++ b/test/engine/tuple.result
> @@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'}
>   format[3] = {name = 'field3', type = 'map'}
>   ---
>   ...
> -format[4] = {name = 'field4', type = 'string'}
> +format[4] = {name = 'field4', type = 'string' }
> +---
> +...
> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
>   ---
>   ...
>   s = box.schema.space.create('test', {format = format})
> @@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format = format})
>   pk = s:create_index('pk')
>   ---
>   ...
> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}}
>   ---
>   ...
>   field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
>   ---
>   ...
> -t = s:replace{1, field2, field3, "123456"}
> +t = s:replace{1, field2, field3, "123456", "yes, this"}
>   ---
>   ...
>   t[1]
> @@ -626,7 +629,7 @@ t[1]
>   ...
>   t[2]
>   ---
> -- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
> +- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 'key': 'value1', 'value': 'key1'}]
>   ...
>   t[3]
>   ---
> @@ -667,19 +670,43 @@ t["[2][5][3]"]
>   ...
>   t["[2][6].key"]
>   ---
> -- key1
> +- value1
>   ...
>   t["[2][6].value"]
>   ---
> -- value1
> +- key1
>   ...
>   t["[2][6]['key']"]
>   ---
> -- key1
> +- value1
>   ...
>   t["[2][6]['value']"]
>   ---
> -- value1
> +- key1
> +...
> +t[2][6].привет中国world.中国
> +---
> +- привет
> +...
> +t["[2][6].привет中国world"].中国
> +---
> +- привет
> +...
> +t["[2][6].привет中国world.中国"]
> +---
> +- привет
> +...
> +t["[2][6]['привет中国world']"]["中国"]
> +---
> +- привет
> +...
> +t["[2][6]['привет中国world']['中国']"]
> +---
> +- привет
> +...
> +t["[2][6]['привет中国world']['中国a']"]
> +---
> +- yes, this
>   ...
>   t["[3].k3[2].c"]
>   ---
> @@ -759,31 +786,31 @@ t["a.b.c d.e.f"]
>   -- Sytax errors.
>   t[""]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 0'
> +- Error in path on position 0
>   ...
>   t["[2].[5]"]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 5'
> +- Error in path on position 5
>   ...
>   t["[-1]"]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
> +- Error in path on position 2
>   ...
>   t[".."]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
> +- Error in path on position 2
>   ...
>   t["[["]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
> +- Error in path on position 2
>   ...
>   t["]]"]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
> +- Error in path on position 1
>   ...
>   t["{"]
>   ---
> -- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
> +- Error in path on position 1
>   ...
>   s:drop()
>   ---
> diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
> index ba3482d..90da8b2 100644
> --- a/test/engine/tuple.test.lua
> +++ b/test/engine/tuple.test.lua
> @@ -204,12 +204,13 @@ format = {}
>   format[1] = {name = 'field1', type = 'unsigned'}
>   format[2] = {name = 'field2', type = 'array'}
>   format[3] = {name = 'field3', type = 'map'}
> -format[4] = {name = 'field4', type = 'string'}
> +format[4] = {name = 'field4', type = 'string' }
> +format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
>   s = box.schema.space.create('test', {format = format})
>   pk = s:create_index('pk')
> -field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
> +field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}}
>   field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
> -t = s:replace{1, field2, field3, "123456"}
> +t = s:replace{1, field2, field3, "123456", "yes, this"}
>   t[1]
>   t[2]
>   t[3]
> @@ -225,6 +226,12 @@ t["[2][6].key"]
>   t["[2][6].value"]
>   t["[2][6]['key']"]
>   t["[2][6]['value']"]
> +t[2][6].привет中国world.中国
> +t["[2][6].привет中国world"].中国
> +t["[2][6].привет中国world.中国"]
> +t["[2][6]['привет中国world']"]["中国"]
> +t["[2][6]['привет中国world']['中国']"]
> +t["[2][6]['привет中国world']['中国a']"]
>   t["[3].k3[2].c"]
>   t["[4]"]
>   t.field1
> diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
> index fe8b2d2..667194c 100644
> --- a/test/unit/CMakeLists.txt
> +++ b/test/unit/CMakeLists.txt
> @@ -130,7 +130,7 @@ add_executable(csv.test csv.c)
>   target_link_libraries(csv.test csv)
>   
>   add_executable(json_path.test json_path.c)
> -target_link_libraries(json_path.test json_path unit)
> +target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES})
>   
>   add_executable(rmean.test rmean.cc)
>   target_link_libraries(rmean.test stat unit)
> diff --git a/test/unit/json_path.c b/test/unit/json_path.c
> index 599658b..81ef7fc 100644
> --- a/test/unit/json_path.c
> +++ b/test/unit/json_path.c
> @@ -9,7 +9,7 @@
>   	json_path_parser_create(&parser, path, len);
>   
>   #define is_next_index(value_len, value) \
> -	path = parser.pos; \
> +	path = json_path_curr_substring(&parser); \
>   	is(json_path_next(&parser, &node), 0, "parse <%." #value_len "s>", \
>   	   path); \
>   	is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", path); \
> 

  reply	other threads:[~2018-04-05 18:00 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-29 14:22 [tarantool-patches] [PATCH v2 0/3] tuple field access via a json path Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 1/3] Introduce json_path_parser Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support Kirill Shcherbatov
2018-03-29 18:04   ` [tarantool-patches] " Kirill Shcherbatov
2018-03-30 10:24     ` v.shpilevoy
2018-03-30 10:25       ` v.shpilevoy
2018-04-02 19:19       ` Kirill Shcherbatov
2018-04-03 10:20         ` Vladislav Shpilevoy
2018-04-05 14:09           ` [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser Kirill Shcherbatov
2018-04-05 18:00             ` Kirill Shcherbatov [this message]
2018-04-05 23:32               ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-04 10:37 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU Kirill Shcherbatov
2018-04-04 11:30   ` [tarantool-patches] " Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=75b15d0f-0c35-dd26-4e30-4f085208d8d7@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='[tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox