Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org,
	Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser.
Date: Thu,  5 Apr 2018 17:09:36 +0300	[thread overview]
Message-ID: <e35b03d5e6f0f78e38ee9db9b18e88bf9fef5f70.1522937317.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <08b2862f-7741-8d62-c2e7-293964e69376@tarantool.org>

---
 src/box/CMakeLists.txt     |   2 +-
 src/box/lua/tuple.c        | 172 ++++++----------------------------------
 src/box/lua/tuple.lua      |   7 +-
 src/box/tuple_format.c     | 155 ++++++++++++++++++++++++++++++++++++
 src/box/tuple_format.h     |  15 ++++
 src/lib/json/path.c        | 191 ++++++++++++++++++++++++++++++---------------
 src/lib/json/path.h        |  25 ++++--
 test/engine/tuple.result   |  57 ++++++++++----
 test/engine/tuple.test.lua |  13 ++-
 test/unit/CMakeLists.txt   |   2 +-
 test/unit/json_path.c      |   2 +-
 11 files changed, 398 insertions(+), 243 deletions(-)

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index f053ae4..e5a0c21 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -45,7 +45,7 @@ add_library(tuple STATIC
     field_def.c
     opt_def.c
 )
-target_link_libraries(tuple box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit)
+target_link_libraries(tuple json_path box_error core ${MSGPUCK_LIBRARIES} ${ICU_LIBRARIES} misc bit ${common_libraries})
 
 add_library(xlog STATIC xlog.c)
 target_link_libraries(xlog core box_error crc32 ${ZSTD_LIBRARIES})
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 99b9ff2..3b89cdd 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -403,87 +403,6 @@ lbox_tuple_transform(struct lua_State *L)
 }
 
 /**
- * Propagate @a field to MessagePack(field)[index].
- * @param[in][out] field Field to propagate.
- * @param index 1-based index to propagate to.
- *
- * @retval  0 Success, the index was found.
- * @retval -1 Not found.
- */
-static inline int
-tuple_field_go_to_index(const char **field, uint64_t index)
-{
-	assert(index >= 0);
-	enum mp_type type = mp_typeof(**field);
-	if (type == MP_ARRAY) {
-		if (index == 0)
-			return -1;
-		/* Make index 0-based. */
-		index -= TUPLE_INDEX_BASE;
-		uint32_t count = mp_decode_array(field);
-		if (index >= count)
-			return -1;
-		for (; index > 0; --index)
-			mp_next(field);
-		return 0;
-	} else if (type == MP_MAP) {
-		uint64_t count = mp_decode_map(field);
-		for (; count > 0; --count) {
-			type = mp_typeof(**field);
-			if (type == MP_UINT) {
-				uint64_t value = mp_decode_uint(field);
-				if (value == index)
-					return 0;
-			} else if (type == MP_INT) {
-				int64_t value = mp_decode_int(field);
-				if (value >= 0 && (uint64_t)value == index)
-					return 0;
-			} else {
-				/* Skip key. */
-				mp_next(field);
-			}
-			/* Skip value. */
-			mp_next(field);
-		}
-	}
-	return -1;
-}
-
-/**
- * Propagate @a field to MessagePack(field)[key].
- * @param[in][out] field Field to propagate.
- * @param key Key to propagate to.
- * @param len Length of @a key.
- *
- * @retval  0 Success, the index was found.
- * @retval -1 Not found.
- */
-static inline int
-tuple_field_go_to_key(const char **field, const char *key, int len)
-{
-	enum mp_type type = mp_typeof(**field);
-	if (type != MP_MAP)
-		return -1;
-	uint64_t count = mp_decode_map(field);
-	for (; count > 0; --count) {
-		type = mp_typeof(**field);
-		if (type == MP_STR) {
-			uint32_t value_len;
-			const char *value = mp_decode_str(field, &value_len);
-			if (value_len == (uint)len &&
-			    memcmp(value, key, len) == 0)
-				return 0;
-		} else {
-			/* Skip key. */
-			mp_next(field);
-		}
-		/* Skip value. */
-		mp_next(field);
-	}
-	return -1;
-}
-
-/**
  * Find a tuple field by JSON path.
  * @param L Lua state.
  * @param tuple 1-th argument on a lua stack, tuple to get field
@@ -497,81 +416,34 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
 static int
 lbox_tuple_field_by_path(struct lua_State *L)
 {
-	const char *field;
 	struct tuple *tuple = luaT_istuple(L, 1);
 	/* Is checked in Lua wrapper. */
 	assert(tuple != NULL);
-	if (lua_isnumber(L, 2)) {
+	const char *field = NULL;
+	if (!lua_isnumber(L, 2)) {
+		/* string */
+		field = tuple_field_raw_by_path(tuple_format(tuple), tuple_data(tuple),
+		                                tuple_field_map(tuple),
+		                                lua_tostring(L, 2), (int)lua_strlen(L, 2),
+		                                lua_hashstring(L, 2));
+	} else {
 		int index = lua_tointeger(L, 2);
 		index -= TUPLE_INDEX_BASE;
-		if (index < 0) {
-not_found:
-			lua_pushinteger(L, -1);
-			lua_pushnil(L);
-			return 2;
-		}
-		field = tuple_field(tuple, index);
-		if (field == NULL)
-			goto not_found;
-push_value:
-		lua_pushinteger(L, 0);
-		luamp_decode(L, luaL_msgpack_default, &field);
-		return 2;
+		if (index >= 0)
+			field = tuple_field(tuple, index);
 	}
-	assert(lua_isstring(L, 2));
-	size_t path_len;
-	const char *path = lua_tolstring(L, 2, &path_len);
-	struct json_path_parser parser;
-	struct json_path_node node;
-	json_path_parser_create(&parser, path, path_len);
-	int rc = json_path_next(&parser, &node);
-	if (rc != 0 || node.type == JSON_PATH_END)
-		luaL_error(L, "Error in path on position %d", rc);
-	if (node.type == JSON_PATH_NUM) {
-		int index = node.num;
-		if (index == 0)
-			goto not_found;
-		index -= TUPLE_INDEX_BASE;
-		field = tuple_field(tuple, index);
-		if (field == NULL)
-			goto not_found;
-	} else {
-		assert(node.type == JSON_PATH_STR);
-		/* First part of a path is a field name. */
-		const char *name = node.str;
-		uint32_t name_len = node.len;
-		uint32_t name_hash;
-		if (path_len == name_len) {
-			name_hash = lua_hashstring(L, 2);
-		} else {
-			/*
-			 * If a string is "field....", then its
-			 * precalculated juajit hash can not be
-			 * used. A tuple dictionary hashes only
-			 * name, not path.
-			 */
-			name_hash = lua_hash(name, name_len);
-		}
-		field = tuple_field_by_name(tuple, name, name_len, name_hash);
-		if (field == NULL)
-			goto not_found;
-	}
-	while ((rc = json_path_next(&parser, &node)) == 0 &&
-	       node.type != JSON_PATH_END) {
-		if (node.type == JSON_PATH_NUM) {
-			rc = tuple_field_go_to_index(&field, node.num);
-		} else {
-			assert(node.type == JSON_PATH_STR);
-			rc = tuple_field_go_to_key(&field, node.str, node.len);
-		}
-		if (rc != 0)
-			goto not_found;
-	}
-	if (rc == 0)
-		goto push_value;
-	luaL_error(L, "Error in path on position %d", rc);
-	unreachable();
-	goto not_found;
+	/* error code or message */
+	struct error *e = diag_last_error(diag_get());
+	if (field == NULL && e != NULL)
+		lua_pushstring(L, e->errmsg);
+	else
+		lua_pushinteger(L, -1*(field == NULL));
+	if (field)
+		luamp_decode(L, luaL_msgpack_default, &field);
+	else
+		lua_pushnil(L);
+	diag_clear(diag_get());
+	return 2;
 }
 
 static int
diff --git a/src/box/lua/tuple.lua b/src/box/lua/tuple.lua
index b51b4df..3785568 100644
--- a/src/box/lua/tuple.lua
+++ b/src/box/lua/tuple.lua
@@ -6,6 +6,7 @@ local msgpackffi = require('msgpackffi')
 local fun = require('fun')
 local buffer = require('buffer')
 local internal = require('box.internal')
+local log = require('log')
 
 ffi.cdef[[
 /** \cond public */
@@ -302,6 +303,7 @@ ffi.metatype(tuple_t, {
     end;
     __tostring = internal.tuple.tostring;
     __index = function(tuple, key)
+        local rc, field
         if type(key) == "string" or type(key) == "number" then
             -- Try to get a field by json path or by [index]. If
             -- it was not found (rc ~= 0) then return a method
@@ -311,12 +313,13 @@ ffi.metatype(tuple_t, {
             -- returns field value, not tuple_bsize function. To
             -- access hidden methods use
             -- 'box.tuple.<method_name>(T, [args...])'.
-            local rc, field = tuple_field_by_path(tuple, key)
+            rc, field = tuple_field_by_path(tuple, key)
             if rc == 0 then
                 return field
             end
         end
-        return methods[key]
+        local method = methods[key]
+        return method == box.NULL and rc ~= -1 and rc or method
     end;
     __eq = function(tuple_a, tuple_b)
         -- Two tuple are considered equal if they have same memory address
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index e458f49..9c6e3da 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -28,6 +28,8 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+#include <lib/json/path.h>
+#include <third_party/luajit/src/lua.h>
 #include "tuple_format.h"
 
 /** Global table of tuple formats */
@@ -478,3 +480,156 @@ box_tuple_format_unref(box_tuple_format_t *format)
 {
 	tuple_format_unref(format);
 }
+
+/**
+ * Propagate @a field to MessagePack(field)[index].
+ * @param[in][out] field Field to propagate.
+ * @param index 1-based index to propagate to.
+ *
+ * @retval  0 Success, the index was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_index(const char **field, uint64_t index)
+{
+	enum mp_type type = mp_typeof(**field);
+	if (type == MP_ARRAY) {
+		if (index == 0)
+			return -1;
+		/* Make index 0-based. */
+		index -= TUPLE_INDEX_BASE;
+		uint32_t count = mp_decode_array(field);
+		if (index >= count)
+			return -1;
+		for (; index > 0; --index)
+			mp_next(field);
+		return 0;
+	} else if (type == MP_MAP) {
+		uint64_t count = mp_decode_map(field);
+		for (; count > 0; --count) {
+			type = mp_typeof(**field);
+			if (type == MP_UINT) {
+				uint64_t value = mp_decode_uint(field);
+				if (value == index)
+					return 0;
+			} else if (type == MP_INT) {
+				int64_t value = mp_decode_int(field);
+				if (value >= 0 && (uint64_t)value == index)
+					return 0;
+			} else {
+				/* Skip key. */
+				mp_next(field);
+			}
+			/* Skip value. */
+			mp_next(field);
+		}
+	}
+	return -1;
+}
+
+/**
+ * Propagate @a field to MessagePack(field)[key].
+ * @param[in][out] field Field to propagate.
+ * @param key Key to propagate to.
+ * @param len Length of @a key.
+ *
+ * @retval  0 Success, the index was found.
+ * @retval -1 Not found.
+ */
+static inline int
+tuple_field_go_to_key(const char **field, const char *key, int len)
+{
+	enum mp_type type = mp_typeof(**field);
+	if (type != MP_MAP)
+		return -1;
+	uint64_t count = mp_decode_map(field);
+	for (; count > 0; --count) {
+		type = mp_typeof(**field);
+		if (type == MP_STR) {
+			uint32_t value_len;
+			const char *value = mp_decode_str(field, &value_len);
+			if (value_len == (uint)len &&
+			    memcmp(value, key, len) == 0)
+				return 0;
+		} else {
+			/* Skip key. */
+			mp_next(field);
+		}
+		/* Skip value. */
+		mp_next(field);
+	}
+	return -1;
+}
+
+const char *
+tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
+                        const uint32_t *field_map,
+                        const char *path,uint32_t path_len, uint32_t path_hash)
+{
+	int rc = 0;
+	const char *field;
+	struct json_path_parser parser;
+	struct json_path_node node;
+	json_path_parser_create(&parser, path, path_len);
+	rc = json_path_next(&parser, &node);
+	if (rc != 0 || node.type == JSON_PATH_END)
+		goto not_found;
+	if (node.type == JSON_PATH_NUM) {
+		int index = node.num;
+		if (index == 0)
+			goto not_found;
+		index -= TUPLE_INDEX_BASE;
+		field = tuple_field_raw(format, tuple,
+		                        field_map, index);
+		if (field == NULL)
+			goto not_found;
+	} else {
+		assert(node.type == JSON_PATH_STR);
+		/* First part of a path is a field name. */
+		const char *name = node.str;
+		uint32_t name_len = node.len;
+		uint32_t name_hash;
+		if (path_len == name_len) {
+			name_hash = path_hash;
+		} else {
+			/*
+			 * If a string is "field....", then its
+			 * precalculated juajit hash can not be
+			 * used. A tuple dictionary hashes only
+			 * name, not path.
+			 */
+			name_hash = lua_hash(name, name_len);
+		}
+		field = tuple_field_raw_by_name(format, tuple,
+		                                field_map,
+		                                name, name_len, name_hash);
+		if (field == NULL)
+			goto not_found;
+	}
+	while ((rc = json_path_next(&parser, &node)) == 0 &&
+	       node.type != JSON_PATH_END) {
+		if (node.type == JSON_PATH_NUM) {
+			rc = tuple_field_go_to_index(&field, node.num);
+		} else {
+			assert(node.type == JSON_PATH_STR);
+			rc = tuple_field_go_to_key(&field, node.str, node.len);
+		}
+		if (rc != 0) {
+			rc = 0; /* prevent error raise */
+			goto not_found;
+		}
+	}
+	if (rc == 0)
+		return field;
+	not_found:
+	/* try to use the whole string as identifier */
+	field = tuple_field_raw_by_name(format, tuple,
+	                                field_map,
+	                                path, path_len, path_hash);
+	if (field)
+		return field;
+	if (rc || path_len == 0)
+		diag_set(IllegalParams, "Error in path on "
+		                        "position %d", rc);
+	return NULL;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index d35182d..ce17085 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -377,6 +377,21 @@ tuple_field_raw_by_name(struct tuple_format *format, const char *tuple,
 	return tuple_field_raw(format, tuple, field_map, fieldno);
 }
 
+/**
+ * Get tuple field by its path.
+ * @param Tuple.
+ * @param path Field path.
+ * @param path_len Length of @a path.
+ * @param path_hash Hash of @a path.
+ *
+ * @retval not NULL on field found.
+ * @retval     NULL No field by @a path.
+ */
+const char *
+tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
+                        const uint32_t *field_map,
+                        const char *path,uint32_t path_len, uint32_t path_hash);
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/lib/json/path.c b/src/lib/json/path.c
index 4a6174e..f3fcc46 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/path.c
@@ -31,8 +31,10 @@
 
 #include "path.h"
 #include <ctype.h>
+#include <unicode/uchar.h>
 #include "trivia/util.h"
 
+
 /** Same as strtoull(), but with limited length. */
 static inline uint64_t
 strntoull(const char *src, int len) {
@@ -45,6 +47,42 @@ strntoull(const char *src, int len) {
 }
 
 /**
+ * Parse string and update parser's state.
+ * @param parser JSON path parser. Upates pos, signs_read.
+ * @param[out] UChar32 to store result.
+ *
+ * @retval 1 Success.
+ * @retval 0 End of string.
+ * @retval -1 Parse error.
+ */
+static inline int
+json_read_sign(struct json_path_parser *parser, UChar32 *out)
+{
+	if (unlikely(parser->offset == parser->src_len))
+		return 0;
+	UChar32 c;
+	U8_NEXT_OR_FFFD(parser->src, parser->offset, parser->src_len, c)
+	if (c == 0xFFFD)
+		return -1;
+	*out = c;
+	parser->invalid_sign_off += 1;
+	return 1;
+}
+
+/**
+ * Reset parser state to previous one.
+ * @param parser JSON path parser.
+ * @param old parser read offset.
+ * @param signs to drop in signs_read counter.
+ */
+static inline void
+json_reset_pos(struct json_path_parser *parser, int old_offset, int signs)
+{
+	parser->offset = old_offset;
+	parser->invalid_sign_off -= signs;
+}
+
+/**
  * Parse string identifier in quotes. Parser either stops right
  * after the closing quote, or returns an error position.
  * @param parser JSON path parser.
@@ -56,24 +94,26 @@ strntoull(const char *src, int len) {
 static inline int
 json_parse_string(struct json_path_parser *parser, struct json_path_node *node)
 {
-	const char *end = parser->src + parser->src_len;
-	const char *pos = parser->pos;
-	assert(pos < end);
-	char quote_type = *pos;
-	assert(quote_type == '\'' || quote_type == '"');
-	/* Skip first quote. */
-	int len = 0;
-	++pos;
-	const char *str = pos;
-	for (char c = *pos; pos < end && quote_type != c; c = *++pos)
-		++len;
-	/* A string must be terminated with quote. */
-	if (*pos != quote_type || len == 0)
-		return pos - parser->src + 1;
-	/* Skip the closing quote. */
-	parser->pos = pos + 1;
+	assert(parser->offset < parser->src_len);
+	UChar32 quote_type;
+	(void) json_read_sign(parser, &quote_type);
+	assert(quote_type == (UChar32)'\'' || quote_type == (UChar32)'"');
+	int str_offset = parser->offset;
+	UChar32 c = 0;
+	int rc = 0;
+
+	while (((rc = json_read_sign(parser, &c)) > 0)
+	       && c != quote_type);
+	int len = (int)(parser->offset - str_offset - 1);
+	if (rc < 0 || len == 0)
+		return -1;
+	if (c != (UChar32)quote_type) {
+		parser->invalid_sign_off++;
+		return -1;
+	}
+
 	node->type = JSON_PATH_STR;
-	node->str = str;
+	node->str = parser->src + str_offset;
 	node->len = len;
 	return 0;
 }
@@ -81,7 +121,7 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node)
 /**
  * Parse digit sequence into integer until non-digit is met.
  * Parser stops right after the last digit.
- * @param parser JSON parser.
+ * @param parser JSON parser. Updates signs_read field.
  * @param[out] node JSON node to store result.
  *
  * @retval     0 Success.
@@ -90,27 +130,40 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node)
 static inline int
 json_parse_integer(struct json_path_parser *parser, struct json_path_node *node)
 {
-	const char *end = parser->src + parser->src_len;
-	const char *pos = parser->pos;
-	assert(pos < end);
-	const char *str = pos;
-	int len = 0;
-	for (char c = *pos; pos < end && isdigit(c); c = *++pos)
-		++len;
-	if (len == 0)
-		return pos - parser->src + 1;
-	parser->pos = pos;
+	assert(parser->offset < parser->src_len);
+	int str_offset = parser->offset;
+	int last_offset = str_offset;
+	int len = 0, rc = 0;
+	UChar32 c = 0;
+
+	while (((rc = json_read_sign(parser, &c)) > 0) && u_isdigit(c)) {
+		last_offset = parser->offset;
+		len++;
+	}
+	if (rc > 0 && len > 0 && !u_isdigit(c))
+		json_reset_pos(parser, last_offset, 1);
+	if (rc < 0 || len == 0)
+		return -1;
+
 	node->type = JSON_PATH_NUM;
-	node->num = strntoull(str, len);
+	node->num = strntoull(parser->src + str_offset, len);
 	return 0;
 }
 
+static inline bool
+identifier_valid_sign(UChar32 c)
+{
+	return u_isUAlphabetic(c)
+	       || c == (UChar32)'_'
+	       || u_isdigit(c);
+}
+
 /**
  * Parse identifier out of quotes. It can contain only alphas,
  * digits and underscores. And can not contain digit at the first
  * position. Parser is stoped right after the last non-digit,
  * non-alpha and non-underscore symbol.
- * @param parser JSON parser.
+ * @param parser JSON parser. Updates signs_read field.
  * @param[out] node JSON node to store result.
  *
  * @retval     0 Success.
@@ -120,68 +173,78 @@ static inline int
 json_parse_identifier(struct json_path_parser *parser,
 		      struct json_path_node *node)
 {
-	const char *end = parser->src + parser->src_len;
-	const char *pos = parser->pos;
-	assert(pos < end);
-	const char *str = pos;
-	char c = *pos;
+	assert(parser->offset < parser->src_len);
+	int str_offset = parser->offset;
+	UChar32 c;
+	int rc = 0;
+	if (json_read_sign(parser, &c) < 0)
+		return -1;
 	/* First symbol can not be digit. */
-	if (!isalpha(c) && c != '_')
-		return pos - parser->src + 1;
-	int len = 1;
-	for (c = *++pos; pos < end && (isalpha(c) || c == '_' || isdigit(c));
-	     c = *++pos)
-		++len;
-	assert(len > 0);
-	parser->pos = pos;
+	if (!u_isalpha(c) && c != (UChar32)'_')
+		return -1;
+
+	int last_offset = parser->offset;
+	while ((rc = json_read_sign(parser, &c)) > 0 && identifier_valid_sign(c))
+		last_offset = parser->offset;
+	if (rc > 0 && !identifier_valid_sign(c))
+		json_reset_pos(parser, last_offset, 1);
+	if (rc < 0)
+		return -1;
+
 	node->type = JSON_PATH_STR;
-	node->str = str;
-	node->len = len;
+	node->str = parser->src + str_offset;
+	node->len = parser->offset - str_offset;
 	return 0;
 }
 
+static inline char
+json_curr_char(struct json_path_parser *parser)
+{
+	return *json_path_curr_substring(parser);
+}
+
 int
 json_path_next(struct json_path_parser *parser, struct json_path_node *node)
 {
-	const char *end = parser->src + parser->src_len;
-	if (end == parser->pos) {
+	int end_offset = parser->src_len;
+	if (end_offset == parser->offset) {
 		node->type = JSON_PATH_END;
 		return 0;
 	}
-	char c = *parser->pos;
+	UChar32 c = 0;
+	int last_offset = parser->offset;
+	if (json_read_sign(parser, &c) < 0)
+		return parser->invalid_sign_off;
 	int rc;
 	switch(c) {
-	case '[':
-		++parser->pos;
+	case (UChar32)'[':
 		/* Error for []. */
-		if (parser->pos == end)
-			return parser->pos - parser->src + 1;
-		c = *parser->pos;
+		if (parser->offset == end_offset)
+			return parser->invalid_sign_off;
+		c = json_curr_char(parser);
 		if (c == '"' || c == '\'')
 			rc = json_parse_string(parser, node);
 		else
 			rc = json_parse_integer(parser, node);
 		if (rc != 0)
-			return rc;
+			return parser->invalid_sign_off;
 		/*
 		 * Expression, started from [ must be finished
 		 * with ] regardless of its type.
 		 */
-		if (parser->pos == end || *parser->pos != ']')
-			return parser->pos - parser->src + 1;
+		if (parser->offset == end_offset || json_curr_char(parser) != ']')
+			return parser->invalid_sign_off + 1;
 		/* Skip ]. */
-		++parser->pos;
+		(void) json_read_sign(parser, &c);
 		break;
-	case '.':
-		/* Skip dot. */
-		++parser->pos;
-		if (parser->pos == end)
-			return parser->pos - parser->src + 1;
-		FALLTHROUGH
 	default:
+		if (c != (UChar32)'.')
+			json_reset_pos(parser, last_offset, 1);
+		else if (parser->offset == end_offset)
+			return parser->invalid_sign_off + 1;
 		rc = json_parse_identifier(parser, node);
 		if (rc != 0)
-			return rc;
+			return parser->invalid_sign_off;
 		break;
 	}
 	return 0;
diff --git a/src/lib/json/path.h b/src/lib/json/path.h
index 6e8db4c..15292fb 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/path.h
@@ -45,10 +45,11 @@ extern "C" {
 struct json_path_parser {
 	/** Source string. */
 	const char *src;
-	/** Length of src. */
+	/** Length of string. */
 	int src_len;
-	/** Current parser's position. */
-	const char *pos;
+	/** Current parser's offset. */
+	int offset;
+	int invalid_sign_off;
 };
 
 enum json_path_type {
@@ -78,18 +79,30 @@ struct json_path_node {
 };
 
 /**
- * Create @a parser.
+ * Init @a parser.
  * @param[out] parser Parser to create.
  * @param src Source string.
  * @param src_len Length of @a src.
  */
 static inline void
 json_path_parser_create(struct json_path_parser *parser, const char *src,
-			int src_len)
+                        int src_len)
 {
 	parser->src = src;
 	parser->src_len = src_len;
-	parser->pos = src;
+	parser->offset = 0;
+	parser->invalid_sign_off = 0;
+}
+
+/**
+ * Get current substring of parser.
+ * @param parser Parser.
+ * @retval ptr to substring
+ */
+static inline const char *
+json_path_curr_substring(struct json_path_parser *parser)
+{
+	return parser->src + parser->offset;
 }
 
 /**
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 2d7367a..3a8e828 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -602,7 +602,10 @@ format[2] = {name = 'field2', type = 'array'}
 format[3] = {name = 'field3', type = 'map'}
 ---
 ...
-format[4] = {name = 'field4', type = 'string'}
+format[4] = {name = 'field4', type = 'string' }
+---
+...
+format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
 ---
 ...
 s = box.schema.space.create('test', {format = format})
@@ -611,13 +614,13 @@ s = box.schema.space.create('test', {format = format})
 pk = s:create_index('pk')
 ---
 ...
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}}
 ---
 ...
 field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
 ---
 ...
-t = s:replace{1, field2, field3, "123456"}
+t = s:replace{1, field2, field3, "123456", "yes, this"}
 ---
 ...
 t[1]
@@ -626,7 +629,7 @@ t[1]
 ...
 t[2]
 ---
-- [1, 2, 3, '4', [5, 6, 7], {'key': 'key1', 'value': 'value1'}]
+- [1, 2, 3, '4', [5, 6, 7], {'привет中国world': {'中国': 'привет'}, 'key': 'value1', 'value': 'key1'}]
 ...
 t[3]
 ---
@@ -667,19 +670,43 @@ t["[2][5][3]"]
 ...
 t["[2][6].key"]
 ---
-- key1
+- value1
 ...
 t["[2][6].value"]
 ---
-- value1
+- key1
 ...
 t["[2][6]['key']"]
 ---
-- key1
+- value1
 ...
 t["[2][6]['value']"]
 ---
-- value1
+- key1
+...
+t[2][6].привет中国world.中国
+---
+- привет
+...
+t["[2][6].привет中国world"].中国
+---
+- привет
+...
+t["[2][6].привет中国world.中国"]
+---
+- привет
+...
+t["[2][6]['привет中国world']"]["中国"]
+---
+- привет
+...
+t["[2][6]['привет中国world']['中国']"]
+---
+- привет
+...
+t["[2][6]['привет中国world']['中国a']"]
+---
+- yes, this
 ...
 t["[3].k3[2].c"]
 ---
@@ -759,31 +786,31 @@ t["a.b.c d.e.f"]
 -- Sytax errors.
 t[""]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 0'
+- Error in path on position 0
 ...
 t["[2].[5]"]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 5'
+- Error in path on position 5
 ...
 t["[-1]"]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
+- Error in path on position 2
 ...
 t[".."]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
+- Error in path on position 2
 ...
 t["[["]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 2'
+- Error in path on position 2
 ...
 t["]]"]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
+- Error in path on position 1
 ...
 t["{"]
 ---
-- error: 'builtin/box/tuple.lua:314: Error in path on position 1'
+- Error in path on position 1
 ...
 s:drop()
 ---
diff --git a/test/engine/tuple.test.lua b/test/engine/tuple.test.lua
index ba3482d..90da8b2 100644
--- a/test/engine/tuple.test.lua
+++ b/test/engine/tuple.test.lua
@@ -204,12 +204,13 @@ format = {}
 format[1] = {name = 'field1', type = 'unsigned'}
 format[2] = {name = 'field2', type = 'array'}
 format[3] = {name = 'field3', type = 'map'}
-format[4] = {name = 'field4', type = 'string'}
+format[4] = {name = 'field4', type = 'string' }
+format[5] = {name = "[2][6]['привет中国world']['中国a']", type = 'string'}
 s = box.schema.space.create('test', {format = format})
 pk = s:create_index('pk')
-field2 = {1, 2, 3, "4", {5,6,7}, {key="key1", value="value1"}}
+field2 = {1, 2, 3, "4", {5,6,7}, {привет中国world={中国="привет"}, key="value1", value="key1"}}
 field3 = {[10] = 100, k1 = 100, k2 = {1,2,3}, k3 = { {a=1, b=2}, {c=3, d=4} }, [-1] = 200}
-t = s:replace{1, field2, field3, "123456"}
+t = s:replace{1, field2, field3, "123456", "yes, this"}
 t[1]
 t[2]
 t[3]
@@ -225,6 +226,12 @@ t["[2][6].key"]
 t["[2][6].value"]
 t["[2][6]['key']"]
 t["[2][6]['value']"]
+t[2][6].привет中国world.中国
+t["[2][6].привет中国world"].中国
+t["[2][6].привет中国world.中国"]
+t["[2][6]['привет中国world']"]["中国"]
+t["[2][6]['привет中国world']['中国']"]
+t["[2][6]['привет中国world']['中国a']"]
 t["[3].k3[2].c"]
 t["[4]"]
 t.field1
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index fe8b2d2..667194c 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -130,7 +130,7 @@ add_executable(csv.test csv.c)
 target_link_libraries(csv.test csv)
 
 add_executable(json_path.test json_path.c)
-target_link_libraries(json_path.test json_path unit)
+target_link_libraries(json_path.test json_path unit ${ICU_LIBRARIES})
 
 add_executable(rmean.test rmean.cc)
 target_link_libraries(rmean.test stat unit)
diff --git a/test/unit/json_path.c b/test/unit/json_path.c
index 599658b..81ef7fc 100644
--- a/test/unit/json_path.c
+++ b/test/unit/json_path.c
@@ -9,7 +9,7 @@
 	json_path_parser_create(&parser, path, len);
 
 #define is_next_index(value_len, value) \
-	path = parser.pos; \
+	path = json_path_curr_substring(&parser); \
 	is(json_path_next(&parser, &node), 0, "parse <%." #value_len "s>", \
 	   path); \
 	is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", path); \
-- 
2.7.4

  reply	other threads:[~2018-04-05 14:09 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-29 14:22 [tarantool-patches] [PATCH v2 0/3] tuple field access via a json path Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 1/3] Introduce json_path_parser Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 2/3] lua: implement json path access to tuple fields Kirill Shcherbatov
2018-03-29 14:22 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support Kirill Shcherbatov
2018-03-29 18:04   ` [tarantool-patches] " Kirill Shcherbatov
2018-03-30 10:24     ` v.shpilevoy
2018-03-30 10:25       ` v.shpilevoy
2018-04-02 19:19       ` Kirill Shcherbatov
2018-04-03 10:20         ` Vladislav Shpilevoy
2018-04-05 14:09           ` Kirill Shcherbatov [this message]
2018-04-05 18:00             ` [tarantool-patches] Re: [PATCH v2 1/1] ICU Unicode support for JSON parser Kirill Shcherbatov
2018-04-05 23:32               ` Vladislav Shpilevoy
2018-04-04 10:37 ` [tarantool-patches] [PATCH v2 3/3] Multibyte characters support ICU Kirill Shcherbatov
2018-04-04 11:30   ` [tarantool-patches] " Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e35b03d5e6f0f78e38ee9db9b18e88bf9fef5f70.1522937317.git.kshcherbatov@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH v2 1/1] ICU Unicode support for JSON parser.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox