[tarantool-patches] Re: [PATCH v5 2/9] lib: add index_base support to json_lexer

Thu Dec 6 10:56:59 MSK 2018

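Introduce a new index_base field in struct json_lexer. It is the
base offset subtracted from every number parsed in a path, so
emitted JSON_TOKEN_NUM tokens are always 0-based. A number that
is less than index_base (e.g. "[0]" when index_base is 1) is now
reported as a path error instead of being returned as a token.
Thus, we get rid of the need to perform manual casts using the
TUPLE_INDEX_BASE constant in the majority of cases. This also
ensures that the extracted tuples are inserted at the correct
numerical level of the JSON tree.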

Needed for #1012
---
 src/box/tuple_format.c     | 16 ++++------------
 src/lib/json/json.c        |  4 +++-
 src/lib/json/json.h        | 11 ++++++++++-
 test/engine/tuple.result   |  4 ++--
 test/unit/json_path.c      | 24 +++++++++++++++---------
 test/unit/json_path.result | 21 +++++++++++----------
 6 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 661cfdc94..149248144 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -491,7 +491,7 @@ box_tuple_format_unref(box_tuple_format_t *format)
 /**
  * Propagate @a field to MessagePack(field)[index].
  * @param[in][out] field Field to propagate.
- * @param index 1-based index to propagate to.
+ * @param index 0-based index to propagate to.
  *
  * @retval  0 Success, the index was found.
  * @retval -1 Not found.
@@ -501,10 +501,6 @@ tuple_field_go_to_index(const char **field, uint64_t index)
 {
 	enum mp_type type = mp_typeof(**field);
 	if (type == MP_ARRAY) {
-		if (index == 0)
-			return -1;
-		/* Make index 0-based. */
-		index -= TUPLE_INDEX_BASE;
 		uint32_t count = mp_decode_array(field);
 		if (index >= count)
 			return -1;
@@ -512,6 +508,7 @@ tuple_field_go_to_index(const char **field, uint64_t index)
 			mp_next(field);
 		return 0;
 	} else if (type == MP_MAP) {
+		index += TUPLE_INDEX_BASE;
 		uint64_t count = mp_decode_map(field);
 		for (; count > 0; --count) {
 			type = mp_typeof(**field);
@@ -582,7 +579,7 @@ tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len)
 	int rc;
 	struct json_lexer lexer;
 	struct json_token token;
-	json_lexer_create(&lexer, path, path_len);
+	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
 		switch (token.type) {
 		case JSON_TOKEN_NUM:
@@ -624,18 +621,13 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 	}
 	struct json_lexer lexer;
 	struct json_token token;
-	json_lexer_create(&lexer, path, path_len);
+	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
 	int rc = json_lexer_next_token(&lexer, &token);
 	if (rc != 0)
 		goto error;
 	switch(token.type) {
 	case JSON_TOKEN_NUM: {
 		int index = token.num;
-		if (index == 0) {
-			*field = NULL;
-			return 0;
-		}
-		index -= TUPLE_INDEX_BASE;
 		*field = tuple_field_raw(format, tuple, field_map, index);
 		if (*field == NULL)
 			return 0;
diff --git a/src/lib/json/json.c b/src/lib/json/json.c
index eb80e4bbc..81b291127 100644
--- a/src/lib/json/json.c
+++ b/src/lib/json/json.c
@@ -144,10 +144,12 @@ json_parse_integer(struct json_lexer *lexer, struct json_token *token)
 		value = value * 10 + c - (int)'0';
 		++len;
 	} while (++pos < end && isdigit((c = *pos)));
+	if (value < lexer->index_base)
+		return lexer->symbol_count + 1;
 	lexer->offset += len;
 	lexer->symbol_count += len;
 	token->type = JSON_TOKEN_NUM;
-	token->num = value;
+	token->num = value - lexer->index_base;
 	return 0;
 }
 
diff --git a/src/lib/json/json.h b/src/lib/json/json.h
index ead446878..5c8d973e5 100644
--- a/src/lib/json/json.h
+++ b/src/lib/json/json.h
@@ -49,6 +49,11 @@ struct json_lexer {
 	int offset;
 	/** Current lexer's offset in symbols. */
 	int symbol_count;
+	/**
+	 * Base field offset for emitted JSON_TOKEN_NUM tokens,
+	 * e.g. 0 for C and 1 for Lua.
+	 */
+	unsigned index_base;
 };
 
 enum json_token_type {
@@ -82,14 +87,18 @@ struct json_token {
  * @param[out] lexer Lexer to create.
  * @param src Source string.
  * @param src_len Length of @a src.
+ * @param index_base Base field offset for emitted JSON_TOKEN_NUM
+ *                   tokens e.g. 0 for C and 1 for Lua.
  */
 static inline void
-json_lexer_create(struct json_lexer *lexer, const char *src, int src_len)
+json_lexer_create(struct json_lexer *lexer, const char *src, int src_len,
+		  unsigned index_base)
 {
 	lexer->src = src;
 	lexer->src_len = src_len;
 	lexer->offset = 0;
 	lexer->symbol_count = 0;
+	lexer->index_base = index_base;
 }
 
 /**
diff --git a/test/engine/tuple.result b/test/engine/tuple.result
index 35c700e16..7ca3985c7 100644
--- a/test/engine/tuple.result
+++ b/test/engine/tuple.result
@@ -823,7 +823,7 @@ t[0]
 ...
 t["[0]"]
 ---
-- null
+- error: Illegal parameters, error in path on position 2
 ...
 t["[1000]"]
 ---
@@ -847,7 +847,7 @@ t["[2][6].key100"]
 ...
 t["[2][0]"] -- 0-based index in array.
 ---
-- null
+- error: Illegal parameters, error in path on position 5
 ...
 t["[4][3]"] -- Can not index string.
 ---
diff --git a/test/unit/json_path.c b/test/unit/json_path.c
index a5f90ad98..1d7707ee6 100644
--- a/test/unit/json_path.c
+++ b/test/unit/json_path.c
@@ -3,10 +3,12 @@
 #include "trivia/util.h"
 #include <string.h>
 
+#define TUPLE_INDEX_BASE 1
+
 #define reset_to_new_path(value) \
 	path = value; \
 	len = strlen(value); \
-	json_lexer_create(&lexer, path, len);
+	json_lexer_create(&lexer, path, len, TUPLE_INDEX_BASE);
 
 #define is_next_index(value_len, value) \
 	path = lexer.src + lexer.offset; \
@@ -32,18 +34,18 @@ test_basic()
 	struct json_lexer lexer;
 	struct json_token token;
 
-	reset_to_new_path("[0].field1.field2['field3'][5]");
+	reset_to_new_path("[1].field1.field2['field3'][5]");
 	is_next_index(3, 0);
 	is_next_key("field1");
 	is_next_key("field2");
 	is_next_key("field3");
-	is_next_index(3, 5);
+	is_next_index(3, 4);
 
 	reset_to_new_path("[3].field[2].field")
-	is_next_index(3, 3);
-	is_next_key("field");
 	is_next_index(3, 2);
 	is_next_key("field");
+	is_next_index(3, 1);
+	is_next_key("field");
 
 	reset_to_new_path("[\"f1\"][\"f2'3'\"]");
 	is_next_key("f1");
@@ -57,7 +59,7 @@ test_basic()
 
 	/* Long number. */
 	reset_to_new_path("[1234]");
-	is_next_index(6, 1234);
+	is_next_index(6, 1233);
 
 	/* Empty path. */
 	reset_to_new_path("");
@@ -70,8 +72,8 @@ test_basic()
 
 	/* Unicode. */
 	reset_to_new_path("[2][6]['привет中国world']['中国a']");
-	is_next_index(3, 2);
-	is_next_index(3, 6);
+	is_next_index(3, 1);
+	is_next_index(3, 5);
 	is_next_key("привет中国world");
 	is_next_key("中国a");
 
@@ -94,7 +96,7 @@ void
 test_errors()
 {
 	header();
-	plan(20);
+	plan(21);
 	const char *path;
 	int len;
 	struct json_lexer lexer;
@@ -155,6 +157,10 @@ test_errors()
 	json_lexer_next_token(&lexer, &token);
 	is(json_lexer_next_token(&lexer, &token), 6, "tab inside identifier");
 
+	reset_to_new_path("[0]");
+	is(json_lexer_next_token(&lexer, &token), 2,
+	   "invalid token for index_base %d", TUPLE_INDEX_BASE);
+
 	check_plan();
 	footer();
 }
diff --git a/test/unit/json_path.result b/test/unit/json_path.result
index a2a2f829f..ad6f07e5a 100644
--- a/test/unit/json_path.result
+++ b/test/unit/json_path.result
@@ -2,9 +2,9 @@
 1..2
 	*** test_basic ***
     1..71
-    ok 1 - parse <[0]>
-    ok 2 - <[0]> is num
-    ok 3 - <[0]> is 0
+    ok 1 - parse <[1]>
+    ok 2 - <[1]> is num
+    ok 3 - <[1]> is 0
     ok 4 - parse <field1>
     ok 5 - <field1> is str
     ok 6 - len is 6
@@ -19,17 +19,17 @@
     ok 15 - str is field3
     ok 16 - parse <[5]>
     ok 17 - <[5]> is num
-    ok 18 - <[5]> is 5
+    ok 18 - <[5]> is 4
     ok 19 - parse <[3]>
     ok 20 - <[3]> is num
-    ok 21 - <[3]> is 3
+    ok 21 - <[3]> is 2
     ok 22 - parse <field>
     ok 23 - <field> is str
     ok 24 - len is 5
     ok 25 - str is field
     ok 26 - parse <[2]>
     ok 27 - <[2]> is num
-    ok 28 - <[2]> is 2
+    ok 28 - <[2]> is 1
     ok 29 - parse <field>
     ok 30 - <field> is str
     ok 31 - len is 5
@@ -52,7 +52,7 @@
     ok 48 - str is field1
     ok 49 - parse <[1234]>
     ok 50 - <[1234]> is num
-    ok 51 - <[1234]> is 1234
+    ok 51 - <[1234]> is 1233
     ok 52 - parse empty path
     ok 53 - is str
     ok 54 - parse <field1>
@@ -61,10 +61,10 @@
     ok 57 - str is field1
     ok 58 - parse <[2]>
     ok 59 - <[2]> is num
-    ok 60 - <[2]> is 2
+    ok 60 - <[2]> is 1
     ok 61 - parse <[6]>
     ok 62 - <[6]> is num
-    ok 63 - <[6]> is 6
+    ok 63 - <[6]> is 5
     ok 64 - parse <привет中国world>
     ok 65 - <привет中国world> is str
     ok 66 - len is 23
@@ -76,7 +76,7 @@
 ok 1 - subtests
 	*** test_basic: done ***
 	*** test_errors ***
-    1..20
+    1..21
     ok 1 - error on position 2 for <[[>
     ok 2 - error on position 2 for <[field]>
     ok 3 - error on position 1 for <'field1'.field2>
@@ -97,6 +97,7 @@ ok 1 - subtests
     ok 18 - error in leading <.>
     ok 19 - space inside identifier
     ok 20 - tab inside identifier
+    ok 21 - invalid token for index_base 1
 ok 2 - subtests
 	*** test_errors: done ***
 	*** main: done ***
-- 
2.19.2