[tarantool-patches] [PATCH v5 1/9] box: refactor json_path_parser class

Mon Nov 26 15:53:06 MSK 2018

Rename struct json_path_node to json_token, struct
json_path_parser to json_lexer and json_path_next() to
json_lexer_next_token(). Move path.{c,h} to json.{c,h}.

Needed for #1012
---
 src/box/lua/tuple.c             |   2 +-
 src/box/tuple_format.c          |  53 +++++------
 src/lib/json/CMakeLists.txt     |   2 +-
 src/lib/json/{path.c => json.c} | 153 ++++++++++++++++----------------
 src/lib/json/{path.h => json.h} |  55 ++++++------
 test/unit/json_path.c           |  56 ++++++------
 6 files changed, 160 insertions(+), 161 deletions(-)
 rename src/lib/json/{path.c => json.c} (55%)
 rename src/lib/json/{path.h => json.h} (70%)

diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 65660ce7a..cbe71da18 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -41,7 +41,7 @@
 #include "box/tuple.h"
 #include "box/tuple_convert.h"
 #include "box/errcode.h"
-#include "json/path.h"
+#include "json/json.h"
 #include "mpstream.h"
 
 /** {{{ box.tuple Lua library
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 5a2481fd6..661cfdc94 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -28,7 +28,7 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-#include "json/path.h"
+#include "json/json.h"
 #include "tuple_format.h"
 #include "coll_id_cache.h"
 
@@ -580,19 +580,19 @@ static int
 tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len)
 {
 	int rc;
-	struct json_path_parser parser;
-	struct json_path_node node;
-	json_path_parser_create(&parser, path, path_len);
-	while ((rc = json_path_next(&parser, &node)) == 0) {
-		switch (node.type) {
-		case JSON_PATH_NUM:
-			rc = tuple_field_go_to_index(data, node.num);
+	struct json_lexer lexer;
+	struct json_token token;
+	json_lexer_create(&lexer, path, path_len);
+	while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
+		switch (token.type) {
+		case JSON_TOKEN_NUM:
+			rc = tuple_field_go_to_index(data, token.num);
 			break;
-		case JSON_PATH_STR:
-			rc = tuple_field_go_to_key(data, node.str, node.len);
+		case JSON_TOKEN_STR:
+			rc = tuple_field_go_to_key(data, token.str, token.len);
 			break;
 		default:
-			assert(node.type == JSON_PATH_END);
+			assert(token.type == JSON_TOKEN_END);
 			return 0;
 		}
 		if (rc != 0) {
@@ -622,15 +622,15 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 		*field = tuple_field_raw(format, tuple, field_map, fieldno);
 		return 0;
 	}
-	struct json_path_parser parser;
-	struct json_path_node node;
-	json_path_parser_create(&parser, path, path_len);
-	int rc = json_path_next(&parser, &node);
+	struct json_lexer lexer;
+	struct json_token token;
+	json_lexer_create(&lexer, path, path_len);
+	int rc = json_lexer_next_token(&lexer, &token);
 	if (rc != 0)
 		goto error;
-	switch(node.type) {
-	case JSON_PATH_NUM: {
-		int index = node.num;
+	switch(token.type) {
+	case JSON_TOKEN_NUM: {
+		int index = token.num;
 		if (index == 0) {
 			*field = NULL;
 			return 0;
@@ -641,10 +641,10 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 			return 0;
 		break;
 	}
-	case JSON_PATH_STR: {
+	case JSON_TOKEN_STR: {
 		/* First part of a path is a field name. */
 		uint32_t name_hash;
-		if (path_len == (uint32_t) node.len) {
+		if (path_len == (uint32_t) token.len) {
 			name_hash = path_hash;
 		} else {
 			/*
@@ -653,25 +653,26 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
 			 * used. A tuple dictionary hashes only
 			 * name, not path.
 			 */
-			name_hash = field_name_hash(node.str, node.len);
+			name_hash = field_name_hash(token.str, token.len);
 		}
 		*field = tuple_field_raw_by_name(format, tuple, field_map,
-						 node.str, node.len, name_hash);
+						 token.str, token.len,
+						 name_hash);
 		if (*field == NULL)
 			return 0;
 		break;
 	}
 	default:
-		assert(node.type == JSON_PATH_END);
+		assert(token.type == JSON_TOKEN_END);
 		*field = NULL;
 		return 0;
 	}
-	rc = tuple_field_go_to_path(field, path + parser.offset,
-				    path_len - parser.offset);
+	rc = tuple_field_go_to_path(field, path + lexer.offset,
+				    path_len - lexer.offset);
 	if (rc == 0)
 		return 0;
 	/* Setup absolute error position. */
-	rc += parser.offset;
+	rc += lexer.offset;
 
 error:
 	assert(rc > 0);
diff --git a/src/lib/json/CMakeLists.txt b/src/lib/json/CMakeLists.txt
index 203fe6f42..0f0739620 100644
--- a/src/lib/json/CMakeLists.txt
+++ b/src/lib/json/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(lib_sources
-    path.c
+    json.c
 )
 
 set_source_files_compile_flags(${lib_sources})
diff --git a/src/lib/json/path.c b/src/lib/json/json.c
similarity index 55%
rename from src/lib/json/path.c
rename to src/lib/json/json.c
index 2e72930a6..eb80e4bbc 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/json.c
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  */
 
-#include "path.h"
+#include "json.h"
 #include <ctype.h>
 #include <stdbool.h>
 #include <unicode/uchar.h>
@@ -38,82 +38,82 @@
 
 /**
  * Read a single symbol from a string starting from an offset.
- * @param parser JSON path parser.
+ * @param lexer JSON path lexer.
  * @param[out] UChar32 Read symbol.
  *
  * @retval   0 Success.
  * @retval > 0 1-based position of a syntax error.
  */
 static inline int
-json_read_symbol(struct json_path_parser *parser, UChar32 *out)
+json_read_symbol(struct json_lexer *lexer, UChar32 *out)
 {
-	if (parser->offset == parser->src_len) {
+	if (lexer->offset == lexer->src_len) {
 		*out = U_SENTINEL;
-		return parser->symbol_count + 1;
+		return lexer->symbol_count + 1;
 	}
-	U8_NEXT(parser->src, parser->offset, parser->src_len, *out);
+	U8_NEXT(lexer->src, lexer->offset, lexer->src_len, *out);
 	if (*out == U_SENTINEL)
-		return parser->symbol_count + 1;
-	++parser->symbol_count;
+		return lexer->symbol_count + 1;
+	++lexer->symbol_count;
 	return 0;
 }
 
 /**
  * Rollback one symbol offset.
- * @param parser JSON path parser.
+ * @param lexer JSON path lexer.
  * @param offset Offset to the previous symbol.
  */
 static inline void
-json_revert_symbol(struct json_path_parser *parser, int offset)
+json_revert_symbol(struct json_lexer *lexer, int offset)
 {
-	parser->offset = offset;
-	--parser->symbol_count;
+	lexer->offset = offset;
+	--lexer->symbol_count;
 }
 
 /** Fast forward when it is known that a symbol is 1-byte char. */
 static inline void
-json_skip_char(struct json_path_parser *parser)
+json_skip_char(struct json_lexer *lexer)
 {
-	++parser->offset;
-	++parser->symbol_count;
+	++lexer->offset;
+	++lexer->symbol_count;
 }
 
 /** Get a current symbol as a 1-byte char. */
 static inline char
-json_current_char(const struct json_path_parser *parser)
+json_current_char(const struct json_lexer *lexer)
 {
-	return *(parser->src + parser->offset);
+	return *(lexer->src + lexer->offset);
 }
 
 /**
- * Parse string identifier in quotes. Parser either stops right
+ * Parse string identifier in quotes. Lexer either stops right
  * after the closing quote, or returns an error position.
- * @param parser JSON path parser.
- * @param[out] node JSON node to store result.
+ * @param lexer JSON path lexer.
+ * @param[out] token JSON token to store result.
  * @param quote_type Quote by that a string must be terminated.
  *
  * @retval   0 Success.
  * @retval > 0 1-based position of a syntax error.
  */
 static inline int
-json_parse_string(struct json_path_parser *parser, struct json_path_node *node,
+json_parse_string(struct json_lexer *lexer, struct json_token *token,
 		  UChar32 quote_type)
 {
-	assert(parser->offset < parser->src_len);
-	assert(quote_type == json_current_char(parser));
+	assert(lexer->offset < lexer->src_len);
+	assert(quote_type == json_current_char(lexer));
 	/* The first symbol is always char  - ' or ". */
-	json_skip_char(parser);
-	int str_offset = parser->offset;
+	json_skip_char(lexer);
+	int str_offset = lexer->offset;
 	UChar32 c;
 	int rc;
-	while ((rc = json_read_symbol(parser, &c)) == 0) {
+	while ((rc = json_read_symbol(lexer, &c)) == 0) {
 		if (c == quote_type) {
-			int len = parser->offset - str_offset - 1;
+			int len = lexer->offset - str_offset - 1;
 			if (len == 0)
-				return parser->symbol_count;
-			node->type = JSON_PATH_STR;
-			node->str = parser->src + str_offset;
-			node->len = len;
+				return lexer->symbol_count;
+			token->type = JSON_TOKEN_STR;
+			token->str = lexer->src + str_offset;
+			token->len = len;
 			return 0;
 		}
 	}
@@ -122,32 +122,32 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node,
 
 /**
  * Parse digit sequence into integer until non-digit is met.
- * Parser stops right after the last digit.
- * @param parser JSON parser.
- * @param[out] node JSON node to store result.
+ * Lexer stops right after the last digit.
+ * @param lexer JSON lexer.
+ * @param[out] token JSON token to store result.
  *
  * @retval   0 Success.
  * @retval > 0 1-based position of a syntax error.
  */
 static inline int
-json_parse_integer(struct json_path_parser *parser, struct json_path_node *node)
+json_parse_integer(struct json_lexer *lexer, struct json_token *token)
 {
-	const char *end = parser->src + parser->src_len;
-	const char *pos = parser->src + parser->offset;
+	const char *end = lexer->src + lexer->src_len;
+	const char *pos = lexer->src + lexer->offset;
 	assert(pos < end);
 	int len = 0;
 	uint64_t value = 0;
 	char c = *pos;
 	if (! isdigit(c))
-		return parser->symbol_count + 1;
+		return lexer->symbol_count + 1;
 	do {
 		value = value * 10 + c - (int)'0';
 		++len;
 	} while (++pos < end && isdigit((c = *pos)));
-	parser->offset += len;
-	parser->symbol_count += len;
-	node->type = JSON_PATH_NUM;
-	node->num = value;
+	lexer->offset += len;
+	lexer->symbol_count += len;
+	token->type = JSON_TOKEN_NUM;
+	token->num = value;
 	return 0;
 }
 
@@ -164,81 +164,80 @@ json_is_valid_identifier_symbol(UChar32 c)
 /**
  * Parse identifier out of quotes. It can contain only alphas,
  * digits and underscores. And can not contain digit at the first
- * position. Parser is stoped right after the last non-digit,
+ * position. Lexer is stopped right after the last non-digit,
  * non-alpha and non-underscore symbol.
- * @param parser JSON parser.
- * @param[out] node JSON node to store result.
+ * @param lexer JSON lexer.
+ * @param[out] token JSON token to store result.
  *
  * @retval   0 Success.
  * @retval > 0 1-based position of a syntax error.
  */
 static inline int
-json_parse_identifier(struct json_path_parser *parser,
-		      struct json_path_node *node)
+json_parse_identifier(struct json_lexer *lexer, struct json_token *token)
 {
-	assert(parser->offset < parser->src_len);
-	int str_offset = parser->offset;
+	assert(lexer->offset < lexer->src_len);
+	int str_offset = lexer->offset;
 	UChar32 c;
-	int rc = json_read_symbol(parser, &c);
+	int rc = json_read_symbol(lexer, &c);
 	if (rc != 0)
 		return rc;
 	/* First symbol can not be digit. */
 	if (!u_isalpha(c) && c != (UChar32)'_')
-		return parser->symbol_count;
-	int last_offset = parser->offset;
-	while ((rc = json_read_symbol(parser, &c)) == 0) {
+		return lexer->symbol_count;
+	int last_offset = lexer->offset;
+	while ((rc = json_read_symbol(lexer, &c)) == 0) {
 		if (! json_is_valid_identifier_symbol(c)) {
-			json_revert_symbol(parser, last_offset);
+			json_revert_symbol(lexer, last_offset);
 			break;
 		}
-		last_offset = parser->offset;
+		last_offset = lexer->offset;
 	}
-	node->type = JSON_PATH_STR;
-	node->str = parser->src + str_offset;
-	node->len = parser->offset - str_offset;
+	token->type = JSON_TOKEN_STR;
+	token->str = lexer->src + str_offset;
+	token->len = lexer->offset - str_offset;
 	return 0;
 }
 
 int
-json_path_next(struct json_path_parser *parser, struct json_path_node *node)
+json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
 {
-	if (parser->offset == parser->src_len) {
-		node->type = JSON_PATH_END;
+	if (lexer->offset == lexer->src_len) {
+		token->type = JSON_TOKEN_END;
 		return 0;
 	}
 	UChar32 c;
-	int last_offset = parser->offset;
-	int rc = json_read_symbol(parser, &c);
+	int last_offset = lexer->offset;
+	int rc = json_read_symbol(lexer, &c);
 	if (rc != 0)
 		return rc;
 	switch(c) {
 	case (UChar32)'[':
 		/* Error for '[\0'. */
-		if (parser->offset == parser->src_len)
-			return parser->symbol_count;
-		c = json_current_char(parser);
+		if (lexer->offset == lexer->src_len)
+			return lexer->symbol_count;
+		c = json_current_char(lexer);
 		if (c == '"' || c == '\'')
-			rc = json_parse_string(parser, node, c);
+			rc = json_parse_string(lexer, token, c);
 		else
-			rc = json_parse_integer(parser, node);
+			rc = json_parse_integer(lexer, token);
 		if (rc != 0)
 			return rc;
 		/*
 		 * Expression, started from [ must be finished
 		 * with ] regardless of its type.
 		 */
-		if (parser->offset == parser->src_len ||
-		    json_current_char(parser) != ']')
-			return parser->symbol_count + 1;
+		if (lexer->offset == lexer->src_len ||
+		    json_current_char(lexer) != ']')
+			return lexer->symbol_count + 1;
 		/* Skip ] - one byte char. */
-		json_skip_char(parser);
+		json_skip_char(lexer);
 		return 0;
 	case (UChar32)'.':
-		if (parser->offset == parser->src_len)
-			return parser->symbol_count + 1;
-		return json_parse_identifier(parser, node);
+		if (lexer->offset == lexer->src_len)
+			return lexer->symbol_count + 1;
+		return json_parse_identifier(lexer, token);
 	default:
-		json_revert_symbol(parser, last_offset);
-		return json_parse_identifier(parser, node);
+		json_revert_symbol(lexer, last_offset);
+		return json_parse_identifier(lexer, token);
 	}
 }
diff --git a/src/lib/json/path.h b/src/lib/json/json.h
similarity index 70%
rename from src/lib/json/path.h
rename to src/lib/json/json.h
index c3c381a14..ead446878 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/json.h
@@ -1,5 +1,5 @@
-#ifndef TARANTOOL_JSON_PATH_H_INCLUDED
-#define TARANTOOL_JSON_PATH_H_INCLUDED
+#ifndef TARANTOOL_JSON_JSON_H_INCLUDED
+#define TARANTOOL_JSON_JSON_H_INCLUDED
 /*
  * Copyright 2010-2018 Tarantool AUTHORS: please see AUTHORS file.
  *
@@ -37,25 +37,25 @@ extern "C" {
 #endif
 
 /**
- * Parser for JSON paths:
+ * Lexer for JSON paths:
  * <field>, <.field>, <[123]>, <['field']> and their combinations.
  */
-struct json_path_parser {
+struct json_lexer {
 	/** Source string. */
 	const char *src;
 	/** Length of string. */
 	int src_len;
-	/** Current parser's offset in bytes. */
+	/** Current lexer's offset in bytes. */
 	int offset;
-	/** Current parser's offset in symbols. */
+	/** Current lexer's offset in symbols. */
 	int symbol_count;
 };
 
-enum json_path_type {
-	JSON_PATH_NUM,
-	JSON_PATH_STR,
-	/** Parser reached end of path. */
-	JSON_PATH_END,
+enum json_token_type {
+	JSON_TOKEN_NUM,
+	JSON_TOKEN_STR,
+	/** Lexer reached end of path. */
+	JSON_TOKEN_END,
 };
 
 /**
@@ -63,8 +63,8 @@ enum json_path_type {
  * String idenfiers are in ["..."] and between dots. Numbers are
  * indexes in [...].
  */
-struct json_path_node {
-	enum json_path_type type;
+struct json_token {
+	enum json_token_type type;
 	union {
 		struct {
 			/** String identifier. */
@@ -78,35 +78,34 @@ struct json_path_node {
 };
 
 /**
- * Create @a parser.
- * @param[out] parser Parser to create.
+ * Create @a lexer.
+ * @param[out] lexer Lexer to create.
  * @param src Source string.
  * @param src_len Length of @a src.
  */
 static inline void
-json_path_parser_create(struct json_path_parser *parser, const char *src,
-                        int src_len)
+json_lexer_create(struct json_lexer *lexer, const char *src, int src_len)
 {
-	parser->src = src;
-	parser->src_len = src_len;
-	parser->offset = 0;
-	parser->symbol_count = 0;
+	lexer->src = src;
+	lexer->src_len = src_len;
+	lexer->offset = 0;
+	lexer->symbol_count = 0;
 }
 
 /**
- * Get a next path node.
- * @param parser Parser.
- * @param[out] node Node to store parsed result.
- * @retval   0 Success. For result see @a node.str, node.len,
- *             node.num.
+ * Get a next path token.
+ * @param lexer Lexer.
+ * @param[out] token Token to store parsed result.
+ * @retval   0 Success. For result see @a token.str, token.len,
+ *             token.num.
  * @retval > 0 Position of a syntax error. A position is 1-based
  *             and starts from a beginning of a source string.
  */
 int
-json_path_next(struct json_path_parser *parser, struct json_path_node *node);
+json_lexer_next_token(struct json_lexer *lexer, struct json_token *token);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif /* TARANTOOL_JSON_PATH_H_INCLUDED */
+#endif /* TARANTOOL_JSON_JSON_H_INCLUDED */
diff --git a/test/unit/json_path.c b/test/unit/json_path.c
index 1d7e7d372..a5f90ad98 100644
--- a/test/unit/json_path.c
+++ b/test/unit/json_path.c
@@ -1,4 +1,4 @@
-#include "json/path.h"
+#include "json/json.h"
 #include "unit.h"
 #include "trivia/util.h"
 #include <string.h>
@@ -6,21 +6,21 @@
 #define reset_to_new_path(value) \
 	path = value; \
 	len = strlen(value); \
-	json_path_parser_create(&parser, path, len);
+	json_lexer_create(&lexer, path, len);
 
 #define is_next_index(value_len, value) \
-	path = parser.src + parser.offset; \
-	is(json_path_next(&parser, &node), 0, "parse <%." #value_len "s>", \
+	path = lexer.src + lexer.offset; \
+	is(json_lexer_next_token(&lexer, &token), 0, "parse <%." #value_len "s>", \
 	   path); \
-	is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", path); \
-	is(node.num, value, "<%." #value_len "s> is " #value, path);
+	is(token.type, JSON_TOKEN_NUM, "<%." #value_len "s> is num", path); \
+	is(token.num, value, "<%." #value_len "s> is " #value, path);
 
 #define is_next_key(value) \
 	len = strlen(value); \
-	is(json_path_next(&parser, &node), 0, "parse <" value ">"); \
-	is(node.type, JSON_PATH_STR, "<" value "> is str"); \
-	is(node.len, len, "len is %d", len); \
-	is(strncmp(node.str, value, len), 0, "str is " value);
+	is(json_lexer_next_token(&lexer, &token), 0, "parse <" value ">"); \
+	is(token.type, JSON_TOKEN_STR, "<" value "> is str"); \
+	is(token.len, len, "len is %d", len); \
+	is(strncmp(token.str, value, len), 0, "str is " value);
 
 void
 test_basic()
@@ -29,8 +29,8 @@ test_basic()
 	plan(71);
 	const char *path;
 	int len;
-	struct json_path_parser parser;
-	struct json_path_node node;
+	struct json_lexer lexer;
+	struct json_token token;
 
 	reset_to_new_path("[0].field1.field2['field3'][5]");
 	is_next_index(3, 0);
@@ -61,8 +61,8 @@ test_basic()
 
 	/* Empty path. */
 	reset_to_new_path("");
-	is(json_path_next(&parser, &node), 0, "parse empty path");
-	is(node.type, JSON_PATH_END, "is str");
+	is(json_lexer_next_token(&lexer, &token), 0, "parse empty path");
+	is(token.type, JSON_TOKEN_END, "is str");
 
 	/* Path with no '.' at the beginning. */
 	reset_to_new_path("field1.field2");
@@ -81,8 +81,8 @@ test_basic()
 
 #define check_new_path_on_error(value, errpos) \
 	reset_to_new_path(value); \
-	struct json_path_node node; \
-	is(json_path_next(&parser, &node), errpos, "error on position %d" \
+	struct json_token token; \
+	is(json_lexer_next_token(&lexer, &token), errpos, "error on position %d" \
 	   " for <%s>", errpos, path);
 
 struct path_and_errpos {
@@ -97,7 +97,7 @@ test_errors()
 	plan(20);
 	const char *path;
 	int len;
-	struct json_path_parser parser;
+	struct json_lexer lexer;
 	const struct path_and_errpos errors[] = {
 		/* Double [[. */
 		{"[[", 2},
@@ -133,27 +133,27 @@ test_errors()
 	for (size_t i = 0; i < lengthof(errors); ++i) {
 		reset_to_new_path(errors[i].path);
 		int errpos = errors[i].errpos;
-		struct json_path_node node;
-		is(json_path_next(&parser, &node), errpos,
+		struct json_token token;
+		is(json_lexer_next_token(&lexer, &token), errpos,
 		   "error on position %d for <%s>", errpos, path);
 	}
 
 	reset_to_new_path("f.[2]")
-	struct json_path_node node;
-	json_path_next(&parser, &node);
-	is(json_path_next(&parser, &node), 3, "can not write <field.[index]>")
+	struct json_token token;
+	json_lexer_next_token(&lexer, &token);
+	is(json_lexer_next_token(&lexer, &token), 3, "can not write <field.[index]>")
 
 	reset_to_new_path("f.")
-	json_path_next(&parser, &node);
-	is(json_path_next(&parser, &node), 3, "error in leading <.>");
+	json_lexer_next_token(&lexer, &token);
+	is(json_lexer_next_token(&lexer, &token), 3, "error in leading <.>");
 
 	reset_to_new_path("fiel d1")
-	json_path_next(&parser, &node);
-	is(json_path_next(&parser, &node), 5, "space inside identifier");
+	json_lexer_next_token(&lexer, &token);
+	is(json_lexer_next_token(&lexer, &token), 5, "space inside identifier");
 
 	reset_to_new_path("field\t1")
-	json_path_next(&parser, &node);
-	is(json_path_next(&parser, &node), 6, "tab inside identifier");
+	json_lexer_next_token(&lexer, &token);
+	is(json_lexer_next_token(&lexer, &token), 6, "tab inside identifier");
 
 	check_plan();
 	footer();
-- 
2.19.2