[tarantool-patches] [PATCH v5 1/9] box: refactor json_path_parser class
Kirill Shcherbatov
kshcherbatov at tarantool.org
Mon Nov 26 15:53:06 MSK 2018
Renamed object json_path_node to json_token and
json_path_parser class to json_lexer.
Needed for #1012
---
src/box/lua/tuple.c | 2 +-
src/box/tuple_format.c | 53 +++++------
src/lib/json/CMakeLists.txt | 2 +-
src/lib/json/{path.c => json.c} | 153 ++++++++++++++++----------------
src/lib/json/{path.h => json.h} | 55 ++++++------
test/unit/json_path.c | 56 ++++++------
6 files changed, 160 insertions(+), 161 deletions(-)
rename src/lib/json/{path.c => json.c} (55%)
rename src/lib/json/{path.h => json.h} (70%)
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 65660ce7a..cbe71da18 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -41,7 +41,7 @@
#include "box/tuple.h"
#include "box/tuple_convert.h"
#include "box/errcode.h"
-#include "json/path.h"
+#include "json/json.h"
#include "mpstream.h"
/** {{{ box.tuple Lua library
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 5a2481fd6..661cfdc94 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -28,7 +28,7 @@
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-#include "json/path.h"
+#include "json/json.h"
#include "tuple_format.h"
#include "coll_id_cache.h"
@@ -580,19 +580,19 @@ static int
tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len)
{
int rc;
- struct json_path_parser parser;
- struct json_path_node node;
- json_path_parser_create(&parser, path, path_len);
- while ((rc = json_path_next(&parser, &node)) == 0) {
- switch (node.type) {
- case JSON_PATH_NUM:
- rc = tuple_field_go_to_index(data, node.num);
+ struct json_lexer lexer;
+ struct json_token token;
+ json_lexer_create(&lexer, path, path_len);
+ while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
+ switch (token.type) {
+ case JSON_TOKEN_NUM:
+ rc = tuple_field_go_to_index(data, token.num);
break;
- case JSON_PATH_STR:
- rc = tuple_field_go_to_key(data, node.str, node.len);
+ case JSON_TOKEN_STR:
+ rc = tuple_field_go_to_key(data, token.str, token.len);
break;
default:
- assert(node.type == JSON_PATH_END);
+ assert(token.type == JSON_TOKEN_END);
return 0;
}
if (rc != 0) {
@@ -622,15 +622,15 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
*field = tuple_field_raw(format, tuple, field_map, fieldno);
return 0;
}
- struct json_path_parser parser;
- struct json_path_node node;
- json_path_parser_create(&parser, path, path_len);
- int rc = json_path_next(&parser, &node);
+ struct json_lexer lexer;
+ struct json_token token;
+ json_lexer_create(&lexer, path, path_len);
+ int rc = json_lexer_next_token(&lexer, &token);
if (rc != 0)
goto error;
- switch(node.type) {
- case JSON_PATH_NUM: {
- int index = node.num;
+ switch(token.type) {
+ case JSON_TOKEN_NUM: {
+ int index = token.num;
if (index == 0) {
*field = NULL;
return 0;
@@ -641,10 +641,10 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
return 0;
break;
}
- case JSON_PATH_STR: {
+ case JSON_TOKEN_STR: {
/* First part of a path is a field name. */
uint32_t name_hash;
- if (path_len == (uint32_t) node.len) {
+ if (path_len == (uint32_t) token.len) {
name_hash = path_hash;
} else {
/*
@@ -653,25 +653,26 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
* used. A tuple dictionary hashes only
* name, not path.
*/
- name_hash = field_name_hash(node.str, node.len);
+ name_hash = field_name_hash(token.str, token.len);
}
*field = tuple_field_raw_by_name(format, tuple, field_map,
- node.str, node.len, name_hash);
+ token.str, token.len,
+ name_hash);
if (*field == NULL)
return 0;
break;
}
default:
- assert(node.type == JSON_PATH_END);
+ assert(token.type == JSON_TOKEN_END);
*field = NULL;
return 0;
}
- rc = tuple_field_go_to_path(field, path + parser.offset,
- path_len - parser.offset);
+ rc = tuple_field_go_to_path(field, path + lexer.offset,
+ path_len - lexer.offset);
if (rc == 0)
return 0;
/* Setup absolute error position. */
- rc += parser.offset;
+ rc += lexer.offset;
error:
assert(rc > 0);
diff --git a/src/lib/json/CMakeLists.txt b/src/lib/json/CMakeLists.txt
index 203fe6f42..0f0739620 100644
--- a/src/lib/json/CMakeLists.txt
+++ b/src/lib/json/CMakeLists.txt
@@ -1,5 +1,5 @@
set(lib_sources
- path.c
+ json.c
)
set_source_files_compile_flags(${lib_sources})
diff --git a/src/lib/json/path.c b/src/lib/json/json.c
similarity index 55%
rename from src/lib/json/path.c
rename to src/lib/json/json.c
index 2e72930a6..eb80e4bbc 100644
--- a/src/lib/json/path.c
+++ b/src/lib/json/json.c
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*/
-#include "path.h"
+#include "json.h"
#include <ctype.h>
#include <stdbool.h>
#include <unicode/uchar.h>
@@ -38,82 +38,82 @@
/**
* Read a single symbol from a string starting from an offset.
- * @param parser JSON path parser.
+ * @param lexer JSON path lexer.
* @param[out] UChar32 Read symbol.
*
* @retval 0 Success.
* @retval > 0 1-based position of a syntax error.
*/
static inline int
-json_read_symbol(struct json_path_parser *parser, UChar32 *out)
+json_read_symbol(struct json_lexer *lexer, UChar32 *out)
{
- if (parser->offset == parser->src_len) {
+ if (lexer->offset == lexer->src_len) {
*out = U_SENTINEL;
- return parser->symbol_count + 1;
+ return lexer->symbol_count + 1;
}
- U8_NEXT(parser->src, parser->offset, parser->src_len, *out);
+ U8_NEXT(lexer->src, lexer->offset, lexer->src_len, *out);
if (*out == U_SENTINEL)
- return parser->symbol_count + 1;
- ++parser->symbol_count;
+ return lexer->symbol_count + 1;
+ ++lexer->symbol_count;
return 0;
}
/**
* Rollback one symbol offset.
- * @param parser JSON path parser.
+ * @param lexer JSON path lexer.
* @param offset Offset to the previous symbol.
*/
static inline void
-json_revert_symbol(struct json_path_parser *parser, int offset)
+json_revert_symbol(struct json_lexer *lexer, int offset)
{
- parser->offset = offset;
- --parser->symbol_count;
+ lexer->offset = offset;
+ --lexer->symbol_count;
}
/** Fast forward when it is known that a symbol is 1-byte char. */
static inline void
-json_skip_char(struct json_path_parser *parser)
+json_skip_char(struct json_lexer *lexer)
{
- ++parser->offset;
- ++parser->symbol_count;
+ ++lexer->offset;
+ ++lexer->symbol_count;
}
/** Get a current symbol as a 1-byte char. */
static inline char
-json_current_char(const struct json_path_parser *parser)
+json_current_char(const struct json_lexer *lexer)
{
- return *(parser->src + parser->offset);
+ return *(lexer->src + lexer->offset);
}
/**
- * Parse string identifier in quotes. Parser either stops right
+ * Parse string identifier in quotes. Lexer either stops right
* after the closing quote, or returns an error position.
- * @param parser JSON path parser.
- * @param[out] node JSON node to store result.
+ * @param lexer JSON path lexer.
+ * @param[out] token JSON token to store result.
* @param quote_type Quote by that a string must be terminated.
*
* @retval 0 Success.
* @retval > 0 1-based position of a syntax error.
*/
static inline int
-json_parse_string(struct json_path_parser *parser, struct json_path_node *node,
+json_parse_string(struct json_lexer *lexer, struct json_token *token,
UChar32 quote_type)
{
- assert(parser->offset < parser->src_len);
- assert(quote_type == json_current_char(parser));
+ assert(lexer->offset < lexer->src_len);
+ assert(quote_type == json_current_char(lexer));
/* The first symbol is always char - ' or ". */
- json_skip_char(parser);
- int str_offset = parser->offset;
+ json_skip_char(lexer);
+ int str_offset = lexer->offset;
UChar32 c;
int rc;
- while ((rc = json_read_symbol(parser, &c)) == 0) {
+ while ((rc = json_read_symbol(lexer, &c)) == 0) {
if (c == quote_type) {
- int len = parser->offset - str_offset - 1;
+ int len = lexer->offset - str_offset - 1;
if (len == 0)
- return parser->symbol_count;
- node->type = JSON_PATH_STR;
- node->str = parser->src + str_offset;
- node->len = len;
+ return lexer->symbol_count;
+ token->type = JSON_TOKEN_STR;
+ token->str = lexer->src + str_offset;
+ token->len = len;
return 0;
}
}
@@ -122,32 +122,32 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node,
/**
* Parse digit sequence into integer until non-digit is met.
- * Parser stops right after the last digit.
- * @param parser JSON parser.
- * @param[out] node JSON node to store result.
+ * Lexer stops right after the last digit.
+ * @param lexer JSON lexer.
+ * @param[out] token JSON token to store result.
*
* @retval 0 Success.
* @retval > 0 1-based position of a syntax error.
*/
static inline int
-json_parse_integer(struct json_path_parser *parser, struct json_path_node *node)
+json_parse_integer(struct json_lexer *lexer, struct json_token *token)
{
- const char *end = parser->src + parser->src_len;
- const char *pos = parser->src + parser->offset;
+ const char *end = lexer->src + lexer->src_len;
+ const char *pos = lexer->src + lexer->offset;
assert(pos < end);
int len = 0;
uint64_t value = 0;
char c = *pos;
if (! isdigit(c))
- return parser->symbol_count + 1;
+ return lexer->symbol_count + 1;
do {
value = value * 10 + c - (int)'0';
++len;
} while (++pos < end && isdigit((c = *pos)));
- parser->offset += len;
- parser->symbol_count += len;
- node->type = JSON_PATH_NUM;
- node->num = value;
+ lexer->offset += len;
+ lexer->symbol_count += len;
+ token->type = JSON_TOKEN_NUM;
+ token->num = value;
return 0;
}
@@ -164,81 +164,80 @@ json_is_valid_identifier_symbol(UChar32 c)
/**
* Parse identifier out of quotes. It can contain only alphas,
* digits and underscores. And can not contain digit at the first
- * position. Parser is stoped right after the last non-digit,
+ * position. Lexer is stopped right after the last non-digit,
* non-alpha and non-underscore symbol.
- * @param parser JSON parser.
- * @param[out] node JSON node to store result.
+ * @param lexer JSON lexer.
+ * @param[out] token JSON token to store result.
*
* @retval 0 Success.
* @retval > 0 1-based position of a syntax error.
*/
static inline int
-json_parse_identifier(struct json_path_parser *parser,
- struct json_path_node *node)
+json_parse_identifier(struct json_lexer *lexer, struct json_token *token)
{
- assert(parser->offset < parser->src_len);
- int str_offset = parser->offset;
+ assert(lexer->offset < lexer->src_len);
+ int str_offset = lexer->offset;
UChar32 c;
- int rc = json_read_symbol(parser, &c);
+ int rc = json_read_symbol(lexer, &c);
if (rc != 0)
return rc;
/* First symbol can not be digit. */
if (!u_isalpha(c) && c != (UChar32)'_')
- return parser->symbol_count;
- int last_offset = parser->offset;
- while ((rc = json_read_symbol(parser, &c)) == 0) {
+ return lexer->symbol_count;
+ int last_offset = lexer->offset;
+ while ((rc = json_read_symbol(lexer, &c)) == 0) {
if (! json_is_valid_identifier_symbol(c)) {
- json_revert_symbol(parser, last_offset);
+ json_revert_symbol(lexer, last_offset);
break;
}
- last_offset = parser->offset;
+ last_offset = lexer->offset;
}
- node->type = JSON_PATH_STR;
- node->str = parser->src + str_offset;
- node->len = parser->offset - str_offset;
+ token->type = JSON_TOKEN_STR;
+ token->str = lexer->src + str_offset;
+ token->len = lexer->offset - str_offset;
return 0;
}
int
-json_path_next(struct json_path_parser *parser, struct json_path_node *node)
+json_lexer_next_token(struct json_lexer *lexer, struct json_token *token)
{
- if (parser->offset == parser->src_len) {
- node->type = JSON_PATH_END;
+ if (lexer->offset == lexer->src_len) {
+ token->type = JSON_TOKEN_END;
return 0;
}
UChar32 c;
- int last_offset = parser->offset;
- int rc = json_read_symbol(parser, &c);
+ int last_offset = lexer->offset;
+ int rc = json_read_symbol(lexer, &c);
if (rc != 0)
return rc;
switch(c) {
case (UChar32)'[':
/* Error for '[\0'. */
- if (parser->offset == parser->src_len)
- return parser->symbol_count;
- c = json_current_char(parser);
+ if (lexer->offset == lexer->src_len)
+ return lexer->symbol_count;
+ c = json_current_char(lexer);
if (c == '"' || c == '\'')
- rc = json_parse_string(parser, node, c);
+ rc = json_parse_string(lexer, token, c);
else
- rc = json_parse_integer(parser, node);
+ rc = json_parse_integer(lexer, token);
if (rc != 0)
return rc;
/*
* Expression, started from [ must be finished
* with ] regardless of its type.
*/
- if (parser->offset == parser->src_len ||
- json_current_char(parser) != ']')
- return parser->symbol_count + 1;
+ if (lexer->offset == lexer->src_len ||
+ json_current_char(lexer) != ']')
+ return lexer->symbol_count + 1;
/* Skip ] - one byte char. */
- json_skip_char(parser);
+ json_skip_char(lexer);
return 0;
case (UChar32)'.':
- if (parser->offset == parser->src_len)
- return parser->symbol_count + 1;
- return json_parse_identifier(parser, node);
+ if (lexer->offset == lexer->src_len)
+ return lexer->symbol_count + 1;
+ return json_parse_identifier(lexer, token);
default:
- json_revert_symbol(parser, last_offset);
- return json_parse_identifier(parser, node);
+ json_revert_symbol(lexer, last_offset);
+ return json_parse_identifier(lexer, token);
}
}
diff --git a/src/lib/json/path.h b/src/lib/json/json.h
similarity index 70%
rename from src/lib/json/path.h
rename to src/lib/json/json.h
index c3c381a14..ead446878 100644
--- a/src/lib/json/path.h
+++ b/src/lib/json/json.h
@@ -1,5 +1,5 @@
-#ifndef TARANTOOL_JSON_PATH_H_INCLUDED
-#define TARANTOOL_JSON_PATH_H_INCLUDED
+#ifndef TARANTOOL_JSON_JSON_H_INCLUDED
+#define TARANTOOL_JSON_JSON_H_INCLUDED
/*
* Copyright 2010-2018 Tarantool AUTHORS: please see AUTHORS file.
*
@@ -37,25 +37,25 @@ extern "C" {
#endif
/**
- * Parser for JSON paths:
+ * Lexer for JSON paths:
* <field>, <.field>, <[123]>, <['field']> and their combinations.
*/
-struct json_path_parser {
+struct json_lexer {
/** Source string. */
const char *src;
/** Length of string. */
int src_len;
- /** Current parser's offset in bytes. */
+ /** Current lexer's offset in bytes. */
int offset;
- /** Current parser's offset in symbols. */
+ /** Current lexer's offset in symbols. */
int symbol_count;
};
-enum json_path_type {
- JSON_PATH_NUM,
- JSON_PATH_STR,
- /** Parser reached end of path. */
- JSON_PATH_END,
+enum json_token_type {
+ JSON_TOKEN_NUM,
+ JSON_TOKEN_STR,
+ /** Lexer reached end of path. */
+ JSON_TOKEN_END,
};
/**
@@ -63,8 +63,8 @@ enum json_path_type {
* String idenfiers are in ["..."] and between dots. Numbers are
* indexes in [...].
*/
-struct json_path_node {
- enum json_path_type type;
+struct json_token {
+ enum json_token_type type;
union {
struct {
/** String identifier. */
@@ -78,35 +78,34 @@ struct json_path_node {
};
/**
- * Create @a parser.
- * @param[out] parser Parser to create.
+ * Create @a lexer.
+ * @param[out] lexer Lexer to create.
* @param src Source string.
* @param src_len Length of @a src.
*/
static inline void
-json_path_parser_create(struct json_path_parser *parser, const char *src,
- int src_len)
+json_lexer_create(struct json_lexer *lexer, const char *src, int src_len)
{
- parser->src = src;
- parser->src_len = src_len;
- parser->offset = 0;
- parser->symbol_count = 0;
+ lexer->src = src;
+ lexer->src_len = src_len;
+ lexer->offset = 0;
+ lexer->symbol_count = 0;
}
/**
- * Get a next path node.
- * @param parser Parser.
- * @param[out] node Node to store parsed result.
- * @retval 0 Success. For result see @a node.str, node.len,
- * node.num.
+ * Get a next path token.
+ * @param lexer Lexer.
+ * @param[out] token Token to store parsed result.
+ * @retval 0 Success. For result see @a token.str, token.len,
+ * token.num.
* @retval > 0 Position of a syntax error. A position is 1-based
* and starts from a beginning of a source string.
*/
int
-json_path_next(struct json_path_parser *parser, struct json_path_node *node);
+json_lexer_next_token(struct json_lexer *lexer, struct json_token *token);
#ifdef __cplusplus
}
#endif
-#endif /* TARANTOOL_JSON_PATH_H_INCLUDED */
+#endif /* TARANTOOL_JSON_JSON_H_INCLUDED */
diff --git a/test/unit/json_path.c b/test/unit/json_path.c
index 1d7e7d372..a5f90ad98 100644
--- a/test/unit/json_path.c
+++ b/test/unit/json_path.c
@@ -1,4 +1,4 @@
-#include "json/path.h"
+#include "json/json.h"
#include "unit.h"
#include "trivia/util.h"
#include <string.h>
@@ -6,21 +6,21 @@
#define reset_to_new_path(value) \
path = value; \
len = strlen(value); \
- json_path_parser_create(&parser, path, len);
+ json_lexer_create(&lexer, path, len);
#define is_next_index(value_len, value) \
- path = parser.src + parser.offset; \
- is(json_path_next(&parser, &node), 0, "parse <%." #value_len "s>", \
+ path = lexer.src + lexer.offset; \
+ is(json_lexer_next_token(&lexer, &token), 0, "parse <%." #value_len "s>", \
path); \
- is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", path); \
- is(node.num, value, "<%." #value_len "s> is " #value, path);
+ is(token.type, JSON_TOKEN_NUM, "<%." #value_len "s> is num", path); \
+ is(token.num, value, "<%." #value_len "s> is " #value, path);
#define is_next_key(value) \
len = strlen(value); \
- is(json_path_next(&parser, &node), 0, "parse <" value ">"); \
- is(node.type, JSON_PATH_STR, "<" value "> is str"); \
- is(node.len, len, "len is %d", len); \
- is(strncmp(node.str, value, len), 0, "str is " value);
+ is(json_lexer_next_token(&lexer, &token), 0, "parse <" value ">"); \
+ is(token.type, JSON_TOKEN_STR, "<" value "> is str"); \
+ is(token.len, len, "len is %d", len); \
+ is(strncmp(token.str, value, len), 0, "str is " value);
void
test_basic()
@@ -29,8 +29,8 @@ test_basic()
plan(71);
const char *path;
int len;
- struct json_path_parser parser;
- struct json_path_node node;
+ struct json_lexer lexer;
+ struct json_token token;
reset_to_new_path("[0].field1.field2['field3'][5]");
is_next_index(3, 0);
@@ -61,8 +61,8 @@ test_basic()
/* Empty path. */
reset_to_new_path("");
- is(json_path_next(&parser, &node), 0, "parse empty path");
- is(node.type, JSON_PATH_END, "is str");
+ is(json_lexer_next_token(&lexer, &token), 0, "parse empty path");
+ is(token.type, JSON_TOKEN_END, "is str");
/* Path with no '.' at the beginning. */
reset_to_new_path("field1.field2");
@@ -81,8 +81,8 @@ test_basic()
#define check_new_path_on_error(value, errpos) \
reset_to_new_path(value); \
- struct json_path_node node; \
- is(json_path_next(&parser, &node), errpos, "error on position %d" \
+ struct json_token token; \
+ is(json_lexer_next_token(&lexer, &token), errpos, "error on position %d" \
" for <%s>", errpos, path);
struct path_and_errpos {
@@ -97,7 +97,7 @@ test_errors()
plan(20);
const char *path;
int len;
- struct json_path_parser parser;
+ struct json_lexer lexer;
const struct path_and_errpos errors[] = {
/* Double [[. */
{"[[", 2},
@@ -133,27 +133,27 @@ test_errors()
for (size_t i = 0; i < lengthof(errors); ++i) {
reset_to_new_path(errors[i].path);
int errpos = errors[i].errpos;
- struct json_path_node node;
- is(json_path_next(&parser, &node), errpos,
+ struct json_token token;
+ is(json_lexer_next_token(&lexer, &token), errpos,
"error on position %d for <%s>", errpos, path);
}
reset_to_new_path("f.[2]")
- struct json_path_node node;
- json_path_next(&parser, &node);
- is(json_path_next(&parser, &node), 3, "can not write <field.[index]>")
+ struct json_token token;
+ json_lexer_next_token(&lexer, &token);
+ is(json_lexer_next_token(&lexer, &token), 3, "can not write <field.[index]>")
reset_to_new_path("f.")
- json_path_next(&parser, &node);
- is(json_path_next(&parser, &node), 3, "error in leading <.>");
+ json_lexer_next_token(&lexer, &token);
+ is(json_lexer_next_token(&lexer, &token), 3, "error in leading <.>");
reset_to_new_path("fiel d1")
- json_path_next(&parser, &node);
- is(json_path_next(&parser, &node), 5, "space inside identifier");
+ json_lexer_next_token(&lexer, &token);
+ is(json_lexer_next_token(&lexer, &token), 5, "space inside identifier");
reset_to_new_path("field\t1")
- json_path_next(&parser, &node);
- is(json_path_next(&parser, &node), 6, "tab inside identifier");
+ json_lexer_next_token(&lexer, &token);
+ is(json_lexer_next_token(&lexer, &token), 6, "tab inside identifier");
check_plan();
footer();
--
2.19.2
More information about the Tarantool-patches
mailing list