From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v5 1/9] box: refactor json_path_parser class Date: Mon, 26 Nov 2018 13:49:35 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Kirill Shcherbatov List-ID: Renamed object json_path_node to json_token and json_path_parser class to json_lexer. Needed for #1012 --- src/box/tuple_format.c | 51 +++++++++-------- src/lib/json/path.c | 151 ++++++++++++++++++++++++------------------------- src/lib/json/path.h | 49 ++++++++-------- test/unit/json_path.c | 54 +++++++++--------- 4 files changed, 152 insertions(+), 153 deletions(-) diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 5a2481f..cf05cc8 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -580,19 +580,19 @@ static int tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len) { int rc; - struct json_path_parser parser; - struct json_path_node node; - json_path_parser_create(&parser, path, path_len); - while ((rc = json_path_next(&parser, &node)) == 0) { - switch (node.type) { - case JSON_PATH_NUM: - rc = tuple_field_go_to_index(data, node.num); + struct json_lexer lexer; + struct json_token token; + json_lexer_create(&lexer, path, path_len); + while ((rc = json_lexer_next_token(&lexer, &token)) == 0) { + switch (token.type) { + case JSON_TOKEN_NUM: + rc = tuple_field_go_to_index(data, token.num); break; - case JSON_PATH_STR: - rc = tuple_field_go_to_key(data, node.str, node.len); + case JSON_TOKEN_STR: + rc = tuple_field_go_to_key(data, token.str, token.len); break; default: - assert(node.type == JSON_PATH_END); + assert(token.type == JSON_TOKEN_END); return 0; } if (rc != 0) { @@ -622,15 +622,15 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, *field = tuple_field_raw(format, tuple, field_map, fieldno); return 0; } - struct json_path_parser parser; 
- struct json_path_node node; - json_path_parser_create(&parser, path, path_len); - int rc = json_path_next(&parser, &node); + struct json_lexer lexer; + struct json_token token; + json_lexer_create(&lexer, path, path_len); + int rc = json_lexer_next_token(&lexer, &token); if (rc != 0) goto error; - switch(node.type) { - case JSON_PATH_NUM: { - int index = node.num; + switch(token.type) { + case JSON_TOKEN_NUM: { + int index = token.num; if (index == 0) { *field = NULL; return 0; @@ -641,10 +641,10 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, return 0; break; } - case JSON_PATH_STR: { + case JSON_TOKEN_STR: { /* First part of a path is a field name. */ uint32_t name_hash; - if (path_len == (uint32_t) node.len) { + if (path_len == (uint32_t) token.len) { name_hash = path_hash; } else { /* @@ -653,25 +653,26 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple, * used. A tuple dictionary hashes only * name, not path. */ - name_hash = field_name_hash(node.str, node.len); + name_hash = field_name_hash(token.str, token.len); } *field = tuple_field_raw_by_name(format, tuple, field_map, - node.str, node.len, name_hash); + token.str, token.len, + name_hash); if (*field == NULL) return 0; break; } default: - assert(node.type == JSON_PATH_END); + assert(token.type == JSON_TOKEN_END); *field = NULL; return 0; } - rc = tuple_field_go_to_path(field, path + parser.offset, - path_len - parser.offset); + rc = tuple_field_go_to_path(field, path + lexer.offset, + path_len - lexer.offset); if (rc == 0) return 0; /* Setup absolute error position. */ - rc += parser.offset; + rc += lexer.offset; error: assert(rc > 0); diff --git a/src/lib/json/path.c b/src/lib/json/path.c index 2e72930..dfd7d5c 100644 --- a/src/lib/json/path.c +++ b/src/lib/json/path.c @@ -38,82 +38,82 @@ /** * Read a single symbol from a string starting from an offset. - * @param parser JSON path parser. + * @param lexer JSON path lexer. 
* @param[out] UChar32 Read symbol. * * @retval 0 Success. * @retval > 0 1-based position of a syntax error. */ static inline int -json_read_symbol(struct json_path_parser *parser, UChar32 *out) +json_read_symbol(struct json_lexer *lexer, UChar32 *out) { - if (parser->offset == parser->src_len) { + if (lexer->offset == lexer->src_len) { *out = U_SENTINEL; - return parser->symbol_count + 1; + return lexer->symbol_count + 1; } - U8_NEXT(parser->src, parser->offset, parser->src_len, *out); + U8_NEXT(lexer->src, lexer->offset, lexer->src_len, *out); if (*out == U_SENTINEL) - return parser->symbol_count + 1; - ++parser->symbol_count; + return lexer->symbol_count + 1; + ++lexer->symbol_count; return 0; } /** * Rollback one symbol offset. - * @param parser JSON path parser. + * @param lexer JSON path lexer. * @param offset Offset to the previous symbol. */ static inline void -json_revert_symbol(struct json_path_parser *parser, int offset) +json_revert_symbol(struct json_lexer *lexer, int offset) { - parser->offset = offset; - --parser->symbol_count; + lexer->offset = offset; + --lexer->symbol_count; } /** Fast forward when it is known that a symbol is 1-byte char. */ static inline void -json_skip_char(struct json_path_parser *parser) +json_skip_char(struct json_lexer *lexer) { - ++parser->offset; - ++parser->symbol_count; + ++lexer->offset; + ++lexer->symbol_count; } /** Get a current symbol as a 1-byte char. */ static inline char -json_current_char(const struct json_path_parser *parser) +json_current_char(const struct json_lexer *lexer) { - return *(parser->src + parser->offset); + return *(lexer->src + lexer->offset); } /** - * Parse string identifier in quotes. Parser either stops right + * Parse string identifier in quotes. Lexer either stops right * after the closing quote, or returns an error position. - * @param parser JSON path parser. - * @param[out] node JSON node to store result. + * @param lexer JSON path lexer. + * @param[out] token JSON token to store result. 
* @param quote_type Quote by that a string must be terminated. * * @retval 0 Success. * @retval > 0 1-based position of a syntax error. */ static inline int -json_parse_string(struct json_path_parser *parser, struct json_path_node *node, +json_parse_string(struct json_lexer *lexer, struct json_token *token, UChar32 quote_type) { - assert(parser->offset < parser->src_len); - assert(quote_type == json_current_char(parser)); + assert(lexer->offset < lexer->src_len); + assert(quote_type == json_current_char(lexer)); /* The first symbol is always char - ' or ". */ - json_skip_char(parser); - int str_offset = parser->offset; + json_skip_char(lexer); + int str_offset = lexer->offset; UChar32 c; int rc; - while ((rc = json_read_symbol(parser, &c)) == 0) { + while ((rc = json_read_symbol(lexer, &c)) == 0) { if (c == quote_type) { - int len = parser->offset - str_offset - 1; + int len = lexer->offset - str_offset - 1; if (len == 0) - return parser->symbol_count; - node->type = JSON_PATH_STR; - node->str = parser->src + str_offset; - node->len = len; + return lexer->symbol_count; + token->type = JSON_TOKEN_STR; + token->str = lexer->src + str_offset; + token->len = len; return 0; } } @@ -122,32 +122,32 @@ json_parse_string(struct json_path_parser *parser, struct json_path_node *node, /** * Parse digit sequence into integer until non-digit is met. - * Parser stops right after the last digit. - * @param parser JSON parser. - * @param[out] node JSON node to store result. + * Lexer stops right after the last digit. + * @param lexer JSON lexer. + * @param[out] token JSON token to store result. * * @retval 0 Success. * @retval > 0 1-based position of a syntax error. 
*/ static inline int -json_parse_integer(struct json_path_parser *parser, struct json_path_node *node) +json_parse_integer(struct json_lexer *lexer, struct json_token *token) { - const char *end = parser->src + parser->src_len; - const char *pos = parser->src + parser->offset; + const char *end = lexer->src + lexer->src_len; + const char *pos = lexer->src + lexer->offset; assert(pos < end); int len = 0; uint64_t value = 0; char c = *pos; if (! isdigit(c)) - return parser->symbol_count + 1; + return lexer->symbol_count + 1; do { value = value * 10 + c - (int)'0'; ++len; } while (++pos < end && isdigit((c = *pos))); - parser->offset += len; - parser->symbol_count += len; - node->type = JSON_PATH_NUM; - node->num = value; + lexer->offset += len; + lexer->symbol_count += len; + token->type = JSON_TOKEN_NUM; + token->num = value; return 0; } @@ -164,81 +164,80 @@ json_is_valid_identifier_symbol(UChar32 c) /** * Parse identifier out of quotes. It can contain only alphas, * digits and underscores. And can not contain digit at the first - * position. Parser is stoped right after the last non-digit, + * position. Lexer is stopped right after the last non-digit, * non-alpha and non-underscore symbol. - * @param parser JSON parser. - * @param[out] node JSON node to store result. + * @param lexer JSON lexer. + * @param[out] token JSON token to store result. * * @retval 0 Success. * @retval > 0 1-based position of a syntax error. */ static inline int -json_parse_identifier(struct json_path_parser *parser, - struct json_path_node *node) +json_parse_identifier(struct json_lexer *lexer, struct json_token *token) { - assert(parser->offset < parser->src_len); - int str_offset = parser->offset; + assert(lexer->offset < lexer->src_len); + int str_offset = lexer->offset; UChar32 c; - int rc = json_read_symbol(parser, &c); + int rc = json_read_symbol(lexer, &c); if (rc != 0) return rc; /* First symbol can not be digit. 
*/ if (!u_isalpha(c) && c != (UChar32)'_') - return parser->symbol_count; - int last_offset = parser->offset; - while ((rc = json_read_symbol(parser, &c)) == 0) { + return lexer->symbol_count; + int last_offset = lexer->offset; + while ((rc = json_read_symbol(lexer, &c)) == 0) { if (! json_is_valid_identifier_symbol(c)) { - json_revert_symbol(parser, last_offset); + json_revert_symbol(lexer, last_offset); break; } - last_offset = parser->offset; + last_offset = lexer->offset; } - node->type = JSON_PATH_STR; - node->str = parser->src + str_offset; - node->len = parser->offset - str_offset; + token->type = JSON_TOKEN_STR; + token->str = lexer->src + str_offset; + token->len = lexer->offset - str_offset; return 0; } int -json_path_next(struct json_path_parser *parser, struct json_path_node *node) +json_lexer_next_token(struct json_lexer *lexer, struct json_token *token) { - if (parser->offset == parser->src_len) { - node->type = JSON_PATH_END; + if (lexer->offset == lexer->src_len) { + token->type = JSON_TOKEN_END; return 0; } UChar32 c; - int last_offset = parser->offset; - int rc = json_read_symbol(parser, &c); + int last_offset = lexer->offset; + int rc = json_read_symbol(lexer, &c); if (rc != 0) return rc; switch(c) { case (UChar32)'[': /* Error for '[\0'. */ - if (parser->offset == parser->src_len) - return parser->symbol_count; - c = json_current_char(parser); + if (lexer->offset == lexer->src_len) + return lexer->symbol_count; + c = json_current_char(lexer); if (c == '"' || c == '\'') - rc = json_parse_string(parser, node, c); + rc = json_parse_string(lexer, token, c); else - rc = json_parse_integer(parser, node); + rc = json_parse_integer(lexer, token); if (rc != 0) return rc; /* * Expression, started from [ must be finished * with ] regardless of its type. 
*/ - if (parser->offset == parser->src_len || - json_current_char(parser) != ']') - return parser->symbol_count + 1; + if (lexer->offset == lexer->src_len || + json_current_char(lexer) != ']') + return lexer->symbol_count + 1; /* Skip ] - one byte char. */ - json_skip_char(parser); + json_skip_char(lexer); return 0; case (UChar32)'.': - if (parser->offset == parser->src_len) - return parser->symbol_count + 1; - return json_parse_identifier(parser, node); + if (lexer->offset == lexer->src_len) + return lexer->symbol_count + 1; + return json_parse_identifier(lexer, token); default: - json_revert_symbol(parser, last_offset); - return json_parse_identifier(parser, node); + json_revert_symbol(lexer, last_offset); + return json_parse_identifier(lexer, token); } } diff --git a/src/lib/json/path.h b/src/lib/json/path.h index c3c381a..7f41fb4 100644 --- a/src/lib/json/path.h +++ b/src/lib/json/path.h @@ -37,25 +37,25 @@ extern "C" { #endif /** - * Parser for JSON paths: + * Lexer for JSON paths: * <field>, <.field>, <[123]>, <['field']> and their combinations. */ -struct json_path_parser { +struct json_lexer { /** Source string. */ const char *src; /** Length of string. */ int src_len; - /** Current parser's offset in bytes. */ + /** Current lexer's offset in bytes. */ int offset; - /** Current parser's offset in symbols. */ + /** Current lexer's offset in symbols. */ int symbol_count; }; -enum json_path_type { - JSON_PATH_NUM, - JSON_PATH_STR, - /** Parser reached end of path. */ - JSON_PATH_END, +enum json_token_type { + JSON_TOKEN_NUM, + JSON_TOKEN_STR, + /** Lexer reached end of path. */ + JSON_TOKEN_END, }; /** @@ -63,8 +63,8 @@ enum json_path_type { * String idenfiers are in ["..."] and between dots. Numbers are * indexes in [...]. */ -struct json_path_node { - enum json_path_type type; +struct json_token { + enum json_token_type type; union { struct { /** String identifier. */ @@ -78,32 +78,31 @@ struct json_path_node { }; /** - * Create @a parser. 
- * @param[out] parser Parser to create. + * Create @a lexer. + * @param[out] lexer Lexer to create. * @param src Source string. * @param src_len Length of @a src. */ static inline void -json_path_parser_create(struct json_path_parser *parser, const char *src, - int src_len) +json_lexer_create(struct json_lexer *lexer, const char *src, int src_len) { - parser->src = src; - parser->src_len = src_len; - parser->offset = 0; - parser->symbol_count = 0; + lexer->src = src; + lexer->src_len = src_len; + lexer->offset = 0; + lexer->symbol_count = 0; } /** - * Get a next path node. - * @param parser Parser. - * @param[out] node Node to store parsed result. - * @retval 0 Success. For result see @a node.str, node.len, - * node.num. + * Get a next path token. + * @param lexer Lexer. + * @param[out] token Token to store parsed result. + * @retval 0 Success. For result see @a token.str, token.len, + * token.num. * @retval > 0 Position of a syntax error. A position is 1-based * and starts from a beginning of a source string. */ int -json_path_next(struct json_path_parser *parser, struct json_path_node *node); +json_lexer_next_token(struct json_lexer *lexer, struct json_token *token); #ifdef __cplusplus } diff --git a/test/unit/json_path.c b/test/unit/json_path.c index 1d7e7d3..bb6e5ca 100644 --- a/test/unit/json_path.c +++ b/test/unit/json_path.c @@ -6,21 +6,21 @@ #define reset_to_new_path(value) \ path = value; \ len = strlen(value); \ - json_path_parser_create(&parser, path, len); + json_lexer_create(&lexer, path, len); #define is_next_index(value_len, value) \ - path = parser.src + parser.offset; \ - is(json_path_next(&parser, &node), 0, "parse <%." #value_len "s>", \ + path = lexer.src + lexer.offset; \ + is(json_lexer_next_token(&lexer, &token), 0, "parse <%." #value_len "s>", \ path); \ - is(node.type, JSON_PATH_NUM, "<%." #value_len "s> is num", path); \ - is(node.num, value, "<%." #value_len "s> is " #value, path); + is(token.type, JSON_TOKEN_NUM, "<%." 
#value_len "s> is num", path); \ + is(token.num, value, "<%." #value_len "s> is " #value, path); #define is_next_key(value) \ len = strlen(value); \ - is(json_path_next(&parser, &node), 0, "parse <" value ">"); \ - is(node.type, JSON_PATH_STR, "<" value "> is str"); \ - is(node.len, len, "len is %d", len); \ - is(strncmp(node.str, value, len), 0, "str is " value); + is(json_lexer_next_token(&lexer, &token), 0, "parse <" value ">"); \ + is(token.type, JSON_TOKEN_STR, "<" value "> is str"); \ + is(token.len, len, "len is %d", len); \ + is(strncmp(token.str, value, len), 0, "str is " value); void test_basic() @@ -29,8 +29,8 @@ test_basic() plan(71); const char *path; int len; - struct json_path_parser parser; - struct json_path_node node; + struct json_lexer lexer; + struct json_token token; reset_to_new_path("[0].field1.field2['field3'][5]"); is_next_index(3, 0); @@ -61,8 +61,8 @@ test_basic() /* Empty path. */ reset_to_new_path(""); - is(json_path_next(&parser, &node), 0, "parse empty path"); - is(node.type, JSON_PATH_END, "is str"); + is(json_lexer_next_token(&lexer, &token), 0, "parse empty path"); + is(token.type, JSON_TOKEN_END, "is str"); /* Path with no '.' at the beginning. */ reset_to_new_path("field1.field2"); @@ -81,8 +81,8 @@ test_basic() #define check_new_path_on_error(value, errpos) \ reset_to_new_path(value); \ - struct json_path_node node; \ - is(json_path_next(&parser, &node), errpos, "error on position %d" \ + struct json_token token; \ + is(json_lexer_next_token(&lexer, &token), errpos, "error on position %d" \ " for <%s>", errpos, path); struct path_and_errpos { @@ -97,7 +97,7 @@ test_errors() plan(20); const char *path; int len; - struct json_path_parser parser; + struct json_lexer lexer; const struct path_and_errpos errors[] = { /* Double [[. 
*/ {"[[", 2}, @@ -133,27 +133,27 @@ test_errors() for (size_t i = 0; i < lengthof(errors); ++i) { reset_to_new_path(errors[i].path); int errpos = errors[i].errpos; - struct json_path_node node; - is(json_path_next(&parser, &node), errpos, + struct json_token token; + is(json_lexer_next_token(&lexer, &token), errpos, "error on position %d for <%s>", errpos, path); } reset_to_new_path("f.[2]") - struct json_path_node node; - json_path_next(&parser, &node); - is(json_path_next(&parser, &node), 3, "can not write <field.[index]>") + struct json_token token; + json_lexer_next_token(&lexer, &token); + is(json_lexer_next_token(&lexer, &token), 3, "can not write <field.[index]>") reset_to_new_path("f.") - json_path_next(&parser, &node); - is(json_path_next(&parser, &node), 3, "error in leading <.>"); + json_lexer_next_token(&lexer, &token); + is(json_lexer_next_token(&lexer, &token), 3, "error in leading <.>"); reset_to_new_path("fiel d1") - json_path_next(&parser, &node); - is(json_path_next(&parser, &node), 5, "space inside identifier"); + json_lexer_next_token(&lexer, &token); + is(json_lexer_next_token(&lexer, &token), 5, "space inside identifier"); reset_to_new_path("field\t1") - json_path_next(&parser, &node); - is(json_path_next(&parser, &node), 6, "tab inside identifier"); + json_lexer_next_token(&lexer, &token); + is(json_lexer_next_token(&lexer, &token), 6, "tab inside identifier"); check_plan(); footer(); -- 2.7.4