[tarantool-patches] Re: [PATCH] json: clarify bad syntax error messages
Roman Khabibov
roman.habibov at tarantool.org
Sun Sep 29 19:04:19 MSK 2019
> On Sep 27, 2019, at 22:54, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
>
> Hi! Thanks for the patch!
>
>> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
>> index 6b03e391a..0dee2deb9 100644
>> --- a/third_party/lua-cjson/lua_cjson.c
>> +++ b/third_party/lua-cjson/lua_cjson.c
>> @@ -105,11 +107,12 @@ typedef struct {
>> strbuf_t *tmp; /* Temporary storage for strings */
>> struct luaL_serializer *cfg;
>> int current_depth;
>> + int line_count;
>> + const char *cur_line_ptr;
>> } json_parse_t;
>>
>> typedef struct {
>> json_token_type_t type;
>> - int index;
>
> Unfortunately, you can't remove token index. Otherwise
> error messages will report wrong positions:
>
> tarantool> json.decode("{ 100: 200 }")
> ---
> - error: Expected object key string but found T_INT on line 1 at character 6
> ...
>
> Error was on character 3 (begin of '100'), not 6.
@@ -829,9 +837,9 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
else
found = json_token_type_name[token->type];
- /* Note: token->index is 0 based, display starting from 1 */
- luaL_error(l, "Expected %s but found %s at character %d",
- exp, found, token->index + 1);
+ /* Note: token->column_index is 0 based, display starting from 1 */
+ luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
+ found, json->line_count, token->column_index + 1);
}
commit 9f9bd3eb2d064129ff6b1a764140ebef242d7ff7
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date: Wed Sep 18 16:47:48 2019 +0300
json: clarify bad syntax error messages
Count lines in the json parsing structure. It is needed to print
the number of line and column where a mistake was made.
Closes #3316
diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index 10f6f4ab2..2efbe3648 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -22,7 +22,7 @@ end
tap.test("json", function(test)
local serializer = require('json')
- test:plan(32)
+ test:plan(39)
test:test("unsigned", common.test_unsigned, serializer)
test:test("signed", common.test_signed, serializer)
@@ -121,4 +121,34 @@ tap.test("json", function(test)
test:is(serializer.encode(rec4),
'{"a":{"a":null,"b":null},"b":{"a":null,"b":null}}')
serializer.cfg({encode_max_depth = orig_encode_max_depth})
+
+ --
+ -- gh-3316: Make sure that line number is printed in the error
+ -- message.
+ --
+ local _, err_msg
+ _, err_msg = pcall(serializer.decode, 'a{"hello": \n"world"}')
+ test:ok(string.find(err_msg, 'line 1 at character 1') ~= nil,
+ 'mistake on first line')
+ _, err_msg = pcall(serializer.decode, '{"hello": \n"world"a}')
+ test:ok(string.find(err_msg, 'line 2 at character 8') ~= nil,
+ 'mistake on second line')
+ _, err_msg = pcall(serializer.decode, '\n\n\n\n{"hello": "world"a}')
+ test:ok(string.find(err_msg, 'line 5 at character 18') ~= nil,
+ 'mistake on fifth line')
+ serializer.cfg{decode_max_depth = 1}
+ _, err_msg = pcall(serializer.decode,
+ '{"hello": {"world": {"hello": "world"}}}')
+ test:ok(string.find(err_msg, 'line 1 at character 11') ~= nil,
+ 'mistake on first line')
+ _, err_msg = pcall(serializer.decode,
+ '{"hello": \n{"world": {"hello": "world"}}}')
+ test:ok(string.find(err_msg, 'line 2 at character 1') ~= nil,
+ 'mistake on second line')
+ _, err_msg = pcall(serializer.decode, "{ 100: 200 }")
+ test:ok(string.find(err_msg, 'line 1 at character 3') ~= nil,
+ 'mistake on first line')
+ _, err_msg = pcall(serializer.decode, '{"hello": "world",\n 100: 200}')
+ test:ok(string.find(err_msg, 'line 2 at character 2') ~= nil,
+ 'mistake on second line')
end)
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 6b03e391a..1a446c035 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -66,6 +66,7 @@ typedef enum {
T_COMMA,
T_END,
T_WHITESPACE,
+ T_LINEFEED,
T_ERROR,
T_UNKNOWN
} json_token_type_t;
@@ -85,6 +86,7 @@ static const char *json_token_type_name[] = {
"T_COMMA",
"T_END",
"T_WHITESPACE",
+ "T_LINEFEED",
"T_ERROR",
"T_UNKNOWN",
NULL
@@ -105,11 +107,13 @@ typedef struct {
strbuf_t *tmp; /* Temporary storage for strings */
struct luaL_serializer *cfg;
int current_depth;
+ int line_count;
+ const char *cur_line_ptr;
} json_parse_t;
typedef struct {
json_token_type_t type;
- int index;
+ int column_index;
union {
const char *string;
double number;
@@ -195,7 +199,7 @@ static void json_create_tokens()
ch2token['\0'] = T_END;
ch2token[' '] = T_WHITESPACE;
ch2token['\t'] = T_WHITESPACE;
- ch2token['\n'] = T_WHITESPACE;
+ ch2token['\n'] = T_LINEFEED;
ch2token['\r'] = T_WHITESPACE;
/* Update characters that require further processing */
@@ -592,7 +596,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
const char *errtype)
{
token->type = T_ERROR;
- token->index = json->ptr - json->data;
+ token->column_index = json->ptr - json->cur_line_ptr;
token->value.string = errtype;
}
@@ -732,14 +736,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
while (1) {
ch = (unsigned char)*(json->ptr);
token->type = ch2token[ch];
- if (token->type != T_WHITESPACE)
+ if (token->type == T_LINEFEED) {
+ json->line_count++;
+ json->cur_line_ptr = json->ptr + 1;
+ } else if (token->type != T_WHITESPACE) {
break;
+ }
json->ptr++;
}
/* Store location of new token. Required when throwing errors
* for unexpected tokens (syntax errors). */
- token->index = json->ptr - json->data;
+ token->column_index = json->ptr - json->cur_line_ptr;
/* Don't advance the pointer for an error or the end */
if (token->type == T_ERROR) {
@@ -829,9 +837,9 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
else
found = json_token_type_name[token->type];
- /* Note: token->index is 0 based, display starting from 1 */
- luaL_error(l, "Expected %s but found %s at character %d",
- exp, found, token->index + 1);
+ /* Note: token->column_index is 0 based, display starting from 1 */
+ luaL_error(l, "Expected %s but found %s on line %d at character %d", exp,
+ found, json->line_count, token->column_index + 1);
}
static inline void json_decode_ascend(json_parse_t *json)
@@ -849,8 +857,9 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
}
strbuf_free(json->tmp);
- luaL_error(l, "Found too many nested data structures (%d) at character %d",
- json->current_depth, json->ptr - json->data);
+ luaL_error(l, "Found too many nested data structures (%d) on line %d at "
+ "character %d", json->current_depth, json->line_count,
+ json->ptr - json->cur_line_ptr);
}
static void json_parse_object_context(lua_State *l, json_parse_t *json)
@@ -1001,6 +1010,8 @@ static int json_decode(lua_State *l)
json.data = luaL_checklstring(l, 1, &json_len);
json.current_depth = 0;
json.ptr = json.data;
+ json.line_count = 1;
+ json.cur_line_ptr = json.data;
/* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
*
More information about the Tarantool-patches
mailing list