* [Tarantool-patches] [PATCH 0/2] Improve json error message. @ 2020-10-08 21:59 Roman Khabibov 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 1/2] json: make error messages more readable Roman Khabibov ` (3 more replies) 0 siblings, 4 replies; 13+ messages in thread From: Roman Khabibov @ 2020-10-08 21:59 UTC (permalink / raw) To: tarantool-patches I already have LGTM from Vlad. Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4339-json-err Issue: https://github.com/tarantool/tarantool/issues/4339 Roman Khabibov (2): json: make error messages more readable json: print context in error mesages test/app-tap/json.test.lua | 50 +++++++++++++++- third_party/lua-cjson/lua_cjson.c | 97 +++++++++++++++++++++++-------- 2 files changed, 122 insertions(+), 25 deletions(-) -- 2.24.3 (Apple Git-128) ^ permalink raw reply [flat|nested] 13+ messages in thread
* [Tarantool-patches] [PATCH 1/2] json: make error messages more readable 2020-10-08 21:59 [Tarantool-patches] [PATCH 0/2] Improve json error message Roman Khabibov @ 2020-10-08 21:59 ` Roman Khabibov 2020-10-12 8:52 ` Leonid Vasiliev 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 2/2] json: print context in error mesages Roman Khabibov ` (2 subsequent siblings) 3 siblings, 1 reply; 13+ messages in thread From: Roman Khabibov @ 2020-10-08 21:59 UTC (permalink / raw) To: tarantool-patches Print tokens themselves instead of token names "T_*" in the error messages. Part of #4339 --- test/app-tap/json.test.lua | 32 +++++++++++++++++++++++++- third_party/lua-cjson/lua_cjson.c | 38 +++++++++++++++---------------- 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua index fadfc74ec..6d511e686 100755 --- a/test/app-tap/json.test.lua +++ b/test/app-tap/json.test.lua @@ -22,7 +22,7 @@ end tap.test("json", function(test) local serializer = require('json') - test:plan(40) + test:plan(51) test:test("unsigned", common.test_unsigned, serializer) test:test("signed", common.test_signed, serializer) @@ -154,4 +154,34 @@ tap.test("json", function(test) _, err_msg = pcall(serializer.decode, '{"hello": "world",\n 100: 200}') test:ok(string.find(err_msg, 'line 2 at character 2') ~= nil, 'mistake on second line') + + -- + -- gh-4339: Make sure that tokens 'T_*' are absent in error + -- messages and a context is printed. 
+ -- + _, err_msg = pcall(serializer.decode, '{{: "world"}') + test:ok(string.find(err_msg, '\'{\'') ~= nil, '"{" instead of T_OBJ_BEGIN') + _, err_msg = pcall(serializer.decode, '{"a": "world"}}') + test:ok(string.find(err_msg, '\'}\'') ~= nil, '"}" instead of T_OBJ_END') + _, err_msg = pcall(serializer.decode, '{[: "world"}') + test:ok(string.find(err_msg, '\'[\'', 1, true) ~= nil, + '"[" instead of T_ARR_BEGIN') + _, err_msg = pcall(serializer.decode, '{]: "world"}') + test:ok(string.find(err_msg, '\']\'', 1, true) ~= nil, + '"]" instead of T_ARR_END') + _, err_msg = pcall(serializer.decode, '{1: "world"}') + test:ok(string.find(err_msg, 'int') ~= nil, 'int instead of T_INT') + _, err_msg = pcall(serializer.decode, '{1.0: "world"}') + test:ok(string.find(err_msg, 'number') ~= nil, 'number instead of T_NUMBER') + _, err_msg = pcall(serializer.decode, '{true: "world"}') + test:ok(string.find(err_msg, 'boolean') ~= nil, + 'boolean instead of T_BOOLEAN') + _, err_msg = pcall(serializer.decode, '{null: "world"}') + test:ok(string.find(err_msg, 'null') ~= nil, 'null instead of T_NULL') + _, err_msg = pcall(serializer.decode, '{:: "world"}') + test:ok(string.find(err_msg, 'colon') ~= nil, 'colon instead of T_COLON') + _, err_msg = pcall(serializer.decode, '{,: "world"}') + test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA') + _, err_msg = pcall(serializer.decode, '{') + test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END') end) diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c index d4b89ce0d..33cf30577 100644 --- a/third_party/lua-cjson/lua_cjson.c +++ b/third_party/lua-cjson/lua_cjson.c @@ -75,23 +75,23 @@ typedef enum { } json_token_type_t; static const char *json_token_type_name[] = { - "T_OBJ_BEGIN", - "T_OBJ_END", - "T_ARR_BEGIN", - "T_ARR_END", - "T_STRING", - "T_UINT", - "T_INT", - "T_NUMBER", - "T_BOOLEAN", - "T_NULL", - "T_COLON", - "T_COMMA", - "T_END", - "T_WHITESPACE", - "T_LINEFEED", - 
"T_ERROR", - "T_UNKNOWN", + "'{'", + "'}'", + "'['", + "']'", + "string", + "unsigned int", + "int", + "number", + "boolean", + "null", + "colon", + "comma", + "end", + "whitespace", + "line feed", + "error", + "unknown symbol", NULL }; @@ -920,7 +920,7 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json) } if (token.type != T_COMMA) - json_throw_parse_error(l, json, "comma or object end", &token); + json_throw_parse_error(l, json, "comma or '}'", &token); json_next_token(json, &token); } @@ -960,7 +960,7 @@ static void json_parse_array_context(lua_State *l, json_parse_t *json) } if (token.type != T_COMMA) - json_throw_parse_error(l, json, "comma or array end", &token); + json_throw_parse_error(l, json, "comma or ']'", &token); json_next_token(json, &token); } -- 2.24.3 (Apple Git-128) ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 1/2] json: make error messages more readable 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 1/2] json: make error messages more readable Roman Khabibov @ 2020-10-12 8:52 ` Leonid Vasiliev 0 siblings, 0 replies; 13+ messages in thread From: Leonid Vasiliev @ 2020-10-12 8:52 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thank you for the patch. LGTM. ^ permalink raw reply [flat|nested] 13+ messages in thread
* [Tarantool-patches] [PATCH 2/2] json: print context in error mesages 2020-10-08 21:59 [Tarantool-patches] [PATCH 0/2] Improve json error message Roman Khabibov 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 1/2] json: make error messages more readable Roman Khabibov @ 2020-10-08 21:59 ` Roman Khabibov 2020-10-12 9:20 ` Leonid Vasiliev 2020-10-12 8:51 ` [Tarantool-patches] [PATCH 0/2] Improve json error message Leonid Vasiliev 2020-11-26 9:46 ` Kirill Yukhin 3 siblings, 1 reply; 13+ messages in thread From: Roman Khabibov @ 2020-10-08 21:59 UTC (permalink / raw) To: tarantool-patches Context is just a string with a few characters before and after wrong token, wrong token itself and a symbolic arrow pointing to this token. Closes #4339 --- test/app-tap/json.test.lua | 20 ++++++++++- third_party/lua-cjson/lua_cjson.c | 59 ++++++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua index 6d511e686..70e9f6cf7 100755 --- a/test/app-tap/json.test.lua +++ b/test/app-tap/json.test.lua @@ -22,7 +22,7 @@ end tap.test("json", function(test) local serializer = require('json') - test:plan(51) + test:plan(57) test:test("unsigned", common.test_unsigned, serializer) test:test("signed", common.test_signed, serializer) @@ -184,4 +184,22 @@ tap.test("json", function(test) test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA') _, err_msg = pcall(serializer.decode, '{') test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END') + + -- + -- gh-4339: Make sure that context is printed. 
+ -- + _, err_msg = pcall(serializer.decode, '{{: "world"}') + test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') + _, err_msg = pcall(serializer.decode, '{"a": "world"}}') + test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') + _, err_msg = pcall(serializer.decode, '{1: "world"}') + test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') + _, err_msg = pcall(serializer.decode, '{') + test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') + _, err_msg = pcall(serializer.decode, '}') + test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') + serializer.cfg{decode_max_depth = 1} + _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') + test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil, 'context #6') + end) diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c index 33cf30577..f9e06172d 100644 --- a/third_party/lua-cjson/lua_cjson.c +++ b/third_party/lua-cjson/lua_cjson.c @@ -831,6 +831,48 @@ static void json_next_token(json_parse_t *json, json_token_t *token) json_set_token_error(token, json, "invalid token"); } +enum context_length { + CONTEXT_ARROW_LENGTH = 4, + CONTEXT_MAX_LENGTH_BEFORE = 8, + CONTEXT_MAX_LENGTH_AFTER = 8, + CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + CONTEXT_MAX_LENGTH_AFTER + + CONTEXT_ARROW_LENGTH, +}; + +/** + * Copy characters near wrong token with the position @a + * column_index to a static string buffer @a context and lay out + * arrow " >> " before this token. + * + * @param context String static buffer to fill. + * @param json Structure with pointers to parsing string. + * @param column_index Position of wrong token in the current + * line. + */ +static void fill_context(char *context, json_parse_t *json, int column_index) +{ + assert(column_index >= 0); + int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ? 
+ column_index : CONTEXT_MAX_LENGTH_BEFORE; + const char *src = json->cur_line_ptr + column_index - length_before; + /* Fill context before the arrow. */ + memcpy(context, src, length_before); + context += length_before; + src += length_before; + + /* Make the arrow. */ + *(context++) = ' '; + memset(context, '>', CONTEXT_ARROW_LENGTH - 2); + context += CONTEXT_ARROW_LENGTH - 2; + *(context++) = ' '; + + /* Fill context after the arrow. */ + const char *end = context + CONTEXT_MAX_LENGTH_AFTER; + for (; context < end && *src != '\0' && *src != '\n'; ++src, ++context) + *context = *src; + *context = '\0'; +} + /* This function does not return. * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. * The only supported exception is the temporary parser string @@ -849,9 +891,13 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json, else found = json_token_type_name[token->type]; + int column_index = token->column_index; + char context[CONTEXT_MAX_LENGTH + 1]; + fill_context(context, json, column_index); + /* Note: token->column_index is 0 based, display starting from 1 */ - luaL_error(l, "Expected %s but found %s on line %d at character %d", exp, - found, json->line_count, token->column_index + 1); + luaL_error(l, "Expected %s but found %s on line %d at character %d here " + "'%s'", exp, found, json->line_count, column_index + 1, context); } static inline void json_decode_ascend(json_parse_t *json) @@ -868,10 +914,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots) return; } + char context[CONTEXT_MAX_LENGTH + 1]; + fill_context(context, json, json->ptr - json->cur_line_ptr - 1); + strbuf_free(json->tmp); - luaL_error(l, "Found too many nested data structures (%d) on line %d at " - "character %d", json->current_depth, json->line_count, - json->ptr - json->cur_line_ptr); + luaL_error(l, "Found too many nested data structures (%d) on line %d at cha" + "racter %d here '%s'", json->current_depth, json->line_count, + json->ptr - 
json->cur_line_ptr, context); } static void json_parse_object_context(lua_State *l, json_parse_t *json) -- 2.24.3 (Apple Git-128) ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 2/2] json: print context in error mesages 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 2/2] json: print context in error mesages Roman Khabibov @ 2020-10-12 9:20 ` Leonid Vasiliev 2020-11-03 9:53 ` roman 0 siblings, 1 reply; 13+ messages in thread From: Leonid Vasiliev @ 2020-10-12 9:20 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thank you for the patch. I have two small questions: On 09.10.2020 00:59, Roman Khabibov wrote: > Context is just a string with a few characters before and after > wrong token, wrong token itself and a symbolic arrow pointing to > this token. > > Closes #4339Hi > --- > test/app-tap/json.test.lua | 20 ++++++++++- > third_party/lua-cjson/lua_cjson.c | 59 ++++++++++++++++++++++++++++--- > 2 files changed, 73 insertions(+), 6 deletions(-) > > diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua > index 6d511e686..70e9f6cf7 100755 > --- a/test/app-tap/json.test.lua > +++ b/test/app-tap/json.test.lua > @@ -22,7 +22,7 @@ end > > tap.test("json", function(test) > local serializer = require('json') > - test:plan(51) > + test:plan(57) > > test:test("unsigned", common.test_unsigned, serializer) > test:test("signed", common.test_signed, serializer) > @@ -184,4 +184,22 @@ tap.test("json", function(test) > test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA') > _, err_msg = pcall(serializer.decode, '{') > test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END') > + > + -- > + -- gh-4339: Make sure that context is printed. 
> + -- > + _, err_msg = pcall(serializer.decode, '{{: "world"}') > + test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') > + _, err_msg = pcall(serializer.decode, '{"a": "world"}}') > + test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') > + _, err_msg = pcall(serializer.decode, '{1: "world"}') > + test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') > + _, err_msg = pcall(serializer.decode, '{') > + test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') > + _, err_msg = pcall(serializer.decode, '}') > + test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') > + serializer.cfg{decode_max_depth = 1} > + _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') > + test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil, 'context #6') > + > end) > diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c > index 33cf30577..f9e06172d 100644 > --- a/third_party/lua-cjson/lua_cjson.c > +++ b/third_party/lua-cjson/lua_cjson.c > @@ -831,6 +831,48 @@ static void json_next_token(json_parse_t *json, json_token_t *token) > json_set_token_error(token, json, "invalid token"); > } > > +enum context_length { > + CONTEXT_ARROW_LENGTH = 4, > + CONTEXT_MAX_LENGTH_BEFORE = 8, > + CONTEXT_MAX_LENGTH_AFTER = 8, > + CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + CONTEXT_MAX_LENGTH_AFTER + > + CONTEXT_ARROW_LENGTH, > +}; > + > +/** > + * Copy characters near wrong token with the position @a > + * column_index to a static string buffer @a context and lay out > + * arrow " >> " before this token. > + * > + * @param context String static buffer to fill. > + * @param json Structure with pointers to parsing string. > + * @param column_index Position of wrong token in the current > + * line. > + */ > +static void fill_context(char *context, json_parse_t *json, int column_index) > +{ > + assert(column_index >= 0); > + int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ? 
> + column_index : CONTEXT_MAX_LENGTH_BEFORE; > + const char *src = json->cur_line_ptr + column_index - length_before; > + /* Fill context before the arrow. */ > + memcpy(context, src, length_before); > + context += length_before; > + src += length_before; > + > + /* Make the arrow. */ > + *(context++) = ' '; > + memset(context, '>', CONTEXT_ARROW_LENGTH - 2); > + context += CONTEXT_ARROW_LENGTH - 2; > + *(context++) = ' '; > + > + /* Fill context after the arrow. */ > + const char *end = context + CONTEXT_MAX_LENGTH_AFTER; > + for (; context < end && *src != '\0' && *src != '\n'; ++src, ++context) > + *context = *src; > + *context = '\0'; > +} > + > /* This function does not return. > * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. > * The only supported exception is the temporary parser string > @@ -849,9 +891,13 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json, > else > found = json_token_type_name[token->type]; > > + int column_index = token->column_index; > + char context[CONTEXT_MAX_LENGTH + 1]; > + fill_context(context, json, column_index); What for you using the additional variable "column_index"? Maybe just "fill_context(context, json, token->column_index)". > + > /* Note: token->column_index is 0 based, display starting from 1 */ > - luaL_error(l, "Expected %s but found %s on line %d at character %d", exp, > - found, json->line_count, token->column_index + 1); > + luaL_error(l, "Expected %s but found %s on line %d at character %d here " > + "'%s'", exp, found, json->line_count, column_index + 1, context); > } > > static inline void json_decode_ascend(json_parse_t *json) > @@ -868,10 +914,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots) > return; > } > > + char context[CONTEXT_MAX_LENGTH + 1]; > + fill_context(context, json, json->ptr - json->cur_line_ptr - 1); Typically, the code uses "token-> column_index = json-> ptr - json-> cur_line_ptr;". Why do you use "json->ptr - json->cur_line_ptr - 1"? 
> + > strbuf_free(json->tmp); > - luaL_error(l, "Found too many nested data structures (%d) on line %d at " > - "character %d", json->current_depth, json->line_count, > - json->ptr - json->cur_line_ptr); > + luaL_error(l, "Found too many nested data structures (%d) on line %d at cha" > + "racter %d here '%s'", json->current_depth, json->line_count, > + json->ptr - json->cur_line_ptr, context); > } > > static void json_parse_object_context(lua_State *l, json_parse_t *json) > ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 2/2] json: print context in error mesages 2020-10-12 9:20 ` Leonid Vasiliev @ 2020-11-03 9:53 ` roman 2020-11-09 21:21 ` Leonid Vasiliev 0 siblings, 1 reply; 13+ messages in thread From: roman @ 2020-11-03 9:53 UTC (permalink / raw) To: Leonid Vasiliev, tarantool-patches Hi! Thanks for the review. On 12.10.2020 12:20, Leonid Vasiliev wrote: > Hi! Thank you for the patch. > I have two small questions: > > On 09.10.2020 00:59, Roman Khabibov wrote: >> Context is just a string with a few characters before and after >> wrong token, wrong token itself and a symbolic arrow pointing to >> this token. >> >> Closes #4339Hi >> --- >> test/app-tap/json.test.lua | 20 ++++++++++- >> third_party/lua-cjson/lua_cjson.c | 59 ++++++++++++++++++++++++++++--- >> 2 files changed, 73 insertions(+), 6 deletions(-) >> >> diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua >> index 6d511e686..70e9f6cf7 100755 >> --- a/test/app-tap/json.test.lua >> +++ b/test/app-tap/json.test.lua >> @@ -22,7 +22,7 @@ end >> tap.test("json", function(test) >> local serializer = require('json') >> - test:plan(51) >> + test:plan(57) >> test:test("unsigned", common.test_unsigned, serializer) >> test:test("signed", common.test_signed, serializer) >> @@ -184,4 +184,22 @@ tap.test("json", function(test) >> test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of >> T_COMMA') >> _, err_msg = pcall(serializer.decode, '{') >> test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of >> T_END') >> + >> + -- >> + -- gh-4339: Make sure that context is printed. 
>> + -- >> + _, err_msg = pcall(serializer.decode, '{{: "world"}') >> + test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') >> + _, err_msg = pcall(serializer.decode, '{"a": "world"}}') >> + test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') >> + _, err_msg = pcall(serializer.decode, '{1: "world"}') >> + test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') >> + _, err_msg = pcall(serializer.decode, '{') >> + test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') >> + _, err_msg = pcall(serializer.decode, '}') >> + test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') >> + serializer.cfg{decode_max_depth = 1} >> + _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') >> + test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil, >> 'context #6') >> + >> end) >> diff --git a/third_party/lua-cjson/lua_cjson.c >> b/third_party/lua-cjson/lua_cjson.c >> index 33cf30577..f9e06172d 100644 >> --- a/third_party/lua-cjson/lua_cjson.c >> +++ b/third_party/lua-cjson/lua_cjson.c >> @@ -831,6 +831,48 @@ static void json_next_token(json_parse_t *json, >> json_token_t *token) >> json_set_token_error(token, json, "invalid token"); >> } >> +enum context_length { >> + CONTEXT_ARROW_LENGTH = 4, >> + CONTEXT_MAX_LENGTH_BEFORE = 8, >> + CONTEXT_MAX_LENGTH_AFTER = 8, >> + CONTEXT_MAX_LENGTH = CONTEXT_MAX_LENGTH_BEFORE + >> CONTEXT_MAX_LENGTH_AFTER + >> + CONTEXT_ARROW_LENGTH, >> +}; >> + >> +/** >> + * Copy characters near wrong token with the position @a >> + * column_index to a static string buffer @a context and lay out >> + * arrow " >> " before this token. >> + * >> + * @param context String static buffer to fill. >> + * @param json Structure with pointers to parsing string. >> + * @param column_index Position of wrong token in the current >> + * line. 
>> + */ >> +static void fill_context(char *context, json_parse_t *json, int >> column_index) >> +{ >> + assert(column_index >= 0); >> + int length_before = column_index < CONTEXT_MAX_LENGTH_BEFORE ? >> + column_index : CONTEXT_MAX_LENGTH_BEFORE; >> + const char *src = json->cur_line_ptr + column_index - >> length_before; >> + /* Fill context before the arrow. */ >> + memcpy(context, src, length_before); >> + context += length_before; >> + src += length_before; >> + >> + /* Make the arrow. */ >> + *(context++) = ' '; >> + memset(context, '>', CONTEXT_ARROW_LENGTH - 2); >> + context += CONTEXT_ARROW_LENGTH - 2; >> + *(context++) = ' '; >> + >> + /* Fill context after the arrow. */ >> + const char *end = context + CONTEXT_MAX_LENGTH_AFTER; >> + for (; context < end && *src != '\0' && *src != '\n'; ++src, >> ++context) >> + *context = *src; >> + *context = '\0'; >> +} >> + >> /* This function does not return. >> * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. >> * The only supported exception is the temporary parser string >> @@ -849,9 +891,13 @@ static void json_throw_parse_error(lua_State *l, >> json_parse_t *json, >> else >> found = json_token_type_name[token->type]; >> + int column_index = token->column_index; >> + char context[CONTEXT_MAX_LENGTH + 1]; >> + fill_context(context, json, column_index); > > What for you using the additional variable "column_index"? Maybe just > "fill_context(context, json, token->column_index)". Yes. Fixed. 
>> + >> /* Note: token->column_index is 0 based, display starting from >> 1 */ >> - luaL_error(l, "Expected %s but found %s on line %d at character >> %d", exp, >> - found, json->line_count, token->column_index + 1); >> + luaL_error(l, "Expected %s but found %s on line %d at character >> %d here " >> + "'%s'", exp, found, json->line_count, column_index + >> 1, context); >> } >> static inline void json_decode_ascend(json_parse_t *json) >> @@ -868,10 +914,13 @@ static void json_decode_descend(lua_State *l, >> json_parse_t *json, int slots) >> return; >> } >> + char context[CONTEXT_MAX_LENGTH + 1]; >> + fill_context(context, json, json->ptr - json->cur_line_ptr - 1); > > Typically, the code uses "token-> column_index = json-> ptr - json-> > cur_line_ptr;". > Why do you use "json->ptr - json->cur_line_ptr - 1"? Because, in this case json->ptr point to the character after the character we need to point arrow. I mean (json->ptr - 1) - json->cur_line_ptr >> + >> strbuf_free(json->tmp); >> - luaL_error(l, "Found too many nested data structures (%d) on >> line %d at " >> - "character %d", json->current_depth, json->line_count, >> - json->ptr - json->cur_line_ptr); >> + luaL_error(l, "Found too many nested data structures (%d) on >> line %d at cha" >> + "racter %d here '%s'", json->current_depth, >> json->line_count, >> + json->ptr - json->cur_line_ptr, context); >> } >> static void json_parse_object_context(lua_State *l, json_parse_t >> *json) >> I decided to rename context to err_context to avoid confusion with json context (see functions in lua_cjson.c below). commit 6c5cc1e6f067f1c9358a8bee03501f5df23f0191 Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Thu Dec 12 16:55:33 2019 +0300 json: print context in error mesages Context is just a string with a few characters before and after wrong token, wrong token itself and a symbolic arrow pointing to this token. 
Closes #4339 diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua index 6d511e6..70e9f6c 100755 --- a/test/app-tap/json.test.lua +++ b/test/app-tap/json.test.lua @@ -22,7 +22,7 @@ end tap.test("json", function(test) local serializer = require('json') - test:plan(51) + test:plan(57) test:test("unsigned", common.test_unsigned, serializer) test:test("signed", common.test_signed, serializer) @@ -184,4 +184,22 @@ tap.test("json", function(test) test:ok(string.find(err_msg, 'comma') ~= nil, 'comma instead of T_COMMA') _, err_msg = pcall(serializer.decode, '{') test:ok(string.find(err_msg, 'end') ~= nil, 'end instead of T_END') + + -- + -- gh-4339: Make sure that context is printed. + -- + _, err_msg = pcall(serializer.decode, '{{: "world"}') + test:ok(string.find(err_msg, '{ >> {: "worl') ~= nil, 'context #1') + _, err_msg = pcall(serializer.decode, '{"a": "world"}}') + test:ok(string.find(err_msg, '"world"} >> }') ~= nil, 'context #2') + _, err_msg = pcall(serializer.decode, '{1: "world"}') + test:ok(string.find(err_msg, '{ >> 1: "worl') ~= nil, 'context #3') + _, err_msg = pcall(serializer.decode, '{') + test:ok(string.find(err_msg, '{ >> ') ~= nil, 'context #4') + _, err_msg = pcall(serializer.decode, '}') + test:ok(string.find(err_msg, ' >> }') ~= nil, 'context #5') + serializer.cfg{decode_max_depth = 1} + _, err_msg = pcall(serializer.decode, '{"a": {a = {}}}') + test:ok(string.find(err_msg, '{"a": >> {a = {}}') ~= nil, 'context #6') + end) diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c index 33cf305..38e9998 100644 --- a/third_party/lua-cjson/lua_cjson.c +++ b/third_party/lua-cjson/lua_cjson.c @@ -831,6 +831,50 @@ static void json_next_token(json_parse_t *json, json_token_t *token) json_set_token_error(token, json, "invalid token"); } +enum err_context_length { + ERR_CONTEXT_ARROW_LENGTH = 4, + ERR_CONTEXT_MAX_LENGTH_BEFORE = 8, + ERR_CONTEXT_MAX_LENGTH_AFTER = 8, + ERR_CONTEXT_MAX_LENGTH = 
ERR_CONTEXT_MAX_LENGTH_BEFORE + + ERR_CONTEXT_MAX_LENGTH_AFTER + ERR_CONTEXT_ARROW_LENGTH, +}; + +/** + * Copy characters near wrong token with the position @a + * column_index to a static string buffer @a err_context and lay + * out arrow " >> " before this token. + * + * @param context String static buffer to fill. + * @param json Structure with pointers to parsing string. + * @param column_index Position of wrong token in the current + * line. + */ +static void fill_err_context(char *err_context, json_parse_t *json, + int column_index) +{ + assert(column_index >= 0); + int length_before = column_index < ERR_CONTEXT_MAX_LENGTH_BEFORE ? + column_index : ERR_CONTEXT_MAX_LENGTH_BEFORE; + const char *src = json->cur_line_ptr + column_index - length_before; + /* Fill error context before the arrow. */ + memcpy(err_context, src, length_before); + err_context += length_before; + src += length_before; + + /* Make the arrow. */ + *(err_context++) = ' '; + memset(err_context, '>', ERR_CONTEXT_ARROW_LENGTH - 2); + err_context += ERR_CONTEXT_ARROW_LENGTH - 2; + *(err_context++) = ' '; + + /* Fill error context after the arrow. */ + const char *end = err_context + ERR_CONTEXT_MAX_LENGTH_AFTER; + for (; err_context < end && *src != '\0' && *src != '\n'; ++src, + ++err_context) + *err_context = *src; + *err_context = '\0'; +} + /* This function does not return. * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. 
* The only supported exception is the temporary parser string @@ -849,9 +893,13 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json, else found = json_token_type_name[token->type]; + char err_context[ERR_CONTEXT_MAX_LENGTH + 1]; + fill_err_context(err_context, json, token->column_index); + /* Note: token->column_index is 0 based, display starting from 1 */ - luaL_error(l, "Expected %s but found %s on line %d at character %d", exp, - found, json->line_count, token->column_index + 1); + luaL_error(l, "Expected %s but found %s on line %d at character %d here " + "'%s'", exp, found, json->line_count, token->column_index + 1, + err_context); } static inline void json_decode_ascend(json_parse_t *json) @@ -868,10 +916,13 @@ static void json_decode_descend(lua_State *l, json_parse_t *json, int slots) return; } + char err_context[ERR_CONTEXT_MAX_LENGTH + 1]; + fill_err_context(err_context, json, json->ptr - json->cur_line_ptr - 1); + strbuf_free(json->tmp); - luaL_error(l, "Found too many nested data structures (%d) on line %d at " - "character %d", json->current_depth, json->line_count, - json->ptr - json->cur_line_ptr); + luaL_error(l, "Found too many nested data structures (%d) on line %d at cha" + "racter %d here '%s'", json->current_depth, json->line_count, + json->ptr - json->cur_line_ptr, err_context); } static void json_parse_object_context(lua_State *l, json_parse_t *json) ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 2/2] json: print context in error mesages 2020-11-03 9:53 ` roman @ 2020-11-09 21:21 ` Leonid Vasiliev 0 siblings, 0 replies; 13+ messages in thread From: Leonid Vasiliev @ 2020-11-09 21:21 UTC (permalink / raw) To: roman, tarantool-patches LGTM. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 0/2] Improve json error message. 2020-10-08 21:59 [Tarantool-patches] [PATCH 0/2] Improve json error message Roman Khabibov 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 1/2] json: make error messages more readable Roman Khabibov 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 2/2] json: print context in error mesages Roman Khabibov @ 2020-10-12 8:51 ` Leonid Vasiliev 2020-11-03 10:01 ` roman 2020-11-26 9:46 ` Kirill Yukhin 3 siblings, 1 reply; 13+ messages in thread From: Leonid Vasiliev @ 2020-10-12 8:51 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thank you for the patch. Add @ChangeLog. On 09.10.2020 00:59, Roman Khabibov wrote: > I already have LGTM from Vlad. > > Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4339-json-err > Issue: https://github.com/tarantool/tarantool/issues/4339 > > Roman Khabibov (2): > json: make error messages more readable > json: print context in error mesages > > test/app-tap/json.test.lua | 50 +++++++++++++++- > third_party/lua-cjson/lua_cjson.c | 97 +++++++++++++++++++++++-------- > 2 files changed, 122 insertions(+), 25 deletions(-) > ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 0/2] Improve json error message. 2020-10-12 8:51 ` [Tarantool-patches] [PATCH 0/2] Improve json error message Leonid Vasiliev @ 2020-11-03 10:01 ` roman 2020-11-09 21:22 ` Leonid Vasiliev 0 siblings, 1 reply; 13+ messages in thread From: roman @ 2020-11-03 10:01 UTC (permalink / raw) To: Leonid Vasiliev; +Cc: tarantool-patches Hi! On 12.10.2020 11:51, Leonid Vasiliev wrote: > Hi! Thank you for the patch. > Add @ChangeLog. > > On 09.10.2020 00:59, Roman Khabibov wrote: >> I already have LGTM from Vlad. >> >> Branch: >> https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4339-json-err >> Issue: https://github.com/tarantool/tarantool/issues/4339 >> >> Roman Khabibov (2): >> json: make error messages more readable >> json: print context in error mesages >> >> test/app-tap/json.test.lua | 50 +++++++++++++++- >> third_party/lua-cjson/lua_cjson.c | 97 +++++++++++++++++++++++-------- >> 2 files changed, 122 insertions(+), 25 deletions(-) >> @ChangeLog: * Print json tokens themselves instead of token names "T_*" in the error messages (gh-4339). * Print context in json error messages (gh-4339). ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 0/2] Improve json error message. 2020-11-03 10:01 ` roman @ 2020-11-09 21:22 ` Leonid Vasiliev 2020-11-05 15:44 ` roman 0 siblings, 1 reply; 13+ messages in thread From: Leonid Vasiliev @ 2020-11-09 21:22 UTC (permalink / raw) To: roman; +Cc: tarantool-patches LGTM. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 0/2] Improve json error message. 2020-11-09 21:22 ` Leonid Vasiliev @ 2020-11-05 15:44 ` roman 2020-11-12 14:03 ` Alexander V. Tikhonov 0 siblings, 1 reply; 13+ messages in thread From: roman @ 2020-11-05 15:44 UTC (permalink / raw) To: avtikhon; +Cc: tarantool-patches Thank you, Leonid. Alexander, can you look through the patch set, please? On 10.11.2020 00:22, Leonid Vasiliev wrote: > LGTM. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 0/2] Improve json error message. 2020-11-05 15:44 ` roman @ 2020-11-12 14:03 ` Alexander V. Tikhonov 0 siblings, 0 replies; 13+ messages in thread From: Alexander V. Tikhonov @ 2020-11-12 14:03 UTC (permalink / raw) To: roman; +Cc: tarantool-patches Hi Roman, I've checked all results in gitlab-ci, and no new degradations found [1], patch LGTM. [1] - https://gitlab.com/tarantool/tarantool/-/pipelines/211735855 On Thu, Nov 05, 2020 at 06:44:31PM +0300, roman wrote: > Thank you, Leonid. > > Alexander, can you look through the patch set, please? > > > On 10.11.2020 00:22, Leonid Vasiliev wrote: > > LGTM. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Tarantool-patches] [PATCH 0/2] Improve json error message. 2020-10-08 21:59 [Tarantool-patches] [PATCH 0/2] Improve json error message Roman Khabibov ` (2 preceding siblings ...) 2020-10-12 8:51 ` [Tarantool-patches] [PATCH 0/2] Improve json error message Leonid Vasiliev @ 2020-11-26 9:46 ` Kirill Yukhin 3 siblings, 0 replies; 13+ messages in thread From: Kirill Yukhin @ 2020-11-26 9:46 UTC (permalink / raw) To: Roman Khabibov; +Cc: tarantool-patches Hello, On 09 Oct 00:59, Roman Khabibov wrote: > I already have LGTM from Vlad. > > Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4339-json-err > Issue: https://github.com/tarantool/tarantool/issues/4339 > > Roman Khabibov (2): > json: make error messages more readable > json: print context in error mesages > > test/app-tap/json.test.lua | 50 +++++++++++++++- > third_party/lua-cjson/lua_cjson.c | 97 +++++++++++++++++++++++-------- > 2 files changed, 122 insertions(+), 25 deletions(-) I've checked your patchset into 2.5, 2.6 and master. -- Regards, Kirill Yukhin ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2020-11-26 9:46 UTC | newest] Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-10-08 21:59 [Tarantool-patches] [PATCH 0/2] Improve json error message Roman Khabibov 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 1/2] json: make error messages more readable Roman Khabibov 2020-10-12 8:52 ` Leonid Vasiliev 2020-10-08 21:59 ` [Tarantool-patches] [PATCH 2/2] json: print context in error mesages Roman Khabibov 2020-10-12 9:20 ` Leonid Vasiliev 2020-11-03 9:53 ` roman 2020-11-09 21:21 ` Leonid Vasiliev 2020-10-12 8:51 ` [Tarantool-patches] [PATCH 0/2] Improve json error message Leonid Vasiliev 2020-11-03 10:01 ` roman 2020-11-09 21:22 ` Leonid Vasiliev 2020-11-05 15:44 ` roman 2020-11-12 14:03 ` Alexander V. Tikhonov 2020-11-26 9:46 ` Kirill Yukhin
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox