From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp60.i.mail.ru (smtp60.i.mail.ru [217.69.128.40]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 6F5EE46971A for ; Sat, 7 Dec 2019 00:16:59 +0300 (MSK) Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 13.0 \(3594.4.19\)) From: Roman Khabibov In-Reply-To: Date: Sat, 7 Dec 2019 00:16:57 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: <822D629F-3969-4FD2-B5A3-B30C1EB7FB52@tarantool.org> References: <20191015155953.10246-1-roman.habibov@tarantool.org> Subject: Re: [Tarantool-patches] [tarantool-patches] [PATCH] json: improve error messages List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy Cc: tarantool-patches@dev.tarantool.org Hi! Thanks for the review. > On Oct 17, 2019, at 00:45, Vladislav Shpilevoy = wrote: >=20 > Hi! Thanks for the patch! >=20 > Sorry, but I am not sure that now the messages are > better. Just look at it: >=20 > tarantool> json.decode('{') > --- > - error: Expected object key string but found '\0' on line 1 at = character 2 here ^^^... > ... >=20 > What is `^^^`, what is `...`? In the ticket it is said, that a user = should have a > context, but there is no a context. >=20 > I was thinking, that the author of 4339 rather meant that ^^^ should = be below the > error in an error string, like this: >=20 > tarantool> json.decode('{') > --- > - error: Expected object key string but found '\0' on line 1 at = character 2 here: > "{ " > ^^^ > ... >=20 > But I do realize, that it is too complex. And still ^^^ looks = confusing. > It looks like arrows pointing up, while you are trying to put a > context after them. >=20 > At least, you could make a context like this: >=20 > tarantool> json.decode('{') > --- > - error: Expected object key string but found '\0' on line 1 at = character 2 here > "{>>\0<<" > ... >=20 > So you would have arrows pointing right at the bad position. >=20 > I tried an error not related to eof, and got the same confusing > result: >=20 > tarantool> json.decode('{1: "world"}') > --- > - error: 'Expected object key string but found int on line 1 at = character 2 here ^^^: > "world"}...' > ... >=20 > =46rom that message looks like the error is in ':', or in "world", or > somewhere above because of up arrows. Let=E2=80=99s just remove =E2=80=9CT_*=E2=80=9D tokens. Let print of a = context will be =E2=80=9Cwontfix". Double arrows isn=E2=80=99t very good, IMO, because we don=E2=80=99t know the = length of wrong token. > See 3 comments below. >=20 >> diff --git a/third_party/lua-cjson/lua_cjson.c = b/third_party/lua-cjson/lua_cjson.c >> index 3d7edbf28..aab662585 100644 >> --- a/third_party/lua-cjson/lua_cjson.c >> +++ b/third_party/lua-cjson/lua_cjson.c >> @@ -72,23 +72,23 @@ typedef enum { >> } json_token_type_t; >>=20 >> static const char *json_token_type_name[] =3D { >> - "T_OBJ_BEGIN", >> - "T_OBJ_END", >> - "T_ARR_BEGIN", >> - "T_ARR_END", >> - "T_STRING", >> - "T_UINT", >> - "T_INT", >> - "T_NUMBER", >> - "T_BOOLEAN", >> - "T_NULL", >> - "T_COLON", >> - "T_COMMA", >> - "T_END", >> - "T_WHITESPACE", >> - "T_LINEFEED", >> - "T_ERROR", >> - "T_UNKNOWN", >> + "'{'", >> + "'}'", >> + "'['", >> + "']'", >> + "string", >> + "unsigned int", >> + "int", >> + "number", >> + "boolean", >> + "null", >> + "':'", >> + "','", >> + "'\\0'", >=20 > 1. I don't think this is a good idea. Probably, > just 'end' would be better. Users don't write > the terminating zero explicitly, and its presence > in an error message is confusing. Done. >> + "whitespace", >> + "'\\n'", >=20 > 2. Please, no special characters: no \0, no \n, no \t > or anything else. Here 'new line' would look easier > to understand. Done. >> + "error", >> + "unknown symbol", >> NULL >> }; >>=20 >> @@ -825,6 +825,21 @@ static void json_next_token(json_parse_t *json, = json_token_t *token) >> json_set_token_error(token, json, "invalid token"); >> } >>=20 >> +/* >> + * Copy 10 or less (if string @a ptr is shorter than 10) >> + * characters to static string buffer @a aux_str. >> + * >> + * @param aux_str String static buffer to fill. >> + * @param ptr String with the context. >> + */ >> +static void fill_context(char *aux_str, const char *ptr) >=20 > 3. So as a context you understand 10 symbols *after* the error? > I don't think that will help with error understanding, because > a user won't even see the error position. I think, you need to > take some symbols before and some after the bad position. >=20 > Also, please, make 10 a enum constant. >=20 >> +{ >> + int i =3D 0; >> + for(; ptr[i] !=3D '\0' && i < 10; i++) >> + aux_str[i] =3D ptr[i]; >> + aux_str[i] =3D '\0'; >> +} >> + >> /* This function does not return. >> * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. >> * The only supported exception is the temporary parser string >=20 commit 5642c534a599dcb1ada004a4d6ff4555063ff03d Author: Roman Khabibov Date: Wed Oct 9 17:07:41 2019 +0300 json: make error messages more readable =20 Print tokens themselves instead of token names "T_*" in the error messages. =20 Closes #4339 diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua index fadfc74ec..de277f2f7 100755 --- a/test/app-tap/json.test.lua +++ b/test/app-tap/json.test.lua @@ -22,7 +22,7 @@ end =20 tap.test("json", function(test) local serializer =3D require('json') - test:plan(40) + test:plan(51) =20 test:test("unsigned", common.test_unsigned, serializer) test:test("signed", common.test_signed, serializer) @@ -154,4 +154,34 @@ tap.test("json", function(test) _, err_msg =3D pcall(serializer.decode, '{"hello": "world",\n 100: = 200}') test:ok(string.find(err_msg, 'line 2 at character 2') ~=3D nil, 'mistake on second line') + + -- + -- gh-3316: Make sure that tokens 'T_*' are absent in error + -- messages and a context is printed. + -- + _, err_msg =3D pcall(serializer.decode, '{{: "world"}') + test:ok(string.find(err_msg, '\'{\'') ~=3D nil, '"{" instead of = T_OBJ_BEGIN') + _, err_msg =3D pcall(serializer.decode, '{"a": "world"}}') + test:ok(string.find(err_msg, '\'}\'') ~=3D nil, '"}" instead of = T_OBJ_END') + _, err_msg =3D pcall(serializer.decode, '{[: "world"}') + test:ok(string.find(err_msg, '\'[\'', 1, true) ~=3D nil, + '"[" instead of T_ARR_BEGIN') + _, err_msg =3D pcall(serializer.decode, '{]: "world"}') + test:ok(string.find(err_msg, '\']\'', 1, true) ~=3D nil, + '"]" instead of T_ARR_END') + _, err_msg =3D pcall(serializer.decode, '{1: "world"}') + test:ok(string.find(err_msg, 'int') ~=3D nil, 'int instead of = T_INT') + _, err_msg =3D pcall(serializer.decode, '{1.0: "world"}') + test:ok(string.find(err_msg, 'number') ~=3D nil, 'number instead of = T_NUMBER') + _, err_msg =3D pcall(serializer.decode, '{true: "world"}') + test:ok(string.find(err_msg, 'boolean') ~=3D nil, + 'boolean instead of T_BOOLEAN') + _, err_msg =3D pcall(serializer.decode, '{null: "world"}') + test:ok(string.find(err_msg, 'null') ~=3D nil, 'null instead of = T_NULL') + _, err_msg =3D pcall(serializer.decode, '{:: "world"}') + test:ok(string.find(err_msg, 'colon') ~=3D nil, 'colon instead of = T_COLON') + _, err_msg =3D pcall(serializer.decode, '{,: "world"}') + test:ok(string.find(err_msg, 'comma') ~=3D nil, 'comma instead of = T_COMMA') + _, err_msg =3D pcall(serializer.decode, '{') + test:ok(string.find(err_msg, 'end') ~=3D nil, 'end instead of = T_END') end) diff --git a/third_party/lua-cjson/lua_cjson.c = b/third_party/lua-cjson/lua_cjson.c index 3d25814f3..e68b52847 100644 --- a/third_party/lua-cjson/lua_cjson.c +++ b/third_party/lua-cjson/lua_cjson.c @@ -72,23 +72,23 @@ typedef enum { } json_token_type_t; =20 static const char *json_token_type_name[] =3D { - "T_OBJ_BEGIN", - "T_OBJ_END", - "T_ARR_BEGIN", - "T_ARR_END", - "T_STRING", - "T_UINT", - "T_INT", - "T_NUMBER", - "T_BOOLEAN", - "T_NULL", - "T_COLON", - "T_COMMA", - "T_END", - "T_WHITESPACE", - "T_LINEFEED", - "T_ERROR", - "T_UNKNOWN", + "'{'", + "'}'", + "'['", + "']'", + "string", + "unsigned int", + "int", + "number", + "boolean", + "null", + "colon", + "comma", + "end", + "whitespace", + "line feed", + "error", + "unknown symbol", NULL }; =20 @@ -914,7 +914,7 @@ static void json_parse_object_context(lua_State *l, = json_parse_t *json) } =20 if (token.type !=3D T_COMMA) - json_throw_parse_error(l, json, "comma or object end", = &token); + json_throw_parse_error(l, json, "comma or '}'", &token); =20 json_next_token(json, &token); } @@ -954,7 +954,7 @@ static void json_parse_array_context(lua_State *l, = json_parse_t *json) } =20 if (token.type !=3D T_COMMA) - json_throw_parse_error(l, json, "comma or array end", = &token); + json_throw_parse_error(l, json, "comma or ']'", &token); =20 json_next_token(json, &token); }