From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladislav Shpilevoy Subject: Re: [tarantool-patches] Re: [PATCH v2 03/10] yaml: introduce yaml.decode_tag References: <0bce785538bba5d81f6c813732a7e309a941e175.1524228894.git.v.shpilevoy@tarantool.org> <20180510184107.GC30593@atlas> Message-ID: Date: Thu, 24 May 2018 23:50:36 +0300 MIME-Version: 1.0 In-Reply-To: <20180510184107.GC30593@atlas> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Konstantin Osipov Cc: vdavydov.dev@gmail.com List-ID: On 10/05/2018 21:41, Konstantin Osipov wrote: > * Vladislav Shpilevoy [18/04/20 16:25]: > >> Yaml.decode_tag allows to decode a single tag of a YAML document. > > Why do you need a function to decode just the tag, not an entire > document with tags? Fixed on the branch. See the complete patch at the end of the letter. But IMO now decode() function looks ugly. Option 'tag_only' requires to create a Lua table on each call with the single option when it is needed. And 'tag_only' itself is very strange. It is the same as we would make message pack types as options like this: - msgpack.decode_array/unsigned/string ... + msgpack.decode(..., {array/unsigned/string... = true}) But as you wish. > >> >> To distinguish them YAML tags will be used. A client console for >> each message will try to find a tag. If a tag is absent, then the >> message is a simple response on a request. > > response to Ditto. >> @@ -87,6 +90,31 @@ local function test_tagged(test, s) >> test:is(ret, "%TAG !push! "..prefix.."\n--- 300\n...\n", "encode_tagged usage") >> ret = s.encode_tagged({handle = '!print!', prefix = prefix}, {a = 100, b = 200}) >> test:is(ret, "%TAG !print! tag:tarantool.io/push,2018\n---\na: 100\nb: 200\n...\n", 'encode_tagged usage') >> + -- >> + -- Test decoding tags. >> + -- >> + must_be_err = "Usage: decode_tag()" > > This is a bad test - any change to the error message will require > changes to the test. If you're testing that there is a relevant > usage message, you could search for substring. @@ -93,11 +93,10 @@ local function test_tagged(test, s) -- -- Test decoding tags. -- - must_be_err = "Usage: yaml.decode(document, [{tag_only = boolean}])" ok, err = pcall(s.decode) - test:is(err, must_be_err, "decode usage") + test:isnt(err:find('Usage'), nil, "decode usage") ok, err = pcall(s.decode, false) - test:is(err, must_be_err, "decode usage") + test:isnt(err:find('Usage'), nil, "decode usage") local handle, prefix = s.decode(ret, {tag_only = true}) test:is(handle, "!print!", "handle is decoded ok") test:is(prefix, "tag:tarantool.io/push,2018", "prefix is decoded ok") Answers on your questions below are in the commit comment already. I will copy-paste them here for you. > > As a person reading your code and test for it, I am at a loss what > kind of string the API expects. Is it a fragment of YAML document > containing the tag? Is it a single YAML document? Is it a stream > with multiple documents? Cite: "Yaml.decode tag_only option allows to decode a single tag of a YAML document.". > I can see you're passing the return value > from the encoder into decode_tag(). Why is the API a-symmetric? > > It should not be asymmetric in the first place, but if you decided > to make it one please this deserves an explanation in the > changeset comment. Cite: "If a tag is !print!, then the document consists of a single string, that must be printed. Such a document must be decoded to get the printed string." Cite: "If a tag is !push!, then the document is sent via box.session.push, and must not be decoded. It can be just printed or ignored or something." Summary: it makes no sense to decode a push message in console, if it will be in any case encoded back and printed. So the message can be just printed without decode+encode. > >> + ok, err = pcall(s.decode_tag) > > I suggest you simply have encode_tagged and decode_tagged. > Or even simpler, extend dump/load, with tag support. I do not need full tag support. I need to be able to encode with a single tag - 'push' or 'print', and decode a *single* tag, not the entire document. I explained why in the commit comment, and above. > >> + test:is(err, must_be_err, "decode_tag usage") >> + ok, err = pcall(s.decode_tag, false) >> + test:is(err, must_be_err, "decode_tag usage") >> + local handle, prefix = s.decode_tag(ret) >> + test:is(handle, "!print!", "handle is decoded ok") >> + test:is(prefix, "tag:tarantool.io/push,2018", "prefix is decoded ok") >> + local several_tags = >> +[[%TAG !tag1! tag:tarantool.io/tag1,2018 >> +%TAG !tag2! tag:tarantool.io/tag2,2018 > > Please add a test case for multiple documents. + local several_documents = +[[ +%TAG !tag1! tag:tarantool.io/tag1,2018 +--- 1 +... + +%TAG !tag2! tag:tarantool.io/tag2,2018 +--- 2 +... + +]] + handle, prefix = s.decode(several_documents, {tag_only = true}) + test:is(handle, "!tag1!", "tag handle on multiple documents") + test:is(prefix, "tag:tarantool.io/tag1,2018", + "tag prefix on multiple documents") >> diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc >> index 8329f84e9..d24715edd 100644 >> --- a/third_party/lua-yaml/lyaml.cc >> +++ b/third_party/lua-yaml/lyaml.cc >> @@ -361,6 +361,72 @@ static int l_load(lua_State *L) { >> return loader.document_count; >> } >> >> +/** >> + * Decode a global tag of document. Multiple tags can not be >> + * decoded. In a case of multiple documents only first is >> + * processed. > > Decode a document taking into account document tags. > In case of success, pops the input from the stack and pushed > the document and a table with options, containing tag prefix and > tag handle. Is not relevant after the previous comment. > >> + * @param YAML document in string. >> + * @retval nil, err Error occured during decoding. In the second >> + * value is error description. > > >> + * @retval nil, nil A document does not contain tags. >> + * @retval handle, prefix Handle and prefix of a tag. >> + */ >> +static int >> +l_load_tag(struct lua_State *L) > > Function name in C does not match the Lua name. Please make sure > the names match. I did it advisedly. Lyaml library has such naming convention. We must either convert the entire file, or obey this style. > > I understand you sometimes may avoid extra work Ok, thanks for the 'compliment'. I am 'happy' you think so about me. I am working every day more then 12 hours for you, so 'obviously' I very very 'do not like' to work trying to avoid any. You are right as always. > because you don't believe I am ever going to look at the patch, but this is not > extra work, this is just sloppy code. > >> +{ >> + if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) >> + return luaL_error(L, "Usage: decode_tag()"); >> + size_t len; >> + const char *str = lua_tolstring(L, 1, &len); >> + struct lua_yaml_loader loader; >> + memset(&loader, 0, sizeof(loader)); >> + loader.L = L; >> + loader.cfg = luaL_checkserializer(L); >> + if (yaml_parser_initialize(&loader.parser) == 0) >> + luaL_error(L, OOM_ERRMSG); >> + yaml_tag_directive_t *start, *end; >> + yaml_parser_set_input_string(&loader.parser, (const unsigned char *) str, >> + len); > > Shouldn't you use yaml typedef here rather than const unsigned > char *? It is not used in yaml_parser_set_input_string declaration, but as you wish. - yaml_parser_set_input_string(&loader.parser, - (const unsigned char *) document, len); + yaml_parser_set_input_string(&loader.parser, (yaml_char_t *) document, len); if (lua_gettop(L) > 1) { > >> + /* Initial parser step. Detect the documents start position. */ >> + if (do_parse(&loader) == 0) >> + goto parse_error; >> + if (loader.event.type != YAML_STREAM_START_EVENT) { >> + lua_pushnil(L); >> + lua_pushstring(L, "expected STREAM_START_EVENT"); >> + return 2; >> + } > > What is the current convention for dump/load API? Does it use nil, > err or lua_error() for errors? Throw on OOM, return nil and error object on other error types. > > Why did you decide to depart from the current convention? I respect the convention. Our convention is throw on OOM, and nil + error object else. > >> + /* Parse a document start. */ >> + if (do_parse(&loader) == 0) >> + goto parse_error; >> + if (loader.event.type == YAML_STREAM_END_EVENT) >> + goto no_tags; >> + assert(loader.event.type == YAML_DOCUMENT_START_EVENT); >> + start = loader.event.data.document_start.tag_directives.start; >> + end = loader.event.data.document_start.tag_directives.end; >> + if (start == end) >> + goto no_tags; >> + if (end - start > 1) { >> + lua_pushnil(L); >> + lua_pushstring(L, "can not decode multiple tags"); >> + return 2; >> + } >> + lua_pushstring(L, (const char *) start->handle); >> + lua_pushstring(L, (const char *) start->prefix); >> + delete_event(&loader); >> + yaml_parser_delete(&loader.parser); >> + return 2; > > Why not make the API symmetric in what it expects and returns? > > dump(object, options) -> stream Dump does not return a stream. It returns a single string with YAML documents. Maybe you meant load? Load returns multiple values, and throws on any error, when no options are specified. This is its old behavior that I can not break. But for new functionality (tag_only option) I use the correct convention: objects or nil + error. See below the entire patch. =============================================================================== commit 2d30cb1f3c28faf25fc39e33d7215d09d0814e3f Author: Vladislav Shpilevoy Date: Tue Apr 3 23:12:50 2018 +0300 yaml: introduce yaml.decode tag_only option Yaml.decode tag_only option allows to decode a single tag of a YAML document. For #2677 it is needed to detect different push types in text console: print pushes via console.print, and actual pushes via box.session.push. To distinguish them YAML tags will be used. A client console for each message will try to find a tag. If a tag is absent, then the message is a simple response to a request. If a tag is !print!, then the document consists of a single string, that must be printed. Such a document must be decoded to get the printed string. So the calls sequence is yaml.decode(tag_only) + yaml.decode. The reason why a print message must be decoded is that a print() result on a server side can be not well-formatted YAML, and must be encoded into it to be correctly sent. For example, when I do on a server side something like this: console.print('very bad YAML string') The result of a print() is not a YAML document, and to be sent it must be encoded into YAML on a server side. If a tag is !push!, then the document is sent via box.session.push, and must not be decoded. It can be just printed or ignored or something. Needed for #2677 diff --git a/test/app-tap/yaml.test.lua b/test/app-tap/yaml.test.lua index bb75ce4ea..c6eca100d 100755 --- a/test/app-tap/yaml.test.lua +++ b/test/app-tap/yaml.test.lua @@ -70,7 +70,10 @@ local function test_output(test, s) end local function test_tagged(test, s) - test:plan(7) + test:plan(17) + -- + -- Test encoding tags. + -- local prefix = 'tag:tarantool.io/push,2018' local ok, err = pcall(s.encode_tagged) test:isnt(err:find('Usage'), nil, "encode_tagged usage") @@ -86,6 +89,45 @@ local function test_tagged(test, s) test:is(ret, "%TAG !push! "..prefix.."\n--- 300\n...\n", "encode_tagged usage") ret = s.encode_tagged({a = 100, b = 200}, {handle = '!print!', prefix = prefix}) test:is(ret, "%TAG !print! tag:tarantool.io/push,2018\n---\na: 100\nb: 200\n...\n", 'encode_tagged usage') + -- + -- Test decoding tags. + -- + ok, err = pcall(s.decode) + test:isnt(err:find('Usage'), nil, "decode usage") + ok, err = pcall(s.decode, false) + test:isnt(err:find('Usage'), nil, "decode usage") + local handle, prefix = s.decode(ret, {tag_only = true}) + test:is(handle, "!print!", "handle is decoded ok") + test:is(prefix, "tag:tarantool.io/push,2018", "prefix is decoded ok") + local several_tags = +[[%TAG !tag1! tag:tarantool.io/tag1,2018 +%TAG !tag2! tag:tarantool.io/tag2,2018 +--- +- 100 +... +]] + ok, err = s.decode(several_tags, {tag_only = true}) + test:is(ok, nil, "can not decode multiple tags") + test:is(err, "can not decode multiple tags", "same") + local no_tags = s.encode(100) + handle, prefix = s.decode(no_tags, {tag_only = true}) + test:is(handle, nil, "no tag - no handle") + test:is(prefix, nil, "no tag - no prefix") + local several_documents = +[[ +%TAG !tag1! tag:tarantool.io/tag1,2018 +--- 1 +... + +%TAG !tag2! tag:tarantool.io/tag2,2018 +--- 2 +... + +]] + handle, prefix = s.decode(several_documents, {tag_only = true}) + test:is(handle, "!tag1!", "tag handle on multiple documents") + test:is(prefix, "tag:tarantool.io/tag1,2018", + "tag prefix on multiple documents") end tap.test("yaml", function(test) diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc index 33057da5d..03573dd39 100644 --- a/third_party/lua-yaml/lyaml.cc +++ b/third_party/lua-yaml/lyaml.cc @@ -309,6 +309,53 @@ static int load_node(struct lua_yaml_loader *loader) { } } +/** + * Decode YAML document global tag onto Lua stack. + * @param loader Initialized loader to load tag from. + * @retval 2 Tag handle and prefix are pushed. Both are not nil. + * @retval 2 If an error occurred during decoding. Nil and error + * string are pushed. + */ +static int load_tag(struct lua_yaml_loader *loader) { + yaml_tag_directive_t *start, *end; + /* Initial parser step. Detect the documents start position. */ + if (do_parse(loader) == 0) + goto parse_error; + if (loader->event.type != YAML_STREAM_START_EVENT) { + lua_pushnil(loader->L); + lua_pushstring(loader->L, "expected STREAM_START_EVENT"); + return 2; + } + /* Parse a document start. */ + if (do_parse(loader) == 0) + goto parse_error; + if (loader->event.type == YAML_STREAM_END_EVENT) + goto no_tags; + assert(loader->event.type == YAML_DOCUMENT_START_EVENT); + start = loader->event.data.document_start.tag_directives.start; + end = loader->event.data.document_start.tag_directives.end; + if (start == end) + goto no_tags; + if (end - start > 1) { + lua_pushnil(loader->L); + lua_pushstring(loader->L, "can not decode multiple tags"); + return 2; + } + lua_pushstring(loader->L, (const char *) start->handle); + lua_pushstring(loader->L, (const char *) start->prefix); + return 2; + +parse_error: + lua_pushnil(loader->L); + /* Make nil be before an error message. */ + lua_insert(loader->L, -2); + return 2; + +no_tags: + lua_pushnil(loader->L); + return 1; +} + static void load(struct lua_yaml_loader *loader) { if (!do_parse(loader)) return; @@ -340,34 +387,59 @@ static void load(struct lua_yaml_loader *loader) { } } +/** + * Decode YAML documents onto Lua stack. First value on stack + * is string with YAML document. Second value is options: + * {tag_only = boolean}. Options are not required. + * @retval N Pushed document count, if tag_only option is not + * specified, or equals to false. + * @retval 2 If tag_only option is true. Tag handle and prefix + * are pushed. + * @retval 2 If an error occurred during decoding. Nil and error + * string are pushed. + */ static int l_load(lua_State *L) { struct lua_yaml_loader loader; - - luaL_argcheck(L, lua_isstring(L, 1), 1, "must provide a string argument"); - + if (! lua_isstring(L, 1)) { +usage_error: + return luaL_error(L, "Usage: yaml.decode(document, "\ + "[{tag_only = boolean}])"); + } + size_t len; + const char *document = lua_tolstring(L, 1, &len); loader.L = L; loader.cfg = luaL_checkserializer(L); loader.validevent = 0; loader.error = 0; loader.document_count = 0; - - /* create table used to track anchors */ - lua_newtable(L); - loader.anchortable_index = lua_gettop(L); - if (!yaml_parser_initialize(&loader.parser)) - luaL_error(L, OOM_ERRMSG); - yaml_parser_set_input_string(&loader.parser, - (const unsigned char *)lua_tostring(L, 1), lua_strlen(L, 1)); - load(&loader); + return luaL_error(L, OOM_ERRMSG); + yaml_parser_set_input_string(&loader.parser, (yaml_char_t *) document, len); + bool tag_only; + if (lua_gettop(L) > 1) { + if (! lua_istable(L, 2)) + goto usage_error; + lua_getfield(L, 2, "tag_only"); + tag_only = lua_isboolean(L, -1) && lua_toboolean(L, -1); + } else { + tag_only = false; + } + int rc; + if (! tag_only) { + /* create table used to track anchors */ + lua_newtable(L); + loader.anchortable_index = lua_gettop(L); + load(&loader); + if (loader.error) + lua_error(L); + rc = loader.document_count; + } else { + rc = load_tag(&loader); + } delete_event(&loader); yaml_parser_delete(&loader.parser); - - if (loader.error) - lua_error(L); - - return loader.document_count; + return rc; } static int dump_node(struct lua_yaml_dumper *dumper);