From: "Maria Khaydich" <maria.khaydich@tarantool.org> To: "Alexander Turenko" <alexander.turenko@tarantool.org> Cc: tarantool-patches@dev.tarantool.org Subject: Re: [Tarantool-patches] [PATCH] lua: msgpackffi considers msgpack.cfg options now Date: Fri, 24 Jan 2020 20:37:44 +0300 [thread overview] Message-ID: <1579887464.335069064@f120.i.mail.ru> (raw) In-Reply-To: <20191217232635.hln5hjeeqbq2iquz@tkn_work_nb> [-- Attachment #1: Type: text/plain, Size: 18133 bytes --] Thank you for the review! All done now: Msgpack.cfg reconfiguration did not affect ffi module behavior. This patch fixes it for all the options that were not yet covered elsewhere: encode_sparse_convert, encode_sparse_ratio, encode_sparse_safe, encode_invalid_numbers & decode_invalid_numbers, encode_load_metatables & decode_save_metatables, encode_use_tostring, encode_invalid_as_nil. Most of common serializers' test were not applicable to msgpackffi since it has no cfg field of its own. This issue was also solved. Closes #4499 --- Issue: https://github.com/tarantool/tarantool/issues/4499 Branch: https://github.com/tarantool/tarantool/compare/eljashm/gh-4499-msgpackffi-ignores-msgpack.cfg-options src/lua/msgpack.c | 1 - src/lua/msgpack.h | 2 +- src/lua/msgpackffi.lua | 70 +++++++++++++++++++---- src/lua/utils.h | 2 +- test/app-tap/lua/serializer_test.lua | 7 ++- test/app-tap/msgpackffi.test.lua | 85 +++++++++++++++++++++++++++- 6 files changed, 149 insertions(+), 18 deletions(-) diff --git a/src/lua/msgpack.h b/src/lua/msgpack.h index d0ade303f..1e60f0a5d 100644 --- a/src/lua/msgpack.h +++ b/src/lua/msgpack.h @@ -47,7 +47,7 @@ struct mpstream; /** * Default instance of msgpack serializer (msgpack = require('msgpack')). * This instance is used by all box's Lua/C API bindings (e.g. space:replace()). - * All changes made by msgpack.cfg{} function are also affect box's bindings + * All changes made by msgpack.cfg{} function also affect box's bindings * (this is a feature). */ extern struct luaL_serializer *luaL_msgpack_default; diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua index f775f2d41..159a652b9 100644 --- a/src/lua/msgpackffi.lua +++ b/src/lua/msgpackffi.lua @@ -206,6 +206,10 @@ local function encode_nil(buf) p[0] = 0xc0 end +local inf = 1/0 +local minf = -1/0 +local nan = 0/0 + local function encode_r(buf, obj, level) ::restart:: if type(obj) == "number" then @@ -213,6 +217,14 @@ local function encode_r(buf, obj, level) if obj % 1 == 0 and obj > -1e63 and obj < 1e64 then encode_int(buf, obj) else + if (obj == inf or obj == minf or tostring(obj) == "nan") and + not msgpack.cfg.encode_invalid_numbers then + if msgpack.cfg.encode_invalid_as_nil then + encode_nil(buf) + else + error("Invalid numbers encoding is manually prohibited") + end + end encode_double(buf, obj) end elseif type(obj) == "string" then @@ -227,28 +239,46 @@ local function encode_r(buf, obj, level) return end local serialize = nil - local mt = getmetatable(obj) - if mt ~= nil then - serialize = mt.__serialize + if msgpack.cfg.encode_load_metatables then + local mt = getmetatable(obj) + if mt ~= nil then + serialize = mt.__serialize + end end -- calculate the number of array and map elements in the table -- TODO: pairs() aborts JIT - local array_count, map_count = 0, 0 - for key in pairs(obj) do - if type(key) == 'number' and key >= 1 and - key == math.floor(key) and key == array_count + 1 then + local array_count, map_count, max_idx = 0, 0, 0 + for key, el in pairs(obj) do + if type(key) == 'number' and key >= 1 and + key == math.floor(key) then array_count = array_count + 1 + if key > max_idx then + max_idx = key + end else map_count = map_count + 1 end end - if (serialize == nil and map_count == 0) or serialize == 'array' or - serialize == 'seq' or serialize == 'sequence' then + -- excessively sparse tables should be encoded as maps + local is_excessively_sparse = false + if msgpack.cfg.encode_sparse_ratio and + max_idx > msgpack.cfg.encode_sparse_safe and + max_idx > array_count * msgpack.cfg.encode_sparse_ratio then + if not msgpack.cfg.encode_sparse_convert then + error(string.format("Can\'t encode excessively ".. + "sparse tables when encode_sparse_convert ".. + "configuration option is set to false")) + end + is_excessively_sparse = true + end + if (not is_excessively_sparse) and ( serialize == 'array' or + serialize == 'seq' or serialize == 'sequence' or + (serialize == nil and map_count == 0) ) then encode_array(buf, array_count) for i=1,array_count,1 do encode_r(buf, obj[i], level + 1) end - elseif (serialize == nil and map_count > 0) or + elseif serialize == nil or is_excessively_sparse or serialize == 'map' or serialize == 'mapping' then encode_map(buf, array_count + map_count) for key, val in pairs(obj) do @@ -278,7 +308,18 @@ local function encode_r(buf, obj, level) error("can not encode FFI type: '"..ffi.typeof(obj).."'") end else - error("can not encode Lua type: '"..type(obj).."'") + if msgpack.cfg.encode_use_tostring then + obj = tostring(obj) + if obj then + encode_str(buf, obj) + else + error("can not encode Lua type: '"..type(obj).."'") + end + elseif msgpack.cfg.encode_invalid_as_nil then + encode_nil(buf) + else + error("can not encode Lua type: '"..type(obj).."'") + end end end @@ -453,7 +494,12 @@ end local function decode_double(data) data[0] = data[0] - 1 -- mp_decode_double need type code - return tonumber(builtin.mp_decode_double(data)) + local res = tonumber(builtin.mp_decode_double(data)) + if (res == inf or res == minf or tostring(res) == "nan") and + not msgpack.cfg.decode_invalid_numbers then + error("Invalid numbers decoding is manually prohibited") + end + return res end local function decode_str(data, size) diff --git a/src/lua/utils.h b/src/lua/utils.h index 0b3672769..fed83cf8d 100644 --- a/src/lua/utils.h +++ b/src/lua/utils.h @@ -183,7 +183,7 @@ struct luaL_serializer { * + map - at least one table index is not unsigned integer. * + regular array - all array indexes are available. * + sparse array - at least one array index is missing. - * + excessively sparse arrat - the number of values missing + * + excessively sparse array - the number of values missing * exceeds the configured ratio. * * An array is excessively sparse when **all** the following diff --git a/test/app-tap/lua/serializer_test.lua b/test/app-tap/lua/serializer_test.lua index 47edac621..860f4dceb 100644 --- a/test/app-tap/lua/serializer_test.lua +++ b/test/app-tap/lua/serializer_test.lua @@ -150,7 +150,7 @@ local function test_signed(test, s) end local function test_double(test, s) - test:plan(s.cfg and 15 or 9) + test:plan(s.cfg and 18 or 9) rt(test, s, -1.1) rt(test, s, 3.1415926535898) @@ -172,23 +172,28 @@ local function test_double(test, s) -- local nan = 0/0 local inf = 1/0 + local minf = -1/0 local ss = s.new() ss.cfg{encode_invalid_numbers = false} test:ok(not pcall(ss.encode, nan), "encode exception on nan") test:ok(not pcall(ss.encode, inf), "encode exception on inf") + test:ok(not pcall(ss.encode, minf), "encode exception on minf") ss.cfg{encode_invalid_numbers = true} local xnan = ss.encode(nan) local xinf = ss.encode(inf) + local xminf = ss.encode(minf) ss.cfg{decode_invalid_numbers = false} test:ok(not pcall(ss.decode, xnan), "decode exception on nan") test:ok(not pcall(ss.decode, xinf), "decode exception on inf") + test:ok(not pcall(ss.decode, xminf), "decode exception on minf") ss.cfg{decode_invalid_numbers = true} rt(test, s, nan) rt(test, s, inf) + rt(test, s, minf) ss = nil end diff --git a/test/app-tap/msgpackffi.test.lua b/test/app-tap/msgpackffi.test.lua index 994402d15..6304559f0 100755 --- a/test/app-tap/msgpackffi.test.lua +++ b/test/app-tap/msgpackffi.test.lua @@ -5,6 +5,7 @@ package.path = "lua/?.lua;"..package.path local tap = require('tap') local common = require('serializer_test') local ffi = require('ffi') +local msgpack = require('msgpack') local function is_map(s) local b = string.byte(string.sub(s, 1, 1)) @@ -36,6 +37,86 @@ local function test_offsets(test, s) test:ok(not pcall(s.decode, dump, offset), "invalid offset") end +local function test_configs(test, s) + test:plan(26) + -- + -- gh-4499: ffi module should consider msgpack.cfg options + -- + + -- only excessively sparse arrays should be encoded as maps, + -- i.e. encode_sparse_ratio > 0 && max_idx > encode_sparse_safe + -- && max_idx > num(elements) * encode_sparse_ratio + test:ok(is_array(s.encode({ [1] = 1, [3] = 3, [4] = 3 })), + "not excessively sparse") + test:ok(is_array(s.encode({ [1] = 1, [2] = 2, [3] = 3, [4] = 4, [5] = 5, + [11] = 6 })), "still not excessively sparse") + test:ok(is_map(s.encode({ [1] = 1, [2] = 2, [100] = 3 })), + "is excessively sparse") + + msgpack.cfg({encode_sparse_convert = false}) + local ok, _ = pcall(s.encode(), { [1] = 1, [2] = 2, [100] = 3 }) + test:ok(not ok, "conversion of sparse tables is manually prohibited") + + -- testing (en/de)coding of invalid numbers + local inf = 1/0 + local minf = -1/0 + local nan = 0/0 + + test:ok(s.decode(s.encode(inf)), "decode & encode for inf") + test:ok(s.decode(s.encode(minf)), "decode & encode for minf") + test:ok(s.decode(s.encode(nan)), "decode & encode for nan") + + msgpack.cfg({encode_invalid_numbers = false}) + test:ok(not pcall(s.encode, inf), "prohibited encode for inf") + test:ok(not pcall(s.encode, minf), "prohibited encode for minf") + test:ok(not pcall(s.encode, nan), "prohibited encode for nan") + + msgpack.cfg({decode_invalid_numbers = false}) + test:ok(not pcall(s.decode, inf), "prohibited decode for inf") + test:ok(not pcall(s.decode, minf), "prohibited decode for minf") + test:ok(not pcall(s.decode, nan), "prohibited decode for nan") + + -- invalid numbers can also be encoded as nil values + msgpack.cfg({decode_invalid_numbers = true, encode_invalid_as_nil = true}) + test:is(s.decode(s.encode(inf)), nil, "invalid_as_nil for inf") + test:is(s.decode(s.encode(minf)), nil, "invalid_as_nil for minf") + test:is(s.decode(s.encode(nan)), nil, "invalid_as_nil for nan") + + -- whether __serialize meta-value checking is enabled + local arr = setmetatable({1, 2, 3, k1 = 'v1', k2 = 'v2', 4, 5}, + { __serialize = 'seq'}) + local map = setmetatable({1, 2, 3, 4, 5}, { __serialize = 'map'}) + local obj = setmetatable({}, { + __serialize = function(x) return 'serialize' end + }) + + -- testing encode metatables + msgpack.cfg{encode_load_metatables = false} + test:ok(is_map(s.encode(arr)), "array ignore __serialize") + test:ok(is_array(s.encode(map)), "map ignore __serialize") + test:ok(is_array(s.encode(obj)), "object ignore __serialize") + + msgpack.cfg{encode_load_metatables = true} + test:ok(is_array(s.encode(arr)), "array load __serialize") + test:ok(is_map(s.encode(map)), "map load __serialize") + test:is(s.decode(s.encode(obj)), "serialize", "object load __serialize") + + -- testing decode metatables + msgpack.cfg{decode_save_metatables = false} + test:isnil(getmetatable(s.decode(s.encode(arr))), "array __serialize") + test:isnil(getmetatable(s.decode(s.encode(map))), "map __serialize") + + msgpack.cfg{decode_save_metatables = true} + test:is(getmetatable(s.decode(s.encode(arr))).__serialize, "seq", + "array save __serialize") + test:is(getmetatable(s.decode(s.encode(map))).__serialize, "map", + "map save __serialize") + + -- applying default configs + msgpack.cfg({encode_sparse_convert = true, encode_invalid_numbers = true, + encode_invalid_as_nil = false}) +end + local function test_other(test, s) test:plan(24) local buf = string.char(0x93, 0x6e, 0xcb, 0x42, 0x2b, 0xed, 0x30, 0x47, @@ -82,7 +163,6 @@ local function test_other(test, s) while t ~= nil do level = level + 1 t = t[1] end return level end - local msgpack = require('msgpack') local deep_as_nil = msgpack.cfg.encode_deep_as_nil msgpack.cfg({encode_deep_as_nil = true}) local max_depth = msgpack.cfg.encode_max_depth @@ -118,7 +198,7 @@ end tap.test("msgpackffi", function(test) local serializer = require('msgpackffi') - test:plan(11) + test:plan(12) test:test("unsigned", common.test_unsigned, serializer) test:test("signed", common.test_signed, serializer) test:test("double", common.test_double, serializer) @@ -130,6 +210,7 @@ tap.test("msgpackffi", function(test) -- udata/cdata hooks are not implemented --test:test("ucdata", common.test_ucdata, serializer) test:test("offsets", test_offsets, serializer) + test:test("configs", test_configs, serializer) test:test("other", test_other, serializer) test:test("decode_buffer", common.test_decode_buffer, serializer) end) diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c index edbc15b72..133b2b527 100644 --- a/src/lua/msgpack.c +++ b/src/lua/msgpack.c @@ -95,7 +95,6 @@ luamp_decode_extension_default(struct lua_State *L, const char **data) (unsigned char) **data); } - void luamp_set_decode_extension(luamp_decode_extension_f handler) { -- 2.24.0 > >> >>Среда, 18 декабря 2019, 2:26 +03:00 от Alexander Turenko <alexander.turenko@tarantool.org>: >> >>Sorry again for the late reply. >> >>> > I think we also should implement encode_sparse_* options to >>> > be compatible with msgpack module. >>> >>> Could you please explain how do you propose to implement them? It >>> seems to me the purpose of those configs is to handle excessively >>> sparse arrays as maps (as stated in src/lua/utils.h). Which does not >>> seem relevant to lua module where the only container type is table. >>> What do I miss here? >> >>It is about encoding, not decoding: encode a Lua table with numeric keys >>as MP_ARRAY, MP_MAP or give an error. It seems that nothing prevent us >>from implementing this logic on Lua. >> >>> Subject: [PATCH 1/2] Msgpackffi considers msgpack.cfg options now >>> Msgpack.cfg options changes did not affect msgpackffi serializer state. >>> This patch fixes it for options encode_load_metatables, decode_ and >>> encode_invalid_numbers, encode_use_tostring and encode_invalid_as_nil. >>> Closes #4499 >>> --- >>> Issue: >>> https://github.com/tarantool/tarantool/issues/4499 >>> Branch: >>> https://github.com/tarantool/tarantool/compare/eljashm/gh-4499-msgpackffi-ignores-msgpack.cfg-options >> >>> @@ -261,6 +263,19 @@ local function encode_r(buf, obj, level) >>> else >>> error("Invalid __serialize value") >>> end >>> + elseif type(obj) == math.huge or type(obj) == -math.huge or >>> + type(obj) == math.nan then >>> + if msgpack.cfg.encode_invalid_numbers then >>> + if obj == math.huge then >>> + obj = 1/0 >>> + elseif obj == -math.huge then >>> + obj = -1/0 >>> + else >>> + obj = 0/0 >>> + end >>> + else >>> + encode_nil(buf) >>> + end >> >>I would save 1/0, -1/0, 0/0 as module local variables for readability, >>like as it is done in test_double(). >> >>I also found that math.huge is 1/0 (inf) and there is no math.nan (it >>just nil). >> >>> @@ -562,6 +589,15 @@ decode_r = function(data) >>> elseif c >= 0xe0 then >>> return tonumber(ffi.cast('signed char',c)) -- negfixint >>> elseif c == 0xc0 then >>> + if msgpack.cfg.decode_invalid_numbers then >>> + if c == 0/0 then >>> + return math.nan >>> + elseif c == 1/0 then >>> + return math.huge >>> + elseif c == -1/0 then >>> + return -math.huge >>> + end >>> + end >> >>Same as above. >> >>> @@ -628,6 +664,8 @@ return { >>> on_encode = on_encode; >>> decode_unchecked = decode_unchecked; >>> decode = decode_unchecked; -- just for tests >>> + cfg = msgpack.cfg; >>> + new = msgpack.new; -- for serializer tests to work properly >> >>So they'll test msgpack, not msgpackffi. > > >-- >Maria Khaydich > [-- Attachment #2: Type: text/html, Size: 24901 bytes --]
next prev parent reply other threads:[~2020-01-24 17:37 UTC|newest] Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-11-07 10:36 [Tarantool-patches] [PATCH] Msgpackffi " Maria 2019-11-11 23:21 ` Alexander Turenko 2019-11-29 21:09 ` Maria Khaydich 2019-12-17 23:26 ` Alexander Turenko 2020-01-24 17:37 ` Maria Khaydich [this message] 2020-01-25 15:01 ` [Tarantool-patches] [PATCH] lua: msgpackffi " Konstantin Osipov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1579887464.335069064@f120.i.mail.ru \ --to=maria.khaydich@tarantool.org \ --cc=alexander.turenko@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH] lua: msgpackffi considers msgpack.cfg options now' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox