[Tarantool-patches] [PATCH] lua: msgpackffi considers msgpack.cfg options now

Maria Khaydich maria.khaydich at tarantool.org
Fri Jan 24 20:37:44 MSK 2020


Thank you for the review! All done now:
 
Msgpack.cfg reconfiguration did not affect ffi module behavior. This
patch fixes it for all the options that were not yet covered elsewhere:
encode_sparse_convert, encode_sparse_ratio, encode_sparse_safe,
encode_invalid_numbers & decode_invalid_numbers,
encode_load_metatables & decode_save_metatables,
encode_use_tostring, encode_invalid_as_nil.
Most of common serializers' test were not applicable to msgpackffi since
it has no cfg field of its own. This issue was also solved.
Closes #4499
---
Issue:
https://github.com/tarantool/tarantool/issues/4499
Branch:
https://github.com/tarantool/tarantool/compare/eljashm/gh-4499-msgpackffi-ignores-msgpack.cfg-options
 src/lua/msgpack.c                    |  1 -
 src/lua/msgpack.h                    |  2 +-
 src/lua/msgpackffi.lua               | 70 +++++++++++++++++++----
 src/lua/utils.h                      |  2 +-
 test/app-tap/lua/serializer_test.lua |  7 ++-
 test/app-tap/msgpackffi.test.lua     | 85 +++++++++++++++++++++++++++-
 6 files changed, 149 insertions(+), 18 deletions(-)
diff --git a/src/lua/msgpack.h b/src/lua/msgpack.h
index d0ade303f..1e60f0a5d 100644
--- a/src/lua/msgpack.h
+++ b/src/lua/msgpack.h
@@ -47,7 +47,7 @@ struct mpstream;
 /**
  * Default instance of msgpack serializer (msgpack = require('msgpack')).
  * This instance is used by all box's Lua/C API bindings (e.g. space:replace()).
- * All changes made by msgpack.cfg{} function are also affect box's bindings
+ * All changes made by msgpack.cfg{} function also affect box's bindings
  * (this is a feature).
  */
 extern struct luaL_serializer *luaL_msgpack_default;
diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua
index f775f2d41..159a652b9 100644
--- a/src/lua/msgpackffi.lua
+++ b/src/lua/msgpackffi.lua
@@ -206,6 +206,10 @@ local function encode_nil(buf)
     p[0] = 0xc0
 end
 
+local inf = 1/0
+local minf = -1/0
+local nan = 0/0
+
 local function encode_r(buf, obj, level)
 ::restart::
     if type(obj) == "number" then
@@ -213,6 +217,14 @@ local function encode_r(buf, obj, level)
         if obj % 1 == 0 and obj > -1e63 and obj < 1e64 then
             encode_int(buf, obj)
         else
+            if (obj == inf or obj == minf or tostring(obj) == "nan") and
+            not msgpack.cfg.encode_invalid_numbers then
+                if msgpack.cfg.encode_invalid_as_nil then
+                    encode_nil(buf)
+                else
+                    error("Invalid numbers encoding is manually prohibited")
+                end
+            end
             encode_double(buf, obj)
         end
     elseif type(obj) == "string" then
@@ -227,28 +239,46 @@ local function encode_r(buf, obj, level)
             return
         end
         local serialize = nil
-        local mt = getmetatable(obj)
-        if mt ~= nil then
-            serialize = mt.__serialize
+        if msgpack.cfg.encode_load_metatables then
+            local mt = getmetatable(obj)
+            if mt ~= nil then
+                serialize = mt.__serialize
+            end
         end
         -- calculate the number of array and map elements in the table
         -- TODO: pairs() aborts JIT
-        local array_count, map_count = 0, 0
-        for key in pairs(obj) do
-            if type(key) == 'number' and key >= 1 and
-               key == math.floor(key) and key == array_count + 1 then
+        local array_count, map_count, max_idx = 0, 0, 0
+        for key, el in pairs(obj) do
+            if type(key) == 'number' and key >= 1 and 
+            key == math.floor(key) then
                 array_count = array_count + 1
+                if key > max_idx then
+                    max_idx = key
+                end
             else
                 map_count = map_count + 1
             end
         end
-        if (serialize == nil and map_count == 0) or serialize == 'array' or
-            serialize == 'seq' or serialize == 'sequence' then
+        -- excessively sparse tables should be encoded as maps
+        local is_excessively_sparse = false
+        if msgpack.cfg.encode_sparse_ratio and
+            max_idx > msgpack.cfg.encode_sparse_safe and
+            max_idx > array_count * msgpack.cfg.encode_sparse_ratio then
+                if not msgpack.cfg.encode_sparse_convert then
+                    error(string.format("Can\'t encode excessively "..
+                    "sparse tables when encode_sparse_convert "..
+                    "configuration option is set to false"))
+                end
+                is_excessively_sparse = true
+        end
+        if (not is_excessively_sparse) and ( serialize == 'array' or
+            serialize == 'seq' or serialize == 'sequence' or
+            (serialize == nil and map_count == 0) ) then
             encode_array(buf, array_count)
             for i=1,array_count,1 do
                 encode_r(buf, obj[i], level + 1)
             end
-        elseif (serialize == nil and map_count > 0) or
+        elseif serialize == nil or is_excessively_sparse or
             serialize == 'map' or serialize == 'mapping' then
             encode_map(buf, array_count + map_count)
             for key, val in pairs(obj) do
@@ -278,7 +308,18 @@ local function encode_r(buf, obj, level)
             error("can not encode FFI type: '"..ffi.typeof(obj).."'")
         end
     else
-        error("can not encode Lua type: '"..type(obj).."'")
+        if msgpack.cfg.encode_use_tostring then
+            obj = tostring(obj)
+            if obj then
+                encode_str(buf, obj)
+            else
+                error("can not encode Lua type: '"..type(obj).."'")
+            end
+        elseif msgpack.cfg.encode_invalid_as_nil then
+            encode_nil(buf)
+        else
+            error("can not encode Lua type: '"..type(obj).."'")
+        end
     end
 end
 
@@ -453,7 +494,12 @@ end
 
 local function decode_double(data)
     data[0] = data[0] - 1 -- mp_decode_double need type code
-    return tonumber(builtin.mp_decode_double(data))
+    local res = tonumber(builtin.mp_decode_double(data))
+    if (res == inf or res == minf or tostring(res) == "nan") and
+    not msgpack.cfg.decode_invalid_numbers then
+        error("Invalid numbers decoding is manually prohibited")
+    end
+    return res
 end
 
 local function decode_str(data, size)
diff --git a/src/lua/utils.h b/src/lua/utils.h
index 0b3672769..fed83cf8d 100644
--- a/src/lua/utils.h
+++ b/src/lua/utils.h
@@ -183,7 +183,7 @@ struct luaL_serializer {
      *  + map - at least one table index is not unsigned integer.
      *  + regular array - all array indexes are available.
      *  + sparse array - at least one array index is missing.
-     *  + excessively sparse arrat - the number of values missing
+     *  + excessively sparse array - the number of values missing
      * exceeds the configured ratio.
      *
      * An array is excessively sparse when **all** the following
diff --git a/test/app-tap/lua/serializer_test.lua b/test/app-tap/lua/serializer_test.lua
index 47edac621..860f4dceb 100644
--- a/test/app-tap/lua/serializer_test.lua
+++ b/test/app-tap/lua/serializer_test.lua
@@ -150,7 +150,7 @@ local function test_signed(test, s)
 end
 
 local function test_double(test, s)
-    test:plan(s.cfg and 15 or 9)
+    test:plan(s.cfg and 18 or 9)
     rt(test, s, -1.1)
 
     rt(test, s, 3.1415926535898)
@@ -172,23 +172,28 @@ local function test_double(test, s)
     --
     local nan = 0/0
     local inf = 1/0
+    local minf = -1/0
 
     local ss = s.new()
     ss.cfg{encode_invalid_numbers = false}
     test:ok(not pcall(ss.encode, nan), "encode exception on nan")
     test:ok(not pcall(ss.encode, inf), "encode exception on inf")
+    test:ok(not pcall(ss.encode, minf), "encode exception on minf")
 
     ss.cfg{encode_invalid_numbers = true}
     local xnan = ss.encode(nan)
     local xinf = ss.encode(inf)
+    local xminf = ss.encode(minf)
 
     ss.cfg{decode_invalid_numbers = false}
     test:ok(not pcall(ss.decode, xnan), "decode exception on nan")
     test:ok(not pcall(ss.decode, xinf), "decode exception on inf")
+    test:ok(not pcall(ss.decode, xminf), "decode exception on minf")
 
     ss.cfg{decode_invalid_numbers = true}
     rt(test, s, nan)
     rt(test, s, inf)
+    rt(test, s, minf)
 
     ss = nil
 end
diff --git a/test/app-tap/msgpackffi.test.lua b/test/app-tap/msgpackffi.test.lua
index 994402d15..6304559f0 100755
--- a/test/app-tap/msgpackffi.test.lua
+++ b/test/app-tap/msgpackffi.test.lua
@@ -5,6 +5,7 @@ package.path = "lua/?.lua;"..package.path
 local tap = require('tap')
 local common = require('serializer_test')
 local ffi = require('ffi')
+local msgpack = require('msgpack')
 
 local function is_map(s)
     local b = string.byte(string.sub(s, 1, 1))
@@ -36,6 +37,86 @@ local function test_offsets(test, s)
     test:ok(not pcall(s.decode, dump, offset), "invalid offset")
 end
 
+local function test_configs(test, s)
+    test:plan(26)
+    --
+    -- gh-4499: ffi module should consider msgpack.cfg options
+    --
+
+    -- only excessively sparse arrays should be encoded as maps,
+    -- i.e. encode_sparse_ratio > 0 && max_idx > encode_sparse_safe
+    -- && max_idx > num(elements) * encode_sparse_ratio
+    test:ok(is_array(s.encode({ [1] = 1, [3] = 3, [4] = 3 })),
+        "not excessively sparse")
+    test:ok(is_array(s.encode({ [1] = 1, [2] = 2, [3] = 3, [4] = 4, [5] = 5,
+        [11] = 6 })), "still not excessively sparse")
+    test:ok(is_map(s.encode({ [1] = 1, [2] = 2, [100] = 3 })),
+        "is excessively sparse")
+
+    msgpack.cfg({encode_sparse_convert = false})
+    local ok, _ = pcall(s.encode(), { [1] = 1, [2] = 2, [100] = 3 })
+    test:ok(not ok, "conversion of sparse tables is manually prohibited")
+
+    -- testing (en/de)coding of invalid numbers
+    local inf = 1/0
+    local minf = -1/0
+    local nan = 0/0
+
+    test:ok(s.decode(s.encode(inf)), "decode & encode for inf")
+    test:ok(s.decode(s.encode(minf)), "decode & encode for minf")
+    test:ok(s.decode(s.encode(nan)), "decode & encode for nan")
+
+    msgpack.cfg({encode_invalid_numbers = false})
+    test:ok(not pcall(s.encode, inf), "prohibited encode for inf")
+    test:ok(not pcall(s.encode, minf), "prohibited encode for minf")
+    test:ok(not pcall(s.encode, nan), "prohibited encode for nan")
+
+    msgpack.cfg({decode_invalid_numbers = false})
+    test:ok(not pcall(s.decode, inf), "prohibited decode for inf")
+    test:ok(not pcall(s.decode, minf), "prohibited decode for minf")
+    test:ok(not pcall(s.decode, nan), "prohibited decode for nan")
+
+    -- invalid numbers can also be encoded as nil values
+    msgpack.cfg({decode_invalid_numbers = true, encode_invalid_as_nil = true})
+    test:is(s.decode(s.encode(inf)), nil, "invalid_as_nil for inf")
+    test:is(s.decode(s.encode(minf)), nil, "invalid_as_nil for minf")
+    test:is(s.decode(s.encode(nan)), nil, "invalid_as_nil for nan")
+
+    -- whether __serialize meta-value checking is enabled
+    local arr = setmetatable({1, 2, 3, k1 = 'v1', k2 = 'v2', 4, 5},
+        { __serialize = 'seq'})
+    local map = setmetatable({1, 2, 3, 4, 5}, { __serialize = 'map'})
+    local obj = setmetatable({}, {
+        __serialize = function(x) return 'serialize' end
+    })
+
+    -- testing encode metatables
+    msgpack.cfg{encode_load_metatables = false}
+    test:ok(is_map(s.encode(arr)), "array ignore __serialize")
+    test:ok(is_array(s.encode(map)), "map ignore __serialize")
+    test:ok(is_array(s.encode(obj)), "object ignore __serialize")
+
+    msgpack.cfg{encode_load_metatables = true}
+    test:ok(is_array(s.encode(arr)), "array load __serialize")
+    test:ok(is_map(s.encode(map)), "map load __serialize")
+    test:is(s.decode(s.encode(obj)), "serialize", "object load __serialize")
+
+    -- testing decode metatables
+    msgpack.cfg{decode_save_metatables = false}
+    test:isnil(getmetatable(s.decode(s.encode(arr))), "array __serialize")
+    test:isnil(getmetatable(s.decode(s.encode(map))), "map __serialize")
+
+    msgpack.cfg{decode_save_metatables = true}
+    test:is(getmetatable(s.decode(s.encode(arr))).__serialize, "seq",
+        "array save __serialize")
+    test:is(getmetatable(s.decode(s.encode(map))).__serialize, "map",
+        "map save __serialize")
+
+    -- applying default configs
+    msgpack.cfg({encode_sparse_convert = true, encode_invalid_numbers = true,
+    encode_invalid_as_nil = false})
+end
+
 local function test_other(test, s)
     test:plan(24)
     local buf = string.char(0x93, 0x6e, 0xcb, 0x42, 0x2b, 0xed, 0x30, 0x47,
@@ -82,7 +163,6 @@ local function test_other(test, s)
         while t ~= nil do level = level + 1 t = t[1] end
         return level
     end
-    local msgpack = require('msgpack')
     local deep_as_nil = msgpack.cfg.encode_deep_as_nil
     msgpack.cfg({encode_deep_as_nil = true})
     local max_depth = msgpack.cfg.encode_max_depth
@@ -118,7 +198,7 @@ end
 
 tap.test("msgpackffi", function(test)
     local serializer = require('msgpackffi')
-    test:plan(11)
+    test:plan(12)
     test:test("unsigned", common.test_unsigned, serializer)
     test:test("signed", common.test_signed, serializer)
     test:test("double", common.test_double, serializer)
@@ -130,6 +210,7 @@ tap.test("msgpackffi", function(test)
     -- udata/cdata hooks are not implemented
     --test:test("ucdata", common.test_ucdata, serializer)
     test:test("offsets", test_offsets, serializer)
+    test:test("configs", test_configs, serializer)
     test:test("other", test_other, serializer)
     test:test("decode_buffer", common.test_decode_buffer, serializer)
 end)
diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
index edbc15b72..133b2b527 100644
--- a/src/lua/msgpack.c
+++ b/src/lua/msgpack.c
@@ -95,7 +95,6 @@ luamp_decode_extension_default(struct lua_State *L, const char **data)
            (unsigned char) **data);
 }
 
-
 void
 luamp_set_decode_extension(luamp_decode_extension_f handler)
 {
-- 
2.24.0
> 
>>
>>Среда, 18 декабря 2019, 2:26 +03:00 от Alexander Turenko <alexander.turenko at tarantool.org>:
>> 
>>Sorry again for the late reply.
>>
>>> > I think we also should implement encode_sparse_* options to
>>> > be compatible with msgpack module.
>>>
>>> Could you please explain how do you propose to implement them? It
>>> seems to me the purpose of those configs is to handle excessively
>>> sparse arrays as maps (as stated in src/lua/utils.h). Which does not
>>> seem relevant to lua module where the only container type is table.
>>> What do I miss here? 
>>
>>It is about encoding, not decoding: encode a Lua table with numeric keys
>>as MP_ARRAY, MP_MAP or give an error. It seems that nothing prevent us
>>from implementing this logic on Lua.
>>
>>> Subject: [PATCH 1/2] Msgpackffi considers msgpack.cfg options now
>>> Msgpack.cfg options changes did not affect msgpackffi serializer state.
>>> This patch fixes it for options encode_load_metatables, decode_ and
>>> encode_invalid_numbers, encode_use_tostring and encode_invalid_as_nil.
>>> Closes #4499
>>> ---
>>> Issue:
>>>  https://github.com/tarantool/tarantool/issues/4499
>>> Branch:
>>>  https://github.com/tarantool/tarantool/compare/eljashm/gh-4499-msgpackffi-ignores-msgpack.cfg-options
>>
>>> @@ -261,6 +263,19 @@ local function encode_r(buf, obj, level)
>>>          else
>>>              error("Invalid __serialize value")
>>>          end
>>> +    elseif type(obj) == math.huge or type(obj) == -math.huge or 
>>> +        type(obj) == math.nan then
>>> +        if msgpack.cfg.encode_invalid_numbers then
>>> +            if obj == math.huge then
>>> +                obj = 1/0
>>> +            elseif obj == -math.huge then
>>> +                obj = -1/0
>>> +            else
>>> +                obj = 0/0
>>> +            end
>>> +        else
>>> +            encode_nil(buf)
>>> +        end
>>
>>I would save 1/0, -1/0, 0/0 as module local variables for readability,
>>like as it is done in test_double().
>>
>>I also found that math.huge is 1/0 (inf) and there is no math.nan (it
>>just nil).
>>
>>> @@ -562,6 +589,15 @@ decode_r = function(data)
>>>      elseif c >= 0xe0 then
>>>          return tonumber(ffi.cast('signed char',c)) -- negfixint
>>>      elseif c == 0xc0 then
>>> +        if msgpack.cfg.decode_invalid_numbers then
>>> +            if c == 0/0 then
>>> +                return math.nan
>>> +            elseif c == 1/0 then
>>> +                return math.huge
>>> +            elseif c == -1/0 then
>>> +                return -math.huge
>>> +            end
>>> +        end
>>
>>Same as above.
>>
>>> @@ -628,6 +664,8 @@ return {
>>>      on_encode = on_encode;
>>>      decode_unchecked = decode_unchecked;
>>>      decode = decode_unchecked; -- just for tests
>>> +    cfg = msgpack.cfg;
>>> +    new = msgpack.new; -- for serializer tests to work properly
>>
>>So they'll test msgpack, not msgpackffi. 
> 
> 
>--
>Maria Khaydich
> 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20200124/08583b71/attachment.html>


More information about the Tarantool-patches mailing list