[Tarantool-patches] [PATCH 15/15] buffer: remove Lua registers
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Thu Mar 25 00:24:27 MSK 2021
Lua buffer module used to have a couple of preallocated objects of
type 'union c_register'. The union was a bunch of C scalar and
array types intended for use instead of ffi.new() wherever a
temporary object like 'int[1]' had to be allocated just to be able
to pass 'int *' into a C function via FFI.
It was a bit faster than ffi.new() even for small sizes. For
instance (when JIT works), getting a register to use it as
'int[1]' cost around 0.2-0.3 ns while ffi.new('int[1]') costs
around 0.4 ns. Also the code looked cleaner.
But Lua registers were global and therefore had the same issue as
IBUF_SHARED and static_alloc() in Lua - no ownership, and sudden
reuse when GC starts right when the register is still in use in
some Lua code. __gc handlers could wipe the register values,
making the original code behave unpredictably.
IBUF_SHARED was fixed by a proper ownership implementation, but
that is not necessary for Lua registers. It could be done with the
buffer.ffi_stash_new() feature, but its cost is about 0.8 ns,
which is worse than plain ffi.new() for simple scalar types.
This patch eliminates Lua registers, and uses ffi.new() instead
everywhere.
Closes #5632
(cherry picked from commit 911ca60e202986ea283341bb31bfd7a7a5688559)
---
changelogs/unreleased/fix-ibuf-static.md | 7 ++++
src/lua/msgpackffi.lua | 39 +++++++++---------
test/app-tap/gh-5632-gc-buf-reuse.test.lua | 46 +++++++++++++++++++++-
3 files changed, 72 insertions(+), 20 deletions(-)
create mode 100644 changelogs/unreleased/fix-ibuf-static.md
diff --git a/changelogs/unreleased/fix-ibuf-static.md b/changelogs/unreleased/fix-ibuf-static.md
new file mode 100644
index 000000000..34450b85d
--- /dev/null
+++ b/changelogs/unreleased/fix-ibuf-static.md
@@ -0,0 +1,7 @@
+## bugfix/core
+
+* Extensive usage of `uri` and `uuid` modules with debug log level could lead to
+ a crash or corrupted result of the functions from these modules. Also their
+ usage from the callbacks passed to `ffi.gc()` could lead to the same but much
+ easier. The same could happen with some functions from the modules `fio`,
+ `box.tuple`, `iconv` (gh-5632).
diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua
index ad7998ed1..b07f0e7f0 100644
--- a/src/lua/msgpackffi.lua
+++ b/src/lua/msgpackffi.lua
@@ -31,11 +31,6 @@ union tmpint {
local strict_alignment = (jit.arch == 'arm')
-local tmpint
-if strict_alignment then
- tmpint = ffi.new('union tmpint[1]')
-end
-
local function bswap_u16(num)
return bit.rshift(bit.bswap(tonumber(num)), 16)
end
@@ -71,7 +66,7 @@ end
local encode_u16
if strict_alignment then
encode_u16 = function(buf, code, num)
- tmpint[0].u16 = bswap_u16(num)
+ local tmpint = ffi.new('uint16_t[1]', bswap_u16(num))
local p = buf:alloc(3)
p[0] = code
ffi.copy(p + 1, tmpint, 2)
@@ -87,8 +82,9 @@ end
local encode_u32
if strict_alignment then
encode_u32 = function(buf, code, num)
- tmpint[0].u32 =
- ffi.cast('uint32_t', bit.bswap(tonumber(num)))
+ local tmpint =
+ ffi.new('uint32_t[1]', ffi.cast('uint32_t',
+ bit.bswap(tonumber(num))))
local p = buf:alloc(5)
p[0] = code
ffi.copy(p + 1, tmpint, 4)
@@ -105,7 +101,8 @@ end
local encode_u64
if strict_alignment then
encode_u64 = function(buf, code, num)
- tmpint[0].u64 = bit.bswap(ffi.cast('uint64_t', num))
+ local tmpint =
+ ffi.new('uint64_t[1]', bit.bswap(ffi.cast('uint64_t', num)))
local p = buf:alloc(9)
p[0] = code
ffi.copy(p + 1, tmpint, 8)
@@ -328,9 +325,10 @@ end
local decode_u16
if strict_alignment then
decode_u16 = function(data)
+ local tmpint = ffi.new('uint16_t[1]')
ffi.copy(tmpint, data[0], 2)
data[0] = data[0] + 2
- return tonumber(bswap_u16(tmpint[0].u16))
+ return tonumber(bswap_u16(tmpint[0]))
end
else
decode_u16 = function(data)
@@ -343,10 +341,11 @@ end
local decode_u32
if strict_alignment then
decode_u32 = function(data)
+ local tmpint = ffi.new('uint32_t[1]')
ffi.copy(tmpint, data[0], 4)
data[0] = data[0] + 4
return tonumber(
- ffi.cast('uint32_t', bit.bswap(tonumber(tmpint[0].u32))))
+ ffi.cast('uint32_t', bit.bswap(tonumber(tmpint[0]))))
end
else
decode_u32 = function(data)
@@ -360,9 +359,10 @@ end
local decode_u64
if strict_alignment then
decode_u64 = function(data)
+ local tmpint = ffi.new('uint64_t[1]')
ffi.copy(tmpint, data[0], 8);
data[0] = data[0] + 8
- local num = bit.bswap(tmpint[0].u64)
+ local num = bit.bswap(tmpint[0])
if num <= DBL_INT_MAX then
return tonumber(num) -- return as 'number'
end
@@ -389,8 +389,9 @@ end
local decode_i16
if strict_alignment then
decode_i16 = function(data)
+ local tmpint = ffi.new('uint16_t[1]')
ffi.copy(tmpint, data[0], 2)
- local num = bswap_u16(tmpint[0].u16)
+ local num = bswap_u16(tmpint[0])
data[0] = data[0] + 2
-- note: this double cast is actually necessary
return tonumber(ffi.cast('int16_t', ffi.cast('uint16_t', num)))
@@ -407,8 +408,9 @@ end
local decode_i32
if strict_alignment then
decode_i32 = function(data)
+ local tmpint = ffi.new('uint32_t[1]')
ffi.copy(tmpint, data[0], 4)
- local num = bit.bswap(tonumber(tmpint[0].u32))
+ local num = bit.bswap(tonumber(tmpint[0]))
data[0] = data[0] + 4
return num
end
@@ -423,9 +425,10 @@ end
local decode_i64
if strict_alignment then
decode_i64 = function(data)
+ local tmpint = ffi.new('int64_t[1]')
ffi.copy(tmpint, data[0], 8)
data[0] = data[0] + 8
- local num = bit.bswap(ffi.cast('int64_t', tmpint[0].u64))
+ local num = bit.bswap(tmpint[0])
if num >= -DBL_INT_MAX and num <= DBL_INT_MAX then
return tonumber(num) -- return as 'number'
end
@@ -550,13 +553,11 @@ decode_r = function(data)
end
---
--- A temporary const char ** buffer.
-- All decode_XXX functions accept const char **data as its first argument,
-- like libmsgpuck does. After decoding data[0] position is changed to the next
-- element. It is significally faster on LuaJIT to use double pointer than
-- return result, newpos.
--
-local bufp = ffi.new('const unsigned char *[1]');
local function check_offset(offset, len)
if offset == nil then
@@ -576,13 +577,13 @@ local function decode_unchecked(str, offset)
if type(str) == "string" then
offset = check_offset(offset, #str)
local buf = ffi.cast(char_ptr_t, str)
- bufp[0] = buf + offset - 1
+ local bufp = ffi.new('const unsigned char *[1]', buf + offset - 1)
local r = decode_r(bufp)
return r, bufp[0] - buf + 1
elseif ffi.istype(char_ptr_t, str) then
-- Note: ffi.istype() ignores the const qualifier, so both
-- (char *) and (const char *) buffers are valid.
- bufp[0] = str
+ local bufp = ffi.new('const unsigned char *[1]', str)
local r = decode_r(bufp)
return r, ffi.cast(ffi.typeof(str), bufp[0])
else
diff --git a/test/app-tap/gh-5632-gc-buf-reuse.test.lua b/test/app-tap/gh-5632-gc-buf-reuse.test.lua
index 81dafd36e..6efddb714 100755
--- a/test/app-tap/gh-5632-gc-buf-reuse.test.lua
+++ b/test/app-tap/gh-5632-gc-buf-reuse.test.lua
@@ -11,6 +11,7 @@ local tap = require('tap')
local ffi = require('ffi')
local uuid = require('uuid')
local uri = require('uri')
+local msgpackffi = require('msgpackffi')
local function test_uuid(test)
test:plan(1)
@@ -99,9 +100,52 @@ local function test_uri(test)
test:ok(is_success, 'uri in gc')
end
+local function test_msgpackffi(test)
+ test:plan(1)
+
+ local mp_encode = msgpackffi.encode
+ local mp_decode = msgpackffi.decode
+ local gc_count = 100
+ local iter_count = 1000
+ local is_success = true
+ local data = {0, 1, 1000, 100000000, 'str', true, 1.1}
+
+ local function do_encode()
+ if not is_success then
+ return
+ end
+ local t = mp_encode(data)
+ t = mp_decode(t)
+ if #t ~= #data then
+ is_success = false
+ return
+ end
+ for i = 1, #t do
+ if t[i] ~= data[i] then
+ is_success = false
+ return
+ end
+ end
+ end
+
+ local function create_gc()
+ for _ = 1, gc_count do
+ ffi.gc(ffi.new('char[1]'), do_encode)
+ end
+ end
+
+ for _ = 1, iter_count do
+ create_gc()
+ do_encode()
+ end
+
+ test:ok(is_success, 'msgpackffi in gc')
+end
+
local test = tap.test('gh-5632-gc-buf-reuse')
-test:plan(2)
+test:plan(3)
test:test('uuid in __gc', test_uuid)
test:test('uri in __gc', test_uri)
+test:test('msgpackffi in __gc', test_msgpackffi)
os.exit(test:check() and 0 or 1)
--
2.24.3 (Apple Git-128)
More information about the Tarantool-patches
mailing list