[tarantool-patches] [PATCH v2 2/2] buffer: replace all ffi.new(type[1]) with cached union

Sun May 19 01:15:09 MSK 2019

Lua, which suffers from lack of ability to pass values by
pointers into FFI functions, nor has an address operator '&' to
take an address of integer or char or anything. Because of that
a user need to either use ffi.new(type[1]) or use static buffer,
bur for such small allocations they are both too expensive and
aggravate GC problem.

Now buffer module provides preallocated basic types to use in FFI
functions. The commit is motivated by one another place where
ffi.new('int[1]') appeared, in SWIM module, to obtain payload
size as an out parameter of a C function.
---
 src/lua/buffer.lua       | 38 ++++++++++++++++++++++++++++++
 src/lua/crypto.lua       | 22 ++++++++----------
 src/lua/msgpackffi.lua   | 50 ++++++++++++++++------------------------
 src/lua/socket.lua       | 25 ++++++++++++--------
 src/lua/string.lua       |  4 ++--
 test/app/buffer.result   | 27 ++++++++++++++++++++++
 test/app/buffer.test.lua | 11 +++++++++
 7 files changed, 123 insertions(+), 54 deletions(-)

diff --git a/src/lua/buffer.lua b/src/lua/buffer.lua
index 5e9209387..7fd9f46ed 100644
--- a/src/lua/buffer.lua
+++ b/src/lua/buffer.lua
@@ -36,6 +36,34 @@ ibuf_reserve_slow(struct ibuf *ibuf, size_t size);
 
 void *
 lua_static_aligned_alloc(size_t size, size_t alignment);
+
+/**
+ * Scalar is a buffer to use with FFI functions, which usually
+ * operate with pointers to scalar values like int, char, size_t,
+ * void *. To avoid doing 'ffi.new(<type>[1])' on each such FFI
+ * function invocation, a module can use one of attributes of the
+ * scalar union.
+ *
+ * Naming policy of the attributes is easy to remember:
+ * 'a' for array type + type name first letters + 'p' for pointer.
+ *
+ * For example:
+ * - int[1] - <a>rray of <i>nt - ai;
+ * - const unsigned char *[1] -
+ *       <a>rray of <c>onst <u>nsigned <c>har <p> pointer - acucp.
+ */
+union scalar {
+    size_t as[1];
+    void *ap[1];
+    int ai[1];
+    char ac[1];
+    const unsigned char *acucp[1];
+    unsigned long aul[1];
+    uint16_t u16;
+    uint32_t u32;
+    uint64_t u64;
+    int64_t i64;
+};
 ]]
 
 local builtin = ffi.C
@@ -196,9 +224,19 @@ local function static_alloc(type, size)
     return ffi.new(type..'[?]', size)
 end
 
+--
+-- Sometimes it is wanted to use several temporary scalar cdata
+-- values. Then one union object is not enough - its attributes
+-- share memory.
+--
+local scalar_array = ffi.new('union scalar[?]', 2)
+
 return {
     ibuf = ibuf_new;
     IBUF_SHARED = ffi.C.tarantool_lua_ibuf;
     READAHEAD = READAHEAD;
     static_alloc = static_alloc,
+    scalar_array = scalar_array,
+    -- Fast access when only one variable is needed.
+    scalar = scalar_array[0],
 }
diff --git a/src/lua/crypto.lua b/src/lua/crypto.lua
index e76370517..7db6afac9 100644
--- a/src/lua/crypto.lua
+++ b/src/lua/crypto.lua
@@ -2,6 +2,7 @@
 
 local ffi = require('ffi')
 local buffer = require('buffer')
+local scalar = buffer.scalar
 
 ffi.cdef[[
     int tnt_openssl_init(void);
@@ -84,7 +85,6 @@ local function digest_new(digest)
         digest = digest,
         buf = buffer.ibuf(64),
         initialized = false,
-        outl = ffi.new('int[1]')
     }, digest_mt)
     self:init()
     return self
@@ -114,10 +114,10 @@ local function digest_final(self)
         return error('Digest not initialized')
     end
     self.initialized = false
-    if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, self.outl) ~= 1 then
+    if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, scalar.ai) ~= 1 then
         return error('Can\'t finalize digest: ' .. openssl_err_str())
     end
-    return ffi.string(self.buf.wpos, self.outl[0])
+    return ffi.string(self.buf.wpos, scalar.ai[0])
 end
 
 local function digest_free(self)
@@ -156,9 +156,7 @@ local function hmac_new(digest, key)
     local self = setmetatable({
         ctx = ctx,
         digest = digest,
-        buf = buffer.ibuf(64),
         initialized = false,
-        outl = ffi.new('int[1]')
     }, hmac_mt)
     self:init(key)
     return self
@@ -188,10 +186,11 @@ local function hmac_final(self)
         return error('HMAC not initialized')
     end
     self.initialized = false
-    if ffi.C.HMAC_Final(self.ctx, self.buf.wpos, self.outl) ~= 1 then
+    local buf = buffer.static_alloc('char', 64)
+    if ffi.C.HMAC_Final(self.ctx, buf, scalar.ai) ~= 1 then
         return error('Can\'t finalize HMAC: ' .. openssl_err_str())
     end
-    return ffi.string(self.buf.wpos, self.outl[0])
+    return ffi.string(buf, scalar.ai[0])
 end
 
 local function hmac_free(self)
@@ -254,7 +253,6 @@ local function cipher_new(cipher, key, iv, direction)
         direction = direction,
         buf = buffer.ibuf(),
         initialized = false,
-        outl = ffi.new('int[1]')
     }, cipher_mt)
     self:init(key, iv)
     return self
@@ -279,10 +277,10 @@ local function cipher_update(self, input)
         error("Usage: cipher:update(string)")
     end
     local wpos = self.buf:reserve(input:len() + self.block_size - 1)
-    if ffi.C.EVP_CipherUpdate(self.ctx, wpos, self.outl, input, input:len()) ~= 1 then
+    if ffi.C.EVP_CipherUpdate(self.ctx, wpos, scalar.ai, input, input:len()) ~= 1 then
         return error('Can\'t update cipher:' .. openssl_err_str())
     end
-    return ffi.string(wpos, self.outl[0])
+    return ffi.string(wpos, scalar.ai[0])
 end
 
 local function cipher_final(self)
@@ -291,11 +289,11 @@ local function cipher_final(self)
     end
     self.initialized = false
     local wpos = self.buf:reserve(self.block_size - 1)
-    if ffi.C.EVP_CipherFinal_ex(self.ctx, wpos, self.outl) ~= 1 then
+    if ffi.C.EVP_CipherFinal_ex(self.ctx, wpos, scalar.ai) ~= 1 then
         return error('Can\'t finalize cipher:' .. openssl_err_str())
     end
     self.initialized = false
-    return ffi.string(wpos, self.outl[0])
+    return ffi.string(wpos, scalar.ai[0])
 end
 
 local function cipher_free(self)
diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua
index 609334cff..1e4786707 100644
--- a/src/lua/msgpackffi.lua
+++ b/src/lua/msgpackffi.lua
@@ -21,19 +21,10 @@ float
 mp_decode_float(const char **data);
 double
 mp_decode_double(const char **data);
-union tmpint {
-    uint16_t u16;
-    uint32_t u32;
-    uint64_t u64;
-};
 ]])
 
 local strict_alignment = (jit.arch == 'arm')
-
-local tmpint
-if strict_alignment then
-   tmpint = ffi.new('union tmpint[1]')
-end
+local scalar = buffer.scalar
 
 local function bswap_u16(num)
     return bit.rshift(bit.bswap(tonumber(num)), 16)
@@ -70,10 +61,10 @@ end
 local encode_u16
 if strict_alignment then
     encode_u16 = function(buf, code, num)
-        tmpint[0].u16 = bswap_u16(num)
+        scalar.u16 = bswap_u16(num)
         local p = buf:alloc(3)
         p[0] = code
-        ffi.copy(p + 1, tmpint, 2)
+        ffi.copy(p + 1, scalar, 2)
     end
 else
     encode_u16 = function(buf, code, num)
@@ -86,11 +77,10 @@ end
 local encode_u32
 if strict_alignment then
     encode_u32 = function(buf, code, num)
-        tmpint[0].u32 =
-            ffi.cast('uint32_t', bit.bswap(tonumber(num)))
+        scalar.u32 = ffi.cast('uint32_t', bit.bswap(tonumber(num)))
         local p = buf:alloc(5)
         p[0] = code
-        ffi.copy(p + 1, tmpint, 4)
+        ffi.copy(p + 1, scalar, 4)
     end
 else
     encode_u32 = function(buf, code, num)
@@ -104,10 +94,10 @@ end
 local encode_u64
 if strict_alignment then
     encode_u64 = function(buf, code, num)
-        tmpint[0].u64 = bit.bswap(ffi.cast('uint64_t', num))
+        scalar.u64 = bit.bswap(ffi.cast('uint64_t', num))
         local p = buf:alloc(9)
         p[0] = code
-        ffi.copy(p + 1, tmpint, 8)
+        ffi.copy(p + 1, scalar, 8)
     end
 else
     encode_u64 = function(buf, code, num)
@@ -324,9 +314,9 @@ end
 local decode_u16
 if strict_alignment then
     decode_u16 = function(data)
-        ffi.copy(tmpint, data[0], 2)
+        ffi.copy(scalar, data[0], 2)
         data[0] = data[0] + 2
-        return tonumber(bswap_u16(tmpint[0].u16))
+        return tonumber(bswap_u16(scalar.u16))
     end
 else
     decode_u16 = function(data)
@@ -339,10 +329,10 @@ end
 local decode_u32
 if strict_alignment then
     decode_u32 = function(data)
-        ffi.copy(tmpint, data[0], 4)
+        ffi.copy(scalar, data[0], 4)
         data[0] = data[0] + 4
         return tonumber(
-            ffi.cast('uint32_t', bit.bswap(tonumber(tmpint[0].u32))))
+            ffi.cast('uint32_t', bit.bswap(tonumber(scalar.u32))))
     end
 else
     decode_u32 = function(data)
@@ -356,9 +346,9 @@ end
 local decode_u64
 if strict_alignment then
     decode_u64 = function(data)
-        ffi.copy(tmpint, data[0], 8);
+        ffi.copy(scalar, data[0], 8);
         data[0] = data[0] + 8
-        local num = bit.bswap(tmpint[0].u64)
+        local num = bit.bswap(scalar.u64)
         if num <= DBL_INT_MAX then
             return tonumber(num) -- return as 'number'
         end
@@ -385,8 +375,8 @@ end
 local decode_i16
 if strict_alignment then
     decode_i16 = function(data)
-        ffi.copy(tmpint, data[0], 2)
-        local num = bswap_u16(tmpint[0].u16)
+        ffi.copy(scalar, data[0], 2)
+        local num = bswap_u16(scalar.u16)
         data[0] = data[0] + 2
         -- note: this double cast is actually necessary
         return tonumber(ffi.cast('int16_t', ffi.cast('uint16_t', num)))
@@ -403,8 +393,8 @@ end
 local decode_i32
 if strict_alignment then
     decode_i32 = function(data)
-        ffi.copy(tmpint, data[0], 4)
-        local num = bit.bswap(tonumber(tmpint[0].u32))
+        ffi.copy(scalar, data[0], 4)
+        local num = bit.bswap(tonumber(scalar.u32))
         data[0] = data[0] + 4
         return num
     end
@@ -419,9 +409,9 @@ end
 local decode_i64
 if strict_alignment then
     decode_i64 = function(data)
-        ffi.copy(tmpint, data[0], 8)
+        ffi.copy(scalar, data[0], 8)
         data[0] = data[0] + 8
-        local num = bit.bswap(ffi.cast('int64_t', tmpint[0].u64))
+        local num = bit.bswap(scalar.i64)
         if num >= -DBL_INT_MAX and num <= DBL_INT_MAX then
             return tonumber(num) -- return as 'number'
         end
@@ -552,7 +542,7 @@ end
 -- element. It is significally faster on LuaJIT to use double pointer than
 -- return result, newpos.
 --
-local bufp = ffi.new('const unsigned char *[1]');
+local bufp = scalar.acucp;
 
 local function check_offset(offset, len)
     if offset == nil then
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index cbf8c0f29..1ddafee2f 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -10,6 +10,8 @@ local fiber = require('fiber')
 local fio = require('fio')
 local log = require('log')
 local buffer = require('buffer')
+local scalar = buffer.scalar
+local scalar_array = buffer.scalar_array
 local static_alloc = buffer.static_alloc
 
 local format = string.format
@@ -473,9 +475,9 @@ local function socket_setsockopt(self, level, name, value)
     end
 
     if info.type == 1 then
-        local value = ffi.new("int[1]", value)
+        scalar.ai[0] = value
         local res = ffi.C.setsockopt(fd,
-            level, info.iname, value, ffi.sizeof('int'))
+            level, info.iname, scalar.ai, ffi.sizeof('int'))
 
         if res < 0 then
             self._errno = boxerrno()
@@ -517,8 +519,10 @@ local function socket_getsockopt(self, level, name)
     self._errno = nil
 
     if info.type == 1 then
-        local value = ffi.new("int[1]", 0)
-        local len = ffi.new("size_t[1]", ffi.sizeof('int'))
+        local value = scalar_array[0].ai
+        value[0] = 0
+        local len = scalar_array[1].as
+        len[0] = ffi.sizeof('int')
         local res = ffi.C.getsockopt(fd, level, info.iname, value, len)
 
         if res < 0 then
@@ -533,8 +537,9 @@ local function socket_getsockopt(self, level, name)
     end
 
     if info.type == 2 then
-        local value = ffi.new("char[256]", { 0 })
-        local len = ffi.new("size_t[1]", 256)
+        local value = static_alloc('char', 256)
+        local len = scalar.as
+        len[0] = 256
         local res = ffi.C.getsockopt(fd, level, info.iname, value, len)
         if res < 0 then
             self._errno = boxerrno()
@@ -556,8 +561,9 @@ local function socket_linger(self, active, timeout)
     local info = internal.SO_OPT[level].SO_LINGER
     self._errno = nil
     if active == nil then
-        local value = ffi.new("linger_t[1]")
-        local len = ffi.new("size_t[1]", 2 * ffi.sizeof('int'))
+        local value = static_alloc('linger_t')
+        local len = scalar.as
+        len[0] = 2 * ffi.sizeof('linger_t')
         local res = ffi.C.getsockopt(fd, level, info.iname, value, len)
         if res < 0 then
             self._errno = boxerrno()
@@ -800,8 +806,7 @@ local function get_recv_size(self, size)
             -- them using message peek.
             local iflags = get_iflags(internal.SEND_FLAGS, {'MSG_PEEK'})
             assert(iflags ~= nil)
-            local buf = ffi.new('char[?]', 1)
-            size = tonumber(ffi.C.recv(fd, buf, 1, iflags))
+            size = tonumber(ffi.C.recv(fd, scalar.ac, 1, iflags))
             -- Prevent race condition: proceed with the case when
             -- a datagram of length > 0 has been arrived after the
             -- getsockopt call above.
diff --git a/src/lua/string.lua b/src/lua/string.lua
index bb4adfc78..6ed6be8bf 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -15,8 +15,8 @@ ffi.cdef[[
 ]]
 
 local c_char_ptr     = ffi.typeof('const char *')
-local strip_newstart = ffi.new("unsigned long[1]")
-local strip_newlen   = ffi.new("unsigned long[1]")
+local strip_newstart = buffer.scalar_array[0].aul
+local strip_newlen   = buffer.scalar_array[1].aul
 
 local memcmp  = ffi.C.memcmp
 local memmem  = ffi.C.memmem
diff --git a/test/app/buffer.result b/test/app/buffer.result
index e0aad9bc7..10494b812 100644
--- a/test/app/buffer.result
+++ b/test/app/buffer.result
@@ -7,6 +7,33 @@ buffer = require('buffer')
 ffi = require('ffi')
 ---
 ...
+-- Scalar.
+scalar = buffer.scalar
+---
+...
+scalar.u16 = 100
+---
+...
+u16 = ffi.new('uint16_t[1]')
+---
+...
+ffi.copy(u16, scalar, 2)
+---
+...
+u16[0]
+---
+- 100
+...
+u16[0] = 200
+---
+...
+ffi.copy(scalar, u16, 2)
+---
+...
+scalar.u16
+---
+- 200
+...
 -- Alignment.
 _ = buffer.static_alloc('char') -- This makes buffer pos unaligned.
 ---
diff --git a/test/app/buffer.test.lua b/test/app/buffer.test.lua
index ba7299f33..6b1a974ca 100644
--- a/test/app/buffer.test.lua
+++ b/test/app/buffer.test.lua
@@ -2,6 +2,17 @@ test_run = require('test_run').new()
 buffer = require('buffer')
 ffi = require('ffi')
 
+-- Scalar.
+scalar = buffer.scalar
+scalar.u16 = 100
+u16 = ffi.new('uint16_t[1]')
+ffi.copy(u16, scalar, 2)
+u16[0]
+
+u16[0] = 200
+ffi.copy(scalar, u16, 2)
+scalar.u16
+
 -- Alignment.
 _ = buffer.static_alloc('char') -- This makes buffer pos unaligned.
 p = buffer.static_alloc('int')
-- 
2.20.1 (Apple Git-117)