[tarantool-patches] Re: [PATCH v2 2/2] buffer: replace all ffi.new(type[1]) with cached union
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Mon May 20 17:59:17 MSK 2019
Kostja asked to replace buffer.scalar_array and buffer.scalar
with buffer.reg1 and buffer.reg2. I did:
diff --git a/src/lua/buffer.lua b/src/lua/buffer.lua
index 7fd9f46ed..7467cb938 100644
--- a/src/lua/buffer.lua
+++ b/src/lua/buffer.lua
@@ -38,11 +38,11 @@ void *
lua_static_aligned_alloc(size_t size, size_t alignment);
/**
- * Scalar is a buffer to use with FFI functions, which usually
+ * Register is a buffer to use with FFI functions, which usually
* operate with pointers to scalar values like int, char, size_t,
* void *. To avoid doing 'ffi.new(<type>[1])' on each such FFI
* function invocation, a module can use one of attributes of the
- * scalar union.
+ * register union.
*
* Naming policy of the attributes is easy to remember:
* 'a' for array type + type name first letters + 'p' for pointer.
@@ -52,7 +52,7 @@ lua_static_aligned_alloc(size_t size, size_t alignment);
* - const unsigned char *[1] -
* <a>rray of <c>onst <u>nsigned <c>har <p> pointer - acucp.
*/
-union scalar {
+union c_register {
size_t as[1];
void *ap[1];
int ai[1];
@@ -225,18 +225,20 @@ local function static_alloc(type, size)
end
--
--- Sometimes it is wanted to use several temporary scalar cdata
--- values. Then one union object is not enough - its attributes
--- share memory.
+-- Sometimes it is wanted to use several temporary registers at
+-- once. For example, when a C function takes 2 C pointers. Then
+-- one register is not enough - its attributes share memory. Note,
+-- registers are not allocated with separate ffi.new calls
+-- deliberately. With a single allocation they fit into 1 cache
+-- line, and reduce the heap fragmentation.
--
-local scalar_array = ffi.new('union scalar[?]', 2)
+local reg_array = ffi.new('union c_register[?]', 2)
return {
ibuf = ibuf_new;
IBUF_SHARED = ffi.C.tarantool_lua_ibuf;
READAHEAD = READAHEAD;
static_alloc = static_alloc,
- scalar_array = scalar_array,
- -- Fast access when only one variable is needed.
- scalar = scalar_array[0],
+ reg1 = reg_array[0],
+ reg2 = reg_array[1]
}
diff --git a/src/lua/crypto.lua b/src/lua/crypto.lua
index 32107c0ce..d23ba8e3a 100644
--- a/src/lua/crypto.lua
+++ b/src/lua/crypto.lua
@@ -2,7 +2,7 @@
local ffi = require('ffi')
local buffer = require('buffer')
-local scalar = buffer.scalar
+local reg = buffer.reg1
ffi.cdef[[
/* from openssl/err.h */
@@ -132,10 +132,10 @@ local function digest_final(self)
return error('Digest not initialized')
end
self.initialized = false
- if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, scalar.ai) ~= 1 then
+ if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, reg.ai) ~= 1 then
return error('Can\'t finalize digest: ' .. openssl_err_str())
end
- return ffi.string(self.buf.wpos, scalar.ai[0])
+ return ffi.string(self.buf.wpos, reg.ai[0])
end
local function digest_free(self)
@@ -205,10 +205,10 @@ local function hmac_final(self)
end
self.initialized = false
local buf = buffer.static_alloc('char', 64)
- if ffi.C.HMAC_Final(self.ctx, buf, scalar.ai) ~= 1 then
+ if ffi.C.HMAC_Final(self.ctx, buf, reg.ai) ~= 1 then
return error('Can\'t finalize HMAC: ' .. openssl_err_str())
end
- return ffi.string(buf, scalar.ai[0])
+ return ffi.string(buf, reg.ai[0])
end
local function hmac_free(self)
diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua
index 1e4786707..bfeedbc4b 100644
--- a/src/lua/msgpackffi.lua
+++ b/src/lua/msgpackffi.lua
@@ -24,7 +24,7 @@ mp_decode_double(const char **data);
]])
local strict_alignment = (jit.arch == 'arm')
-local scalar = buffer.scalar
+local reg = buffer.reg1
local function bswap_u16(num)
return bit.rshift(bit.bswap(tonumber(num)), 16)
@@ -61,10 +61,10 @@ end
local encode_u16
if strict_alignment then
encode_u16 = function(buf, code, num)
- scalar.u16 = bswap_u16(num)
+ reg.u16 = bswap_u16(num)
local p = buf:alloc(3)
p[0] = code
- ffi.copy(p + 1, scalar, 2)
+ ffi.copy(p + 1, reg, 2)
end
else
encode_u16 = function(buf, code, num)
@@ -77,10 +77,10 @@ end
local encode_u32
if strict_alignment then
encode_u32 = function(buf, code, num)
- scalar.u32 = ffi.cast('uint32_t', bit.bswap(tonumber(num)))
+ reg.u32 = ffi.cast('uint32_t', bit.bswap(tonumber(num)))
local p = buf:alloc(5)
p[0] = code
- ffi.copy(p + 1, scalar, 4)
+ ffi.copy(p + 1, reg, 4)
end
else
encode_u32 = function(buf, code, num)
@@ -94,10 +94,10 @@ end
local encode_u64
if strict_alignment then
encode_u64 = function(buf, code, num)
- scalar.u64 = bit.bswap(ffi.cast('uint64_t', num))
+ reg.u64 = bit.bswap(ffi.cast('uint64_t', num))
local p = buf:alloc(9)
p[0] = code
- ffi.copy(p + 1, scalar, 8)
+ ffi.copy(p + 1, reg, 8)
end
else
encode_u64 = function(buf, code, num)
@@ -314,9 +314,9 @@ end
local decode_u16
if strict_alignment then
decode_u16 = function(data)
- ffi.copy(scalar, data[0], 2)
+ ffi.copy(reg, data[0], 2)
data[0] = data[0] + 2
- return tonumber(bswap_u16(scalar.u16))
+ return tonumber(bswap_u16(reg.u16))
end
else
decode_u16 = function(data)
@@ -329,10 +329,10 @@ end
local decode_u32
if strict_alignment then
decode_u32 = function(data)
- ffi.copy(scalar, data[0], 4)
+ ffi.copy(reg, data[0], 4)
data[0] = data[0] + 4
return tonumber(
- ffi.cast('uint32_t', bit.bswap(tonumber(scalar.u32))))
+ ffi.cast('uint32_t', bit.bswap(tonumber(reg.u32))))
end
else
decode_u32 = function(data)
@@ -346,9 +346,9 @@ end
local decode_u64
if strict_alignment then
decode_u64 = function(data)
- ffi.copy(scalar, data[0], 8);
+ ffi.copy(reg, data[0], 8);
data[0] = data[0] + 8
- local num = bit.bswap(scalar.u64)
+ local num = bit.bswap(reg.u64)
if num <= DBL_INT_MAX then
return tonumber(num) -- return as 'number'
end
@@ -375,8 +375,8 @@ end
local decode_i16
if strict_alignment then
decode_i16 = function(data)
- ffi.copy(scalar, data[0], 2)
- local num = bswap_u16(scalar.u16)
+ ffi.copy(reg, data[0], 2)
+ local num = bswap_u16(reg.u16)
data[0] = data[0] + 2
-- note: this double cast is actually necessary
return tonumber(ffi.cast('int16_t', ffi.cast('uint16_t', num)))
@@ -393,8 +393,8 @@ end
local decode_i32
if strict_alignment then
decode_i32 = function(data)
- ffi.copy(scalar, data[0], 4)
- local num = bit.bswap(tonumber(scalar.u32))
+ ffi.copy(reg, data[0], 4)
+ local num = bit.bswap(tonumber(reg.u32))
data[0] = data[0] + 4
return num
end
@@ -409,9 +409,9 @@ end
local decode_i64
if strict_alignment then
decode_i64 = function(data)
- ffi.copy(scalar, data[0], 8)
+ ffi.copy(reg, data[0], 8)
data[0] = data[0] + 8
- local num = bit.bswap(scalar.i64)
+ local num = bit.bswap(reg.i64)
if num >= -DBL_INT_MAX and num <= DBL_INT_MAX then
return tonumber(num) -- return as 'number'
end
@@ -542,7 +542,7 @@ end
-- element. It is significally faster on LuaJIT to use double pointer than
-- return result, newpos.
--
-local bufp = scalar.acucp;
+local bufp = reg.acucp;
local function check_offset(offset, len)
if offset == nil then
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 1ddafee2f..ad894c0fb 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -10,8 +10,8 @@ local fiber = require('fiber')
local fio = require('fio')
local log = require('log')
local buffer = require('buffer')
-local scalar = buffer.scalar
-local scalar_array = buffer.scalar_array
+local reg1 = buffer.reg1
+local reg2 = buffer.reg2
local static_alloc = buffer.static_alloc
local format = string.format
@@ -475,9 +475,9 @@ local function socket_setsockopt(self, level, name, value)
end
if info.type == 1 then
- scalar.ai[0] = value
+ reg1.ai[0] = value
local res = ffi.C.setsockopt(fd,
- level, info.iname, scalar.ai, ffi.sizeof('int'))
+ level, info.iname, reg1.ai, ffi.sizeof('int'))
if res < 0 then
self._errno = boxerrno()
@@ -519,9 +519,9 @@ local function socket_getsockopt(self, level, name)
self._errno = nil
if info.type == 1 then
- local value = scalar_array[0].ai
+ local value = reg1.ai
value[0] = 0
- local len = scalar_array[1].as
+ local len = reg2.as
len[0] = ffi.sizeof('int')
local res = ffi.C.getsockopt(fd, level, info.iname, value, len)
@@ -538,7 +538,7 @@ local function socket_getsockopt(self, level, name)
if info.type == 2 then
local value = static_alloc('char', 256)
- local len = scalar.as
+ local len = reg1.as
len[0] = 256
local res = ffi.C.getsockopt(fd, level, info.iname, value, len)
if res < 0 then
@@ -562,8 +562,8 @@ local function socket_linger(self, active, timeout)
self._errno = nil
if active == nil then
local value = static_alloc('linger_t')
- local len = scalar.as
- len[0] = 2 * ffi.sizeof('linger_t')
+ local len = reg1.as
+ len[0] = ffi.sizeof('linger_t')
local res = ffi.C.getsockopt(fd, level, info.iname, value, len)
if res < 0 then
self._errno = boxerrno()
@@ -806,7 +806,7 @@ local function get_recv_size(self, size)
-- them using message peek.
local iflags = get_iflags(internal.SEND_FLAGS, {'MSG_PEEK'})
assert(iflags ~= nil)
- size = tonumber(ffi.C.recv(fd, scalar.ac, 1, iflags))
+ size = tonumber(ffi.C.recv(fd, reg1.ac, 1, iflags))
-- Prevent race condition: proceed with the case when
-- a datagram of length > 0 has been arrived after the
-- getsockopt call above.
diff --git a/src/lua/string.lua b/src/lua/string.lua
index 6ed6be8bf..6e12c59ae 100644
--- a/src/lua/string.lua
+++ b/src/lua/string.lua
@@ -15,8 +15,8 @@ ffi.cdef[[
]]
local c_char_ptr = ffi.typeof('const char *')
-local strip_newstart = buffer.scalar_array[0].aul
-local strip_newlen = buffer.scalar_array[1].aul
+local strip_newstart = buffer.reg1.aul
+local strip_newlen = buffer.reg2.aul
local memcmp = ffi.C.memcmp
local memmem = ffi.C.memmem
diff --git a/test/app/buffer.result b/test/app/buffer.result
index 10494b812..beccc5a87 100644
--- a/test/app/buffer.result
+++ b/test/app/buffer.result
@@ -7,17 +7,17 @@ buffer = require('buffer')
ffi = require('ffi')
---
...
--- Scalar.
-scalar = buffer.scalar
+-- Registers.
+reg1 = buffer.reg1
---
...
-scalar.u16 = 100
+reg1.u16 = 100
---
...
u16 = ffi.new('uint16_t[1]')
---
...
-ffi.copy(u16, scalar, 2)
+ffi.copy(u16, reg1, 2)
---
...
u16[0]
@@ -27,10 +27,10 @@ u16[0]
u16[0] = 200
---
...
-ffi.copy(scalar, u16, 2)
+ffi.copy(reg1, u16, 2)
---
...
-scalar.u16
+reg1.u16
---
- 200
...
diff --git a/test/app/buffer.test.lua b/test/app/buffer.test.lua
index 6b1a974ca..a1c380680 100644
--- a/test/app/buffer.test.lua
+++ b/test/app/buffer.test.lua
@@ -2,16 +2,16 @@ test_run = require('test_run').new()
buffer = require('buffer')
ffi = require('ffi')
--- Scalar.
-scalar = buffer.scalar
-scalar.u16 = 100
+-- Registers.
+reg1 = buffer.reg1
+reg1.u16 = 100
u16 = ffi.new('uint16_t[1]')
-ffi.copy(u16, scalar, 2)
+ffi.copy(u16, reg1, 2)
u16[0]
u16[0] = 200
-ffi.copy(scalar, u16, 2)
-scalar.u16
+ffi.copy(reg1, u16, 2)
+reg1.u16
-- Alignment.
_ = buffer.static_alloc('char') -- This makes buffer pos unaligned.
More information about the Tarantool-patches
mailing list