From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 585162F225 for ; Mon, 20 May 2019 10:59:22 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id DrpjzIzN-4jM for ; Mon, 20 May 2019 10:59:22 -0400 (EDT) Received: from smtp49.i.mail.ru (smtp49.i.mail.ru [94.100.177.109]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 6FF982F100 for ; Mon, 20 May 2019 10:59:21 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH v2 2/2] buffer: replace all ffi.new(type[1]) with cached union From: Vladislav Shpilevoy References: <32785343e2fd5117bbcab000ef6f9fe16c79a56e.1558217352.git.v.shpilevoy@tarantool.org> Message-ID: <1191c320-cb91-8954-9f88-d7ecce1b41d1@tarantool.org> Date: Mon, 20 May 2019 17:59:17 +0300 MIME-Version: 1.0 In-Reply-To: <32785343e2fd5117bbcab000ef6f9fe16c79a56e.1558217352.git.v.shpilevoy@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org Cc: kostja@tarantool.org Kostja asked to replace buffer.scalar_array and buffer.scalar with buffer.reg1 and buffer.reg2. I did: diff --git a/src/lua/buffer.lua b/src/lua/buffer.lua index 7fd9f46ed..7467cb938 100644 --- a/src/lua/buffer.lua +++ b/src/lua/buffer.lua @@ -38,11 +38,11 @@ void * lua_static_aligned_alloc(size_t size, size_t alignment); /** - * Scalar is a buffer to use with FFI functions, which usually + * Register is a buffer to use with FFI functions, which usually * operate with pointers to scalar values like int, char, size_t, * void *. To avoid doing 'ffi.new([1])' on each such FFI * function invocation, a module can use one of attributes of the - * scalar union. + * register union. * * Naming policy of the attributes is easy to remember: * 'a' for array type + type name first letters + 'p' for pointer. @@ -52,7 +52,7 @@ lua_static_aligned_alloc(size_t size, size_t alignment); * - const unsigned char *[1] - * rray of onst nsigned har

pointer - acucp. */ -union scalar { +union c_register { size_t as[1]; void *ap[1]; int ai[1]; @@ -225,18 +225,20 @@ local function static_alloc(type, size) end -- --- Sometimes it is wanted to use several temporary scalar cdata --- values. Then one union object is not enough - its attributes --- share memory. +-- Sometimes it is wanted to use several temporary registers at +-- once. For example, when a C function takes 2 C pointers. Then +-- one register is not enough - its attributes share memory. Note, +-- registers are not allocated with separate ffi.new calls +-- deliberately. With a single allocation they fit into 1 cache +-- line, and reduce the heap fragmentation. -- -local scalar_array = ffi.new('union scalar[?]', 2) +local reg_array = ffi.new('union c_register[?]', 2) return { ibuf = ibuf_new; IBUF_SHARED = ffi.C.tarantool_lua_ibuf; READAHEAD = READAHEAD; static_alloc = static_alloc, - scalar_array = scalar_array, - -- Fast access when only one variable is needed. - scalar = scalar_array[0], + reg1 = reg_array[0], + reg2 = reg_array[1] } diff --git a/src/lua/crypto.lua b/src/lua/crypto.lua index 32107c0ce..d23ba8e3a 100644 --- a/src/lua/crypto.lua +++ b/src/lua/crypto.lua @@ -2,7 +2,7 @@ local ffi = require('ffi') local buffer = require('buffer') -local scalar = buffer.scalar +local reg = buffer.reg1 ffi.cdef[[ /* from openssl/err.h */ @@ -132,10 +132,10 @@ local function digest_final(self) return error('Digest not initialized') end self.initialized = false - if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, scalar.ai) ~= 1 then + if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, reg.ai) ~= 1 then return error('Can\'t finalize digest: ' .. openssl_err_str()) end - return ffi.string(self.buf.wpos, scalar.ai[0]) + return ffi.string(self.buf.wpos, reg.ai[0]) end local function digest_free(self) @@ -205,10 +205,10 @@ local function hmac_final(self) end self.initialized = false local buf = buffer.static_alloc('char', 64) - if ffi.C.HMAC_Final(self.ctx, buf, scalar.ai) ~= 1 then + if ffi.C.HMAC_Final(self.ctx, buf, reg.ai) ~= 1 then return error('Can\'t finalize HMAC: ' .. openssl_err_str()) end - return ffi.string(buf, scalar.ai[0]) + return ffi.string(buf, reg.ai[0]) end local function hmac_free(self) diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua index 1e4786707..bfeedbc4b 100644 --- a/src/lua/msgpackffi.lua +++ b/src/lua/msgpackffi.lua @@ -24,7 +24,7 @@ mp_decode_double(const char **data); ]]) local strict_alignment = (jit.arch == 'arm') -local scalar = buffer.scalar +local reg = buffer.reg1 local function bswap_u16(num) return bit.rshift(bit.bswap(tonumber(num)), 16) @@ -61,10 +61,10 @@ end local encode_u16 if strict_alignment then encode_u16 = function(buf, code, num) - scalar.u16 = bswap_u16(num) + reg.u16 = bswap_u16(num) local p = buf:alloc(3) p[0] = code - ffi.copy(p + 1, scalar, 2) + ffi.copy(p + 1, reg, 2) end else encode_u16 = function(buf, code, num) @@ -77,10 +77,10 @@ end local encode_u32 if strict_alignment then encode_u32 = function(buf, code, num) - scalar.u32 = ffi.cast('uint32_t', bit.bswap(tonumber(num))) + reg.u32 = ffi.cast('uint32_t', bit.bswap(tonumber(num))) local p = buf:alloc(5) p[0] = code - ffi.copy(p + 1, scalar, 4) + ffi.copy(p + 1, reg, 4) end else encode_u32 = function(buf, code, num) @@ -94,10 +94,10 @@ end local encode_u64 if strict_alignment then encode_u64 = function(buf, code, num) - scalar.u64 = bit.bswap(ffi.cast('uint64_t', num)) + reg.u64 = bit.bswap(ffi.cast('uint64_t', num)) local p = buf:alloc(9) p[0] = code - ffi.copy(p + 1, scalar, 8) + ffi.copy(p + 1, reg, 8) end else encode_u64 = function(buf, code, num) @@ -314,9 +314,9 @@ end local decode_u16 if strict_alignment then decode_u16 = function(data) - ffi.copy(scalar, data[0], 2) + ffi.copy(reg, data[0], 2) data[0] = data[0] + 2 - return tonumber(bswap_u16(scalar.u16)) + return tonumber(bswap_u16(reg.u16)) end else decode_u16 = function(data) @@ -329,10 +329,10 @@ end local decode_u32 if strict_alignment then decode_u32 = function(data) - ffi.copy(scalar, data[0], 4) + ffi.copy(reg, data[0], 4) data[0] = data[0] + 4 return tonumber( - ffi.cast('uint32_t', bit.bswap(tonumber(scalar.u32)))) + ffi.cast('uint32_t', bit.bswap(tonumber(reg.u32)))) end else decode_u32 = function(data) @@ -346,9 +346,9 @@ end local decode_u64 if strict_alignment then decode_u64 = function(data) - ffi.copy(scalar, data[0], 8); + ffi.copy(reg, data[0], 8); data[0] = data[0] + 8 - local num = bit.bswap(scalar.u64) + local num = bit.bswap(reg.u64) if num <= DBL_INT_MAX then return tonumber(num) -- return as 'number' end @@ -375,8 +375,8 @@ end local decode_i16 if strict_alignment then decode_i16 = function(data) - ffi.copy(scalar, data[0], 2) - local num = bswap_u16(scalar.u16) + ffi.copy(reg, data[0], 2) + local num = bswap_u16(reg.u16) data[0] = data[0] + 2 -- note: this double cast is actually necessary return tonumber(ffi.cast('int16_t', ffi.cast('uint16_t', num))) @@ -393,8 +393,8 @@ end local decode_i32 if strict_alignment then decode_i32 = function(data) - ffi.copy(scalar, data[0], 4) - local num = bit.bswap(tonumber(scalar.u32)) + ffi.copy(reg, data[0], 4) + local num = bit.bswap(tonumber(reg.u32)) data[0] = data[0] + 4 return num end @@ -409,9 +409,9 @@ end local decode_i64 if strict_alignment then decode_i64 = function(data) - ffi.copy(scalar, data[0], 8) + ffi.copy(reg, data[0], 8) data[0] = data[0] + 8 - local num = bit.bswap(scalar.i64) + local num = bit.bswap(reg.i64) if num >= -DBL_INT_MAX and num <= DBL_INT_MAX then return tonumber(num) -- return as 'number' end @@ -542,7 +542,7 @@ end -- element. It is significally faster on LuaJIT to use double pointer than -- return result, newpos. -- -local bufp = scalar.acucp; +local bufp = reg.acucp; local function check_offset(offset, len) if offset == nil then diff --git a/src/lua/socket.lua b/src/lua/socket.lua index 1ddafee2f..ad894c0fb 100644 --- a/src/lua/socket.lua +++ b/src/lua/socket.lua @@ -10,8 +10,8 @@ local fiber = require('fiber') local fio = require('fio') local log = require('log') local buffer = require('buffer') -local scalar = buffer.scalar -local scalar_array = buffer.scalar_array +local reg1 = buffer.reg1 +local reg2 = buffer.reg2 local static_alloc = buffer.static_alloc local format = string.format @@ -475,9 +475,9 @@ local function socket_setsockopt(self, level, name, value) end if info.type == 1 then - scalar.ai[0] = value + reg1.ai[0] = value local res = ffi.C.setsockopt(fd, - level, info.iname, scalar.ai, ffi.sizeof('int')) + level, info.iname, reg1.ai, ffi.sizeof('int')) if res < 0 then self._errno = boxerrno() @@ -519,9 +519,9 @@ local function socket_getsockopt(self, level, name) self._errno = nil if info.type == 1 then - local value = scalar_array[0].ai + local value = reg1.ai value[0] = 0 - local len = scalar_array[1].as + local len = reg2.as len[0] = ffi.sizeof('int') local res = ffi.C.getsockopt(fd, level, info.iname, value, len) @@ -538,7 +538,7 @@ local function socket_getsockopt(self, level, name) if info.type == 2 then local value = static_alloc('char', 256) - local len = scalar.as + local len = reg1.as len[0] = 256 local res = ffi.C.getsockopt(fd, level, info.iname, value, len) if res < 0 then @@ -562,8 +562,8 @@ local function socket_linger(self, active, timeout) self._errno = nil if active == nil then local value = static_alloc('linger_t') - local len = scalar.as - len[0] = 2 * ffi.sizeof('linger_t') + local len = reg1.as + len[0] = ffi.sizeof('linger_t') local res = ffi.C.getsockopt(fd, level, info.iname, value, len) if res < 0 then self._errno = boxerrno() @@ -806,7 +806,7 @@ local function get_recv_size(self, size) -- them using message peek. local iflags = get_iflags(internal.SEND_FLAGS, {'MSG_PEEK'}) assert(iflags ~= nil) - size = tonumber(ffi.C.recv(fd, scalar.ac, 1, iflags)) + size = tonumber(ffi.C.recv(fd, reg1.ac, 1, iflags)) -- Prevent race condition: proceed with the case when -- a datagram of length > 0 has been arrived after the -- getsockopt call above. diff --git a/src/lua/string.lua b/src/lua/string.lua index 6ed6be8bf..6e12c59ae 100644 --- a/src/lua/string.lua +++ b/src/lua/string.lua @@ -15,8 +15,8 @@ ffi.cdef[[ ]] local c_char_ptr = ffi.typeof('const char *') -local strip_newstart = buffer.scalar_array[0].aul -local strip_newlen = buffer.scalar_array[1].aul +local strip_newstart = buffer.reg1.aul +local strip_newlen = buffer.reg2.aul local memcmp = ffi.C.memcmp local memmem = ffi.C.memmem diff --git a/test/app/buffer.result b/test/app/buffer.result index 10494b812..beccc5a87 100644 --- a/test/app/buffer.result +++ b/test/app/buffer.result @@ -7,17 +7,17 @@ buffer = require('buffer') ffi = require('ffi') --- ... --- Scalar. -scalar = buffer.scalar +-- Registers. +reg1 = buffer.reg1 --- ... -scalar.u16 = 100 +reg1.u16 = 100 --- ... u16 = ffi.new('uint16_t[1]') --- ... -ffi.copy(u16, scalar, 2) +ffi.copy(u16, reg1, 2) --- ... u16[0] @@ -27,10 +27,10 @@ u16[0] u16[0] = 200 --- ... -ffi.copy(scalar, u16, 2) +ffi.copy(reg1, u16, 2) --- ... -scalar.u16 +reg1.u16 --- - 200 ... diff --git a/test/app/buffer.test.lua b/test/app/buffer.test.lua index 6b1a974ca..a1c380680 100644 --- a/test/app/buffer.test.lua +++ b/test/app/buffer.test.lua @@ -2,16 +2,16 @@ test_run = require('test_run').new() buffer = require('buffer') ffi = require('ffi') --- Scalar. -scalar = buffer.scalar -scalar.u16 = 100 +-- Registers. +reg1 = buffer.reg1 +reg1.u16 = 100 u16 = ffi.new('uint16_t[1]') -ffi.copy(u16, scalar, 2) +ffi.copy(u16, reg1, 2) u16[0] u16[0] = 200 -ffi.copy(scalar, u16, 2) -scalar.u16 +ffi.copy(reg1, u16, 2) +reg1.u16 -- Alignment. _ = buffer.static_alloc('char') -- This makes buffer pos unaligned.