* Re: [tarantool-patches] Re: [PATCH 1/1] netbox: fix wait_connected ignorance
2018-12-06 17:46 ` Vladimir Davydov
@ 2018-12-06 22:45 ` Vladislav Shpilevoy
2018-12-09 11:16 ` Vladimir Davydov
0 siblings, 1 reply; 4+ messages in thread
From: Vladislav Shpilevoy @ 2018-12-06 22:45 UTC (permalink / raw)
To: tarantool-patches, Vladimir Davydov
Hi! Thanks for the review.
On 06/12/2018 20:46, Vladimir Davydov wrote:
> On Wed, Dec 05, 2018 at 06:06:39PM +0300, Vladislav Shpilevoy wrote:
>> After this patch d2468dacaf it became possible to
>> wrap an existing connection into netbox API. A regular
>> netbox.connect function was refactored so as to reuse
>> connection establishment code.
>>
>> But connection should be established in a worker
>> fiber, not in a caller's one. Otherwise it is
>> impossible to do not wait for connect result.
>>
>> The patch just moves connection establishment into a
>> worker fiber, without any functional changes.
>>
>> Closes #3856
>> ---
>> https://github.com/tarantool/tarantool/tree/gerold103/gh-3856-netbox-ignores-wait-connected
>> https://github.com/tarantool/tarantool/issues/3856
>
> Please rebase on top of 2.1 - we don't do merges anymore.
Done.
>
> Should I cherry-pick it to 1.10?
Yes, the bug appears on 1.10 as well.
>
>>
>> src/box/lua/net_box.lua | 34 ++++++++++++++--------------------
>> test/box/net.box.result | 17 +++++++++++++++++
>> test/box/net.box.test.lua | 7 +++++++
>> 3 files changed, 38 insertions(+), 20 deletions(-)
>>
>> diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
>> index fd6ebf9de..d54b3e7d9 100644
>> --- a/src/box/lua/net_box.lua
>> +++ b/src/box/lua/net_box.lua
>> @@ -419,21 +419,21 @@ local function create_transport(host, port, user, password, callback,
>>
>> local function start()
>> if state ~= 'initial' then return not is_final_state[state] end
>> - if not connection and not callback('reconnect_timeout') then
>> - set_state('error', E_NO_CONNECTION)
>> - return
>> - end
>> fiber.create(function()
>> local ok, err
>> worker_fiber = fiber_self()
>> fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
>> - -- It is possible, if the first connection attempt had
>> - -- been failed, but reconnect timeout is set. In such
>> - -- a case the worker must be run, and immediately
>> - -- start reconnecting.
>> if not connection then
>> - set_state('error_reconnect', E_NO_CONNECTION, greeting)
>> - goto do_reconnect
>> + local tm = callback('fetch_connect_timeout')
>> + connection, greeting = establish_connection(host, port, tm)
>
> This code is very similar to the reconnect code below. You probably
> didn't reuse it for a reason, but still, may be we could do something
> like this?>
> diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
> index d54b3e7d..f342889a 100644
> --- a/src/box/lua/net_box.lua
> +++ b/src/box/lua/net_box.lua
> @@ -424,16 +424,7 @@ local function create_transport(host, port, user, password, callback,
> worker_fiber = fiber_self()
> fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
> if not connection then
> - local tm = callback('fetch_connect_timeout')
> - connection, greeting = establish_connection(host, port, tm)
> - if not connection then
> - if not callback('reconnect_timeout') then
> - set_state('error', E_NO_CONNECTION, greeting)
> - return
> - end
> - set_state('error_reconnect', E_NO_CONNECTION, greeting)
> - goto do_reconnect
> - end
> + goto do_connect
> end
> ::handle_connection::
> ok, err = pcall(protocol_sm)
> @@ -448,6 +439,7 @@ local function create_transport(host, port, user, password, callback,
> local timeout = callback('reconnect_timeout')
> while timeout and state == 'error_reconnect' do
> fiber.sleep(timeout)
> + ::do_connect::
> timeout = callback('reconnect_timeout')
> if not timeout or state ~= 'error_reconnect' then
> break
>
Yes, we can, but there are a few problems:
1. In Lua labels have visibility scope. So to put do_connect inside
while I need to replace while with another explicit goto.
2. In the first attempt to connect I set error as set_state('error')
instead of set_state('error_reconnect') - it is important, because
the latter logs an error.
But nonetheless I've solved these problems. This makes diff 3 lines
shorter, but I am not sure that such a small shortening is worth
making code less clear. It is up to you.
diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index cf6f7a168..20deaf2bd 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -427,16 +427,7 @@ local function create_transport(host, port, user, password, callback,
worker_fiber = fiber_self()
fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
if not connection then
- local tm = callback('fetch_connect_timeout')
- connection, greeting = establish_connection(host, port, tm)
- if not connection then
- if not callback('reconnect_timeout') then
- set_state('error', E_NO_CONNECTION, greeting)
- goto stop
- end
- set_state('error_reconnect', E_NO_CONNECTION, greeting)
- goto do_reconnect
- end
+ goto do_connect
end
::handle_connection::
ok, err = pcall(protocol_sm)
@@ -447,23 +438,29 @@ local function create_transport(host, port, user, password, callback,
connection:close()
connection = nil
end
+ timeout = callback('reconnect_timeout')
::do_reconnect::
+ if not timeout or state ~= 'error_reconnect' then
+ goto stop
+ end
+ fiber.sleep(timeout)
timeout = callback('reconnect_timeout')
- while timeout and state == 'error_reconnect' do
- fiber.sleep(timeout)
- timeout = callback('reconnect_timeout')
- if not timeout or state ~= 'error_reconnect' then
- break
- end
- connection, greeting =
- establish_connection(host, port,
- callback('fetch_connect_timeout'))
- if connection then
- goto handle_connection
- end
- set_state('error_reconnect', E_NO_CONNECTION, greeting)
- timeout = callback('reconnect_timeout')
+ if not timeout or state ~= 'error_reconnect' then
+ goto stop
+ end
+ ::do_connect::
+ connection, greeting =
+ establish_connection(host, port, callback('fetch_connect_timeout'))
+ if connection then
+ goto handle_connection
+ end
+ timeout = callback('reconnect_timeout')
+ if not timeout then
+ set_state('error', E_NO_CONNECTION, greeting)
+ goto stop
end
+ set_state('error_reconnect', E_NO_CONNECTION, greeting)
+ goto do_reconnect
::stop::
send_buf:recycle()
recv_buf:recycle()
>> + if not connection then
>> + if not callback('reconnect_timeout') then
>> + set_state('error', E_NO_CONNECTION, greeting)
>> + return
>
> Shouldn't we clear worker_fiber here?
>
> diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
> index d54b3e7d..33f26706 100644
> --- a/src/box/lua/net_box.lua
> +++ b/src/box/lua/net_box.lua
> @@ -429,7 +429,7 @@ local function create_transport(host, port, user, password, callback,
> if not connection then
> if not callback('reconnect_timeout') then
> set_state('error', E_NO_CONNECTION, greeting)
> - return
> + goto stop
> end
> set_state('error_reconnect', E_NO_CONNECTION, greeting)
> goto do_reconnect
> @@ -461,6 +461,7 @@ local function create_transport(host, port, user, password, callback,
> set_state('error_reconnect', E_NO_CONNECTION, greeting)
> timeout = callback('reconnect_timeout')
> end
> + ::stop::
> send_buf:recycle()
> recv_buf:recycle()
> worker_fiber = nil
>
>> + end
>> + set_state('error_reconnect', E_NO_CONNECTION, greeting)
>> + goto do_reconnect
>> + end
>> end
>> ::handle_connection::
>> ok, err = pcall(protocol_sm)
>> @@ -472,7 +472,9 @@ local function create_transport(host, port, user, password, callback,
>> set_state('closed', E_NO_CONNECTION, 'Connection closed')
>> end
>> if worker_fiber then
>> - worker_fiber:cancel()
>> + if worker_fiber:status() ~= 'dead' then
>> + worker_fiber:cancel()
>> + end
>
> ... then you probably wouldn't need to change this.
>
>> worker_fiber = nil
>> end
>> end
>
>
Yes, it is possible.
diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 72c0104b8..cf6f7a168 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -423,7 +423,7 @@ local function create_transport(host, port, user, password, callback,
local function start()
if state ~= 'initial' then return not is_final_state[state] end
fiber.create(function()
- local ok, err
+ local ok, err, timeout
worker_fiber = fiber_self()
fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
if not connection then
@@ -432,7 +432,7 @@ local function create_transport(host, port, user, password, callback,
if not connection then
if not callback('reconnect_timeout') then
set_state('error', E_NO_CONNECTION, greeting)
- return
+ goto stop
end
set_state('error_reconnect', E_NO_CONNECTION, greeting)
goto do_reconnect
@@ -448,7 +448,7 @@ local function create_transport(host, port, user, password, callback,
connection = nil
end
::do_reconnect::
- local timeout = callback('reconnect_timeout')
+ timeout = callback('reconnect_timeout')
while timeout and state == 'error_reconnect' do
fiber.sleep(timeout)
timeout = callback('reconnect_timeout')
@@ -464,6 +464,7 @@ local function create_transport(host, port, user, password, callback,
set_state('error_reconnect', E_NO_CONNECTION, greeting)
timeout = callback('reconnect_timeout')
end
+ ::stop::
send_buf:recycle()
recv_buf:recycle()
worker_fiber = nil
@@ -475,9 +476,7 @@ local function create_transport(host, port, user, password, callback,
set_state('closed', E_NO_CONNECTION, 'Connection closed')
end
if worker_fiber then
- if worker_fiber:status() ~= 'dead' then
- worker_fiber:cancel()
- end
+ worker_fiber:cancel()
worker_fiber = nil
end
end
New patch:
==================================================================
commit fbd63b6195358549505597dcda86d21ccfbb272a
Author: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Date: Wed Dec 5 14:59:10 2018 +0300
netbox: fix wait_connected ignorance
After this patch d2468dacaf it became possible to
wrap an existing connection into netbox API. A regular
netbox.connect function was refactored so as to reuse
connection establishment code.
But connection should be established in a worker
fiber, not in a caller's one. Otherwise it is
impossible to do not wait for connect result.
The patch just moves connection establishment into a
worker fiber, without any functional changes.
Closes #3856
diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 2040d5e41..20deaf2bd 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -422,21 +422,12 @@ local function create_transport(host, port, user, password, callback,
local function start()
if state ~= 'initial' then return not is_final_state[state] end
- if not connection and not callback('reconnect_timeout') then
- set_state('error', E_NO_CONNECTION)
- return
- end
fiber.create(function()
- local ok, err
+ local ok, err, timeout
worker_fiber = fiber_self()
fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
- -- It is possible, if the first connection attempt had
- -- been failed, but reconnect timeout is set. In such
- -- a case the worker must be run, and immediately
- -- start reconnecting.
if not connection then
- set_state('error_reconnect', E_NO_CONNECTION, greeting)
- goto do_reconnect
+ goto do_connect
end
::handle_connection::
ok, err = pcall(protocol_sm)
@@ -447,23 +438,30 @@ local function create_transport(host, port, user, password, callback,
connection:close()
connection = nil
end
+ timeout = callback('reconnect_timeout')
::do_reconnect::
- local timeout = callback('reconnect_timeout')
- while timeout and state == 'error_reconnect' do
- fiber.sleep(timeout)
- timeout = callback('reconnect_timeout')
- if not timeout or state ~= 'error_reconnect' then
- break
- end
- connection, greeting =
- establish_connection(host, port,
- callback('fetch_connect_timeout'))
- if connection then
- goto handle_connection
- end
- set_state('error_reconnect', E_NO_CONNECTION, greeting)
- timeout = callback('reconnect_timeout')
+ if not timeout or state ~= 'error_reconnect' then
+ goto stop
+ end
+ fiber.sleep(timeout)
+ timeout = callback('reconnect_timeout')
+ if not timeout or state ~= 'error_reconnect' then
+ goto stop
end
+ ::do_connect::
+ connection, greeting =
+ establish_connection(host, port, callback('fetch_connect_timeout'))
+ if connection then
+ goto handle_connection
+ end
+ timeout = callback('reconnect_timeout')
+ if not timeout then
+ set_state('error', E_NO_CONNECTION, greeting)
+ goto stop
+ end
+ set_state('error_reconnect', E_NO_CONNECTION, greeting)
+ goto do_reconnect
+ ::stop::
send_buf:recycle()
recv_buf:recycle()
worker_fiber = nil
@@ -993,15 +991,7 @@ end
-- @retval Net.box object.
--
local function connect(...)
- local host, port, opts = parse_connect_params(...)
- local connection, greeting =
- establish_connection(host, port, opts.connect_timeout)
- if not connection then
- local dummy_conn = new_sm(host, port, opts)
- dummy_conn.error = greeting
- return dummy_conn
- end
- return new_sm(host, port, opts, connection, greeting)
+ return new_sm(parse_connect_params(...))
end
local function check_remote_arg(remote, method)
diff --git a/test/box/net.box.result b/test/box/net.box.result
index faae5e9a5..b6b6d88a4 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3405,6 +3405,23 @@ ok, err
- false
- Connection closed
...
+--
+-- gh-3856: wait_connected = false is ignored.
+--
+c = net.connect('8.8.8.8:123456', {wait_connected = false})
+---
+...
+c
+---
+- opts:
+ wait_connected: false
+ host: 8.8.8.8
+ state: initial
+ port: '123456'
+...
+c:close()
+---
+...
box.schema.func.drop('do_long')
---
...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index dd6c2fd94..5152e3569 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1374,6 +1374,13 @@ ready = true
while not err do fiber.sleep(0.01) end
ok, err
+--
+-- gh-3856: wait_connected = false is ignored.
+--
+c = net.connect('8.8.8.8:123456', {wait_connected = false})
+c
+c:close()
+
box.schema.func.drop('do_long')
box.schema.user.revoke('guest', 'write', 'space', '_schema')
box.schema.user.revoke('guest', 'read,write', 'space', '_space')
^ permalink raw reply [flat|nested] 4+ messages in thread