[PATCH 1/1] netbox: fix wait_connected ignorance

Vladimir Davydov vdavydov.dev at gmail.com
Thu Dec 6 20:46:19 MSK 2018


On Wed, Dec 05, 2018 at 06:06:39PM +0300, Vladislav Shpilevoy wrote:
> After this patch d2468dacaf it became possible to
> wrap an existing connection into netbox API. A regular
> netbox.connect function was refactored so as to reuse
> connection establishment code.
> 
> But connection should be established in a worker
> fiber, not in a caller's one. Otherwise it is
> impossible to do not wait for connect result.
> 
> The patch just moves connection establishment into a
> worker fiber, without any functional changes.
> 
> Closes #3856
> ---
> https://github.com/tarantool/tarantool/tree/gerold103/gh-3856-netbox-ignores-wait-connected
> https://github.com/tarantool/tarantool/issues/3856

Please rebase on top of 2.1 - we don't do merges anymore.

Should I cherry-pick it to 1.10?

> 
>  src/box/lua/net_box.lua   | 34 ++++++++++++++--------------------
>  test/box/net.box.result   | 17 +++++++++++++++++
>  test/box/net.box.test.lua |  7 +++++++
>  3 files changed, 38 insertions(+), 20 deletions(-)
> 
> diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
> index fd6ebf9de..d54b3e7d9 100644
> --- a/src/box/lua/net_box.lua
> +++ b/src/box/lua/net_box.lua
> @@ -419,21 +419,21 @@ local function create_transport(host, port, user, password, callback,
>  
>      local function start()
>          if state ~= 'initial' then return not is_final_state[state] end
> -        if not connection and not callback('reconnect_timeout') then
> -            set_state('error', E_NO_CONNECTION)
> -            return
> -        end
>          fiber.create(function()
>              local ok, err
>              worker_fiber = fiber_self()
>              fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
> -            -- It is possible, if the first connection attempt had
> -            -- been failed, but reconnect timeout is set. In such
> -            -- a case the worker must be run, and immediately
> -            -- start reconnecting.
>              if not connection then
> -                set_state('error_reconnect', E_NO_CONNECTION, greeting)
> -                goto do_reconnect
> +                local tm = callback('fetch_connect_timeout')
> +                connection, greeting = establish_connection(host, port, tm)

This code is very similar to the reconnect code below. You probably
didn't reuse it for a reason, but still, maybe we could do something
like this?

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index d54b3e7d..f342889a 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -424,16 +424,7 @@ local function create_transport(host, port, user, password, callback,
             worker_fiber = fiber_self()
             fiber.name(string.format('%s:%s (net.box)', host, port), {truncate=true})
             if not connection then
-                local tm = callback('fetch_connect_timeout')
-                connection, greeting = establish_connection(host, port, tm)
-                if not connection then
-                    if not callback('reconnect_timeout') then
-                        set_state('error', E_NO_CONNECTION, greeting)
-                        return
-                    end
-                    set_state('error_reconnect', E_NO_CONNECTION, greeting)
-                    goto do_reconnect
-                end
+                goto do_connect
             end
     ::handle_connection::
             ok, err = pcall(protocol_sm)
@@ -448,6 +439,7 @@ local function create_transport(host, port, user, password, callback,
             local timeout = callback('reconnect_timeout')
             while timeout and state == 'error_reconnect' do
                 fiber.sleep(timeout)
+    ::do_connect::
                 timeout = callback('reconnect_timeout')
                 if not timeout or state ~= 'error_reconnect' then
                     break

> +                if not connection then
> +                    if not callback('reconnect_timeout') then
> +                        set_state('error', E_NO_CONNECTION, greeting)
> +                        return

Shouldn't we clear worker_fiber here?

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index d54b3e7d..33f26706 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -429,7 +429,7 @@ local function create_transport(host, port, user, password, callback,
                 if not connection then
                     if not callback('reconnect_timeout') then
                         set_state('error', E_NO_CONNECTION, greeting)
-                        return
+                        goto stop
                     end
                     set_state('error_reconnect', E_NO_CONNECTION, greeting)
                     goto do_reconnect
@@ -461,6 +461,7 @@ local function create_transport(host, port, user, password, callback,
                 set_state('error_reconnect', E_NO_CONNECTION, greeting)
                 timeout = callback('reconnect_timeout')
             end
+    ::stop::
             send_buf:recycle()
             recv_buf:recycle()
             worker_fiber = nil

> +                    end
> +                    set_state('error_reconnect', E_NO_CONNECTION, greeting)
> +                    goto do_reconnect
> +                end
>              end
>      ::handle_connection::
>              ok, err = pcall(protocol_sm)
> @@ -472,7 +472,9 @@ local function create_transport(host, port, user, password, callback,
>              set_state('closed', E_NO_CONNECTION, 'Connection closed')
>          end
>          if worker_fiber then
> -            worker_fiber:cancel()
> +            if worker_fiber:status() ~= 'dead' then
> +                worker_fiber:cancel()
> +            end

... then you probably wouldn't need to change this.

>              worker_fiber = nil
>          end
>      end
 



More information about the Tarantool-patches mailing list