From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp58.i.mail.ru (smtp58.i.mail.ru [217.69.128.38]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 0D6FF445321 for ; Fri, 7 Aug 2020 11:07:24 +0300 (MSK) From: Oleg Babin References: <7f34d056c1f2b0f461740a4b04221881074106cd.1596750426.git.v.shpilevoy@tarantool.org> Message-ID: <9b0b6fad-f296-aec9-881a-1efe13e89f72@tarantool.org> Date: Fri, 7 Aug 2020 11:07:22 +0300 MIME-Version: 1.0 In-Reply-To: <7f34d056c1f2b0f461740a4b04221881074106cd.1596750426.git.v.shpilevoy@tarantool.org> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-Language: en-GB Subject: Re: [Tarantool-patches] [PATCH vshard 1/1] replicaset: check URI match when rebind connection List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy , tarantool-patches@dev.tarantool.org, yaroslav.dynnikov@tarantool.org Hi! Thanks for your patch. LGTM. On 07/08/2020 00:48, Vladislav Shpilevoy wrote: > Reconfiguration in router and storage always creates new > replica objects, and outdates old objects. Since most of the > time reconfiguration does not change cluster topology and URIs, > old connections are relocated to the new replicaset objects so as > not to reconnect on each router.cfg() and storage.cfg(). > > Connections were moved from old objects to new ones by UUID match > not taking into account possible URI change. > > If URI was changed, the new replica object took the old connection > with old URI, and kept trying to connect to the old address. > > The patch makes connection relocation work only if old and new URI > match. > > Closes #245 > --- > Branch:http://github.com/tarantool/vshard/tree/gerold103/gh-245-router-reconnect > Issue:https://github.com/tarantool/vshard/issues/245 > > test/router/reconnect_to_master.result | 55 ++++++++++++++++++++++++ > test/router/reconnect_to_master.test.lua | 26 +++++++++++ > vshard/replicaset.lua | 2 +- > 3 files changed, 82 insertions(+), 1 deletion(-) > > diff --git a/test/router/reconnect_to_master.result b/test/router/reconnect_to_master.result > index d640f0b..aa33f9c 100644 > --- a/test/router/reconnect_to_master.result > +++ b/test/router/reconnect_to_master.result > @@ -169,6 +169,61 @@ is_disconnected() > --- > - false > ... > +-- > +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was changed in > +-- the config for any instance, it used old connection, because reconfiguration > +-- compared connections by UUID instead of URI. > +-- > +util = require('util') > +--- > +... > +-- Firstly, clean router from storage_1_a connection. > +rs1_uuid = util.replicasets[1] > +--- > +... > +rs1_cfg = cfg.sharding[rs1_uuid] > +--- > +... > +cfg.sharding[rs1_uuid] = nil > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Now break the URI in the config. > +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri > +--- > +... > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = 'https://bad_uri.com:123' > +--- > +... > +-- Apply the bad config. > +cfg.sharding[rs1_uuid] = rs1_cfg > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Should fail - master is not available because of the bad URI. > +res, err = vshard.router.callrw(1, 'echo', {1}) > +--- > +... > +res == nil and err ~= nil > +--- > +- true > +... > +-- Repair the config. > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Should drop the old connection object and connect fine. > +vshard.router.callrw(1, 'echo', {1}) > +--- > +- 1 > +... > _ = test_run:switch("default") > --- > ... > diff --git a/test/router/reconnect_to_master.test.lua b/test/router/reconnect_to_master.test.lua > index c315c9f..87270af 100644 > --- a/test/router/reconnect_to_master.test.lua > +++ b/test/router/reconnect_to_master.test.lua > @@ -70,6 +70,32 @@ while is_disconnected() and i < max_iters do i = i + 1 fiber.sleep(0.1) end > -- Master connection is active again. > is_disconnected() > > +-- > +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was changed in > +-- the config for any instance, it used old connection, because reconfiguration > +-- compared connections by UUID instead of URI. > +-- > +util = require('util') > +-- Firstly, clean router from storage_1_a connection. > +rs1_uuid = util.replicasets[1] > +rs1_cfg = cfg.sharding[rs1_uuid] > +cfg.sharding[rs1_uuid] = nil > +vshard.router.cfg(cfg) > +-- Now break the URI in the config. > +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = 'https://bad_uri.com:123' > +-- Apply the bad config. > +cfg.sharding[rs1_uuid] = rs1_cfg > +vshard.router.cfg(cfg) > +-- Should fail - master is not available because of the bad URI. > +res, err = vshard.router.callrw(1, 'echo', {1}) > +res == nil and err ~= nil > +-- Repair the config. > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri > +vshard.router.cfg(cfg) > +-- Should drop the old connection object and connect fine. > +vshard.router.callrw(1, 'echo', {1}) > + > _ = test_run:switch("default") > _ = test_run:cmd('stop server router_1') > _ = test_run:cmd('cleanup server router_1') > diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua > index 0191936..b13d05e 100644 > --- a/vshard/replicaset.lua > +++ b/vshard/replicaset.lua > @@ -456,7 +456,7 @@ local function rebind_replicasets(replicasets, old_replicasets) > for replica_uuid, replica in pairs(replicaset.replicas) do > local old_replica = old_replicaset and > old_replicaset.replicas[replica_uuid] > - if old_replica then > + if old_replica and old_replica.uri == replica.uri then > local conn = old_replica.conn > replica.conn = conn > replica.down_ts = old_replica.down_ts