Hi! Thanks for the fix. LGTM. Best regards Yaroslav Dynnikov On Fri, 7 Aug 2020 at 00:48, Vladislav Shpilevoy wrote: > Reconfiguration in router and storage always creates new > replica objects, and outdates old objects. Since most of the > time reconfiguration does not change cluster topology and URIs, > old connections are relocated to the new replicaset objects so as > not to reconnect on each router.cfg() and storage.cfg(). > > Connections were moved from old objects to new ones by UUID match > not taking into account possible URI change. > > If URI was changed, the new replica object took the old connection > with old URI, and kept trying to connect to the old address. > > The patch makes connection relocation work only if old and new URI > match. > > Closes #245 > --- > Branch: > http://github.com/tarantool/vshard/tree/gerold103/gh-245-router-reconnect > Issue: https://github.com/tarantool/vshard/issues/245 > > test/router/reconnect_to_master.result | 55 ++++++++++++++++++++++++ > test/router/reconnect_to_master.test.lua | 26 +++++++++++ > vshard/replicaset.lua | 2 +- > 3 files changed, 82 insertions(+), 1 deletion(-) > > diff --git a/test/router/reconnect_to_master.result > b/test/router/reconnect_to_master.result > index d640f0b..aa33f9c 100644 > --- a/test/router/reconnect_to_master.result > +++ b/test/router/reconnect_to_master.result > @@ -169,6 +169,61 @@ is_disconnected() > --- > - false > ... > +-- > +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was > changed in > +-- the config for any instance, it used old connection, because > reconfiguration > +-- compared connections by UUID instead of URI. > +-- > +util = require('util') > +--- > +... > +-- Firstly, clean router from storage_1_a connection. > +rs1_uuid = util.replicasets[1] > +--- > +... > +rs1_cfg = cfg.sharding[rs1_uuid] > +--- > +... > +cfg.sharding[rs1_uuid] = nil > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Now break the URI in the config. > +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri > +--- > +... > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = ' > https://bad_uri.com:123' > +--- > +... > +-- Apply the bad config. > +cfg.sharding[rs1_uuid] = rs1_cfg > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Should fail - master is not available because of the bad URI. > +res, err = vshard.router.callrw(1, 'echo', {1}) > +--- > +... > +res == nil and err ~= nil > +--- > +- true > +... > +-- Repair the config. > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Should drop the old connection object and connect fine. > +vshard.router.callrw(1, 'echo', {1}) > +--- > +- 1 > +... > _ = test_run:switch("default") > --- > ... > diff --git a/test/router/reconnect_to_master.test.lua > b/test/router/reconnect_to_master.test.lua > index c315c9f..87270af 100644 > --- a/test/router/reconnect_to_master.test.lua > +++ b/test/router/reconnect_to_master.test.lua > @@ -70,6 +70,32 @@ while is_disconnected() and i < max_iters do i = i + 1 > fiber.sleep(0.1) end > -- Master connection is active again. > is_disconnected() > > +-- > +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was > changed in > +-- the config for any instance, it used old connection, because > reconfiguration > +-- compared connections by UUID instead of URI. > +-- > +util = require('util') > +-- Firstly, clean router from storage_1_a connection. > +rs1_uuid = util.replicasets[1] > +rs1_cfg = cfg.sharding[rs1_uuid] > +cfg.sharding[rs1_uuid] = nil > +vshard.router.cfg(cfg) > +-- Now break the URI in the config. > +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = ' > https://bad_uri.com:123' > +-- Apply the bad config. > +cfg.sharding[rs1_uuid] = rs1_cfg > +vshard.router.cfg(cfg) > +-- Should fail - master is not available because of the bad URI. > +res, err = vshard.router.callrw(1, 'echo', {1}) > +res == nil and err ~= nil > +-- Repair the config. > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri > +vshard.router.cfg(cfg) > +-- Should drop the old connection object and connect fine. > +vshard.router.callrw(1, 'echo', {1}) > + > _ = test_run:switch("default") > _ = test_run:cmd('stop server router_1') > _ = test_run:cmd('cleanup server router_1') > diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua > index 0191936..b13d05e 100644 > --- a/vshard/replicaset.lua > +++ b/vshard/replicaset.lua > @@ -456,7 +456,7 @@ local function rebind_replicasets(replicasets, > old_replicasets) > for replica_uuid, replica in pairs(replicaset.replicas) do > local old_replica = old_replicaset and > old_replicaset.replicas[replica_uuid] > - if old_replica then > + if old_replica and old_replica.uri == replica.uri then > local conn = old_replica.conn > replica.conn = conn > replica.down_ts = old_replica.down_ts > -- > 2.21.1 (Apple Git-122.3) > >