From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id D2A94445320 for ; Fri, 7 Aug 2020 00:48:33 +0300 (MSK) From: Vladislav Shpilevoy Date: Thu, 6 Aug 2020 23:48:31 +0200 Message-Id: <7f34d056c1f2b0f461740a4b04221881074106cd.1596750426.git.v.shpilevoy@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH vshard 1/1] replicaset: check URI match when rebind connection List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, yaroslav.dynnikov@tarantool.org, olegrok@tarantool.org Reconfiguration in router and storage always creates new replica objects, and outdates old objects. Since most of the time reconfiguration does not change cluster topology and URIs, old connections are relocated to the new replicaset objects so as not to reconnect on each router.cfg() and storage.cfg(). Connections were moved from old objects to new ones by UUID match not taking into account possible URI change. If URI was changed, the new replica object took the old connection with old URI, and kept trying to connect to the old address. The patch makes connection relocation work only if old and new URI match. Closes #245 --- Branch: http://github.com/tarantool/vshard/tree/gerold103/gh-245-router-reconnect Issue: https://github.com/tarantool/vshard/issues/245 test/router/reconnect_to_master.result | 55 ++++++++++++++++++++++++ test/router/reconnect_to_master.test.lua | 26 +++++++++++ vshard/replicaset.lua | 2 +- 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/test/router/reconnect_to_master.result b/test/router/reconnect_to_master.result index d640f0b..aa33f9c 100644 --- a/test/router/reconnect_to_master.result +++ b/test/router/reconnect_to_master.result @@ -169,6 +169,61 @@ is_disconnected() --- - false ... +-- +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was changed in +-- the config for any instance, it used old connection, because reconfiguration +-- compared connections by UUID instead of URI. +-- +util = require('util') +--- +... +-- Firstly, clean router from storage_1_a connection. +rs1_uuid = util.replicasets[1] +--- +... +rs1_cfg = cfg.sharding[rs1_uuid] +--- +... +cfg.sharding[rs1_uuid] = nil +--- +... +vshard.router.cfg(cfg) +--- +... +-- Now break the URI in the config. +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri +--- +... +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = 'https://bad_uri.com:123' +--- +... +-- Apply the bad config. +cfg.sharding[rs1_uuid] = rs1_cfg +--- +... +vshard.router.cfg(cfg) +--- +... +-- Should fail - master is not available because of the bad URI. +res, err = vshard.router.callrw(1, 'echo', {1}) +--- +... +res == nil and err ~= nil +--- +- true +... +-- Repair the config. +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri +--- +... +vshard.router.cfg(cfg) +--- +... +-- Should drop the old connection object and connect fine. +vshard.router.callrw(1, 'echo', {1}) +--- +- 1 +... _ = test_run:switch("default") --- ... diff --git a/test/router/reconnect_to_master.test.lua b/test/router/reconnect_to_master.test.lua index c315c9f..87270af 100644 --- a/test/router/reconnect_to_master.test.lua +++ b/test/router/reconnect_to_master.test.lua @@ -70,6 +70,32 @@ while is_disconnected() and i < max_iters do i = i + 1 fiber.sleep(0.1) end -- Master connection is active again. is_disconnected() +-- +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was changed in +-- the config for any instance, it used old connection, because reconfiguration +-- compared connections by UUID instead of URI. +-- +util = require('util') +-- Firstly, clean router from storage_1_a connection. +rs1_uuid = util.replicasets[1] +rs1_cfg = cfg.sharding[rs1_uuid] +cfg.sharding[rs1_uuid] = nil +vshard.router.cfg(cfg) +-- Now break the URI in the config. +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = 'https://bad_uri.com:123' +-- Apply the bad config. +cfg.sharding[rs1_uuid] = rs1_cfg +vshard.router.cfg(cfg) +-- Should fail - master is not available because of the bad URI. +res, err = vshard.router.callrw(1, 'echo', {1}) +res == nil and err ~= nil +-- Repair the config. +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri +vshard.router.cfg(cfg) +-- Should drop the old connection object and connect fine. +vshard.router.callrw(1, 'echo', {1}) + _ = test_run:switch("default") _ = test_run:cmd('stop server router_1') _ = test_run:cmd('cleanup server router_1') diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua index 0191936..b13d05e 100644 --- a/vshard/replicaset.lua +++ b/vshard/replicaset.lua @@ -456,7 +456,7 @@ local function rebind_replicasets(replicasets, old_replicasets) for replica_uuid, replica in pairs(replicaset.replicas) do local old_replica = old_replicaset and old_replicaset.replicas[replica_uuid] - if old_replica then + if old_replica and old_replica.uri == replica.uri then local conn = old_replica.conn replica.conn = conn replica.down_ts = old_replica.down_ts -- 2.21.1 (Apple Git-122.3)