From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp55.i.mail.ru (smtp55.i.mail.ru [217.69.128.35]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id D6E8C445320 for ; Fri, 7 Aug 2020 01:02:30 +0300 (MSK) Received: by smtp55.i.mail.ru with esmtpa (envelope-from ) id 1k3ny2-0000bs-2M for tarantool-patches@dev.tarantool.org; Fri, 07 Aug 2020 01:02:30 +0300 Received: by mail-lj1-f172.google.com with SMTP id z14so38939ljm.1 for ; Thu, 06 Aug 2020 15:02:30 -0700 (PDT) MIME-Version: 1.0 References: <7f34d056c1f2b0f461740a4b04221881074106cd.1596750426.git.v.shpilevoy@tarantool.org> In-Reply-To: <7f34d056c1f2b0f461740a4b04221881074106cd.1596750426.git.v.shpilevoy@tarantool.org> From: Yaroslav Dynnikov Date: Fri, 7 Aug 2020 01:02:18 +0300 Message-ID: Content-Type: multipart/alternative; boundary="000000000000fde36705ac3ca4ba" Subject: Re: [Tarantool-patches] [PATCH vshard 1/1] replicaset: check URI match when rebind connection List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy Cc: Yaroslav Dynnikov , tml --000000000000fde36705ac3ca4ba Content-Type: text/plain; charset="UTF-8" Hi! Thanks for the fix. LGTM. Best regards Yaroslav Dynnikov On Fri, 7 Aug 2020 at 00:48, Vladislav Shpilevoy wrote: > Reconfiguration in router and storage always creates new > replica objects, and outdates old objects. Since most of the > time reconfiguration does not change cluster topology and URIs, > old connections are relocated to the new replicaset objects so as > not to reconnect on each router.cfg() and storage.cfg(). > > Connections were moved from old objects to new ones by UUID match > not taking into account possible URI change. > > If URI was changed, the new replica object took the old connection > with old URI, and kept trying to connect to the old address. > > The patch makes connection relocation work only if old and new URI > match. > > Closes #245 > --- > Branch: > http://github.com/tarantool/vshard/tree/gerold103/gh-245-router-reconnect > Issue: https://github.com/tarantool/vshard/issues/245 > > test/router/reconnect_to_master.result | 55 ++++++++++++++++++++++++ > test/router/reconnect_to_master.test.lua | 26 +++++++++++ > vshard/replicaset.lua | 2 +- > 3 files changed, 82 insertions(+), 1 deletion(-) > > diff --git a/test/router/reconnect_to_master.result > b/test/router/reconnect_to_master.result > index d640f0b..aa33f9c 100644 > --- a/test/router/reconnect_to_master.result > +++ b/test/router/reconnect_to_master.result > @@ -169,6 +169,61 @@ is_disconnected() > --- > - false > ... > +-- > +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was > changed in > +-- the config for any instance, it used old connection, because > reconfiguration > +-- compared connections by UUID instead of URI. > +-- > +util = require('util') > +--- > +... > +-- Firstly, clean router from storage_1_a connection. > +rs1_uuid = util.replicasets[1] > +--- > +... > +rs1_cfg = cfg.sharding[rs1_uuid] > +--- > +... > +cfg.sharding[rs1_uuid] = nil > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Now break the URI in the config. > +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri > +--- > +... > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = ' > https://bad_uri.com:123' > +--- > +... > +-- Apply the bad config. > +cfg.sharding[rs1_uuid] = rs1_cfg > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Should fail - master is not available because of the bad URI. > +res, err = vshard.router.callrw(1, 'echo', {1}) > +--- > +... > +res == nil and err ~= nil > +--- > +- true > +... > +-- Repair the config. > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri > +--- > +... > +vshard.router.cfg(cfg) > +--- > +... > +-- Should drop the old connection object and connect fine. > +vshard.router.callrw(1, 'echo', {1}) > +--- > +- 1 > +... > _ = test_run:switch("default") > --- > ... > diff --git a/test/router/reconnect_to_master.test.lua > b/test/router/reconnect_to_master.test.lua > index c315c9f..87270af 100644 > --- a/test/router/reconnect_to_master.test.lua > +++ b/test/router/reconnect_to_master.test.lua > @@ -70,6 +70,32 @@ while is_disconnected() and i < max_iters do i = i + 1 > fiber.sleep(0.1) end > -- Master connection is active again. > is_disconnected() > > +-- > +-- gh-245: dynamic uri reconfiguration didn't work - even if URI was > changed in > +-- the config for any instance, it used old connection, because > reconfiguration > +-- compared connections by UUID instead of URI. > +-- > +util = require('util') > +-- Firstly, clean router from storage_1_a connection. > +rs1_uuid = util.replicasets[1] > +rs1_cfg = cfg.sharding[rs1_uuid] > +cfg.sharding[rs1_uuid] = nil > +vshard.router.cfg(cfg) > +-- Now break the URI in the config. > +old_uri = rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = ' > https://bad_uri.com:123' > +-- Apply the bad config. > +cfg.sharding[rs1_uuid] = rs1_cfg > +vshard.router.cfg(cfg) > +-- Should fail - master is not available because of the bad URI. > +res, err = vshard.router.callrw(1, 'echo', {1}) > +res == nil and err ~= nil > +-- Repair the config. > +rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri = old_uri > +vshard.router.cfg(cfg) > +-- Should drop the old connection object and connect fine. > +vshard.router.callrw(1, 'echo', {1}) > + > _ = test_run:switch("default") > _ = test_run:cmd('stop server router_1') > _ = test_run:cmd('cleanup server router_1') > diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua > index 0191936..b13d05e 100644 > --- a/vshard/replicaset.lua > +++ b/vshard/replicaset.lua > @@ -456,7 +456,7 @@ local function rebind_replicasets(replicasets, > old_replicasets) > for replica_uuid, replica in pairs(replicaset.replicas) do > local old_replica = old_replicaset and > old_replicaset.replicas[replica_uuid] > - if old_replica then > + if old_replica and old_replica.uri == replica.uri then > local conn = old_replica.conn > replica.conn = conn > replica.down_ts = old_replica.down_ts > -- > 2.21.1 (Apple Git-122.3) > > --000000000000fde36705ac3ca4ba Content-Type: text/html; charset="UTF-8" Content-Transfer-Encoding: quoted-printable
Hi!

Thanks for the fix. LGTM= .

Best regards
Yaroslav Dynnikov


On Fri, 7 Aug 2020 at 00:48, Vladislav Shpilevoy &= lt;v.shpilevoy@tarantool.org> wrote:
Re= configuration in router and storage always creates new
replica objects, and outdates old objects. Since most of the
time reconfiguration does not change cluster topology and URIs,
old connections are relocated to the new replicaset objects so as
not to reconnect on each router.cfg() and storage.cfg().

Connections were moved from old objects to new ones by UUID match
not taking into account possible URI change.

If URI was changed, the new replica object took the old connection
with old URI, and kept trying to connect to the old address.

The patch makes connection relocation work only if old and new URI
match.

Closes #245
---
Branch:
http://github.com/t= arantool/vshard/tree/gerold103/gh-245-router-reconnect
Issue: https://github.com/tarantool/vshard/issues/245<= /a>

=C2=A0test/router/reconnect_to_master.result=C2=A0 =C2=A0| 55 +++++++++++++= +++++++++++
=C2=A0test/router/reconnect_to_master.test.lua | 26 +++++++++++
=C2=A0vshard/replicaset.lua=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 |=C2=A0 2 +-
=C2=A03 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/test/router/reconnect_to_master.result b/test/router/reconnect= _to_master.result
index d640f0b..aa33f9c 100644
--- a/test/router/reconnect_to_master.result
+++ b/test/router/reconnect_to_master.result
@@ -169,6 +169,61 @@ is_disconnected()
=C2=A0---
=C2=A0- false
=C2=A0...
+--
+-- gh-245: dynamic uri reconfiguration didn't work - even if URI was c= hanged in
+-- the config for any instance, it used old connection, because reconfigur= ation
+-- compared connections by UUID instead of URI.
+--
+util =3D require('util')
+---
+...
+-- Firstly, clean router from storage_1_a connection.
+rs1_uuid =3D util.replicasets[1]
+---
+...
+rs1_cfg =3D cfg.sharding[rs1_uuid]
+---
+...
+cfg.sharding[rs1_uuid] =3D nil
+---
+...
+vshard.router.cfg(cfg)
+---
+...
+-- Now break the URI in the config.
+old_uri =3D rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri
+---
+...
+rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri =3D '
https://bad_uri= .com:123'
+---
+...
+-- Apply the bad config.
+cfg.sharding[rs1_uuid] =3D rs1_cfg
+---
+...
+vshard.router.cfg(cfg)
+---
+...
+-- Should fail - master is not available because of the bad URI.
+res, err =3D vshard.router.callrw(1, 'echo', {1})
+---
+...
+res =3D=3D nil and err ~=3D nil
+---
+- true
+...
+-- Repair the config.
+rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri =3D old_uri
+---
+...
+vshard.router.cfg(cfg)
+---
+...
+-- Should drop the old connection object and connect fine.
+vshard.router.callrw(1, 'echo', {1})
+---
+- 1
+...
=C2=A0_ =3D test_run:switch("default")
=C2=A0---
=C2=A0...
diff --git a/test/router/reconnect_to_master.test.lua b/test/router/reconne= ct_to_master.test.lua
index c315c9f..87270af 100644
--- a/test/router/reconnect_to_master.test.lua
+++ b/test/router/reconnect_to_master.test.lua
@@ -70,6 +70,32 @@ while is_disconnected() and i < max_iters do i =3D i = + 1 fiber.sleep(0.1) end
=C2=A0-- Master connection is active again.
=C2=A0is_disconnected()

+--
+-- gh-245: dynamic uri reconfiguration didn't work - even if URI was c= hanged in
+-- the config for any instance, it used old connection, because reconfigur= ation
+-- compared connections by UUID instead of URI.
+--
+util =3D require('util')
+-- Firstly, clean router from storage_1_a connection.
+rs1_uuid =3D util.replicasets[1]
+rs1_cfg =3D cfg.sharding[rs1_uuid]
+cfg.sharding[rs1_uuid] =3D nil
+vshard.router.cfg(cfg)
+-- Now break the URI in the config.
+old_uri =3D rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri
+rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri =3D 'https://bad_uri= .com:123'
+-- Apply the bad config.
+cfg.sharding[rs1_uuid] =3D rs1_cfg
+vshard.router.cfg(cfg)
+-- Should fail - master is not available because of the bad URI.
+res, err =3D vshard.router.callrw(1, 'echo', {1})
+res =3D=3D nil and err ~=3D nil
+-- Repair the config.
+rs1_cfg.replicas[util.name_to_uuid.storage_1_a].uri =3D old_uri
+vshard.router.cfg(cfg)
+-- Should drop the old connection object and connect fine.
+vshard.router.callrw(1, 'echo', {1})
+
=C2=A0_ =3D test_run:switch("default")
=C2=A0_ =3D test_run:cmd('stop server router_1')
=C2=A0_ =3D test_run:cmd('cleanup server router_1')
diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
index 0191936..b13d05e 100644
--- a/vshard/replicaset.lua
+++ b/vshard/replicaset.lua
@@ -456,7 +456,7 @@ local function rebind_replicasets(replicasets, old_repl= icasets)
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0for replica_uuid, replica in pairs(replic= aset.replicas) do
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0local old_replica =3D old_r= eplicaset and
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0old_replicaset.replicas[replic= a_uuid]
-=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 if old_replica then
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 if old_replica and old_replica.u= ri =3D=3D replica.uri then
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0local conn = =3D old_replica.conn
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0replica.conn = =3D conn
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0replica.down_= ts =3D old_replica.down_ts
--
2.21.1 (Apple Git-122.3)

--000000000000fde36705ac3ca4ba--