From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 649C471218; Wed, 27 Oct 2021 11:10:36 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 649C471218 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1635322236; bh=3/nFDKzwiqSPUYc3gCFj1VgaZW5MLipT26TfasYrhfc=; h=Date:To:References:In-Reply-To:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc: From; b=QwCtdz1cZPeETxSTD6uxn6+i/oa5yT6ZW/R9/BBydB6IN9HW3ty82LZgbdx4KlgL/ fYgXHfx0KqIQvJowBau1C0fG8qNzuN6vMvZEyAahG0vQ5fn5DzXJADeLyFQem5mVqo l5zTVakuclLM8XDweIo1BsFcEOU5k2jWN/C0nZBo= Received: from smtp35.i.mail.ru (smtp35.i.mail.ru [94.100.177.95]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 5E04871218 for ; Wed, 27 Oct 2021 11:10:33 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 5E04871218 Received: by smtp35.i.mail.ru with esmtpa (envelope-from ) id 1mfe11-00058C-KZ; Wed, 27 Oct 2021 11:10:32 +0300 Message-ID: <7b3ddb15-6149-a1db-1882-5714b7388b15@tarantool.org> Date: Wed, 27 Oct 2021 11:10:31 +0300 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:91.0) Gecko/20100101 Thunderbird/91.2.1 Content-Language: en-GB To: Vitaliia Ioffe References: <46ad5eaec4e0f29b3770c2bc53f1f21a45bc0c22.1635318768.git.sergepetrenko@tarantool.org> <1635321176.960768974@f166.i.mail.ru> In-Reply-To: <1635321176.960768974@f166.i.mail.ru> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-4EC0790: 10 X-7564579A: 646B95376F6C166E X-77F55803: 4F1203BC0FB41BD9D1D35DBD2D15487E99C830D5B4EA346169F334C4E4A2904B182A05F538085040D021FE72AB4E7F1063CCFA7E793109751BA59A2F273AA429E44D1C13EF41B13C X-7FA49CB5: FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE743AE26858062A689EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F79006375045C080FAAE96148638F802B75D45FF36EB9D2243A4F8B5A6FCA7DBDB1FC311F39EFFDF887939037866D6147AF826D8DB9AB4C87288B9ECD2167C6DFCC52D12117882F4460429724CE54428C33FAD305F5C1EE8F4F765FCAA867293B0326636D2E47CDBA5A96583BD4B6F7A4D31EC0BC014FD901B82EE079FA2833FD35BB23D27C277FBC8AE2E8BEC1C9C6CFAD2A0F5A471835C12D1D977C4224003CC836476EB9C4185024447017B076A6E789B0E975F5C1EE8F4F765FCED058CF5F1569FC73AA81AA40904B5D9CF19DD082D7633A078D18283394535A93AA81AA40904B5D98AA50765F79006376123584C0BA1C1EFD81D268191BDAD3D698AB9A7B718F8C4D1B931868CE1C5781A620F70A64A45A98AA50765F79006372E808ACE2090B5E1725E5C173C3A84C3C5EA940A35A165FF2DBA43225CD8A89F616AD31D0D18CD5C5E1C53F199C2BB95B5C8C57E37DE458BEDA766A37F9254B7 X-B7AD71C0: AC4F5C86D027EB782CDD5689AFBDA7A213B5FB47DCBC3458834459D11680B50541910CD1C429DD5545CD0ED175473416 X-C1DE0DAB: 0D63561A33F958A59A9687E0CA6CB77CEC435330E7A69EB41F25DDAFB7A95607D59269BC5F550898D99A6476B3ADF6B47008B74DF8BB9EF7333BD3B22AA88B938A852937E12ACA752DA3D96DA0CEF5C48E8E86DC7131B365E7726E8460B7C23C X-C8649E89: 4E36BF7865823D7055A7F0CF078B5EC49A30900B95165D344F2126FF337FA651C47E9F3517790740FF9B02A981FA624EF6DD34DB9ACC92255669268088D0D3411D7E09C32AA3244C9B9121D04B27051317451323EFFA7E2E63871F383B54D9B3FACE5A9C96DEB163 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu530nj6fImhcD4MUrOEAnl0W826KZ9Q+tr5ycPtXkTV4k65bRjmOUUP8cvGozZ33TWg5HZplvhhXbhDGzqmQDTd6OAevLeAnq3Ra9uf7zvY2zzsIhlcp/Y7m53TZgf2aB4JOg4gkr2biojOHwMx23X6B0YJdtlT3oKxQ== X-Mailru-Sender: 11C2EC085EDE56FA38FD4C59F7EFE4072526D0CB9650BEBDE7F1837CAE03EDD8F96948CC93F46FD26BB2E709EA627F343C7DDD459B58856F0E45BC603594F5A135B915D4279FF0579437F6177E88F7363CDA0F3B3F5B9367 X-Mras: Ok Subject: Re: [Tarantool-patches] [PATCH v3 2/2] replication: fix replica disconnect upon reconfiguration X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Serge Petrenko via Tarantool-patches Reply-To: Serge Petrenko Cc: tarantool-patches@dev.tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" 27.10.2021 10:52, Vitaliia Ioffe пишет: > Why did not you use > > asserts:assert_server_follow_upstream(server, id) > > from asserts.lua here? I forgot about its existence, thanks! Force-pushed the updated version, here's the diff: diff --git a/test/replication-luatest/gh_4669_applier_reconnect_test.lua b/test/replication-luatest/gh_4669_applier_reconnect_test.lua index 9c6d26036..2dad87dd0 100644 --- a/test/replication-luatest/gh_4669_applier_reconnect_test.lua +++ b/test/replication-luatest/gh_4669_applier_reconnect_test.lua @@ -1,14 +1,10 @@  local t = require('luatest')  local cluster = require('test.luatest_helpers.cluster') +local asserts = require('test.luatest_helpers.asserts')  local helpers = require('test.luatest_helpers')  local g = t.group('gh-4669-applier-reconnect') -local function check_follow_master(server) -    return t.assert_equals( -        server:eval('return box.info.replication[1].upstream.status'), 'follow') -end -  g.before_each(function()      g.cluster = cluster:new({})      g.master = g.cluster:build_server({alias = 'master'}) @@ -22,7 +18,7 @@ g.before_each(function()      g.cluster:add_server(g.replica)      g.cluster:add_server(g.replica2)      g.cluster:start() -    check_follow_master(g.replica) +    asserts:assert_server_follow_upstream(g.replica, 1)  end)  g.after_each(function() @@ -48,6 +44,6 @@ g.test_applier_connection_on_reconfig = function(g)              }          }      ]]) -    check_follow_master(g.replica) +    asserts:assert_server_follow_upstream(g.replica, 1)      t.assert_equals(g.master:grep_log("exiting the relay loop"), nil)  end > +local function check_follow_master(server) > + return t.assert_equals( > + server:eval('return box.info.replication[1].upstream.status'), 'follow') > +end > Also, do we want to check this patch only for memtx? I asked because > you haven’t provided any engine for the test. Engine only matters when you create spaces in the testcase. I have none here so there's nothing to test actually. Also, even when a replication test creates some spaces we usually (but not always) don't care which engine it uses. Replication subsystem is generally agnostic of the storage engine used, because replication works via WAL, and it's the same for both memtx and vinyl. > -- > Vitaliia Ioffe > > Среда, 27 октября 2021, 10:20 +03:00 от Serge Petrenko > : > Replication reconfiguration used to work as follows: upon receiving a > new config disconnect from all the existing masters and try to connect > to all the masters from the new config. > > This lead to instances doing extra work when old config and new config > had the same nodes in them: instead of doing nothing, tarantool > reinitialized the connection. > > There was another problem: when an existing connection is broken, > master > takes some time to notice that. So, when replica resets the > connection, > it may try to reconnect faster than the master is able to notice its > absence. In this case replica wouldn't be able to reconnect due to > `duplicate connection with the same replica UUID`. So replication > would > stop for a replication_timeout, which may be quite large (seconds or > tens of seconds). > > Let's prevent tarantool from reconnecting to the same instance, if > there > already is a working connection. > > Closes #4669 > --- >  .../gh-4669-applier-reconfig-reconnect.md | 5 ++ >  src/box/box.cc | 31 ++++++++--- >  src/box/replication.cc | 41 +++++++++----- >  src/box/replication.h | 4 +- >  .../gh_4669_applier_reconnect_test.lua | 53 +++++++++++++++++++ >  5 files changed, 113 insertions(+), 21 deletions(-) >  create mode 100644 > changelogs/unreleased/gh-4669-applier-reconfig-reconnect.md >  create mode 100644 > test/replication-luatest/gh_4669_applier_reconnect_test.lua > > diff --git > a/changelogs/unreleased/gh-4669-applier-reconfig-reconnect.md > b/changelogs/unreleased/gh-4669-applier-reconfig-reconnect.md > new file mode 100644 > index 000000000..2a776872b > --- /dev/null > +++ b/changelogs/unreleased/gh-4669-applier-reconfig-reconnect.md > @@ -0,0 +1,5 @@ > +## bugfix/replication > + > +* Fixed replica reconnecting to a living master on any > `box.cfg{replication=...}` > + change. Such reconnects could lead to replica failing to restore > connection > + for `replication_timeout` seconds (gh-4669). > diff --git a/src/box/box.cc b/src/box/box.cc > index b1154affa..c2e4a55a7 100644 > --- a/src/box/box.cc > +++ b/src/box/box.cc > @@ -1261,7 +1261,7 @@ cfg_get_replication(int *p_count) >   * don't start appliers. >   */ >  static void > -box_sync_replication(bool connect_quorum) > +box_sync_replication(bool do_quorum, bool do_reuse) >  { >   int count = 0; >   struct applier **appliers = cfg_get_replication(&count); > @@ -1272,12 +1272,27 @@ box_sync_replication(bool connect_quorum) >   for (int i = 0; i < count; i++) >   applier_delete(appliers[i]); /* doesn't affect diag */ >   }); > - > - replicaset_connect(appliers, count, connect_quorum); > + replicaset_connect(appliers, count, do_quorum, do_reuse); > >   guard.is_active = false; >  } > > +static inline void > +box_restart_replication(void) > +{ > + const bool do_quorum = true; > + const bool do_reuse = false; > + box_sync_replication(do_quorum, do_reuse); > +} > + > +static inline void > +box_update_replication(void) > +{ > + const bool do_quorum = false; > + const bool do_reuse = true; > + box_sync_replication(do_quorum, do_reuse); > +} > + >  void >  box_set_replication(void) >  { > @@ -1296,7 +1311,7 @@ box_set_replication(void) >   * Stay in orphan mode in case we fail to connect to at least >   * 'replication_connect_quorum' remote instances. >   */ > - box_sync_replication(false); > + box_update_replication(); >   /* Follow replica */ >   replicaset_follow(); >   /* Wait until appliers are in sync */ > @@ -1416,7 +1431,7 @@ box_set_replication_anon(void) >   * them can register and others resend a >   * non-anonymous subscribe. >   */ > - box_sync_replication(true); > + box_restart_replication(); >   /* >   * Wait until the master has registered this >   * instance. > @@ -3279,7 +3294,7 @@ bootstrap(const struct tt_uuid *instance_uuid, >   * with connecting to 'replication_connect_quorum' masters. >   * If this also fails, throw an error. >   */ > - box_sync_replication(true); > + box_restart_replication(); > >   struct replica *master = replicaset_find_join_master(); >   assert(master == NULL || master->applier != NULL); > @@ -3356,7 +3371,7 @@ local_recovery(const struct tt_uuid > *instance_uuid, >   if (wal_dir_lock >= 0) { >   if (box_listen() != 0) >   diag_raise(); > - box_sync_replication(false); > + box_update_replication(); > >   struct replica *master; >   if (replicaset_needs_rejoin(&master)) { > @@ -3435,7 +3450,7 @@ local_recovery(const struct tt_uuid > *instance_uuid, > vclock_copy(&replicaset.vclock, &recovery->vclock); >   if (box_listen() != 0) >   diag_raise(); > - box_sync_replication(false); > + box_update_replication(); >   } >   stream_guard.is_active = false; >   recovery_finalize(recovery); > diff --git a/src/box/replication.cc b/src/box/replication.cc > index 1288bc9b1..10b4ac915 100644 > --- a/src/box/replication.cc > +++ b/src/box/replication.cc > @@ -507,7 +507,7 @@ replica_on_applier_state_f(struct trigger > *trigger, void *event) >   * upon reconfiguration of box.cfg.replication. >   */ >  static void > -replicaset_update(struct applier **appliers, int count) > +replicaset_update(struct applier **appliers, int count, bool > keep_connect) >  { >   replica_hash_t uniq; >   memset(&uniq, 0, sizeof(uniq)); > @@ -572,22 +572,39 @@ replicaset_update(struct applier **appliers, > int count) >   applier_stop(applier); >   applier_delete(applier); >   } > + > + replicaset.applier.total = count; > + replicaset.applier.connected = 0; > + replicaset.applier.loading = 0; > + replicaset.applier.synced = 0; >   replicaset_foreach(replica) { >   if (replica->applier == NULL) >   continue; > - applier = replica->applier; > - replica_clear_applier(replica); > - replica->applier_sync_state = APPLIER_DISCONNECTED; > + struct replica *other = replica_hash_search(&uniq, replica); > + if (keep_connect && other != NULL && > + (replica->applier->state == APPLIER_FOLLOW || > + replica->applier->state == APPLIER_SYNC)) { > + /* > + * Try not to interrupt working appliers upon > + * reconfiguration. > + */ > + replicaset.applier.connected++; > + replicaset.applier.synced++; > + replica_hash_remove(&uniq, other); > + applier = other->applier; > + replica_clear_applier(other); > + replica_delete(other); > + } else { > + applier = replica->applier; > + replica_clear_applier(replica); > + replica->applier_sync_state = APPLIER_DISCONNECTED; > + } >   applier_stop(applier); >   applier_delete(applier); >   } > > - /* Save new appliers */ > - replicaset.applier.total = count; > - replicaset.applier.connected = 0; > - replicaset.applier.loading = 0; > - replicaset.applier.synced = 0; > > + /* Save new appliers */ > replica_hash_foreach_safe(&uniq, replica, next) { >   replica_hash_remove(&uniq, replica); > > @@ -664,11 +681,11 @@ applier_on_connect_f(struct trigger > *trigger, void *event) > >  void >  replicaset_connect(struct applier **appliers, int count, > - bool connect_quorum) > + bool connect_quorum, bool keep_connect) >  { >   if (count == 0) { >   /* Cleanup the replica set. */ > - replicaset_update(appliers, count); > + replicaset_update(appliers, 0, false); >   return; >   } > > @@ -772,7 +789,7 @@ replicaset_connect(struct applier **appliers, > int count, > >   /* Now all the appliers are connected, update the replica set. */ >   try { > - replicaset_update(appliers, count); > + replicaset_update(appliers, count, keep_connect); >   } catch (Exception *e) { >   goto error; >   } > diff --git a/src/box/replication.h b/src/box/replication.h > index 4c82cd839..a8fed45e8 100644 > --- a/src/box/replication.h > +++ b/src/box/replication.h > @@ -439,10 +439,12 @@ replicaset_add_anon(const struct tt_uuid > *replica_uuid); >   * \param connect_quorum if this flag is set, fail unless at >   * least replication_connect_quorum >   * appliers have successfully connected. > + * \param keep_connect if this flag is set do not force a > reconnect if the > + * old connection to the replica is fine. >   */ >  void >  replicaset_connect(struct applier **appliers, int count, > - bool connect_quorum); > + bool connect_quorum, bool keep_connect); > >  /** >   * Check if the current instance fell too much behind its > diff --git > a/test/replication-luatest/gh_4669_applier_reconnect_test.lua > b/test/replication-luatest/gh_4669_applier_reconnect_test.lua > new file mode 100644 > index 000000000..9c6d26036 > --- /dev/null > +++ b/test/replication-luatest/gh_4669_applier_reconnect_test.lua > @@ -0,0 +1,53 @@ > +local t = require('luatest') > +local cluster = require('test.luatest_helpers.cluster') > +local helpers = require('test.luatest_helpers') > + > +local g = t.group('gh-4669-applier-reconnect') > + > +local function check_follow_master(server) > + return t.assert_equals( > + server:eval('return box.info.replication[1].upstream.status'), > 'follow') > +end > + > +g.before_each(function() > + g.cluster = cluster:new({}) > + g.master = g.cluster:build_server({alias = 'master'}) > + local box_cfg = { > + replication = ('%s/master.iproto'):format(helpers.SOCKET_DIR), > + } > + g.replica = g.cluster:build_server({alias = 'replica', box_cfg = > box_cfg}) > + g.replica2 = g.cluster:build_server({alias = 'replica2', box_cfg > = box_cfg}) > + > + g.cluster:add_server(g.master) > + g.cluster:add_server(g.replica) > + g.cluster:add_server(g.replica2) > + g.cluster:start() > + check_follow_master(g.replica) > +end) > + > +g.after_each(function() > + g.cluster:stop() > +end) > + > +-- Test that appliers aren't recreated upon replication > reconfiguration. > +-- Add and then remove two extra replicas to the configuration. > The master > +-- connection should stay intact. > +g.test_applier_connection_on_reconfig = function(g) > + g.replica:eval(([[ > + box.cfg{ > + replication = { > + box.cfg.listen, > + box.cfg.replication[1], > + "%s/replica2.iproto", > + } > + }]]):format(helpers.SOCKET_DIR)) > + g.replica:eval([[ > + box.cfg{ > + replication = { > + box.cfg.replication[2] > + } > + } > + ]]) > + check_follow_master(g.replica) > + t.assert_equals(g.master:grep_log("exiting the relay loop"), nil) > +end > -- > 2.30.1 (Apple Git-130) > -- Serge Petrenko