[tarantool-patches] [PATCH v2] replication: do not ignore replication_connect_quorum.
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Aug 7 20:28:47 MSK 2018
On Tue, Aug 07, 2018 at 03:44:13PM +0300, Serge Petrenko wrote:
> On bootstrap and after replication reconfiguration
> replication_connect_quorum was ignored. The instance tried to connect to
> every replica listed in replication parameter, and errored if it wasn't
> possible.
> The patch alters this behaviour. The instance still tries to connect to
> every node listed in replication, but does not raise an error if it was
> able to connect to at least replication_connect_quorum instances.
Please append a documentation request (@TarantoolBot) to the commit
message.
>
> Closes #3428
> ---
> https://github.com/tarantool/tarantool/issues/3428
> https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3428-replication-connect-quorum
>
> Changes in v2:
> - change test/replication/ddl.lua instance file to fix
> test failure on Travis.
>
> src/box/box.cc | 6 +++---
> src/box/replication.cc | 8 +++-----
> src/box/replication.h | 7 ++++---
> test/replication-py/init_storage.test.py | 2 +-
> test/replication-py/master.lua | 2 ++
> test/replication-py/replica.lua | 2 ++
> test/replication/autobootstrap.lua | 3 ++-
> test/replication/autobootstrap_guest.lua | 2 +-
> test/replication/ddl.lua | 3 ++-
> test/replication/errinj.result | 6 +++---
> test/replication/errinj.test.lua | 6 +++---
> test/replication/master.lua | 1 +
> test/replication/master_quorum.lua | 3 ++-
> test/replication/on_replace.lua | 3 ++-
> test/replication/quorum.lua | 4 ++--
> test/replication/rebootstrap.lua | 2 +-
> test/replication/replica_no_quorum.lua | 3 ++-
> test/replication/replica_timeout.lua | 3 ++-
> test/replication/replica_uuid_ro.lua | 2 +-
> 19 files changed, 39 insertions(+), 29 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index e3eb2738f..f8731f464 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -595,7 +595,7 @@ cfg_get_replication(int *p_count)
> * don't start appliers.
> */
> static void
> -box_sync_replication(double timeout, bool connect_all)
> +box_sync_replication(double timeout, bool reach_quorum)
After this patch, 'timeout' always equals replication_connect_timeout
so you don't need to pass it explicitly anymore. Please remove it from
this function and from replicaset_connect.
Also, I don't like 'reach_quorum' name. Would 'connect_quorum' sound
better? Or may be we should pass the minimal number of masters to
connect instead?
> {
> int count = 0;
> struct applier **appliers = cfg_get_replication(&count);
> @@ -607,7 +607,7 @@ box_sync_replication(double timeout, bool connect_all)
> applier_delete(appliers[i]); /* doesn't affect diag */
> });
>
> - replicaset_connect(appliers, count, timeout, connect_all);
> + replicaset_connect(appliers, count, timeout, reach_quorum);
>
> guard.is_active = false;
> }
> @@ -1888,7 +1888,7 @@ box_cfg_xc(void)
> * receive the same replica set UUID when a new cluster
> * is deployed.
> */
> - box_sync_replication(TIMEOUT_INFINITY, true);
> + box_sync_replication(replication_connect_timeout, true);
> /* Bootstrap a new master */
> bootstrap(&replicaset_uuid, &is_bootstrap_leader);
> }
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index 528fe4459..a6c60220f 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -46,7 +46,7 @@ struct tt_uuid INSTANCE_UUID;
> struct tt_uuid REPLICASET_UUID;
>
> double replication_timeout = 1.0; /* seconds */
> -double replication_connect_timeout = 4.0; /* seconds */
> +double replication_connect_timeout = 10.0; /* seconds */
Why? BTW, this isn't enough - replication_connect_timeout is actually
set in load_cfg.lua (I've no idea why they differ).
> int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
> double replication_sync_lag = 10.0; /* seconds */
>
> @@ -540,7 +540,7 @@ applier_on_connect_f(struct trigger *trigger, void *event)
>
> void
> replicaset_connect(struct applier **appliers, int count,
> - double timeout, bool connect_all)
> + double timeout, bool reach_quorum)
> {
> if (count == 0) {
> /* Cleanup the replica set. */
> @@ -587,15 +587,13 @@ replicaset_connect(struct applier **appliers, int count,
> double wait_start = ev_monotonic_now(loop());
> if (fiber_cond_wait_timeout(&state.wakeup, timeout) != 0)
> break;
> - if (state.failed > 0 && connect_all)
> - break;
I guess you should break the loop if
state.failed > count - min_count_to_connect
> timeout -= ev_monotonic_now(loop()) - wait_start;
> }
> if (state.connected < count) {
> say_crit("failed to connect to %d out of %d replicas",
> count - state.connected, count);
> /* Timeout or connection failure. */
> - if (connect_all)
> + if (reach_quorum && state.connected < replication_connect_quorum)
replication_connect_quorum can be greater than the number of configured
replicas. I think you should use MIN(count, replication_connect_quorum).
> goto error;
> } else {
> say_verbose("connected to %d replicas", state.connected);
> diff --git a/src/box/replication.h b/src/box/replication.h
> index 95122eb45..c16c8b56c 100644
> --- a/src/box/replication.h
> +++ b/src/box/replication.h
> @@ -357,12 +357,13 @@ replicaset_add(uint32_t replica_id, const struct tt_uuid *instance_uuid);
> * \param appliers the array of appliers
> * \param count size of appliers array
> * \param timeout connection timeout
> - * \param connect_all if this flag is set, fail unless all
> - * appliers have successfully connected
> + * \param reach_quorum if this flag is set, fail unless at
> + * least replication_connect_quorum
> + * appliers have successfully connected.
> */
> void
> replicaset_connect(struct applier **appliers, int count,
> - double timeout, bool connect_all);
> + double timeout, bool reach_quorum);
>
> /**
> * Resume all appliers registered with the replica set.
> diff --git a/test/replication-py/init_storage.test.py b/test/replication-py/init_storage.test.py
> index 0911a02c0..32b4639f1 100644
> --- a/test/replication-py/init_storage.test.py
> +++ b/test/replication-py/init_storage.test.py
> @@ -57,7 +57,7 @@ print '-------------------------------------------------------------'
>
> server.stop()
> replica = TarantoolServer(server.ini)
> -replica.script = 'replication/replica.lua'
> +replica.script = 'replication-py/replica.lua'
> replica.vardir = server.vardir #os.path.join(server.vardir, 'replica')
> replica.rpl_master = master
> replica.deploy(wait=False)
> diff --git a/test/replication-py/master.lua b/test/replication-py/master.lua
> index 0f9f7a6f0..51283efdf 100644
> --- a/test/replication-py/master.lua
> +++ b/test/replication-py/master.lua
> @@ -3,6 +3,8 @@ os = require('os')
> box.cfg({
> listen = os.getenv("LISTEN"),
> memtx_memory = 107374182,
> + replication_connect_timeout = 1.0,
> + replication_timeout = 0.3
Why do you need to adjust the timeouts?
> })
>
> require('console').listen(os.getenv('ADMIN'))
> diff --git a/test/replication-py/replica.lua b/test/replication-py/replica.lua
> index 278291bba..b9d193b70 100644
> --- a/test/replication-py/replica.lua
> +++ b/test/replication-py/replica.lua
> @@ -7,6 +7,8 @@ box.cfg({
> listen = os.getenv("LISTEN"),
> replication = os.getenv("MASTER"),
> memtx_memory = 107374182,
> + replication_connect_timeout = 1.0,
> + replication_timeout = 0.3
> })
>
> box_cfg_done = true
> diff --git a/test/replication/autobootstrap.lua b/test/replication/autobootstrap.lua
> index 4f55417ae..8fc6809de 100644
> --- a/test/replication/autobootstrap.lua
> +++ b/test/replication/autobootstrap.lua
> @@ -21,7 +21,8 @@ box.cfg({
> USER..':'..PASSWORD..'@'..instance_uri(2);
> USER..':'..PASSWORD..'@'..instance_uri(3);
> };
> - replication_connect_timeout = 0.5,
> + replication_connect_timeout = 3.0;
> + replication_timeout = 0.5;
> })
>
> box.once("bootstrap", function()
> diff --git a/test/replication/autobootstrap_guest.lua b/test/replication/autobootstrap_guest.lua
> index 40fef2c7a..7cd921e3c 100644
> --- a/test/replication/autobootstrap_guest.lua
> +++ b/test/replication/autobootstrap_guest.lua
> @@ -20,7 +20,7 @@ box.cfg({
> instance_uri(2);
> instance_uri(3);
> };
> - replication_connect_timeout = 0.5,
> + replication_connect_timeout = 5,
Why do you use different timeouts in different tests?
More information about the Tarantool-patches
mailing list