[tarantool-patches] [PATCH v2] replication: do not ignore replication_connect_quorum.

Vladimir Davydov vdavydov.dev at gmail.com
Tue Aug 7 20:28:47 MSK 2018


On Tue, Aug 07, 2018 at 03:44:13PM +0300, Serge Petrenko wrote:
> On bootstrap and after replication reconfiguration
> replication_connect_quorum was ignored. The instance tried to connect to
> every replica listed in replication parameter, and errored if it wasn't
> possible.
> The patch alters this behaviour. The instance still tries to connect to
> every node listed in replication, but does not raise an error if it was
> able to connect to at least replication_connect_quorum instances.

Please append a documentation request (@TarantoolBot) to the commit
message.

> 
> Closes #3428
> ---
> https://github.com/tarantool/tarantool/issues/3428
> https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3428-replication-connect-quorum
> 
> Changes in v2:
>   - change test/replication/ddl.lua instance file to fix
>     test failure on Travis.
> 
>  src/box/box.cc                           | 6 +++---
>  src/box/replication.cc                   | 8 +++-----
>  src/box/replication.h                    | 7 ++++---
>  test/replication-py/init_storage.test.py | 2 +-
>  test/replication-py/master.lua           | 2 ++
>  test/replication-py/replica.lua          | 2 ++
>  test/replication/autobootstrap.lua       | 3 ++-
>  test/replication/autobootstrap_guest.lua | 2 +-
>  test/replication/ddl.lua                 | 3 ++-
>  test/replication/errinj.result           | 6 +++---
>  test/replication/errinj.test.lua         | 6 +++---
>  test/replication/master.lua              | 1 +
>  test/replication/master_quorum.lua       | 3 ++-
>  test/replication/on_replace.lua          | 3 ++-
>  test/replication/quorum.lua              | 4 ++--
>  test/replication/rebootstrap.lua         | 2 +-
>  test/replication/replica_no_quorum.lua   | 3 ++-
>  test/replication/replica_timeout.lua     | 3 ++-
>  test/replication/replica_uuid_ro.lua     | 2 +-
>  19 files changed, 39 insertions(+), 29 deletions(-)
> 
> diff --git a/src/box/box.cc b/src/box/box.cc
> index e3eb2738f..f8731f464 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -595,7 +595,7 @@ cfg_get_replication(int *p_count)
>   * don't start appliers.
>   */
>  static void
> -box_sync_replication(double timeout, bool connect_all)
> +box_sync_replication(double timeout, bool reach_quorum)

After this patch, 'timeout' always equals replication_connect_timeout
so you don't need to pass it explicitly anymore. Please remove it from
this function and from replicaset_connect.

Also, I don't like 'reach_quorum' name. Would 'connect_quorum' sound
better? Or may be we should pass the minimal number of masters to
connect instead?

>  {
>  	int count = 0;
>  	struct applier **appliers = cfg_get_replication(&count);
> @@ -607,7 +607,7 @@ box_sync_replication(double timeout, bool connect_all)
>  			applier_delete(appliers[i]); /* doesn't affect diag */
>  	});
>  
> -	replicaset_connect(appliers, count, timeout, connect_all);
> +	replicaset_connect(appliers, count, timeout, reach_quorum);
>  
>  	guard.is_active = false;
>  }
> @@ -1888,7 +1888,7 @@ box_cfg_xc(void)
>  		 * receive the same replica set UUID when a new cluster
>  		 * is deployed.
>  		 */
> -		box_sync_replication(TIMEOUT_INFINITY, true);
> +		box_sync_replication(replication_connect_timeout, true);
>  		/* Bootstrap a new master */
>  		bootstrap(&replicaset_uuid, &is_bootstrap_leader);
>  	}
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index 528fe4459..a6c60220f 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -46,7 +46,7 @@ struct tt_uuid INSTANCE_UUID;
>  struct tt_uuid REPLICASET_UUID;
>  
>  double replication_timeout = 1.0; /* seconds */
> -double replication_connect_timeout = 4.0; /* seconds */
> +double replication_connect_timeout = 10.0; /* seconds */

Why? BTW, this isn't enough - replication_connect_timeout is actually
set in load_cfg.lua (I've no idea why they differ).

>  int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
>  double replication_sync_lag = 10.0; /* seconds */
>  
> @@ -540,7 +540,7 @@ applier_on_connect_f(struct trigger *trigger, void *event)
>  
>  void
>  replicaset_connect(struct applier **appliers, int count,
> -		   double timeout, bool connect_all)
> +		   double timeout, bool reach_quorum)
>  {
>  	if (count == 0) {
>  		/* Cleanup the replica set. */
> @@ -587,15 +587,13 @@ replicaset_connect(struct applier **appliers, int count,
>  		double wait_start = ev_monotonic_now(loop());
>  		if (fiber_cond_wait_timeout(&state.wakeup, timeout) != 0)
>  			break;
> -		if (state.failed > 0 && connect_all)
> -			break;

I guess you should break the loop if

  state.failed > count - min_count_to_connect

>  		timeout -= ev_monotonic_now(loop()) - wait_start;
>  	}
>  	if (state.connected < count) {
>  		say_crit("failed to connect to %d out of %d replicas",
>  			 count - state.connected, count);
>  		/* Timeout or connection failure. */
> -		if (connect_all)
> +		if (reach_quorum && state.connected < replication_connect_quorum)

replication_connect_quorum can be greater than the number of configured
replicas. I think you should use MIN(count, replication_connect_quorum).

>  			goto error;
>  	} else {
>  		say_verbose("connected to %d replicas", state.connected);
> diff --git a/src/box/replication.h b/src/box/replication.h
> index 95122eb45..c16c8b56c 100644
> --- a/src/box/replication.h
> +++ b/src/box/replication.h
> @@ -357,12 +357,13 @@ replicaset_add(uint32_t replica_id, const struct tt_uuid *instance_uuid);
>   * \param appliers the array of appliers
>   * \param count size of appliers array
>   * \param timeout connection timeout
> - * \param connect_all if this flag is set, fail unless all
> - *                    appliers have successfully connected
> + * \param reach_quorum if this flag is set, fail unless at
> + *		       least replication_connect_quorum
> + *		       appliers have successfully connected.
>   */
>  void
>  replicaset_connect(struct applier **appliers, int count,
> -		   double timeout, bool connect_all);
> +		   double timeout, bool reach_quorum);
>  
>  /**
>   * Resume all appliers registered with the replica set.
> diff --git a/test/replication-py/init_storage.test.py b/test/replication-py/init_storage.test.py
> index 0911a02c0..32b4639f1 100644
> --- a/test/replication-py/init_storage.test.py
> +++ b/test/replication-py/init_storage.test.py
> @@ -57,7 +57,7 @@ print '-------------------------------------------------------------'
>  
>  server.stop()
>  replica = TarantoolServer(server.ini)
> -replica.script = 'replication/replica.lua'
> +replica.script = 'replication-py/replica.lua'
>  replica.vardir = server.vardir #os.path.join(server.vardir, 'replica')
>  replica.rpl_master = master
>  replica.deploy(wait=False)
> diff --git a/test/replication-py/master.lua b/test/replication-py/master.lua
> index 0f9f7a6f0..51283efdf 100644
> --- a/test/replication-py/master.lua
> +++ b/test/replication-py/master.lua
> @@ -3,6 +3,8 @@ os = require('os')
>  box.cfg({
>      listen              = os.getenv("LISTEN"),
>      memtx_memory        = 107374182,
> +    replication_connect_timeout = 1.0,
> +    replication_timeout = 0.3

Why do you need to adjust the timeouts?

>  })
>  
>  require('console').listen(os.getenv('ADMIN'))
> diff --git a/test/replication-py/replica.lua b/test/replication-py/replica.lua
> index 278291bba..b9d193b70 100644
> --- a/test/replication-py/replica.lua
> +++ b/test/replication-py/replica.lua
> @@ -7,6 +7,8 @@ box.cfg({
>      listen              = os.getenv("LISTEN"),
>      replication         = os.getenv("MASTER"),
>      memtx_memory        = 107374182,
> +    replication_connect_timeout = 1.0,
> +    replication_timeout = 0.3
>  })
>  
>  box_cfg_done = true
> diff --git a/test/replication/autobootstrap.lua b/test/replication/autobootstrap.lua
> index 4f55417ae..8fc6809de 100644
> --- a/test/replication/autobootstrap.lua
> +++ b/test/replication/autobootstrap.lua
> @@ -21,7 +21,8 @@ box.cfg({
>          USER..':'..PASSWORD..'@'..instance_uri(2);
>          USER..':'..PASSWORD..'@'..instance_uri(3);
>      };
> -    replication_connect_timeout = 0.5,
> +    replication_connect_timeout = 3.0;
> +    replication_timeout = 0.5;
>  })
>  
>  box.once("bootstrap", function()
> diff --git a/test/replication/autobootstrap_guest.lua b/test/replication/autobootstrap_guest.lua
> index 40fef2c7a..7cd921e3c 100644
> --- a/test/replication/autobootstrap_guest.lua
> +++ b/test/replication/autobootstrap_guest.lua
> @@ -20,7 +20,7 @@ box.cfg({
>          instance_uri(2);
>          instance_uri(3);
>      };
> -    replication_connect_timeout = 0.5,
> +    replication_connect_timeout = 5,

Why do you use different timeouts in different tests?



More information about the Tarantool-patches mailing list