[Tarantool-patches] [PATCH v2 4/4] replication: do not register outgoing connections

Serge Petrenko sergepetrenko at tarantool.org
Tue Sep 15 10:50:52 MSK 2020


15.09.2020 02:11, Vladislav Shpilevoy пишет:
> Replication protocol's first stage for non-anonymous replicas is
> that the replica should be registered in _cluster to get a unique
> ID number.
>
> That happens, when replica connects to a writable node, which
> performs the registration. So it means, registration always
> happens on the master node when appears an *incoming* request for
> it, explicitly asking for a registration. Only relay can do that.
>
> That wasn't the case for bootstrap. If box.cfg.replication wasn't
> empty on the master node doing the cluster bootstrap, it
> registered all the outgoing connections in _cluster. Note, the
> target node could be even anonymous, but still was registered.
>
> That breaks the protocol, and leads to registration of anon
> replicas sometimes. The patch drops it.
>
> Another motivation here is Raft cluster bootstrap specifics.
> During Raft bootstrap it is going to be very important that
> non-joined replicas should not be registered in _cluster. A
> replica can only register after its JOIN request was accepted, and
> its snapshot download has started.
>
> Closes #5287
> Needed for #1146
> ---
>   src/box/applier.cc                          | 13 ++++
>   src/box/box.cc                              | 29 +++++---
>   src/box/errcode.h                           |  1 +
>   test/box/error.result                       |  1 +
>   test/replication/autobootstrap_anon.lua     | 25 +++++++
>   test/replication/autobootstrap_anon1.lua    |  1 +
>   test/replication/autobootstrap_anon2.lua    |  1 +
>   test/replication/gh-5287-boot-anon.result   | 81 +++++++++++++++++++++
>   test/replication/gh-5287-boot-anon.test.lua | 30 ++++++++
>   test/replication/prune.result               | 18 ++++-
>   test/replication/prune.test.lua             |  7 +-
>   11 files changed, 190 insertions(+), 17 deletions(-)
>   create mode 100644 test/replication/autobootstrap_anon.lua
>   create mode 120000 test/replication/autobootstrap_anon1.lua
>   create mode 120000 test/replication/autobootstrap_anon2.lua
>   create mode 100644 test/replication/gh-5287-boot-anon.result
>   create mode 100644 test/replication/gh-5287-boot-anon.test.lua
>
> diff --git a/src/box/applier.cc b/src/box/applier.cc
> index 96dd48c0d..e272a7af6 100644
> --- a/src/box/applier.cc
> +++ b/src/box/applier.cc
> @@ -107,6 +107,7 @@ applier_log_error(struct applier *applier, struct error *e)
>   	case ER_UNKNOWN_REPLICA:
>   	case ER_PASSWORD_MISMATCH:
>   	case ER_XLOG_GAP:
> +	case ER_TOO_EARLY_SUBSCRIBE:
>   		say_info("will retry every %.2lf second",
>   			 replication_reconnect_interval());
>   		break;
> @@ -1306,6 +1307,18 @@ applier_f(va_list ap)
>   				applier_log_error(applier, e);
>   				applier_disconnect(applier, APPLIER_LOADING);
>   				goto reconnect;
> +			} else if (e->errcode() == ER_TOO_EARLY_SUBSCRIBE) {
> +				/*
> +				 * The instance is not anonymous, and is
> +				 * registered, but its ID is not delivered to
> +				 * all the nodes in the cluster yet, and some
> +				 * nodes may ask to retry connection later,
> +				 * until they receive _cluster record of this
> +				 * instance. From some third node, for example.
> +				 */
> +				applier_log_error(applier, e);
> +				applier_disconnect(applier, APPLIER_LOADING);
> +				goto reconnect;
>   			} else if (e->errcode() == ER_SYNC_QUORUM_TIMEOUT ||
>   				   e->errcode() == ER_SYNC_ROLLBACK) {
>   				/*
> diff --git a/src/box/box.cc b/src/box/box.cc
> index 145b53ec8..0b1f6c237 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1992,9 +1992,23 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
>   	struct replica *replica = replica_by_uuid(&replica_uuid);
>   
>   	if (!anon && (replica == NULL || replica->id == REPLICA_ID_NIL)) {
> -		tnt_raise(ClientError, ER_UNKNOWN_REPLICA,
> -			  tt_uuid_str(&replica_uuid),
> -			  tt_uuid_str(&REPLICASET_UUID));
> +		/*
> +		 * The instance is not anonymous, and is registered (at least it
> +		 * claims so), but its ID is not delivered to the current
> +		 * instance yet. Need to wait until its _cluster record arrives
> +		 * from some third node. Likely to happen on bootstrap, when
> +		 * there is a fullmesh and 1 leader doing all the _cluster
> +		 * registrations. Not all of them are delivered to the other
> +		 * nodes yet.
> +		 * Also can happen when the replica is deleted from _cluster,
> +		 * but still tries to subscribe. It won't have an ID here.
> +		 */
> +		tnt_raise(ClientError, ER_TOO_EARLY_SUBSCRIBE,
> +			  tt_uuid_str(&replica_uuid));
> +	}
> +	if (anon && replica != NULL && replica->id != REPLICA_ID_NIL) {
> +		tnt_raise(ClientError, ER_PROTOCOL, "Can't subscribe an "
> +			  "anonymous replica having an ID assigned");
>   	}
>   	if (replica == NULL)
>   		replica = replicaset_add_anon(&replica_uuid);
> @@ -2208,15 +2222,6 @@ bootstrap_master(const struct tt_uuid *replicaset_uuid)
>   	box_register_replica(replica_id, &INSTANCE_UUID);
>   	assert(replica_by_uuid(&INSTANCE_UUID)->id == 1);
>   
> -	/* Register other cluster members */
> -	replicaset_foreach(replica) {
> -		if (tt_uuid_is_equal(&replica->uuid, &INSTANCE_UUID))
> -			continue;
> -		assert(replica->applier != NULL);
> -		box_register_replica(++replica_id, &replica->uuid);
> -		assert(replica->id == replica_id);
> -	}
> -
>   	/* Set UUID of a new replica set */
>   	box_set_replicaset_uuid(replicaset_uuid);
>   
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index 9a240a431..e6957d612 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -272,6 +272,7 @@ struct errcode_record {
>           /*217 */_(ER_SYNC_ROLLBACK,             "A rollback for a synchronous transaction is received") \
>   	/*218 */_(ER_TUPLE_METADATA_IS_TOO_BIG,	"Can't create tuple: metadata size %u is too big") \
>   	/*219 */_(ER_XLOG_GAP,			"%s") \
> +	/*220 */_(ER_TOO_EARLY_SUBSCRIBE,	"Can't subscribe non-anonymous replica %s until join is done") \
>   
>   /*
>    * !IMPORTANT! Please follow instructions at start of the file
> diff --git a/test/box/error.result b/test/box/error.result
> index 600517316..4d85b9e55 100644
> --- a/test/box/error.result
> +++ b/test/box/error.result
> @@ -438,6 +438,7 @@ t;
>    |   217: box.error.SYNC_ROLLBACK
>    |   218: box.error.TUPLE_METADATA_IS_TOO_BIG
>    |   219: box.error.XLOG_GAP
> + |   220: box.error.TOO_EARLY_SUBSCRIBE
>    | ...
>   
>   test_run:cmd("setopt delimiter ''");
> diff --git a/test/replication/autobootstrap_anon.lua b/test/replication/autobootstrap_anon.lua
> new file mode 100644
> index 000000000..2e96d9d1a
> --- /dev/null
> +++ b/test/replication/autobootstrap_anon.lua
> @@ -0,0 +1,25 @@
> +#!/usr/bin/env tarantool
> +
> +local INSTANCE_ID = string.match(arg[0], "%d")
> +local SOCKET_DIR = require('fio').cwd()
> +local ANON = arg[1] == 'true'
> +
> +local function instance_uri(instance_id)
> +    return SOCKET_DIR..'/autobootstrap'..instance_id..'.sock';
> +end
> +
> +require('console').listen(os.getenv('ADMIN'))
> +
> +box.cfg({
> +    listen = instance_uri(INSTANCE_ID),
> +    replication = {
> +        instance_uri(1),
> +        instance_uri(2),
> +    };
> +    replication_anon = ANON,
> +    read_only = ANON,
> +})
> +
> +box.once("bootstrap", function()
> +    box.schema.user.grant('guest', 'super')
> +end)
> diff --git a/test/replication/autobootstrap_anon1.lua b/test/replication/autobootstrap_anon1.lua
> new file mode 120000
> index 000000000..27e0fec36
> --- /dev/null
> +++ b/test/replication/autobootstrap_anon1.lua
> @@ -0,0 +1 @@
> +autobootstrap_anon.lua
> \ No newline at end of file
> diff --git a/test/replication/autobootstrap_anon2.lua b/test/replication/autobootstrap_anon2.lua
> new file mode 120000
> index 000000000..27e0fec36
> --- /dev/null
> +++ b/test/replication/autobootstrap_anon2.lua
> @@ -0,0 +1 @@
> +autobootstrap_anon.lua
> \ No newline at end of file
> diff --git a/test/replication/gh-5287-boot-anon.result b/test/replication/gh-5287-boot-anon.result
> new file mode 100644
> index 000000000..bf6660ae5
> --- /dev/null
> +++ b/test/replication/gh-5287-boot-anon.result
> @@ -0,0 +1,81 @@
> +-- test-run result file version 2
> +test_run = require('test_run').new()
> + | ---
> + | ...
> +
> +--
> +-- gh-5287: when a cluster contained an anonymous replica during bootstrap, it
> +-- could be registered anyway.
> +--
> +
> +test_run:cmd("create server replica1 with script='replication/autobootstrap_anon1.lua'")
> + | ---
> + | - true
> + | ...
> +test_run:cmd("start server replica1 with wait=False")
> + | ---
> + | - true
> + | ...
> +
> +test_run:cmd("create server replica2 with script='replication/autobootstrap_anon2.lua'")
> + | ---
> + | - true
> + | ...
> +test_run:cmd("start server replica2 with args='true', wait=False")
> + | ---
> + | - true
> + | ...
> +
> +test_run:switch('replica2')
> + | ---
> + | - true
> + | ...
> +-- Without box.info.replication test-run fails to wait a cond.
> +test_run:wait_cond(function() return next(box.info.replication) ~= nil end)
> + | ---
> + | - true
> + | ...
> +test_run:wait_upstream(1, {status = 'follow'})
> + | ---
> + | - true
> + | ...
> +
> +test_run:switch('replica1')
> + | ---
> + | - true
> + | ...
> +-- The anonymous replica wasn't registered.
> +assert(box.space._cluster:len() == 1)
> + | ---
> + | - true
> + | ...
> +box.info.gc().consumers
> + | ---
> + | - []
> + | ...
> +box.info.replication_anon.count == 1
> + | ---
> + | - true
> + | ...
> +
> +test_run:switch('default')
> + | ---
> + | - true
> + | ...
> +
> +test_run:cmd("stop server replica1")
> + | ---
> + | - true
> + | ...
> +test_run:cmd("delete server replica1")
> + | ---
> + | - true
> + | ...
> +test_run:cmd("stop server replica2")
> + | ---
> + | - true
> + | ...
> +test_run:cmd("delete server replica2")
> + | ---
> + | - true
> + | ...
> diff --git a/test/replication/gh-5287-boot-anon.test.lua b/test/replication/gh-5287-boot-anon.test.lua
> new file mode 100644
> index 000000000..94ad81af7
> --- /dev/null
> +++ b/test/replication/gh-5287-boot-anon.test.lua
> @@ -0,0 +1,30 @@
> +test_run = require('test_run').new()
> +
> +--
> +-- gh-5287: when a cluster contained an anonymous replica during bootstrap, it
> +-- could be registered anyway.
> +--
> +
> +test_run:cmd("create server replica1 with script='replication/autobootstrap_anon1.lua'")
> +test_run:cmd("start server replica1 with wait=False")
> +
> +test_run:cmd("create server replica2 with script='replication/autobootstrap_anon2.lua'")
> +test_run:cmd("start server replica2 with args='true', wait=False")
> +
> +test_run:switch('replica2')
> +-- Without box.info.replication test-run fails to wait a cond.
> +test_run:wait_cond(function() return next(box.info.replication) ~= nil end)
> +test_run:wait_upstream(1, {status = 'follow'})
> +
> +test_run:switch('replica1')
> +-- The anonymous replica wasn't registered.
> +assert(box.space._cluster:len() == 1)
> +box.info.gc().consumers
> +box.info.replication_anon.count == 1
> +
> +test_run:switch('default')
> +
> +test_run:cmd("stop server replica1")
> +test_run:cmd("delete server replica1")
> +test_run:cmd("stop server replica2")
> +test_run:cmd("delete server replica2")
> diff --git a/test/replication/prune.result b/test/replication/prune.result
> index 1a130df40..67fd62990 100644
> --- a/test/replication/prune.result
> +++ b/test/replication/prune.result
> @@ -133,7 +133,19 @@ test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"')
>   - ['The local instance id 2 is read-only']
>   ...
>   -- restart replica and check that replica isn't able to join to cluster
> -test_run:cmd('restart server replica1')
> +test_run:cmd('stop server replica1')
> +---
> +- true
> +...
> +test_run:cmd('start server replica1 with args="true 0"')
> +---
> +- true
> +...
> +test_run:cmd('switch replica1')
> +---
> +- true
> +...
> +test_run:wait_upstream(1, {message_re = "Can't subscribe non%-anonymous replica"})
>   ---
>   - true
>   ...
> @@ -147,9 +159,9 @@ box.space._cluster:len() == 1
>   ...
>   test_run:cmd('eval replica1 "box.info.replication[1].upstream.status"')
>   ---
> -- ['stopped']
> +- ['loading']
>   ...
> -test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"')[1]:match("is not registered with replica set") ~= nil
> +test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"')[1]:match("Can't subscribe non%-anonymous replica") ~= nil
>   ---
>   - true
>   ...
> diff --git a/test/replication/prune.test.lua b/test/replication/prune.test.lua
> index 80847325b..ea8b0b3c3 100644
> --- a/test/replication/prune.test.lua
> +++ b/test/replication/prune.test.lua
> @@ -65,11 +65,14 @@ while test_run:cmd('eval replica1 "box.info.replication[1].upstream.status"')[1]
>   test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"')
>   
>   -- restart replica and check that replica isn't able to join to cluster
> -test_run:cmd('restart server replica1')
> +test_run:cmd('stop server replica1')
> +test_run:cmd('start server replica1 with args="true 0"')
> +test_run:cmd('switch replica1')
> +test_run:wait_upstream(1, {message_re = "Can't subscribe non%-anonymous replica"})
>   test_run:cmd('switch default')
>   box.space._cluster:len() == 1
>   test_run:cmd('eval replica1 "box.info.replication[1].upstream.status"')
> -test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"')[1]:match("is not registered with replica set") ~= nil
> +test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"')[1]:match("Can't subscribe non%-anonymous replica") ~= nil
>   replica_set.delete(test_run, 2)
>   
>   box.space.test:drop()
LGTM

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list