[PATCH] replication: fix bug with read-only replica as a bootstrap leader
Vladimir Davydov
vdavydov.dev at gmail.com
Fri Apr 13 11:54:59 MSK 2018
On Wed, Apr 11, 2018 at 07:02:27PM +0300, Konstantin Belyavskiy wrote:
> ticket: https://github.com/tarantool/tarantool/issues/3257
> branch: https://github.com/tarantool/tarantool/compare/gh-3257-fix-bug-with-ro-replica-as-a-leader
>
> When bootstrapping a new cluster, each replica from the replica set
> can be chosen as a leader, but if it is 'read-only', bootstrap will
> fail with an error.
> Fix it by eliminating read-only replicas from voting: add access
> rights information to the IPROTO_REQUEST_VOTE reply.
>
> Closes #3257
> ---
> src/box/applier.cc | 3 +-
> src/box/applier.h | 2 +
> src/box/iproto.cc | 11 +++--
> src/box/iproto_constants.c | 2 +-
> src/box/iproto_constants.h | 1 +
> src/box/replication.cc | 8 ++++
> src/box/xrow.c | 29 ++++++++++---
> src/box/xrow.h | 54 ++++++++++++++++++-----
> test/replication/replica_uuid_ro.lua | 33 ++++++++++++++
> test/replication/replica_uuid_ro1.lua | 1 +
> test/replication/replica_uuid_ro2.lua | 1 +
> test/replication/replicaset_ro_mostly.result | 59 ++++++++++++++++++++++++++
> test/replication/replicaset_ro_mostly.test.lua | 30 +++++++++++++
> 13 files changed, 212 insertions(+), 22 deletions(-)
> create mode 100644 test/replication/replica_uuid_ro.lua
> create mode 120000 test/replication/replica_uuid_ro1.lua
> create mode 120000 test/replication/replica_uuid_ro2.lua
> create mode 100644 test/replication/replicaset_ro_mostly.result
> create mode 100644 test/replication/replicaset_ro_mostly.test.lua
>
> diff --git a/src/box/applier.cc b/src/box/applier.cc
> index 9aa951c34..b3a13260d 100644
> --- a/src/box/applier.cc
> +++ b/src/box/applier.cc
> @@ -223,7 +223,8 @@ applier_connect(struct applier *applier)
> if (row.type != IPROTO_OK)
> xrow_decode_error_xc(&row);
> vclock_create(&applier->vclock);
> - xrow_decode_vclock_xc(&row, &applier->vclock);
> + xrow_decode_request_vote_xc(&row, &applier->vclock,
> + &applier->read_only);
> }
>
> applier_set_state(applier, APPLIER_CONNECTED);
> diff --git a/src/box/applier.h b/src/box/applier.h
> index f25d6cb26..f47cf330f 100644
> --- a/src/box/applier.h
> +++ b/src/box/applier.h
> @@ -95,6 +95,8 @@ struct applier {
> uint32_t version_id;
> /** Remote vclock at time of connect. */
> struct vclock vclock;
> + /** Remote access rights, true if read-only, default: false */
The comment is misleading IMO as this variable doesn't have anything
to do with access rights. It just means that the master is running in
read-only mode. Please fix.
> + bool read_only;
I'd call it master_is_ro, because it's not the applier that is
read-only - it's the master.
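Something like this, perhaps (just a sketch of the field I have in
mind, including the comment):

	/**
	 * Set to true if the master is running in read-only mode
	 * at the time of connect (as reported in the
	 * IPROTO_REQUEST_VOTE reply). False by default.
	 */
	bool master_is_ro;
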
> /** Remote address */
> union {
> struct sockaddr addr;
> diff --git a/src/box/iproto.cc b/src/box/iproto.cc
> index db5820806..51970bb28 100644
> --- a/src/box/iproto.cc
> +++ b/src/box/iproto.cc
> @@ -60,6 +60,8 @@
> #include "iproto_constants.h"
> #include "rmean.h"
> #include "errinj.h"
> +#include "applier.h"
> +#include "cfg.h"
>
> /* The number of iproto messages in flight */
> enum { IPROTO_MSG_MAX = 768 };
> @@ -1352,6 +1354,7 @@ tx_process_misc(struct cmsg *m)
> goto error;
>
> try {
> + bool read_only = false;
> switch (msg->header.type) {
> case IPROTO_AUTH:
> box_process_auth(&msg->auth);
> @@ -1363,9 +1366,11 @@ tx_process_misc(struct cmsg *m)
> ::schema_version);
> break;
> case IPROTO_REQUEST_VOTE:
> - iproto_reply_vclock_xc(out, msg->header.sync,
> - ::schema_version,
> - &replicaset.vclock);
> + read_only = cfg_geti("read_only");
We have box_is_ro() for this, which also takes into account "orphan"
mode.
> + iproto_reply_request_vote_xc(out, msg->header.sync,
> + ::schema_version,
> + &replicaset.vclock,
> + read_only);
Nit: you don't need an extra variable here - let's pass box_is_ro()
directly to iproto_reply_request_vote_xc().
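I.e. something along these lines (untested, assuming
iproto_reply_request_vote_xc() keeps the signature from your patch):

	case IPROTO_REQUEST_VOTE:
		iproto_reply_request_vote_xc(out, msg->header.sync,
					     ::schema_version,
					     &replicaset.vclock,
					     box_is_ro());
		break;
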
> break;
> default:
> unreachable();
> diff --git a/src/box/iproto_constants.c b/src/box/iproto_constants.c
> index cd7b1d03b..eaba6259e 100644
> --- a/src/box/iproto_constants.c
> +++ b/src/box/iproto_constants.c
> @@ -40,10 +40,10 @@ const unsigned char iproto_key_type[IPROTO_KEY_MAX] =
> /* 0x04 */ MP_DOUBLE, /* IPROTO_TIMESTAMP */
> /* 0x05 */ MP_UINT, /* IPROTO_SCHEMA_VERSION */
> /* 0x06 */ MP_UINT, /* IPROTO_SERVER_VERSION */
> + /* 0x07 */ MP_UINT, /* IPROTO_SERVER_READONLY */
> /* }}} */
>
> /* {{{ unused */
> - /* 0x07 */ MP_UINT,
> /* 0x08 */ MP_UINT,
> /* 0x09 */ MP_UINT,
> /* 0x0a */ MP_UINT,
> diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h
> index 951842485..aae004584 100644
> --- a/src/box/iproto_constants.h
> +++ b/src/box/iproto_constants.h
> @@ -58,6 +58,7 @@ enum iproto_key {
> IPROTO_TIMESTAMP = 0x04,
> IPROTO_SCHEMA_VERSION = 0x05,
> IPROTO_SERVER_VERSION = 0x06,
> + IPROTO_SERVER_READONLY = 0x07,
IPROTO_SERVER_IS_RO, maybe?
> /* Leave a gap for other keys in the header. */
> IPROTO_SPACE_ID = 0x10,
> IPROTO_INDEX_ID = 0x11,
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index b1c84d36c..423de2c88 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -685,6 +685,14 @@ replicaset_leader(void)
> replicaset_foreach(replica) {
> if (replica->applier == NULL)
> continue;
> + /**
> + * While bootstrapping a new cluster,
> + * read-only replicas shouldn't be considered
> + * as a leader.
> + */
> + if (replica->applier->read_only &&
> + replica->applier->vclock.signature == 0)
> + continue;
What if you're adding a new replica to an existing cluster, some nodes
of which are read-only? AFAIU you want to choose an rw replica in this
case as well.
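For instance, something like this (a rough sketch, not tested; it
assumes the field is renamed to master_is_ro as suggested above):

	/*
	 * Prefer a read-write master over a read-only one, both
	 * on bootstrap and when joining an existing cluster.
	 * Fall back to a read-only master only if there is no
	 * other choice.
	 */
	if (leader != NULL &&
	    replica->applier->master_is_ro &&
	    !leader->applier->master_is_ro)
		continue;
	if (leader != NULL &&
	    !replica->applier->master_is_ro &&
	    leader->applier->master_is_ro) {
		leader = replica;
		continue;
	}
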
> if (leader == NULL) {
> leader = replica;
> continue;
> diff --git a/test/replication/replicaset_ro_mostly.test.lua b/test/replication/replicaset_ro_mostly.test.lua
> new file mode 100644
> index 000000000..539ca5a13
> --- /dev/null
> +++ b/test/replication/replicaset_ro_mostly.test.lua
> @@ -0,0 +1,30 @@
> +-- gh-3257 check bootstrap with read-only replica in cluster.
> +-- Old behaviour: failed, since read-only is chosen by uuid.
> +test_run = require('test_run').new()
> +
> +SERVERS = {'replica_uuid_ro1', 'replica_uuid_ro2'}
> +
> +uuid = require('uuid')
> +uuid1 = uuid.new()
> +uuid2 = uuid.new()
> +function sort_cmp(a, b) return a.time_low > b.time_low and true or false end
> +function sort(t) table.sort(t, sort_cmp) return t end
> +UUID = sort({uuid1, uuid2}, sort_cmp)
I don't think you really need to set the UUIDs explicitly. Even if you
don't, the test will fail quite often without the fix anyway.
> +
> +create_cluster_cmd1 = 'create server %s with script="replication/%s.lua"'
> +create_cluster_cmd2 = 'start server %s with args="%s", wait_load=False, wait=False'
> +
> +test_run:cmd("setopt delimiter ';'")
> +function create_cluster_uuid(servers, uuids)
> + for i, name in ipairs(servers) do
> + test_run:cmd(create_cluster_cmd1:format(name, name))
> + test_run:cmd(create_cluster_cmd2:format(name, uuids[i]))
> + end
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +-- Deploy a cluster.
> +create_cluster_uuid(SERVERS, UUID)
> +test_run:wait_fullmesh(SERVERS)
> +-- Cleanup.
> +test_run:drop_cluster(SERVERS)