Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: Konstantin Belyavskiy <k.belyavskiy@tarantool.org>
Cc: georgy@tarantool.org, tarantool-patches@freelists.org
Subject: Re: [PATCH] replication: fix bug with read-only replica as a bootstrap leader
Date: Fri, 13 Apr 2018 11:54:59 +0300	[thread overview]
Message-ID: <20180413085459.x2zuxqufp73r7cmi@esperanza> (raw)
In-Reply-To: <20180411160227.51871-1-k.belyavskiy@tarantool.org>

On Wed, Apr 11, 2018 at 07:02:27PM +0300, Konstantin Belyavskiy wrote:
> ticket: https://github.com/tarantool/tarantool/issues/3257
> branch: https://github.com/tarantool/tarantool/compare/gh-3257-fix-bug-with-ro-replica-as-a-leader
> 
> When bootstrapping a new cluster, each replica from replicaset can
> be chosen as a leader, but if it is 'read-only', bootstrap will
> failed with an error.
> Fixed it by eliminating read-only replicas from voting by adding
> access rights information to IPROTO_REQUEST_VOTE reply.
> 
> Closes #3257
> ---
>  src/box/applier.cc                             |  3 +-
>  src/box/applier.h                              |  2 +
>  src/box/iproto.cc                              | 11 +++--
>  src/box/iproto_constants.c                     |  2 +-
>  src/box/iproto_constants.h                     |  1 +
>  src/box/replication.cc                         |  8 ++++
>  src/box/xrow.c                                 | 29 ++++++++++---
>  src/box/xrow.h                                 | 54 ++++++++++++++++++-----
>  test/replication/replica_uuid_ro.lua           | 33 ++++++++++++++
>  test/replication/replica_uuid_ro1.lua          |  1 +
>  test/replication/replica_uuid_ro2.lua          |  1 +
>  test/replication/replicaset_ro_mostly.result   | 59 ++++++++++++++++++++++++++
>  test/replication/replicaset_ro_mostly.test.lua | 30 +++++++++++++
>  13 files changed, 212 insertions(+), 22 deletions(-)
>  create mode 100644 test/replication/replica_uuid_ro.lua
>  create mode 120000 test/replication/replica_uuid_ro1.lua
>  create mode 120000 test/replication/replica_uuid_ro2.lua
>  create mode 100644 test/replication/replicaset_ro_mostly.result
>  create mode 100644 test/replication/replicaset_ro_mostly.test.lua
> 
> diff --git a/src/box/applier.cc b/src/box/applier.cc
> index 9aa951c34..b3a13260d 100644
> --- a/src/box/applier.cc
> +++ b/src/box/applier.cc
> @@ -223,7 +223,8 @@ applier_connect(struct applier *applier)
>  		if (row.type != IPROTO_OK)
>  			xrow_decode_error_xc(&row);
>  		vclock_create(&applier->vclock);
> -		xrow_decode_vclock_xc(&row, &applier->vclock);
> +		xrow_decode_request_vote_xc(&row, &applier->vclock,
> +					    &applier->read_only);
>  	}
>  
>  	applier_set_state(applier, APPLIER_CONNECTED);
> diff --git a/src/box/applier.h b/src/box/applier.h
> index f25d6cb26..f47cf330f 100644
> --- a/src/box/applier.h
> +++ b/src/box/applier.h
> @@ -95,6 +95,8 @@ struct applier {
>  	uint32_t version_id;
>  	/** Remote vclock at time of connect. */
>  	struct vclock vclock;
> +	/** Remote access rights, true if read-only, default: false */

The comment is misleading IMO as this variable doesn't have anything
to do with access rights. It just means that the master is running in
read-only mode. Please fix.

> +	bool read_only;

I'd call it master_is_ro, because it's not the applier which is read
only - it's the master.

>  	/** Remote address */
>  	union {
>  		struct sockaddr addr;
> diff --git a/src/box/iproto.cc b/src/box/iproto.cc
> index db5820806..51970bb28 100644
> --- a/src/box/iproto.cc
> +++ b/src/box/iproto.cc
> @@ -60,6 +60,8 @@
>  #include "iproto_constants.h"
>  #include "rmean.h"
>  #include "errinj.h"
> +#include "applier.h"
> +#include "cfg.h"
>  
>  /* The number of iproto messages in flight */
>  enum { IPROTO_MSG_MAX = 768 };
> @@ -1352,6 +1354,7 @@ tx_process_misc(struct cmsg *m)
>  		goto error;
>  
>  	try {
> +		bool read_only = false;
>  		switch (msg->header.type) {
>  		case IPROTO_AUTH:
>  			box_process_auth(&msg->auth);
> @@ -1363,9 +1366,11 @@ tx_process_misc(struct cmsg *m)
>  					   ::schema_version);
>  			break;
>  		case IPROTO_REQUEST_VOTE:
> -			iproto_reply_vclock_xc(out, msg->header.sync,
> -					       ::schema_version,
> -					       &replicaset.vclock);

> +			read_only = cfg_geti("read_only");

We have box_is_ro() for this, which also takes into account "orphan"
mode.

> +			iproto_reply_request_vote_xc(out, msg->header.sync,
> +						     ::schema_version,
> +						     &replicaset.vclock,
> +						     read_only);

Nit: you don't need an extra variable here - let's pass box_is_ro()
directly to iproto_reply_request_vote_xc().

>  			break;
>  		default:
>  			unreachable();
> diff --git a/src/box/iproto_constants.c b/src/box/iproto_constants.c
> index cd7b1d03b..eaba6259e 100644
> --- a/src/box/iproto_constants.c
> +++ b/src/box/iproto_constants.c
> @@ -40,10 +40,10 @@ const unsigned char iproto_key_type[IPROTO_KEY_MAX] =
>  		/* 0x04 */	MP_DOUBLE, /* IPROTO_TIMESTAMP */
>  		/* 0x05 */	MP_UINT,   /* IPROTO_SCHEMA_VERSION */
>  		/* 0x06 */	MP_UINT,   /* IPROTO_SERVER_VERSION */
> +		/* 0x07 */	MP_UINT,   /* IPROTO_SERVER_READONLY */
>  	/* }}} */
>  
>  	/* {{{ unused */
> -		/* 0x07 */	MP_UINT,
>  		/* 0x08 */	MP_UINT,
>  		/* 0x09 */	MP_UINT,
>  		/* 0x0a */	MP_UINT,
> diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h
> index 951842485..aae004584 100644
> --- a/src/box/iproto_constants.h
> +++ b/src/box/iproto_constants.h
> @@ -58,6 +58,7 @@ enum iproto_key {
>  	IPROTO_TIMESTAMP = 0x04,
>  	IPROTO_SCHEMA_VERSION = 0x05,
>  	IPROTO_SERVER_VERSION = 0x06,
> +	IPROTO_SERVER_READONLY = 0x07,

IPROTO_SERVER_IS_RO, may be?

>  	/* Leave a gap for other keys in the header. */
>  	IPROTO_SPACE_ID = 0x10,
>  	IPROTO_INDEX_ID = 0x11,
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index b1c84d36c..423de2c88 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -685,6 +685,14 @@ replicaset_leader(void)
>  	replicaset_foreach(replica) {
>  		if (replica->applier == NULL)
>  			continue;
> +		/**
> +		 * While bootstrapping a new cluster,
> +		 * read-only replicas shouldn't be considered
> +		 * as a leader.
> +		 */
> +		if (replica->applier->read_only &&
> +		    replica->applier->vclock.signature == 0)
> +			continue;

What if you're adding a new replica to an existing cluster some nodes of
which are read only? AFAIU you want to choose an rw replica in this case
as well.

>  		if (leader == NULL) {
>  			leader = replica;
>  			continue;

> diff --git a/test/replication/replicaset_ro_mostly.test.lua b/test/replication/replicaset_ro_mostly.test.lua
> new file mode 100644
> index 000000000..539ca5a13
> --- /dev/null
> +++ b/test/replication/replicaset_ro_mostly.test.lua
> @@ -0,0 +1,30 @@
> +-- gh-3257 check bootstrap with read-only replica in cluster.
> +-- Old behaviour: failed, since read-only is chosen by uuid.
> +test_run = require('test_run').new()
> +
> +SERVERS = {'replica_uuid_ro1', 'replica_uuid_ro2'}
> +
> +uuid = require('uuid')
> +uuid1 = uuid.new()
> +uuid2 = uuid.new()
> +function sort_cmp(a, b) return a.time_low > b.time_low and true or false end
> +function sort(t) table.sort(t, sort_cmp) return t end
> +UUID = sort({uuid1, uuid2}, sort_cmp)

I don't think you really need to set UUID explicitly. If you don't, the
test will fail quite often anyway.

> +
> +create_cluster_cmd1 = 'create server %s with script="replication/%s.lua"'
> +create_cluster_cmd2 = 'start server %s with args="%s", wait_load=False, wait=False'
> +
> +test_run:cmd("setopt delimiter ';'")
> +function create_cluster_uuid(servers, uuids)
> +    for i, name in ipairs(servers) do
> +        test_run:cmd(create_cluster_cmd1:format(name, name))
> +        test_run:cmd(create_cluster_cmd2:format(name, uuids[i]))
> +    end
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +-- Deploy a cluster.
> +create_cluster_uuid(SERVERS, UUID)
> +test_run:wait_fullmesh(SERVERS)
> +-- Cleanup.
> +test_run:drop_cluster(SERVERS)

  parent reply	other threads:[~2018-04-13  8:54 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-11 16:02 Konstantin Belyavskiy
2018-04-11 16:11 ` [tarantool-patches] " Georgy Kirichenko
2018-04-13  8:54 ` Vladimir Davydov [this message]
2018-04-13 11:13   ` [tarantool-patches] " Konstantin Belyavskiy
2018-04-13 11:15     ` Re[2]: " Konstantin Belyavskiy
2018-04-13 12:38     ` Vladimir Davydov
2018-04-18 10:34       ` Vladimir Davydov
2018-05-22 15:40 Konstantin Belyavskiy
2018-05-22 16:45 ` Konstantin Osipov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180413085459.x2zuxqufp73r7cmi@esperanza \
    --to=vdavydov.dev@gmail.com \
    --cc=georgy@tarantool.org \
    --cc=k.belyavskiy@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH] replication: fix bug with read-only replica as a bootstrap leader' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox