[Tarantool-patches] [PATCH v2] box: make instace ro while limbo is not empty

Alexander V. Tikhonov avtikhon at tarantool.org
Tue Dec 1 14:06:15 MSK 2020


Hi Sergey, , thanks for the patch, as I see no new degradation found in
gitlab-ci testing commit criteria pipeline [1], patch LGTM.

[1] - https://gitlab.com/tarantool/tarantool/-/pipelines/223561823

On Mon, Nov 30, 2020 at 12:35:51PM +0300, Serge Petrenko via Tarantool-patches wrote:
> Users usually use box.ctl.wait_rw() to determine the moment when the
> instance becomes writeable.
> Since the synchronous replication introduction, this function became
> pointless, because even when an instance is writeable, it may fail at
> writing something because its limbo is not empty.
> To fix the problem introduce a new helper, txn_limbo_is_ro() and start
> using it in box_update_ro_summary().
> Call bax_update_ro_summary() every time the limbo gets emptied out or
> changes an owner.
> 
> Closes #5440
> ---
> https://github.com/tarantool/tarantool/issues/5440
> sp/gh-5440-on-state-update
> 
> Changes in v2:
>   - instead of updating is_ro on raft side, make
>     sure is_ro is updated every time limbo is
>     filled and emptied out.
> 
>  src/box/box.cc                                |  3 ++-
>  src/box/raft.c                                |  9 +++----
>  src/box/txn_limbo.c                           | 25 +++++++++++++++++++
>  src/box/txn_limbo.h                           |  3 +++
>  test/replication/election_qsync.result        | 10 ++++++--
>  test/replication/election_qsync.test.lua      |  6 +++--
>  test/replication/election_qsync_stress.result |  4 +++
>  .../election_qsync_stress.test.lua            |  2 ++
>  8 files changed, 52 insertions(+), 10 deletions(-)
> 
> diff --git a/src/box/box.cc b/src/box/box.cc
> index b315635dc..4070cbeab 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -157,7 +157,8 @@ void
>  box_update_ro_summary(void)
>  {
>  	bool old_is_ro_summary = is_ro_summary;
> -	is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft());
> +	is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft()) ||
> +			txn_limbo_is_ro(&txn_limbo);
>  	/* In 99% nothing changes. Filter this out first. */
>  	if (is_ro_summary == old_is_ro_summary)
>  		return;
> diff --git a/src/box/raft.c b/src/box/raft.c
> index 6d98f5645..8f32a9662 100644
> --- a/src/box/raft.c
> +++ b/src/box/raft.c
> @@ -155,11 +155,10 @@ box_raft_on_update_f(struct trigger *trigger, void *event)
>  	struct raft *raft = (struct raft *)event;
>  	assert(raft == box_raft());
>  	/*
> -	 * XXX: in case the instance became a leader, RO must be updated only
> -	 * after clearing the synchro queue.
> -	 *
> -	 * When the instance became a follower, then on the contrary - make it
> -	 * read-only ASAP, this is good.
> +	 * When the instance becomes a follower, it's good to make it read-only
> +	 * ASAP. This way we make sure followers don't write anything.
> +	 * However, if the instance is transitioning to leader it'll become
> +	 * writable only after it clears its synchro queue.
>  	 */
>  	box_update_ro_summary();
>  	if (raft->state != RAFT_STATE_LEADER)
> diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
> index 2c35cd785..c406ed4c8 100644
> --- a/src/box/txn_limbo.c
> +++ b/src/box/txn_limbo.c
> @@ -33,6 +33,7 @@
>  #include "replication.h"
>  #include "iproto_constants.h"
>  #include "journal.h"
> +#include "box.h"
>  
>  struct txn_limbo txn_limbo;
>  
> @@ -48,10 +49,17 @@ txn_limbo_create(struct txn_limbo *limbo)
>  	limbo->is_in_rollback = false;
>  }
>  
> +bool
> +txn_limbo_is_ro(struct txn_limbo *limbo)
> +{
> +	return limbo->owner_id != instance_id && !txn_limbo_is_empty(limbo);
> +}
> +
>  struct txn_limbo_entry *
>  txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
>  {
>  	assert(txn_has_flag(txn, TXN_WAIT_SYNC));
> +	assert(limbo == &txn_limbo);
>  	/*
>  	 * Transactions should be added to the limbo before WAL write. Limbo
>  	 * needs that to be able rollback transactions, whose WAL write is in
> @@ -72,11 +80,14 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
>  	}
>  	if (id == 0)
>  		id = instance_id;
> +	bool make_ro = false;
>  	if (limbo->owner_id != id) {
>  		if (limbo->owner_id == REPLICA_ID_NIL ||
>  		    rlist_empty(&limbo->queue)) {
>  			limbo->owner_id = id;
>  			limbo->confirmed_lsn = 0;
> +			if (id != instance_id)
> +				make_ro = true;
>  		} else {
>  			diag_set(ClientError, ER_UNCOMMITTED_FOREIGN_SYNC_TXNS,
>  				 limbo->owner_id);
> @@ -96,6 +107,12 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
>  	e->is_commit = false;
>  	e->is_rollback = false;
>  	rlist_add_tail_entry(&limbo->queue, e, in_queue);
> +	/*
> +	 * We added new entries from a remote instance to an empty limbo.
> +	 * Time to make this instance read-only.
> +	 */
> +	if (make_ro)
> +		box_update_ro_summary();
>  	return e;
>  }
>  
> @@ -349,6 +366,7 @@ static void
>  txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
>  {
>  	assert(limbo->owner_id != REPLICA_ID_NIL);
> +	assert(limbo == &txn_limbo);
>  	struct txn_limbo_entry *e, *tmp;
>  	rlist_foreach_entry_safe(e, &limbo->queue, in_queue, tmp) {
>  		/*
> @@ -388,6 +406,9 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
>  		if (e->txn->signature >= 0)
>  			txn_complete_success(e->txn);
>  	}
> +	/* Update is_ro once the limbo is clear. */
> +	if (txn_limbo_is_empty(limbo))
> +		box_update_ro_summary();
>  }
>  
>  /**
> @@ -410,6 +431,7 @@ static void
>  txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
>  {
>  	assert(limbo->owner_id != REPLICA_ID_NIL);
> +	assert(limbo == &txn_limbo);
>  	struct txn_limbo_entry *e, *tmp;
>  	struct txn_limbo_entry *last_rollback = NULL;
>  	rlist_foreach_entry_reverse(e, &limbo->queue, in_queue) {
> @@ -452,6 +474,9 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
>  		if (e == last_rollback)
>  			break;
>  	}
> +	/* Update is_ro once the limbo is clear. */
> +	if (txn_limbo_is_empty(limbo))
> +		box_update_ro_summary();
>  }
>  
>  void
> diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
> index c7e70ba64..f7d67a826 100644
> --- a/src/box/txn_limbo.h
> +++ b/src/box/txn_limbo.h
> @@ -172,6 +172,9 @@ txn_limbo_is_empty(struct txn_limbo *limbo)
>  	return rlist_empty(&limbo->queue);
>  }
>  
> +bool
> +txn_limbo_is_ro(struct txn_limbo *limbo);
> +
>  static inline struct txn_limbo_entry *
>  txn_limbo_first_entry(struct txn_limbo *limbo)
>  {
> diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
> index cb349efcc..c06400b38 100644
> --- a/test/replication/election_qsync.result
> +++ b/test/replication/election_qsync.result
> @@ -75,7 +75,10 @@ box.cfg{
>   | ---
>   | ...
>  
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> + | ---
> + | ...
> +assert(box.info.election.state == 'leader')
>   | ---
>   | - true
>   | ...
> @@ -130,7 +133,10 @@ box.cfg{
>   | ---
>   | ...
>  
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> + | ---
> + | ...
> +assert(box.info.election.state == 'leader')
>   | ---
>   | - true
>   | ...
> diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
> index eb89e5b79..ea6fc4a61 100644
> --- a/test/replication/election_qsync.test.lua
> +++ b/test/replication/election_qsync.test.lua
> @@ -39,7 +39,8 @@ box.cfg{
>      replication_timeout = 0.1,                                                  \
>  }
>  
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> +assert(box.info.election.state == 'leader')
>  lsn = box.info.lsn
>  _ = fiber.create(function()                                                     \
>      ok, err = pcall(box.space.test.replace, box.space.test, {1})                \
> @@ -69,7 +70,8 @@ box.cfg{
>      replication_timeout = 0.01,                                                 \
>  }
>  
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> +assert(box.info.election.state == 'leader')
>  _ = box.space.test:replace{2}
>  box.space.test:select{}
>  box.space.test:drop()
> diff --git a/test/replication/election_qsync_stress.result b/test/replication/election_qsync_stress.result
> index 9fab2f1d7..1380c910b 100644
> --- a/test/replication/election_qsync_stress.result
> +++ b/test/replication/election_qsync_stress.result
> @@ -69,6 +69,9 @@ leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
>  c = netbox.connect(leader_port)
>   | ---
>   | ...
> +c:eval('box.ctl.wait_rw()')
> + | ---
> + | ...
>  
>  _ = c:eval('box.schema.space.create("test", {is_sync=true})')
>   | ---
> @@ -94,6 +97,7 @@ for i = 1,10 do
>      leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
>      c = netbox.connect(leader_port)
>      c:eval('box.cfg{replication_synchro_timeout=1000}')
> +    c:eval('box.ctl.wait_rw()')
>      c.space._schema:replace{'smth'}
>      c.space.test:get{i}
>      test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
> diff --git a/test/replication/election_qsync_stress.test.lua b/test/replication/election_qsync_stress.test.lua
> index 0ba15eef7..d70601cc5 100644
> --- a/test/replication/election_qsync_stress.test.lua
> +++ b/test/replication/election_qsync_stress.test.lua
> @@ -40,6 +40,7 @@ old_leader_nr = get_leader(nrs)
>  old_leader = 'election_replica'..old_leader_nr
>  leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
>  c = netbox.connect(leader_port)
> +c:eval('box.ctl.wait_rw()')
>  
>  _ = c:eval('box.schema.space.create("test", {is_sync=true})')
>  _ = c:eval('box.space.test:create_index("pk")')
> @@ -58,6 +59,7 @@ for i = 1,10 do
>      leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
>      c = netbox.connect(leader_port)
>      c:eval('box.cfg{replication_synchro_timeout=1000}')
> +    c:eval('box.ctl.wait_rw()')
>      c.space._schema:replace{'smth'}
>      c.space.test:get{i}
>      test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
> -- 
> 2.24.3 (Apple Git-128)
> 


More information about the Tarantool-patches mailing list