[Tarantool-patches] [PATCH v2] box: make instace ro while limbo is not empty

Serge Petrenko sergepetrenko at tarantool.org
Mon Nov 30 12:41:27 MSK 2020


fixed a typo: instace -> instance.

30.11.2020 12:35, Serge Petrenko пишет:
> Users usually use box.ctl.wait_rw() to determine the moment when the
> instance becomes writeable.
> Since the synchronous replication introduction, this function became
> pointless, because even when an instance is writeable, it may fail at
> writing something because its limbo is not empty.
> To fix the problem introduce a new helper, txn_limbo_is_ro() and start
> using it in box_update_ro_summary().
> Call bax_update_ro_summary() every time the limbo gets emptied out or
> changes an owner.
>
> Closes #5440
> ---
> https://github.com/tarantool/tarantool/issues/5440
> sp/gh-5440-on-state-update
>
> Changes in v2:
>    - instead of updating is_ro on raft side, make
>      sure is_ro is updated every time limbo is
>      filled and emptied out.
>
>   src/box/box.cc                                |  3 ++-
>   src/box/raft.c                                |  9 +++----
>   src/box/txn_limbo.c                           | 25 +++++++++++++++++++
>   src/box/txn_limbo.h                           |  3 +++
>   test/replication/election_qsync.result        | 10 ++++++--
>   test/replication/election_qsync.test.lua      |  6 +++--
>   test/replication/election_qsync_stress.result |  4 +++
>   .../election_qsync_stress.test.lua            |  2 ++
>   8 files changed, 52 insertions(+), 10 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index b315635dc..4070cbeab 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -157,7 +157,8 @@ void
>   box_update_ro_summary(void)
>   {
>   	bool old_is_ro_summary = is_ro_summary;
> -	is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft());
> +	is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft()) ||
> +			txn_limbo_is_ro(&txn_limbo);
>   	/* In 99% nothing changes. Filter this out first. */
>   	if (is_ro_summary == old_is_ro_summary)
>   		return;
> diff --git a/src/box/raft.c b/src/box/raft.c
> index 6d98f5645..8f32a9662 100644
> --- a/src/box/raft.c
> +++ b/src/box/raft.c
> @@ -155,11 +155,10 @@ box_raft_on_update_f(struct trigger *trigger, void *event)
>   	struct raft *raft = (struct raft *)event;
>   	assert(raft == box_raft());
>   	/*
> -	 * XXX: in case the instance became a leader, RO must be updated only
> -	 * after clearing the synchro queue.
> -	 *
> -	 * When the instance became a follower, then on the contrary - make it
> -	 * read-only ASAP, this is good.
> +	 * When the instance becomes a follower, it's good to make it read-only
> +	 * ASAP. This way we make sure followers don't write anything.
> +	 * However, if the instance is transitioning to leader it'll become
> +	 * writable only after it clears its synchro queue.
>   	 */
>   	box_update_ro_summary();
>   	if (raft->state != RAFT_STATE_LEADER)
> diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
> index 2c35cd785..c406ed4c8 100644
> --- a/src/box/txn_limbo.c
> +++ b/src/box/txn_limbo.c
> @@ -33,6 +33,7 @@
>   #include "replication.h"
>   #include "iproto_constants.h"
>   #include "journal.h"
> +#include "box.h"
>   
>   struct txn_limbo txn_limbo;
>   
> @@ -48,10 +49,17 @@ txn_limbo_create(struct txn_limbo *limbo)
>   	limbo->is_in_rollback = false;
>   }
>   
> +bool
> +txn_limbo_is_ro(struct txn_limbo *limbo)
> +{
> +	return limbo->owner_id != instance_id && !txn_limbo_is_empty(limbo);
> +}
> +
>   struct txn_limbo_entry *
>   txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
>   {
>   	assert(txn_has_flag(txn, TXN_WAIT_SYNC));
> +	assert(limbo == &txn_limbo);
>   	/*
>   	 * Transactions should be added to the limbo before WAL write. Limbo
>   	 * needs that to be able rollback transactions, whose WAL write is in
> @@ -72,11 +80,14 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
>   	}
>   	if (id == 0)
>   		id = instance_id;
> +	bool make_ro = false;
>   	if (limbo->owner_id != id) {
>   		if (limbo->owner_id == REPLICA_ID_NIL ||
>   		    rlist_empty(&limbo->queue)) {
>   			limbo->owner_id = id;
>   			limbo->confirmed_lsn = 0;
> +			if (id != instance_id)
> +				make_ro = true;
>   		} else {
>   			diag_set(ClientError, ER_UNCOMMITTED_FOREIGN_SYNC_TXNS,
>   				 limbo->owner_id);
> @@ -96,6 +107,12 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
>   	e->is_commit = false;
>   	e->is_rollback = false;
>   	rlist_add_tail_entry(&limbo->queue, e, in_queue);
> +	/*
> +	 * We added new entries from a remote instance to an empty limbo.
> +	 * Time to make this instance read-only.
> +	 */
> +	if (make_ro)
> +		box_update_ro_summary();
>   	return e;
>   }
>   
> @@ -349,6 +366,7 @@ static void
>   txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
>   {
>   	assert(limbo->owner_id != REPLICA_ID_NIL);
> +	assert(limbo == &txn_limbo);
>   	struct txn_limbo_entry *e, *tmp;
>   	rlist_foreach_entry_safe(e, &limbo->queue, in_queue, tmp) {
>   		/*
> @@ -388,6 +406,9 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
>   		if (e->txn->signature >= 0)
>   			txn_complete_success(e->txn);
>   	}
> +	/* Update is_ro once the limbo is clear. */
> +	if (txn_limbo_is_empty(limbo))
> +		box_update_ro_summary();
>   }
>   
>   /**
> @@ -410,6 +431,7 @@ static void
>   txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
>   {
>   	assert(limbo->owner_id != REPLICA_ID_NIL);
> +	assert(limbo == &txn_limbo);
>   	struct txn_limbo_entry *e, *tmp;
>   	struct txn_limbo_entry *last_rollback = NULL;
>   	rlist_foreach_entry_reverse(e, &limbo->queue, in_queue) {
> @@ -452,6 +474,9 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
>   		if (e == last_rollback)
>   			break;
>   	}
> +	/* Update is_ro once the limbo is clear. */
> +	if (txn_limbo_is_empty(limbo))
> +		box_update_ro_summary();
>   }
>   
>   void
> diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
> index c7e70ba64..f7d67a826 100644
> --- a/src/box/txn_limbo.h
> +++ b/src/box/txn_limbo.h
> @@ -172,6 +172,9 @@ txn_limbo_is_empty(struct txn_limbo *limbo)
>   	return rlist_empty(&limbo->queue);
>   }
>   
> +bool
> +txn_limbo_is_ro(struct txn_limbo *limbo);
> +
>   static inline struct txn_limbo_entry *
>   txn_limbo_first_entry(struct txn_limbo *limbo)
>   {
> diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
> index cb349efcc..c06400b38 100644
> --- a/test/replication/election_qsync.result
> +++ b/test/replication/election_qsync.result
> @@ -75,7 +75,10 @@ box.cfg{
>    | ---
>    | ...
>   
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> + | ---
> + | ...
> +assert(box.info.election.state == 'leader')
>    | ---
>    | - true
>    | ...
> @@ -130,7 +133,10 @@ box.cfg{
>    | ---
>    | ...
>   
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> + | ---
> + | ...
> +assert(box.info.election.state == 'leader')
>    | ---
>    | - true
>    | ...
> diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
> index eb89e5b79..ea6fc4a61 100644
> --- a/test/replication/election_qsync.test.lua
> +++ b/test/replication/election_qsync.test.lua
> @@ -39,7 +39,8 @@ box.cfg{
>       replication_timeout = 0.1,                                                  \
>   }
>   
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> +assert(box.info.election.state == 'leader')
>   lsn = box.info.lsn
>   _ = fiber.create(function()                                                     \
>       ok, err = pcall(box.space.test.replace, box.space.test, {1})                \
> @@ -69,7 +70,8 @@ box.cfg{
>       replication_timeout = 0.01,                                                 \
>   }
>   
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> +assert(box.info.election.state == 'leader')
>   _ = box.space.test:replace{2}
>   box.space.test:select{}
>   box.space.test:drop()
> diff --git a/test/replication/election_qsync_stress.result b/test/replication/election_qsync_stress.result
> index 9fab2f1d7..1380c910b 100644
> --- a/test/replication/election_qsync_stress.result
> +++ b/test/replication/election_qsync_stress.result
> @@ -69,6 +69,9 @@ leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
>   c = netbox.connect(leader_port)
>    | ---
>    | ...
> +c:eval('box.ctl.wait_rw()')
> + | ---
> + | ...
>   
>   _ = c:eval('box.schema.space.create("test", {is_sync=true})')
>    | ---
> @@ -94,6 +97,7 @@ for i = 1,10 do
>       leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
>       c = netbox.connect(leader_port)
>       c:eval('box.cfg{replication_synchro_timeout=1000}')
> +    c:eval('box.ctl.wait_rw()')
>       c.space._schema:replace{'smth'}
>       c.space.test:get{i}
>       test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
> diff --git a/test/replication/election_qsync_stress.test.lua b/test/replication/election_qsync_stress.test.lua
> index 0ba15eef7..d70601cc5 100644
> --- a/test/replication/election_qsync_stress.test.lua
> +++ b/test/replication/election_qsync_stress.test.lua
> @@ -40,6 +40,7 @@ old_leader_nr = get_leader(nrs)
>   old_leader = 'election_replica'..old_leader_nr
>   leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
>   c = netbox.connect(leader_port)
> +c:eval('box.ctl.wait_rw()')
>   
>   _ = c:eval('box.schema.space.create("test", {is_sync=true})')
>   _ = c:eval('box.space.test:create_index("pk")')
> @@ -58,6 +59,7 @@ for i = 1,10 do
>       leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
>       c = netbox.connect(leader_port)
>       c:eval('box.cfg{replication_synchro_timeout=1000}')
> +    c:eval('box.ctl.wait_rw()')
>       c.space._schema:replace{'smth'}
>       c.space.test:get{i}
>       test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list