[Tarantool-patches] [PATCH v2] box: make instace ro while limbo is not empty
Serge Petrenko
sergepetrenko at tarantool.org
Mon Nov 30 12:41:27 MSK 2020
fixed a typo: instace -> instance.
30.11.2020 12:35, Serge Petrenko пишет:
> Users usually use box.ctl.wait_rw() to determine the moment when the
> instance becomes writeable.
> Since the synchronous replication introduction, this function became
> pointless, because even when an instance is writeable, it may fail at
> writing something because its limbo is not empty.
> To fix the problem introduce a new helper, txn_limbo_is_ro() and start
> using it in box_update_ro_summary().
> Call bax_update_ro_summary() every time the limbo gets emptied out or
> changes an owner.
>
> Closes #5440
> ---
> https://github.com/tarantool/tarantool/issues/5440
> sp/gh-5440-on-state-update
>
> Changes in v2:
> - instead of updating is_ro on raft side, make
> sure is_ro is updated every time limbo is
> filled and emptied out.
>
> src/box/box.cc | 3 ++-
> src/box/raft.c | 9 +++----
> src/box/txn_limbo.c | 25 +++++++++++++++++++
> src/box/txn_limbo.h | 3 +++
> test/replication/election_qsync.result | 10 ++++++--
> test/replication/election_qsync.test.lua | 6 +++--
> test/replication/election_qsync_stress.result | 4 +++
> .../election_qsync_stress.test.lua | 2 ++
> 8 files changed, 52 insertions(+), 10 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index b315635dc..4070cbeab 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -157,7 +157,8 @@ void
> box_update_ro_summary(void)
> {
> bool old_is_ro_summary = is_ro_summary;
> - is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft());
> + is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft()) ||
> + txn_limbo_is_ro(&txn_limbo);
> /* In 99% nothing changes. Filter this out first. */
> if (is_ro_summary == old_is_ro_summary)
> return;
> diff --git a/src/box/raft.c b/src/box/raft.c
> index 6d98f5645..8f32a9662 100644
> --- a/src/box/raft.c
> +++ b/src/box/raft.c
> @@ -155,11 +155,10 @@ box_raft_on_update_f(struct trigger *trigger, void *event)
> struct raft *raft = (struct raft *)event;
> assert(raft == box_raft());
> /*
> - * XXX: in case the instance became a leader, RO must be updated only
> - * after clearing the synchro queue.
> - *
> - * When the instance became a follower, then on the contrary - make it
> - * read-only ASAP, this is good.
> + * When the instance becomes a follower, it's good to make it read-only
> + * ASAP. This way we make sure followers don't write anything.
> + * However, if the instance is transitioning to leader it'll become
> + * writable only after it clears its synchro queue.
> */
> box_update_ro_summary();
> if (raft->state != RAFT_STATE_LEADER)
> diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
> index 2c35cd785..c406ed4c8 100644
> --- a/src/box/txn_limbo.c
> +++ b/src/box/txn_limbo.c
> @@ -33,6 +33,7 @@
> #include "replication.h"
> #include "iproto_constants.h"
> #include "journal.h"
> +#include "box.h"
>
> struct txn_limbo txn_limbo;
>
> @@ -48,10 +49,17 @@ txn_limbo_create(struct txn_limbo *limbo)
> limbo->is_in_rollback = false;
> }
>
> +bool
> +txn_limbo_is_ro(struct txn_limbo *limbo)
> +{
> + return limbo->owner_id != instance_id && !txn_limbo_is_empty(limbo);
> +}
> +
> struct txn_limbo_entry *
> txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
> {
> assert(txn_has_flag(txn, TXN_WAIT_SYNC));
> + assert(limbo == &txn_limbo);
> /*
> * Transactions should be added to the limbo before WAL write. Limbo
> * needs that to be able rollback transactions, whose WAL write is in
> @@ -72,11 +80,14 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
> }
> if (id == 0)
> id = instance_id;
> + bool make_ro = false;
> if (limbo->owner_id != id) {
> if (limbo->owner_id == REPLICA_ID_NIL ||
> rlist_empty(&limbo->queue)) {
> limbo->owner_id = id;
> limbo->confirmed_lsn = 0;
> + if (id != instance_id)
> + make_ro = true;
> } else {
> diag_set(ClientError, ER_UNCOMMITTED_FOREIGN_SYNC_TXNS,
> limbo->owner_id);
> @@ -96,6 +107,12 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
> e->is_commit = false;
> e->is_rollback = false;
> rlist_add_tail_entry(&limbo->queue, e, in_queue);
> + /*
> + * We added new entries from a remote instance to an empty limbo.
> + * Time to make this instance read-only.
> + */
> + if (make_ro)
> + box_update_ro_summary();
> return e;
> }
>
> @@ -349,6 +366,7 @@ static void
> txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
> {
> assert(limbo->owner_id != REPLICA_ID_NIL);
> + assert(limbo == &txn_limbo);
> struct txn_limbo_entry *e, *tmp;
> rlist_foreach_entry_safe(e, &limbo->queue, in_queue, tmp) {
> /*
> @@ -388,6 +406,9 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
> if (e->txn->signature >= 0)
> txn_complete_success(e->txn);
> }
> + /* Update is_ro once the limbo is clear. */
> + if (txn_limbo_is_empty(limbo))
> + box_update_ro_summary();
> }
>
> /**
> @@ -410,6 +431,7 @@ static void
> txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
> {
> assert(limbo->owner_id != REPLICA_ID_NIL);
> + assert(limbo == &txn_limbo);
> struct txn_limbo_entry *e, *tmp;
> struct txn_limbo_entry *last_rollback = NULL;
> rlist_foreach_entry_reverse(e, &limbo->queue, in_queue) {
> @@ -452,6 +474,9 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
> if (e == last_rollback)
> break;
> }
> + /* Update is_ro once the limbo is clear. */
> + if (txn_limbo_is_empty(limbo))
> + box_update_ro_summary();
> }
>
> void
> diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
> index c7e70ba64..f7d67a826 100644
> --- a/src/box/txn_limbo.h
> +++ b/src/box/txn_limbo.h
> @@ -172,6 +172,9 @@ txn_limbo_is_empty(struct txn_limbo *limbo)
> return rlist_empty(&limbo->queue);
> }
>
> +bool
> +txn_limbo_is_ro(struct txn_limbo *limbo);
> +
> static inline struct txn_limbo_entry *
> txn_limbo_first_entry(struct txn_limbo *limbo)
> {
> diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
> index cb349efcc..c06400b38 100644
> --- a/test/replication/election_qsync.result
> +++ b/test/replication/election_qsync.result
> @@ -75,7 +75,10 @@ box.cfg{
> | ---
> | ...
>
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> + | ---
> + | ...
> +assert(box.info.election.state == 'leader')
> | ---
> | - true
> | ...
> @@ -130,7 +133,10 @@ box.cfg{
> | ---
> | ...
>
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> + | ---
> + | ...
> +assert(box.info.election.state == 'leader')
> | ---
> | - true
> | ...
> diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
> index eb89e5b79..ea6fc4a61 100644
> --- a/test/replication/election_qsync.test.lua
> +++ b/test/replication/election_qsync.test.lua
> @@ -39,7 +39,8 @@ box.cfg{
> replication_timeout = 0.1, \
> }
>
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> +assert(box.info.election.state == 'leader')
> lsn = box.info.lsn
> _ = fiber.create(function() \
> ok, err = pcall(box.space.test.replace, box.space.test, {1}) \
> @@ -69,7 +70,8 @@ box.cfg{
> replication_timeout = 0.01, \
> }
>
> -test_run:wait_cond(function() return box.info.election.state == 'leader' end)
> +box.ctl.wait_rw()
> +assert(box.info.election.state == 'leader')
> _ = box.space.test:replace{2}
> box.space.test:select{}
> box.space.test:drop()
> diff --git a/test/replication/election_qsync_stress.result b/test/replication/election_qsync_stress.result
> index 9fab2f1d7..1380c910b 100644
> --- a/test/replication/election_qsync_stress.result
> +++ b/test/replication/election_qsync_stress.result
> @@ -69,6 +69,9 @@ leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
> c = netbox.connect(leader_port)
> | ---
> | ...
> +c:eval('box.ctl.wait_rw()')
> + | ---
> + | ...
>
> _ = c:eval('box.schema.space.create("test", {is_sync=true})')
> | ---
> @@ -94,6 +97,7 @@ for i = 1,10 do
> leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
> c = netbox.connect(leader_port)
> c:eval('box.cfg{replication_synchro_timeout=1000}')
> + c:eval('box.ctl.wait_rw()')
> c.space._schema:replace{'smth'}
> c.space.test:get{i}
> test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
> diff --git a/test/replication/election_qsync_stress.test.lua b/test/replication/election_qsync_stress.test.lua
> index 0ba15eef7..d70601cc5 100644
> --- a/test/replication/election_qsync_stress.test.lua
> +++ b/test/replication/election_qsync_stress.test.lua
> @@ -40,6 +40,7 @@ old_leader_nr = get_leader(nrs)
> old_leader = 'election_replica'..old_leader_nr
> leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
> c = netbox.connect(leader_port)
> +c:eval('box.ctl.wait_rw()')
>
> _ = c:eval('box.schema.space.create("test", {is_sync=true})')
> _ = c:eval('box.space.test:create_index("pk")')
> @@ -58,6 +59,7 @@ for i = 1,10 do
> leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
> c = netbox.connect(leader_port)
> c:eval('box.cfg{replication_synchro_timeout=1000}')
> + c:eval('box.ctl.wait_rw()')
> c.space._schema:replace{'smth'}
> c.space.test:get{i}
> test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
--
Serge Petrenko
More information about the Tarantool-patches
mailing list