From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp60.i.mail.ru (smtp60.i.mail.ru [217.69.128.40]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 51E6145C304 for ; Mon, 30 Nov 2020 12:36:06 +0300 (MSK) From: Serge Petrenko Date: Mon, 30 Nov 2020 12:35:51 +0300 Message-Id: <20201130093551.62113-1-sergepetrenko@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v2] box: make instance ro while limbo is not empty List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: v.shpilevoy@tarantool.org, gorcunov@gmail.com Cc: tarantool-patches@dev.tarantool.org Users usually use box.ctl.wait_rw() to determine the moment when the instance becomes writeable. Since the synchronous replication introduction, this function became pointless, because even when an instance is writeable, it may fail at writing something because its limbo is not empty. To fix the problem introduce a new helper, txn_limbo_is_ro() and start using it in box_update_ro_summary(). Call box_update_ro_summary() every time the limbo gets emptied out or changes an owner. Closes #5440 --- https://github.com/tarantool/tarantool/issues/5440 sp/gh-5440-on-state-update Changes in v2: - instead of updating is_ro on raft side, make sure is_ro is updated every time limbo is filled and emptied out. 
src/box/box.cc | 3 ++- src/box/raft.c | 9 +++---- src/box/txn_limbo.c | 25 +++++++++++++++++++ src/box/txn_limbo.h | 3 +++ test/replication/election_qsync.result | 10 ++++++-- test/replication/election_qsync.test.lua | 6 +++-- test/replication/election_qsync_stress.result | 4 +++ .../election_qsync_stress.test.lua | 2 ++ 8 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index b315635dc..4070cbeab 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -157,7 +157,8 @@ void box_update_ro_summary(void) { bool old_is_ro_summary = is_ro_summary; - is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft()); + is_ro_summary = is_ro || is_orphan || raft_is_ro(box_raft()) || + txn_limbo_is_ro(&txn_limbo); /* In 99% nothing changes. Filter this out first. */ if (is_ro_summary == old_is_ro_summary) return; diff --git a/src/box/raft.c b/src/box/raft.c index 6d98f5645..8f32a9662 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -155,11 +155,10 @@ box_raft_on_update_f(struct trigger *trigger, void *event) struct raft *raft = (struct raft *)event; assert(raft == box_raft()); /* - * XXX: in case the instance became a leader, RO must be updated only - * after clearing the synchro queue. - * - * When the instance became a follower, then on the contrary - make it - * read-only ASAP, this is good. + * When the instance becomes a follower, it's good to make it read-only + * ASAP. This way we make sure followers don't write anything. + * However, if the instance is transitioning to leader it'll become + * writable only after it clears its synchro queue. 
*/ box_update_ro_summary(); if (raft->state != RAFT_STATE_LEADER) diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index 2c35cd785..c406ed4c8 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -33,6 +33,7 @@ #include "replication.h" #include "iproto_constants.h" #include "journal.h" +#include "box.h" struct txn_limbo txn_limbo; @@ -48,10 +49,17 @@ txn_limbo_create(struct txn_limbo *limbo) limbo->is_in_rollback = false; } +bool +txn_limbo_is_ro(struct txn_limbo *limbo) +{ + return limbo->owner_id != instance_id && !txn_limbo_is_empty(limbo); +} + struct txn_limbo_entry * txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn) { assert(txn_has_flag(txn, TXN_WAIT_SYNC)); + assert(limbo == &txn_limbo); /* * Transactions should be added to the limbo before WAL write. Limbo * needs that to be able rollback transactions, whose WAL write is in @@ -72,11 +80,14 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn) } if (id == 0) id = instance_id; + bool make_ro = false; if (limbo->owner_id != id) { if (limbo->owner_id == REPLICA_ID_NIL || rlist_empty(&limbo->queue)) { limbo->owner_id = id; limbo->confirmed_lsn = 0; + if (id != instance_id) + make_ro = true; } else { diag_set(ClientError, ER_UNCOMMITTED_FOREIGN_SYNC_TXNS, limbo->owner_id); @@ -96,6 +107,12 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn) e->is_commit = false; e->is_rollback = false; rlist_add_tail_entry(&limbo->queue, e, in_queue); + /* + * We added new entries from a remote instance to an empty limbo. + * Time to make this instance read-only. 
+ */ + if (make_ro) + box_update_ro_summary(); return e; } @@ -349,6 +366,7 @@ static void txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn) { assert(limbo->owner_id != REPLICA_ID_NIL); + assert(limbo == &txn_limbo); struct txn_limbo_entry *e, *tmp; rlist_foreach_entry_safe(e, &limbo->queue, in_queue, tmp) { /* @@ -388,6 +406,9 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn) if (e->txn->signature >= 0) txn_complete_success(e->txn); } + /* Update is_ro once the limbo is clear. */ + if (txn_limbo_is_empty(limbo)) + box_update_ro_summary(); } /** @@ -410,6 +431,7 @@ static void txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn) { assert(limbo->owner_id != REPLICA_ID_NIL); + assert(limbo == &txn_limbo); struct txn_limbo_entry *e, *tmp; struct txn_limbo_entry *last_rollback = NULL; rlist_foreach_entry_reverse(e, &limbo->queue, in_queue) { @@ -452,6 +474,9 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn) if (e == last_rollback) break; } + /* Update is_ro once the limbo is clear. */ + if (txn_limbo_is_empty(limbo)) + box_update_ro_summary(); } void diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index c7e70ba64..f7d67a826 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -172,6 +172,9 @@ txn_limbo_is_empty(struct txn_limbo *limbo) return rlist_empty(&limbo->queue); } +bool +txn_limbo_is_ro(struct txn_limbo *limbo); + static inline struct txn_limbo_entry * txn_limbo_first_entry(struct txn_limbo *limbo) { diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result index cb349efcc..c06400b38 100644 --- a/test/replication/election_qsync.result +++ b/test/replication/election_qsync.result @@ -75,7 +75,10 @@ box.cfg{ | --- | ... -test_run:wait_cond(function() return box.info.election.state == 'leader' end) +box.ctl.wait_rw() + | --- + | ... +assert(box.info.election.state == 'leader') | --- | - true | ... @@ -130,7 +133,10 @@ box.cfg{ | --- | ... 
-test_run:wait_cond(function() return box.info.election.state == 'leader' end) +box.ctl.wait_rw() + | --- + | ... +assert(box.info.election.state == 'leader') | --- | - true | ... diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua index eb89e5b79..ea6fc4a61 100644 --- a/test/replication/election_qsync.test.lua +++ b/test/replication/election_qsync.test.lua @@ -39,7 +39,8 @@ box.cfg{ replication_timeout = 0.1, \ } -test_run:wait_cond(function() return box.info.election.state == 'leader' end) +box.ctl.wait_rw() +assert(box.info.election.state == 'leader') lsn = box.info.lsn _ = fiber.create(function() \ ok, err = pcall(box.space.test.replace, box.space.test, {1}) \ @@ -69,7 +70,8 @@ box.cfg{ replication_timeout = 0.01, \ } -test_run:wait_cond(function() return box.info.election.state == 'leader' end) +box.ctl.wait_rw() +assert(box.info.election.state == 'leader') _ = box.space.test:replace{2} box.space.test:select{} box.space.test:drop() diff --git a/test/replication/election_qsync_stress.result b/test/replication/election_qsync_stress.result index 9fab2f1d7..1380c910b 100644 --- a/test/replication/election_qsync_stress.result +++ b/test/replication/election_qsync_stress.result @@ -69,6 +69,9 @@ leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1] c = netbox.connect(leader_port) | --- | ... +c:eval('box.ctl.wait_rw()') + | --- + | ... _ = c:eval('box.schema.space.create("test", {is_sync=true})') | --- @@ -94,6 +97,7 @@ for i = 1,10 do leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1] c = netbox.connect(leader_port) c:eval('box.cfg{replication_synchro_timeout=1000}') + c:eval('box.ctl.wait_rw()') c.space._schema:replace{'smth'} c.space.test:get{i} test_run:cmd('start server '..old_leader..' 
with wait=True, wait_load=True, args="2 0.4"') diff --git a/test/replication/election_qsync_stress.test.lua b/test/replication/election_qsync_stress.test.lua index 0ba15eef7..d70601cc5 100644 --- a/test/replication/election_qsync_stress.test.lua +++ b/test/replication/election_qsync_stress.test.lua @@ -40,6 +40,7 @@ old_leader_nr = get_leader(nrs) old_leader = 'election_replica'..old_leader_nr leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1] c = netbox.connect(leader_port) +c:eval('box.ctl.wait_rw()') _ = c:eval('box.schema.space.create("test", {is_sync=true})') _ = c:eval('box.space.test:create_index("pk")') @@ -58,6 +59,7 @@ for i = 1,10 do leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1] c = netbox.connect(leader_port) c:eval('box.cfg{replication_synchro_timeout=1000}') + c:eval('box.ctl.wait_rw()') c.space._schema:replace{'smth'} c.space.test:get{i} test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"') -- 2.24.3 (Apple Git-128)