[Tarantool-patches] [PATCH v8 3/6] limbo: gather promote tracking into a separate structure

Cyrill Gorcunov gorcunov at gmail.com
Mon Jul 26 18:34:49 MSK 2021


It is needed to introduce ordered promote related data
modifications in next patch.

Part-of #6036

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 src/box/box.cc      | 12 +++++++----
 src/box/txn_limbo.c | 24 ++++++++++++++--------
 src/box/txn_limbo.h | 49 ++++++++++++++++++++++++++++-----------------
 3 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index fb58f981d..b356508f0 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1565,7 +1565,8 @@ box_run_elections(void)
 static int
 box_check_promote_term_intact(uint64_t promote_term)
 {
-	if (txn_limbo.promote_greatest_term != promote_term) {
+	const struct txn_limbo_terms *tr = &txn_limbo.terms;
+	if (tr->terms_max != promote_term) {
 		diag_set(ClientError, ER_INTERFERING_PROMOTE,
 			 txn_limbo.owner_id);
 		return -1;
@@ -1577,7 +1578,8 @@ box_check_promote_term_intact(uint64_t promote_term)
 static int
 box_trigger_elections(void)
 {
-	uint64_t promote_term = txn_limbo.promote_greatest_term;
+	const struct txn_limbo_terms *tr = &txn_limbo.terms;
+	uint64_t promote_term = tr->terms_max;
 	raft_new_term(box_raft());
 	if (box_raft_wait_term_persisted() < 0)
 		return -1;
@@ -1588,7 +1590,8 @@ box_trigger_elections(void)
 static int
 box_try_wait_confirm(double timeout)
 {
-	uint64_t promote_term = txn_limbo.promote_greatest_term;
+	const struct txn_limbo_terms *tr = &txn_limbo.terms;
+	uint64_t promote_term = tr->terms_max;
 	txn_limbo_wait_empty(&txn_limbo, timeout);
 	return box_check_promote_term_intact(promote_term);
 }
@@ -1604,7 +1607,8 @@ box_wait_limbo_acked(void)
 	if (txn_limbo_is_empty(&txn_limbo))
 		return txn_limbo.confirmed_lsn;
 
-	uint64_t promote_term = txn_limbo.promote_greatest_term;
+	const struct txn_limbo_terms *tr = &txn_limbo.terms;
+	uint64_t promote_term = tr->terms_max;
 	int quorum = replication_synchro_quorum;
 	struct txn_limbo_entry *last_entry;
 	last_entry = txn_limbo_last_synchro_entry(&txn_limbo);
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 570f77c46..53c86f34e 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -37,6 +37,13 @@
 
 struct txn_limbo txn_limbo;
 
+static void
+txn_limbo_terms_create(struct txn_limbo_terms *tr)
+{
+	vclock_create(&tr->terms_map);
+	tr->terms_max = 0;
+}
+
 static inline void
 txn_limbo_create(struct txn_limbo *limbo)
 {
@@ -45,8 +52,7 @@ txn_limbo_create(struct txn_limbo *limbo)
 	limbo->owner_id = REPLICA_ID_NIL;
 	fiber_cond_create(&limbo->wait_cond);
 	vclock_create(&limbo->vclock);
-	vclock_create(&limbo->promote_term_map);
-	limbo->promote_greatest_term = 0;
+	txn_limbo_terms_create(&limbo->terms);
 	limbo->confirmed_lsn = 0;
 	limbo->rollback_count = 0;
 	limbo->is_in_rollback = false;
@@ -305,10 +311,11 @@ void
 txn_limbo_checkpoint(const struct txn_limbo *limbo,
 		     struct synchro_request *req)
 {
+	const struct txn_limbo_terms *tr = &limbo->terms;
 	req->type = IPROTO_PROMOTE;
 	req->replica_id = limbo->owner_id;
 	req->lsn = limbo->confirmed_lsn;
-	req->term = limbo->promote_greatest_term;
+	req->term = tr->terms_max;
 }
 
 static void
@@ -726,20 +733,21 @@ txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout)
 void
 txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req)
 {
+	struct txn_limbo_terms *tr = &limbo->terms;
 	uint64_t term = req->term;
 	uint32_t origin = req->origin_id;
 	if (txn_limbo_replica_term(limbo, origin) < term) {
-		vclock_follow(&limbo->promote_term_map, origin, term);
-		if (term > limbo->promote_greatest_term)
-			limbo->promote_greatest_term = term;
+		vclock_follow(&tr->terms_map, origin, term);
+		if (term > tr->terms_max)
+			tr->terms_max = term;
 	} else if (iproto_type_is_promote_request(req->type) &&
-		   limbo->promote_greatest_term > 1) {
+		   tr->terms_max > 1) {
 		/* PROMOTE for outdated term. Ignore. */
 		say_info("RAFT: ignoring %s request from instance "
 			 "id %u for term %llu. Greatest term seen "
 			 "before (%llu) is bigger.",
 			 iproto_type_name(req->type), origin, (long long)term,
-			 (long long)limbo->promote_greatest_term);
+			 (long long)tr->terms_max);
 		return;
 	}
 
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 53e52f676..dc980bf7c 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -75,6 +75,31 @@ txn_limbo_entry_is_complete(const struct txn_limbo_entry *e)
 	return e->is_commit || e->is_rollback;
 }
 
+/**
+ * Keep state of promote requests to handle split-brain
+ * situation and other errors.
+ */
+struct txn_limbo_terms {
+	/**
+	 * Latest terms received with PROMOTE entries from remote instances.
+	 * Limbo uses them to filter out the transactions coming not from the
+	 * limbo owner, but so outdated that they are rolled back everywhere
+	 * except outdated nodes.
+	 */
+	struct vclock terms_map;
+	/**
+	 * The biggest PROMOTE term seen by the instance and persisted in WAL.
+	 * It is related to raft term, but not the same. Synchronous replication
+	 * represented by the limbo is interested only in the won elections
+	 * ended with PROMOTE request.
+	 * It means the limbo's term might be smaller than the raft term, while
+	 * there are ongoing elections, or the leader is already known and this
+	 * instance hasn't read its PROMOTE request yet. During other times the
+	 * limbo and raft are in sync and the terms are the same.
+	 */
+	uint64_t terms_max;
+};
+
 /**
  * Limbo is a place where transactions are stored, which are
  * finished, but not committed nor rolled back. These are
@@ -130,23 +155,9 @@ struct txn_limbo {
 	 */
 	struct vclock vclock;
 	/**
-	 * Latest terms received with PROMOTE entries from remote instances.
-	 * Limbo uses them to filter out the transactions coming not from the
-	 * limbo owner, but so outdated that they are rolled back everywhere
-	 * except outdated nodes.
-	 */
-	struct vclock promote_term_map;
-	/**
-	 * The biggest PROMOTE term seen by the instance and persisted in WAL.
-	 * It is related to raft term, but not the same. Synchronous replication
-	 * represented by the limbo is interested only in the won elections
-	 * ended with PROMOTE request.
-	 * It means the limbo's term might be smaller than the raft term, while
-	 * there are ongoing elections, or the leader is already known and this
-	 * instance hasn't read its PROMOTE request yet. During other times the
-	 * limbo and raft are in sync and the terms are the same.
+	 * Track promote requests.
 	 */
-	uint64_t promote_greatest_term;
+	struct txn_limbo_terms terms;
 	/**
 	 * Maximal LSN gathered quorum and either already confirmed in WAL, or
 	 * whose confirmation is in progress right now. Any attempt to confirm
@@ -218,7 +229,8 @@ txn_limbo_last_entry(struct txn_limbo *limbo)
 static inline uint64_t
 txn_limbo_replica_term(const struct txn_limbo *limbo, uint32_t replica_id)
 {
-	return vclock_get(&limbo->promote_term_map, replica_id);
+	const struct txn_limbo_terms *tr = &limbo->terms;
+	return vclock_get(&tr->terms_map, replica_id);
 }
 
 /**
@@ -229,8 +241,9 @@ static inline bool
 txn_limbo_is_replica_outdated(const struct txn_limbo *limbo,
 			      uint32_t replica_id)
 {
+	const struct txn_limbo_terms *tr = &limbo->terms;
 	return txn_limbo_replica_term(limbo, replica_id) <
-	       limbo->promote_greatest_term;
+	       tr->terms_max;
 }
 
 /**
-- 
2.31.1



More information about the Tarantool-patches mailing list