Tarantool development patches archive
 help / color / mirror / Atom feed
From: Serge Petrenko <sergepetrenko@tarantool.org>
To: v.shpilevoy@tarantool.org, gorcunov@gmail.com, sergos@tarantool.org
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH 2/2] box: introduce a cfg handle to become syncro leader
Date: Sun,  5 Jul 2020 00:55:49 +0300	[thread overview]
Message-ID: <9a78892071bb44779f3bc21788b86b8c53a8ace5.1593899478.git.sergepetrenko@tarantool.org> (raw)
In-Reply-To: <cover.1593899478.git.sergepetrenko@tarantool.org>

Introduce replication_synchro_leader option to box.cfg.

Once an instance is promoted to leader, it makes sure that txn_limbo is
free of previous leader's transactions. In order to achieve this goal,
the instance first waits for 2 replication_synchro_timeouts  so that
confirmations and rollbacks from the former leader reach it.

If the limbo remains non-empty, the new leader starts figuring out which
transactions should be confirmed and which should be rolled back. In
order to do so the instance scans through vclocks of all the instances
that replicate from it and defines which former leader's lsn is the last
reached by replication_synchro_quorum of replicas.

Then the instance writes appropriate CONFIRM and ROLLBACK entries.
After these actions the limbo must be empty, and the instance may
proceed with appending its own entries to the limbo.

Closes #4849
---
 src/box/box.cc           | 79 ++++++++++++++++++++++++++++++++++++++++
 src/box/box.h            |  1 +
 src/box/lua/cfg.cc       |  9 +++++
 src/box/lua/load_cfg.lua |  4 ++
 src/box/txn_limbo.c      | 16 +-------
 src/box/txn_limbo.h      | 15 ++++++++
 6 files changed, 110 insertions(+), 14 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index ca24b98ca..087710383 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -78,6 +78,7 @@
 #include "sequence.h"
 #include "sql_stmt_cache.h"
 #include "msgpack.h"
+#include "trivia/util.h"
 
 static char status[64] = "unknown";
 
@@ -945,6 +946,84 @@ box_set_replication_anon(void)
 
 }
 
+void
+box_set_replication_synchro_leader(void)
+{
+	bool is_leader = cfg_geti("replication_synchro_leader");
+	/*
+	 * For now no actions required when an instance stops
+	 * being a leader. We should probably wait until txn_limbo
+	 * becomes empty.
+	 */
+	if (!is_leader)
+		return;
+	uint32_t former_leader_id = txn_limbo.instance_id;
+	if (former_leader_id == REPLICA_ID_NIL ||
+	    former_leader_id == instance_id) {
+		return;
+	}
+
+	/* Wait until pending confirmations/rollbacks reach us. */
+	double timeout = 2 * txn_limbo_confirm_timeout(&txn_limbo);
+	double start_tm = fiber_time();
+	while (!txn_limbo_is_empty(&txn_limbo)) {
+		if (fiber_time() - start_tm > timeout)
+			break;
+		fiber_sleep(0.001);
+	}
+
+	if (!txn_limbo_is_empty(&txn_limbo)) {
+		int64_t lsns[VCLOCK_MAX];
+		int len = 0;
+		const struct vclock  *vclock;
+		replicaset_foreach(replica) {
+			if (replica->relay != NULL &&
+			    relay_get_state(replica->relay) != RELAY_OFF &&
+			    !replica->anon) {
+				assert(!tt_uuid_is_equal(&INSTANCE_UUID,
+							 &replica->uuid));
+				vclock = relay_vclock(replica->relay);
+				int64_t lsn = vclock_get(vclock,
+							 former_leader_id);
+				lsns[len++] = lsn;
+			}
+		}
+		lsns[len++] = vclock_get(box_vclock, former_leader_id);
+		assert(len < VCLOCK_MAX);
+
+		int64_t confirm_lsn = 0;
+		if (len >= replication_synchro_quorum) {
+			qsort(lsns, len, sizeof(int64_t), cmp_i64);
+			confirm_lsn = lsns[len - replication_synchro_quorum];
+		}
+
+		struct txn_limbo_entry *e, *last_quorum = NULL;
+		struct txn_limbo_entry *rollback = NULL;
+		rlist_foreach_entry(e, &txn_limbo.queue, in_queue) {
+			if (txn_has_flag(e->txn, TXN_WAIT_ACK)) {
+				if (e->lsn <= confirm_lsn) {
+					last_quorum = e;
+				} else {
+					rollback = e;
+					break;
+				}
+			}
+		}
+
+		if (last_quorum != NULL) {
+			confirm_lsn = last_quorum->lsn;
+			txn_limbo_write_confirm(&txn_limbo, confirm_lsn);
+			txn_limbo_read_confirm(&txn_limbo, confirm_lsn);
+		}
+		if (rollback != NULL) {
+			txn_limbo_write_rollback(&txn_limbo, rollback->lsn);
+			txn_limbo_read_rollback(&txn_limbo, rollback->lsn - 1);
+		}
+
+		assert(txn_limbo_is_empty(&txn_limbo));
+	}
+}
+
 void
 box_listen(void)
 {
diff --git a/src/box/box.h b/src/box/box.h
index f9789154e..565b0ebce 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -245,6 +245,7 @@ void box_set_replication_connect_quorum(void);
 void box_set_replication_sync_lag(void);
 int box_set_replication_synchro_quorum(void);
 int box_set_replication_synchro_timeout(void);
+void box_set_replication_synchro_leader(void);
 void box_set_replication_sync_timeout(void);
 void box_set_replication_skip_conflict(void);
 void box_set_replication_anon(void);
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index d481155cd..adc1fcf3f 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -329,6 +329,14 @@ lbox_cfg_set_replication_synchro_timeout(struct lua_State *L)
 	return 0;
 }
 
+static int
+lbox_cfg_set_replication_synchro_leader(struct lua_State *L)
+{
+	(void) L;
+	box_set_replication_synchro_leader();
+	return 0;
+}
+
 static int
 lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
 {
@@ -388,6 +396,7 @@ box_lua_cfg_init(struct lua_State *L)
 		{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
 		{"cfg_set_replication_synchro_quorum", lbox_cfg_set_replication_synchro_quorum},
 		{"cfg_set_replication_synchro_timeout", lbox_cfg_set_replication_synchro_timeout},
+		{"cfg_set_replication_synchro_leader", lbox_cfg_set_replication_synchro_leader},
 		{"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
 		{"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict},
 		{"cfg_set_replication_anon", lbox_cfg_set_replication_anon},
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index 107bc1582..9a968f30e 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -91,6 +91,7 @@ local default_cfg = {
     replication_sync_timeout = 300,
     replication_synchro_quorum = 1,
     replication_synchro_timeout = 5,
+    replication_synchro_leader = false,
     replication_connect_timeout = 30,
     replication_connect_quorum = nil, -- connect all
     replication_skip_conflict = false,
@@ -168,6 +169,7 @@ local template_cfg = {
     replication_sync_timeout = 'number',
     replication_synchro_quorum = 'number',
     replication_synchro_timeout = 'number',
+    replication_synchro_leader = 'boolean',
     replication_connect_timeout = 'number',
     replication_connect_quorum = 'number',
     replication_skip_conflict = 'boolean',
@@ -286,6 +288,7 @@ local dynamic_cfg = {
     replication_sync_timeout = private.cfg_set_replication_sync_timeout,
     replication_synchro_quorum = private.cfg_set_replication_synchro_quorum,
     replication_synchro_timeout = private.cfg_set_replication_synchro_timeout,
+    replication_synchro_leader = private.cfg_set_replication_synchro_leader,
     replication_skip_conflict = private.cfg_set_replication_skip_conflict,
     replication_anon        = private.cfg_set_replication_anon,
     instance_uuid           = check_instance_uuid,
@@ -333,6 +336,7 @@ local dynamic_cfg_order = {
     -- the new one. This should be fixed when box.cfg is able to
     -- apply some parameters together and atomically.
     replication_anon        = 250,
+    replication_synchro__leader = 250,
 }
 
 local function sort_cfg_cb(l, r)
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 44a0c7273..992115ad1 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -148,9 +148,6 @@ txn_limbo_check_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 	return entry->is_commit;
 }
 
-static int
-txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn);
-
 int
 txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 {
@@ -261,11 +258,7 @@ rollback:
 	return -1;
 }
 
-/**
- * Write a confirmation entry to WAL. After it's written all the
- * transactions waiting for confirmation may be finished.
- */
-static int
+int
 txn_limbo_write_confirm(struct txn_limbo *limbo, int64_t lsn)
 {
 	return txn_limbo_write_confirm_rollback(limbo, lsn, true);
@@ -303,12 +296,7 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
 	}
 }
 
-/**
- * Write a rollback message to WAL. After it's written
- * all the transactions following the current one and waiting
- * for confirmation must be rolled back.
- */
-static int
+int
 txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn)
 {
 	return txn_limbo_write_confirm_rollback(limbo, lsn, false);
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 3abbe9e85..5bf5827ac 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -205,6 +205,21 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn);
 int
 txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry);
 
+/**
+ * Write a confirmation entry to WAL. After it's written all the
+ * transactions waiting for confirmation may be finished.
+ */
+int
+txn_limbo_write_confirm(struct txn_limbo *limbo, int64_t lsn);
+
+/**
+ * Write a rollback message to WAL. After it's written
+ * all the transactions following the current one and waiting
+ * for confirmation must be rolled back.
+ */
+int
+txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn);
+
 /**
  * Confirm all the entries up to the given master's LSN.
  */
-- 
2.24.3 (Apple Git-128)

  parent reply	other threads:[~2020-07-04 21:56 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-04 21:55 [Tarantool-patches] [PATCH 0/2] introduce actions on leader election Serge Petrenko
2020-07-04 21:55 ` [Tarantool-patches] [PATCH 1/2] util: move cmp_i64 from xlog.c to util.h Serge Petrenko
2020-07-04 21:55 ` Serge Petrenko [this message]
2020-07-04 23:03   ` [Tarantool-patches] [PATCH 2/2] box: introduce a cfg handle to become syncro leader Vladislav Shpilevoy
2020-07-04 23:18   ` Vladislav Shpilevoy
2020-07-05 11:09     ` Serge Petrenko
2020-07-09 22:03   ` Leonid Vasiliev
2020-07-20 21:13     ` Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9a78892071bb44779f3bc21788b86b8c53a8ace5.1593899478.git.sergepetrenko@tarantool.org \
    --to=sergepetrenko@tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=sergos@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 2/2] box: introduce a cfg handle to become syncro leader' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox