[Tarantool-patches] [RFC 4/4] qsync: allow to specify replication_synchro_quorum as a formula

Cyrill Gorcunov gorcunov at gmail.com
Thu Nov 19 22:41:00 MSK 2020


When synchronous replication is used we expect the user to specify
a quorum number, i.e. the number of replicas where data must be
replicated before the master node continues accepting new operations.

This is not very convenient since a user may not know initially
how many replicas will be used. Moreover the number of replicas
may vary dynamically. For this reason we allow the quorum to be
specified in a symbolic way, as a formula.

For example

box.cfg {
	replication_synchro_quorum = "n/2+1",
}

where n is the number of registered replicas in the cluster.
Once a new replica is attached or an old one is detached the
number is re-evaluated and propagated.

Internally, on each "_cluster" system space update the trigger
runs the replication_on_cluster_update() helper, which counts the
number of registered replicas and updates the quorum value, notifying
limbo and raft about the change.

Closes #5446

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 src/box/alter.cc       |  2 ++
 src/box/box.cc         | 11 +++--------
 src/box/replication.cc | 31 +++++++++++++++++++++++++++++++
 src/box/replication.h  |  9 +++++++++
 4 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 075b79d33..bd291ad4f 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -4196,6 +4196,7 @@ register_replica(struct trigger *trigger, void * /* event */)
 	} else {
 		try {
 			replica = replicaset_add(id, &uuid);
+			replication_on_cluster_update();
 			/* Can't throw exceptions from on_commit trigger */
 		} catch(Exception *e) {
 			panic("Can't register replica: %s", e->errmsg);
@@ -4216,6 +4217,7 @@ unregister_replica(struct trigger *trigger, void * /* event */)
 	struct replica *replica = replica_by_uuid(&old_uuid);
 	assert(replica != NULL);
 	replica_clear_id(replica);
+	replication_on_cluster_update();
 	return 0;
 }
 
diff --git a/src/box/box.cc b/src/box/box.cc
index 5f7ddfa99..558a71468 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -634,14 +634,6 @@ box_check_replication_synchro_quorum(void)
 		 */
 		int value = replication_synchro_quorum;
 		quorum = eval_replication_synchro_quorum(value);
-		/*
-		 * FIXME: Until we get full support.
-		 */
-		diag_set(ClientError, ER_CFG,
-			 "replication_synchro_quorum",
-			 "symbolic evaluation is not yet supported");
-		diag_log();
-		quorum = -1;
 	} else {
 		quorum = cfg_geti("replication_synchro_quorum");
 	}
@@ -1004,6 +996,9 @@ box_set_replication_synchro_quorum(void)
 	int value = box_check_replication_synchro_quorum();
 	if (value < 0)
 		return -1;
+
+	bool isnumber = cfg_isnumber("replication_synchro_quorum");
+	replication_synchro_quorum_eval = !isnumber;
 	replication_synchro_quorum_update(value);
 	return 0;
 }
diff --git a/src/box/replication.cc b/src/box/replication.cc
index c83392f81..bde850c1c 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -53,6 +53,7 @@ double replication_connect_timeout = 30.0; /* seconds */
 int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
 double replication_sync_lag = 10.0; /* seconds */
 int replication_synchro_quorum = 1;
+bool replication_synchro_quorum_eval = false;
 double replication_synchro_timeout = 5.0; /* seconds */
 double replication_sync_timeout = 300.0; /* seconds */
 bool replication_skip_conflict = false;
@@ -100,6 +101,36 @@ replication_synchro_quorum_update(int value)
 	raft_cfg_election_quorum(box_raft());
 }
 
+/**
+ * Re-evaluate the synchro quorum number when a replica
+ * gets registered/unregistered and the quorum depends on
+ * the number of replicas via a formula in the config.
+ */
+void
+replication_on_cluster_update(void)
+{
+	if (!replication_synchro_quorum_eval)
+		return;
+
+	/*
+	 * Count only registered non-anonymous replicas when
+	 * evaluating the quorum from a formula in the config.
+	 */
+	int value = replicaset.registered_count - replicaset.anon_count;
+	int quorum = eval_replication_synchro_quorum(value);
+
+	/*
+	 * Upon node bootstrap we verify configuration so there
+	 * must never be a value out of bounds, still better to
+	 * be sure since evaluation code lives far from here.
+	 */
+	if (quorum <= 0 || quorum >= VCLOCK_MAX)
+		panic("Unexpected result for replication_synchro_quorum eval");
+
+	say_info("replication: evaluated quorum is %d", quorum);
+	replication_synchro_quorum_update(quorum);
+}
+
 void
 replication_init(void)
 {
diff --git a/src/box/replication.h b/src/box/replication.h
index ced519612..fa651d1c5 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -131,6 +131,12 @@ extern double replication_sync_lag;
  */
 extern int replication_synchro_quorum;
 
+/**
+ * A flag to point that replication_synchro_quorum needs
+ * to be evaluated as a formula.
+ */
+extern bool replication_synchro_quorum_eval;
+
 /**
  * Time in seconds which the master node is able to wait for ACKs
  * for a synchronous transaction until it is rolled back.
@@ -178,6 +184,9 @@ replication_disconnect_timeout(void)
 void
 replication_synchro_quorum_update(int value);
 
+void
+replication_on_cluster_update(void);
+
 void
 replication_init(void);
 
-- 
2.26.2



More information about the Tarantool-patches mailing list