[PATCH 2/2] replication: do not stop syncing if replicas are loading

Vladimir Davydov vdavydov.dev at gmail.com
Thu May 24 21:00:28 MSK 2018


If a replica disconnects while sync is in progress, box.cfg{} may stop
syncing, leaving the instance in 'orphan' mode. This will happen if not
enough replicas are connected to form a quorum. This makes sense e.g. on
network error, but not when a replica is loading, because in the latter
case it should be up and running quite soon. Let's account for replicas
that disconnected because they haven't completed initial configuration
yet, and continue syncing as long as connected + loading >= quorum.

Closes #3422
---
 src/box/replication.cc                | 17 ++++++++++++-
 src/box/replication.h                 | 10 ++++++++
 test/replication/rebootstrap.lua      | 26 ++++++++++++++++++++
 test/replication/rebootstrap.result   | 45 +++++++++++++++++++++++++++++++++++
 test/replication/rebootstrap.test.lua | 21 ++++++++++++++++
 test/replication/rebootstrap1.lua     |  1 +
 test/replication/rebootstrap2.lua     |  1 +
 test/replication/suite.cfg            |  1 +
 8 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 test/replication/rebootstrap.lua
 create mode 100644 test/replication/rebootstrap.result
 create mode 100644 test/replication/rebootstrap.test.lua
 create mode 120000 test/replication/rebootstrap1.lua
 create mode 120000 test/replication/rebootstrap2.lua

diff --git a/src/box/replication.cc b/src/box/replication.cc
index 6d90ee5f..a9fab319 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -138,6 +138,7 @@ replica_new(void)
 	trigger_create(&replica->on_applier_state,
 		       replica_on_applier_state_f, NULL, NULL);
 	replica->state = REPLICA_DISCONNECTED;
+	replica->is_loading = false;
 	return replica;
 }
 
@@ -234,6 +235,7 @@ replica_on_applier_connect(struct replica *replica)
 	assert(tt_uuid_is_nil(&replica->uuid));
 	assert(!tt_uuid_is_nil(&applier->uuid));
 	assert(replica->state == REPLICA_DISCONNECTED);
+	assert(!replica->is_loading);
 
 	replica->uuid = applier->uuid;
 
@@ -277,6 +279,12 @@ replica_on_applier_reconnect(struct replica *replica)
 	assert(!tt_uuid_is_nil(&applier->uuid));
 	assert(replica->state == REPLICA_DISCONNECTED);
 
+	if (replica->is_loading) {
+		assert(replicaset.applier.loading > 0);
+		replicaset.applier.loading--;
+		replica->is_loading = false;
+	}
+
 	if (!tt_uuid_is_equal(&replica->uuid, &applier->uuid)) {
 		/*
 		 * Master's UUID changed, most likely because it was
@@ -316,6 +324,10 @@ replica_on_applier_disconnect(struct replica *replica)
 	case REPLICA_CONNECTED:
 		assert(replicaset.applier.connected > 0);
 		replicaset.applier.connected--;
+		if (replica->applier->last_logged_errcode == ER_LOADING) {
+			replicaset.applier.loading++;
+			replica->is_loading = true;
+		}
 		break;
 	case REPLICA_DISCONNECTED:
 		break;
@@ -424,6 +436,7 @@ replicaset_update(struct applier **appliers, int count)
 		applier = replica->applier;
 		replica_clear_applier(replica);
 		replica->state = REPLICA_DISCONNECTED;
+		replica->is_loading = false;
 		applier_stop(applier);
 		applier_delete(applier);
 	}
@@ -439,6 +452,7 @@ replicaset_update(struct applier **appliers, int count)
 	/* Save new appliers */
 	replicaset.applier.total = count;
 	replicaset.applier.connected = 0;
+	replicaset.applier.loading = 0;
 	replicaset.applier.synced = 0;
 
 	replica_hash_foreach_safe(&uniq, replica, next) {
@@ -646,7 +660,8 @@ replicaset_sync(void)
 	 * replication_sync_lag
 	 */
 	while (replicaset.applier.synced < quorum &&
-	       replicaset.applier.connected >= quorum)
+	       replicaset.applier.connected +
+	       replicaset.applier.loading >= quorum)
 		fiber_cond_wait(&replicaset.applier.cond);
 
 	if (replicaset.applier.synced < quorum) {
diff --git a/src/box/replication.h b/src/box/replication.h
index 8a9d5754..6d83f43b 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -194,6 +194,11 @@ struct replicaset {
 		 */
 		int connected;
 		/**
+		 * Number of appliers that are disconnected
+		 * because the replica is loading.
+		 */
+		int loading;
+		/**
 		 * Number of appliers that have successfully
 		 * synchronized and hence contribute to the
 		 * quorum.
@@ -256,6 +261,11 @@ struct replica {
 	struct trigger on_applier_state;
 	/** Replica sync state. */
 	enum replica_state state;
+	/**
+	 * Set if we failed to sync to the replica because it
+	 * hasn't finished its initial configuration yet.
+	 */
+	bool is_loading;
 };
 
 enum {
diff --git a/test/replication/rebootstrap.lua b/test/replication/rebootstrap.lua
new file mode 100644
index 00000000..0b67e7b0
--- /dev/null
+++ b/test/replication/rebootstrap.lua
@@ -0,0 +1,26 @@
+#!/usr/bin/env tarantool
+
+-- get the instance id (digit) from the filename (rebootstrap1.lua => 1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+local function instance_uri(instance_id)
+    return SOCKET_DIR..'/rebootstrap'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+    instance_uuid = '12345678-abcd-1234-abcd-123456789ef' .. INSTANCE_ID,
+    replication_timeout = 0.1;
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+    };
+})
+
+box.once("bootstrap", function()
+    box.schema.user.grant('guest', 'replication')
+end)
diff --git a/test/replication/rebootstrap.result b/test/replication/rebootstrap.result
new file mode 100644
index 00000000..afbfc8e6
--- /dev/null
+++ b/test/replication/rebootstrap.result
@@ -0,0 +1,45 @@
+test_run = require('test_run').new()
+---
+...
+SERVERS = {'rebootstrap1', 'rebootstrap2'}
+---
+...
+test_run:create_cluster(SERVERS)
+---
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+--
+-- gh-3422: If quorum can't be formed, because some replicas are
+-- re-bootstrapping, box.cfg{} must wait for bootstrap to complete
+-- instead of stopping synchronization and leaving the instance
+-- in 'orphan' mode.
+--
+test_run:cmd('stop server rebootstrap1')
+---
+- true
+...
+test_run:cmd('restart server rebootstrap2 with cleanup=True, wait=False, wait_load=False')
+---
+- true
+...
+test_run:cmd('start server rebootstrap1')
+---
+- true
+...
+test_run:cmd('switch rebootstrap1')
+---
+- true
+...
+box.info.status -- running
+---
+- running
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:drop_cluster(SERVERS)
+---
+...
diff --git a/test/replication/rebootstrap.test.lua b/test/replication/rebootstrap.test.lua
new file mode 100644
index 00000000..954726dd
--- /dev/null
+++ b/test/replication/rebootstrap.test.lua
@@ -0,0 +1,21 @@
+test_run = require('test_run').new()
+
+SERVERS = {'rebootstrap1', 'rebootstrap2'}
+
+test_run:create_cluster(SERVERS)
+test_run:wait_fullmesh(SERVERS)
+
+--
+-- gh-3422: If quorum can't be formed, because some replicas are
+-- re-bootstrapping, box.cfg{} must wait for bootstrap to complete
+-- instead of stopping synchronization and leaving the instance
+-- in 'orphan' mode.
+--
+test_run:cmd('stop server rebootstrap1')
+test_run:cmd('restart server rebootstrap2 with cleanup=True, wait=False, wait_load=False')
+test_run:cmd('start server rebootstrap1')
+test_run:cmd('switch rebootstrap1')
+box.info.status -- running
+
+test_run:cmd('switch default')
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/rebootstrap1.lua b/test/replication/rebootstrap1.lua
new file mode 120000
index 00000000..0840babd
--- /dev/null
+++ b/test/replication/rebootstrap1.lua
@@ -0,0 +1 @@
+rebootstrap.lua
\ No newline at end of file
diff --git a/test/replication/rebootstrap2.lua b/test/replication/rebootstrap2.lua
new file mode 120000
index 00000000..0840babd
--- /dev/null
+++ b/test/replication/rebootstrap2.lua
@@ -0,0 +1 @@
+rebootstrap.lua
\ No newline at end of file
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 7ae078f7..95e94e5a 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -5,6 +5,7 @@
     "status.test.lua": {},
     "wal_off.test.lua": {},
     "hot_standby.test.lua": {},
+    "rebootstrap.test.lua": {},
     "*": {
         "memtx": {"engine": "memtx"},
         "vinyl": {"engine": "vinyl"}
-- 
2.11.0




More information about the Tarantool-patches mailing list