[PATCH] replication: fix broken cases with quorum=0

Konstantin Belyavskiy k.belyavskiy at tarantool.org
Tue Apr 17 17:07:26 MSK 2018


Ticket: https://github.com/tarantool/tarantool/issues/3278
branch: https://github.com/tarantool/tarantool/compare/gh-3278-quorum-fix

This commit is related to 6d81fa99.
With replication_connect_quorum=0 set, the previous commit broke
replication because it skipped the applier_resume() and applier_start()
parts. Fix it and add more test cases.

Close #3278
---
 src/box/replication.cc              | 13 +++--
 test/replication/master_quorum.lua  | 33 +++++++++++++
 test/replication/master_quorum1.lua |  1 +
 test/replication/master_quorum2.lua |  1 +
 test/replication/quorum.result      | 98 ++++++++++++++++++++++++++++++++++++-
 test/replication/quorum.test.lua    | 33 ++++++++++++-
 6 files changed, 173 insertions(+), 6 deletions(-)
 create mode 100644 test/replication/master_quorum.lua
 create mode 120000 test/replication/master_quorum1.lua
 create mode 120000 test/replication/master_quorum2.lua

diff --git a/src/box/replication.cc b/src/box/replication.cc
index 760f83751..b4d5cc2a2 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -600,11 +600,9 @@ error:
 void
 replicaset_follow(void)
 {
-	if (replicaset.applier.total == 0 || replicaset_quorum() == 0) {
+	if (replicaset.applier.total == 0) {
 		/*
-		 * Replication is not configured or quorum is set to
-		 * zero so in the latter case we have no need to wait
-		 * for others.
+		 * Replication is not configured.
 		 */
 		box_clear_orphan();
 		return;
@@ -619,6 +617,13 @@ replicaset_follow(void)
 		/* Restart appliers that failed to connect. */
 		applier_start(replica->applier);
 	}
+	if (replicaset_quorum() == 0) {
+		/*
+		 * Leaving orphan mode, since
+		 * replication_connect_quorum is set to 0.
+		 */
+		box_clear_orphan();
+	}
 }
 
 void
diff --git a/test/replication/master_quorum.lua b/test/replication/master_quorum.lua
new file mode 100644
index 000000000..fb5f7ec2b
--- /dev/null
+++ b/test/replication/master_quorum.lua
@@ -0,0 +1,33 @@
+#!/usr/bin/env tarantool
+
+-- get instance id (first digit) from filename (master_quorum1.lua => 1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+local function instance_uri(instance_id)
+    --return 'localhost:'..(3310 + instance_id)
+    return SOCKET_DIR..'/master_quorum'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+--    log_level = 7;
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+    };
+    replication_connect_quorum = 0;
+    replication_connect_timeout = 0.1;
+})
+
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+
+box.once("bootstrap", function()
+    box.schema.user.grant("guest", 'replication')
+    box.schema.space.create('test', {engine = engine})
+    box.space.test:create_index('primary')
+end)
diff --git a/test/replication/master_quorum1.lua b/test/replication/master_quorum1.lua
new file mode 120000
index 000000000..07096d4b7
--- /dev/null
+++ b/test/replication/master_quorum1.lua
@@ -0,0 +1 @@
+master_quorum.lua
\ No newline at end of file
diff --git a/test/replication/master_quorum2.lua b/test/replication/master_quorum2.lua
new file mode 120000
index 000000000..07096d4b7
--- /dev/null
+++ b/test/replication/master_quorum2.lua
@@ -0,0 +1 @@
+master_quorum.lua
\ No newline at end of file
diff --git a/test/replication/quorum.result b/test/replication/quorum.result
index 909bfb55b..8f6e7a070 100644
--- a/test/replication/quorum.result
+++ b/test/replication/quorum.result
@@ -245,6 +245,17 @@ test_run:drop_cluster(SERVERS)
 box.schema.user.grant('guest', 'replication')
 ---
 ...
+space = box.schema.space.create('test', {engine = test_run:get_cfg('engine')});
+---
+...
+index = box.space.test:create_index('primary')
+---
+...
+-- Insert something just to check that replica with quorum = 0 works as expected.
+space:insert{1}
+---
+- [1]
+...
 test_run:cmd("create server replica with rpl_master=default, script='replication/replica_no_quorum.lua'")
 ---
 - true
@@ -261,6 +272,10 @@ box.info.status -- running
 ---
 - running
 ...
+box.space.test:select()
+---
+- - [1]
+...
 test_run:cmd("switch default")
 ---
 - true
@@ -291,6 +306,37 @@ test_run:cmd("switch default")
 ---
 - true
 ...
+-- Check that replica is able to reconnect, case was broken with earlier quorum "fix".
+box.cfg{listen = listen}
+---
+...
+space:insert{2}
+---
+- [2]
+...
+vclock = test_run:get_vclock("default")
+---
+...
+_ = test_run:wait_vclock("replica", vclock)
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.info.status -- running
+---
+- running
+...
+box.space.test:select()
+---
+- - [1]
+  - [2]
+...
+test_run:cmd("switch default")
+---
+- true
+...
 test_run:cmd("stop server replica")
 ---
 - true
@@ -299,9 +345,59 @@ test_run:cmd("cleanup server replica")
 ---
 - true
 ...
+space:drop()
+---
+...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
-box.cfg{listen = listen}
+-- Second case, check that master-master works.
+SERVERS = {'master_quorum1', 'master_quorum2'}
+---
+...
+-- Deploy a cluster.
+test_run:create_cluster(SERVERS)
+---
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+test_run:cmd("switch master_quorum1")
+---
+- true
+...
+repl = box.cfg.replication
+---
+...
+box.cfg{replication = ""}
+---
+...
+box.space.test:insert{1}
+---
+- [1]
+...
+box.cfg{replication = repl}
+---
+...
+vclock = test_run:get_vclock("master_quorum1")
+---
+...
+_ = test_run:wait_vclock("master_quorum2", vclock)
+---
+...
+test_run:cmd("switch master_quorum2")
+---
+- true
+...
+box.space.test:select()
+---
+- - [1]
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- Cleanup.
+test_run:drop_cluster(SERVERS)
 ---
 ...
diff --git a/test/replication/quorum.test.lua b/test/replication/quorum.test.lua
index a96dec759..1df0ae1e7 100644
--- a/test/replication/quorum.test.lua
+++ b/test/replication/quorum.test.lua
@@ -103,10 +103,15 @@ test_run:drop_cluster(SERVERS)
 --
 
 box.schema.user.grant('guest', 'replication')
+space = box.schema.space.create('test', {engine = test_run:get_cfg('engine')});
+index = box.space.test:create_index('primary')
+-- Insert something just to check that replica with quorum = 0 works as expected.
+space:insert{1}
 test_run:cmd("create server replica with rpl_master=default, script='replication/replica_no_quorum.lua'")
 test_run:cmd("start server replica")
 test_run:cmd("switch replica")
 box.info.status -- running
+box.space.test:select()
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 listen = box.cfg.listen
@@ -115,7 +120,33 @@ test_run:cmd("start server replica")
 test_run:cmd("switch replica")
 box.info.status -- running
 test_run:cmd("switch default")
+-- Check that replica is able to reconnect, case was broken with earlier quorum "fix".
+box.cfg{listen = listen}
+space:insert{2}
+vclock = test_run:get_vclock("default")
+_ = test_run:wait_vclock("replica", vclock)
+test_run:cmd("switch replica")
+box.info.status -- running
+box.space.test:select()
+test_run:cmd("switch default")
 test_run:cmd("stop server replica")
 test_run:cmd("cleanup server replica")
+space:drop()
 box.schema.user.revoke('guest', 'replication')
-box.cfg{listen = listen}
+-- Second case, check that master-master works.
+SERVERS = {'master_quorum1', 'master_quorum2'}
+-- Deploy a cluster.
+test_run:create_cluster(SERVERS)
+test_run:wait_fullmesh(SERVERS)
+test_run:cmd("switch master_quorum1")
+repl = box.cfg.replication
+box.cfg{replication = ""}
+box.space.test:insert{1}
+box.cfg{replication = repl}
+vclock = test_run:get_vclock("master_quorum1")
+_ = test_run:wait_vclock("master_quorum2", vclock)
+test_run:cmd("switch master_quorum2")
+box.space.test:select()
+test_run:cmd("switch default")
+-- Cleanup.
+test_run:drop_cluster(SERVERS)
-- 
2.14.3 (Apple Git-98)




More information about the Tarantool-patches mailing list