From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Konstantin Belyavskiy Subject: [PATCH] replication: fix broken cases with quorum=0 Date: Tue, 17 Apr 2018 17:07:26 +0300 Message-Id: <20180417140726.91732-1-k.belyavskiy@tarantool.org> To: vdavydov@tarantool.org, georgy@tarantool.org Cc: tarantool-patches@freelists.org List-ID: Ticket: https://github.com/tarantool/tarantool/issues/3278 branch: https://github.com/tarantool/tarantool/compare/gh-3278-quorum-fix This commit is related to 6d81fa99. With replication_connect_quorum=0 set, the previous commit broke replication because it skipped the applier_resume() and applier_start() calls. Fix it and add more test cases. Close #3278 --- src/box/replication.cc | 13 +++-- test/replication/master_quorum.lua | 33 +++++++++++++ test/replication/master_quorum1.lua | 1 + test/replication/master_quorum2.lua | 1 + test/replication/quorum.result | 98 ++++++++++++++++++++++++++++++++++++- test/replication/quorum.test.lua | 33 ++++++++++++- 6 files changed, 173 insertions(+), 6 deletions(-) create mode 100644 test/replication/master_quorum.lua create mode 120000 test/replication/master_quorum1.lua create mode 120000 test/replication/master_quorum2.lua diff --git a/src/box/replication.cc b/src/box/replication.cc index 760f83751..b4d5cc2a2 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -600,11 +600,9 @@ error: void replicaset_follow(void) { - if (replicaset.applier.total == 0 || replicaset_quorum() == 0) { + if (replicaset.applier.total == 0) { /* - * Replication is not configured or quorum is set to - * zero so in the latter case we have no need to wait - * for others. + * Replication is not configured. */ box_clear_orphan(); return; @@ -619,6 +617,13 @@ replicaset_follow(void) /* Restart appliers that failed to connect. */ applier_start(replica->applier); } + if (replicaset_quorum() == 0) { + /* + * Leaving orphan mode, since + * replication_connect_quorum is set to 0. 
+ */ + box_clear_orphan(); + } } void diff --git a/test/replication/master_quorum.lua b/test/replication/master_quorum.lua new file mode 100644 index 000000000..fb5f7ec2b --- /dev/null +++ b/test/replication/master_quorum.lua @@ -0,0 +1,33 @@ +#!/usr/bin/env tarantool + +-- get instance name from filename (master_quorum1.lua => master_quorum1) +local INSTANCE_ID = string.match(arg[0], "%d") + +local SOCKET_DIR = require('fio').cwd() +local function instance_uri(instance_id) + --return 'localhost:'..(3310 + instance_id) + return SOCKET_DIR..'/master_quorum'..instance_id..'.sock'; +end + +-- start console first +require('console').listen(os.getenv('ADMIN')) + +box.cfg({ + listen = instance_uri(INSTANCE_ID); +-- log_level = 7; + replication = { + instance_uri(1); + instance_uri(2); + }; + replication_connect_quorum = 0; + replication_connect_timeout = 0.1; +}) + +test_run = require('test_run').new() +engine = test_run:get_cfg('engine') + +box.once("bootstrap", function() + box.schema.user.grant("guest", 'replication') + box.schema.space.create('test', {engine = engine}) + box.space.test:create_index('primary') +end) diff --git a/test/replication/master_quorum1.lua b/test/replication/master_quorum1.lua new file mode 120000 index 000000000..07096d4b7 --- /dev/null +++ b/test/replication/master_quorum1.lua @@ -0,0 +1 @@ +master_quorum.lua \ No newline at end of file diff --git a/test/replication/master_quorum2.lua b/test/replication/master_quorum2.lua new file mode 120000 index 000000000..07096d4b7 --- /dev/null +++ b/test/replication/master_quorum2.lua @@ -0,0 +1 @@ +master_quorum.lua \ No newline at end of file diff --git a/test/replication/quorum.result b/test/replication/quorum.result index 909bfb55b..8f6e7a070 100644 --- a/test/replication/quorum.result +++ b/test/replication/quorum.result @@ -245,6 +245,17 @@ test_run:drop_cluster(SERVERS) box.schema.user.grant('guest', 'replication') --- ... 
+space = box.schema.space.create('test', {engine = test_run:get_cfg('engine')}); +--- +... +index = box.space.test:create_index('primary') +--- +... +-- Insert something just to check that replica with quorum = 0 works as expected. +space:insert{1} +--- +- [1] +... test_run:cmd("create server replica with rpl_master=default, script='replication/replica_no_quorum.lua'") --- - true @@ -261,6 +272,10 @@ box.info.status -- running --- - running ... +box.space.test:select() +--- +- - [1] +... test_run:cmd("switch default") --- - true @@ -291,6 +306,37 @@ test_run:cmd("switch default") --- - true ... +-- Check that replica is able to reconnect, case was broken with earlier quorum "fix". +box.cfg{listen = listen} +--- +... +space:insert{2} +--- +- [2] +... +vclock = test_run:get_vclock("default") +--- +... +_ = test_run:wait_vclock("replica", vclock) +--- +... +test_run:cmd("switch replica") +--- +- true +... +box.info.status -- running +--- +- running +... +box.space.test:select() +--- +- - [1] + - [2] +... +test_run:cmd("switch default") +--- +- true +... test_run:cmd("stop server replica") --- - true @@ -299,9 +345,59 @@ test_run:cmd("cleanup server replica") --- - true ... +space:drop() +--- +... box.schema.user.revoke('guest', 'replication') --- ... -box.cfg{listen = listen} +-- Second case, check that master-master works. +SERVERS = {'master_quorum1', 'master_quorum2'} +--- +... +-- Deploy a cluster. +test_run:create_cluster(SERVERS) +--- +... +test_run:wait_fullmesh(SERVERS) +--- +... +test_run:cmd("switch master_quorum1") +--- +- true +... +repl = box.cfg.replication +--- +... +box.cfg{replication = ""} +--- +... +box.space.test:insert{1} +--- +- [1] +... +box.cfg{replication = repl} +--- +... +vclock = test_run:get_vclock("master_quorum1") +--- +... +_ = test_run:wait_vclock("master_quorum2", vclock) +--- +... +test_run:cmd("switch master_quorum2") +--- +- true +... +box.space.test:select() +--- +- - [1] +... +test_run:cmd("switch default") +--- +- true +... 
+-- Cleanup. +test_run:drop_cluster(SERVERS) --- ... diff --git a/test/replication/quorum.test.lua b/test/replication/quorum.test.lua index a96dec759..1df0ae1e7 100644 --- a/test/replication/quorum.test.lua +++ b/test/replication/quorum.test.lua @@ -103,10 +103,15 @@ test_run:drop_cluster(SERVERS) -- box.schema.user.grant('guest', 'replication') +space = box.schema.space.create('test', {engine = test_run:get_cfg('engine')}); +index = box.space.test:create_index('primary') +-- Insert something just to check that replica with quorum = 0 works as expected. +space:insert{1} test_run:cmd("create server replica with rpl_master=default, script='replication/replica_no_quorum.lua'") test_run:cmd("start server replica") test_run:cmd("switch replica") box.info.status -- running +box.space.test:select() test_run:cmd("switch default") test_run:cmd("stop server replica") listen = box.cfg.listen @@ -115,7 +120,33 @@ test_run:cmd("start server replica") test_run:cmd("switch replica") box.info.status -- running test_run:cmd("switch default") +-- Check that replica is able to reconnect, case was broken with earlier quorum "fix". +box.cfg{listen = listen} +space:insert{2} +vclock = test_run:get_vclock("default") +_ = test_run:wait_vclock("replica", vclock) +test_run:cmd("switch replica") +box.info.status -- running +box.space.test:select() +test_run:cmd("switch default") test_run:cmd("stop server replica") test_run:cmd("cleanup server replica") +space:drop() box.schema.user.revoke('guest', 'replication') -box.cfg{listen = listen} +-- Second case, check that master-master works. +SERVERS = {'master_quorum1', 'master_quorum2'} +-- Deploy a cluster. 
+test_run:create_cluster(SERVERS) +test_run:wait_fullmesh(SERVERS) +test_run:cmd("switch master_quorum1") +repl = box.cfg.replication +box.cfg{replication = ""} +box.space.test:insert{1} +box.cfg{replication = repl} +vclock = test_run:get_vclock("master_quorum1") +_ = test_run:wait_vclock("master_quorum2", vclock) +test_run:cmd("switch master_quorum2") +box.space.test:select() +test_run:cmd("switch default") +-- Cleanup. +test_run:drop_cluster(SERVERS) -- 2.14.3 (Apple Git-98)