From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id C5BF4229B3 for ; Tue, 7 Aug 2018 06:17:25 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id WGY-sdZ78mRa for ; Tue, 7 Aug 2018 06:17:25 -0400 (EDT) Received: from smtp31.i.mail.ru (smtp31.i.mail.ru [94.100.177.91]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id E0EBB21914 for ; Tue, 7 Aug 2018 06:17:22 -0400 (EDT) From: Serge Petrenko Subject: [tarantool-patches] [PATCH] replication: do not ignore replication_connect_quorum. Date: Tue, 7 Aug 2018 13:17:13 +0300 Message-Id: <20180807101713.92203-1-sergepetrenko@tarantool.org> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: georgy@tarantool.org, Serge Petrenko On bootstrap and after replication reconfiguration replication_connect_quorum was ignored. The instance tried to connect to every replica listed in replication parameter, and errored if it wasn't possible. The patch alters this behaviour. The instance still tries to connect to every node listed in replication, but does not raise an error if it was able to connect to at least replication_connect_quorum instances. Closes #3428 --- https://github.com/tarantool/tarantool/issues/3428 https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3428-replication-connect-quorum src/box/box.cc | 6 +++--- src/box/replication.cc | 8 +++----- src/box/replication.h | 7 ++++--- test/replication-py/init_storage.test.py | 2 +- test/replication-py/master.lua | 2 ++ test/replication-py/replica.lua | 2 ++ test/replication/autobootstrap.lua | 3 ++- test/replication/autobootstrap_guest.lua | 2 +- test/replication/ddl.lua | 1 + test/replication/errinj.result | 6 +++--- test/replication/errinj.test.lua | 6 +++--- test/replication/master.lua | 1 + test/replication/master_quorum.lua | 3 ++- test/replication/on_replace.lua | 3 ++- test/replication/quorum.lua | 4 ++-- test/replication/rebootstrap.lua | 2 +- test/replication/replica_no_quorum.lua | 3 ++- test/replication/replica_timeout.lua | 3 ++- test/replication/replica_uuid_ro.lua | 2 +- 19 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index e3eb2738f..f8731f464 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -595,7 +595,7 @@ cfg_get_replication(int *p_count) * don't start appliers. */ static void -box_sync_replication(double timeout, bool connect_all) +box_sync_replication(double timeout, bool reach_quorum) { int count = 0; struct applier **appliers = cfg_get_replication(&count); @@ -607,7 +607,7 @@ box_sync_replication(double timeout, bool connect_all) applier_delete(appliers[i]); /* doesn't affect diag */ }); - replicaset_connect(appliers, count, timeout, connect_all); + replicaset_connect(appliers, count, timeout, reach_quorum); guard.is_active = false; } @@ -1888,7 +1888,7 @@ box_cfg_xc(void) * receive the same replica set UUID when a new cluster * is deployed. */ - box_sync_replication(TIMEOUT_INFINITY, true); + box_sync_replication(replication_connect_timeout, true); /* Bootstrap a new master */ bootstrap(&replicaset_uuid, &is_bootstrap_leader); } diff --git a/src/box/replication.cc b/src/box/replication.cc index 528fe4459..a6c60220f 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -46,7 +46,7 @@ struct tt_uuid INSTANCE_UUID; struct tt_uuid REPLICASET_UUID; double replication_timeout = 1.0; /* seconds */ -double replication_connect_timeout = 4.0; /* seconds */ +double replication_connect_timeout = 10.0; /* seconds */ int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL; double replication_sync_lag = 10.0; /* seconds */ @@ -540,7 +540,7 @@ applier_on_connect_f(struct trigger *trigger, void *event) void replicaset_connect(struct applier **appliers, int count, - double timeout, bool connect_all) + double timeout, bool reach_quorum) { if (count == 0) { /* Cleanup the replica set. */ @@ -587,15 +587,13 @@ replicaset_connect(struct applier **appliers, int count, double wait_start = ev_monotonic_now(loop()); if (fiber_cond_wait_timeout(&state.wakeup, timeout) != 0) break; - if (state.failed > 0 && connect_all) - break; timeout -= ev_monotonic_now(loop()) - wait_start; } if (state.connected < count) { say_crit("failed to connect to %d out of %d replicas", count - state.connected, count); /* Timeout or connection failure. */ - if (connect_all) + if (reach_quorum && state.connected < replication_connect_quorum) goto error; } else { say_verbose("connected to %d replicas", state.connected); diff --git a/src/box/replication.h b/src/box/replication.h index 95122eb45..c16c8b56c 100644 --- a/src/box/replication.h +++ b/src/box/replication.h @@ -357,12 +357,13 @@ replicaset_add(uint32_t replica_id, const struct tt_uuid *instance_uuid); * \param appliers the array of appliers * \param count size of appliers array * \param timeout connection timeout - * \param connect_all if this flag is set, fail unless all - * appliers have successfully connected + * \param reach_quorum if this flag is set, fail unless at + * least replication_connect_quorum + * appliers have successfully connected. */ void replicaset_connect(struct applier **appliers, int count, - double timeout, bool connect_all); + double timeout, bool reach_quorum); /** * Resume all appliers registered with the replica set. diff --git a/test/replication-py/init_storage.test.py b/test/replication-py/init_storage.test.py index 0911a02c0..32b4639f1 100644 --- a/test/replication-py/init_storage.test.py +++ b/test/replication-py/init_storage.test.py @@ -57,7 +57,7 @@ print '-------------------------------------------------------------' server.stop() replica = TarantoolServer(server.ini) -replica.script = 'replication/replica.lua' +replica.script = 'replication-py/replica.lua' replica.vardir = server.vardir #os.path.join(server.vardir, 'replica') replica.rpl_master = master replica.deploy(wait=False) diff --git a/test/replication-py/master.lua b/test/replication-py/master.lua index 0f9f7a6f0..51283efdf 100644 --- a/test/replication-py/master.lua +++ b/test/replication-py/master.lua @@ -3,6 +3,8 @@ os = require('os') box.cfg({ listen = os.getenv("LISTEN"), memtx_memory = 107374182, + replication_connect_timeout = 1.0, + replication_timeout = 0.3 }) require('console').listen(os.getenv('ADMIN')) diff --git a/test/replication-py/replica.lua b/test/replication-py/replica.lua index 278291bba..b9d193b70 100644 --- a/test/replication-py/replica.lua +++ b/test/replication-py/replica.lua @@ -7,6 +7,8 @@ box.cfg({ listen = os.getenv("LISTEN"), replication = os.getenv("MASTER"), memtx_memory = 107374182, + replication_connect_timeout = 1.0, + replication_timeout = 0.3 }) box_cfg_done = true diff --git a/test/replication/autobootstrap.lua b/test/replication/autobootstrap.lua index 4f55417ae..8fc6809de 100644 --- a/test/replication/autobootstrap.lua +++ b/test/replication/autobootstrap.lua @@ -21,7 +21,8 @@ box.cfg({ USER..':'..PASSWORD..'@'..instance_uri(2); USER..':'..PASSWORD..'@'..instance_uri(3); }; - replication_connect_timeout = 0.5, + replication_connect_timeout = 3.0; + replication_timeout = 0.5; }) box.once("bootstrap", function() diff --git a/test/replication/autobootstrap_guest.lua b/test/replication/autobootstrap_guest.lua index 40fef2c7a..7cd921e3c 100644 --- a/test/replication/autobootstrap_guest.lua +++ b/test/replication/autobootstrap_guest.lua @@ -20,7 +20,7 @@ box.cfg({ instance_uri(2); instance_uri(3); }; - replication_connect_timeout = 0.5, + replication_connect_timeout = 5, }) box.once("bootstrap", function() diff --git a/test/replication/ddl.lua b/test/replication/ddl.lua index 694f40eac..09d038a72 100644 --- a/test/replication/ddl.lua +++ b/test/replication/ddl.lua @@ -22,6 +22,7 @@ box.cfg({ USER..':'..PASSWORD..'@'..instance_uri(3); USER..':'..PASSWORD..'@'..instance_uri(4); }; + replication_timeout = 0.1, replication_connect_timeout = 0.5, }) diff --git a/test/replication/errinj.result b/test/replication/errinj.result index ca8af2988..19d7d9a05 100644 --- a/test/replication/errinj.result +++ b/test/replication/errinj.result @@ -418,7 +418,7 @@ test_run:cmd("create server replica_timeout with rpl_master=default, script='rep --- - true ... -test_run:cmd("start server replica_timeout with args='0.01'") +test_run:cmd("start server replica_timeout with args='0.1, 0.5'") --- - true ... @@ -474,7 +474,7 @@ errinj.set("ERRINJ_RELAY_REPORT_INTERVAL", 0) ... -- Check replica's ACKs don't prevent the master from sending -- heartbeat messages (gh-3160). -test_run:cmd("start server replica_timeout with args='0.009'") +test_run:cmd("start server replica_timeout with args='0.009, 0.5'") --- - true ... @@ -522,7 +522,7 @@ for i = 0, 9999 do box.space.test:replace({i, 4, 5, 'test'}) end -- during the join stage, i.e. a replica with a minuscule -- timeout successfully bootstraps and breaks connection only -- after subscribe. -test_run:cmd("start server replica_timeout with args='0.00001'") +test_run:cmd("start server replica_timeout with args='0.00001, 0.5'") --- - true ... diff --git a/test/replication/errinj.test.lua b/test/replication/errinj.test.lua index 463d89a8f..f00b98eed 100644 --- a/test/replication/errinj.test.lua +++ b/test/replication/errinj.test.lua @@ -173,7 +173,7 @@ errinj.set("ERRINJ_RELAY_EXIT_DELAY", 0) box.cfg{replication_timeout = 0.01} test_run:cmd("create server replica_timeout with rpl_master=default, script='replication/replica_timeout.lua'") -test_run:cmd("start server replica_timeout with args='0.01'") +test_run:cmd("start server replica_timeout with args='0.1, 0.5'") test_run:cmd("switch replica_timeout") fiber = require('fiber') @@ -199,7 +199,7 @@ errinj.set("ERRINJ_RELAY_REPORT_INTERVAL", 0) -- Check replica's ACKs don't prevent the master from sending -- heartbeat messages (gh-3160). -test_run:cmd("start server replica_timeout with args='0.009'") +test_run:cmd("start server replica_timeout with args='0.009, 0.5'") test_run:cmd("switch replica_timeout") fiber = require('fiber') @@ -219,7 +219,7 @@ for i = 0, 9999 do box.space.test:replace({i, 4, 5, 'test'}) end -- during the join stage, i.e. a replica with a minuscule -- timeout successfully bootstraps and breaks connection only -- after subscribe. -test_run:cmd("start server replica_timeout with args='0.00001'") +test_run:cmd("start server replica_timeout with args='0.00001, 0.5'") test_run:cmd("switch replica_timeout") fiber = require('fiber') while box.info.replication[1].upstream.message ~= 'timed out' do fiber.sleep(0.0001) end diff --git a/test/replication/master.lua b/test/replication/master.lua index 6d431aaeb..9b96b7891 100644 --- a/test/replication/master.lua +++ b/test/replication/master.lua @@ -4,6 +4,7 @@ box.cfg({ listen = os.getenv("LISTEN"), memtx_memory = 107374182, replication_connect_timeout = 0.5, + replication_timeout = 0.1 }) require('console').listen(os.getenv('ADMIN')) diff --git a/test/replication/master_quorum.lua b/test/replication/master_quorum.lua index fb5f7ec2b..6e0429f65 100644 --- a/test/replication/master_quorum.lua +++ b/test/replication/master_quorum.lua @@ -20,7 +20,8 @@ box.cfg({ instance_uri(2); }; replication_connect_quorum = 0; - replication_connect_timeout = 0.1; + replication_timeout = 0.5; + replication_connect_timeout = 2.0; }) test_run = require('test_run').new() diff --git a/test/replication/on_replace.lua b/test/replication/on_replace.lua index 03f15d94c..bafead48d 100644 --- a/test/replication/on_replace.lua +++ b/test/replication/on_replace.lua @@ -20,7 +20,8 @@ box.cfg({ USER..':'..PASSWORD..'@'..instance_uri(1); USER..':'..PASSWORD..'@'..instance_uri(2); }; - replication_connect_timeout = 0.5, + replication_timeout = 0.5, + replication_connect_timeout = 1.0, }) env = require('test_run') diff --git a/test/replication/quorum.lua b/test/replication/quorum.lua index 9c7bf5c93..7f85d7b13 100644 --- a/test/replication/quorum.lua +++ b/test/replication/quorum.lua @@ -15,8 +15,8 @@ require('console').listen(os.getenv('ADMIN')) box.cfg({ listen = instance_uri(INSTANCE_ID); replication_timeout = 0.05; - replication_sync_lag = 0.01; - replication_connect_timeout = 0.1; + replication_sync_lag = 0.1; + replication_connect_timeout = 3.0; replication_connect_quorum = 3; replication = { instance_uri(1); diff --git a/test/replication/rebootstrap.lua b/test/replication/rebootstrap.lua index e743577e4..f1e8d69e9 100644 --- a/test/replication/rebootstrap.lua +++ b/test/replication/rebootstrap.lua @@ -15,7 +15,7 @@ box.cfg({ listen = instance_uri(INSTANCE_ID), instance_uuid = '12345678-abcd-1234-abcd-123456789ef' .. INSTANCE_ID, replication_timeout = 0.1, - replication_connect_timeout = 0.5, + replication_connect_timeout = 2.0, replication = { instance_uri(1); instance_uri(2); diff --git a/test/replication/replica_no_quorum.lua b/test/replication/replica_no_quorum.lua index b9edeea94..c30c043cc 100644 --- a/test/replication/replica_no_quorum.lua +++ b/test/replication/replica_no_quorum.lua @@ -5,7 +5,8 @@ box.cfg({ replication = os.getenv("MASTER"), memtx_memory = 107374182, replication_connect_quorum = 0, - replication_connect_timeout = 0.1, + replication_timeout = 0.1, + replication_connect_timeout = 0.5, }) require('console').listen(os.getenv('ADMIN')) diff --git a/test/replication/replica_timeout.lua b/test/replication/replica_timeout.lua index 64f119763..51c718360 100644 --- a/test/replication/replica_timeout.lua +++ b/test/replication/replica_timeout.lua @@ -1,13 +1,14 @@ #!/usr/bin/env tarantool local TIMEOUT = tonumber(arg[1]) +local CON_TIMEOUT = arg[2] and tonumber(arg[2]) or TIMEOUT * 3 box.cfg({ listen = os.getenv("LISTEN"), replication = os.getenv("MASTER"), memtx_memory = 107374182, replication_timeout = TIMEOUT, - replication_connect_timeout = TIMEOUT * 3, + replication_connect_timeout = CON_TIMEOUT, }) require('console').listen(os.getenv('ADMIN')) diff --git a/test/replication/replica_uuid_ro.lua b/test/replication/replica_uuid_ro.lua index 8e1c6cc47..ff70da144 100644 --- a/test/replication/replica_uuid_ro.lua +++ b/test/replication/replica_uuid_ro.lua @@ -22,7 +22,7 @@ box.cfg({ USER..':'..PASSWORD..'@'..instance_uri(2); }; read_only = (INSTANCE_ID ~= '1' and true or false); - replication_connect_timeout = 0.5, + replication_connect_timeout = 5, }) box.once("bootstrap", function() -- 2.15.2 (Apple Git-101.1)