Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [RFC PATCH 12/12] replication: rebootstrap instance on startup if it fell behind
Date: Wed,  6 Jun 2018 20:45:12 +0300	[thread overview]
Message-ID: <8110d6961f42cff4589631d2b043df2c854b2171.1528305232.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1528305232.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1528305232.git.vdavydov.dev@gmail.com>

If a replica has fallen too far behind its peers in the cluster and the
xlog files it needs to catch up have been removed, it won't be able to
proceed without rebootstrap. This patch makes the recovery procedure
detect such cases and initiate the rebootstrap procedure if
necessary.

Closes #461
---
 src/box/box.cc                           |   9 ++
 src/box/replication.cc                   |  15 +++
 src/box/replication.h                    |   9 ++
 test/replication/replica_rejoin.result   | 194 +++++++++++++++++++++++++++++++
 test/replication/replica_rejoin.test.lua |  73 ++++++++++++
 test/replication/suite.cfg               |   1 +
 6 files changed, 301 insertions(+)
 create mode 100644 test/replication/replica_rejoin.result
 create mode 100644 test/replication/replica_rejoin.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index c10124ea..a83dbff5 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1777,6 +1777,9 @@ bootstrap(const struct tt_uuid *instance_uuid,
 /**
  * Recover the instance from the local directory.
  * Enter hot standby if the directory is locked.
+ * Initiate rebootstrap if the instance has fallen
+ * too far behind its peers in the replica set and
+ * needs to be rebootstrapped.
  */
 static void
 local_recovery(const struct tt_uuid *instance_uuid,
@@ -1812,6 +1815,12 @@ local_recovery(const struct tt_uuid *instance_uuid,
 	if (wal_dir_lock >= 0) {
 		box_listen();
 		box_sync_replication(replication_connect_timeout, false);
+
+		struct replica *master;
+		if (replicaset_needs_rejoin(&master)) {
+			say_info("replica is too old, initiating rejoin");
+			return bootstrap_from_master(master);
+		}
 	}
 
 	/*
diff --git a/src/box/replication.cc b/src/box/replication.cc
index c1e17698..0dda5dec 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -625,6 +625,21 @@ error:
 		  "failed to connect to one or more replicas");
 }
 
+bool
+replicaset_needs_rejoin(struct replica **master)
+{
+	replicaset_foreach(replica) {
+		if (replica->applier != NULL &&
+		    vclock_compare(&replica->applier->gc_vclock,
+				   &replicaset.vclock) > 0) {
+			*master = replica;
+			return true;
+		}
+	}
+	*master = NULL;
+	return false;
+}
+
 void
 replicaset_follow(void)
 {
diff --git a/src/box/replication.h b/src/box/replication.h
index fdf995c3..e8b391af 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -360,6 +360,15 @@ replicaset_connect(struct applier **appliers, int count,
 		   double timeout, bool connect_all);
 
 /**
+ * Check if the current instance has fallen too far behind its
+ * peers in the replica set and needs to be rebootstrapped.
+ * If so, return true and set @master to the instance to use
+ * for rebootstrap; otherwise set @master to NULL and return false.
+ */
+bool
+replicaset_needs_rejoin(struct replica **master);
+
+/**
  * Resume all appliers registered with the replica set.
  */
 void
diff --git a/test/replication/replica_rejoin.result b/test/replication/replica_rejoin.result
new file mode 100644
index 00000000..2deccd01
--- /dev/null
+++ b/test/replication/replica_rejoin.result
@@ -0,0 +1,194 @@
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+test_run:cleanup_cluster()
+---
+...
+--
+-- gh-461: check that a replica refetches the last checkpoint
+-- in case it fell behind the master.
+--
+box.schema.user.grant('guest', 'replication')
+---
+...
+_ = box.schema.space.create('test')
+---
+...
+_ = box.space.test:create_index('pk')
+---
+...
+_ = box.space.test:insert{1}
+---
+...
+_ = box.space.test:insert{2}
+---
+...
+_ = box.space.test:insert{3}
+---
+...
+-- Join a replica, then stop it.
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.info.replication[1].upstream.status == 'follow' or box.info
+---
+- true
+...
+box.space.test:select()
+---
+- - [1]
+  - [2]
+  - [3]
+...
+_ = box.schema.space.create('replica') -- will disappear after rejoin
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server replica")
+---
+- true
+...
+-- Restart the server to purge the replica from
+-- the garbage collection state.
+test_run:cmd("restart server default")
+-- Make some checkpoints to remove old xlogs.
+checkpoint_count = box.cfg.checkpoint_count
+---
+...
+box.cfg{checkpoint_count = 1}
+---
+...
+_ = box.space.test:delete{1}
+---
+...
+_ = box.space.test:insert{10}
+---
+...
+box.snapshot()
+---
+- ok
+...
+_ = box.space.test:delete{2}
+---
+...
+_ = box.space.test:insert{20}
+---
+...
+box.snapshot()
+---
+- ok
+...
+_ = box.space.test:delete{3}
+---
+...
+_ = box.space.test:insert{30}
+---
+...
+#box.info.gc().checkpoints -- 1
+---
+- 1
+...
+box.cfg{checkpoint_count = checkpoint_count}
+---
+...
+-- Restart the replica. Since xlogs have been removed,
+-- it is supposed to rejoin without changing id.
+test_run:cmd("start server replica")
+---
+- true
+...
+box.info.replication[2].downstream.vclock ~= nil or box.info
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.info.replication[1].upstream.status == 'follow' or box.info
+---
+- true
+...
+box.space.test:select()
+---
+- - [10]
+  - [20]
+  - [30]
+...
+box.space.replica == nil -- was removed by rejoin
+---
+- true
+...
+_ = box.schema.space.create('replica')
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- Make sure the replica follows new changes.
+for i = 10, 30, 10 do box.space.test:update(i, {{'!', 1, i}}) end
+---
+...
+vclock = test_run:get_vclock('default')
+---
+...
+_ = test_run:wait_vclock('replica', vclock)
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.space.test:select()
+---
+- - [10, 10]
+  - [20, 20]
+  - [30, 30]
+...
+-- Check that restart works as usual.
+test_run:cmd("restart server replica")
+box.info.replication[1].upstream.status == 'follow' or box.info
+---
+- true
+...
+box.space.test:select()
+---
+- - [10, 10]
+  - [20, 20]
+  - [30, 30]
+...
+box.space.replica ~= nil
+---
+- true
+...
+-- Cleanup.
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+box.space.test:drop()
+---
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
diff --git a/test/replication/replica_rejoin.test.lua b/test/replication/replica_rejoin.test.lua
new file mode 100644
index 00000000..5bd92119
--- /dev/null
+++ b/test/replication/replica_rejoin.test.lua
@@ -0,0 +1,73 @@
+env = require('test_run')
+test_run = env.new()
+
+test_run:cleanup_cluster()
+
+--
+-- gh-461: check that a replica refetches the last checkpoint
+-- in case it fell behind the master.
+--
+box.schema.user.grant('guest', 'replication')
+_ = box.schema.space.create('test')
+_ = box.space.test:create_index('pk')
+_ = box.space.test:insert{1}
+_ = box.space.test:insert{2}
+_ = box.space.test:insert{3}
+
+-- Join a replica, then stop it.
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+test_run:cmd("switch replica")
+box.info.replication[1].upstream.status == 'follow' or box.info
+box.space.test:select()
+_ = box.schema.space.create('replica') -- will disappear after rejoin
+test_run:cmd("switch default")
+test_run:cmd("stop server replica")
+
+-- Restart the server to purge the replica from
+-- the garbage collection state.
+test_run:cmd("restart server default")
+
+-- Make some checkpoints to remove old xlogs.
+checkpoint_count = box.cfg.checkpoint_count
+box.cfg{checkpoint_count = 1}
+_ = box.space.test:delete{1}
+_ = box.space.test:insert{10}
+box.snapshot()
+_ = box.space.test:delete{2}
+_ = box.space.test:insert{20}
+box.snapshot()
+_ = box.space.test:delete{3}
+_ = box.space.test:insert{30}
+#box.info.gc().checkpoints -- 1
+box.cfg{checkpoint_count = checkpoint_count}
+
+-- Restart the replica. Since xlogs have been removed,
+-- it is supposed to rejoin without changing id.
+test_run:cmd("start server replica")
+box.info.replication[2].downstream.vclock ~= nil or box.info
+test_run:cmd("switch replica")
+box.info.replication[1].upstream.status == 'follow' or box.info
+box.space.test:select()
+box.space.replica == nil -- was removed by rejoin
+_ = box.schema.space.create('replica')
+test_run:cmd("switch default")
+
+-- Make sure the replica follows new changes.
+for i = 10, 30, 10 do box.space.test:update(i, {{'!', 1, i}}) end
+vclock = test_run:get_vclock('default')
+_ = test_run:wait_vclock('replica', vclock)
+test_run:cmd("switch replica")
+box.space.test:select()
+
+-- Check that restart works as usual.
+test_run:cmd("restart server replica")
+box.info.replication[1].upstream.status == 'follow' or box.info
+box.space.test:select()
+box.space.replica ~= nil
+
+-- Cleanup.
+test_run:cmd("switch default")
+test_run:cmd("cleanup server replica")
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 95e94e5a..2b609f16 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -6,6 +6,7 @@
     "wal_off.test.lua": {},
     "hot_standby.test.lua": {},
     "rebootstrap.test.lua": {},
+    "replica_rejoin.test.lua": {},
     "*": {
         "memtx": {"engine": "memtx"},
         "vinyl": {"engine": "vinyl"}
-- 
2.11.0

      parent reply	other threads:[~2018-06-06 17:45 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-06 17:45 [RFC PATCH 00/12] Replica rejoin Vladimir Davydov
2018-06-06 17:45 ` [RFC PATCH 01/12] recovery: drop unused recovery_exit Vladimir Davydov
2018-06-08  4:13   ` Konstantin Osipov
2018-06-06 17:45 ` [RFC PATCH 02/12] recovery: constify vclock argument Vladimir Davydov
2018-06-08  4:14   ` Konstantin Osipov
2018-06-06 17:45 ` [RFC PATCH 03/12] applier: remove extra new line in log message printed on connect Vladimir Davydov
2018-06-08  4:15   ` Konstantin Osipov
2018-06-06 17:45 ` [RFC PATCH 04/12] xrow: add helper function for encoding vclock Vladimir Davydov
2018-06-08  4:16   ` Konstantin Osipov
2018-06-06 17:45 ` [RFC PATCH 05/12] box: retrieve instance uuid before starting local recovery Vladimir Davydov
2018-06-08  4:22   ` Konstantin Osipov
2018-06-06 17:45 ` [RFC PATCH 06/12] box: refactor hot standby recovery Vladimir Davydov
2018-06-08  4:40   ` Konstantin Osipov
2018-06-08  6:43     ` Vladimir Davydov
2018-06-08 13:15       ` Konstantin Osipov
2018-06-08 13:30         ` Vladimir Davydov
2018-06-06 17:45 ` [RFC PATCH 07/12] box: retrieve end vclock before starting local recovery Vladimir Davydov
2018-06-06 17:45 ` [RFC PATCH 08/12] box: open the port " Vladimir Davydov
2018-06-06 17:45 ` [RFC PATCH 09/12] box: connect to remote peers " Vladimir Davydov
2018-06-06 17:45 ` [RFC PATCH 10/12] box: factor out local recovery function Vladimir Davydov
2018-06-06 17:45 ` [RFC PATCH 11/12] applier: inquire oldest vclock on connect Vladimir Davydov
2018-06-06 17:45 ` Vladimir Davydov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8110d6961f42cff4589631d2b043df2c854b2171.1528305232.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [RFC PATCH 12/12] replication: rebootstrap instance on startup if it fell behind' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox