[PATCH 3/9] recovery: restore garbage collector vclock after restart

Wed Nov 28 19:14:41 MSK 2018

After restart the garbage collector vclock is reset to the vclock of the
oldest preserved checkpoint, which is incorrect - it may be less in case
there is a replica that lagged behind, and it may be greater as well in
case the WAL thread hit ENOSPC and had to remove some WAL files to
continue. Fix it.

A note about xlog/panic_on_wal_error test. To check that replication
stops if some xlogs are missing, the test first removes xlogs on the
master, then restarts the master, then tries to start the replica
expecting that replication should fail. Well, it shouldn't - the replica
should rebootstrap instead. It didn't rebootstrap before this patch
though, because the master reported wrong garbage collector vclock (as
it didn't recover it on restart). After this patch the replica would
rebootstrap and the test would hang. Fix this by restarting the master
before removing xlog files.
---
 src/box/box.cc                        |  2 +-
 src/box/recovery.cc                   | 14 +++++++++-----
 src/box/recovery.h                    |  5 ++++-
 test/replication/gc_no_space.result   | 15 +++++++++++----
 test/replication/gc_no_space.test.lua |  7 +++++--
 test/xlog/panic_on_wal_error.result   |  2 +-
 test/xlog/panic_on_wal_error.test.lua |  2 +-
 7 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 72788f82..a5e1f07a 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1918,7 +1918,7 @@ local_recovery(const struct tt_uuid *instance_uuid,
 	 * so we must reflect this in replicaset vclock to
 	 * not attempt to apply these rows twice.
 	 */
-	recovery_scan(recovery, &replicaset.vclock);
+	recovery_scan(recovery, &replicaset.vclock, &gc.vclock);
 
 	if (wal_dir_lock >= 0) {
 		box_listen();
diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index 77357f04..64d50989 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -117,21 +117,25 @@ recovery_new(const char *wal_dirname, bool force_recovery,
 }
 
 void
-recovery_scan(struct recovery *r, struct vclock *end_vclock)
+recovery_scan(struct recovery *r, struct vclock *end_vclock,
+	      struct vclock *gc_vclock)
 {
 	xdir_scan_xc(&r->wal_dir);
 
-	struct vclock *vclock = vclockset_last(&r->wal_dir.index);
-	if (vclock == NULL || vclock_compare(vclock, &r->vclock) < 0) {
+	if (xdir_last_vclock(&r->wal_dir, end_vclock) < 0 ||
+	    vclock_compare(end_vclock, &r->vclock) < 0) {
 		/* No xlogs after last checkpoint. */
+		vclock_copy(gc_vclock, &r->vclock);
 		vclock_copy(end_vclock, &r->vclock);
 		return;
 	}
 
+	if (xdir_first_vclock(&r->wal_dir, gc_vclock) < 0)
+		unreachable();
+
 	/* Scan the last xlog to find end vclock. */
-	vclock_copy(end_vclock, vclock);
 	struct xlog_cursor cursor;
-	if (xdir_open_cursor(&r->wal_dir, vclock_sum(vclock), &cursor) != 0)
+	if (xdir_open_cursor(&r->wal_dir, vclock_sum(end_vclock), &cursor) != 0)
 		return;
 	struct xrow_header row;
 	while (xlog_cursor_next(&cursor, &row, true) == 0)
diff --git a/src/box/recovery.h b/src/box/recovery.h
index 5882d969..662be3ca 100644
--- a/src/box/recovery.h
+++ b/src/box/recovery.h
@@ -72,9 +72,12 @@ recovery_delete(struct recovery *r);
  * Scan the WAL directory, build an index of all found
  * WAL files, then scan the most recent WAL file to find
  * the vclock of the last record (returned in @end_vclock).
+ * @gc_vclock is set to the oldest vclock available in the
+ * WAL directory.
  */
 void
-recovery_scan(struct recovery *r, struct vclock *end_vclock);
+recovery_scan(struct recovery *r,  struct vclock *end_vclock,
+	      struct vclock *gc_vclock);
 
 void
 recovery_follow_local(struct recovery *r, struct xstream *stream,
diff --git a/test/replication/gc_no_space.result b/test/replication/gc_no_space.result
index 5c64bea4..b2d3e207 100644
--- a/test/replication/gc_no_space.result
+++ b/test/replication/gc_no_space.result
@@ -43,9 +43,6 @@ test_run:cmd("setopt delimiter ''");
 ---
 - true
 ...
-default_checkpoint_count = box.cfg.checkpoint_count
----
-...
 box.cfg{checkpoint_count = 2}
 ---
 ...
@@ -264,6 +261,16 @@ box.schema.user.revoke('guest', 'replication')
 test_run:cleanup_cluster()
 ---
 ...
-box.cfg{checkpoint_count = default_checkpoint_count}
+-- Check that the garbage collector vclock is recovered correctly.
+test_run:cmd("restart server default")
+gc = box.info.gc()
+---
+...
+#gc.checkpoints -- 2
 ---
+- 2
+...
+gc.signature == gc.checkpoints[2].signature
+---
+- true
 ...
diff --git a/test/replication/gc_no_space.test.lua b/test/replication/gc_no_space.test.lua
index 7f5ab803..6940996f 100644
--- a/test/replication/gc_no_space.test.lua
+++ b/test/replication/gc_no_space.test.lua
@@ -26,7 +26,6 @@ function check_snap_count(count)
 end;
 test_run:cmd("setopt delimiter ''");
 
-default_checkpoint_count = box.cfg.checkpoint_count
 box.cfg{checkpoint_count = 2}
 
 test_run:cleanup_cluster()
@@ -111,4 +110,8 @@ s:drop()
 box.schema.user.revoke('guest', 'replication')
 test_run:cleanup_cluster()
 
-box.cfg{checkpoint_count = default_checkpoint_count}
+-- Check that the garbage collector vclock is recovered correctly.
+test_run:cmd("restart server default")
+gc = box.info.gc()
+#gc.checkpoints -- 2
+gc.signature == gc.checkpoints[2].signature
diff --git a/test/xlog/panic_on_wal_error.result b/test/xlog/panic_on_wal_error.result
index 345534ba..22f14f91 100644
--- a/test/xlog/panic_on_wal_error.result
+++ b/test/xlog/panic_on_wal_error.result
@@ -67,6 +67,7 @@ test_run:cmd("stop server replica")
 ---
 - true
 ...
+test_run:cmd("restart server default")
 box.space.test:auto_increment{'after snapshot'}
 ---
 - [2, 'after snapshot']
@@ -94,7 +95,6 @@ files = fio.glob(glob)
 for _, file in pairs(files) do fio.unlink(file) end
 ---
 ...
-test_run:cmd("restart server default")
 --
 -- make sure the server has some xlogs, otherwise the
 -- replica doesn't discover the gap in the logs
diff --git a/test/xlog/panic_on_wal_error.test.lua b/test/xlog/panic_on_wal_error.test.lua
index 29410cb2..2e95431c 100644
--- a/test/xlog/panic_on_wal_error.test.lua
+++ b/test/xlog/panic_on_wal_error.test.lua
@@ -31,6 +31,7 @@ box.space.test:select{}
 --
 test_run:cmd("switch default")
 test_run:cmd("stop server replica")
+test_run:cmd("restart server default")
 box.space.test:auto_increment{'after snapshot'}
 box.space.test:auto_increment{'after snapshot - one more row'}
 --
@@ -41,7 +42,6 @@ fio = require('fio')
 glob = fio.pathjoin(box.cfg.wal_dir, '*.xlog')
 files = fio.glob(glob)
 for _, file in pairs(files) do fio.unlink(file) end
-test_run:cmd("restart server default")
 --
 -- make sure the server has some xlogs, otherwise the
 -- replica doesn't discover the gap in the logs
-- 
2.11.0