[PATCH v2 03/11] box: retrieve end vclock before starting local recovery

Vladimir Davydov vdavydov.dev at gmail.com
Fri Jun 8 20:34:21 MSK 2018


In order to find out whether the current instance has fallen too far behind
its peers in the cluster and so needs to be rebootstrapped, we need to know
its end vclock before we start local recovery. To do that, let's scan the
most recent xlog. In the future, we can optimize this by either storing the
end vclock in the xlog EOF marker or by creating a new xlog on server stop.

Needed for #461
---
 src/box/box.cc      | 20 +++++++++++++-------
 src/box/recovery.cc | 23 +++++++++++++++++++++++
 src/box/recovery.h  |  3 +++
 3 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index c1d15644..3457cf19 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1858,6 +1858,14 @@ box_cfg_xc(void)
 		auto guard = make_scoped_guard([=]{ recovery_delete(recovery); });
 
 		/*
+		 * Initialize the replica set vclock from recovery.
+		 * The local WAL may contain rows from remote masters,
+		 * so we must reflect this in replicaset vclock to
+		 * not attempt to apply these rows twice.
+		 */
+		recovery_end_vclock(recovery, &replicaset.vclock);
+
+		/*
 		 * recovery->vclock is needed by Vinyl to filter
 		 * WAL rows that were dumped before restart.
 		 *
@@ -1909,6 +1917,11 @@ box_cfg_xc(void)
 			recovery_stop_local(recovery);
 			recover_remaining_wals(recovery, &wal_stream.base,
 					       NULL, true);
+			/*
+			 * Advance replica set vclock to reflect records
+			 * applied in hot standby mode.
+			 */
+			vclock_copy(&replicaset.vclock, &recovery->vclock);
 			box_bind();
 		}
 		recovery_finalize(recovery);
@@ -1924,13 +1937,6 @@ box_cfg_xc(void)
 
 		/* Clear the pointer to journal before it goes out of scope */
 		journal_set(NULL);
-		/*
-		 * Initialize the replica set vclock from recovery.
-		 * The local WAL may contain rows from remote masters,
-		 * so we must reflect this in replicaset vclock to
-		 * not attempt to apply these rows twice.
-		 */
-		vclock_copy(&replicaset.vclock, &recovery->vclock);
 
 		/** Begin listening only when the local recovery is complete. */
 		box_listen();
diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index 71f6bd8c..eb77476d 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -137,6 +137,29 @@ recovery_new(const char *wal_dirname, bool force_recovery,
 	return r;
 }
 
+void
+recovery_end_vclock(struct recovery *r, struct vclock *end_vclock)
+{
+	xdir_scan_xc(&r->wal_dir);
+
+	struct vclock *vclock = vclockset_last(&r->wal_dir.index);
+	if (vclock == NULL || vclock_compare(vclock, &r->vclock) < 0) {
+		/* No xlogs after last checkpoint. */
+		vclock_copy(end_vclock, &r->vclock);
+		return;
+	}
+
+	/* Scan the last xlog to find end vclock. */
+	vclock_copy(end_vclock, vclock);
+	struct xlog_cursor cursor;
+	if (xdir_open_cursor(&r->wal_dir, vclock_sum(vclock), &cursor) != 0)
+		return;
+	struct xrow_header row;
+	while (xlog_cursor_next(&cursor, &row, true) == 0)
+		vclock_follow(end_vclock, row.replica_id, row.lsn);
+	xlog_cursor_close(&cursor, false);
+}
+
 static inline void
 recovery_close_log(struct recovery *r)
 {
diff --git a/src/box/recovery.h b/src/box/recovery.h
index 6aba922b..1ae6f2c3 100644
--- a/src/box/recovery.h
+++ b/src/box/recovery.h
@@ -69,6 +69,9 @@ void
 recovery_delete(struct recovery *r);
 
 void
+recovery_end_vclock(struct recovery *r, struct vclock *end_vclock);
+
+void
 recovery_follow_local(struct recovery *r, struct xstream *stream,
 		      const char *name, ev_tstamp wal_dir_rescan_delay);
 
-- 
2.11.0




More information about the Tarantool-patches mailing list