[PATCH] replication: improve join/subscribe logging

Vladimir Davydov vdavydov.dev at gmail.com
Thu Feb 14 20:44:36 MSK 2019


This patch makes logging a little bit more verbose so as to facilitate
post-mortem replication failure analysis:

 - When an instance starts up, print its uuid and vclock.
 - When a replica connects to a master, print the master's uuid.
 - Log join/subscribe events in the master's log; print replica uuid,
   address, and vclock.
---
https://github.com/tarantool/tarantool/commits/dv/make-replication-logging-more-verbose

 src/box/applier.cc |  7 ++++---
 src/box/box.cc     | 36 +++++++++++++++++++++++++++++++++---
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/src/box/applier.cc b/src/box/applier.cc
index c09253be..276c2039 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -207,11 +207,12 @@ applier_connect(struct applier *applier)
 	}
 
 	if (applier->version_id != greeting.version_id) {
-		say_info("remote master is %u.%u.%u at %s",
+		say_info("remote master %s at %s running Tarantool %u.%u.%u",
+			 tt_uuid_str(&greeting.uuid),
+			 sio_strfaddr(&applier->addr, applier->addr_len),
 			 version_id_major(greeting.version_id),
 			 version_id_minor(greeting.version_id),
-			 version_id_patch(greeting.version_id),
-			 sio_strfaddr(&applier->addr, applier->addr_len));
+			 version_id_patch(greeting.version_id));
 	}
 
 	/* Save the remote instance version and UUID on connect. */
diff --git a/src/box/box.cc b/src/box/box.cc
index 6ea83a65..a28a42f3 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1509,6 +1509,9 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 	row.sync = header->sync;
 	coio_write_xrow(io, &row);
 
+	say_info("joining replica %s at %s",
+		 tt_uuid_str(&instance_uuid), sio_socketname(io->fd));
+
 	/*
 	 * Initial stream: feed replica with dirty data from engines.
 	 */
@@ -1551,6 +1554,13 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 	relay_final_join(io->fd, header->sync, &start_vclock, &stop_vclock);
 	say_info("final data sent.");
 
+	char *local_vclock_str = vclock_to_string(&replicaset.vclock);
+	char *remote_vclock_str = vclock_to_string(&stop_vclock);
+	say_info("remote vclock %s local vclock %s",
+		 remote_vclock_str, local_vclock_str);
+	free(local_vclock_str);
+	free(remote_vclock_str);
+
 	/* Send end of WAL stream marker */
 	xrow_encode_vclock_xc(&row, &replicaset.vclock);
 	row.sync = header->sync;
@@ -1600,6 +1610,9 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 			  "wal_mode = 'none'");
 	}
 
+	struct vclock vclock;
+	vclock_create(&vclock);
+	vclock_copy(&vclock, &replicaset.vclock);
 	/*
 	 * Send a response to SUBSCRIBE request, tell
 	 * the replica how many rows we have in stock for it,
@@ -1612,9 +1625,7 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 	 * the additional field.
 	 */
 	struct xrow_header row;
-	xrow_encode_subscribe_response_xc(&row,
-					  &REPLICASET_UUID,
-					  &replicaset.vclock);
+	xrow_encode_subscribe_response_xc(&row, &REPLICASET_UUID, &vclock);
 	/*
 	 * Identify the message with the replica id of this
 	 * instance, this is the only way for a replica to find
@@ -1626,6 +1637,16 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 	row.sync = header->sync;
 	coio_write_xrow(io, &row);
 
+	say_info("subscribed replica %s at %s",
+		 tt_uuid_str(&replica_uuid), sio_socketname(io->fd));
+
+	char *local_vclock_str = vclock_to_string(&vclock);
+	char *remote_vclock_str = vclock_to_string(&replica_clock);
+	say_info("remote vclock %s local vclock %s",
+		 remote_vclock_str, local_vclock_str);
+	free(local_vclock_str);
+	free(remote_vclock_str);
+
 	/*
 	 * Process SUBSCRIBE request via replication relay
 	 * Send current recovery vector clock as a marker
@@ -1852,6 +1873,9 @@ bootstrap(const struct tt_uuid *instance_uuid,
 		INSTANCE_UUID = *instance_uuid;
 	else
 		tt_uuid_create(&INSTANCE_UUID);
+
+	say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
 	/*
 	 * Begin listening on the socket to enable
 	 * master-master replication leader election.
@@ -1909,6 +1933,8 @@ local_recovery(const struct tt_uuid *instance_uuid,
 			  tt_uuid_str(&INSTANCE_UUID));
 	}
 
+	say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
 	struct wal_stream wal_stream;
 	wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal"));
 
@@ -1935,6 +1961,10 @@ local_recovery(const struct tt_uuid *instance_uuid,
 	 */
 	recovery_scan(recovery, &replicaset.vclock, &gc.vclock);
 
+	char *vclock_str = vclock_to_string(&replicaset.vclock);
+	say_info("instance vclock %s", vclock_str);
+	free(vclock_str);
+
 	if (wal_dir_lock >= 0) {
 		box_listen();
 		box_sync_replication(false);
-- 
2.11.0




More information about the Tarantool-patches mailing list