[PATCH v2 3/3] replication: improve join/subscribe logging

Vladimir Davydov vdavydov.dev at gmail.com
Fri Feb 15 15:25:49 MSK 2019


This patch makes logging a little bit more verbose so as to facilitate
post-mortem replication failure analysis:

 - When an instance starts up, print its uuid and vclock.
 - When a replica connects to a master, print the master's uuid to
   the replica's log.
 - Print replica uuid and address to the master's log on join/subscribe.
 - Log local and remote vclock on subscribe, both on master and on
   replica.
 - Log ids assigned to replicas both on recovery and when online.
   Also, log when a replica is deleted from the cluster.
---
 src/box/alter.cc       |  1 +
 src/box/applier.cc     | 17 +++++++++++++----
 src/box/box.cc         | 21 ++++++++++++++++++---
 src/box/replication.cc |  5 +++++
 4 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index de8372b3..0e340468 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -3017,6 +3017,7 @@ on_replace_dd_schema(struct trigger * /* trigger */, void *event)
 		tt_uuid uu;
 		tuple_field_uuid_xc(new_tuple, BOX_CLUSTER_FIELD_UUID, &uu);
 		REPLICASET_UUID = uu;
+		say_info("cluster uuid %s", tt_uuid_str(&uu));
 	}
 }
 
diff --git a/src/box/applier.cc b/src/box/applier.cc
index c09253be..e9addcb3 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -207,11 +207,12 @@ applier_connect(struct applier *applier)
 	}
 
 	if (applier->version_id != greeting.version_id) {
-		say_info("remote master is %u.%u.%u at %s",
+		say_info("remote master %s at %s running Tarantool %u.%u.%u",
+			 tt_uuid_str(&greeting.uuid),
+			 sio_strfaddr(&applier->addr, applier->addr_len),
 			 version_id_major(greeting.version_id),
 			 version_id_minor(greeting.version_id),
-			 version_id_patch(greeting.version_id),
-			 sio_strfaddr(&applier->addr, applier->addr_len));
+			 version_id_patch(greeting.version_id));
 	}
 
 	/* Save the remote instance version and UUID on connect. */
@@ -393,8 +394,11 @@ applier_subscribe(struct applier *applier)
 	struct vclock remote_vclock_at_subscribe;
 	struct tt_uuid cluster_id = uuid_nil;
 
+	struct vclock vclock;
+	vclock_create(&vclock);
+	vclock_copy(&vclock, &replicaset.vclock);
 	xrow_encode_subscribe_xc(&row, &REPLICASET_UUID, &INSTANCE_UUID,
-				 &replicaset.vclock);
+				 &vclock);
 	coio_write_xrow(coio, &row);
 
 	/* Read SUBSCRIBE response */
@@ -429,6 +433,11 @@ applier_subscribe(struct applier *applier)
 				  tt_uuid_str(&cluster_id),
 				  tt_uuid_str(&REPLICASET_UUID));
 		}
+
+		say_info("subscribed");
+		say_info("remote vclock %s local vclock %s",
+			 vclock_to_string(&remote_vclock_at_subscribe),
+			 vclock_to_string(&vclock));
 	}
 	/*
 	 * Tarantool < 1.6.7:
diff --git a/src/box/box.cc b/src/box/box.cc
index 8136cf33..058ba266 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1509,6 +1509,9 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 	row.sync = header->sync;
 	coio_write_xrow(io, &row);
 
+	say_info("joining replica %s at %s",
+		 tt_uuid_str(&instance_uuid), sio_socketname(io->fd));
+
 	/*
 	 * Initial stream: feed replica with dirty data from engines.
 	 */
@@ -1600,6 +1603,9 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 			  "wal_mode = 'none'");
 	}
 
+	struct vclock vclock;
+	vclock_create(&vclock);
+	vclock_copy(&vclock, &replicaset.vclock);
 	/*
 	 * Send a response to SUBSCRIBE request, tell
 	 * the replica how many rows we have in stock for it,
@@ -1612,9 +1618,7 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 	 * the additional field.
 	 */
 	struct xrow_header row;
-	xrow_encode_subscribe_response_xc(&row,
-					  &REPLICASET_UUID,
-					  &replicaset.vclock);
+	xrow_encode_subscribe_response_xc(&row, &REPLICASET_UUID, &vclock);
 	/*
 	 * Identify the message with the replica id of this
 	 * instance, this is the only way for a replica to find
@@ -1626,6 +1630,11 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 	row.sync = header->sync;
 	coio_write_xrow(io, &row);
 
+	say_info("subscribed replica %s at %s",
+		 tt_uuid_str(&replica_uuid), sio_socketname(io->fd));
+	say_info("remote vclock %s local vclock %s",
+		 vclock_to_string(&replica_clock), vclock_to_string(&vclock));
+
 	/*
 	 * Process SUBSCRIBE request via replication relay
 	 * Send current recovery vector clock as a marker
@@ -1848,6 +1857,9 @@ bootstrap(const struct tt_uuid *instance_uuid,
 		INSTANCE_UUID = *instance_uuid;
 	else
 		tt_uuid_create(&INSTANCE_UUID);
+
+	say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
 	/*
 	 * Begin listening on the socket to enable
 	 * master-master replication leader election.
@@ -1905,6 +1917,8 @@ local_recovery(const struct tt_uuid *instance_uuid,
 			  tt_uuid_str(&INSTANCE_UUID));
 	}
 
+	say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
 	struct wal_stream wal_stream;
 	wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal"));
 
@@ -1930,6 +1944,7 @@ local_recovery(const struct tt_uuid *instance_uuid,
 	 * not attempt to apply these rows twice.
 	 */
 	recovery_scan(recovery, &replicaset.vclock, &gc.vclock);
+	say_info("instance vclock %s", vclock_to_string(&replicaset.vclock));
 
 	if (wal_dir_lock >= 0) {
 		box_listen();
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 19ad5026..e3ceb009 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -197,6 +197,9 @@ replica_set_id(struct replica *replica, uint32_t replica_id)
 		instance_id = replica_id;
 	}
 	replicaset.replica_by_id[replica_id] = replica;
+
+	say_info("assigned id %d to replica %s",
+		 replica->id, tt_uuid_str(&replica->uuid));
 }
 
 void
@@ -214,6 +217,8 @@ replica_clear_id(struct replica *replica)
 	 */
 	replicaset.replica_by_id[replica->id] = NULL;
 	replica->id = REPLICA_ID_NIL;
+	say_info("removed replica %s", tt_uuid_str(&replica->uuid));
+
 	/*
 	 * The replica will never resubscribe so we don't need to keep
 	 * WALs for it anymore. Unregister it with the garbage collector
-- 
2.11.0




More information about the Tarantool-patches mailing list