[PATCH] replication: improve join/subscribe logging
Vladimir Davydov
vdavydov.dev at gmail.com
Thu Feb 14 20:44:36 MSK 2019
This patch makes replication logging slightly more verbose to facilitate
post-mortem analysis of replication failures:
- When an instance starts up, print its uuid and vclock.
- When a replica connects to a master, print the master's uuid.
- Log join/subscribe events in the master's log; print the replica's uuid,
address, and vclock.
---
https://github.com/tarantool/tarantool/commits/dv/make-replication-logging-more-verbose
src/box/applier.cc | 7 ++++---
src/box/box.cc | 36 +++++++++++++++++++++++++++++++++---
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/src/box/applier.cc b/src/box/applier.cc
index c09253be..276c2039 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -207,11 +207,12 @@ applier_connect(struct applier *applier)
}
if (applier->version_id != greeting.version_id) {
- say_info("remote master is %u.%u.%u at %s",
+ say_info("remote master %s at %s running Tarantool %u.%u.%u",
+ tt_uuid_str(&greeting.uuid),
+ sio_strfaddr(&applier->addr, applier->addr_len),
version_id_major(greeting.version_id),
version_id_minor(greeting.version_id),
- version_id_patch(greeting.version_id),
- sio_strfaddr(&applier->addr, applier->addr_len));
+ version_id_patch(greeting.version_id));
}
/* Save the remote instance version and UUID on connect. */
diff --git a/src/box/box.cc b/src/box/box.cc
index 6ea83a65..a28a42f3 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1509,6 +1509,9 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
row.sync = header->sync;
coio_write_xrow(io, &row);
+ say_info("joining replica %s at %s",
+ tt_uuid_str(&instance_uuid), sio_socketname(io->fd));
+
/*
* Initial stream: feed replica with dirty data from engines.
*/
@@ -1551,6 +1554,13 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
relay_final_join(io->fd, header->sync, &start_vclock, &stop_vclock);
say_info("final data sent.");
+ char *local_vclock_str = vclock_to_string(&replicaset.vclock);
+ char *remote_vclock_str = vclock_to_string(&stop_vclock);
+ say_info("remote vclock %s local vclock %s",
+ remote_vclock_str, local_vclock_str);
+ free(local_vclock_str);
+ free(remote_vclock_str);
+
/* Send end of WAL stream marker */
xrow_encode_vclock_xc(&row, &replicaset.vclock);
row.sync = header->sync;
@@ -1600,6 +1610,9 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
"wal_mode = 'none'");
}
+ struct vclock vclock;
+ vclock_create(&vclock);
+ vclock_copy(&vclock, &replicaset.vclock);
/*
* Send a response to SUBSCRIBE request, tell
* the replica how many rows we have in stock for it,
@@ -1612,9 +1625,7 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
* the additional field.
*/
struct xrow_header row;
- xrow_encode_subscribe_response_xc(&row,
- &REPLICASET_UUID,
- &replicaset.vclock);
+ xrow_encode_subscribe_response_xc(&row, &REPLICASET_UUID, &vclock);
/*
* Identify the message with the replica id of this
* instance, this is the only way for a replica to find
@@ -1626,6 +1637,16 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
row.sync = header->sync;
coio_write_xrow(io, &row);
+ say_info("subscribed replica %s at %s",
+ tt_uuid_str(&replica_uuid), sio_socketname(io->fd));
+
+ char *local_vclock_str = vclock_to_string(&vclock);
+ char *remote_vclock_str = vclock_to_string(&replica_clock);
+ say_info("remote vclock %s local vclock %s",
+ remote_vclock_str, local_vclock_str);
+ free(local_vclock_str);
+ free(remote_vclock_str);
+
/*
* Process SUBSCRIBE request via replication relay
* Send current recovery vector clock as a marker
@@ -1852,6 +1873,9 @@ bootstrap(const struct tt_uuid *instance_uuid,
INSTANCE_UUID = *instance_uuid;
else
tt_uuid_create(&INSTANCE_UUID);
+
+ say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
/*
* Begin listening on the socket to enable
* master-master replication leader election.
@@ -1909,6 +1933,8 @@ local_recovery(const struct tt_uuid *instance_uuid,
tt_uuid_str(&INSTANCE_UUID));
}
+ say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
struct wal_stream wal_stream;
wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal"));
@@ -1935,6 +1961,10 @@ local_recovery(const struct tt_uuid *instance_uuid,
*/
recovery_scan(recovery, &replicaset.vclock, &gc.vclock);
+ char *vclock_str = vclock_to_string(&replicaset.vclock);
+ say_info("instance vclock %s", vclock_str);
+ free(vclock_str);
+
if (wal_dir_lock >= 0) {
box_listen();
box_sync_replication(false);
--
2.11.0
More information about the Tarantool-patches
mailing list