* [PATCH] replication: improve join/subscribe logging
@ 2019-02-14 17:44 Vladimir Davydov
2019-02-15 9:42 ` Vladimir Davydov
0 siblings, 1 reply; 2+ messages in thread
From: Vladimir Davydov @ 2019-02-14 17:44 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
This patch makes logging a little bit more verbose so as to facilitate
post-mortem replication failure analysis:
- When an instance starts up, print its uuid and vclock.
- When a replica connects to a master, print the master's uuid.
- Log join/subscribe events in the master's log; print replica uuid,
address, and vclock.
---
https://github.com/tarantool/tarantool/commits/dv/make-replication-logging-more-verbose
src/box/applier.cc | 7 ++++---
src/box/box.cc | 36 +++++++++++++++++++++++++++++++++---
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/src/box/applier.cc b/src/box/applier.cc
index c09253be..276c2039 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -207,11 +207,12 @@ applier_connect(struct applier *applier)
}
if (applier->version_id != greeting.version_id) {
- say_info("remote master is %u.%u.%u at %s",
+ say_info("remote master %s at %s running Tarantool %u.%u.%u",
+ tt_uuid_str(&greeting.uuid),
+ sio_strfaddr(&applier->addr, applier->addr_len),
version_id_major(greeting.version_id),
version_id_minor(greeting.version_id),
- version_id_patch(greeting.version_id),
- sio_strfaddr(&applier->addr, applier->addr_len));
+ version_id_patch(greeting.version_id));
}
/* Save the remote instance version and UUID on connect. */
diff --git a/src/box/box.cc b/src/box/box.cc
index 6ea83a65..a28a42f3 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1509,6 +1509,9 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
row.sync = header->sync;
coio_write_xrow(io, &row);
+ say_info("joining replica %s at %s",
+ tt_uuid_str(&instance_uuid), sio_socketname(io->fd));
+
/*
* Initial stream: feed replica with dirty data from engines.
*/
@@ -1551,6 +1554,13 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
relay_final_join(io->fd, header->sync, &start_vclock, &stop_vclock);
say_info("final data sent.");
+ char *local_vclock_str = vclock_to_string(&replicaset.vclock);
+ char *remote_vclock_str = vclock_to_string(&stop_vclock);
+ say_info("remote vclock %s local vclock %s",
+ remote_vclock_str, local_vclock_str);
+ free(local_vclock_str);
+ free(remote_vclock_str);
+
/* Send end of WAL stream marker */
xrow_encode_vclock_xc(&row, &replicaset.vclock);
row.sync = header->sync;
@@ -1600,6 +1610,9 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
"wal_mode = 'none'");
}
+ struct vclock vclock;
+ vclock_create(&vclock);
+ vclock_copy(&vclock, &replicaset.vclock);
/*
* Send a response to SUBSCRIBE request, tell
* the replica how many rows we have in stock for it,
@@ -1612,9 +1625,7 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
* the additional field.
*/
struct xrow_header row;
- xrow_encode_subscribe_response_xc(&row,
- &REPLICASET_UUID,
- &replicaset.vclock);
+ xrow_encode_subscribe_response_xc(&row, &REPLICASET_UUID, &vclock);
/*
* Identify the message with the replica id of this
* instance, this is the only way for a replica to find
@@ -1626,6 +1637,16 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
row.sync = header->sync;
coio_write_xrow(io, &row);
+ say_info("subscribed replica %s at %s",
+ tt_uuid_str(&replica_uuid), sio_socketname(io->fd));
+
+ char *local_vclock_str = vclock_to_string(&vclock);
+ char *remote_vclock_str = vclock_to_string(&replica_clock);
+ say_info("remote vclock %s local vclock %s",
+ remote_vclock_str, local_vclock_str);
+ free(local_vclock_str);
+ free(remote_vclock_str);
+
/*
* Process SUBSCRIBE request via replication relay
* Send current recovery vector clock as a marker
@@ -1852,6 +1873,9 @@ bootstrap(const struct tt_uuid *instance_uuid,
INSTANCE_UUID = *instance_uuid;
else
tt_uuid_create(&INSTANCE_UUID);
+
+ say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
/*
* Begin listening on the socket to enable
* master-master replication leader election.
@@ -1909,6 +1933,8 @@ local_recovery(const struct tt_uuid *instance_uuid,
tt_uuid_str(&INSTANCE_UUID));
}
+ say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));
+
struct wal_stream wal_stream;
wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal"));
@@ -1935,6 +1961,10 @@ local_recovery(const struct tt_uuid *instance_uuid,
*/
recovery_scan(recovery, &replicaset.vclock, &gc.vclock);
+ char *vclock_str = vclock_to_string(&replicaset.vclock);
+ say_info("instance vclock %s", vclock_str);
+ free(vclock_str);
+
if (wal_dir_lock >= 0) {
box_listen();
box_sync_replication(false);
--
2.11.0
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] replication: improve join/subscribe logging
2019-02-14 17:44 [PATCH] replication: improve join/subscribe logging Vladimir Davydov
@ 2019-02-15 9:42 ` Vladimir Davydov
0 siblings, 0 replies; 2+ messages in thread
From: Vladimir Davydov @ 2019-02-15 9:42 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
On Thu, Feb 14, 2019 at 08:44:36PM +0300, Vladimir Davydov wrote:
> This patch makes logging a little bit more verbose so as to facilitate
> post-mortem replication failure analysis:
>
> - When an instance starts up, print its uuid and vclock.
> - When a replica connects to a master, print the master's uuid.
> - Log join/subscribe events in the master's log; print replica uuid,
> address, and vclock.
Come to think of it, we also need to print id assigned to
each replica, otherwise vclocks wouldn't make much sense.
I'll patch replica_{set,clear}_id in v2.
Also, we should probably be more verbose in applier as well
and print local and remote vclocks when an applier subscribes.
Will do it, too.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-02-15 9:42 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-14 17:44 [PATCH] replication: improve join/subscribe logging Vladimir Davydov
2019-02-15 9:42 ` Vladimir Davydov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox