From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH] replication: improve join/subscribe logging Date: Thu, 14 Feb 2019 20:44:36 +0300 Message-Id: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: This patch makes logging a little bit more verbose so as to facilitate post-mortem replication failure analysis: - When an instance starts up, print its uuid and vclock. - When a replica connects to a master, print the master's uuid. - Log join/subscribe events in the master's log; print replica uuid, address, and vclock. --- https://github.com/tarantool/tarantool/commits/dv/make-replication-logging-more-verbose src/box/applier.cc | 7 ++++--- src/box/box.cc | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/box/applier.cc b/src/box/applier.cc index c09253be..276c2039 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -207,11 +207,12 @@ applier_connect(struct applier *applier) } if (applier->version_id != greeting.version_id) { - say_info("remote master is %u.%u.%u at %s", + say_info("remote master %s at %s running Tarantool %u.%u.%u", + tt_uuid_str(&greeting.uuid), + sio_strfaddr(&applier->addr, applier->addr_len), version_id_major(greeting.version_id), version_id_minor(greeting.version_id), - version_id_patch(greeting.version_id), - sio_strfaddr(&applier->addr, applier->addr_len)); + version_id_patch(greeting.version_id)); } /* Save the remote instance version and UUID on connect. */ diff --git a/src/box/box.cc b/src/box/box.cc index 6ea83a65..a28a42f3 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1509,6 +1509,9 @@ box_process_join(struct ev_io *io, struct xrow_header *header) row.sync = header->sync; coio_write_xrow(io, &row); + say_info("joining replica %s at %s", + tt_uuid_str(&instance_uuid), sio_socketname(io->fd)); + /* * Initial stream: feed replica with dirty data from engines. */ @@ -1551,6 +1554,13 @@ box_process_join(struct ev_io *io, struct xrow_header *header) relay_final_join(io->fd, header->sync, &start_vclock, &stop_vclock); say_info("final data sent."); + char *local_vclock_str = vclock_to_string(&replicaset.vclock); + char *remote_vclock_str = vclock_to_string(&stop_vclock); + say_info("remote vclock %s local vclock %s", + remote_vclock_str, local_vclock_str); + free(local_vclock_str); + free(remote_vclock_str); + /* Send end of WAL stream marker */ xrow_encode_vclock_xc(&row, &replicaset.vclock); row.sync = header->sync; @@ -1600,6 +1610,9 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header) "wal_mode = 'none'"); } + struct vclock vclock; + vclock_create(&vclock); + vclock_copy(&vclock, &replicaset.vclock); /* * Send a response to SUBSCRIBE request, tell * the replica how many rows we have in stock for it, @@ -1612,9 +1625,7 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header) * the additional field. */ struct xrow_header row; - xrow_encode_subscribe_response_xc(&row, - &REPLICASET_UUID, - &replicaset.vclock); + xrow_encode_subscribe_response_xc(&row, &REPLICASET_UUID, &vclock); /* * Identify the message with the replica id of this * instance, this is the only way for a replica to find @@ -1626,6 +1637,16 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header) row.sync = header->sync; coio_write_xrow(io, &row); + say_info("subscribed replica %s at %s", + tt_uuid_str(&replica_uuid), sio_socketname(io->fd)); + + char *local_vclock_str = vclock_to_string(&vclock); + char *remote_vclock_str = vclock_to_string(&replica_clock); + say_info("remote vclock %s local vclock %s", + remote_vclock_str, local_vclock_str); + free(local_vclock_str); + free(remote_vclock_str); + /* * Process SUBSCRIBE request via replication relay * Send current recovery vector clock as a marker @@ -1852,6 +1873,9 @@ bootstrap(const struct tt_uuid *instance_uuid, INSTANCE_UUID = *instance_uuid; else tt_uuid_create(&INSTANCE_UUID); + + say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID)); + /* * Begin listening on the socket to enable * master-master replication leader election. @@ -1909,6 +1933,8 @@ local_recovery(const struct tt_uuid *instance_uuid, tt_uuid_str(&INSTANCE_UUID)); } + say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID)); + struct wal_stream wal_stream; wal_stream_create(&wal_stream, cfg_geti64("rows_per_wal")); @@ -1935,6 +1961,10 @@ local_recovery(const struct tt_uuid *instance_uuid, */ recovery_scan(recovery, &replicaset.vclock, &gc.vclock); + char *vclock_str = vclock_to_string(&replicaset.vclock); + say_info("instance vclock %s", vclock_str); + free(vclock_str); + if (wal_dir_lock >= 0) { box_listen(); box_sync_replication(false); -- 2.11.0