From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id C03EF4696C3 for ; Mon, 30 Dec 2019 20:47:05 +0300 (MSK) From: Ilya Kosarev Date: Mon, 30 Dec 2019 20:47:00 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: Subject: [Tarantool-patches] [PATCH 1/2] relay: fix vclock obtainment on join List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org Cc: v.shpilevoy@tarantool.org In case of high load vclock used to join replica could be in advance comparing to an actual WAL. Therefore replica could have missed some tuples from master. In order to fix this wal_sync is updated so that now we can obtain up to date vclock on the flushed state using it. Prerequisites #4160 --- src/box/relay.cc | 7 +++---- src/box/vinyl.c | 4 ++-- src/box/wal.c | 23 +++++++++++++++++------ src/box/wal.h | 4 +++- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/box/relay.cc b/src/box/relay.cc index e849fcf4f50..b9ed27503a3 100644 --- a/src/box/relay.cc +++ b/src/box/relay.cc @@ -307,13 +307,12 @@ relay_initial_join(int fd, uint64_t sync, struct vclock *vclock) /* * Sync WAL to make sure that all changes visible from - * the frozen read view are successfully committed. + * the frozen read view are successfully committed and + * obtain corresponding vclock. */ - if (wal_sync() != 0) + if (wal_sync(vclock) != 0) diag_raise(); - vclock_copy(vclock, &replicaset.vclock); - /* Respond to the JOIN request with the current vclock. */ struct xrow_header row; xrow_encode_vclock_xc(&row, vclock); diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 15a136f8109..5f169f09b25 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -1087,7 +1087,7 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format) */ int rc; if (need_wal_sync) { - rc = wal_sync(); + rc = wal_sync(NULL); if (rc != 0) goto out; } @@ -4180,7 +4180,7 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index, */ int rc; if (need_wal_sync) { - rc = wal_sync(); + rc = wal_sync(NULL); if (rc != 0) goto out; } diff --git a/src/box/wal.c b/src/box/wal.c index 5e2c13e0e08..6348ef4565e 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -519,21 +519,27 @@ wal_free(void) wal_writer_destroy(writer); } +struct wal_vclock_msg { + struct cbus_call_msg base; + struct vclock vclock; +}; + static int -wal_sync_f(struct cbus_call_msg *msg) +wal_sync_f(struct cbus_call_msg *data) { - (void)msg; + struct wal_vclock_msg *msg = (struct wal_vclock_msg *) data; struct wal_writer *writer = &wal_writer_singleton; if (writer->in_rollback.route != NULL) { /* We're rolling back a failed write. */ diag_set(ClientError, ER_WAL_IO); return -1; } + vclock_copy(&msg->vclock, &writer->vclock); return 0; } int -wal_sync(void) +wal_sync(struct vclock *vclock) { ERROR_INJECT(ERRINJ_WAL_SYNC, { diag_set(ClientError, ER_INJECTION, "wal sync"); @@ -541,18 +547,23 @@ wal_sync(void) }); struct wal_writer *writer = &wal_writer_singleton; - if (writer->wal_mode == WAL_NONE) + if (writer->wal_mode == WAL_NONE) { + if (vclock != NULL) + vclock_copy(vclock, &writer->vclock); return 0; + } if (!stailq_empty(&writer->rollback)) { /* We're rolling back a failed write. */ diag_set(ClientError, ER_WAL_IO); return -1; } bool cancellable = fiber_set_cancellable(false); - struct cbus_call_msg msg; + struct wal_vclock_msg msg; int rc = cbus_call(&writer->wal_pipe, &writer->tx_prio_pipe, - &msg, wal_sync_f, NULL, TIMEOUT_INFINITY); + &msg.base, wal_sync_f, NULL, TIMEOUT_INFINITY); fiber_set_cancellable(cancellable); + if (vclock != NULL) + vclock_copy(vclock, &msg.vclock); return rc; } diff --git a/src/box/wal.h b/src/box/wal.h index b76b0a41f93..76b44941a7a 100644 --- a/src/box/wal.h +++ b/src/box/wal.h @@ -182,9 +182,11 @@ wal_mode(); /** * Wait until all submitted writes are successfully flushed * to disk. Returns 0 on success, -1 if write failed. + * Corresponding vclock is returned in @a vclock unless it is + * NULL. */ int -wal_sync(void); +wal_sync(struct vclock *vclock); struct wal_checkpoint { struct cbus_call_msg base; -- 2.17.1