Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 1/9] wal: separate checkpoint and flush paths
Date: Wed, 28 Nov 2018 19:14:39 +0300	[thread overview]
Message-ID: <b0daa530e174da15a2042cc55c554add04ed41ac.1543419109.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1543419109.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1543419109.git.vdavydov.dev@gmail.com>

Currently, wal_checkpoint() is used for two purposes. First, to make a
checkpoint (rotate = true). Second, to flush all pending WAL requests
(rotate = false). Since checkpointing has to fail if cascading rollback
is in progress so does flushing. This is confusing. Let's separate the
two paths.

While we are at it, let's also rewrite WAL checkpointing using cbus_call
instead of cpipe_push as it's a more convenient way of exchanging simple
two-hop messages between two threads.
---
 src/box/box.cc  |  5 ++--
 src/box/vinyl.c |  5 +---
 src/box/wal.c   | 91 +++++++++++++++++++++++++++------------------------------
 src/box/wal.h   | 15 +++++++---
 4 files changed, 57 insertions(+), 59 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 8d6e966e..5ea2f014 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -2190,10 +2190,9 @@ box_checkpoint()
 		goto end;
 
 	struct vclock vclock;
-	if ((rc = wal_checkpoint(&vclock, true))) {
-		tnt_error(ClientError, ER_CHECKPOINT_ROLLBACK);
+	if ((rc = wal_checkpoint(&vclock)))
 		goto end;
-	}
+
 	rc = engine_commit_checkpoint(&vclock);
 end:
 	if (rc)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 1794489d..05df1329 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1014,10 +1014,7 @@ vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm)
 	 * Wait for prepared transactions to complete
 	 * (we can't abort them as they reached WAL).
 	 */
-	struct vclock unused;
-	if (wal_checkpoint(&unused, false) != 0)
-		return -1;
-
+	wal_flush();
 	return 0;
 }
 
diff --git a/src/box/wal.c b/src/box/wal.c
index 11aae5fc..bc396ca5 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -461,29 +461,39 @@ wal_thread_stop()
 		wal_writer_destroy(&wal_writer_singleton);
 }
 
+void
+wal_flush(void)
+{
+	struct wal_writer *writer = &wal_writer_singleton;
+	if (writer->wal_mode == WAL_NONE)
+		return;
+	cbus_flush(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe, NULL);
+}
+
 struct wal_checkpoint
 {
-	struct cmsg base;
-	struct vclock *vclock;
-	struct fiber *fiber;
-	bool rotate;
-	int res;
+	struct cbus_call_msg base;
+	struct vclock vclock;
 };
 
-void
-wal_checkpoint_f(struct cmsg *data)
+static int
+wal_checkpoint_f(struct cbus_call_msg *data)
 {
 	struct wal_checkpoint *msg = (struct wal_checkpoint *) data;
 	struct wal_writer *writer = &wal_writer_singleton;
 	if (writer->in_rollback.route != NULL) {
-		/* We're rolling back a failed write. */
-		msg->res = -1;
-		return;
+		/*
+		 * We're rolling back a failed write and so
+		 * can't make a checkpoint - see the comment
+		 * in wal_checkpoint() for the explanation.
+		 */
+		diag_set(ClientError, ER_CHECKPOINT_ROLLBACK);
+		return -1;
 	}
 	/*
 	 * Avoid closing the current WAL if it has no rows (empty).
 	 */
-	if (msg->rotate && xlog_is_open(&writer->current_wal) &&
+	if (xlog_is_open(&writer->current_wal) &&
 	    vclock_sum(&writer->current_wal.meta.vclock) !=
 	    vclock_sum(&writer->vclock)) {
 
@@ -492,53 +502,38 @@ wal_checkpoint_f(struct cmsg *data)
 		 * The next WAL will be created on the first write.
 		 */
 	}
-	vclock_copy(msg->vclock, &writer->vclock);
-}
-
-void
-wal_checkpoint_done_f(struct cmsg *data)
-{
-	struct wal_checkpoint *msg = (struct wal_checkpoint *) data;
-	fiber_wakeup(msg->fiber);
+	vclock_copy(&msg->vclock, &writer->vclock);
+	return 0;
 }
 
 int
-wal_checkpoint(struct vclock *vclock, bool rotate)
+wal_checkpoint(struct vclock *vclock)
 {
 	struct wal_writer *writer = &wal_writer_singleton;
-	if (! stailq_empty(&writer->rollback)) {
-		/*
-		 * The writer rollback queue is not empty,
-		 * roll back this transaction immediately.
-		 * This is to ensure we do not accidentally
-		 * commit a transaction which has seen changes
-		 * that will be rolled back.
-		 */
-		say_error("Aborting transaction %llu during "
-			  "cascading rollback",
-			  vclock_sum(&writer->vclock));
-		return -1;
-	}
 	if (writer->wal_mode == WAL_NONE) {
 		vclock_copy(vclock, &writer->vclock);
 		return 0;
 	}
-	static struct cmsg_hop wal_checkpoint_route[] = {
-		{wal_checkpoint_f, &wal_thread.tx_prio_pipe},
-		{wal_checkpoint_done_f, NULL},
-	};
-	vclock_create(vclock);
+	if (!stailq_empty(&writer->rollback)) {
+		/*
+		 * If cascading rollback is in progress, in-memory
+		 * indexes can contain changes scheduled for rollback.
+		 * If we made a checkpoint, we could write them to
+		 * the snapshot. So we abort checkpointing in this
+		 * case.
+		 */
+		diag_set(ClientError, ER_CHECKPOINT_ROLLBACK);
+		return -1;
+	}
 	struct wal_checkpoint msg;
-	cmsg_init(&msg.base, wal_checkpoint_route);
-	msg.vclock = vclock;
-	msg.fiber = fiber();
-	msg.rotate = rotate;
-	msg.res = 0;
-	cpipe_push(&wal_thread.wal_pipe, &msg.base);
-	fiber_set_cancellable(false);
-	fiber_yield();
-	fiber_set_cancellable(true);
-	return msg.res;
+	bool cancellable = fiber_set_cancellable(false);
+	int rc = cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe,
+			   &msg.base, wal_checkpoint_f, NULL, TIMEOUT_INFINITY);
+	fiber_set_cancellable(cancellable);
+	if (rc != 0)
+		return -1;
+	vclock_copy(vclock, &msg.vclock);
+	return 0;
 }
 
 struct wal_gc_msg
diff --git a/src/box/wal.h b/src/box/wal.h
index e4094b1e..7ca27f1a 100644
--- a/src/box/wal.h
+++ b/src/box/wal.h
@@ -166,13 +166,20 @@ wal_mode();
 
 /**
  * Wait till all pending changes to the WAL are flushed.
- * Rotates the WAL.
- *
- * @param[out] vclock WAL vclock
+ */
+void
+wal_flush(void);
+
+/**
+ * Prepare WAL for checkpointing.
  *
+ * This function flushes all pending changes and rotates the
+ * current WAL. The vclock of the last record written to the
+ * rotated WAL is returned in @vclock. This is the vclock that
+ * is supposed to be used to identify the new checkpoint.
  */
 int
-wal_checkpoint(struct vclock *vclock, bool rotate);
+wal_checkpoint(struct vclock *vclock);
 
 /**
  * Remove WAL files that are not needed by consumers reading
-- 
2.11.0

  reply	other threads:[~2018-11-28 16:14 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-28 16:14 [PATCH 0/9] Allow to limit size of WAL files Vladimir Davydov
2018-11-28 16:14 ` Vladimir Davydov [this message]
2018-11-29 16:24   ` [tarantool-patches] Re: [PATCH 1/9] wal: separate checkpoint and flush paths Konstantin Osipov
2018-11-28 16:14 ` [PATCH 2/9] wal: remove files needed for recovery from backup checkpoints on ENOSPC Vladimir Davydov
2018-11-29 16:31   ` [tarantool-patches] " Konstantin Osipov
2018-11-29 17:42     ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 3/9] recovery: restore garbage collector vclock after restart Vladimir Davydov
2018-11-29 16:37   ` [tarantool-patches] " Konstantin Osipov
2018-11-29 17:42     ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 4/9] gc: run garbage collection in background Vladimir Davydov
2018-11-29 16:42   ` [tarantool-patches] " Konstantin Osipov
2018-11-29 17:43     ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 5/9] gc: do not use WAL watcher API for deactivating stale consumers Vladimir Davydov
2018-11-29 17:02   ` [tarantool-patches] " Konstantin Osipov
2018-11-28 16:14 ` [PATCH 6/9] wal: simplify watcher API Vladimir Davydov
2018-11-29 17:33   ` [tarantool-patches] " Konstantin Osipov
2018-11-28 16:14 ` [PATCH 7/9] box: rewrite checkpoint daemon in C Vladimir Davydov
2018-11-30  8:58   ` [tarantool-patches] " Konstantin Osipov
2018-11-30  9:41     ` Vladimir Davydov
2018-12-05 16:21       ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 8/9] wal: pass struct instead of vclock to checkpoint methods Vladimir Davydov
2018-11-30  9:00   ` [tarantool-patches] " Konstantin Osipov
2018-11-30  9:43     ` Vladimir Davydov
2018-12-03 20:20       ` Konstantin Osipov
2018-11-28 16:14 ` [PATCH 9/9] wal: trigger checkpoint if there are too many WALs Vladimir Davydov
2018-12-03 20:34   ` [tarantool-patches] " Konstantin Osipov
2018-12-04 11:25     ` Vladimir Davydov
2018-12-04 12:53       ` Konstantin Osipov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b0daa530e174da15a2042cc55c554add04ed41ac.1543419109.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 1/9] wal: separate checkpoint and flush paths' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox