Tarantool development patches archive
 help / color / mirror / Atom feed
From: Georgy Kirichenko <georgy@tarantool.org>
To: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v4 06/11] wal: extract log write batch into a separate routine
Date: Wed, 12 Feb 2020 12:39:15 +0300	[thread overview]
Message-ID: <1f79b6013791a30b4f70f8679e99be9530e5c0c8.1581500169.git.georgy@tarantool.org> (raw)
In-Reply-To: <cover.1581500169.git.georgy@tarantool.org>

Introduce a routine which transfers journal entries from an input
queue to an output queue, writing them to an xlog file. As soon as
the xlog actually outputs data to disk, the routine breaks the
transfer loop and returns the write result code. After that, the
output queue contains the entries which were written to the xlog
(regardless of the disk write status), whereas the input queue
contains the untouched entries. If the input queue is processed
without an actual xlog write, the xlog file is flushed manually.
This refactoring helps to implement the wal memory buffer.

Part of #980, #3794
---
 src/box/wal.c | 87 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/src/box/wal.c b/src/box/wal.c
index 0ae66ff32..ce15cb459 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -958,6 +958,36 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
 	}
 }
 
+/*
+ * Shift entries from a non-empty input queue to the output
+ * queue, writing each one to the current log, until the log is
+ * flushed, a write fails or the input is drained (then the log
+ * is flushed explicitly). Returns the last write or flush
+ * result: a byte count on success, negative on error.
+ */
+static ssize_t
+wal_write_xlog_batch(struct wal_writer *writer, struct stailq *input,
+		     struct stailq *output, struct vclock *vclock_diff)
+{
+	struct xlog *l = &writer->current_wal;
+	ssize_t rc;
+	do {
+		struct journal_entry *entry =
+			stailq_shift_entry(input, struct journal_entry, fifo);
+		stailq_add_tail(output, &entry->fifo);
+
+		wal_assign_lsn(vclock_diff, &writer->vclock,
+			       entry->rows, entry->rows + entry->n_rows);
+		entry->res = vclock_sum(vclock_diff) +
+			     vclock_sum(&writer->vclock);
+		rc = xlog_write_entry(l, entry);
+	} while (rc == 0 && !stailq_empty(input));
+	/* No write triggered a flush, so flush the log explicitly. */
+	if (rc == 0)
+		rc = xlog_flush(l);
+	return rc;
+}
+
 static void
 wal_write_to_disk(struct cmsg *msg)
 {
@@ -1017,36 +1047,31 @@ wal_write_to_disk(struct cmsg *msg)
 	 * of request in xlog file is stored inside `struct journal_entry`.
 	 */
 
-	struct xlog *l = &writer->current_wal;
-
-	/*
-	 * Iterate over requests (transactions)
-	 */
-	int rc;
-	struct journal_entry *entry;
-	struct stailq_entry *last_committed = NULL;
-	stailq_foreach_entry(entry, &wal_msg->commit, fifo) {
-		wal_assign_lsn(&vclock_diff, &writer->vclock,
-			       entry->rows, entry->rows + entry->n_rows);
-		entry->res = vclock_sum(&vclock_diff) +
-			     vclock_sum(&writer->vclock);
-		rc = xlog_write_entry(l, entry);
-		if (rc < 0)
-			goto done;
-		if (rc > 0) {
+	struct stailq input;
+	stailq_create(&input);
+	stailq_concat(&input, &wal_msg->commit);
+	struct stailq output;
+	stailq_create(&output);
+	while (!stailq_empty(&input)) {
+		ssize_t rc = wal_write_xlog_batch(writer, &input, &output,
+						  &vclock_diff);
+		if (rc < 0) {
+			/*
+			 * Put processed entries and tail of write
+			 * queue to a rollback list.
+			 */
+			stailq_concat(&wal_msg->rollback, &output);
+			stailq_concat(&wal_msg->rollback, &input);
+		} else {
+			/*
+			 * Schedule processed entries to commit
+			 * and update the wal vclock.
+			 */
+			stailq_concat(&wal_msg->commit, &output);
 			writer->checkpoint_wal_size += rc;
-			last_committed = &entry->fifo;
 			vclock_merge(&writer->vclock, &vclock_diff);
 		}
-		/* rc == 0: the write is buffered in xlog_tx */
 	}
-	rc = xlog_flush(l);
-	if (rc < 0)
-		goto done;
-
-	writer->checkpoint_wal_size += rc;
-	last_committed = stailq_last(&wal_msg->commit);
-	vclock_merge(&writer->vclock, &vclock_diff);
 
 	/*
 	 * Notify TX if the checkpoint threshold has been exceeded.
@@ -1070,7 +1095,6 @@ wal_write_to_disk(struct cmsg *msg)
 		}
 	}
 
-done:
 	error = diag_last_error(diag_get());
 	if (error) {
 		/* Until we can pass the error to tx, log it and clear. */
@@ -1090,15 +1114,12 @@ done:
 	 * nothing, and need to start rollback from the first
 	 * request. Otherwise we rollback from the first request.
 	 */
-	struct stailq rollback;
-	stailq_cut_tail(&wal_msg->commit, last_committed, &rollback);
-
-	if (!stailq_empty(&rollback)) {
+	if (!stailq_empty(&wal_msg->rollback)) {
+		struct journal_entry *entry;
 		/* Update status of the successfully committed requests. */
-		stailq_foreach_entry(entry, &rollback, fifo)
+		stailq_foreach_entry(entry, &wal_msg->rollback, fifo)
 			entry->res = -1;
 		/* Rollback unprocessed requests */
-		stailq_concat(&wal_msg->rollback, &rollback);
 		wal_writer_begin_rollback(writer);
 	}
 	fiber_gc();
-- 
2.25.0

  parent reply	other threads:[~2020-02-12  9:39 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-12  9:39 [Tarantool-patches] [PATCH v4 00/11] Replication from memory Georgy Kirichenko
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 01/11] recovery: do not call recovery_stop_local inside recovery_delete Georgy Kirichenko
2020-03-19  7:55   ` Konstantin Osipov
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 02/11] recovery: do not throw an error Georgy Kirichenko
2020-03-19  7:56   ` Konstantin Osipov
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 03/11] coio: do not allow parallel usage of coio Georgy Kirichenko
2020-03-19 18:09   ` Konstantin Osipov
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 04/11] coio: do not throw an error, minor refactoring Georgy Kirichenko
2020-03-23  6:59   ` Konstantin Osipov
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 05/11] xstream: get rid of an exception Georgy Kirichenko
2020-02-12  9:39 ` Georgy Kirichenko [this message]
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 07/11] wal: matrix clock structure Georgy Kirichenko
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 08/11] wal: track relay vclock and collect logs in wal thread Georgy Kirichenko
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 09/11] wal: xrow memory buffer and cursor Georgy Kirichenko
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 10/11] wal: use a xrow buffer object for entry encoding Georgy Kirichenko
2020-02-12  9:39 ` [Tarantool-patches] [PATCH v4 11/11] replication: use wal memory buffer to fetch rows Georgy Kirichenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1f79b6013791a30b4f70f8679e99be9530e5c0c8.1581500169.git.georgy@tarantool.org \
    --to=georgy@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v4 06/11] wal: extract log write batch into a separate routine' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox