[tarantool-patches] [PATCH 09/10] Enable asynchronous wal writes

Georgy Kirichenko georgy at tarantool.org
Fri Apr 19 15:44:05 MSK 2019


Allow sending a journal entry to the WAL without waiting until the
write has finished. Two methods are introduced:
 * the async_write method emits an entry to be written and returns 0
 if the entry was successfully scheduled;
 * the async_wait method waits until the write has finished and
 returns the result of the journal write.

Prerequisites: #1254
---
 src/box/box.cc    | 21 ++++++++++++++++++++-
 src/box/journal.c | 18 ++++++++++++++++++
 src/box/journal.h | 30 ++++++++++++++++++++++++++++++
 src/box/wal.c     | 42 +++++++++++++++++++++++++++++++++++-------
 4 files changed, 103 insertions(+), 8 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 46cd444fd..88be886f3 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -311,10 +311,29 @@ recovery_journal_write(struct journal *base,
 	return vclock_sum(journal->vclock);
 }
 
+static int64_t
+recovery_journal_async_write(struct journal *base,
+			     struct journal_entry * /* entry */)
+{
+	(void) base;
+	return 0;
+}
+
+static int64_t
+recovery_journal_async_wait(struct journal *base,
+			    struct journal_entry * /* entry */)
+{
+	struct recovery_journal *journal = (struct recovery_journal *) base;
+	return vclock_sum(journal->vclock);
+}
+
 static inline void
 recovery_journal_create(struct recovery_journal *journal, struct vclock *v)
 {
-	journal_create(&journal->base, recovery_journal_write, NULL);
+	journal_create(&journal->base, recovery_journal_write,
+		       recovery_journal_async_write,
+		       recovery_journal_async_wait,
+		       NULL);
 	journal->vclock = v;
 }
 
diff --git a/src/box/journal.c b/src/box/journal.c
index b0f4d48b5..7ccbd8594 100644
--- a/src/box/journal.c
+++ b/src/box/journal.c
@@ -44,8 +44,26 @@ dummy_journal_write(struct journal *journal, struct journal_entry *entry)
 	return 0;
 }
 
+static int64_t
+dummy_async_write(struct journal *journal, struct journal_entry *entry)
+{
+	(void) journal;
+	(void) entry;
+	return 0;
+}
+
+static int64_t
+dummy_async_wait(struct journal *journal, struct journal_entry *entry)
+{
+	(void) journal;
+	(void) entry;
+	return 0;
+}
+
 static struct journal dummy_journal = {
 	dummy_journal_write,
+	dummy_async_write,
+	dummy_async_wait,
 	NULL,
 };
 
diff --git a/src/box/journal.h b/src/box/journal.h
index 4a2fb3585..0292d77f3 100644
--- a/src/box/journal.h
+++ b/src/box/journal.h
@@ -112,6 +112,10 @@ journal_entry_on_error(struct journal_entry *entry, struct trigger *trigger)
 struct journal {
 	int64_t (*write)(struct journal *journal,
 			 struct journal_entry *req);
+	int64_t (*async_write)(struct journal *journal,
+			       struct journal_entry *req);
+	int64_t (*async_wait)(struct journal *journal,
+			      struct journal_entry *req);
 	void (*destroy)(struct journal *journal);
 };
 
@@ -139,6 +143,28 @@ journal_write(struct journal_entry *entry)
 	return current_journal->write(current_journal, entry);
 }
 
+/**
+ * Send a single entry to write.
+ *
+ * @return   0 if write was scheduled or -1 on error.
+ */
+static inline int64_t
+journal_async_write(struct journal_entry *entry)
+{
+	return current_journal->async_write(current_journal, entry);
+}
+
+/**
+ * Wait until entry processing finished.
+ * @return   a log sequence number (vclock signature) of the entry
+ *           or -1 on error.
+ */
+static inline int64_t
+journal_async_wait(struct journal_entry *entry)
+{
+	return current_journal->async_wait(current_journal, entry);
+}
+
 /**
  * Change the current implementation of the journaling API.
  * Happens during life cycle of an instance:
@@ -171,9 +197,13 @@ journal_set(struct journal *new_journal)
 static inline void
 journal_create(struct journal *journal,
 	       int64_t (*write)(struct journal *, struct journal_entry *),
+	       int64_t (*async_write)(struct journal *, struct journal_entry *),
+	       int64_t (*async_wait)(struct journal *, struct journal_entry *),
 	       void (*destroy)(struct journal *))
 {
 	journal->write = write;
+	journal->async_write = async_write;
+	journal->async_wait = async_wait;
 	journal->destroy = destroy;
 }
 
diff --git a/src/box/wal.c b/src/box/wal.c
index f0352e938..39b049b06 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -63,6 +63,12 @@ int wal_dir_lock = -1;
 static int64_t
 wal_write(struct journal *, struct journal_entry *);
 
+static int64_t
+wal_async_write(struct journal *, struct journal_entry *);
+
+static int64_t
+wal_async_wait(struct journal *, struct journal_entry *);
+
 static int64_t
 wal_write_in_wal_mode_none(struct journal *, struct journal_entry *);
 
@@ -362,7 +368,10 @@ wal_writer_create(struct wal_writer *writer, enum wal_mode wal_mode,
 	writer->wal_max_rows = wal_max_rows;
 	writer->wal_max_size = wal_max_size;
 	journal_create(&writer->base, wal_mode == WAL_NONE ?
-		       wal_write_in_wal_mode_none : wal_write, NULL);
+		       wal_write_in_wal_mode_none : wal_write,
+		       wal_mode == WAL_NONE ?
+		       wal_write_in_wal_mode_none: wal_async_write,
+		       wal_async_wait, NULL);
 
 	struct xlog_opts opts = xlog_opts_default;
 	opts.sync_is_async = true;
@@ -1135,12 +1144,8 @@ on_wal_write_done(struct trigger *trigger, void *event)
 	fiber_cond_signal(cond);
 }
 
-/**
- * WAL writer main entry point: queue a single request
- * to be written to disk and wait until this task is completed.
- */
 int64_t
-wal_write(struct journal *journal, struct journal_entry *entry)
+wal_async_write(struct journal *journal, struct journal_entry *entry)
 {
 	struct wal_writer *writer = (struct wal_writer *) journal;
 
@@ -1185,6 +1190,15 @@ wal_write(struct journal *journal, struct journal_entry *entry)
 	batch->approx_len += entry->approx_len;
 	writer->wal_pipe.n_input += entry->n_rows * XROW_IOVMAX;
 	cpipe_flush_input(&writer->wal_pipe);
+	return 0;
+}
+
+int64_t
+wal_async_wait(struct journal *journal, struct journal_entry *entry)
+{
+	(void) journal;
+	if (entry->done)
+		return entry->res;
 
 	struct fiber_cond done_cond;
 	fiber_cond_create(&done_cond);
@@ -1201,6 +1215,18 @@ wal_write(struct journal *journal, struct journal_entry *entry)
 	return entry->res;
 }
 
+/**
+ * WAL writer main entry point: queue a single request
+ * to be written to disk and wait until this task is completed.
+ */
+int64_t
+wal_write(struct journal *journal, struct journal_entry *entry)
+{
+	if (wal_async_write(journal, entry) != 0)
+		return -1;
+	return wal_async_wait(journal, entry);
+}
+
 int64_t
 wal_write_in_wal_mode_none(struct journal *journal,
 			   struct journal_entry *entry)
@@ -1212,7 +1238,9 @@ wal_write_in_wal_mode_none(struct journal *journal,
 		       entry->rows + entry->n_rows);
 	vclock_merge(&writer->vclock, &vclock_diff);
 	vclock_copy(&replicaset.vclock, &writer->vclock);
-	return vclock_sum(&writer->vclock);
+	entry->done = true;
+	entry->res = vclock_sum(&writer->vclock);
+	return entry->res;
 }
 
 void
-- 
2.21.0





More information about the Tarantool-patches mailing list