From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 6D4BA2F2D8 for ; Thu, 23 May 2019 04:21:40 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id E8LV94vhb-mX for ; Thu, 23 May 2019 04:21:40 -0400 (EDT) Received: from smtp37.i.mail.ru (smtp37.i.mail.ru [94.100.177.97]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 573832F294 for ; Thu, 23 May 2019 04:19:47 -0400 (EDT) From: Georgy Kirichenko Subject: [tarantool-patches] [PATCH v2 7/8] Enable asynchronous wal writes Date: Thu, 23 May 2019 11:19:39 +0300 Message-Id: <3dc25e9c195d551ce27fbaf8446a80ddcfca8a07.1558598679.git.georgy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org Cc: Georgy Kirichenko Allow sending a journal entry to the WAL without waiting until the write has finished. Two methods are introduced: * async_write method emits an entry to be written and returns 0 if the entry was successfully scheduled; * async_wait method waits until the write has finished and returns the result of the journal write. 
Prerequisites: #1254 --- src/box/box.cc | 21 ++++++++++++++++++++- src/box/journal.c | 18 ++++++++++++++++++ src/box/journal.h | 30 ++++++++++++++++++++++++++++++ src/box/wal.c | 42 +++++++++++++++++++++++++++++++++++------- 4 files changed, 103 insertions(+), 8 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index b8ef4b9ed..95c406ea7 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -320,10 +320,29 @@ recovery_journal_write(struct journal *base, return vclock_sum(journal->vclock); } +static int64_t +recovery_journal_async_write(struct journal *base, + struct journal_entry * /* entry */) +{ + (void) base; + return 0; +} + +static int64_t +recovery_journal_async_wait(struct journal *base, + struct journal_entry * /* entry */) +{ + struct recovery_journal *journal = (struct recovery_journal *) base; + return vclock_sum(journal->vclock); +} + static inline void recovery_journal_create(struct recovery_journal *journal, struct vclock *v) { - journal_create(&journal->base, recovery_journal_write, NULL); + journal_create(&journal->base, recovery_journal_write, + recovery_journal_async_write, + recovery_journal_async_wait, + NULL); journal->vclock = v; } diff --git a/src/box/journal.c b/src/box/journal.c index 8d213d57e..4a1d38dd5 100644 --- a/src/box/journal.c +++ b/src/box/journal.c @@ -45,8 +45,26 @@ dummy_journal_write(struct journal *journal, struct journal_entry *entry) return 0; } +static int64_t +dummy_async_write(struct journal *journal, struct journal_entry *entry) +{ + (void) journal; + (void) entry; + return 0; +} + +static int64_t +dummy_async_wait(struct journal *journal, struct journal_entry *entry) +{ + (void) journal; + (void) entry; + return 0; +} + static struct journal dummy_journal = { dummy_journal_write, + dummy_async_write, + dummy_async_wait, NULL, }; diff --git a/src/box/journal.h b/src/box/journal.h index 5e1323464..978942a83 100644 --- a/src/box/journal.h +++ b/src/box/journal.h @@ -97,6 +97,10 @@ journal_entry_new(size_t n_rows, 
struct region *region); struct journal { int64_t (*write)(struct journal *journal, struct journal_entry *req); + int64_t (*async_write)(struct journal *journal, + struct journal_entry *req); + int64_t (*async_wait)(struct journal *journal, + struct journal_entry *req); void (*destroy)(struct journal *journal); }; @@ -124,6 +128,28 @@ journal_write(struct journal_entry *entry) return current_journal->write(current_journal, entry); } +/** + * Send a single entry to write. + * + * @return 0 if write was scheduled or -1 on error. + */ +static inline int64_t +journal_async_write(struct journal_entry *entry) +{ + return current_journal->async_write(current_journal, entry); +} + +/** + * Wait until entry processing finished. + * @return a log sequence number (vclock signature) of the entry + * or -1 on error. + */ +static inline int64_t +journal_async_wait(struct journal_entry *entry) +{ + return current_journal->async_wait(current_journal, entry); +} + /** * Change the current implementation of the journaling API. 
* Happens during life cycle of an instance: @@ -156,9 +182,13 @@ journal_set(struct journal *new_journal) static inline void journal_create(struct journal *journal, int64_t (*write)(struct journal *, struct journal_entry *), + int64_t (*async_write)(struct journal *, struct journal_entry *), + int64_t (*async_wait)(struct journal *, struct journal_entry *), void (*destroy)(struct journal *)) { journal->write = write; + journal->async_write = async_write, + journal->async_wait = async_wait, journal->destroy = destroy; } diff --git a/src/box/wal.c b/src/box/wal.c index 4b0a7c802..d27dbff59 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -63,6 +63,12 @@ int wal_dir_lock = -1; static int64_t wal_write(struct journal *, struct journal_entry *); +static int64_t +wal_async_write(struct journal *, struct journal_entry *); + +static int64_t +wal_async_wait(struct journal *, struct journal_entry *); + static int64_t wal_write_in_wal_mode_none(struct journal *, struct journal_entry *); @@ -358,7 +364,10 @@ wal_writer_create(struct wal_writer *writer, enum wal_mode wal_mode, writer->wal_max_rows = wal_max_rows; writer->wal_max_size = wal_max_size; journal_create(&writer->base, wal_mode == WAL_NONE ? - wal_write_in_wal_mode_none : wal_write, NULL); + wal_write_in_wal_mode_none : wal_write, + wal_mode == WAL_NONE ? + wal_write_in_wal_mode_none: wal_async_write, + wal_async_wait, NULL); struct xlog_opts opts = xlog_opts_default; opts.sync_is_async = true; @@ -1131,12 +1140,8 @@ on_wal_write_done(struct trigger *trigger, void *event) fiber_cond_signal(cond); } -/** - * WAL writer main entry point: queue a single request - * to be written to disk and wait until this task is completed. 
- */ int64_t -wal_write(struct journal *journal, struct journal_entry *entry) +wal_async_write(struct journal *journal, struct journal_entry *entry) { struct wal_writer *writer = (struct wal_writer *) journal; @@ -1181,6 +1186,15 @@ wal_write(struct journal *journal, struct journal_entry *entry) batch->approx_len += entry->approx_len; writer->wal_pipe.n_input += entry->n_rows * XROW_IOVMAX; cpipe_flush_input(&writer->wal_pipe); + return 0; +} + +int64_t +wal_async_wait(struct journal *journal, struct journal_entry *entry) +{ + (void) journal; + if (entry->done) + return entry->res; struct fiber_cond done_cond; fiber_cond_create(&done_cond); @@ -1197,6 +1211,18 @@ wal_write(struct journal *journal, struct journal_entry *entry) return entry->res; } +/** + * WAL writer main entry point: queue a single request + * to be written to disk and wait until this task is completed. + */ +int64_t +wal_write(struct journal *journal, struct journal_entry *entry) +{ + if (wal_async_write(journal, entry) != 0) + return -1; + return wal_async_wait(journal, entry); +} + int64_t wal_write_in_wal_mode_none(struct journal *journal, struct journal_entry *entry) @@ -1208,7 +1234,9 @@ wal_write_in_wal_mode_none(struct journal *journal, entry->rows + entry->n_rows); vclock_merge(&writer->vclock, &vclock_diff); vclock_copy(&replicaset.vclock, &writer->vclock); - return vclock_sum(&writer->vclock); + entry->done = true; + entry->res = vclock_sum(&writer->vclock); + return entry->res; } void -- 2.21.0