From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH 1/9] wal: separate checkpoint and flush paths Date: Wed, 28 Nov 2018 19:14:39 +0300 [thread overview] Message-ID: <b0daa530e174da15a2042cc55c554add04ed41ac.1543419109.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1543419109.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1543419109.git.vdavydov.dev@gmail.com> Currently, wal_checkpoint() is used for two purposes. First, to make a checkpoint (rotate = true). Second, to flush all pending WAL requests (rotate = false). Since checkpointing has to fail if cascading rollback is in progress so does flushing. This is confusing. Let's separate the two paths. While we are at it, let's also rewrite WAL checkpointing using cbus_call instead of cpipe_push as it's a more convenient way of exchanging simple two-hop messages between two threads. --- src/box/box.cc | 5 ++-- src/box/vinyl.c | 5 +--- src/box/wal.c | 91 +++++++++++++++++++++++++++------------------------------ src/box/wal.h | 15 +++++++--- 4 files changed, 57 insertions(+), 59 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index 8d6e966e..5ea2f014 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -2190,10 +2190,9 @@ box_checkpoint() goto end; struct vclock vclock; - if ((rc = wal_checkpoint(&vclock, true))) { - tnt_error(ClientError, ER_CHECKPOINT_ROLLBACK); + if ((rc = wal_checkpoint(&vclock))) goto end; - } + rc = engine_commit_checkpoint(&vclock); end: if (rc) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 1794489d..05df1329 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -1014,10 +1014,7 @@ vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm) * Wait for prepared transactions to complete * (we can't abort them as they reached WAL). */ - struct vclock unused; - if (wal_checkpoint(&unused, false) != 0) - return -1; - + wal_flush(); return 0; } diff --git a/src/box/wal.c b/src/box/wal.c index 11aae5fc..bc396ca5 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -461,29 +461,39 @@ wal_thread_stop() wal_writer_destroy(&wal_writer_singleton); } +void +wal_flush(void) +{ + struct wal_writer *writer = &wal_writer_singleton; + if (writer->wal_mode == WAL_NONE) + return; + cbus_flush(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe, NULL); +} + struct wal_checkpoint { - struct cmsg base; - struct vclock *vclock; - struct fiber *fiber; - bool rotate; - int res; + struct cbus_call_msg base; + struct vclock vclock; }; -void -wal_checkpoint_f(struct cmsg *data) +static int +wal_checkpoint_f(struct cbus_call_msg *data) { struct wal_checkpoint *msg = (struct wal_checkpoint *) data; struct wal_writer *writer = &wal_writer_singleton; if (writer->in_rollback.route != NULL) { - /* We're rolling back a failed write. */ - msg->res = -1; - return; + /* + * We're rolling back a failed write and so + * can't make a checkpoint - see the comment + * in wal_checkpoint() for the explanation. + */ + diag_set(ClientError, ER_CHECKPOINT_ROLLBACK); + return -1; } /* * Avoid closing the current WAL if it has no rows (empty). */ - if (msg->rotate && xlog_is_open(&writer->current_wal) && + if (xlog_is_open(&writer->current_wal) && vclock_sum(&writer->current_wal.meta.vclock) != vclock_sum(&writer->vclock)) { @@ -492,53 +502,38 @@ wal_checkpoint_f(struct cmsg *data) * The next WAL will be created on the first write. */ } - vclock_copy(msg->vclock, &writer->vclock); -} - -void -wal_checkpoint_done_f(struct cmsg *data) -{ - struct wal_checkpoint *msg = (struct wal_checkpoint *) data; - fiber_wakeup(msg->fiber); + vclock_copy(&msg->vclock, &writer->vclock); + return 0; } int -wal_checkpoint(struct vclock *vclock, bool rotate) +wal_checkpoint(struct vclock *vclock) { struct wal_writer *writer = &wal_writer_singleton; - if (! stailq_empty(&writer->rollback)) { - /* - * The writer rollback queue is not empty, - * roll back this transaction immediately. - * This is to ensure we do not accidentally - * commit a transaction which has seen changes - * that will be rolled back. - */ - say_error("Aborting transaction %llu during " - "cascading rollback", - vclock_sum(&writer->vclock)); - return -1; - } if (writer->wal_mode == WAL_NONE) { vclock_copy(vclock, &writer->vclock); return 0; } - static struct cmsg_hop wal_checkpoint_route[] = { - {wal_checkpoint_f, &wal_thread.tx_prio_pipe}, - {wal_checkpoint_done_f, NULL}, - }; - vclock_create(vclock); + if (!stailq_empty(&writer->rollback)) { + /* + * If cascading rollback is in progress, in-memory + * indexes can contain changes scheduled for rollback. + * If we made a checkpoint, we could write them to + * the snapshot. So we abort checkpointing in this + * case. + */ + diag_set(ClientError, ER_CHECKPOINT_ROLLBACK); + return -1; + } struct wal_checkpoint msg; - cmsg_init(&msg.base, wal_checkpoint_route); - msg.vclock = vclock; - msg.fiber = fiber(); - msg.rotate = rotate; - msg.res = 0; - cpipe_push(&wal_thread.wal_pipe, &msg.base); - fiber_set_cancellable(false); - fiber_yield(); - fiber_set_cancellable(true); - return msg.res; + bool cancellable = fiber_set_cancellable(false); + int rc = cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe, + &msg.base, wal_checkpoint_f, NULL, TIMEOUT_INFINITY); + fiber_set_cancellable(cancellable); + if (rc != 0) + return -1; + vclock_copy(vclock, &msg.vclock); + return 0; } struct wal_gc_msg diff --git a/src/box/wal.h b/src/box/wal.h index e4094b1e..7ca27f1a 100644 --- a/src/box/wal.h +++ b/src/box/wal.h @@ -166,13 +166,20 @@ wal_mode(); /** * Wait till all pending changes to the WAL are flushed. - * Rotates the WAL. - * - * @param[out] vclock WAL vclock + */ +void +wal_flush(void); + +/** + * Prepare WAL for checkpointing. * + * This function flushes all pending changes and rotates the + * current WAL. The vclock of the last record written to the + * rotated WAL is returned in @vclock. This is the vclock that + * is supposed to be used to identify the new checkpoint. */ int -wal_checkpoint(struct vclock *vclock, bool rotate); +wal_checkpoint(struct vclock *vclock); /** * Remove WAL files that are not needed by consumers reading -- 2.11.0
next prev parent reply other threads:[~2018-11-28 16:14 UTC|newest] Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-11-28 16:14 [PATCH 0/9] Allow to limit size of WAL files Vladimir Davydov 2018-11-28 16:14 ` Vladimir Davydov [this message] 2018-11-29 16:24 ` [tarantool-patches] Re: [PATCH 1/9] wal: separate checkpoint and flush paths Konstantin Osipov 2018-11-28 16:14 ` [PATCH 2/9] wal: remove files needed for recovery from backup checkpoints on ENOSPC Vladimir Davydov 2018-11-29 16:31 ` [tarantool-patches] " Konstantin Osipov 2018-11-29 17:42 ` Vladimir Davydov 2018-11-28 16:14 ` [PATCH 3/9] recovery: restore garbage collector vclock after restart Vladimir Davydov 2018-11-29 16:37 ` [tarantool-patches] " Konstantin Osipov 2018-11-29 17:42 ` Vladimir Davydov 2018-11-28 16:14 ` [PATCH 4/9] gc: run garbage collection in background Vladimir Davydov 2018-11-29 16:42 ` [tarantool-patches] " Konstantin Osipov 2018-11-29 17:43 ` Vladimir Davydov 2018-11-28 16:14 ` [PATCH 5/9] gc: do not use WAL watcher API for deactivating stale consumers Vladimir Davydov 2018-11-29 17:02 ` [tarantool-patches] " Konstantin Osipov 2018-11-28 16:14 ` [PATCH 6/9] wal: simplify watcher API Vladimir Davydov 2018-11-29 17:33 ` [tarantool-patches] " Konstantin Osipov 2018-11-28 16:14 ` [PATCH 7/9] box: rewrite checkpoint daemon in C Vladimir Davydov 2018-11-30 8:58 ` [tarantool-patches] " Konstantin Osipov 2018-11-30 9:41 ` Vladimir Davydov 2018-12-05 16:21 ` Vladimir Davydov 2018-11-28 16:14 ` [PATCH 8/9] wal: pass struct instead of vclock to checkpoint methods Vladimir Davydov 2018-11-30 9:00 ` [tarantool-patches] " Konstantin Osipov 2018-11-30 9:43 ` Vladimir Davydov 2018-12-03 20:20 ` Konstantin Osipov 2018-11-28 16:14 ` [PATCH 9/9] wal: trigger checkpoint if there are too many WALs Vladimir Davydov 2018-12-03 20:34 ` [tarantool-patches] " Konstantin Osipov 2018-12-04 11:25 ` Vladimir Davydov 2018-12-04 12:53 ` Konstantin Osipov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=b0daa530e174da15a2042cc55c554add04ed41ac.1543419109.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 1/9] wal: separate checkpoint and flush paths' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox