From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 08/13] gc: keep track of available checkpoints Date: Thu, 4 Oct 2018 20:20:10 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: Currently, the checkpoint iterator is in fact a wrapper around memtx_engine::snap_dir while the garbage collector knows nothing about checkpoints. This feels like an encapsulation violation. Let's keep track of all available checkpoints right in the garbage collector instead and export a gc_ API to iterate over checkpoints. --- src/box/CMakeLists.txt | 1 - src/box/box.cc | 50 ++++++++++++++------------ src/box/checkpoint.c | 72 ------------------------------------- src/box/checkpoint.h | 97 -------------------------------------------------- src/box/gc.c | 84 ++++++++++++++++++++++++++++++++++--------- src/box/gc.h | 68 ++++++++++++++++++++++++++++++----- src/box/lua/info.c | 10 ++---- src/box/memtx_engine.c | 7 ++++ src/box/vinyl.c | 3 +- src/box/vy_scheduler.c | 1 - 10 files changed, 167 insertions(+), 226 deletions(-) delete mode 100644 src/box/checkpoint.c delete mode 100644 src/box/checkpoint.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index 67750898..52413d3c 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -102,7 +102,6 @@ add_library(box STATIC txn.c box.cc gc.c - checkpoint.c user_def.c user.cc authentication.cc diff --git a/src/box/box.cc b/src/box/box.cc index 49deea61..a33b80ef 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -68,7 +68,6 @@ #include "authentication.h" #include "path_lock.h" #include "gc.h" -#include "checkpoint.h" #include "systemd.h" #include "call.h" #include "func.h" @@ -1446,17 +1445,20 @@ box_process_join(struct ev_io *io, struct xrow_header *header) "wal_mode = 'none'"); } - /* Remember start vclock. 
*/ - struct vclock start_vclock; /* * The only case when the directory index is empty is * when someone has deleted a snapshot and tries to join * as a replica. Our best effort is to not crash in such * case: raise ER_MISSING_SNAPSHOT. */ - if (checkpoint_last(&start_vclock) < 0) + struct gc_checkpoint *checkpoint = gc_last_checkpoint(); + if (checkpoint == NULL) tnt_raise(ClientError, ER_MISSING_SNAPSHOT); + /* Remember start vclock. */ + struct vclock start_vclock; + vclock_copy(&start_vclock, &checkpoint->vclock); + /* Register the replica with the garbage collector. */ struct gc_consumer *gc = gc_consumer_register(&start_vclock, GC_CONSUMER_WAL, "replica %s", tt_uuid_str(&instance_uuid)); @@ -1725,6 +1727,8 @@ bootstrap_master(const struct tt_uuid *replicaset_uuid) if (engine_begin_checkpoint() || engine_commit_checkpoint(&replicaset.vclock)) panic("failed to create a checkpoint"); + + gc_add_checkpoint(&replicaset.vclock); } /** @@ -1785,6 +1789,8 @@ bootstrap_from_master(struct replica *master) if (engine_begin_checkpoint() || engine_commit_checkpoint(&replicaset.vclock)) panic("failed to create a checkpoint"); + + gc_add_checkpoint(&replicaset.vclock); } /** @@ -2036,8 +2042,7 @@ box_cfg_xc(void) xstream_create(&join_stream, apply_initial_join_row); xstream_create(&subscribe_stream, apply_row); - struct vclock last_checkpoint_vclock; - int64_t last_checkpoint_lsn = checkpoint_last(&last_checkpoint_vclock); + struct gc_checkpoint *checkpoint = gc_last_checkpoint(); /* * Lock the write ahead log directory to avoid multiple @@ -2051,14 +2056,14 @@ box_cfg_xc(void) * refuse to start. In hot standby mode, a busy * WAL dir must contain at least one xlog. 
*/ - if (!cfg_geti("hot_standby") || last_checkpoint_lsn < 0) + if (!cfg_geti("hot_standby") || checkpoint == NULL) tnt_raise(ClientError, ER_ALREADY_RUNNING, cfg_gets("wal_dir")); } bool is_bootstrap_leader = false; - if (last_checkpoint_lsn >= 0) { + if (checkpoint != NULL) { /* Recover the instance from the local directory */ local_recovery(&instance_uuid, &replicaset_uuid, - &last_checkpoint_vclock); + &checkpoint->vclock); } else { /* Bootstrap a new master */ bootstrap(&instance_uuid, &replicaset_uuid, @@ -2151,7 +2156,8 @@ end: if (rc) engine_abort_checkpoint(); else - gc_run(); + gc_add_checkpoint(&vclock); + latch_unlock(&schema_lock); box_checkpoint_is_in_progress = false; return rc; @@ -2165,20 +2171,20 @@ box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg) diag_set(ClientError, ER_BACKUP_IN_PROGRESS); return -1; } - const struct vclock *vclock; - struct checkpoint_iterator it; - checkpoint_iterator_init(&it); - do { - vclock = checkpoint_iterator_prev(&it); - if (vclock == NULL) { - diag_set(ClientError, ER_MISSING_SNAPSHOT); - return -1; - } - } while (checkpoint_idx-- > 0); - backup_gc = gc_consumer_register(vclock, GC_CONSUMER_ALL, "backup"); + struct gc_checkpoint *checkpoint; + gc_foreach_checkpoint_reverse(checkpoint) { + if (checkpoint_idx-- == 0) + break; + } + if (checkpoint_idx >= 0) { + diag_set(ClientError, ER_MISSING_SNAPSHOT); + return -1; + } + backup_gc = gc_consumer_register(&checkpoint->vclock, + GC_CONSUMER_ALL, "backup"); if (backup_gc == NULL) return -1; - int rc = engine_backup(vclock, cb, cb_arg); + int rc = engine_backup(&checkpoint->vclock, cb, cb_arg); if (rc != 0) { gc_consumer_unregister(backup_gc); backup_gc = NULL; diff --git a/src/box/checkpoint.c b/src/box/checkpoint.c deleted file mode 100644 index cc32e75c..00000000 --- a/src/box/checkpoint.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. 
- * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "checkpoint.h" - -#include -#include - -#include "engine.h" -#include "memtx_engine.h" - -int64_t -checkpoint_last(struct vclock *vclock) -{ - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - assert(memtx != NULL); - return xdir_last_vclock(&memtx->snap_dir, vclock); -} - -const struct vclock * -checkpoint_iterator_next(struct checkpoint_iterator *it) -{ - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - assert(memtx != NULL); - it->curr = it->curr == NULL ? 
- vclockset_first(&memtx->snap_dir.index) : - vclockset_next(&memtx->snap_dir.index, - (struct vclock *)it->curr); - return it->curr; -} - -const struct vclock * -checkpoint_iterator_prev(struct checkpoint_iterator *it) -{ - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - assert(memtx != NULL); - it->curr = it->curr == NULL ? - vclockset_last(&memtx->snap_dir.index) : - vclockset_prev(&memtx->snap_dir.index, - (struct vclock *)it->curr); - return it->curr; -} diff --git a/src/box/checkpoint.h b/src/box/checkpoint.h deleted file mode 100644 index 00a1e705..00000000 --- a/src/box/checkpoint.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef TARANTOOL_BOX_CHECKPOINT_H_INCLUDED -#define TARANTOOL_BOX_CHECKPOINT_H_INCLUDED -/* - * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include - -/** - * This module implements a simple API for working with checkpoints. - * As checkpoints are, in fact, memtx snapshots, functions exported - * by this module are C wrappers around corresponding memtx_engine - * methods. - */ - -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ - -struct vclock; - -/** - * Return LSN and vclock (unless @vclock is NULL) of the most - * recent checkpoint or -1 if there is no checkpoint. - */ -int64_t -checkpoint_last(struct vclock *vclock); - -/** Iterator over all existing checkpoints. */ -struct checkpoint_iterator { - const struct vclock *curr; -}; - -/** - * Init a checkpoint iterator. The iterator is valid as long - * as the caller doesn't yield. - */ -static inline void -checkpoint_iterator_init(struct checkpoint_iterator *it) -{ - it->curr = NULL; -} - -/** - * Iterate to the next checkpoint. Return NULL if the current - * checkpoint is the most recent one. - * - * If called on the last iteration, this function positions - * the iterator to the oldest checkpoint. - */ -const struct vclock * -checkpoint_iterator_next(struct checkpoint_iterator *it); - -/** - * Iterate to the previous checkpoint. Return NULL if the current - * checkpoint is the oldest one. - * - * If called on the first iteration, this function positions - * the iterator to the newest checkpoint. 
- */ -const struct vclock * -checkpoint_iterator_prev(struct checkpoint_iterator *it); - -#if defined(__cplusplus) -} /* extern "C" */ -#endif /* defined(__cplusplus) */ - -#endif /* TARANTOOL_BOX_CHECKPOINT_H_INCLUDED */ diff --git a/src/box/gc.c b/src/box/gc.c index dca278af..e6951f4c 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -32,7 +32,10 @@ #include +#include +#include #include +#include #include #include #include @@ -45,7 +48,6 @@ #include "say.h" #include "latch.h" #include "vclock.h" -#include "checkpoint.h" #include "engine.h" /* engine_collect_garbage() */ #include "wal.h" /* wal_collect_garbage() */ @@ -75,6 +77,10 @@ rb_gen(MAYBE_UNUSED static inline, gc_tree_, gc_tree_t, void gc_init(void) { + /* Don't delete any files until recovery is complete. */ + gc.min_checkpoint_count = INT_MAX; + + rlist_create(&gc.checkpoints); vclock_create(&gc.wal_vclock); vclock_create(&gc.checkpoint_vclock); gc_tree_new(&gc.consumers); @@ -84,6 +90,12 @@ gc_init(void) void gc_free(void) { + /* Free checkpoints. */ + struct gc_checkpoint *checkpoint, *next_checkpoint; + rlist_foreach_entry_safe(checkpoint, &gc.checkpoints, in_checkpoints, + next_checkpoint) { + free(checkpoint); + } /* Free all registered consumers. */ struct gc_consumer *consumer = gc_tree_first(&gc.consumers); while (consumer != NULL) { @@ -105,12 +117,14 @@ gc_tree_first_checkpoint(gc_tree_t *consumers) return consumer; } -void +/** + * Invoke garbage collection in order to remove files left + * from old checkpoints. The number of checkpoints saved by + * this function is specified by box.cfg.checkpoint_count. + */ +static void gc_run(void) { - int min_checkpoint_count = gc.min_checkpoint_count; - assert(min_checkpoint_count > 0); - /* Look up the consumer that uses the oldest WAL. */ struct gc_consumer *leftmost = gc_tree_first(&gc.consumers); /* Look up the consumer that uses the oldest checkpoint. 
*/ @@ -126,20 +140,25 @@ gc_run(void) struct vclock gc_checkpoint_vclock; vclock_create(&gc_checkpoint_vclock); - struct checkpoint_iterator checkpoints; - checkpoint_iterator_init(&checkpoints); - - const struct vclock *vclock; - while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) { - if (--min_checkpoint_count > 0) - continue; + struct gc_checkpoint *checkpoint = NULL; + while (true) { + checkpoint = rlist_first_entry(&gc.checkpoints, + struct gc_checkpoint, in_checkpoints); + vclock_copy(&gc_checkpoint_vclock, &checkpoint->vclock); + if (gc.checkpoint_count <= gc.min_checkpoint_count) + break; if (leftmost_checkpoint != NULL && - vclock_sum(&leftmost_checkpoint->vclock) < vclock_sum(vclock)) - continue; - vclock_copy(&gc_checkpoint_vclock, vclock); - break; + vclock_sum(&checkpoint->vclock) >= + vclock_sum(&leftmost_checkpoint->vclock)) + break; /* checkpoint is in use */ + rlist_del_entry(checkpoint, in_checkpoints); + free(checkpoint); + gc.checkpoint_count--; } + /* At least one checkpoint must always be available. */ + assert(checkpoint != NULL); + struct vclock gc_wal_vclock; if (leftmost != NULL && vclock_sum(&leftmost->vclock) < vclock_sum(&gc_checkpoint_vclock)) @@ -186,6 +205,39 @@ gc_set_min_checkpoint_count(int min_checkpoint_count) gc.min_checkpoint_count = min_checkpoint_count; } +void +gc_add_checkpoint(const struct vclock *vclock) +{ + struct gc_checkpoint *last_checkpoint = gc_last_checkpoint(); + if (last_checkpoint != NULL && + vclock_sum(&last_checkpoint->vclock) == vclock_sum(vclock)) { + /* + * No new checkpoint was actually created. + * Rerun the garbage collector to delete old + * files in case box.cfg.checkpoint_count + * was changed. + */ + gc_run(); + return; + } + assert(last_checkpoint == NULL || + vclock_sum(&last_checkpoint->vclock) < vclock_sum(vclock)); + + struct gc_checkpoint *checkpoint = calloc(1, sizeof(*checkpoint)); + /* + * This function is called after a checkpoint is written + * to disk so it can't fail. 
+ */ + if (checkpoint == NULL) + panic("out of memory"); + + vclock_copy(&checkpoint->vclock, vclock); + rlist_add_tail_entry(&gc.checkpoints, checkpoint, in_checkpoints); + gc.checkpoint_count++; + + gc_run(); +} + struct gc_consumer * gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type, const char *format, ...) diff --git a/src/box/gc.h b/src/box/gc.h index 418f8d5e..e26b1017 100644 --- a/src/box/gc.h +++ b/src/box/gc.h @@ -32,6 +32,7 @@ */ #include +#include #include "vclock.h" #include "latch.h" @@ -55,6 +56,17 @@ enum gc_consumer_type { typedef rb_node(struct gc_consumer) gc_node_t; /** + * Garbage collector keeps track of all preserved checkpoints. + * The following structure represents a checkpoint. + */ +struct gc_checkpoint { + /** Link in gc_state::checkpoints. */ + struct rlist in_checkpoints; + /** VClock of the checkpoint. */ + struct vclock vclock; +}; + +/** * The object of this type is used to prevent garbage * collection from removing files that are still in use. */ @@ -80,6 +92,17 @@ struct gc_state { * Configured by box.cfg.checkpoint_count. */ int min_checkpoint_count; + /** + * Number of preserved checkpoints. May be greater than + * @min_checkpoint_count, because some checkpoints may + * be in use by replication or backup. + */ + int checkpoint_count; + /** + * List of preserved checkpoints. New checkpoints are added + * to the tail. Linked by gc_checkpoint::in_checkpoints. + */ + struct rlist checkpoints; /** Max vclock WAL garbage collection has been called for. */ struct vclock wal_vclock; /** Max vclock checkpoint garbage collection has been called for. */ @@ -95,6 +118,35 @@ struct gc_state { extern struct gc_state gc; /** + * Iterate over all checkpoints tracked by the garbage collector, + * starting from the oldest and ending with the newest. 
+ */ +#define gc_foreach_checkpoint(checkpoint) \ + rlist_foreach_entry(checkpoint, &gc.checkpoints, in_checkpoints) + +/** + * Iterate over all checkpoints tracked by the garbage collector + * in the reverse order, that is starting from the newest and + * ending with the oldest. + */ +#define gc_foreach_checkpoint_reverse(checkpoint) \ + rlist_foreach_entry_reverse(checkpoint, &gc.checkpoints, in_checkpoints) + +/** + * Return the last (newest) checkpoint known to the garbage + * collector. If there's no checkpoint, return NULL. + */ +static inline struct gc_checkpoint * +gc_last_checkpoint(void) +{ + if (rlist_empty(&gc.checkpoints)) + return NULL; + + return rlist_last_entry(&gc.checkpoints, struct gc_checkpoint, + in_checkpoints); +} + +/** * Initialize the garbage collection state. */ void @@ -107,14 +159,6 @@ void gc_free(void); /** - * Invoke garbage collection in order to remove files left - * from old checkpoints. The number of checkpoints saved by - * this function is specified by box.cfg.checkpoint_count. - */ -void -gc_run(void); - -/** * Update the minimal number of checkpoints to preserve. * Called when box.cfg.checkpoint_count is updated. * @@ -126,6 +170,14 @@ void gc_set_min_checkpoint_count(int min_checkpoint_count); /** + * Track a new checkpoint in the garbage collector state. + * Note, this function may run garbage collector to remove + * old checkpoints. + */ +void +gc_add_checkpoint(const struct vclock *vclock); + +/** * Register a consumer. 
* * This will stop garbage collection of objects newer than diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 85b21c65..97d5aba3 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -47,7 +47,6 @@ #include "box/replication.h" #include "box/info.h" #include "box/gc.h" -#include "box/checkpoint.h" #include "box/engine.h" #include "box/vinyl.h" #include "main.h" @@ -363,22 +362,19 @@ static int lbox_info_gc_call(struct lua_State *L) { int count; - const struct vclock *vclock; lua_newtable(L); lua_pushstring(L, "checkpoints"); lua_newtable(L); - struct checkpoint_iterator checkpoints; - checkpoint_iterator_init(&checkpoints); - count = 0; - while ((vclock = checkpoint_iterator_next(&checkpoints)) != NULL) { + struct gc_checkpoint *checkpoint; + gc_foreach_checkpoint(checkpoint) { lua_createtable(L, 0, 1); lua_pushstring(L, "signature"); - luaL_pushint64(L, vclock_sum(vclock)); + luaL_pushint64(L, vclock_sum(&checkpoint->vclock)); lua_settable(L, -3); lua_rawseti(L, -2, ++count); diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 1f80ce54..ae1f5a0e 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -1046,6 +1046,13 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery, xlog_cursor_close(&cursor, false); } + /* Apprise the garbage collector of available checkpoints. 
*/ + for (struct vclock *vclock = vclockset_first(&memtx->snap_dir.index); + vclock != NULL; + vclock = vclockset_next(&memtx->snap_dir.index, vclock)) { + gc_add_checkpoint(vclock); + } + stailq_create(&memtx->gc_queue); memtx->gc_fiber = fiber_new("memtx.gc", memtx_engine_gc_f); if (memtx->gc_fiber == NULL) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index fd24f9b5..acfe86d1 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -70,7 +70,6 @@ #include "info.h" #include "column_mask.h" #include "trigger.h" -#include "checkpoint.h" #include "session.h" #include "wal.h" /* wal_mode() */ @@ -3304,7 +3303,7 @@ vinyl_engine_collect_garbage(struct engine *engine, int64_t lsn) vy_log_collect_garbage(lsn); /* Cleanup run files. */ - int64_t signature = checkpoint_last(NULL); + int64_t signature = vy_log_signature(); struct vy_recovery *recovery = vy_recovery_new(signature, 0); if (recovery == NULL) { say_error("failed to recover vylog for garbage collection"); diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index 2f85424a..eab3f6c5 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -39,7 +39,6 @@ #include #include -#include "checkpoint.h" #include "diag.h" #include "errcode.h" #include "errinj.h" -- 2.11.0