From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 1/2] gc: do not abort garbage collection if failed to unlink snap file Date: Fri, 25 Jan 2019 17:34:48 +0300 Message-Id: <783662fb698347f972a57b925bf13f227b710d63.1548425270.git.vdavydov.dev@gmail.com> To: tarantool-patches@freelists.org List-ID: We build the checkpoint list from the list of memtx snap files. So to ensure that it is always possible to recover from any checkpoint present in box.info.gc() output, we abort garbage collection if we fail to unlink a snap file. This introduces extra complexity to the garbage collection code, which makes it difficult to make WAL file removal fully asynchronous. Actually, it looks like we are being way too overcautious here, because unlink() doesn't normally fail so an error while removing a snap file is highly unlikely to occur. Besides, even if it happens, it still won't be critical, because we never delete the last checkpoint, which is usually used for backups/recovery. So let's simplify the code by removing that check. Needed for #3938 --- https://github.com/tarantool/tarantool/issues/3938 https://github.com/tarantool/tarantool/commits/dv/gh-3938-async-wal-file-removal src/box/engine.c | 12 ++++-------- src/box/engine.h | 8 ++++---- src/box/gc.c | 17 +++++++++++------ src/box/memtx_engine.c | 17 +++-------------- src/box/vinyl.c | 5 ++--- src/box/wal.c | 12 +----------- src/box/xlog.c | 7 +------ src/box/xlog.h | 2 +- 8 files changed, 27 insertions(+), 53 deletions(-) diff --git a/src/box/engine.c b/src/box/engine.c index 409479a3..d0a08f5c 100644 --- a/src/box/engine.c +++ b/src/box/engine.c @@ -147,15 +147,12 @@ engine_abort_checkpoint(void) engine->vtab->abort_checkpoint(engine); } -int +void engine_collect_garbage(const struct vclock *vclock) { struct engine *engine; - engine_foreach(engine) { - if (engine->vtab->collect_garbage(engine, vclock) < 0) - return -1; - } - return 0; + engine_foreach(engine) + engine->vtab->collect_garbage(engine, vclock); } int @@ -316,13 +313,12 @@ generic_engine_abort_checkpoint(struct engine *engine) (void)engine; } -int +void generic_engine_collect_garbage(struct engine *engine, const struct vclock *vclock) { (void)engine; (void)vclock; - return 0; } int diff --git a/src/box/engine.h b/src/box/engine.h index 87c84e51..a6949d28 100644 --- a/src/box/engine.h +++ b/src/box/engine.h @@ -166,8 +166,8 @@ struct engine_vtab { * fails to delete a snapshot file, because we recover * checkpoint list by scanning the snapshot directory. */ - int (*collect_garbage)(struct engine *engine, - const struct vclock *vclock); + void (*collect_garbage)(struct engine *engine, + const struct vclock *vclock); /** * Backup callback. It is supposed to call @cb for each file * that needs to be backed up in order to restore from the @@ -337,7 +337,7 @@ engine_commit_checkpoint(const struct vclock *vclock); void engine_abort_checkpoint(void); -int +void engine_collect_garbage(const struct vclock *vclock); int @@ -370,7 +370,7 @@ int generic_engine_begin_checkpoint(struct engine *); int generic_engine_wait_checkpoint(struct engine *, const struct vclock *); void generic_engine_commit_checkpoint(struct engine *, const struct vclock *); void generic_engine_abort_checkpoint(struct engine *); -int generic_engine_collect_garbage(struct engine *, const struct vclock *); +void generic_engine_collect_garbage(struct engine *, const struct vclock *); int generic_engine_backup(struct engine *, const struct vclock *, engine_backup_cb, void *); void generic_engine_memory_stat(struct engine *, struct engine_memory_stat *); diff --git a/src/box/gc.c b/src/box/gc.c index 8e2b5bb6..7411314a 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -209,14 +209,19 @@ gc_run_cleanup(void) /* * Run garbage collection. * - * The order is important here: we must invoke garbage - * collection for memtx snapshots first and abort if it - * fails - see comment to memtx_engine_collect_garbage(). + * It may occur that we proceed to deletion of WAL files + * and other engine files after having failed to delete + * a memtx snap file. If this happens, the corresponding + * checkpoint won't be removed from box.info.gc(), because + * we use snap files to build the checkpoint list, but + * it won't be possible to back it up or recover from it. + * This is OK as unlink() shouldn't normally fail. Besides + * we never remove the last checkpoint and the following + * WALs so this may only affect backup checkpoints. */ - int rc = 0; if (run_engine_gc) - rc = engine_collect_garbage(&checkpoint->vclock); - if (run_wal_gc && rc == 0) + engine_collect_garbage(&checkpoint->vclock); + if (run_wal_gc) wal_collect_garbage(vclock); } diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 254ef24b..c024fa1a 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -821,23 +821,12 @@ memtx_engine_abort_checkpoint(struct engine *engine) memtx->checkpoint = NULL; } -static int +static void memtx_engine_collect_garbage(struct engine *engine, const struct vclock *vclock) { struct memtx_engine *memtx = (struct memtx_engine *)engine; - /* - * We recover the checkpoint list by scanning the snapshot - * directory so deletion of an xlog file or a file that - * belongs to another engine without the corresponding snap - * file would result in a corrupted checkpoint on the list. - * That said, we have to abort garbage collection if we - * fail to delete a snap file. - */ - if (xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock), - XDIR_GC_USE_COIO) != 0) - return -1; - - return 0; + xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock), + XDIR_GC_USE_COIO); } static int diff --git a/src/box/vinyl.c b/src/box/vinyl.c index b66360d4..1832a29c 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -3317,7 +3317,7 @@ vy_gc(struct vy_env *env, struct vy_recovery *recovery, } } -static int +static void vinyl_engine_collect_garbage(struct engine *engine, const struct vclock *vclock) { struct vy_env *env = vy_env(engine); @@ -3329,11 +3329,10 @@ vinyl_engine_collect_garbage(struct engine *engine, const struct vclock *vclock) struct vy_recovery *recovery = vy_recovery_new(vy_log_signature(), 0); if (recovery == NULL) { say_error("failed to recover vylog for garbage collection"); - return 0; + return; } vy_gc(env, recovery, VY_GC_DROPPED, vclock_sum(vclock)); vy_recovery_delete(recovery); - return 0; } /* }}} Garbage collection */ diff --git a/src/box/wal.c b/src/box/wal.c index 3b50d362..07ea76ed 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -808,17 +808,7 @@ retry: warn_no_space = false; } - /* Keep the original error. */ - struct diag diag; - diag_create(&diag); - diag_move(diag_get(), &diag); - if (xdir_collect_garbage(&writer->wal_dir, gc_lsn, - XDIR_GC_REMOVE_ONE) != 0) { - diag_move(&diag, diag_get()); - goto error; - } - diag_destroy(&diag); - + xdir_collect_garbage(&writer->wal_dir, gc_lsn, XDIR_GC_REMOVE_ONE); notify_gc = true; goto retry; error: diff --git a/src/box/xlog.c b/src/box/xlog.c index 881dcd3b..6d2d96cd 100644 --- a/src/box/xlog.c +++ b/src/box/xlog.c @@ -663,7 +663,7 @@ xdir_format_filename(struct xdir *dir, int64_t signature, return filename; } -int +void xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags) { struct vclock *vclock; @@ -680,10 +680,6 @@ xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags) if (errno != ENOENT) { say_syserror("error while removing %s", filename); - diag_set(SystemError, - "failed to unlink file '%s'", - filename); - return -1; } } else say_info("removed %s", filename); @@ -693,7 +689,6 @@ xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags) if (flags & XDIR_GC_REMOVE_ONE) break; } - return 0; } void diff --git a/src/box/xlog.h b/src/box/xlog.h index 7c69cc4f..918ec06d 100644 --- a/src/box/xlog.h +++ b/src/box/xlog.h @@ -211,7 +211,7 @@ enum { * Remove files whose signature is less than specified. * For possible values of @flags see XDIR_GC_*. */ -int +void xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags); /** -- 2.11.0