[PATCH 1/2] gc: do not abort garbage collection if failed to unlink snap file

Vladimir Davydov vdavydov.dev at gmail.com
Fri Jan 25 17:34:48 MSK 2019


We build the checkpoint list from the list of memtx snap files. So to
ensure that it is always possible to recover from any checkpoint present
in box.info.gc() output, we abort garbage collection if we fail to
unlink a snap file. This introduces extra complexity to the garbage
collection code, which makes it difficult to make WAL file removal fully
asynchronous.

Actually, it looks like we are being way too overcautious here, because
unlink() doesn't normally fail so an error while removing a snap file is
highly unlikely to occur. Besides, even if it happens, it still won't be
critical, because we never delete the last checkpoint, which is usually
used for backups/recovery. So let's simplify the code by removing that
check.

Needed for #3938
---
https://github.com/tarantool/tarantool/issues/3938
https://github.com/tarantool/tarantool/commits/dv/gh-3938-async-wal-file-removal

 src/box/engine.c       | 12 ++++--------
 src/box/engine.h       |  8 ++++----
 src/box/gc.c           | 17 +++++++++++------
 src/box/memtx_engine.c | 17 +++--------------
 src/box/vinyl.c        |  5 ++---
 src/box/wal.c          | 12 +-----------
 src/box/xlog.c         |  7 +------
 src/box/xlog.h         |  2 +-
 8 files changed, 27 insertions(+), 53 deletions(-)

diff --git a/src/box/engine.c b/src/box/engine.c
index 409479a3..d0a08f5c 100644
--- a/src/box/engine.c
+++ b/src/box/engine.c
@@ -147,15 +147,12 @@ engine_abort_checkpoint(void)
 		engine->vtab->abort_checkpoint(engine);
 }
 
-int
+void
 engine_collect_garbage(const struct vclock *vclock)
 {
 	struct engine *engine;
-	engine_foreach(engine) {
-		if (engine->vtab->collect_garbage(engine, vclock) < 0)
-			return -1;
-	}
-	return 0;
+	engine_foreach(engine)
+		engine->vtab->collect_garbage(engine, vclock);
 }
 
 int
@@ -316,13 +313,12 @@ generic_engine_abort_checkpoint(struct engine *engine)
 	(void)engine;
 }
 
-int
+void
 generic_engine_collect_garbage(struct engine *engine,
 			       const struct vclock *vclock)
 {
 	(void)engine;
 	(void)vclock;
-	return 0;
 }
 
 int
diff --git a/src/box/engine.h b/src/box/engine.h
index 87c84e51..a6949d28 100644
--- a/src/box/engine.h
+++ b/src/box/engine.h
@@ -166,8 +166,8 @@ struct engine_vtab {
 	 * fails to delete a snapshot file, because we recover
 	 * checkpoint list by scanning the snapshot directory.
 	 */
-	int (*collect_garbage)(struct engine *engine,
-			       const struct vclock *vclock);
+	void (*collect_garbage)(struct engine *engine,
+				const struct vclock *vclock);
 	/**
 	 * Backup callback. It is supposed to call @cb for each file
 	 * that needs to be backed up in order to restore from the
@@ -337,7 +337,7 @@ engine_commit_checkpoint(const struct vclock *vclock);
 void
 engine_abort_checkpoint(void);
 
-int
+void
 engine_collect_garbage(const struct vclock *vclock);
 
 int
@@ -370,7 +370,7 @@ int generic_engine_begin_checkpoint(struct engine *);
 int generic_engine_wait_checkpoint(struct engine *, const struct vclock *);
 void generic_engine_commit_checkpoint(struct engine *, const struct vclock *);
 void generic_engine_abort_checkpoint(struct engine *);
-int generic_engine_collect_garbage(struct engine *, const struct vclock *);
+void generic_engine_collect_garbage(struct engine *, const struct vclock *);
 int generic_engine_backup(struct engine *, const struct vclock *,
 			  engine_backup_cb, void *);
 void generic_engine_memory_stat(struct engine *, struct engine_memory_stat *);
diff --git a/src/box/gc.c b/src/box/gc.c
index 8e2b5bb6..7411314a 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -209,14 +209,19 @@ gc_run_cleanup(void)
 	/*
 	 * Run garbage collection.
 	 *
-	 * The order is important here: we must invoke garbage
-	 * collection for memtx snapshots first and abort if it
-	 * fails - see comment to memtx_engine_collect_garbage().
+	 * It may occur that we proceed to deletion of WAL files
+	 * and other engine files after having failed to delete
+	 * a memtx snap file. If this happens, the corresponding
+	 * checkpoint won't be removed from box.info.gc(), because
+	 * we use snap files to build the checkpoint list, but
+	 * it won't be possible to back it up or recover from it.
+	 * This is OK as unlink() shouldn't normally fail. Besides
+	 * we never remove the last checkpoint and the following
+	 * WALs so this may only affect backup checkpoints.
 	 */
-	int rc = 0;
 	if (run_engine_gc)
-		rc = engine_collect_garbage(&checkpoint->vclock);
-	if (run_wal_gc && rc == 0)
+		engine_collect_garbage(&checkpoint->vclock);
+	if (run_wal_gc)
 		wal_collect_garbage(vclock);
 }
 
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 254ef24b..c024fa1a 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -821,23 +821,12 @@ memtx_engine_abort_checkpoint(struct engine *engine)
 	memtx->checkpoint = NULL;
 }
 
-static int
+static void
 memtx_engine_collect_garbage(struct engine *engine, const struct vclock *vclock)
 {
 	struct memtx_engine *memtx = (struct memtx_engine *)engine;
-	/*
-	 * We recover the checkpoint list by scanning the snapshot
-	 * directory so deletion of an xlog file or a file that
-	 * belongs to another engine without the corresponding snap
-	 * file would result in a corrupted checkpoint on the list.
-	 * That said, we have to abort garbage collection if we
-	 * fail to delete a snap file.
-	 */
-	if (xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock),
-				 XDIR_GC_USE_COIO) != 0)
-		return -1;
-
-	return 0;
+	xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock),
+			     XDIR_GC_USE_COIO);
 }
 
 static int
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index b66360d4..1832a29c 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -3317,7 +3317,7 @@ vy_gc(struct vy_env *env, struct vy_recovery *recovery,
 	}
 }
 
-static int
+static void
 vinyl_engine_collect_garbage(struct engine *engine, const struct vclock *vclock)
 {
 	struct vy_env *env = vy_env(engine);
@@ -3329,11 +3329,10 @@ vinyl_engine_collect_garbage(struct engine *engine, const struct vclock *vclock)
 	struct vy_recovery *recovery = vy_recovery_new(vy_log_signature(), 0);
 	if (recovery == NULL) {
 		say_error("failed to recover vylog for garbage collection");
-		return 0;
+		return;
 	}
 	vy_gc(env, recovery, VY_GC_DROPPED, vclock_sum(vclock));
 	vy_recovery_delete(recovery);
-	return 0;
 }
 
 /* }}} Garbage collection */
diff --git a/src/box/wal.c b/src/box/wal.c
index 3b50d362..07ea76ed 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -808,17 +808,7 @@ retry:
 		warn_no_space = false;
 	}
 
-	/* Keep the original error. */
-	struct diag diag;
-	diag_create(&diag);
-	diag_move(diag_get(), &diag);
-	if (xdir_collect_garbage(&writer->wal_dir, gc_lsn,
-				 XDIR_GC_REMOVE_ONE) != 0) {
-		diag_move(&diag, diag_get());
-		goto error;
-	}
-	diag_destroy(&diag);
-
+	xdir_collect_garbage(&writer->wal_dir, gc_lsn, XDIR_GC_REMOVE_ONE);
 	notify_gc = true;
 	goto retry;
 error:
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 881dcd3b..6d2d96cd 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -663,7 +663,7 @@ xdir_format_filename(struct xdir *dir, int64_t signature,
 	return filename;
 }
 
-int
+void
 xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags)
 {
 	struct vclock *vclock;
@@ -680,10 +680,6 @@ xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags)
 			if (errno != ENOENT) {
 				say_syserror("error while removing %s",
 					     filename);
-				diag_set(SystemError,
-					 "failed to unlink file '%s'",
-					 filename);
-				return -1;
 			}
 		} else
 			say_info("removed %s", filename);
@@ -693,7 +689,6 @@ xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags)
 		if (flags & XDIR_GC_REMOVE_ONE)
 			break;
 	}
-	return 0;
 }
 
 void
diff --git a/src/box/xlog.h b/src/box/xlog.h
index 7c69cc4f..918ec06d 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -211,7 +211,7 @@ enum {
  * Remove files whose signature is less than specified.
  * For possible values of @flags see XDIR_GC_*.
  */
-int
+void
 xdir_collect_garbage(struct xdir *dir, int64_t signature, unsigned flags);
 
 /**
-- 
2.11.0




More information about the Tarantool-patches mailing list