[PATCH 06/10] vinyl: lock out compaction while checkpointing is in progress

Vladimir Davydov vdavydov.dev at gmail.com
Fri May 17 17:52:40 MSK 2019


Upon compaction completion we delete run files that are not needed for
recovery from the last checkpoint. If we start compaction while
checkpointing is in progress, it might compact and delete a run file
dumped during checkpointing, which would make it impossible to back up
files corresponding to the last checkpoint. Commit b25e31685096 ("vinyl:
fix compaction vs checkpoint race resulting in invalid gc") claimed to
eliminate the races, but in fact it only reduced their probability: no
matter whether we use the last checkpoint signature or vylog signature
when checking if a run file doesn't belong to the last checkpoint,
there's still a race window, which grows wider the more indexes
we have to dump. Let's fix it once and for all by locking out compaction
until checkpointing is complete.
---
 src/box/vy_scheduler.c     | 17 +++++++++++++++++
 test/vinyl/errinj.result   | 16 +++++++++++++---
 test/vinyl/errinj.test.lua |  9 ++++++---
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 0180331e..2d1abc2e 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -741,6 +741,13 @@ vy_scheduler_end_checkpoint(struct vy_scheduler *scheduler)
 		 */
 		vy_scheduler_trigger_dump(scheduler);
 	}
+
+	/*
+	 * Checkpointing temporarily blocks compaction.
+	 * Wake up the scheduler to check if there are
+	 * pending compaction tasks.
+	 */
+	fiber_cond_signal(&scheduler->scheduler_cond);
 }
 
 /**
@@ -1896,6 +1903,16 @@ vy_scheduler_peek_compaction(struct vy_scheduler *scheduler,
 	struct vy_worker *worker = NULL;
 retry:
 	*ptask = NULL;
+	/*
+	 * Upon completion a compaction task removes compacted run
+	 * files unless they are needed for recovery from the last
+	 * checkpoint. If we start compaction while checkpointing
+	 * is in progress we might compact a run that belongs to
+	 * the new, to be created, checkpoint. To avoid that we
+	 * lock out compaction while checkpointing is in progress.
+	 */
+	if (scheduler->checkpoint_in_progress)
+		goto no_task;
 	struct vy_lsm *lsm = vy_compaction_heap_top(&scheduler->compaction_heap);
 	if (lsm == NULL)
 		goto no_task; /* nothing to do */
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index e8795143..e05a616a 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -966,8 +966,9 @@ box.snapshot()
 - ok
 ...
 -- Create another run file. This will trigger compaction
--- as run_count_per_level is set to 1. Due to the error
--- injection compaction will finish before snapshot.
+-- as run_count_per_level is set to 1. Delay checkpointing
+-- completion and check that compaction doesn't remove
+-- files that are still needed for backup.
 _ = s:replace{2}
 ---
 ...
@@ -981,8 +982,13 @@ c = fiber.channel(1)
 _ = fiber.create(function() box.snapshot() c:put(true) end)
 ---
 ...
-while s.index.pk:stat().disk.compaction.count == 0 do fiber.sleep(0.001) end
+test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.queue.bytes > 0 end)
 ---
+- true
+...
+test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_inprogress == 0 end)
+---
+- true
 ...
 errinj.set('ERRINJ_SNAP_COMMIT_DELAY', false)
 ---
@@ -992,6 +998,10 @@ c:get()
 ---
 - true
 ...
+test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.count > 0 end)
+---
+- true
+...
 -- Check that all files corresponding to the last checkpoint
 -- are present.
 files = box.backup.start()
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 034ed34c..4317ccb8 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -346,15 +346,18 @@ _ = s:create_index('pk', {run_count_per_level = 1})
 _ = s:replace{1}
 box.snapshot()
 -- Create another run file. This will trigger compaction
--- as run_count_per_level is set to 1. Due to the error
--- injection compaction will finish before snapshot.
+-- as run_count_per_level is set to 1. Delay checkpointing
+-- completion and check that compaction doesn't remove
+-- files that are still needed for backup.
 _ = s:replace{2}
 errinj.set('ERRINJ_SNAP_COMMIT_DELAY', true)
 c = fiber.channel(1)
 _ = fiber.create(function() box.snapshot() c:put(true) end)
-while s.index.pk:stat().disk.compaction.count == 0 do fiber.sleep(0.001) end
+test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.queue.bytes > 0 end)
+test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_inprogress == 0 end)
 errinj.set('ERRINJ_SNAP_COMMIT_DELAY', false)
 c:get()
+test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.count > 0 end)
 -- Check that all files corresponding to the last checkpoint
 -- are present.
 files = box.backup.start()
-- 
2.11.0




More information about the Tarantool-patches mailing list