From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 06/10] vinyl: lock out compaction while checkpointing is in progress Date: Fri, 17 May 2019 17:52:40 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org List-ID: Upon compaction completion we delete run files that are not needed for recovery from the last checkpoint. If we start compaction while checkpointing is in progress, it might compact and delete a run file dumped during checkpointing, which would make it impossible to back up the files corresponding to the last checkpoint. Commit b25e31685096 ("vinyl: fix compaction vs checkpoint race resulting in invalid gc") claimed to eliminate the race, but in fact it just reduced its probability: no matter whether we use the last checkpoint signature or vylog signature when checking if a run file doesn't belong to the last checkpoint, there's still a race window, which opens wider the more indexes we have to dump. Let's fix it once and for all by locking out compaction until checkpointing is complete. --- src/box/vy_scheduler.c | 17 +++++++++++++++++ test/vinyl/errinj.result | 16 +++++++++++++--- test/vinyl/errinj.test.lua | 9 ++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index 0180331e..2d1abc2e 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -741,6 +741,13 @@ vy_scheduler_end_checkpoint(struct vy_scheduler *scheduler) */ vy_scheduler_trigger_dump(scheduler); } + + /* + * Checkpointing temporarily blocks compaction. + * Wake up the scheduler to check if there are + * pending compaction tasks. 
+ */ + fiber_cond_signal(&scheduler->scheduler_cond); } /** @@ -1896,6 +1903,16 @@ vy_scheduler_peek_compaction(struct vy_scheduler *scheduler, struct vy_worker *worker = NULL; retry: *ptask = NULL; + /* + * Upon completion a compaction task removes compacted run + * files unless they are needed for recovery from the last + * checkpoint. If we start compaction while checkpointing + * is in progress we might compact a run that belongs to + * the new, to be created, checkpoint. To avoid that we + * lock out compaction while checkpointing is in progress. + */ + if (scheduler->checkpoint_in_progress) + goto no_task; struct vy_lsm *lsm = vy_compaction_heap_top(&scheduler->compaction_heap); if (lsm == NULL) goto no_task; /* nothing to do */ diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index e8795143..e05a616a 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -966,8 +966,9 @@ box.snapshot() - ok ... -- Create another run file. This will trigger compaction --- as run_count_per_level is set to 1. Due to the error --- injection compaction will finish before snapshot. +-- as run_count_per_level is set to 1. Delay checkpointing +-- completion and check that compaction doesn't remove +-- files that are still needed for backup. _ = s:replace{2} --- ... @@ -981,8 +982,13 @@ c = fiber.channel(1) _ = fiber.create(function() box.snapshot() c:put(true) end) --- ... -while s.index.pk:stat().disk.compaction.count == 0 do fiber.sleep(0.001) end +test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.queue.bytes > 0 end) --- +- true +... +test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_inprogress == 0 end) +--- +- true ... errinj.set('ERRINJ_SNAP_COMMIT_DELAY', false) --- @@ -992,6 +998,10 @@ c:get() --- - true ... +test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.count > 0 end) +--- +- true +... -- Check that all files corresponding to the last checkpoint -- are present. 
files = box.backup.start() diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 034ed34c..4317ccb8 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -346,15 +346,18 @@ _ = s:create_index('pk', {run_count_per_level = 1}) _ = s:replace{1} box.snapshot() -- Create another run file. This will trigger compaction --- as run_count_per_level is set to 1. Due to the error --- injection compaction will finish before snapshot. +-- as run_count_per_level is set to 1. Delay checkpointing +-- completion and check that compaction doesn't remove +-- files that are still needed for backup. _ = s:replace{2} errinj.set('ERRINJ_SNAP_COMMIT_DELAY', true) c = fiber.channel(1) _ = fiber.create(function() box.snapshot() c:put(true) end) -while s.index.pk:stat().disk.compaction.count == 0 do fiber.sleep(0.001) end +test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.queue.bytes > 0 end) +test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_inprogress == 0 end) errinj.set('ERRINJ_SNAP_COMMIT_DELAY', false) c:get() +test_run:wait_cond(function() return s.index.pk:stat().disk.compaction.count > 0 end) -- Check that all files corresponding to the last checkpoint -- are present. files = box.backup.start() -- 2.11.0