From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH] vinyl: remove runs not referenced by any checkpoint immediately Date: Thu, 17 May 2018 19:09:52 +0300 Message-Id: <50ccdb32ac67e8cd3f90acd998adf0e61861ae75.1526573189.git.vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: If a compacted run was created after the last checkpoint, it is not needed to recover from any checkpoint and hence can be deleted right away to save disk space. Closes #3407 --- https://github.com/tarantool/tarantool/issues/3407 https://github.com/tarantool/tarantool/commits/gh-3407-vy-remove-unreferenced-runs-immediately src/box/vy_scheduler.c | 27 +++++++++-------- test/vinyl/gc.result | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/gc.test.lua | 35 ++++++++++++++++++++++ 3 files changed, 126 insertions(+), 14 deletions(-) diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index e1853e5d..4c9103cf 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -1139,22 +1139,21 @@ vy_task_compact_complete(struct vy_scheduler *scheduler, struct vy_task *task) return -1; } - if (gc_lsn < 0) { - /* - * If there is no last snapshot, i.e. we are in - * the middle of join, we can delete compacted - * run files right away. - */ - vy_log_tx_begin(); - rlist_foreach_entry(run, &unused_runs, in_unused) { - if (vy_run_remove_files(index->env->path, - index->space_id, index->id, - run->id) == 0) { - vy_log_forget_run(run->id); - } + /* + * Remove compacted run files that were created after + * the last checkpoint (and hence are not referenced + * by any checkpoint) immediately to save disk space. + */ + vy_log_tx_begin(); + rlist_foreach_entry(run, &unused_runs, in_unused) { + if (run->dump_lsn > gc_lsn && + vy_run_remove_files(index->env->path, + index->space_id, index->id, + run->id) == 0) { + vy_log_forget_run(run->id); } - vy_log_tx_try_commit(); } + vy_log_tx_try_commit(); /* * Account the new run if it is not empty, diff --git a/test/vinyl/gc.result b/test/vinyl/gc.result index f88b3996..b709135c 100644 --- a/test/vinyl/gc.result +++ b/test/vinyl/gc.result @@ -126,3 +126,81 @@ temp:drop() box.cfg{checkpoint_count = default_checkpoint_count} --- ... +-- +-- Check that compacted run files that are not referenced +-- by any checkpoint are deleted immediately (gh-3407). +-- +test_run:cmd("create server test with script='vinyl/low_quota.lua'") +--- +- true +... +test_run:cmd("start server test with args='1048576'") +--- +- true +... +test_run:cmd('switch test') +--- +- true +... +box.cfg{checkpoint_count = 2} +--- +... +fio = require('fio') +--- +... +fiber = require('fiber') +--- +... +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('pk', {run_count_per_level = 3}) +--- +... +function count_runs() return #fio.glob(fio.pathjoin(box.cfg.vinyl_dir, s.id, s.index.pk.id, '*.run')) end +--- +... +_ = s:replace{1} +--- +... +box.snapshot() +--- +- ok +... +_ = s:replace{2} +--- +... +box.snapshot() +--- +- ok +... +count_runs() -- 2 +--- +- 2 +... +for i = 1, 20 do s:replace{i, string.rep('x', 100 * 1024)} end +--- +... +while s.index.pk:info().disk.compact.count < 1 do fiber.sleep(0.001) end +--- +... +s.index.pk:info().disk.compact.count -- 1 +--- +- 1 +... +count_runs() -- 3 (compacted runs created after checkpoint are deleted) +--- +- 3 +... +test_run:cmd('switch default') +--- +- true +... +test_run:cmd("stop server test") +--- +- true +... +test_run:cmd("cleanup server test") +--- +- true +... diff --git a/test/vinyl/gc.test.lua b/test/vinyl/gc.test.lua index 3974048b..32078f00 100644 --- a/test/vinyl/gc.test.lua +++ b/test/vinyl/gc.test.lua @@ -61,3 +61,38 @@ files = ls_vylog() temp:drop() box.cfg{checkpoint_count = default_checkpoint_count} + +-- +-- Check that compacted run files that are not referenced +-- by any checkpoint are deleted immediately (gh-3407). +-- +test_run:cmd("create server test with script='vinyl/low_quota.lua'") +test_run:cmd("start server test with args='1048576'") +test_run:cmd('switch test') + +box.cfg{checkpoint_count = 2} + +fio = require('fio') +fiber = require('fiber') + +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk', {run_count_per_level = 3}) + +function count_runs() return #fio.glob(fio.pathjoin(box.cfg.vinyl_dir, s.id, s.index.pk.id, '*.run')) end + +_ = s:replace{1} +box.snapshot() +_ = s:replace{2} +box.snapshot() + +count_runs() -- 2 + +for i = 1, 20 do s:replace{i, string.rep('x', 100 * 1024)} end +while s.index.pk:info().disk.compact.count < 1 do fiber.sleep(0.001) end +s.index.pk:info().disk.compact.count -- 1 + +count_runs() -- 3 (compacted runs created after checkpoint are deleted) + +test_run:cmd('switch default') +test_run:cmd("stop server test") +test_run:cmd("cleanup server test") -- 2.11.0