Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH] vinyl: remove runs not referenced by any checkpoint immediately
Date: Thu, 17 May 2018 19:09:52 +0300	[thread overview]
Message-ID: <50ccdb32ac67e8cd3f90acd998adf0e61861ae75.1526573189.git.vdavydov.dev@gmail.com> (raw)

If a compacted run was created after the last checkpoint, it is not
needed to recover from any checkpoint and hence can be deleted right
away to save disk space.

Closes #3407
---
https://github.com/tarantool/tarantool/issues/3407
https://github.com/tarantool/tarantool/commits/gh-3407-vy-remove-unreferenced-runs-immediately

 src/box/vy_scheduler.c | 27 +++++++++--------
 test/vinyl/gc.result   | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/gc.test.lua | 35 ++++++++++++++++++++++
 3 files changed, 126 insertions(+), 14 deletions(-)

diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index e1853e5d..4c9103cf 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -1139,22 +1139,21 @@ vy_task_compact_complete(struct vy_scheduler *scheduler, struct vy_task *task)
 		return -1;
 	}
 
-	if (gc_lsn < 0) {
-		/*
-		 * If there is no last snapshot, i.e. we are in
-		 * the middle of join, we can delete compacted
-		 * run files right away.
-		 */
-		vy_log_tx_begin();
-		rlist_foreach_entry(run, &unused_runs, in_unused) {
-			if (vy_run_remove_files(index->env->path,
-						index->space_id, index->id,
-						run->id) == 0) {
-				vy_log_forget_run(run->id);
-			}
+	/*
+	 * Remove compacted run files that were created after
+	 * the last checkpoint (and hence are not referenced
+	 * by any checkpoint) immediately to save disk space.
+	 */
+	vy_log_tx_begin();
+	rlist_foreach_entry(run, &unused_runs, in_unused) {
+		if (run->dump_lsn > gc_lsn &&
+		    vy_run_remove_files(index->env->path,
+					index->space_id, index->id,
+					run->id) == 0) {
+			vy_log_forget_run(run->id);
 		}
-		vy_log_tx_try_commit();
 	}
+	vy_log_tx_try_commit();
 
 	/*
 	 * Account the new run if it is not empty,
diff --git a/test/vinyl/gc.result b/test/vinyl/gc.result
index f88b3996..b709135c 100644
--- a/test/vinyl/gc.result
+++ b/test/vinyl/gc.result
@@ -126,3 +126,81 @@ temp:drop()
 box.cfg{checkpoint_count = default_checkpoint_count}
 ---
 ...
+--
+-- Check that compacted run files that are not referenced
+-- by any checkpoint are deleted immediately (gh-3407).
+--
+test_run:cmd("create server test with script='vinyl/low_quota.lua'")
+---
+- true
+...
+test_run:cmd("start server test with args='1048576'")
+---
+- true
+...
+test_run:cmd('switch test')
+---
+- true
+...
+box.cfg{checkpoint_count = 2}
+---
+...
+fio = require('fio')
+---
+...
+fiber = require('fiber')
+---
+...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk', {run_count_per_level = 3})
+---
+...
+function count_runs() return #fio.glob(fio.pathjoin(box.cfg.vinyl_dir, s.id, s.index.pk.id, '*.run')) end
+---
+...
+_ = s:replace{1}
+---
+...
+box.snapshot()
+---
+- ok
+...
+_ = s:replace{2}
+---
+...
+box.snapshot()
+---
+- ok
+...
+count_runs() -- 2
+---
+- 2
+...
+for i = 1, 20 do s:replace{i, string.rep('x', 100 * 1024)} end
+---
+...
+while s.index.pk:info().disk.compact.count < 1 do fiber.sleep(0.001) end
+---
+...
+s.index.pk:info().disk.compact.count -- 1
+---
+- 1
+...
+count_runs() -- 3 (compacted runs created after checkpoint are deleted)
+---
+- 3
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("cleanup server test")
+---
+- true
+...
diff --git a/test/vinyl/gc.test.lua b/test/vinyl/gc.test.lua
index 3974048b..32078f00 100644
--- a/test/vinyl/gc.test.lua
+++ b/test/vinyl/gc.test.lua
@@ -61,3 +61,38 @@ files = ls_vylog()
 temp:drop()
 
 box.cfg{checkpoint_count = default_checkpoint_count}
+
+--
+-- Check that compacted run files that are not referenced
+-- by any checkpoint are deleted immediately (gh-3407).
+--
+test_run:cmd("create server test with script='vinyl/low_quota.lua'")
+test_run:cmd("start server test with args='1048576'")
+test_run:cmd('switch test')
+
+box.cfg{checkpoint_count = 2}
+
+fio = require('fio')
+fiber = require('fiber')
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk', {run_count_per_level = 3})
+
+function count_runs() return #fio.glob(fio.pathjoin(box.cfg.vinyl_dir, s.id, s.index.pk.id, '*.run')) end
+
+_ = s:replace{1}
+box.snapshot()
+_ = s:replace{2}
+box.snapshot()
+
+count_runs() -- 2
+
+for i = 1, 20 do s:replace{i, string.rep('x', 100 * 1024)} end
+while s.index.pk:info().disk.compact.count < 1 do fiber.sleep(0.001) end
+s.index.pk:info().disk.compact.count -- 1
+
+count_runs() -- 3 (compacted runs created after checkpoint are deleted)
+
+test_run:cmd('switch default')
+test_run:cmd("stop server test")
+test_run:cmd("cleanup server test")
-- 
2.11.0

             reply	other threads:[~2018-05-17 16:09 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-17 16:09 Vladimir Davydov [this message]
2018-05-17 16:35 ` Vladimir Davydov
2018-05-17 20:40 ` Konstantin Osipov
2018-05-17 22:39   ` Konstantin Osipov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50ccdb32ac67e8cd3f90acd998adf0e61861ae75.1526573189.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH] vinyl: remove runs not referenced by any checkpoint immediately' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.