[PATCH] memtx: don't delay deletion of temporary tuples during snapshot

Vladimir Davydov vdavydov.dev at gmail.com
Mon Jun 4 14:42:28 MSK 2018


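Tuples stored in temporary spaces are never written to disk, so a
snapshot in progress never needs them. Free them immediately instead of
delaying the free until the snapshot completes; otherwise memory released
from a temporary space cannot be reused while the snapshot is running.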

Closes #3432
---
https://github.com/tarantool/tarantool/issues/3432
https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples

 src/box/memtx_engine.c   |  8 +++-
 src/box/memtx_space.c    |  1 +
 src/box/tuple_format.c   |  1 +
 src/box/tuple_format.h   |  6 +++
 src/errinj.h             |  1 +
 test/box/errinj.result   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/box/errinj.test.lua | 44 ++++++++++++++++++++++
 7 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index fac84ce1..675ebb59 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
 static int
 checkpoint_write_row(struct xlog *l, struct xrow_header *row)
 {
+	struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
+				       ERRINJ_DOUBLE);
+	if (errinj != NULL && errinj->dparam > 0)
+		usleep(errinj->dparam * 1000000);
+
 	static ev_tstamp last = 0;
 	if (last == 0) {
 		ev_now_update(loop());
@@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
 	struct memtx_tuple *memtx_tuple =
 		container_of(tuple, struct memtx_tuple, base);
 	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
-	    memtx_tuple->version == memtx->snapshot_version)
+	    memtx_tuple->version == memtx->snapshot_version ||
+	    format->temporary)
 		smfree(&memtx->alloc, memtx_tuple, total);
 	else
 		smfree_delayed(&memtx->alloc, memtx_tuple, total);
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index f17df58c..aef7e788 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
 		return NULL;
 	}
 	format->engine = memtx;
+	format->temporary = def->opts.temporary;
 	format->exact_field_count = def->exact_field_count;
 	tuple_format_ref(format);
 
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 277d9e7f..486646ea 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
 	format->vtab = *vtab;
 	format->engine = NULL;
 	format->extra_size = extra_size;
+	format->temporary = false;
 	if (tuple_format_register(format) < 0) {
 		tuple_format_destroy(format);
 		free(format);
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index d8c898b8..9da9be3e 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -124,6 +124,12 @@ struct tuple_format {
 	/** Reference counter */
 	int refs;
 	/**
+	 * Tuples of this format belong to a temporary space and
+	 * hence can be freed immediately while checkpointing is
+	 * in progress.
+	 */
+	bool temporary;
+	/**
 	 * The number of extra bytes to reserve in tuples before
 	 * field map.
 	 * \sa struct tuple
diff --git a/src/errinj.h b/src/errinj.h
index ab578274..4998fdcd 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -112,6 +112,7 @@ struct errinj {
 	_(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
+	_(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index e25a4594..d3765f11 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -20,6 +20,8 @@ errinj.info()
     state: false
   ERRINJ_VYRUN_DATA_READ:
     state: false
+  ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
+    state: 0
   ERRINJ_VY_SCHED_TIMEOUT:
     state: 0
   ERRINJ_WAL_WRITE_PARTIAL:
@@ -1309,3 +1311,97 @@ s:select()
 s:drop()
 ---
 ...
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+---
+- true
+...
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+fiber = require('fiber')
+---
+...
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+---
+...
+_ = box.space.test:create_index('pk')
+---
+...
+for i = 1, 100 do box.space.test:insert{i} end
+---
+...
+-- Create a temporary space.
+count = 500
+---
+...
+pad = string.rep('x', 100 * 1024)
+---
+...
+_ = box.schema.space.create('tmp', {temporary = true})
+---
+...
+_ = box.space.tmp:create_index('pk')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+-- Start background snapshot.
+c = fiber.channel(1)
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+---
+- ok
+...
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+---
+...
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+---
+...
+_ = collectgarbage('collect')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+---
+- ok
+...
+c:get()
+---
+- true
+...
+box.space.tmp:drop()
+---
+...
+box.space.test:drop()
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("cleanup server test")
+---
+- true
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index f2ed823b..188c65d0 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -447,3 +447,47 @@ errinj.set('ERRINJ_WAL_IO', false)
 for i = 1, 10 do s:replace{i + 10} end
 s:select()
 s:drop()
+
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+test_run:cmd("switch test")
+
+fiber = require('fiber')
+
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+_ = box.space.test:create_index('pk')
+for i = 1, 100 do box.space.test:insert{i} end
+
+-- Create a temporary space.
+count = 500
+pad = string.rep('x', 100 * 1024)
+_ = box.schema.space.create('tmp', {temporary = true})
+_ = box.space.tmp:create_index('pk')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+-- Start background snapshot.
+c = fiber.channel(1)
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+_ = collectgarbage('collect')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+c:get()
+
+box.space.tmp:drop()
+box.space.test:drop()
+
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+test_run:cmd("cleanup server test")
-- 
2.11.0




More information about the Tarantool-patches mailing list