Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
@ 2018-06-04 11:42 Vladimir Davydov
  2018-06-08  3:56 ` Konstantin Osipov
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-04 11:42 UTC (permalink / raw)
  To: kostja; +Cc: tarantool-patches

Since tuples stored in temporary spaces are never written to disk, we
can always delete them immediately, even when a snapshot is in progress.

Closes #3432
---
https://github.com/tarantool/tarantool/issues/3432
https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples

 src/box/memtx_engine.c   |  8 +++-
 src/box/memtx_space.c    |  1 +
 src/box/tuple_format.c   |  1 +
 src/box/tuple_format.h   |  6 +++
 src/errinj.h             |  1 +
 test/box/errinj.result   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/box/errinj.test.lua | 44 ++++++++++++++++++++++
 7 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index fac84ce1..675ebb59 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
 static int
 checkpoint_write_row(struct xlog *l, struct xrow_header *row)
 {
+	struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
+				       ERRINJ_DOUBLE);
+	if (errinj != NULL && errinj->dparam > 0)
+		usleep(errinj->dparam * 1000000);
+
 	static ev_tstamp last = 0;
 	if (last == 0) {
 		ev_now_update(loop());
@@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
 	struct memtx_tuple *memtx_tuple =
 		container_of(tuple, struct memtx_tuple, base);
 	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
-	    memtx_tuple->version == memtx->snapshot_version)
+	    memtx_tuple->version == memtx->snapshot_version ||
+	    format->temporary)
 		smfree(&memtx->alloc, memtx_tuple, total);
 	else
 		smfree_delayed(&memtx->alloc, memtx_tuple, total);
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index f17df58c..aef7e788 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
 		return NULL;
 	}
 	format->engine = memtx;
+	format->temporary = def->opts.temporary;
 	format->exact_field_count = def->exact_field_count;
 	tuple_format_ref(format);
 
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 277d9e7f..486646ea 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
 	format->vtab = *vtab;
 	format->engine = NULL;
 	format->extra_size = extra_size;
+	format->temporary = false;
 	if (tuple_format_register(format) < 0) {
 		tuple_format_destroy(format);
 		free(format);
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index d8c898b8..9da9be3e 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -124,6 +124,12 @@ struct tuple_format {
 	/** Reference counter */
 	int refs;
 	/**
+	 * Tuples of this format belong to a temporary space and
+	 * hence can be freed immediately while checkpointing is
+	 * in progress.
+	 */
+	bool temporary;
+	/**
 	 * The number of extra bytes to reserve in tuples before
 	 * field map.
 	 * \sa struct tuple
diff --git a/src/errinj.h b/src/errinj.h
index ab578274..4998fdcd 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -112,6 +112,7 @@ struct errinj {
 	_(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
+	_(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index e25a4594..d3765f11 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -20,6 +20,8 @@ errinj.info()
     state: false
   ERRINJ_VYRUN_DATA_READ:
     state: false
+  ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
+    state: 0
   ERRINJ_VY_SCHED_TIMEOUT:
     state: 0
   ERRINJ_WAL_WRITE_PARTIAL:
@@ -1309,3 +1311,97 @@ s:select()
 s:drop()
 ---
 ...
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+---
+- true
+...
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+fiber = require('fiber')
+---
+...
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+---
+...
+_ = box.space.test:create_index('pk')
+---
+...
+for i = 1, 100 do box.space.test:insert{i} end
+---
+...
+-- Create a temporary space.
+count = 500
+---
+...
+pad = string.rep('x', 100 * 1024)
+---
+...
+_ = box.schema.space.create('tmp', {temporary = true})
+---
+...
+_ = box.space.tmp:create_index('pk')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+-- Start background snapshot.
+c = fiber.channel(1)
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+---
+- ok
+...
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+---
+...
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+---
+...
+_ = collectgarbage('collect')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+---
+- ok
+...
+c:get()
+---
+- true
+...
+box.space.tmp:drop()
+---
+...
+box.space.test:drop()
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("cleanup server test")
+---
+- true
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index f2ed823b..188c65d0 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -447,3 +447,47 @@ errinj.set('ERRINJ_WAL_IO', false)
 for i = 1, 10 do s:replace{i + 10} end
 s:select()
 s:drop()
+
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+test_run:cmd("switch test")
+
+fiber = require('fiber')
+
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+_ = box.space.test:create_index('pk')
+for i = 1, 100 do box.space.test:insert{i} end
+
+-- Create a temporary space.
+count = 500
+pad = string.rep('x', 100 * 1024)
+_ = box.schema.space.create('tmp', {temporary = true})
+_ = box.space.tmp:create_index('pk')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+-- Start background snapshot.
+c = fiber.channel(1)
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+_ = collectgarbage('collect')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+c:get()
+
+box.space.tmp:drop()
+box.space.test:drop()
+
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+test_run:cmd("cleanup server test")
-- 
2.11.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-06-14 13:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-04 11:42 [PATCH] memtx: don't delay deletion of temporary tuples during snapshot Vladimir Davydov
2018-06-08  3:56 ` Konstantin Osipov
2018-06-08  6:39   ` Vladimir Davydov
2018-06-14  8:23     ` Vladimir Davydov
2018-06-14 12:57       ` Konstantin Osipov
2018-06-14 13:29         ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox