Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
@ 2018-06-04 11:42 Vladimir Davydov
  2018-06-08  3:56 ` Konstantin Osipov
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-04 11:42 UTC (permalink / raw)
  To: kostja; +Cc: tarantool-patches

Since tuples stored in temporary spaces are never written to disk, we
can always delete them immediately, even when a snapshot is in progress.

Closes #3432
---
https://github.com/tarantool/tarantool/issues/3432
https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples

 src/box/memtx_engine.c   |  8 +++-
 src/box/memtx_space.c    |  1 +
 src/box/tuple_format.c   |  1 +
 src/box/tuple_format.h   |  6 +++
 src/errinj.h             |  1 +
 test/box/errinj.result   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/box/errinj.test.lua | 44 ++++++++++++++++++++++
 7 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index fac84ce1..675ebb59 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
 static int
 checkpoint_write_row(struct xlog *l, struct xrow_header *row)
 {
+	struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
+				       ERRINJ_DOUBLE);
+	if (errinj != NULL && errinj->dparam > 0)
+		usleep(errinj->dparam * 1000000);
+
 	static ev_tstamp last = 0;
 	if (last == 0) {
 		ev_now_update(loop());
@@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
 	struct memtx_tuple *memtx_tuple =
 		container_of(tuple, struct memtx_tuple, base);
 	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
-	    memtx_tuple->version == memtx->snapshot_version)
+	    memtx_tuple->version == memtx->snapshot_version ||
+	    format->temporary)
 		smfree(&memtx->alloc, memtx_tuple, total);
 	else
 		smfree_delayed(&memtx->alloc, memtx_tuple, total);
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index f17df58c..aef7e788 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
 		return NULL;
 	}
 	format->engine = memtx;
+	format->temporary = def->opts.temporary;
 	format->exact_field_count = def->exact_field_count;
 	tuple_format_ref(format);
 
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 277d9e7f..486646ea 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
 	format->vtab = *vtab;
 	format->engine = NULL;
 	format->extra_size = extra_size;
+	format->temporary = false;
 	if (tuple_format_register(format) < 0) {
 		tuple_format_destroy(format);
 		free(format);
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index d8c898b8..9da9be3e 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -124,6 +124,12 @@ struct tuple_format {
 	/** Reference counter */
 	int refs;
 	/**
+	 * Tuples of this format belong to a temporary space and
+	 * hence can be freed immediately while checkpointing is
+	 * in progress.
+	 */
+	bool temporary;
+	/**
 	 * The number of extra bytes to reserve in tuples before
 	 * field map.
 	 * \sa struct tuple
diff --git a/src/errinj.h b/src/errinj.h
index ab578274..4998fdcd 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -112,6 +112,7 @@ struct errinj {
 	_(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
+	_(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index e25a4594..d3765f11 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -20,6 +20,8 @@ errinj.info()
     state: false
   ERRINJ_VYRUN_DATA_READ:
     state: false
+  ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
+    state: 0
   ERRINJ_VY_SCHED_TIMEOUT:
     state: 0
   ERRINJ_WAL_WRITE_PARTIAL:
@@ -1309,3 +1311,97 @@ s:select()
 s:drop()
 ---
 ...
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+---
+- true
+...
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+fiber = require('fiber')
+---
+...
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+---
+...
+_ = box.space.test:create_index('pk')
+---
+...
+for i = 1, 100 do box.space.test:insert{i} end
+---
+...
+-- Create a temporary space.
+count = 500
+---
+...
+pad = string.rep('x', 100 * 1024)
+---
+...
+_ = box.schema.space.create('tmp', {temporary = true})
+---
+...
+_ = box.space.tmp:create_index('pk')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+-- Start background snapshot.
+c = fiber.channel(1)
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+---
+- ok
+...
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+---
+...
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+---
+...
+_ = collectgarbage('collect')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+---
+- ok
+...
+c:get()
+---
+- true
+...
+box.space.tmp:drop()
+---
+...
+box.space.test:drop()
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("cleanup server test")
+---
+- true
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index f2ed823b..188c65d0 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -447,3 +447,47 @@ errinj.set('ERRINJ_WAL_IO', false)
 for i = 1, 10 do s:replace{i + 10} end
 s:select()
 s:drop()
+
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+test_run:cmd("switch test")
+
+fiber = require('fiber')
+
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+_ = box.space.test:create_index('pk')
+for i = 1, 100 do box.space.test:insert{i} end
+
+-- Create a temporary space.
+count = 500
+pad = string.rep('x', 100 * 1024)
+_ = box.schema.space.create('tmp', {temporary = true})
+_ = box.space.tmp:create_index('pk')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+-- Start background snapshot.
+c = fiber.channel(1)
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+_ = collectgarbage('collect')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+c:get()
+
+box.space.tmp:drop()
+box.space.test:drop()
+
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+test_run:cmd("cleanup server test")
-- 
2.11.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
  2018-06-04 11:42 [PATCH] memtx: don't delay deletion of temporary tuples during snapshot Vladimir Davydov
@ 2018-06-08  3:56 ` Konstantin Osipov
  2018-06-08  6:39   ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2018-06-08  3:56 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches

* Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> Since tuples stored in temporary spaces are never written to disk, we
> can always delete them immediately, even when a snapshot is in progress.
> 
> Closes #3432

OK to push.

> ---
> https://github.com/tarantool/tarantool/issues/3432
> https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples
> 
>  src/box/memtx_engine.c   |  8 +++-
>  src/box/memtx_space.c    |  1 +
>  src/box/tuple_format.c   |  1 +
>  src/box/tuple_format.h   |  6 +++
>  src/errinj.h             |  1 +
>  test/box/errinj.result   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
>  test/box/errinj.test.lua | 44 ++++++++++++++++++++++
>  7 files changed, 156 insertions(+), 1 deletion(-)
> 
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index fac84ce1..675ebb59 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
>  static int
>  checkpoint_write_row(struct xlog *l, struct xrow_header *row)
>  {
> +	struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
> +				       ERRINJ_DOUBLE);
> +	if (errinj != NULL && errinj->dparam > 0)
> +		usleep(errinj->dparam * 1000000);
> +
>  	static ev_tstamp last = 0;
>  	if (last == 0) {
>  		ev_now_update(loop());
> @@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
>  	struct memtx_tuple *memtx_tuple =
>  		container_of(tuple, struct memtx_tuple, base);
>  	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
> -	    memtx_tuple->version == memtx->snapshot_version)
> +	    memtx_tuple->version == memtx->snapshot_version ||
> +	    format->temporary)
>  		smfree(&memtx->alloc, memtx_tuple, total);
>  	else
>  		smfree_delayed(&memtx->alloc, memtx_tuple, total);
> diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
> index f17df58c..aef7e788 100644
> --- a/src/box/memtx_space.c
> +++ b/src/box/memtx_space.c
> @@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
>  		return NULL;
>  	}
>  	format->engine = memtx;
> +	format->temporary = def->opts.temporary;
>  	format->exact_field_count = def->exact_field_count;
>  	tuple_format_ref(format);
>  
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 277d9e7f..486646ea 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
>  	format->vtab = *vtab;
>  	format->engine = NULL;
>  	format->extra_size = extra_size;
> +	format->temporary = false;
>  	if (tuple_format_register(format) < 0) {
>  		tuple_format_destroy(format);
>  		free(format);
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index d8c898b8..9da9be3e 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -124,6 +124,12 @@ struct tuple_format {
>  	/** Reference counter */
>  	int refs;
>  	/**
> +	 * Tuples of this format belong to a temporary space and
> +	 * hence can be freed immediately while checkpointing is
> +	 * in progress.
> +	 */
> +	bool temporary;
> +	/**
>  	 * The number of extra bytes to reserve in tuples before
>  	 * field map.
>  	 * \sa struct tuple
> diff --git a/src/errinj.h b/src/errinj.h
> index ab578274..4998fdcd 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -112,6 +112,7 @@ struct errinj {
>  	_(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
>  	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
>  	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
> +	_(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
>  
>  ENUM0(errinj_id, ERRINJ_LIST);
>  extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index e25a4594..d3765f11 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -20,6 +20,8 @@ errinj.info()
>      state: false
>    ERRINJ_VYRUN_DATA_READ:
>      state: false
> +  ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
> +    state: 0
>    ERRINJ_VY_SCHED_TIMEOUT:
>      state: 0
>    ERRINJ_WAL_WRITE_PARTIAL:
> @@ -1309,3 +1311,97 @@ s:select()
>  s:drop()
>  ---
>  ...
> +for i = 1, 100 do box.space.test:insert{i} end
> +---
> +...
> +-- Create a temporary space.
> +count = 500

Please is there a chance this test writes less than 50MB of data
to the database? A couple of order of magnitudes would be really
nice.

The patch itself is good, of course.

> +---
> +...
> +pad = string.rep('x', 100 * 1024)
> +---
> +...
> +_ = box.schema.space.create('tmp', {temporary = true})
> +---
> +...
> +_ = box.space.tmp:create_index('pk')
> +---
> +...
> +for i = 1, count do box.space.tmp:insert{i, pad} end
> +---
-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
  2018-06-08  3:56 ` Konstantin Osipov
@ 2018-06-08  6:39   ` Vladimir Davydov
  2018-06-14  8:23     ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-08  6:39 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > Since tuples stored in temporary spaces are never written to disk, we
> > can always delete them immediately, even when a snapshot is in progress.
> > 
> > Closes #3432
> 
> OK to push.

Where to? 1.9 or 1.10?

The ticket is on 1.10 milestone, but there's a comment that says it
should be fixed in 1.9 ...

> > https://github.com/tarantool/tarantool/issues/3432
> > https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples

> > @@ -1309,3 +1311,97 @@ s:select()
> >  s:drop()
> >  ---
> >  ...
> > +for i = 1, 100 do box.space.test:insert{i} end
> > +---
> > +...
> > +-- Create a temporary space.
> > +count = 500
> 
> Please is there a chance this test writes less than 50MB of data
> to the database? A couple of order of magnitudes would be really
> nice.

The problem is tarantool won't start if the memory limit is < 40 MB.
So if we want to use less memory in this test, say 10 MB, we will have
to set memory limit to 40 + 10 MB, but that would raise questions, as
10 + 10 MB should fit in 50 MB. So I decided to insert 50 MB of data and
set limit 100 MB - this is clear, because 50 + 50 MB won't fit in 100 MB
no matter how much memory the core needs to start.

Anyway, the test runs with wal_mode=none so it should be fast.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
  2018-06-08  6:39   ` Vladimir Davydov
@ 2018-06-14  8:23     ` Vladimir Davydov
  2018-06-14 12:57       ` Konstantin Osipov
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-14  8:23 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

On Fri, Jun 08, 2018 at 09:39:15AM +0300, Vladimir Davydov wrote:
> On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> > * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > > Since tuples stored in temporary spaces are never written to disk, we
> > > can always delete them immediately, even when a snapshot is in progress.
> > > 
> > > Closes #3432
> > 
> > OK to push.
> 
> Where to? 1.9 or 1.10?
> 
> The ticket is on 1.10 milestone, but there's a comment that says it
> should be fixed in 1.9 ...

ping

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
  2018-06-14  8:23     ` Vladimir Davydov
@ 2018-06-14 12:57       ` Konstantin Osipov
  2018-06-14 13:29         ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2018-06-14 12:57 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches

* Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/14 11:28]:
> On Fri, Jun 08, 2018 at 09:39:15AM +0300, Vladimir Davydov wrote:
> > On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> > > * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > > > Since tuples stored in temporary spaces are never written to disk, we
> > > > can always delete them immediately, even when a snapshot is in progress.
> > > > 
> > > > Closes #3432
> > > 
> > > OK to push.
> > 
> > Where to? 1.9 or 1.10?
> > 
> > The ticket is on 1.10 milestone, but there's a comment that says it
> > should be fixed in 1.9 ...
> 
> ping

1.9


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
  2018-06-14 12:57       ` Konstantin Osipov
@ 2018-06-14 13:29         ` Vladimir Davydov
  0 siblings, 0 replies; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-14 13:29 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

On Thu, Jun 14, 2018 at 03:57:59PM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/14 11:28]:
> > On Fri, Jun 08, 2018 at 09:39:15AM +0300, Vladimir Davydov wrote:
> > > On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> > > > * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > > > > Since tuples stored in temporary spaces are never written to disk, we
> > > > > can always delete them immediately, even when a snapshot is in progress.
> > > > > 
> > > > > Closes #3432
> > > > 
> > > > OK to push.
> > > 
> > > Where to? 1.9 or 1.10?
> > > 
> > > The ticket is on 1.10 milestone, but there's a comment that says it
> > > should be fixed in 1.9 ...
> > 
> > ping
> 
> 1.9

Pushed.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-06-14 13:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-04 11:42 [PATCH] memtx: don't delay deletion of temporary tuples during snapshot Vladimir Davydov
2018-06-08  3:56 ` Konstantin Osipov
2018-06-08  6:39   ` Vladimir Davydov
2018-06-14  8:23     ` Vladimir Davydov
2018-06-14 12:57       ` Konstantin Osipov
2018-06-14 13:29         ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox