* [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
@ 2018-06-04 11:42 Vladimir Davydov
2018-06-08 3:56 ` Konstantin Osipov
0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-04 11:42 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Since tuples stored in temporary spaces are never written to disk, we
can always delete them immediately, even when a snapshot is in progress.
Closes #3432
---
https://github.com/tarantool/tarantool/issues/3432
https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples
src/box/memtx_engine.c | 8 +++-
src/box/memtx_space.c | 1 +
src/box/tuple_format.c | 1 +
src/box/tuple_format.h | 6 +++
src/errinj.h | 1 +
test/box/errinj.result | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
test/box/errinj.test.lua | 44 ++++++++++++++++++++++
7 files changed, 156 insertions(+), 1 deletion(-)
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index fac84ce1..675ebb59 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
static int
checkpoint_write_row(struct xlog *l, struct xrow_header *row)
{
+ struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
+ ERRINJ_DOUBLE);
+ if (errinj != NULL && errinj->dparam > 0)
+ usleep(errinj->dparam * 1000000);
+
static ev_tstamp last = 0;
if (last == 0) {
ev_now_update(loop());
@@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
struct memtx_tuple *memtx_tuple =
container_of(tuple, struct memtx_tuple, base);
if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
- memtx_tuple->version == memtx->snapshot_version)
+ memtx_tuple->version == memtx->snapshot_version ||
+ format->temporary)
smfree(&memtx->alloc, memtx_tuple, total);
else
smfree_delayed(&memtx->alloc, memtx_tuple, total);
diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
index f17df58c..aef7e788 100644
--- a/src/box/memtx_space.c
+++ b/src/box/memtx_space.c
@@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
return NULL;
}
format->engine = memtx;
+ format->temporary = def->opts.temporary;
format->exact_field_count = def->exact_field_count;
tuple_format_ref(format);
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 277d9e7f..486646ea 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
format->vtab = *vtab;
format->engine = NULL;
format->extra_size = extra_size;
+ format->temporary = false;
if (tuple_format_register(format) < 0) {
tuple_format_destroy(format);
free(format);
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index d8c898b8..9da9be3e 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -124,6 +124,12 @@ struct tuple_format {
/** Reference counter */
int refs;
/**
+ * Tuples of this format belong to a temporary space and
+ * hence can be freed immediately while checkpointing is
+ * in progress.
+ */
+ bool temporary;
+ /**
* The number of extra bytes to reserve in tuples before
* field map.
* \sa struct tuple
diff --git a/src/errinj.h b/src/errinj.h
index ab578274..4998fdcd 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -112,6 +112,7 @@ struct errinj {
_(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
+ _(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
ENUM0(errinj_id, ERRINJ_LIST);
extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index e25a4594..d3765f11 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -20,6 +20,8 @@ errinj.info()
state: false
ERRINJ_VYRUN_DATA_READ:
state: false
+ ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
+ state: 0
ERRINJ_VY_SCHED_TIMEOUT:
state: 0
ERRINJ_WAL_WRITE_PARTIAL:
@@ -1309,3 +1311,97 @@ s:select()
s:drop()
---
...
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+---
+- true
+...
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+fiber = require('fiber')
+---
+...
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+---
+...
+_ = box.space.test:create_index('pk')
+---
+...
+for i = 1, 100 do box.space.test:insert{i} end
+---
+...
+-- Create a temporary space.
+count = 500
+---
+...
+pad = string.rep('x', 100 * 1024)
+---
+...
+_ = box.schema.space.create('tmp', {temporary = true})
+---
+...
+_ = box.space.tmp:create_index('pk')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+-- Start background snapshot.
+c = fiber.channel(1)
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+---
+- ok
+...
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+---
+...
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+---
+...
+_ = collectgarbage('collect')
+---
+...
+for i = 1, count do box.space.tmp:insert{i, pad} end
+---
+...
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+---
+- ok
+...
+c:get()
+---
+- true
+...
+box.space.tmp:drop()
+---
+...
+box.space.test:drop()
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("cleanup server test")
+---
+- true
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index f2ed823b..188c65d0 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -447,3 +447,47 @@ errinj.set('ERRINJ_WAL_IO', false)
for i = 1, 10 do s:replace{i + 10} end
s:select()
s:drop()
+
+--
+-- gh-3432: check that deletion of temporary tuples is not delayed
+-- if snapshot is in progress.
+--
+test_run:cmd("create server test with script='box/lua/cfg_memory.lua'")
+test_run:cmd(string.format("start server test with args='%d'", 100 * 1024 * 1024))
+test_run:cmd("switch test")
+
+fiber = require('fiber')
+
+-- Create a persistent space.
+_ = box.schema.space.create('test')
+_ = box.space.test:create_index('pk')
+for i = 1, 100 do box.space.test:insert{i} end
+
+-- Create a temporary space.
+count = 500
+pad = string.rep('x', 100 * 1024)
+_ = box.schema.space.create('tmp', {temporary = true})
+_ = box.space.tmp:create_index('pk')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+-- Start background snapshot.
+c = fiber.channel(1)
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0.01)
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+
+-- Overwrite data stored in the temporary space while snapshot
+-- is in progress to make sure that tuples stored in it are freed
+-- immediately.
+for i = 1, count do box.space.tmp:delete{i} end
+_ = collectgarbage('collect')
+for i = 1, count do box.space.tmp:insert{i, pad} end
+
+box.error.injection.set('ERRINJ_SNAP_WRITE_ROW_TIMEOUT', 0)
+c:get()
+
+box.space.tmp:drop()
+box.space.test:drop()
+
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+test_run:cmd("cleanup server test")
--
2.11.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
2018-06-04 11:42 [PATCH] memtx: don't delay deletion of temporary tuples during snapshot Vladimir Davydov
@ 2018-06-08 3:56 ` Konstantin Osipov
2018-06-08 6:39 ` Vladimir Davydov
0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2018-06-08 3:56 UTC (permalink / raw)
To: Vladimir Davydov; +Cc: tarantool-patches
* Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> Since tuples stored in temporary spaces are never written to disk, we
> can always delete them immediately, even when a snapshot is in progress.
>
> Closes #3432
OK to push.
> ---
> https://github.com/tarantool/tarantool/issues/3432
> https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples
>
> src/box/memtx_engine.c | 8 +++-
> src/box/memtx_space.c | 1 +
> src/box/tuple_format.c | 1 +
> src/box/tuple_format.h | 6 +++
> src/errinj.h | 1 +
> test/box/errinj.result | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
> test/box/errinj.test.lua | 44 ++++++++++++++++++++++
> 7 files changed, 156 insertions(+), 1 deletion(-)
>
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index fac84ce1..675ebb59 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
> static int
> checkpoint_write_row(struct xlog *l, struct xrow_header *row)
> {
> + struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
> + ERRINJ_DOUBLE);
> + if (errinj != NULL && errinj->dparam > 0)
> + usleep(errinj->dparam * 1000000);
> +
> static ev_tstamp last = 0;
> if (last == 0) {
> ev_now_update(loop());
> @@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
> struct memtx_tuple *memtx_tuple =
> container_of(tuple, struct memtx_tuple, base);
> if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
> - memtx_tuple->version == memtx->snapshot_version)
> + memtx_tuple->version == memtx->snapshot_version ||
> + format->temporary)
> smfree(&memtx->alloc, memtx_tuple, total);
> else
> smfree_delayed(&memtx->alloc, memtx_tuple, total);
> diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
> index f17df58c..aef7e788 100644
> --- a/src/box/memtx_space.c
> +++ b/src/box/memtx_space.c
> @@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
> return NULL;
> }
> format->engine = memtx;
> + format->temporary = def->opts.temporary;
> format->exact_field_count = def->exact_field_count;
> tuple_format_ref(format);
>
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 277d9e7f..486646ea 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
> format->vtab = *vtab;
> format->engine = NULL;
> format->extra_size = extra_size;
> + format->temporary = false;
> if (tuple_format_register(format) < 0) {
> tuple_format_destroy(format);
> free(format);
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index d8c898b8..9da9be3e 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -124,6 +124,12 @@ struct tuple_format {
> /** Reference counter */
> int refs;
> /**
> + * Tuples of this format belong to a temporary space and
> + * hence can be freed immediately while checkpointing is
> + * in progress.
> + */
> + bool temporary;
> + /**
> * The number of extra bytes to reserve in tuples before
> * field map.
> * \sa struct tuple
> diff --git a/src/errinj.h b/src/errinj.h
> index ab578274..4998fdcd 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -112,6 +112,7 @@ struct errinj {
> _(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
> _(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
> _(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
> + _(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
>
> ENUM0(errinj_id, ERRINJ_LIST);
> extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index e25a4594..d3765f11 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -20,6 +20,8 @@ errinj.info()
> state: false
> ERRINJ_VYRUN_DATA_READ:
> state: false
> + ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
> + state: 0
> ERRINJ_VY_SCHED_TIMEOUT:
> state: 0
> ERRINJ_WAL_WRITE_PARTIAL:
> @@ -1309,3 +1311,97 @@ s:select()
> s:drop()
> ---
> ...
> +for i = 1, 100 do box.space.test:insert{i} end
> +---
> +...
> +-- Create a temporary space.
> +count = 500
Please is there a chance this test writes less than 50MB of data
to the database? A couple of order of magnitudes would be really
nice.
The patch itself is good, of course.
> +---
> +...
> +pad = string.rep('x', 100 * 1024)
> +---
> +...
> +_ = box.schema.space.create('tmp', {temporary = true})
> +---
> +...
> +_ = box.space.tmp:create_index('pk')
> +---
> +...
> +for i = 1, count do box.space.tmp:insert{i, pad} end
> +---
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
2018-06-08 3:56 ` Konstantin Osipov
@ 2018-06-08 6:39 ` Vladimir Davydov
2018-06-14 8:23 ` Vladimir Davydov
0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-08 6:39 UTC (permalink / raw)
To: Konstantin Osipov; +Cc: tarantool-patches
On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > Since tuples stored in temporary spaces are never written to disk, we
> > can always delete them immediately, even when a snapshot is in progress.
> >
> > Closes #3432
>
> OK to push.
Where to? 1.9 or 1.10?
The ticket is on 1.10 milestone, but there's a comment that says it
should be fixed in 1.9 ...
> > https://github.com/tarantool/tarantool/issues/3432
> > https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples
> > @@ -1309,3 +1311,97 @@ s:select()
> > s:drop()
> > ---
> > ...
> > +for i = 1, 100 do box.space.test:insert{i} end
> > +---
> > +...
> > +-- Create a temporary space.
> > +count = 500
>
> Please is there a chance this test writes less than 50MB of data
> to the database? A couple of order of magnitudes would be really
> nice.
The problem is tarantool won't start if the memory limit is < 40 MB.
So if we want to use less memory in this test, say 10 MB, we will have
to set memory limit to 40 + 10 MB, but that would raise questions, as
10 + 10 MB should fit in 50 MB. So I decided to insert 50 MB of data and
set limit 100 MB - this is clear, because 50 + 50 MB won't fit in 100 MB
no matter how much memory the core needs to start.
Anyway, the test runs with wal_mode=none so it should be fast.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
2018-06-08 6:39 ` Vladimir Davydov
@ 2018-06-14 8:23 ` Vladimir Davydov
2018-06-14 12:57 ` Konstantin Osipov
0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-14 8:23 UTC (permalink / raw)
To: Konstantin Osipov; +Cc: tarantool-patches
On Fri, Jun 08, 2018 at 09:39:15AM +0300, Vladimir Davydov wrote:
> On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> > * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > > Since tuples stored in temporary spaces are never written to disk, we
> > > can always delete them immediately, even when a snapshot is in progress.
> > >
> > > Closes #3432
> >
> > OK to push.
>
> Where to? 1.9 or 1.10?
>
> The ticket is on 1.10 milestone, but there's a comment that says it
> should be fixed in 1.9 ...
ping
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
2018-06-14 8:23 ` Vladimir Davydov
@ 2018-06-14 12:57 ` Konstantin Osipov
2018-06-14 13:29 ` Vladimir Davydov
0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2018-06-14 12:57 UTC (permalink / raw)
To: Vladimir Davydov; +Cc: tarantool-patches
* Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/14 11:28]:
> On Fri, Jun 08, 2018 at 09:39:15AM +0300, Vladimir Davydov wrote:
> > On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> > > * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > > > Since tuples stored in temporary spaces are never written to disk, we
> > > > can always delete them immediately, even when a snapshot is in progress.
> > > >
> > > > Closes #3432
> > >
> > > OK to push.
> >
> > Where to? 1.9 or 1.10?
> >
> > The ticket is on 1.10 milestone, but there's a comment that says it
> > should be fixed in 1.9 ...
>
> ping
1.9
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot
2018-06-14 12:57 ` Konstantin Osipov
@ 2018-06-14 13:29 ` Vladimir Davydov
0 siblings, 0 replies; 6+ messages in thread
From: Vladimir Davydov @ 2018-06-14 13:29 UTC (permalink / raw)
To: Konstantin Osipov; +Cc: tarantool-patches
On Thu, Jun 14, 2018 at 03:57:59PM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/14 11:28]:
> > On Fri, Jun 08, 2018 at 09:39:15AM +0300, Vladimir Davydov wrote:
> > > On Fri, Jun 08, 2018 at 06:56:36AM +0300, Konstantin Osipov wrote:
> > > > * Vladimir Davydov <vdavydov.dev@gmail.com> [18/06/04 23:48]:
> > > > > Since tuples stored in temporary spaces are never written to disk, we
> > > > > can always delete them immediately, even when a snapshot is in progress.
> > > > >
> > > > > Closes #3432
> > > >
> > > > OK to push.
> > >
> > > Where to? 1.9 or 1.10?
> > >
> > > The ticket is on 1.10 milestone, but there's a comment that says it
> > > should be fixed in 1.9 ...
> >
> > ping
>
> 1.9
Pushed.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2018-06-14 13:29 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-04 11:42 [PATCH] memtx: don't delay deletion of temporary tuples during snapshot Vladimir Davydov
2018-06-08 3:56 ` Konstantin Osipov
2018-06-08 6:39 ` Vladimir Davydov
2018-06-14 8:23 ` Vladimir Davydov
2018-06-14 12:57 ` Konstantin Osipov
2018-06-14 13:29 ` Vladimir Davydov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox