[PATCH] memtx: don't delay deletion of temporary tuples during snapshot
Konstantin Osipov
kostja at tarantool.org
Fri Jun 8 06:56:36 MSK 2018
* Vladimir Davydov <vdavydov.dev at gmail.com> [18/06/04 23:48]:
> Since tuples stored in temporary spaces are never written to disk, we
> can always delete them immediately, even when a snapshot is in progress.
>
> Closes #3432
OK to push.
> ---
> https://github.com/tarantool/tarantool/issues/3432
> https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples
>
> src/box/memtx_engine.c | 8 +++-
> src/box/memtx_space.c | 1 +
> src/box/tuple_format.c | 1 +
> src/box/tuple_format.h | 6 +++
> src/errinj.h | 1 +
> test/box/errinj.result | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
> test/box/errinj.test.lua | 44 ++++++++++++++++++++++
> 7 files changed, 156 insertions(+), 1 deletion(-)
>
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index fac84ce1..675ebb59 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
> static int
> checkpoint_write_row(struct xlog *l, struct xrow_header *row)
> {
> + struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
> + ERRINJ_DOUBLE);
> + if (errinj != NULL && errinj->dparam > 0)
> + usleep(errinj->dparam * 1000000);
> +
> static ev_tstamp last = 0;
> if (last == 0) {
> ev_now_update(loop());
> @@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
> struct memtx_tuple *memtx_tuple =
> container_of(tuple, struct memtx_tuple, base);
> if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
> - memtx_tuple->version == memtx->snapshot_version)
> + memtx_tuple->version == memtx->snapshot_version ||
> + format->temporary)
> smfree(&memtx->alloc, memtx_tuple, total);
> else
> smfree_delayed(&memtx->alloc, memtx_tuple, total);
> diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
> index f17df58c..aef7e788 100644
> --- a/src/box/memtx_space.c
> +++ b/src/box/memtx_space.c
> @@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
> return NULL;
> }
> format->engine = memtx;
> + format->temporary = def->opts.temporary;
> format->exact_field_count = def->exact_field_count;
> tuple_format_ref(format);
>
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 277d9e7f..486646ea 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
> format->vtab = *vtab;
> format->engine = NULL;
> format->extra_size = extra_size;
> + format->temporary = false;
> if (tuple_format_register(format) < 0) {
> tuple_format_destroy(format);
> free(format);
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index d8c898b8..9da9be3e 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -124,6 +124,12 @@ struct tuple_format {
> /** Reference counter */
> int refs;
> /**
> + * Tuples of this format belong to a temporary space and
> + * hence can be freed immediately while checkpointing is
> + * in progress.
> + */
> + bool temporary;
> + /**
> * The number of extra bytes to reserve in tuples before
> * field map.
> * \sa struct tuple
> diff --git a/src/errinj.h b/src/errinj.h
> index ab578274..4998fdcd 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -112,6 +112,7 @@ struct errinj {
> _(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
> _(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
> _(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
> + _(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
>
> ENUM0(errinj_id, ERRINJ_LIST);
> extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index e25a4594..d3765f11 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -20,6 +20,8 @@ errinj.info()
> state: false
> ERRINJ_VYRUN_DATA_READ:
> state: false
> + ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
> + state: 0
> ERRINJ_VY_SCHED_TIMEOUT:
> state: 0
> ERRINJ_WAL_WRITE_PARTIAL:
> @@ -1309,3 +1311,97 @@ s:select()
> s:drop()
> ---
> ...
> +for i = 1, 100 do box.space.test:insert{i} end
> +---
> +...
> +-- Create a temporary space.
> +count = 500
Please, is there a chance this test could write less than 50MB of data
to the database? A couple of orders of magnitude less would be really
nice.
The patch itself is good, of course.
> +---
> +...
> +pad = string.rep('x', 100 * 1024)
> +---
> +...
> +_ = box.schema.space.create('tmp', {temporary = true})
> +---
> +...
> +_ = box.space.tmp:create_index('pk')
> +---
> +...
> +for i = 1, count do box.space.tmp:insert{i, pad} end
> +---
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
More information about the Tarantool-patches
mailing list