[PATCH] memtx: don't delay deletion of temporary tuples during snapshot

Konstantin Osipov kostja at tarantool.org
Fri Jun 8 06:56:36 MSK 2018


* Vladimir Davydov <vdavydov.dev at gmail.com> [18/06/04 23:48]:
> Since tuples stored in temporary spaces are never written to disk, we
> can always delete them immediately, even when a snapshot is in progress.
> 
> Closes #3432

OK to push.

> ---
> https://github.com/tarantool/tarantool/issues/3432
> https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples
> 
>  src/box/memtx_engine.c   |  8 +++-
>  src/box/memtx_space.c    |  1 +
>  src/box/tuple_format.c   |  1 +
>  src/box/tuple_format.h   |  6 +++
>  src/errinj.h             |  1 +
>  test/box/errinj.result   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
>  test/box/errinj.test.lua | 44 ++++++++++++++++++++++
>  7 files changed, 156 insertions(+), 1 deletion(-)
> 
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index fac84ce1..675ebb59 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine)
>  static int
>  checkpoint_write_row(struct xlog *l, struct xrow_header *row)
>  {
> +	struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT,
> +				       ERRINJ_DOUBLE);
> +	if (errinj != NULL && errinj->dparam > 0)
> +		usleep(errinj->dparam * 1000000);
> +
>  	static ev_tstamp last = 0;
>  	if (last == 0) {
>  		ev_now_update(loop());
> @@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
>  	struct memtx_tuple *memtx_tuple =
>  		container_of(tuple, struct memtx_tuple, base);
>  	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
> -	    memtx_tuple->version == memtx->snapshot_version)
> +	    memtx_tuple->version == memtx->snapshot_version ||
> +	    format->temporary)
>  		smfree(&memtx->alloc, memtx_tuple, total);
>  	else
>  		smfree_delayed(&memtx->alloc, memtx_tuple, total);
> diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
> index f17df58c..aef7e788 100644
> --- a/src/box/memtx_space.c
> +++ b/src/box/memtx_space.c
> @@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx,
>  		return NULL;
>  	}
>  	format->engine = memtx;
> +	format->temporary = def->opts.temporary;
>  	format->exact_field_count = def->exact_field_count;
>  	tuple_format_ref(format);
>  
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 277d9e7f..486646ea 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
>  	format->vtab = *vtab;
>  	format->engine = NULL;
>  	format->extra_size = extra_size;
> +	format->temporary = false;
>  	if (tuple_format_register(format) < 0) {
>  		tuple_format_destroy(format);
>  		free(format);
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index d8c898b8..9da9be3e 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -124,6 +124,12 @@ struct tuple_format {
>  	/** Reference counter */
>  	int refs;
>  	/**
> +	 * Tuples of this format belong to a temporary space and
> +	 * hence can be freed immediately while checkpointing is
> +	 * in progress.
> +	 */
> +	bool temporary;
> +	/**
>  	 * The number of extra bytes to reserve in tuples before
>  	 * field map.
>  	 * \sa struct tuple
> diff --git a/src/errinj.h b/src/errinj.h
> index ab578274..4998fdcd 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -112,6 +112,7 @@ struct errinj {
>  	_(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \
>  	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
>  	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
> +	_(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
>  
>  ENUM0(errinj_id, ERRINJ_LIST);
>  extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index e25a4594..d3765f11 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -20,6 +20,8 @@ errinj.info()
>      state: false
>    ERRINJ_VYRUN_DATA_READ:
>      state: false
> +  ERRINJ_SNAP_WRITE_ROW_TIMEOUT:
> +    state: 0
>    ERRINJ_VY_SCHED_TIMEOUT:
>      state: 0
>    ERRINJ_WAL_WRITE_PARTIAL:
> @@ -1309,3 +1311,97 @@ s:select()
>  s:drop()
>  ---
>  ...
> +for i = 1, 100 do box.space.test:insert{i} end
> +---
> +...
> +-- Create a temporary space.
> +count = 500

Please, is there a chance this test could write less than 50MB of
data to the database? A couple of orders of magnitude less would be
really nice.

The patch itself is good, of course.

> +---
> +...
> +pad = string.rep('x', 100 * 1024)
> +---
> +...
> +_ = box.schema.space.create('tmp', {temporary = true})
> +---
> +...
> +_ = box.space.tmp:create_index('pk')
> +---
> +...
> +for i = 1, count do box.space.tmp:insert{i, pad} end
> +---
-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov



More information about the Tarantool-patches mailing list