From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Fri, 8 Jun 2018 06:56:36 +0300 From: Konstantin Osipov Subject: Re: [PATCH] memtx: don't delay deletion of temporary tuples during snapshot Message-ID: <20180608035636.GG6866@chai> References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: To: Vladimir Davydov Cc: tarantool-patches@freelists.org List-ID: * Vladimir Davydov [18/06/04 23:48]: > Since tuples stored in temporary spaces are never written to disk, we > can always delete them immediately, even when a snapshot is in progress. > > Closes #3432 OK to push. > --- > https://github.com/tarantool/tarantool/issues/3432 > https://github.com/tarantool/tarantool/commits/gh-3432-memtx-dont-delay-free-temp-tuples > > src/box/memtx_engine.c | 8 +++- > src/box/memtx_space.c | 1 + > src/box/tuple_format.c | 1 + > src/box/tuple_format.h | 6 +++ > src/errinj.h | 1 + > test/box/errinj.result | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ > test/box/errinj.test.lua | 44 ++++++++++++++++++++++ > 7 files changed, 156 insertions(+), 1 deletion(-) > > diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c > index fac84ce1..675ebb59 100644 > --- a/src/box/memtx_engine.c > +++ b/src/box/memtx_engine.c > @@ -463,6 +463,11 @@ memtx_engine_bootstrap(struct engine *engine) > static int > checkpoint_write_row(struct xlog *l, struct xrow_header *row) > { > + struct errinj *errinj = errinj(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, > + ERRINJ_DOUBLE); > + if (errinj != NULL && errinj->dparam > 0) > + usleep(errinj->dparam * 1000000); > + > static ev_tstamp last = 0; > if (last == 0) { > ev_now_update(loop()); > @@ -1138,7 +1143,8 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple) > struct memtx_tuple *memtx_tuple = > container_of(tuple, struct memtx_tuple, base); > if (memtx->alloc.free_mode != SMALL_DELAYED_FREE || > - memtx_tuple->version == memtx->snapshot_version) > + memtx_tuple->version == 
memtx->snapshot_version || > + format->temporary) > smfree(&memtx->alloc, memtx_tuple, total); > else > smfree_delayed(&memtx->alloc, memtx_tuple, total); > diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c > index f17df58c..aef7e788 100644 > --- a/src/box/memtx_space.c > +++ b/src/box/memtx_space.c > @@ -896,6 +896,7 @@ memtx_space_new(struct memtx_engine *memtx, > return NULL; > } > format->engine = memtx; > + format->temporary = def->opts.temporary; > format->exact_field_count = def->exact_field_count; > tuple_format_ref(format); > > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c > index 277d9e7f..486646ea 100644 > --- a/src/box/tuple_format.c > +++ b/src/box/tuple_format.c > @@ -270,6 +270,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys, > format->vtab = *vtab; > format->engine = NULL; > format->extra_size = extra_size; > + format->temporary = false; > if (tuple_format_register(format) < 0) { > tuple_format_destroy(format); > free(format); > diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h > index d8c898b8..9da9be3e 100644 > --- a/src/box/tuple_format.h > +++ b/src/box/tuple_format.h > @@ -124,6 +124,12 @@ struct tuple_format { > /** Reference counter */ > int refs; > /** > + * Tuples of this format belong to a temporary space and > + * hence can be freed immediately while checkpointing is > + * in progress. > + */ > + bool temporary; > + /** > * The number of extra bytes to reserve in tuples before > * field map. 
> * \sa struct tuple > diff --git a/src/errinj.h b/src/errinj.h > index ab578274..4998fdcd 100644 > --- a/src/errinj.h > +++ b/src/errinj.h > @@ -112,6 +112,7 @@ struct errinj { > _(ERRINJ_HTTPC_EXECUTE, ERRINJ_BOOL, {.bparam = false}) \ > _(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \ > _(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \ > + _(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \ > > ENUM0(errinj_id, ERRINJ_LIST); > extern struct errinj errinjs[]; > diff --git a/test/box/errinj.result b/test/box/errinj.result > index e25a4594..d3765f11 100644 > --- a/test/box/errinj.result > +++ b/test/box/errinj.result > @@ -20,6 +20,8 @@ errinj.info() > state: false > ERRINJ_VYRUN_DATA_READ: > state: false > + ERRINJ_SNAP_WRITE_ROW_TIMEOUT: > + state: 0 > ERRINJ_VY_SCHED_TIMEOUT: > state: 0 > ERRINJ_WAL_WRITE_PARTIAL: > @@ -1309,3 +1311,97 @@ s:select() > s:drop() > --- > ... > +for i = 1, 100 do box.space.test:insert{i} end > +--- > +... > +-- Create a temporary space. > +count = 500 Please, is there a chance this test could write less than 50MB of data to the database? A couple of orders of magnitude less would be really nice. The patch itself is good, of course. > +--- > +... > +pad = string.rep('x', 100 * 1024) > +--- > +... > +_ = box.schema.space.create('tmp', {temporary = true}) > +--- > +... > +_ = box.space.tmp:create_index('pk') > +--- > +... > +for i = 1, count do box.space.tmp:insert{i, pad} end > +--- -- Konstantin Osipov, Moscow, Russia, +7 903 626 22 32 http://tarantool.io - www.twitter.com/kostja_osipov