[Tarantool-patches] [PATCH v3 3/3] vinyl: clean-up write iterator if vy_task_write_run() fails

Aleksandr Lyapunov alyapunov at tarantool.org
Wed May 6 13:37:57 MSK 2020


This looks very strange to me: as far as I can see, the start() method
already does its own cleanup in case of failure and does not require a
stop() call in that case. If it does not do so correctly, it must be fixed
(or removed completely, but that seems to be a very serious change).

On 4/27/20 3:52 AM, Nikita Pettik wrote:
> In vy_task_write_run(), if write_iterator->start() fails, there's no call
> of the corresponding stop() method. vy_task_write_run() is executed in
> an auxiliary thread (dump or compaction). Meanwhile, creating/destroying
> tuples in these threads does not change the reference counter of the
> corresponding tuple formats (see vy_tuple_delete() and vy_stmt_alloc()).
> Without cleaning up the write iterator right in vy_task_write_run() after
> a failure, the clean-up takes place in vy_task_compaction_abort() and
> vy_task_dump_abort(). These *_abort() functions, in turn, are executed in
> the main thread. Taking this into consideration, a tuple may be allocated
> in an auxiliary thread and deleted in the main thread. As a result, the
> format reference counter decreases, whereas it shouldn't change (otherwise
> the tuple format will be destroyed before all tuples of this format are gone).
>
> Real example of the bug described above can be achieved in the following
> way:
> 1. run compaction process;
> 2. add one or more slice sources in vy_write_iterator_start():
> corresponding slice_stream structures obtain newly created tuples
> in vy_slice_stream_next();
> 3. the next call of vy_write_iterator_add_src() fails due to OOM,
> invalid run file or whatever;
> 4. since there's no clean-up of tuples in slice streams, they are
> destroyed in vy_task_compaction_abort(), which runs in the main thread;
> 5. now format reference counter is less than it was before compaction.
>
> Closes #4864
> ---
>   src/box/vy_scheduler.c                        |  4 +-
>   src/box/vy_write_iterator.c                   |  9 ++
>   src/errinj.h                                  |  1 +
>   test/box/errinj.result                        |  1 +
>   .../gh-4864-stmt-alloc-fail-compact.result    | 85 +++++++++++++++++++
>   .../gh-4864-stmt-alloc-fail-compact.test.lua  | 44 ++++++++++
>   6 files changed, 143 insertions(+), 1 deletion(-)
>
> diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
> index 9dba93d34..387f58723 100644
> --- a/src/box/vy_scheduler.c
> +++ b/src/box/vy_scheduler.c
> @@ -1065,8 +1065,10 @@ vy_task_write_run(struct vy_task *task, bool no_compression)
>   				 no_compression) != 0)
>   		goto fail;
>   
> -	if (wi->iface->start(wi) != 0)
> +	if (wi->iface->start(wi) != 0) {
> +		wi->iface->stop(wi);
>   		goto fail_abort_writer;
> +	}
>   	int rc;
>   	int loops = 0;
>   	struct tuple *stmt = NULL;
> diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
> index efb88d1ae..0b741b3dc 100644
> --- a/src/box/vy_write_iterator.c
> +++ b/src/box/vy_write_iterator.c
> @@ -400,6 +400,15 @@ vy_write_iterator_start(struct vy_stmt_stream *vstream)
>   	rlist_foreach_entry_safe(src, &stream->src_list, in_src_list, tmp) {
>   		if (vy_write_iterator_add_src(stream, src) != 0)
>   			return -1;
> +#ifndef NDEBUG
> +		struct errinj *inj =
> +			errinj(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL);
> +		if (inj != NULL && inj->bparam) {
> +			inj->bparam = false;
> +			diag_set(OutOfMemory, 666, "malloc", "struct vy_stmt");
> +			return -1;
> +		}
> +#endif
>   	}
>   	return 0;
>   }
> diff --git a/src/errinj.h b/src/errinj.h
> index b7550bb5e..8562aab1c 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -129,6 +129,7 @@ struct errinj {
>   	_(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
>   	_(ERRINJ_VY_STMT_ALLOC, ERRINJ_INT, {.iparam = -1})\
>   	_(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL, {.bparam = false})\
> +	_(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL, {.bparam = false})\
>   
>   ENUM0(errinj_id, ERRINJ_LIST);
>   extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index e1b9fbe2a..2a87b5f33 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -78,6 +78,7 @@ evals
>     - ERRINJ_VY_SQUASH_TIMEOUT: 0
>     - ERRINJ_VY_STMT_ALLOC: -1
>     - ERRINJ_VY_TASK_COMPLETE: false
> +  - ERRINJ_VY_WRITE_ITERATOR_START_FAIL: false
>     - ERRINJ_WAL_BREAK_LSN: -1
>     - ERRINJ_WAL_DELAY: false
>     - ERRINJ_WAL_FALLOCATE: 0
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.result b/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> index af116a4b4..ea8dce0ba 100644
> --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> @@ -242,6 +242,91 @@ s:drop()
>    | ---
>    | ...
>   
> +-- Make sure that there's no extra format unref due to tuple
> +-- clean-up in the main thread. To achieve this let's sabotage
> +-- compaction process and delete all tuples: in case ref/unref
> +-- is the same, format will be deleted alongside with the last
> +-- tuple.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> + | ---
> + | ...
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> + | ---
> + | ...
> +
> +dump(true)
> + | ---
> + | ...
> +dump()
> + | ---
> + | ...
> +
> +compact()
> + | ---
> + | ...
> +
> +dump()
> + | ---
> + | ...
> +assert(s.index.pk:stat().range_count == 1)
> + | ---
> + | - true
> + | ...
> +assert(s.index.pk:stat().run_count == 2)
> + | ---
> + | - true
> + | ...
> +
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', true)
> + | ---
> + | - ok
> + | ...
> +-- Prevent next attempt to compact in a row.
> +--
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 1)
> + | ---
> + | - ok
> + | ...
> +
> +s.index.pk:compact()
> + | ---
> + | ...
> +-- Leave a time gap between compaction and index drop just in case
> +-- (to make sure that compaction is already finished (re-scheduled)
> +--  when at the moment of index drop).
> +--
> +fiber.sleep(0.5)
> + | ---
> + | ...
> +
> +-- Drop is required to unref all tuples.
> +--
> +s:drop()
> + | ---
> + | ...
> +-- After index is dropped, not all tuples are deallocated at once:
> +-- they may be still referenced (while being pushed) in Lua. So
> +-- invoke GC explicitly.
> +--
> +collectgarbage("collect")
> + | ---
> + | - 0
> + | ...
> +-- Give GC some time to operate on.
> +--
> +fiber.sleep(1)
> + | ---
> + | ...
> +
> +assert(errinj.get('ERRINJ_VY_WRITE_ITERATOR_START_FAIL') == false)
> + | ---
> + | - true
> + | ...
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', false)
> + | ---
> + | - ok
> + | ...
>   errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
>    | ---
>    | - ok
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> index a68c73d32..3c2b38160 100644
> --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> @@ -105,4 +105,48 @@ assert(errinj.get('ERRINJ_VY_READ_VIEW_MERGE_FAIL') == false)
>   errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', false)
>   s:drop()
>   
> +-- Make sure that there's no extra format unref due to tuple
> +-- clean-up in the main thread. To achieve this let's sabotage
> +-- compaction process and delete all tuples: in case ref/unref
> +-- is the same, format will be deleted alongside with the last
> +-- tuple.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> +
> +dump(true)
> +dump()
> +
> +compact()
> +
> +dump()
> +assert(s.index.pk:stat().range_count == 1)
> +assert(s.index.pk:stat().run_count == 2)
> +
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', true)
> +-- Prevent next attempt to compact in a row.
> +--
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 1)
> +
> +s.index.pk:compact()
> +-- Leave a time gap between compaction and index drop just in case
> +-- (to make sure that compaction is already finished (re-scheduled)
> +--  when at the moment of index drop).
> +--
> +fiber.sleep(0.5)
> +
> +-- Drop is required to unref all tuples.
> +--
> +s:drop()
> +-- After index is dropped, not all tuples are deallocated at once:
> +-- they may be still referenced (while being pushed) in Lua. So
> +-- invoke GC explicitly.
> +--
> +collectgarbage("collect")
> +-- Give GC some time to operate on.
> +--
> +fiber.sleep(1)
> +
> +assert(errinj.get('ERRINJ_VY_WRITE_ITERATOR_START_FAIL') == false)
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', false)
>   errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)


More information about the Tarantool-patches mailing list