[Tarantool-patches] [PATCH v3 3/3] vinyl: clean-up write iterator if vy_task_write_run() fails
Aleksandr Lyapunov
alyapunov at tarantool.org
Wed May 6 13:37:57 MSK 2020
This looks strange: as far as I can see, the start() method already performs
its own cleanup in case of failure and does not require a stop() call in that
case. If it does not do so correctly, it must be fixed (or removed completely,
although that seems to be a very serious change).
On 4/27/20 3:52 AM, Nikita Pettik wrote:
> In vy_task_write_run(), if write_iterator->start() fails, there's no call
> of the corresponding stop() method. vy_task_write_run() is executed in
> an auxiliary thread (dump or compaction). Meanwhile, creating/destroying
> tuples in these threads does not change the reference counter of the
> corresponding tuple formats (see vy_tuple_delete() and vy_stmt_alloc()).
> Without cleaning up the write iterator right in vy_task_write_run() after
> a failure, the clean-up takes place in vy_task_compaction_abort() and
> vy_task_dump_abort(). These *_abort() functions, in turn, are executed in
> the main thread. Consequently, a tuple may be allocated in an auxiliary
> thread and deleted in the main thread. As a result, the format reference
> counter decreases, whereas it shouldn't change (otherwise the tuple format
> will be destroyed before all tuples of this format are gone).
>
> A real example of the bug described above can be reproduced in the following
> way:
> 1. run compaction process;
> 2. add one or more slice sources in vy_write_iterator_start():
> corresponding slice_stream structures obtain newly created tuples
> in vy_slice_stream_next();
> 3. the next call of vy_write_iterator_add_src() fails due to OOM,
> invalid run file or whatever;
> 4. since there's no clean-up of tuples in slice streams, they are
> destroyed in vy_task_compaction_abort() in the main thread;
> 5. now the format reference counter is lower than it was before compaction.
>
> Closes #4864
> ---
> src/box/vy_scheduler.c | 4 +-
> src/box/vy_write_iterator.c | 9 ++
> src/errinj.h | 1 +
> test/box/errinj.result | 1 +
> .../gh-4864-stmt-alloc-fail-compact.result | 85 +++++++++++++++++++
> .../gh-4864-stmt-alloc-fail-compact.test.lua | 44 ++++++++++
> 6 files changed, 143 insertions(+), 1 deletion(-)
>
> diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
> index 9dba93d34..387f58723 100644
> --- a/src/box/vy_scheduler.c
> +++ b/src/box/vy_scheduler.c
> @@ -1065,8 +1065,10 @@ vy_task_write_run(struct vy_task *task, bool no_compression)
> no_compression) != 0)
> goto fail;
>
> - if (wi->iface->start(wi) != 0)
> + if (wi->iface->start(wi) != 0) {
> + wi->iface->stop(wi);
> goto fail_abort_writer;
> + }
> int rc;
> int loops = 0;
> struct tuple *stmt = NULL;
> diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
> index efb88d1ae..0b741b3dc 100644
> --- a/src/box/vy_write_iterator.c
> +++ b/src/box/vy_write_iterator.c
> @@ -400,6 +400,15 @@ vy_write_iterator_start(struct vy_stmt_stream *vstream)
> rlist_foreach_entry_safe(src, &stream->src_list, in_src_list, tmp) {
> if (vy_write_iterator_add_src(stream, src) != 0)
> return -1;
> +#ifndef NDEBUG
> + struct errinj *inj =
> + errinj(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL);
> + if (inj != NULL && inj->bparam) {
> + inj->bparam = false;
> + diag_set(OutOfMemory, 666, "malloc", "struct vy_stmt");
> + return -1;
> + }
> +#endif
> }
> return 0;
> }
> diff --git a/src/errinj.h b/src/errinj.h
> index b7550bb5e..8562aab1c 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -129,6 +129,7 @@ struct errinj {
> _(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
> _(ERRINJ_VY_STMT_ALLOC, ERRINJ_INT, {.iparam = -1})\
> _(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL, {.bparam = false})\
> + _(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL, {.bparam = false})\
>
> ENUM0(errinj_id, ERRINJ_LIST);
> extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index e1b9fbe2a..2a87b5f33 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -78,6 +78,7 @@ evals
> - ERRINJ_VY_SQUASH_TIMEOUT: 0
> - ERRINJ_VY_STMT_ALLOC: -1
> - ERRINJ_VY_TASK_COMPLETE: false
> + - ERRINJ_VY_WRITE_ITERATOR_START_FAIL: false
> - ERRINJ_WAL_BREAK_LSN: -1
> - ERRINJ_WAL_DELAY: false
> - ERRINJ_WAL_FALLOCATE: 0
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.result b/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> index af116a4b4..ea8dce0ba 100644
> --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> @@ -242,6 +242,91 @@ s:drop()
> | ---
> | ...
>
> +-- Make sure that there's no extra format unref due to tuple
> +-- clean-up in the main thread. To achieve this let's sabotage
> +-- compaction process and delete all tuples: in case ref/unref
> +-- is the same, format will be deleted alongside with the last
> +-- tuple.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> + | ---
> + | ...
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> + | ---
> + | ...
> +
> +dump(true)
> + | ---
> + | ...
> +dump()
> + | ---
> + | ...
> +
> +compact()
> + | ---
> + | ...
> +
> +dump()
> + | ---
> + | ...
> +assert(s.index.pk:stat().range_count == 1)
> + | ---
> + | - true
> + | ...
> +assert(s.index.pk:stat().run_count == 2)
> + | ---
> + | - true
> + | ...
> +
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', true)
> + | ---
> + | - ok
> + | ...
> +-- Prevent next attempt to compact in a row.
> +--
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 1)
> + | ---
> + | - ok
> + | ...
> +
> +s.index.pk:compact()
> + | ---
> + | ...
> +-- Leave a time gap between compaction and index drop just in case
> +-- (to make sure that compaction is already finished (re-scheduled)
> +-- when at the moment of index drop).
> +--
> +fiber.sleep(0.5)
> + | ---
> + | ...
> +
> +-- Drop is required to unref all tuples.
> +--
> +s:drop()
> + | ---
> + | ...
> +-- After index is dropped, not all tuples are deallocated at once:
> +-- they may be still referenced (while being pushed) in Lua. So
> +-- invoke GC explicitly.
> +--
> +collectgarbage("collect")
> + | ---
> + | - 0
> + | ...
> +-- Give GC some time to operate on.
> +--
> +fiber.sleep(1)
> + | ---
> + | ...
> +
> +assert(errinj.get('ERRINJ_VY_WRITE_ITERATOR_START_FAIL') == false)
> + | ---
> + | - true
> + | ...
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', false)
> + | ---
> + | - ok
> + | ...
> errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
> | ---
> | - ok
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> index a68c73d32..3c2b38160 100644
> --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> @@ -105,4 +105,48 @@ assert(errinj.get('ERRINJ_VY_READ_VIEW_MERGE_FAIL') == false)
> errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', false)
> s:drop()
>
> +-- Make sure that there's no extra format unref due to tuple
> +-- clean-up in the main thread. To achieve this let's sabotage
> +-- compaction process and delete all tuples: in case ref/unref
> +-- is the same, format will be deleted alongside with the last
> +-- tuple.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> +
> +dump(true)
> +dump()
> +
> +compact()
> +
> +dump()
> +assert(s.index.pk:stat().range_count == 1)
> +assert(s.index.pk:stat().run_count == 2)
> +
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', true)
> +-- Prevent next attempt to compact in a row.
> +--
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 1)
> +
> +s.index.pk:compact()
> +-- Leave a time gap between compaction and index drop just in case
> +-- (to make sure that compaction is already finished (re-scheduled)
> +-- when at the moment of index drop).
> +--
> +fiber.sleep(0.5)
> +
> +-- Drop is required to unref all tuples.
> +--
> +s:drop()
> +-- After index is dropped, not all tuples are deallocated at once:
> +-- they may be still referenced (while being pushed) in Lua. So
> +-- invoke GC explicitly.
> +--
> +collectgarbage("collect")
> +-- Give GC some time to operate on.
> +--
> +fiber.sleep(1)
> +
> +assert(errinj.get('ERRINJ_VY_WRITE_ITERATOR_START_FAIL') == false)
> +errinj.set('ERRINJ_VY_WRITE_ITERATOR_START_FAIL', false)
> errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
More information about the Tarantool-patches
mailing list