[Tarantool-patches] [PATCH v3 2/3] vinyl: clean-up unprocessed read views in *_build_read_views()

Aleksandr Lyapunov alyapunov at tarantool.org
Wed May 6 12:56:52 MSK 2020


This will work, but I think we have to make the patch better.

The cause of the issue is that the state of an iterator is not obvious. 
Is the history
collected or not? Is it merged already? Must it be cleaned up? Even the 
cleanup
is split to two stages - tuple unrefs and region truncate.

Now you propose to call  vy_read_view_stmt_destroy from several places 
that also
unrefs tuples from read view (not only from its history). This makes  
the state
of an iterator even more unpredictable: when vy_write_iterator_stop is 
called
sometimes read views are already cleaned up, sometimes are not.

Let's fix this mess.
1. History is collected (rallocated and refed) only in 
vy_write_iterator_build_history.
2. The history must be destructed after call to 
vy_write_iterator_build_history in any case,
whether an error occurred or not (comment about it?).
3. There must be a method for history destruction with two steps: unref 
all and rtruncate.
4. Only vy_write_iterator_build_read_views must call this method. 
vy_write_iterator_stop
or anybody else shouldn't be concerned about history.
5. vy_write_iterator_build_read_views must not unref resulting tuples 
(entry.stmt) in case of error,
vy_write_iterator_stop must do it.


On 4/27/20 3:52 AM, Nikita Pettik wrote:
> vy_write_iterator->read_views[i].history objects are allocated on
> region (see vy_write_iterator_push_rv()) during building history of the
> given key. However, in case of fail of vy_write_iterator_build_history()
> region is truncated but pointers to vy_write_history objects are not
> nullified. As a result, they may be accessed (for instance while
> finalizing write_iterator object in  vy_write_iterator_stop) which in
> turn may lead to crash, segfaul or disk formatting. The same may happen
> if vy_read_view_merge() fails during processing of read view array.
> Let's clean-up those objects in case of error takes place.
>
> Part of #4864
> ---
>   src/box/vy_write_iterator.c                   |  23 +++-
>   src/errinj.h                                  |   1 +
>   test/box/errinj.result                        |   1 +
>   .../gh-4864-stmt-alloc-fail-compact.result    | 125 ++++++++++++++++++
>   .../gh-4864-stmt-alloc-fail-compact.test.lua  |  53 ++++++++
>   5 files changed, 200 insertions(+), 3 deletions(-)
>
> diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
> index 7a6a20627..efb88d1ae 100644
> --- a/src/box/vy_write_iterator.c
> +++ b/src/box/vy_write_iterator.c
> @@ -790,8 +790,11 @@ next_lsn:
>   	 * statement around if this is major compaction, because
>   	 * there's no tuple it could overwrite.
>   	 */
> -	if (rc == 0 && stream->is_last_level &&
> -	    stream->deferred_delete_stmt != NULL) {
> +	if (rc != 0) {
> +		for (int i = 0; i < stream->rv_count; ++i)
> +			vy_read_view_stmt_destroy(&stream->read_views[i]);
> +	} else if (stream->is_last_level &&
> +		   stream->deferred_delete_stmt != NULL) {
>   		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
>   		stream->deferred_delete_stmt = NULL;
>   	}
> @@ -834,6 +837,15 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *hint,
>   		rv->history = NULL;
>   		return 0;
>   	}
> +#ifndef NDEBUG
> +	struct errinj *inj =
> +		errinj(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL);
> +	if (inj != NULL && inj->bparam) {
> +		inj->bparam = false;
> +		diag_set(OutOfMemory, 666, "malloc", "struct vy_stmt");
> +		return -1;
> +	}
> +#endif
>   	/*
>   	 * Two possible hints to remove the current UPSERT.
>   	 * 1. If the stream is working on the last level, we
> @@ -983,8 +995,13 @@ vy_write_iterator_build_read_views(struct vy_write_iterator *stream, int *count)
>   		if (rv->history == NULL)
>   			continue;
>   		if (vy_read_view_merge(stream, hint, rv,
> -				       is_first_insert) != 0)
> +				       is_first_insert) != 0) {
> +			while (rv >= &stream->read_views[0]) {
> +				vy_read_view_stmt_destroy(rv);
> +				rv--;
> +			}
>   			goto error;
> +		}
>   		assert(rv->history == NULL);
>   		if (rv->tuple == NULL)
>   			continue;
> diff --git a/src/errinj.h b/src/errinj.h
> index 383dafcb5..b7550bb5e 100644
> --- a/src/errinj.h
> +++ b/src/errinj.h
> @@ -128,6 +128,7 @@ struct errinj {
>   	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
>   	_(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
>   	_(ERRINJ_VY_STMT_ALLOC, ERRINJ_INT, {.iparam = -1})\
> +	_(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL, {.bparam = false})\
>   
>   ENUM0(errinj_id, ERRINJ_LIST);
>   extern struct errinj errinjs[];
> diff --git a/test/box/errinj.result b/test/box/errinj.result
> index efbb4e85e..e1b9fbe2a 100644
> --- a/test/box/errinj.result
> +++ b/test/box/errinj.result
> @@ -68,6 +68,7 @@ evals
>     - ERRINJ_VY_READ_PAGE: false
>     - ERRINJ_VY_READ_PAGE_DELAY: false
>     - ERRINJ_VY_READ_PAGE_TIMEOUT: 0
> +  - ERRINJ_VY_READ_VIEW_MERGE_FAIL: false
>     - ERRINJ_VY_RUN_DISCARD: false
>     - ERRINJ_VY_RUN_FILE_RENAME: false
>     - ERRINJ_VY_RUN_WRITE: false
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.result b/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> index 1afc02bef..af116a4b4 100644
> --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.result
> @@ -121,3 +121,128 @@ errinj.set('ERRINJ_VY_STMT_ALLOC', -1)
>   s:drop()
>    | ---
>    | ...
> +
> +-- All the same except for delayed vy_stmt_alloc() fail.
> +-- Re-create space for the sake of test purity.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> + | ---
> + | ...
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> + | ---
> + | ...
> +
> +dump(true)
> + | ---
> + | ...
> +dump()
> + | ---
> + | ...
> +
> +compact()
> + | ---
> + | ...
> +
> +dump()
> + | ---
> + | ...
> +
> +errinj = box.error.injection
> + | ---
> + | ...
> +errinj.set('ERRINJ_VY_STMT_ALLOC', 5)
> + | ---
> + | - ok
> + | ...
> +-- Compaction of first range fails, so it is re-scheduled and
> +-- then successfully finishes at the second attempt.
> +--
> +compact()
> + | ---
> + | ...
> +assert(s.index.pk:stat().range_count == 2)
> + | ---
> + | - true
> + | ...
> +assert(s.index.pk:stat().run_count == 2)
> + | ---
> + | - true
> + | ...
> +assert(errinj.get('ERRINJ_VY_STMT_ALLOC') == -1)
> + | ---
> + | - true
> + | ...
> +errinj.set('ERRINJ_VY_STMT_ALLOC', -1)
> + | ---
> + | - ok
> + | ...
> +-- Unthrottle scheduler to allow next dump.
> +--
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.0001)
> + | ---
> + | - ok
> + | ...
> +
> +s:drop()
> + | ---
> + | ...
> +
> +-- Once again but test that clean-up is made in case
> +-- vy_read_view_merge() fails.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> + | ---
> + | ...
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> + | ---
> + | ...
> +
> +dump(true)
> + | ---
> + | ...
> +dump()
> + | ---
> + | ...
> +
> +compact()
> + | ---
> + | ...
> +
> +dump()
> + | ---
> + | ...
> +
> +errinj = box.error.injection
> + | ---
> + | ...
> +errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', true)
> + | ---
> + | - ok
> + | ...
> +compact()
> + | ---
> + | ...
> +assert(s.index.pk:stat().range_count == 2)
> + | ---
> + | - true
> + | ...
> +assert(s.index.pk:stat().run_count == 2)
> + | ---
> + | - true
> + | ...
> +assert(errinj.get('ERRINJ_VY_READ_VIEW_MERGE_FAIL') == false)
> + | ---
> + | - true
> + | ...
> +errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', false)
> + | ---
> + | - ok
> + | ...
> +s:drop()
> + | ---
> + | ...
> +
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
> + | ---
> + | - ok
> + | ...
> diff --git a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> index bf70bdf75..a68c73d32 100644
> --- a/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> +++ b/test/vinyl/gh-4864-stmt-alloc-fail-compact.test.lua
> @@ -53,3 +53,56 @@ assert(errinj.get('ERRINJ_VY_STMT_ALLOC') == -1)
>   errinj.set('ERRINJ_VY_STMT_ALLOC', -1)
>   
>   s:drop()
> +
> +-- All the same except for delayed vy_stmt_alloc() fail.
> +-- Re-create space for the sake of test purity.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> +
> +dump(true)
> +dump()
> +
> +compact()
> +
> +dump()
> +
> +errinj = box.error.injection
> +errinj.set('ERRINJ_VY_STMT_ALLOC', 5)
> +-- Compaction of first range fails, so it is re-scheduled and
> +-- then successfully finishes at the second attempt.
> +--
> +compact()
> +assert(s.index.pk:stat().range_count == 2)
> +assert(s.index.pk:stat().run_count == 2)
> +assert(errinj.get('ERRINJ_VY_STMT_ALLOC') == -1)
> +errinj.set('ERRINJ_VY_STMT_ALLOC', -1)
> +-- Unthrottle scheduler to allow next dump.
> +--
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.0001)
> +
> +s:drop()
> +
> +-- Once again but test that clean-up is made in case
> +-- vy_read_view_merge() fails.
> +--
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
> +
> +dump(true)
> +dump()
> +
> +compact()
> +
> +dump()
> +
> +errinj = box.error.injection
> +errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', true)
> +compact()
> +assert(s.index.pk:stat().range_count == 2)
> +assert(s.index.pk:stat().run_count == 2)
> +assert(errinj.get('ERRINJ_VY_READ_VIEW_MERGE_FAIL') == false)
> +errinj.set('ERRINJ_VY_READ_VIEW_MERGE_FAIL', false)
> +s:drop()
> +
> +errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)


More information about the Tarantool-patches mailing list