[tarantool-patches] [PATCH 3/4] vinyl: abort affected transactions when space is removed from cache

Георгий Кириченко georgy at tarantool.org
Mon Mar 25 08:45:41 MSK 2019


On Sunday, March 24, 2019 12:07:23 AM MSK Vladimir Davydov wrote:
> A DDL operation creates a new struct space container, moving unaffected
> indexes from the old container, then destroying it. The problem is there
> may be a DML request for this space, which was passed the old container
> in the argument list and then yielded on disk read. When it resumes, it
> may try to dereference the old space container, which may have already
> been destroyed. This will most certainly result in a crash.
> 
> To address this problem, we introduce a new space callback, invalidate,
> which is called for the old space on space_cache_replace(). In case of
> vinyl, this callback aborts all transactions involving the space. To
> prevent a DML request from dereferencing a destroyed space, we also make
> the iterator code check the current transaction state after each yield
> and return an error if it was aborted. This should make any DML request
> bail out early without dereferencing the space anymore.
> 
> Closes #3420
> ---
>  src/box/blackhole.c            |   1 +
>  src/box/memtx_space.c          |   1 +
>  src/box/schema.cc              |   3 ++
>  src/box/space.c                |   6 +++
>  src/box/space.h                |  15 ++++++
>  src/box/sysview.c              |   1 +
>  src/box/vinyl.c                |  21 ++++++++
>  src/box/vy_point_lookup.c      |  14 ++++++
>  src/box/vy_read_iterator.c     |  13 +++++
>  test/vinyl/errinj_ddl.result   | 110
> +++++++++++++++++++++++++++++++++++++++++ test/vinyl/errinj_ddl.test.lua | 
> 51 +++++++++++++++++++
>  11 files changed, 236 insertions(+)
> 
> diff --git a/src/box/blackhole.c b/src/box/blackhole.c
> index 95064e1f..b69e543a 100644
> --- a/src/box/blackhole.c
> +++ b/src/box/blackhole.c
> @@ -129,6 +129,7 @@ static const struct space_vtab blackhole_space_vtab = {
>  	/* .build_index = */ generic_space_build_index,
>  	/* .swap_index = */ generic_space_swap_index,
>  	/* .prepare_alter = */ generic_space_prepare_alter,
> +	/* .invalidate = */ generic_space_invalidate,
>  };
> 
>  static void
> diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
> index 92ec0b30..d8529fe0 100644
> --- a/src/box/memtx_space.c
> +++ b/src/box/memtx_space.c
> @@ -961,6 +961,7 @@ static const struct space_vtab memtx_space_vtab = {
>  	/* .build_index = */ memtx_space_build_index,
>  	/* .swap_index = */ generic_space_swap_index,
>  	/* .prepare_alter = */ memtx_space_prepare_alter,
> +	/* .invalidate = */ generic_space_invalidate,
>  };
> 
>  struct space *
> diff --git a/src/box/schema.cc b/src/box/schema.cc
> index 74d70d8d..4123dc9e 100644
> --- a/src/box/schema.cc
> +++ b/src/box/schema.cc
> @@ -251,6 +251,9 @@ space_cache_replace(struct space *old_space, struct
> space *new_space) mh_strnptr_del(spaces_by_name, k, NULL);
>  	}
>  	space_cache_version++;
> +
> +	if (old_space != NULL)
> +		space_invalidate(old_space);
>  }
> 
>  /** A wrapper around space_new() for data dictionary spaces. */
> diff --git a/src/box/space.c b/src/box/space.c
> index e140f3d5..1379fa34 100644
> --- a/src/box/space.c
> +++ b/src/box/space.c
> @@ -637,4 +637,10 @@ generic_space_prepare_alter(struct space *old_space,
> struct space *new_space) return 0;
>  }
> 
> +void
> +generic_space_invalidate(struct space *space)
> +{
> +	(void)space;
> +}
> +
>  /* }}} */
> diff --git a/src/box/space.h b/src/box/space.h
> index 211706d2..1bcb1284 100644
> --- a/src/box/space.h
> +++ b/src/box/space.h
> @@ -133,6 +133,14 @@ struct space_vtab {
>  	 */
>  	int (*prepare_alter)(struct space *old_space,
>  			     struct space *new_space);
> +	/**
> +	 * Called right after removing the space from the cache.
> +	 * The engine should abort all transactions involving
> +	 * the space, because the space will be destroyed soon.
> +	 *
> +	 * This function isn't allowed to yield or fail.
> +	 */
> +	void (*invalidate)(struct space *space);
>  };
> 
>  struct space {
> @@ -407,6 +415,12 @@ space_prepare_alter(struct space *old_space, struct
> space *new_space) return new_space->vtab->prepare_alter(old_space,
> new_space);
>  }
> 
> +static inline void
> +space_invalidate(struct space *space)
> +{
> +	return space->vtab->invalidate(space);
> +}
> +
>  static inline bool
>  space_is_memtx(struct space *space) { return space->engine->id == 0; }
> 
> @@ -469,6 +483,7 @@ int generic_space_check_format(struct space *, struct
> tuple_format *); int generic_space_build_index(struct space *, struct index
> *,
>  			      struct tuple_format *);
>  int generic_space_prepare_alter(struct space *, struct space *);
> +void generic_space_invalidate(struct space *);
> 
>  #if defined(__cplusplus)
>  } /* extern "C" */
> diff --git a/src/box/sysview.c b/src/box/sysview.c
> index 63b669d0..f9edd2d0 100644
> --- a/src/box/sysview.c
> +++ b/src/box/sysview.c
> @@ -495,6 +495,7 @@ static const struct space_vtab sysview_space_vtab = {
>  	/* .build_index = */ generic_space_build_index,
>  	/* .swap_index = */ generic_space_swap_index,
>  	/* .prepare_alter = */ generic_space_prepare_alter,
> +	/* .invalidate = */ generic_space_invalidate,
>  };
> 
>  static void
> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
> index a6a5f187..da891025 100644
> --- a/src/box/vinyl.c
> +++ b/src/box/vinyl.c
> @@ -1031,6 +1031,26 @@ vinyl_space_prepare_alter(struct space *old_space,
> struct space *new_space) return 0;
>  }
> 
> +static void
> +vinyl_space_invalidate(struct space *space)
> +{
> +	struct vy_env *env = vy_env(space->engine);
> +	/*
> +	 * Abort all transactions involving the invalidated space.
> +	 * An aborted transaction doesn't allow any DML/DQL requests
> +	 * so the space won't be used anymore and can be safely
> +	 * destroyed.
> +	 *
> +	 * There's a subtle corner case though - a transaction can
> +	 * be reading disk from a DML request right now, with this
> +	 * space passed to it in the argument list. However, it's
> +	 * handled as well: the iterator will return an error as
> +	 * soon as it's done reading disk, which will make the DML
> +	 * request bail out early, without dereferencing the space.
> +	 */
> +	tx_manager_abort_writers_for_ddl(env->xm, space);

As tx_manager_abort_writers_for_ddl ignores applier sessions it wouldn't fix 
the issue. I think a separate function should be introduced.

> +}
> +
>  /**
>   * This function is called after installing on_replace trigger
>   * used for propagating changes done during DDL. It aborts all
> @@ -4543,6 +4563,7 @@ static const struct space_vtab vinyl_space_vtab = {
>  	/* .build_index = */ vinyl_space_build_index,
>  	/* .swap_index = */ vinyl_space_swap_index,
>  	/* .prepare_alter = */ vinyl_space_prepare_alter,
> +	/* .invalidate = */ vinyl_space_invalidate,
>  };
> 
>  static const struct index_vtab vinyl_index_vtab = {
> diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
> index 9f7796eb..9e1f3ca7 100644
> --- a/src/box/vy_point_lookup.c
> +++ b/src/box/vy_point_lookup.c
> @@ -198,6 +198,7 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
>  {
>  	/* All key parts must be set for a point lookup. */
>  	assert(vy_stmt_is_full_key(key, lsm->cmp_def));
> +	assert(tx == NULL || tx->state == VINYL_TX_READY);
> 
>  	*ret = NULL;
>  	double start_time = ev_monotonic_now(loop());
> @@ -239,6 +240,19 @@ restart:
>  		errinj(ERRINJ_VY_POINT_ITER_WAIT, ERRINJ_BOOL)->bparam = false;
>  	});
> 
> +	if (tx != NULL && tx->state == VINYL_TX_ABORT) {
> +		/*
> +		 * The transaction was aborted while we were reading
> +		 * disk. We must stop now and return an error as this
> +		 * function could be called by a DML request aborted
> +		 * by a DDL operation: failing early will prevent it
> +		 * from dereferencing a destroyed space.
> +		 */
> +		diag_set(ClientError, ER_TRANSACTION_CONFLICT);
> +		rc = -1;
> +		goto done;
> +	}
> +
>  	if (mem_list_version != lsm->mem_list_version) {
>  		/*
>  		 * Mem list was changed during yield. This could be rotation
> diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
> index 2864a280..60c43555 100644
> --- a/src/box/vy_read_iterator.c
> +++ b/src/box/vy_read_iterator.c
> @@ -501,6 +501,17 @@ rescan_disk:
>  	}
>  	vy_read_iterator_unpin_slices(itr);
>  	/*
> +	 * The transaction could have been aborted while we were
> +	 * reading disk. We must stop now and return an error as
> +	 * this function could be called by a DML request that
> +	 * was aborted by a DDL operation: failing will prevent
> +	 * it from dereferencing a destroyed space.
> +	 */
> +	if (itr->tx != NULL && itr->tx->state == VINYL_TX_ABORT) {
> +		diag_set(ClientError, ER_TRANSACTION_CONFLICT);
> +		return -1;
> +	}
> +	/*
>  	 * The list of in-memory indexes and/or the range tree could
>  	 * have been modified by dump/compaction while we were fetching
>  	 * data from disk. Restart the iterator if this is the case.
> @@ -842,6 +853,8 @@ vy_read_iterator_track_read(struct vy_read_iterator
> *itr, struct tuple *stmt) NODISCARD int
>  vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result)
>  {
> +	assert(itr->tx == NULL || itr->tx->state == VINYL_TX_READY);
> +
>  	ev_tstamp start_time = ev_monotonic_now(loop());
> 
>  	struct vy_lsm *lsm = itr->lsm;
> diff --git a/test/vinyl/errinj_ddl.result b/test/vinyl/errinj_ddl.result
> index 04cb581f..794125e8 100644
> --- a/test/vinyl/errinj_ddl.result
> +++ b/test/vinyl/errinj_ddl.result
> @@ -682,3 +682,113 @@ box.commit()
>  s:drop()
>  ---
>  ...
> +--
> +-- gh-3420: crash if DML races with DDL.
> +--
> +fiber = require('fiber')
> +---
> +...
> +-- turn off cache for error injection to work
> +default_vinyl_cache = box.cfg.vinyl_cache
> +---
> +...
> +box.cfg{vinyl_cache = 0}
> +---
> +...
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +---
> +...
> +_ = s:create_index('i1')
> +---
> +...
> +_ = s:create_index('i2', {parts = {2, 'unsigned'}})
> +---
> +...
> +_ = s:create_index('i3', {parts = {3, 'unsigned'}})
> +---
> +...
> +_ = s:create_index('i4', {parts = {4, 'unsigned'}})
> +---
> +...
> +for i = 1, 5 do s:replace({i, i, i, i}) end
> +---
> +...
> +box.snapshot()
> +---
> +- ok
> +...
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function async(f)
> +    local ch = fiber.channel(1)
> +    fiber.create(function()
> +        local _, res = pcall(f)
> +        ch:put(res)
> +    end)
> +    return ch
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +-- Issue a few DML requests that require reading disk.
> +-- Stall disk reads.
> +errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.01)
> +---
> +- ok
> +...
> +ch1 = async(function() s:insert({1, 1, 1, 1}) end)
> +---
> +...
> +ch2 = async(function() s:replace({2, 3, 2, 3}) end)
> +---
> +...
> +ch3 = async(function() s:update({3}, {{'+', 4, 1}}) end)
> +---
> +...
> +ch4 = async(function() s:upsert({5, 5, 5, 5}, {{'+', 4, 1}}) end)
> +---
> +...
> +-- Execute a DDL operation on the space.
> +s.index.i4:drop()
> +---
> +...
> +-- Resume the DML requests. Check that they have been aborted.
> +errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0)
> +---
> +- ok
> +...
> +ch1:get()
> +---
> +- Transaction has been aborted by conflict
> +...
> +ch2:get()
> +---
> +- Transaction has been aborted by conflict
> +...
> +ch3:get()
> +---
> +- Transaction has been aborted by conflict
> +...
> +ch4:get()
> +---
> +- Transaction has been aborted by conflict
> +...
> +s:select()
> +---
> +- - [1, 1, 1, 1]
> +  - [2, 2, 2, 2]
> +  - [3, 3, 3, 3]
> +  - [4, 4, 4, 4]
> +  - [5, 5, 5, 5]
> +...
> +s:drop()
> +---
> +...
> +box.cfg{vinyl_cache = default_vinyl_cache}
> +---
> +...
> diff --git a/test/vinyl/errinj_ddl.test.lua b/test/vinyl/errinj_ddl.test.lua
> index 50dc4aa1..84f33277 100644
> --- a/test/vinyl/errinj_ddl.test.lua
> +++ b/test/vinyl/errinj_ddl.test.lua
> @@ -298,3 +298,54 @@ s:replace{1, 3}
>  box.commit()
> 
>  s:drop()
> +
> +--
> +-- gh-3420: crash if DML races with DDL.
> +--
> +fiber = require('fiber')
> +
> +-- turn off cache for error injection to work
> +default_vinyl_cache = box.cfg.vinyl_cache
> +box.cfg{vinyl_cache = 0}
> +
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +_ = s:create_index('i1')
> +_ = s:create_index('i2', {parts = {2, 'unsigned'}})
> +_ = s:create_index('i3', {parts = {3, 'unsigned'}})
> +_ = s:create_index('i4', {parts = {4, 'unsigned'}})
> +for i = 1, 5 do s:replace({i, i, i, i}) end
> +box.snapshot()
> +
> +test_run:cmd("setopt delimiter ';'")
> +function async(f)
> +    local ch = fiber.channel(1)
> +    fiber.create(function()
> +        local _, res = pcall(f)
> +        ch:put(res)
> +    end)
> +    return ch
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +-- Issue a few DML requests that require reading disk.
> +-- Stall disk reads.
> +errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0.01)
> +
> +ch1 = async(function() s:insert({1, 1, 1, 1}) end)
> +ch2 = async(function() s:replace({2, 3, 2, 3}) end)
> +ch3 = async(function() s:update({3}, {{'+', 4, 1}}) end)
> +ch4 = async(function() s:upsert({5, 5, 5, 5}, {{'+', 4, 1}}) end)
> +
> +-- Execute a DDL operation on the space.
> +s.index.i4:drop()
> +
> +-- Resume the DML requests. Check that they have been aborted.
> +errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 0)
> +ch1:get()
> +ch2:get()
> +ch3:get()
> +ch4:get()
> +s:select()
> +s:drop()
> +
> +box.cfg{vinyl_cache = default_vinyl_cache}

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20190325/4d0a3d11/attachment.sig>


More information about the Tarantool-patches mailing list