From: Konstantin Osipov <kostja@tarantool.org>
To: tarantool-patches@freelists.org
Subject: [tarantool-patches] Re: [PATCH] vinyl: fix assertion while recovering dumped statement
Date: Tue, 21 May 2019 15:21:23 +0300 [thread overview]
Message-ID: <CAPZPwLrhzRd5A6bz1fnhvPZpVo4L-8uFhzKExJ0=xWpLeU3Zzw@mail.gmail.com> (raw)
In-Reply-To: <9566f14ccda537e2e4e4f9bbeb1bf69d1e2cb7f0.1558440623.git.vdavydov.dev@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 14431 bytes --]
Lgtm
On Tue, May 21, 2019, 15:16 Vladimir Davydov <vdavydov.dev@gmail.com> wrote:
> Certain kinds of DML requests don't update secondary indexes, e.g.
> UPDATE that doesn't touch secondary index parts or DELETE for which
> generation of secondary index statements is deferred. For such a request
> vy_is_committed(env, space) may return false on recovery even if it has
> actually been dumped: since such a statement is not dumped for secondary
> indexes, secondary index's vy_lsm::dump_lsn may be less than such
> statement's signature, which makes vy_is_committed() assume that the
> statement hasn't been dumped. Further in the code we have checks that
> ensure that if we execute a request on recovery, it must not be dumped
> for the primary index (as the primary index is always dumped after
> secondary indexes for the sake of recovery), which fires in this case.
>
> To fix that, let's refactor the code basing on the following two facts:
> - Primary index is always updated by a DML request.
> - Primary index may only be dumped after secondary indexes.
>
> Closes #4222
> ---
> https://github.com/tarantool/tarantool/issues/4222
>
> src/box/vinyl.c | 89
> +++++++++++++++++++--------------------------
> test/vinyl/recover.result | 60 ++++++++++++++++++++++++++++++
> test/vinyl/recover.test.lua | 27 ++++++++++++++
> 3 files changed, 125 insertions(+), 51 deletions(-)
>
> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
> index d4929a37..9372e5f7 100644
> --- a/src/box/vinyl.c
> +++ b/src/box/vinyl.c
> @@ -1277,9 +1277,17 @@ vinyl_index_compact(struct index *index)
> * If the LSM tree is going to be dropped or truncated on WAL
> * recovery, there's no point in replaying statements for it,
> * either.
> + *
> + * Note, although we may skip secondary index update in certain
> + * cases (e.g. UPDATE that doesn't touch secondary index parts
> + * or DELETE for which generation of secondary index statement
> + * is deferred), a DML request of any kind always updates the
> + * primary index. Also, we always dump the primary index after
> + * secondary indexes. So we may skip recovery of a DML request
> + * if it has been dumped for the primary index.
> */
> static inline bool
> -vy_is_committed_one(struct vy_env *env, struct vy_lsm *lsm)
> +vy_is_committed(struct vy_env *env, struct vy_lsm *lsm)
> {
> if (likely(env->status != VINYL_FINAL_RECOVERY_LOCAL))
> return false;
> @@ -1291,23 +1299,6 @@ vy_is_committed_one(struct vy_env *env, struct
> vy_lsm *lsm)
> }
>
> /**
> - * Check if a request has already been committed to a space.
> - * See also vy_is_committed_one().
> - */
> -static inline bool
> -vy_is_committed(struct vy_env *env, struct space *space)
> -{
> - if (likely(env->status != VINYL_FINAL_RECOVERY_LOCAL))
> - return false;
> - for (uint32_t iid = 0; iid < space->index_count; iid++) {
> - struct vy_lsm *lsm = vy_lsm(space->index[iid]);
> - if (!vy_is_committed_one(env, lsm))
> - return false;
> - }
> - return true;
> -}
> -
> -/**
> * Get a full tuple by a tuple read from a secondary index.
> * @param lsm LSM tree from which the tuple was read.
> * @param tx Current transaction.
> @@ -1772,11 +1763,11 @@ static int
> vy_delete(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
> struct space *space, struct request *request)
> {
> - if (vy_is_committed(env, space))
> - return 0;
> struct vy_lsm *pk = vy_lsm_find(space, 0);
> if (pk == NULL)
> return -1;
> + if (vy_is_committed(env, pk))
> + return 0;
> struct vy_lsm *lsm = vy_lsm_find_unique(space, request->index_id);
> if (lsm == NULL)
> return -1;
> @@ -1807,7 +1798,7 @@ vy_delete(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> return -1;
> for (uint32_t i = 0; i < space->index_count; i++) {
> struct vy_lsm *lsm = vy_lsm(space->index[i]);
> - if (vy_is_committed_one(env, lsm))
> + if (vy_is_committed(env, lsm))
> continue;
> rc = vy_tx_set(tx, lsm, delete);
> if (rc != 0)
> @@ -1891,7 +1882,7 @@ vy_perform_update(struct vy_env *env, struct vy_tx
> *tx, struct txn_stmt *stmt,
>
> for (uint32_t i = 1; i < space->index_count; ++i) {
> struct vy_lsm *lsm = vy_lsm(space->index[i]);
> - if (vy_is_committed_one(env, lsm))
> + if (vy_is_committed(env, lsm))
> continue;
> if (vy_tx_set_with_colmask(tx, lsm, delete, column_mask)
> != 0)
> goto error;
> @@ -1924,7 +1915,10 @@ vy_update(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> struct space *space, struct request *request)
> {
> assert(tx != NULL && tx->state == VINYL_TX_READY);
> - if (vy_is_committed(env, space))
> + struct vy_lsm *pk = vy_lsm_find(space, 0);
> + if (pk == NULL)
> + return -1;
> + if (vy_is_committed(env, pk))
> return 0;
> struct vy_lsm *lsm = vy_lsm_find_unique(space, request->index_id);
> if (lsm == NULL)
> @@ -1942,11 +1936,6 @@ vy_update(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> return 0;
>
> /* Apply update operations. */
> - struct vy_lsm *pk = vy_lsm(space->index[0]);
> - assert(pk != NULL);
> - assert(pk->index_id == 0);
> - /* Primary key is dumped last. */
> - assert(!vy_is_committed_one(env, pk));
> uint64_t column_mask = 0;
> const char *new_tuple, *new_tuple_end;
> uint32_t new_size, old_size;
> @@ -2126,7 +2115,10 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> struct space *space, struct request *request)
> {
> assert(tx != NULL && tx->state == VINYL_TX_READY);
> - if (vy_is_committed(env, space))
> + struct vy_lsm *pk = vy_lsm_find(space, 0);
> + if (pk == NULL)
> + return -1;
> + if (vy_is_committed(env, pk))
> return 0;
> /* Check update operations. */
> if (tuple_update_check_ops(region_aligned_alloc_cb, &fiber()->gc,
> @@ -2143,11 +2135,6 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> const char *tuple_end = request->tuple_end;
> const char *ops = request->ops;
> const char *ops_end = request->ops_end;
> - struct vy_lsm *pk = vy_lsm_find(space, 0);
> - if (pk == NULL)
> - return -1;
> - /* Primary key is dumped last. */
> - assert(!vy_is_committed_one(env, pk));
> if (tuple_validate_raw(pk->mem_format, tuple))
> return -1;
>
> @@ -2240,14 +2227,12 @@ static int
> vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
> struct space *space, struct request *request)
> {
> - assert(stmt != NULL);
> + assert(tx != NULL && tx->state == VINYL_TX_READY);
> struct vy_lsm *pk = vy_lsm_find(space, 0);
> if (pk == NULL)
> - /* The space hasn't the primary index. */
> return -1;
> - assert(pk->index_id == 0);
> - /* Primary key is dumped last. */
> - assert(!vy_is_committed_one(env, pk));
> + if (vy_is_committed(env, pk))
> + return 0;
> if (tuple_validate_raw(pk->mem_format, request->tuple))
> return -1;
> /* First insert into the primary index. */
> @@ -2263,7 +2248,7 @@ vy_insert(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
>
> for (uint32_t iid = 1; iid < space->index_count; ++iid) {
> struct vy_lsm *lsm = vy_lsm(space->index[iid]);
> - if (vy_is_committed_one(env, lsm))
> + if (vy_is_committed(env, lsm))
> continue;
> if (vy_tx_set(tx, lsm, stmt->new_tuple) != 0)
> return -1;
> @@ -2290,16 +2275,11 @@ vy_replace(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> struct space *space, struct request *request)
> {
> assert(tx != NULL && tx->state == VINYL_TX_READY);
> - if (vy_is_committed(env, space))
> - return 0;
> - if (request->type == IPROTO_INSERT)
> - return vy_insert(env, tx, stmt, space, request);
> -
> struct vy_lsm *pk = vy_lsm_find(space, 0);
> if (pk == NULL)
> return -1;
> - /* Primary key is dumped last. */
> - assert(!vy_is_committed_one(env, pk));
> + if (vy_is_committed(env, pk))
> + return 0;
>
> /* Validate and create a statement for the new tuple. */
> if (tuple_validate_raw(pk->mem_format, request->tuple))
> @@ -2352,7 +2332,7 @@ vy_replace(struct vy_env *env, struct vy_tx *tx,
> struct txn_stmt *stmt,
> }
> for (uint32_t i = 1; i < space->index_count; i++) {
> struct vy_lsm *lsm = vy_lsm(space->index[i]);
> - if (vy_is_committed_one(env, lsm))
> + if (vy_is_committed(env, lsm))
> continue;
> if (delete != NULL) {
> rc = vy_tx_set(tx, lsm, delete);
> @@ -2376,7 +2356,12 @@ vinyl_space_execute_replace(struct space *space,
> struct txn *txn,
> struct vy_env *env = vy_env(space->engine);
> struct vy_tx *tx = txn->engine_tx;
> struct txn_stmt *stmt = txn_current_stmt(txn);
> - if (vy_replace(env, tx, stmt, space, request))
> + int rc;
> + if (request->type == IPROTO_INSERT)
> + rc = vy_insert(env, tx, stmt, space, request);
> + else
> + rc = vy_replace(env, tx, stmt, space, request);
> + if (rc != 0)
> return -1;
> *result = stmt->new_tuple;
> return 0;
> @@ -3300,6 +3285,8 @@ vinyl_space_apply_initial_join_row(struct space
> *space, struct request *request)
> int rc = -1;
> switch (request->type) {
> case IPROTO_INSERT:
> + rc = vy_insert(env, tx, &stmt, space, request);
> + break;
> case IPROTO_REPLACE:
> rc = vy_replace(env, tx, &stmt, space, request);
> break;
> @@ -4488,7 +4475,7 @@ vy_deferred_delete_on_rollback(struct trigger
> *trigger, void *event)
> * to restore deferred DELETE statements that haven't been dumped
> * to disk. To skip deferred DELETEs that have been dumped, we
> * use the same technique we employ for normal WAL statements,
> - * i.e. we filter them by LSN, see vy_is_committed_one(). To do
> + * i.e. we filter them by LSN, see vy_is_committed(). To do
> * that, we need to account the LSN of a WAL row that generated
> * a deferred DELETE statement to vy_lsm::dump_lsn, so we install
> * an on_commit trigger that propagates the LSN of the WAL row to
> @@ -4575,7 +4562,7 @@ vy_deferred_delete_on_replace(struct trigger
> *trigger, void *event)
> struct tuple *region_stmt = NULL;
> for (uint32_t i = 1; i < space->index_count; i++) {
> struct vy_lsm *lsm = vy_lsm(space->index[i]);
> - if (vy_is_committed_one(env, lsm))
> + if (vy_is_committed(env, lsm))
> continue;
> /*
> * As usual, rotate the active in-memory index if
> diff --git a/test/vinyl/recover.result b/test/vinyl/recover.result
> index a6313f22..345a019d 100644
> --- a/test/vinyl/recover.result
> +++ b/test/vinyl/recover.result
> @@ -479,3 +479,63 @@ test_run:cmd('cleanup server force_recovery')
> ---
> - true
> ...
> +--
> +-- gh-4222: assertion failure while recovering dumped statement
> +-- that isn't present in secondary index.
> +--
> +test_run:cmd('create server test with script = "vinyl/low_quota.lua"')
> +---
> +- true
> +...
> +test_run:cmd('start server test with args="1048576"')
> +---
> +- true
> +...
> +test_run:cmd('switch test')
> +---
> +- true
> +...
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +---
> +...
> +pk = s:create_index('primary')
> +---
> +...
> +sk = s:create_index('secondary', {unique = false, parts = {2,
> 'unsigned'}})
> +---
> +...
> +s:insert{1, 1, 1}
> +---
> +- [1, 1, 1]
> +...
> +box.snapshot()
> +---
> +- ok
> +...
> +for i = 1, 1024 do s:update(1, {{'=', 3, string.rep('x', 1024)}}) end
> +---
> +...
> +test_run:wait_cond(function() return pk:stat().disk.dump.count == 2 end)
> +---
> +- true
> +...
> +sk:stat().disk.dump.count -- 1
> +---
> +- 1
> +...
> +test_run:cmd('restart server test with args="1048576"')
> +box.space.test:drop()
> +---
> +...
> +test_run:cmd('switch default')
> +---
> +- true
> +...
> +test_run:cmd('stop server test')
> +---
> +- true
> +...
> +test_run:cmd('cleanup server test')
> +---
> +- true
> +...
> diff --git a/test/vinyl/recover.test.lua b/test/vinyl/recover.test.lua
> index 1ed266a1..7bdba178 100644
> --- a/test/vinyl/recover.test.lua
> +++ b/test/vinyl/recover.test.lua
> @@ -164,3 +164,30 @@ sum
> test_run:cmd('switch default')
> test_run:cmd('stop server force_recovery')
> test_run:cmd('cleanup server force_recovery')
> +
> +--
> +-- gh-4222: assertion failure while recovering dumped statement
> +-- that isn't present in secondary index.
> +--
> +test_run:cmd('create server test with script = "vinyl/low_quota.lua"')
> +test_run:cmd('start server test with args="1048576"')
> +test_run:cmd('switch test')
> +
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +pk = s:create_index('primary')
> +sk = s:create_index('secondary', {unique = false, parts = {2,
> 'unsigned'}})
> +
> +s:insert{1, 1, 1}
> +box.snapshot()
> +
> +for i = 1, 1024 do s:update(1, {{'=', 3, string.rep('x', 1024)}}) end
> +test_run:wait_cond(function() return pk:stat().disk.dump.count == 2 end)
> +sk:stat().disk.dump.count -- 1
> +
> +test_run:cmd('restart server test with args="1048576"')
> +
> +box.space.test:drop()
> +
> +test_run:cmd('switch default')
> +test_run:cmd('stop server test')
> +test_run:cmd('cleanup server test')
> --
> 2.11.0
>
>
>
[-- Attachment #2: Type: text/html, Size: 17366 bytes --]
prev parent reply other threads:[~2019-05-21 12:21 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-21 12:16 Vladimir Davydov
2019-05-21 12:20 ` Vladimir Davydov
2019-05-21 12:21 ` Konstantin Osipov [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAPZPwLrhzRd5A6bz1fnhvPZpVo4L-8uFhzKExJ0=xWpLeU3Zzw@mail.gmail.com' \
--to=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='[tarantool-patches] Re: [PATCH] vinyl: fix assertion while recovering dumped statement' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox