From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@dev.tarantool.org, gorcunov@gmail.com Subject: [Tarantool-patches] [PATCH 1/1] txn_limbo: panic when synchro WAL write fails Date: Thu, 30 Jul 2020 01:39:05 +0200 [thread overview] Message-ID: <04de5b4a0f7859909155cb9f52e9fd45a338a0e8.1596065870.git.v.shpilevoy@tarantool.org> (raw) CONFIRM and ROLLBACK go to WAL. Their WAL write can fail just like any other WAL write. However it is not clear what to do in that case, especially in case of ROLLBACK fail. The patch adds panic() stub so as to at least terminate the instance. Before the patch it would work like nothing happened, with undefined behaviour. Closes #5159 --- Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5159-qsync-panic Issue: https://github.com/tarantool/tarantool/issues/5159 @ChangeLog * Instance will terminate if a synchronous transaction confirmation or rollback fail. Before it was undefined behaviour (gh-5159). src/box/txn_limbo.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index b6725ae21..4593abef2 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -178,7 +178,7 @@ txn_limbo_assign_lsn(struct txn_limbo *limbo, struct txn_limbo_entry *entry, txn_limbo_assign_remote_lsn(limbo, entry, lsn); } -static int +static void txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn); int @@ -258,7 +258,7 @@ complete: return 0; } -static int +static void txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn, bool is_confirm) { @@ -271,7 +271,7 @@ txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn, struct txn *txn = txn_begin(); if (txn == NULL) - return -1; + goto rollback; int res = 0; if (is_confirm) { @@ -299,23 +299,32 @@ txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn, goto rollback; if (txn_commit_stmt(txn, &request) != 0) goto rollback; + if (txn_commit(txn) != 0) + goto rollback; + return; - return txn_commit(txn); rollback: - txn_rollback(txn); - return -1; + /* + * XXX: the stub is supposed to be removed once it is defined what to do + * when a synchro request WAL write fails. One of the possible + * solutions: log the error, keep the limbo queue as is and probably put + * in rollback mode. Then provide a hook to call manually when WAL + * problems are fixed. Or retry automatically with some period. + */ + panic("Could write a synchro request to WAL: lsn = %lld, type = %s\n", + lsn, is_confirm ? "CONFIRM" : "ROLLBACK"); } /** * Write a confirmation entry to WAL. After it's written all the * transactions waiting for confirmation may be finished. */ -static int +static void txn_limbo_write_confirm(struct txn_limbo *limbo, int64_t lsn) { assert(lsn > limbo->confirmed_lsn); limbo->confirmed_lsn = lsn; - return txn_limbo_write_confirm_rollback(limbo, lsn, true); + txn_limbo_write_confirm_rollback(limbo, lsn, true); } void @@ -361,15 +370,14 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn) * transactions following the current one and waiting for * confirmation must be rolled back. */ -static int +static void txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn) { assert(lsn > limbo->confirmed_lsn); assert(!limbo->is_in_rollback); limbo->is_in_rollback = true; - int rc = txn_limbo_write_confirm_rollback(limbo, lsn, false); + txn_limbo_write_confirm_rollback(limbo, lsn, false); limbo->is_in_rollback = false; - return rc; } void @@ -445,13 +453,7 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn) } if (confirm_lsn == -1 || confirm_lsn <= limbo->confirmed_lsn) return; - if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) { - // TODO: what to do here?. - // We already failed writing the CONFIRM - // message. What are the chances we'll be - // able to write ROLLBACK? - return; - } + txn_limbo_write_confirm(limbo, confirm_lsn); txn_limbo_read_confirm(limbo, confirm_lsn); } @@ -587,10 +589,7 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo) } } if (confirm_lsn > limbo->confirmed_lsn) { - if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) { - panic("Couldn't write CONFIRM to WAL"); - return; - } + txn_limbo_write_confirm(limbo, confirm_lsn); txn_limbo_read_confirm(limbo, confirm_lsn); } /* -- 2.21.1 (Apple Git-122.3)
next reply other threads:[~2020-07-29 23:39 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-07-29 23:39 Vladislav Shpilevoy [this message] 2020-07-30 8:49 ` Cyrill Gorcunov 2020-07-30 19:57 ` Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=04de5b4a0f7859909155cb9f52e9fd45a338a0e8.1596065870.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=gorcunov@gmail.com \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH 1/1] txn_limbo: panic when synchro WAL write fails' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox