Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@dev.tarantool.org, gorcunov@gmail.com
Subject: [Tarantool-patches] [PATCH 1/1] txn_limbo: panic when synchro WAL write fails
Date: Thu, 30 Jul 2020 01:39:05 +0200	[thread overview]
Message-ID: <04de5b4a0f7859909155cb9f52e9fd45a338a0e8.1596065870.git.v.shpilevoy@tarantool.org> (raw)

CONFIRM and ROLLBACK go to WAL. Their WAL write can fail just like
any other WAL write. However, it is not clear what to do in that
case, especially when a ROLLBACK write fails.

The patch adds a panic() stub so as to at least terminate the
instance. Before the patch the instance would carry on as if
nothing had happened, with undefined behaviour.

Closes #5159
---
Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5159-qsync-panic
Issue: https://github.com/tarantool/tarantool/issues/5159

@ChangeLog
* The instance now terminates if writing a synchronous transaction confirmation or rollback to WAL fails. Previously this resulted in undefined behaviour (gh-5159).

 src/box/txn_limbo.c | 43 +++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index b6725ae21..4593abef2 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -178,7 +178,7 @@ txn_limbo_assign_lsn(struct txn_limbo *limbo, struct txn_limbo_entry *entry,
 		txn_limbo_assign_remote_lsn(limbo, entry, lsn);
 }
 
-static int
+static void
 txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn);
 
 int
@@ -258,7 +258,7 @@ complete:
 	return 0;
 }
 
-static int
+static void
 txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn,
 				 bool is_confirm)
 {
@@ -271,7 +271,7 @@ txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn,
 
 	struct txn *txn = txn_begin();
 	if (txn == NULL)
-		return -1;
+		goto rollback;
 
 	int res = 0;
 	if (is_confirm) {
@@ -299,23 +299,32 @@ txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn,
 		goto rollback;
 	if (txn_commit_stmt(txn, &request) != 0)
 		goto rollback;
+	if (txn_commit(txn) != 0)
+		goto rollback;
+	return;
 
-	return txn_commit(txn);
 rollback:
-	txn_rollback(txn);
-	return -1;
+	/*
+	 * XXX: the stub is supposed to be removed once it is defined what to do
+	 * when a synchro request WAL write fails. One of the possible
+	 * solutions: log the error, keep the limbo queue as is and probably put
+	 * it in rollback mode. Then provide a hook to call manually when WAL
+	 * problems are fixed. Or retry automatically at some interval.
+	 */
+	panic("Could write a synchro request to WAL: lsn = %lld, type = %s\n",
+	      lsn, is_confirm ? "CONFIRM" : "ROLLBACK");
 }
 
 /**
  * Write a confirmation entry to WAL. After it's written all the
  * transactions waiting for confirmation may be finished.
  */
-static int
+static void
 txn_limbo_write_confirm(struct txn_limbo *limbo, int64_t lsn)
 {
 	assert(lsn > limbo->confirmed_lsn);
 	limbo->confirmed_lsn = lsn;
-	return txn_limbo_write_confirm_rollback(limbo, lsn, true);
+	txn_limbo_write_confirm_rollback(limbo, lsn, true);
 }
 
 void
@@ -361,15 +370,14 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
  * transactions following the current one and waiting for
  * confirmation must be rolled back.
  */
-static int
+static void
 txn_limbo_write_rollback(struct txn_limbo *limbo, int64_t lsn)
 {
 	assert(lsn > limbo->confirmed_lsn);
 	assert(!limbo->is_in_rollback);
 	limbo->is_in_rollback = true;
-	int rc = txn_limbo_write_confirm_rollback(limbo, lsn, false);
+	txn_limbo_write_confirm_rollback(limbo, lsn, false);
 	limbo->is_in_rollback = false;
-	return rc;
 }
 
 void
@@ -445,13 +453,7 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn)
 	}
 	if (confirm_lsn == -1 || confirm_lsn <= limbo->confirmed_lsn)
 		return;
-	if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) {
-		// TODO: what to do here?.
-		// We already failed writing the CONFIRM
-		// message. What are the chances we'll be
-		// able to write ROLLBACK?
-		return;
-	}
+	txn_limbo_write_confirm(limbo, confirm_lsn);
 	txn_limbo_read_confirm(limbo, confirm_lsn);
 }
 
@@ -587,10 +589,7 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo)
 		}
 	}
 	if (confirm_lsn > limbo->confirmed_lsn) {
-		if (txn_limbo_write_confirm(limbo, confirm_lsn) != 0) {
-			panic("Couldn't write CONFIRM to WAL");
-			return;
-		}
+		txn_limbo_write_confirm(limbo, confirm_lsn);
 		txn_limbo_read_confirm(limbo, confirm_lsn);
 	}
 	/*
-- 
2.21.1 (Apple Git-122.3)

             reply	other threads:[~2020-07-29 23:39 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-29 23:39 Vladislav Shpilevoy [this message]
2020-07-30  8:49 ` Cyrill Gorcunov
2020-07-30 19:57 ` Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=04de5b4a0f7859909155cb9f52e9fd45a338a0e8.1596065870.git.v.shpilevoy@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 1/1] txn_limbo: panic when synchro WAL write fails' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox