[Tarantool-patches] [PATCH 12/13] txn: introduce TXN_SIGNATURE_ABORT

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Jun 12 00:56:08 MSK 2021


Sometimes a transaction can fail before it goes to WAL. Then the
signature didn't have any sign of it, as well as the journal_entry
result (which might be not even created yet).

Still if txn_commit/try_async() are called, they invoke
on_rollback triggers. The triggers only can see
TXN_SIGNATURE_ROLLBACK and can't distinguish it from a real
rollback like box.rollback().

Due to that some important errors like a transaction manager
conflict or OOM are lost.

The patch introduces a new error signature TXN_SIGNATURE_ABORT
which says the transaction didn't manage to try going to WAL and
for an error need to look at the global diag.

The next patch is going to stop overriding it with
TXN_SIGNATURE_ROLLBACK.

Part of #6027
---
 src/box/applier.cc     |  4 ++--
 src/box/box.cc         |  8 ++++----
 src/box/memtx_engine.c |  2 +-
 src/box/txn.c          | 26 +++++++++++++++++++++-----
 src/box/txn.h          | 14 ++++++++++++++
 src/box/vy_scheduler.c |  2 +-
 6 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/box/applier.cc b/src/box/applier.cc
index 3fd71393d..10cea26a7 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -261,7 +261,7 @@ apply_snapshot_row(struct xrow_header *row)
 rollback_stmt:
 	txn_rollback_stmt(txn);
 rollback:
-	txn_rollback(txn);
+	txn_abort(txn);
 	fiber_gc();
 	return -1;
 }
@@ -942,7 +942,7 @@ apply_plain_tx(struct stailq *rows, bool skip_conflict, bool use_triggers)
 
 	return txn_commit_try_async(txn);
 fail:
-	txn_rollback(txn);
+	txn_abort(txn);
 	return -1;
 }
 
diff --git a/src/box/box.cc b/src/box/box.cc
index 6fca337bc..750262c04 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -251,7 +251,7 @@ box_process_rw(struct request *request, struct space *space,
 
 rollback:
 	if (is_autocommit) {
-		txn_rollback(txn);
+		txn_abort(txn);
 		fiber_gc();
 	}
 error:
@@ -391,7 +391,7 @@ wal_stream_abort(struct wal_stream *stream)
 {
 	struct txn *tx = in_txn();
 	if (tx != NULL)
-		txn_rollback(tx);
+		txn_abort(tx);
 	stream->tsn = 0;
 	fiber_gc();
 }
@@ -3220,9 +3220,9 @@ local_recovery(const struct tt_uuid *instance_uuid,
 	engine_begin_final_recovery_xc();
 	recover_remaining_wals(recovery, &wal_stream.base, NULL, false);
 	if (wal_stream_has_tx(&wal_stream)) {
-		wal_stream_abort(&wal_stream);
 		diag_set(XlogError, "found a not finished transaction "
 			 "in the log");
+		wal_stream_abort(&wal_stream);
 		if (!is_force_recovery)
 			diag_raise();
 		diag_log();
@@ -3247,9 +3247,9 @@ local_recovery(const struct tt_uuid *instance_uuid,
 		recovery_stop_local(recovery);
 		recover_remaining_wals(recovery, &wal_stream.base, NULL, true);
 		if (wal_stream_has_tx(&wal_stream)) {
-			wal_stream_abort(&wal_stream);
 			diag_set(XlogError, "found a not finished transaction "
 				 "in the log in hot standby mode");
+			wal_stream_abort(&wal_stream);
 			if (!is_force_recovery)
 				diag_raise();
 			diag_log();
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 6c4982b9f..c662a3c8c 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -277,7 +277,7 @@ memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 rollback_stmt:
 	txn_rollback_stmt(txn);
 rollback:
-	txn_rollback(txn);
+	txn_abort(txn);
 	fiber_gc();
 	return -1;
 }
diff --git a/src/box/txn.c b/src/box/txn.c
index b3819b8f9..5cae7b41d 100644
--- a/src/box/txn.c
+++ b/src/box/txn.c
@@ -264,6 +264,10 @@ diag_set_txn_sign_detailed(const char *file, unsigned line, int64_t signature)
 	case TXN_SIGNATURE_SYNC_ROLLBACK:
 		diag_set_detailed(file, line, ClientError, ER_SYNC_ROLLBACK);
 		return;
+	case TXN_SIGNATURE_ABORT:
+		if (diag_is_empty(diag_get()))
+			panic("Tried to get an absent transaction error");
+		return;
 	}
 	panic("Transaction signature %lld can't be converted to an error "
 	      "at %s:%u", (long long)signature, file, line);
@@ -870,20 +874,20 @@ txn_commit_try_async(struct txn *txn)
 
 rollback:
 	assert(txn->fiber == NULL);
-	txn_rollback(txn);
+	txn_abort(txn);
 	return -1;
 }
 
 int
 txn_commit(struct txn *txn)
 {
-	struct journal_entry *req;
+	struct journal_entry *req = NULL;
 	struct txn_limbo_entry *limbo_entry = NULL;
 
 	txn->fiber = fiber();
 
 	if (txn_prepare(txn) != 0)
-		goto rollback;
+		goto rollback_abort;
 
 	if (txn_commit_nop(txn)) {
 		txn_free(txn);
@@ -892,7 +896,7 @@ txn_commit(struct txn *txn)
 
 	req = txn_journal_entry_new(txn);
 	if (req == NULL)
-		goto rollback;
+		goto rollback_abort;
 	/*
 	 * Do not cache the flag value in a variable. The flag might be deleted
 	 * during WAL write. This can happen for async transactions created
@@ -914,7 +918,7 @@ txn_commit(struct txn *txn)
 		 */
 		limbo_entry = txn_limbo_append(&txn_limbo, origin_id, txn);
 		if (limbo_entry == NULL)
-			goto rollback;
+			goto rollback_abort;
 	}
 
 	fiber_set_txn(fiber(), NULL);
@@ -951,7 +955,10 @@ rollback_io:
 	diag_log();
 	if (txn_has_flag(txn, TXN_WAIT_SYNC))
 		txn_limbo_abort(&txn_limbo, limbo_entry);
+rollback_abort:
+	txn->signature = TXN_SIGNATURE_ABORT;
 rollback:
+	assert(txn->signature != TXN_SIGNATURE_UNKNOWN);
 	assert(txn->fiber != NULL);
 	if (!txn_has_flag(txn, TXN_IS_DONE)) {
 		fiber_set_txn(fiber(), txn);
@@ -985,6 +992,15 @@ txn_rollback(struct txn *txn)
 	fiber_set_txn(fiber(), NULL);
 }
 
+void
+txn_abort(struct txn *txn)
+{
+	assert(!diag_is_empty(diag_get()));
+	assert(txn->signature == TXN_SIGNATURE_UNKNOWN);
+	txn->signature = TXN_SIGNATURE_ABORT;
+	txn_rollback(txn);
+}
+
 int
 txn_check_singlestatement(struct txn *txn, const char *where)
 {
diff --git a/src/box/txn.h b/src/box/txn.h
index 7638854a7..8741dc6a1 100644
--- a/src/box/txn.h
+++ b/src/box/txn.h
@@ -129,6 +129,11 @@ enum {
 	 * read.
 	 */
 	TXN_SIGNATURE_SYNC_ROLLBACK = JOURNAL_ENTRY_ERR_MIN - 3,
+	/**
+	 * Aborted before it could be written due an error which is already
+	 * installed into the global diag.
+	 */
+	TXN_SIGNATURE_ABORT = JOURNAL_ENTRY_ERR_MIN - 4,
 };
 
 /**
@@ -491,6 +496,15 @@ txn_commit(struct txn *txn);
 void
 txn_rollback(struct txn *txn);
 
+/**
+ * Rollback a transaction due to an error which is already installed into the
+ * global diag. This is preferable over the plain rollback when there are
+ * already triggers installed and they might need to know the exact reason for
+ * the rollback.
+ */
+void
+txn_abort(struct txn *txn);
+
 /**
  * Submit a transaction to the journal.
  * @pre txn == in_txn()
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 917b75f93..b3e4120cd 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -920,7 +920,7 @@ vy_deferred_delete_batch_process_f(struct cmsg *cmsg)
 	return;
 
 fail_rollback:
-	txn_rollback(txn);
+	txn_abort(txn);
 	fiber_gc();
 fail:
 	batch->is_failed = true;
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list