[Tarantool-patches] [PATCH 03/13] journal: make journal_write() set diag on error
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sat Jun 12 00:56:11 MSK 2021
It used to simply return -1 and set a diag only when OOM happened
inside.
The caller was forced either to ignore the result or set its own
diag regardless of what really happened.
The patch makes journal_write() set a correct diag error when it
returns -1. The only implementation to change was
wal_write_async(). The other implementations always return 0.
Part of #6027
---
src/box/applier.cc | 4 +++-
src/box/raft.c | 8 +++++---
src/box/txn.c | 14 ++++++++------
src/box/txn_limbo.c | 30 +++++++++++++++++-------------
src/box/wal.c | 2 ++
5 files changed, 35 insertions(+), 23 deletions(-)
diff --git a/src/box/applier.cc b/src/box/applier.cc
index 33181fdbf..60d648795 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -835,7 +835,9 @@ apply_synchro_row(struct xrow_header *row)
* before trying to commit. But that requires extra steps from the
* transactions side, including the async ones.
*/
- if (journal_write(&entry.base) != 0 || entry.base.res < 0) {
+ if (journal_write(&entry.base) != 0)
+ goto err;
+ if (entry.base.res < 0) {
diag_set(ClientError, ER_WAL_IO);
goto err;
}
diff --git a/src/box/raft.c b/src/box/raft.c
index 6b52c9876..55dee4cb1 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -307,17 +307,19 @@ box_raft_write(struct raft *raft, const struct raft_msg *msg)
* follows this pattern of 'protection'.
*/
bool cancellable = fiber_set_cancellable(false);
- bool ok = (journal_write(entry) == 0 && entry->res >= 0);
+ bool is_err = journal_write(entry) != 0;
fiber_set_cancellable(cancellable);
- if (!ok) {
+ if (is_err)
+ goto fail;
+ if (entry->res < 0) {
diag_set(ClientError, ER_WAL_IO);
- diag_log();
goto fail;
}
region_truncate(region, svp);
return;
fail:
+ diag_log();
/*
* XXX: the stub is supposed to be removed once it is defined what to do
* when a raft request WAL write fails.
diff --git a/src/box/txn.c b/src/box/txn.c
index 966dfafdf..761630939 100644
--- a/src/box/txn.c
+++ b/src/box/txn.c
@@ -847,7 +847,6 @@ txn_commit_try_async(struct txn *txn)
fiber_set_txn(fiber(), NULL);
if (journal_write_try_async(req) != 0) {
fiber_set_txn(fiber(), txn);
- diag_set(ClientError, ER_WAL_IO);
diag_log();
goto rollback;
}
@@ -904,12 +903,11 @@ txn_commit(struct txn *txn)
}
fiber_set_txn(fiber(), NULL);
- if (journal_write(req) != 0 || req->res < 0) {
- if (txn_has_flag(txn, TXN_WAIT_SYNC))
- txn_limbo_abort(&txn_limbo, limbo_entry);
+ if (journal_write(req) != 0)
+ goto rollback_io;
+ if (req->res < 0) {
diag_set(ClientError, ER_WAL_IO);
- diag_log();
- goto rollback;
+ goto rollback_io;
}
if (txn_has_flag(txn, TXN_WAIT_SYNC)) {
if (txn_has_flag(txn, TXN_WAIT_ACK)) {
@@ -934,6 +932,10 @@ txn_commit(struct txn *txn)
txn_free(txn);
return 0;
+rollback_io:
+ diag_log();
+ if (txn_has_flag(txn, TXN_WAIT_SYNC))
+ txn_limbo_abort(&txn_limbo, limbo_entry);
rollback:
assert(txn->fiber != NULL);
if (!txn_has_flag(txn, TXN_IS_DONE)) {
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index dae6d2df4..83b86387c 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -335,21 +335,25 @@ txn_limbo_write_synchro(struct txn_limbo *limbo, uint16_t type, int64_t lsn,
journal_entry_create(entry, 1, xrow_approx_len(&row),
journal_entry_fiber_wakeup_cb, fiber());
- if (journal_write(entry) != 0 || entry->res < 0) {
+ if (journal_write(entry) != 0)
+ goto fail;
+ if (entry->res < 0) {
diag_set(ClientError, ER_WAL_IO);
- diag_log();
- /*
- * XXX: the stub is supposed to be removed once it is defined
- * what to do when a synchro request WAL write fails. One of
- * the possible solutions: log the error, keep the limbo
- * queue as is and probably put in rollback mode. Then
- * provide a hook to call manually when WAL problems are fixed.
- * Or retry automatically with some period.
- */
- panic("Could not write a synchro request to WAL: "
- "lsn = %lld, type = %s\n", (long long)lsn,
- iproto_type_name(type));
+ goto fail;
}
+ return;
+fail:
+ diag_log();
+ /*
+ * XXX: the stub is supposed to be removed once it is defined what to do
+ * when a synchro request WAL write fails. One of the possible
+ * solutions: log the error, keep the limbo queue as is and probably put
+ * in rollback mode. Then provide a hook to call manually when WAL
+ * problems are fixed. Or retry automatically with some period.
+ */
+ panic("Could not write a synchro request to WAL: lsn = %lld, "
+ "type = %s\n", (long long)lsn, iproto_type_name(type));
+
}
/**
diff --git a/src/box/wal.c b/src/box/wal.c
index 5c52142ef..25edbace6 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -1244,6 +1244,7 @@ wal_write_async(struct journal *journal, struct journal_entry *entry)
struct wal_writer *writer = (struct wal_writer *) journal;
ERROR_INJECT(ERRINJ_WAL_IO, {
+ diag_set(ClientError, ER_WAL_IO);
goto fail;
});
@@ -1258,6 +1259,7 @@ wal_write_async(struct journal *journal, struct journal_entry *entry)
say_error("Aborting transaction %lld during "
"cascading rollback",
(long long)vclock_sum(&writer->vclock));
+ diag_set(ClientError, ER_CASCADE_ROLLBACK);
goto fail;
}
--
2.24.3 (Apple Git-128)
More information about the Tarantool-patches
mailing list