[Tarantool-patches] [PATCH 2/2] txn: remove TXN_IS_DONE check from txn_commit()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Jul 22 01:42:44 MSK 2020


TXN_IS_DONE was used in txn_commit() to finalize the transaction
in a case it is not finished yet. But this happens only not in
so common cases - during bootstrap and recovery. During normal
operation the transaction is always finished when WAL thread
returns it to TX thread after a disk write.

So this is a matter of journal, and should be solved here, not in
txn code with some crutch, especially in such a hot path place.

This commit makes so that after journal_write() the transaction is
always already done, i.e. txn_complete_async() was called. Nothing
changes for the normal operation mode except this is -1 'if'.
---
 src/box/box.cc      | 36 +++++++++++++++++++++++++++---------
 src/box/journal.c   | 45 +--------------------------------------------
 src/box/journal.h   | 11 -----------
 src/box/txn.c       |  6 ++----
 src/box/wal.c       |  2 +-
 test/unit/suite.ini |  1 +
 6 files changed, 32 insertions(+), 69 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 45340df2e..83eef5d98 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -344,14 +344,6 @@ recovery_journal_write(struct journal *base,
 {
 	struct recovery_journal *journal = (struct recovery_journal *) base;
 	entry->res = vclock_sum(journal->vclock);
-	return 0;
-}
-
-static int
-recovery_journal_write_async(struct journal *base,
-			     struct  journal_entry *entry)
-{
-	recovery_journal_write(base, entry);
 	/*
 	 * Since there're no actual writes, fire a
 	 * journal_async_complete callback right away.
@@ -364,7 +356,7 @@ static void
 recovery_journal_create(struct vclock *v)
 {
 	static struct recovery_journal journal;
-	journal_create(&journal.base, recovery_journal_write_async,
+	journal_create(&journal.base, recovery_journal_write,
 		       txn_complete_async, recovery_journal_write);
 	journal.vclock = v;
 	journal_set(&journal.base);
@@ -2192,6 +2184,20 @@ engine_init()
 	box_set_vinyl_timeout();
 }
 
+/**
+ * Blindly apply whatever comes from bootstrap. This is either a
+ * local snapshot, or received from a remote master. In both cases
+ * it is not WAL - records are not sorted by their commit order,
+ * and don't have headers.
+ */
+static int
+bootstrap_journal_write(struct journal *base, struct journal_entry *entry)
+{
+	entry->res = 0;
+	journal_async_complete(base, entry);
+	return 0;
+}
+
 /**
  * Initialize the first replica of a new replica set.
  */
@@ -2620,6 +2626,15 @@ box_cfg_xc(void)
 		if (!cfg_geti("hot_standby") || checkpoint == NULL)
 			tnt_raise(ClientError, ER_ALREADY_RUNNING, cfg_gets("wal_dir"));
 	}
+
+	struct journal bootstrap_journal;
+	journal_create(&bootstrap_journal, NULL, txn_complete_async,
+		       bootstrap_journal_write);
+	journal_set(&bootstrap_journal);
+	auto bootstrap_journal_guard = make_scoped_guard([] {
+		journal_set(NULL);
+	});
+
 	bool is_bootstrap_leader = false;
 	if (checkpoint != NULL) {
 		/* Recover the instance from the local directory */
@@ -2632,6 +2647,9 @@ box_cfg_xc(void)
 	}
 	fiber_gc();
 
+	bootstrap_journal_guard.is_active = false;
+	assert(current_journal != &bootstrap_journal);
+
 	/*
 	 * Check for correct registration of the instance in _cluster
 	 * The instance won't exist in _cluster space if it is an
diff --git a/src/box/journal.c b/src/box/journal.c
index d84f960d5..f1e89aaa2 100644
--- a/src/box/journal.c
+++ b/src/box/journal.c
@@ -32,50 +32,7 @@
 #include <small/region.h>
 #include <diag.h>
 
-int
-journal_no_write_async(struct journal *journal,
-		       struct journal_entry *entry)
-{
-	(void)journal;
-	assert(false);
-
-	errno = EINVAL;
-	diag_set(SystemError, "write_async wrong context");
-
-	entry->res = -1;
-	return -1;
-}
-
-void
-journal_no_write_async_cb(struct journal_entry *entry)
-{
-	assert(false);
-
-	errno = EINVAL;
-	diag_set(SystemError, "write_async_cb wrong context");
-
-	entry->res = -1;
-}
-
-/**
- * Used to load from a memtx snapshot. LSN is not used,
- * but txn_commit() must work.
- */
-static int
-dummy_journal_write(struct journal *journal, struct journal_entry *entry)
-{
-	(void) journal;
-	entry->res = 0;
-	return 0;
-}
-
-static struct journal dummy_journal = {
-	.write_async	= journal_no_write_async,
-	.write_async_cb	= journal_no_write_async_cb,
-	.write		= dummy_journal_write,
-};
-
-struct journal *current_journal = &dummy_journal;
+struct journal *current_journal = NULL;
 
 struct journal_entry *
 journal_entry_new(size_t n_rows, struct region *region,
diff --git a/src/box/journal.h b/src/box/journal.h
index ebc5cb708..1a10e66c3 100644
--- a/src/box/journal.h
+++ b/src/box/journal.h
@@ -182,17 +182,6 @@ journal_create(struct journal *journal,
 	journal->write		= write;
 }
 
-/**
- * A stub to issue an error in case if asynchronous
- * write is diabled in the backend.
- */
-extern int
-journal_no_write_async(struct journal *journal,
-		       struct journal_entry *entry);
-
-extern void
-journal_no_write_async_cb(struct journal_entry *entry);
-
 static inline bool
 journal_is_initialized(struct journal *journal)
 {
diff --git a/src/box/txn.c b/src/box/txn.c
index bd9fcdbe6..9c21258c5 100644
--- a/src/box/txn.c
+++ b/src/box/txn.c
@@ -450,6 +450,7 @@ txn_run_wal_write_triggers(struct txn *txn)
 void
 txn_complete(struct txn *txn)
 {
+	assert(!txn_has_flag(txn, TXN_IS_DONE));
 	/*
 	 * Note, engine can be NULL if transaction contains
 	 * IPROTO_NOP or IPROTO_CONFIRM statements.
@@ -846,10 +847,7 @@ txn_commit(struct txn *txn)
 		if (txn_limbo_wait_complete(&txn_limbo, limbo_entry) < 0)
 			goto rollback;
 	}
-	if (!txn_has_flag(txn, TXN_IS_DONE)) {
-		txn->signature = req->res;
-		txn_complete(txn);
-	}
+	assert(txn_has_flag(txn, TXN_IS_DONE));
 	assert(txn->signature >= 0);
 
 	/* Synchronous transactions are freed by the calling fiber. */
diff --git a/src/box/wal.c b/src/box/wal.c
index 8cedf65b7..37a8bd483 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -1304,7 +1304,7 @@ wal_write_none_async(struct journal *journal,
 	vclock_merge(&writer->vclock, &vclock_diff);
 	vclock_copy(&replicaset.vclock, &writer->vclock);
 	entry->res = vclock_sum(&writer->vclock);
-
+	journal_async_complete(journal, entry);
 	return 0;
 }
 
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index d429c95e9..d16ba413b 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,5 +1,6 @@
 [default]
 core = unittest
 description = unit tests
+disabled = snap_quorum_delay.test
 release_disabled = fiber_stack.test swim_errinj.test
 is_parallel = True
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list