Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org
Subject: [Tarantool-patches] [PATCH v2 19/19] replication: block async transactions when not empty limbo
Date: Tue, 30 Jun 2020 01:15:20 +0200	[thread overview]
Message-ID: <6cce3594cdf04b56e18b73c7f1e9fcaebbbcf215.1593472477.git.v.shpilevoy@tarantool.org> (raw)
In-Reply-To: <cover.1593472477.git.v.shpilevoy@tarantool.org>

When there is a not committed synchronous transaction, any attempt
to commit a next transaction should be suspended, even if it is an
async transaction.

This restriction comes from the theoretically possible dependency
of what is written in the async transactions on what was written
in the previous sync transactions.

For that there is a new txn flag - TXN_WAIT_SYNC. Previously the
only synchro replication flag was TXN_WAIT_ACK. And now a
transaction can be sync, but not wait for ACKs.

In particular, if a transaction:

- Is synchronous, the it has TXN_WAIT_SYNC (it is sync), and
  TXN_WAIT_ACK (need to collect ACKs, or get a CONFIRM);

- Is asynchronous, and the limbo was empty and the moment of
  commit, the it does not have any of these flags and committed
  like earlier;

- Is asynchronous, and the limbo was not empty and the moment of
  commit. Then it will have only TXN_WAIT_SYNC. So it will be
  finished right after all the previous sync transactions are
  done. Note: *without waiting for ACKs* - the transaction is
  still asynchronous in a sense that it is don't need to wait for
  quorum replication.

Follow-up #4845
---
 src/box/applier.cc                            |  8 ++
 src/box/txn.c                                 | 16 ++--
 src/box/txn.h                                 |  7 ++
 src/box/txn_limbo.c                           | 49 +++++++++---
 .../sync_replication_sanity.result            | 75 +++++++++++++++++++
 .../sync_replication_sanity.test.lua          | 26 +++++++
 test/unit/snap_quorum_delay.cc                |  6 +-
 7 files changed, 172 insertions(+), 15 deletions(-)

diff --git a/src/box/applier.cc b/src/box/applier.cc
index 7e63dc544..7e70211b7 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -280,6 +280,14 @@ process_confirm_rollback(struct request *request, bool is_confirm)
 			 txn_limbo.instance_id);
 		return -1;
 	}
+	assert(txn->n_applier_rows == 0);
+	/*
+	 * This is not really a transaction. It just uses txn API
+	 * to put the data into WAL. And obviously it should not
+	 * go to the limbo and block on the very same sync
+	 * transaction which it tries to confirm now.
+	 */
+	txn_set_flag(txn, TXN_FORCE_ASYNC);
 
 	if (txn_begin_stmt(txn, NULL) != 0)
 		return -1;
diff --git a/src/box/txn.c b/src/box/txn.c
index 37955752a..bc2bb8e11 100644
--- a/src/box/txn.c
+++ b/src/box/txn.c
@@ -442,7 +442,7 @@ txn_complete(struct txn *txn)
 			engine_rollback(txn->engine, txn);
 		if (txn_has_flag(txn, TXN_HAS_TRIGGERS))
 			txn_run_rollback_triggers(txn, &txn->on_rollback);
-	} else if (!txn_has_flag(txn, TXN_WAIT_ACK)) {
+	} else if (!txn_has_flag(txn, TXN_WAIT_SYNC)) {
 		/* Commit the transaction. */
 		if (txn->engine != NULL)
 			engine_commit(txn->engine, txn);
@@ -552,8 +552,14 @@ txn_journal_entry_new(struct txn *txn)
 	 * space can't be synchronous. So if there is at least one
 	 * synchronous space, the transaction is not local.
 	 */
-	if (is_sync && !txn_has_flag(txn, TXN_FORCE_ASYNC))
-		txn_set_flag(txn, TXN_WAIT_ACK);
+	if (!txn_has_flag(txn, TXN_FORCE_ASYNC)) {
+		if (is_sync) {
+			txn_set_flag(txn, TXN_WAIT_SYNC);
+			txn_set_flag(txn, TXN_WAIT_ACK);
+		} else if (!txn_limbo_is_empty(&txn_limbo)) {
+			txn_set_flag(txn, TXN_WAIT_SYNC);
+		}
+	}
 
 	assert(remote_row == req->rows + txn->n_applier_rows);
 	assert(local_row == remote_row + txn->n_new_rows);
@@ -662,7 +668,7 @@ txn_commit_async(struct txn *txn)
 		return -1;
 	}
 
-	bool is_sync = txn_has_flag(txn, TXN_WAIT_ACK);
+	bool is_sync = txn_has_flag(txn, TXN_WAIT_SYNC);
 	struct txn_limbo_entry *limbo_entry;
 	if (is_sync) {
 		/*
@@ -737,7 +743,7 @@ txn_commit(struct txn *txn)
 		return -1;
 	}
 
-	bool is_sync = txn_has_flag(txn, TXN_WAIT_ACK);
+	bool is_sync = txn_has_flag(txn, TXN_WAIT_SYNC);
 	if (is_sync) {
 		/*
 		 * Remote rows, if any, come before local rows, so
diff --git a/src/box/txn.h b/src/box/txn.h
index c631d7033..c484fcb56 100644
--- a/src/box/txn.h
+++ b/src/box/txn.h
@@ -66,11 +66,18 @@ enum txn_flag {
 	TXN_CAN_YIELD,
 	/** on_commit and/or on_rollback list is not empty. */
 	TXN_HAS_TRIGGERS,
+	/**
+	 * A transaction is either synchronous itself and needs to
+	 * be synced with replicas, or it is async, but is blocked
+	 * by a not yet finished synchronous transaction.
+	 */
+	TXN_WAIT_SYNC,
 	/**
 	 * Transaction, touched sync spaces, enters 'waiting for
 	 * acks' state before commit. In this state it waits until
 	 * it is replicated onto a quorum of replicas, and only
 	 * then finishes commit and returns success to a user.
+	 * TXN_WAIT_SYNC is always set, if TXN_WAIT_ACK is set.
 	 */
 	TXN_WAIT_ACK,
 	/**
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index fbe4dcecf..bfb404e8e 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -47,7 +47,7 @@ txn_limbo_create(struct txn_limbo *limbo)
 struct txn_limbo_entry *
 txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn)
 {
-	assert(txn_has_flag(txn, TXN_WAIT_ACK));
+	assert(txn_has_flag(txn, TXN_WAIT_SYNC));
 	if (id == 0)
 		id = instance_id;
 	if (limbo->instance_id != id) {
@@ -143,7 +143,7 @@ txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 	struct txn *txn = entry->txn;
 	assert(entry->lsn > 0);
 	assert(!txn_has_flag(txn, TXN_IS_DONE));
-	assert(txn_has_flag(txn, TXN_WAIT_ACK));
+	assert(txn_has_flag(txn, TXN_WAIT_SYNC));
 	if (txn_limbo_check_complete(limbo, entry)) {
 		txn_limbo_remove(limbo, entry);
 		return 0;
@@ -160,6 +160,7 @@ txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 			e->txn->signature = TXN_SIGNATURE_QUORUM_TIMEOUT;
 			txn_limbo_pop(limbo, e);
 			txn_clear_flag(e->txn, TXN_WAIT_ACK);
+			txn_clear_flag(e->txn, TXN_WAIT_SYNC);
 			txn_complete(e->txn);
 			if (e == entry)
 				break;
@@ -179,6 +180,7 @@ txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 	}
 	txn_limbo_remove(limbo, entry);
 	txn_clear_flag(txn, TXN_WAIT_ACK);
+	txn_clear_flag(txn, TXN_WAIT_SYNC);
 	return 0;
 }
 
@@ -209,6 +211,13 @@ txn_limbo_write_confirm_rollback(struct txn_limbo *limbo,
 	struct txn *txn = txn_begin();
 	if (txn == NULL)
 		return -1;
+	/*
+	 * This is not really a transaction. It just uses txn API
+	 * to put the data into WAL. And obviously it should not
+	 * go to the limbo and block on the very same sync
+	 * transaction which it tries to confirm now.
+	 */
+	txn_set_flag(txn, TXN_FORCE_ASYNC);
 
 	if (txn_begin_stmt(txn, NULL) != 0)
 		goto rollback;
@@ -238,11 +247,21 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
 	assert(limbo->instance_id != REPLICA_ID_NIL);
 	struct txn_limbo_entry *e, *tmp;
 	rlist_foreach_entry_safe(e, &limbo->queue, in_queue, tmp) {
-		if (e->lsn > lsn)
+		/*
+		 * Confirm a transaction if
+		 * - it is a sync transaction covered by the
+		 *   confirmation LSN;
+		 * - it is an async transaction, and it is the
+		 *   last in the queue. So it does not depend on
+		 *   a not finished sync transaction anymore and
+		 *   can be confirmed too.
+		 */
+		if (e->lsn > lsn && txn_has_flag(e->txn, TXN_WAIT_ACK))
 			break;
 		e->is_commit = true;
 		txn_limbo_remove(limbo, e);
 		txn_clear_flag(e->txn, TXN_WAIT_ACK);
+		txn_clear_flag(e->txn, TXN_WAIT_SYNC);
 		/*
 		 * If  txn_complete_async() was already called,
 		 * finish tx processing. Otherwise just clear the
@@ -277,6 +296,7 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
 		e->is_rollback = true;
 		txn_limbo_pop(limbo, e);
 		txn_clear_flag(e->txn, TXN_WAIT_ACK);
+		txn_clear_flag(e->txn, TXN_WAIT_SYNC);
 		if (e->txn->signature >= 0) {
 			/* Rollback the transaction. */
 			e->txn->signature = TXN_SIGNATURE_SYNC_ROLLBACK;
@@ -307,15 +327,26 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn)
 	struct txn_limbo_entry *e;
 	struct txn_limbo_entry *last_quorum = NULL;
 	rlist_foreach_entry(e, &limbo->queue, in_queue) {
-		if (e->lsn <= prev_lsn)
-			continue;
 		if (e->lsn > lsn)
 			break;
-		if (++e->ack_count >= replication_synchro_quorum) {
-			e->is_commit = true;
-			last_quorum = e;
-		}
+		if (e->lsn <= prev_lsn)
+			continue;
 		assert(e->ack_count <= VCLOCK_MAX);
+		/*
+		 * Sync transactions need to collect acks. Async
+		 * transactions are automatically committed right
+		 * after all the previous sync transactions are.
+		 */
+		if (txn_has_flag(e->txn, TXN_WAIT_ACK)) {
+			if (++e->ack_count < replication_synchro_quorum)
+				continue;
+		} else {
+			assert(txn_has_flag(e->txn, TXN_WAIT_SYNC));
+			if (last_quorum == NULL)
+				continue;
+		}
+		e->is_commit = true;
+		last_quorum = e;
 	}
 	if (last_quorum != NULL) {
 		if (txn_limbo_write_confirm(limbo, last_quorum) != 0) {
diff --git a/test/replication/sync_replication_sanity.result b/test/replication/sync_replication_sanity.result
index 8b37ba6f5..f713d4b08 100644
--- a/test/replication/sync_replication_sanity.result
+++ b/test/replication/sync_replication_sanity.result
@@ -224,6 +224,81 @@ box.space.sync:select{4}
  | - - [4]
  | ...
 
+--
+-- Async transactions should wait for existing sync transactions
+-- finish.
+--
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+-- Start 2 fibers, which will execute one right after the other
+-- in the same event loop iteration.
+f = fiber.create(box.space.sync.replace, box.space.sync, {5}) s:replace{5}
+ | ---
+ | ...
+f:status()
+ | ---
+ | - dead
+ | ...
+s:select{5}
+ | ---
+ | - - [5]
+ | ...
+box.space.sync:select{5}
+ | ---
+ | - - [5]
+ | ...
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.space.test:select{5}
+ | ---
+ | - - [5]
+ | ...
+box.space.sync:select{5}
+ | ---
+ | - - [5]
+ | ...
+-- Ensure sync rollback will affect all pending async transactions
+-- too.
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_timeout = 0.001, replication_synchro_quorum = 3}
+ | ---
+ | ...
+f = fiber.create(box.space.sync.replace, box.space.sync, {6}) s:replace{6}
+ | ---
+ | - error: Quorum collection for a synchronous transaction is timed out
+ | ...
+f:status()
+ | ---
+ | - dead
+ | ...
+s:select{6}
+ | ---
+ | - []
+ | ...
+box.space.sync:select{6}
+ | ---
+ | - []
+ | ...
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.space.test:select{6}
+ | ---
+ | - []
+ | ...
+box.space.sync:select{6}
+ | ---
+ | - []
+ | ...
+
 -- Cleanup.
 test_run:cmd('switch default')
  | ---
diff --git a/test/replication/sync_replication_sanity.test.lua b/test/replication/sync_replication_sanity.test.lua
index b0326fd4b..f84b6ee19 100644
--- a/test/replication/sync_replication_sanity.test.lua
+++ b/test/replication/sync_replication_sanity.test.lua
@@ -92,6 +92,32 @@ box.space.sync:replace{4}
 test_run:switch('replica')
 box.space.sync:select{4}
 
+--
+-- Async transactions should wait for existing sync transactions
+-- finish.
+--
+test_run:switch('default')
+-- Start 2 fibers, which will execute one right after the other
+-- in the same event loop iteration.
+f = fiber.create(box.space.sync.replace, box.space.sync, {5}) s:replace{5}
+f:status()
+s:select{5}
+box.space.sync:select{5}
+test_run:switch('replica')
+box.space.test:select{5}
+box.space.sync:select{5}
+-- Ensure sync rollback will affect all pending async transactions
+-- too.
+test_run:switch('default')
+box.cfg{replication_synchro_timeout = 0.001, replication_synchro_quorum = 3}
+f = fiber.create(box.space.sync.replace, box.space.sync, {6}) s:replace{6}
+f:status()
+s:select{6}
+box.space.sync:select{6}
+test_run:switch('replica')
+box.space.test:select{6}
+box.space.sync:select{6}
+
 -- Cleanup.
 test_run:cmd('switch default')
 
diff --git a/test/unit/snap_quorum_delay.cc b/test/unit/snap_quorum_delay.cc
index 7a200673a..e6cf381bf 100644
--- a/test/unit/snap_quorum_delay.cc
+++ b/test/unit/snap_quorum_delay.cc
@@ -97,8 +97,12 @@ txn_process_func(va_list ap)
 	enum process_type process_type = (enum process_type)va_arg(ap, int);
 	struct txn *txn = txn_begin();
 	txn->fiber = fiber();
-	/* Set the TXN_WAIT_ACK flag to simulate a sync transaction.*/
+	/*
+	 * Set the TXN_WAIT_ACK + SYNC flags to simulate a sync
+	 * transaction.
+	 */
 	txn_set_flag(txn, TXN_WAIT_ACK);
+	txn_set_flag(txn, TXN_WAIT_SYNC);
 	/*
 	 * The true way to push the transaction to limbo is to call
 	 * txn_commit() for sync transaction. But, if txn_commit()
-- 
2.21.1 (Apple Git-122.3)

  parent reply	other threads:[~2020-06-29 23:15 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1593723973.git.sergeyb@tarantool.org>
2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 00/19] Sync replication Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 01/19] replication: introduce space.is_sync option Vladislav Shpilevoy
2020-06-30 23:00     ` Vladislav Shpilevoy
2020-07-01 15:55       ` Sergey Ostanevich
2020-07-01 23:46         ` Vladislav Shpilevoy
2020-07-02  8:25       ` Serge Petrenko
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 10/19] txn_limbo: add ROLLBACK processing Vladislav Shpilevoy
2020-07-05 15:29     ` Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 11/19] box: rework local_recovery to use async txn_commit Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 12/19] replication: support ROLLBACK and CONFIRM during recovery Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 13/19] replication: add test for synchro CONFIRM/ROLLBACK Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 14/19] applier: remove writer_cond Vladislav Shpilevoy
2020-07-02  9:13     ` Serge Petrenko
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 15/19] applier: send heartbeat not only on commit, but on any write Vladislav Shpilevoy
2020-07-01 23:55     ` Vladislav Shpilevoy
2020-07-03 12:23     ` Serge Petrenko
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 16/19] txn_limbo: add diag_set in txn_limbo_wait_confirm Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 17/19] replication: delay initial join until confirmation Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 18/19] replication: only send confirmed data during final join Vladislav Shpilevoy
2020-06-29 23:15   ` Vladislav Shpilevoy [this message]
2020-07-01 17:12     ` [Tarantool-patches] [PATCH v2 19/19] replication: block async transactions when not empty limbo Sergey Ostanevich
2020-07-01 23:47       ` Vladislav Shpilevoy
2020-07-03 12:28     ` Serge Petrenko
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 02/19] replication: introduce replication_synchro_* cfg options Vladislav Shpilevoy
2020-07-01 16:05     ` Sergey Ostanevich
2020-07-01 23:46       ` Vladislav Shpilevoy
2020-07-02  8:29     ` Serge Petrenko
2020-07-02 23:36       ` Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 03/19] txn: add TXN_WAIT_ACK flag Vladislav Shpilevoy
2020-07-01 17:14     ` Sergey Ostanevich
2020-07-01 23:46     ` Vladislav Shpilevoy
2020-07-02  8:30     ` Serge Petrenko
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 04/19] replication: make sync transactions wait quorum Vladislav Shpilevoy
2020-06-30 23:00     ` Vladislav Shpilevoy
2020-07-02  8:48     ` Serge Petrenko
2020-07-03 21:16       ` Vladislav Shpilevoy
2020-07-05 16:05     ` Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 05/19] xrow: introduce CONFIRM and ROLLBACK entries Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 06/19] txn: introduce various reasons for txn rollback Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 07/19] replication: write and read CONFIRM entries Vladislav Shpilevoy
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 08/19] replication: add support of qsync to the snapshot machinery Vladislav Shpilevoy
2020-07-02  8:52     ` Serge Petrenko
2020-07-08 11:43     ` Leonid Vasiliev
2020-06-29 23:15   ` [Tarantool-patches] [PATCH v2 09/19] txn_limbo: add timeout when waiting for acks Vladislav Shpilevoy
2020-06-29 23:22   ` [Tarantool-patches] [PATCH v2 00/19] Sync replication Vladislav Shpilevoy
2020-06-30 23:00   ` [Tarantool-patches] [PATCH v2 20/19] replication: add test for quorum 1 Vladislav Shpilevoy
2020-07-03 12:32     ` Serge Petrenko
2020-07-02 21:13   ` [Tarantool-patches] [PATCH 1/4] replication: regression test on gh-5119 [not fixed] sergeyb
2020-07-02 21:13   ` [Tarantool-patches] [PATCH 2/4] replication: add advanced tests for sync replication sergeyb
2020-07-02 22:46     ` Sergey Bronnikov
2020-07-02 23:20     ` Vladislav Shpilevoy
2020-07-06 12:30       ` Sergey Bronnikov
2020-07-06 23:31     ` Vladislav Shpilevoy
2020-07-07 12:12       ` Sergey Bronnikov
2020-07-07 20:57         ` Vladislav Shpilevoy
2020-07-08 12:07           ` Sergey Bronnikov
2020-07-08 22:13             ` Vladislav Shpilevoy
2020-07-09  9:39               ` Sergey Bronnikov
2020-07-02 21:13   ` [Tarantool-patches] [PATCH 3/4] replication: add tests for sync replication with anon replica sergeyb
2020-07-06 23:31     ` Vladislav Shpilevoy
2020-07-02 21:13   ` [Tarantool-patches] [PATCH 4/4] replication: add tests for sync replication with snapshots sergeyb
2020-07-02 22:46     ` Sergey Bronnikov
2020-07-02 23:20     ` Vladislav Shpilevoy
2020-07-06 23:31     ` Vladislav Shpilevoy
2020-07-07 16:00       ` Sergey Bronnikov
2020-07-06 23:31   ` [Tarantool-patches] [PATCH] Add new error injection constant ERRINJ_SYNC_TIMEOUT Vladislav Shpilevoy
2020-07-10  0:50   ` [Tarantool-patches] [PATCH v2 00/19] Sync replication Vladislav Shpilevoy
2020-07-10  7:40   ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6cce3594cdf04b56e18b73c7f1e9fcaebbbcf215.1593472477.git.v.shpilevoy@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=sergepetrenko@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v2 19/19] replication: block async transactions when not empty limbo' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox