Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join
@ 2020-06-29 15:32 Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 1/4] [tosquash] move wait_confirm from gc.c to txn_limbo.c Serge Petrenko
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-06-29 15:32 UTC (permalink / raw)
  To: v.shpilevoy, gorcunov, sergos, lvasiliev; +Cc: tarantool-patches

https://github.com/tarantool/tarantool/issues/5097
Branch: gh-4842-sync-replication

The first patch is placed after "replication: add support of qsync to
the snapshot machinery". It renames gc_wait_confirm() to
txn_limbo_wait_confirm() and moves the machinery to txn_limbo.c from gc.c

Three other patches are on top of the branch and are pretty self-explanatory.

Serge Petrenko (4):
  [tosquash] move wait_confirm from gc.c to txn_limbo.c
  txn_limbo: add diag_set in txn_limbo_wait_confirm
  replication: delay initial join until confirmation
  replication: only send confirmed data during final join

 src/box/applier.cc  |  9 +++++
 src/box/box.cc      | 33 ++++++++++++++++++
 src/box/gc.c        | 83 ++-------------------------------------------
 src/box/relay.cc    |  7 ++++
 src/box/txn_limbo.c | 81 +++++++++++++++++++++++++++++++++++++++++++
 src/box/txn_limbo.h | 42 +++++++++++++++++++++++
 6 files changed, 175 insertions(+), 80 deletions(-)

-- 
2.24.3 (Apple Git-128)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Tarantool-patches] [PATCH 1/4] [tosquash] move wait_confirm from gc.c to txn_limbo.c
  2020-06-29 15:32 [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Serge Petrenko
@ 2020-06-29 15:32 ` Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 2/4] txn_limbo: add diag_set in txn_limbo_wait_confirm Serge Petrenko
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-06-29 15:32 UTC (permalink / raw)
  To: v.shpilevoy, gorcunov, sergos, lvasiliev; +Cc: tarantool-patches

Move all the machinery for waiting for confirmation of the latest limbo
entry from gc.c to txn_limbo.c

Refactor gc_wait_confirm(void) to txn_limbo_wait_confirm(struct txn_limbo *)
and make it return immediately when the limbo is empty to simplify code
a bit.

Refactoring needed for #5097

[TO BE SQUASHED INTO THE PREVIOUS COMMIT]
---
 src/box/gc.c        | 83 ++-------------------------------------------
 src/box/txn_limbo.c | 74 ++++++++++++++++++++++++++++++++++++++++
 src/box/txn_limbo.h |  7 ++++
 3 files changed, 84 insertions(+), 80 deletions(-)

diff --git a/src/box/gc.c b/src/box/gc.c
index 9b7c510f6..170c0a97f 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -68,23 +68,6 @@ gc_cleanup_fiber_f(va_list);
 static int
 gc_checkpoint_fiber_f(va_list);
 
-/**
- * Waitpoint stores information about the progress of confirmation.
- * In the case of multimaster support, it will store a bitset
- * or array instead of the boolean.
- */
-struct confirm_waitpoint {
-	/**
-	 * Variable for wake up the fiber that is waiting for
-	 * the end of confirmation.
-	 */
-	struct fiber_cond confirm_cond;
-	/**
-	 * Result flag.
-	 */
-	bool is_confirm;
-};
-
 /**
  * Comparator used for ordering gc_consumer objects
  * lexicographically by their vclock in a binary tree.
@@ -396,64 +379,6 @@ gc_add_checkpoint(const struct vclock *vclock)
 	gc_schedule_cleanup();
 }
 
-static int
-gc_txn_commit_cb(struct trigger *trigger, void *event)
-{
-	(void)event;
-	struct confirm_waitpoint *cwp =
-		(struct confirm_waitpoint *)trigger->data;
-	cwp->is_confirm = true;
-	fiber_cond_signal(&cwp->confirm_cond);
-	return 0;
-}
-
-static int
-gc_txn_rollback_cb(struct trigger *trigger, void *event)
-{
-	(void)event;
-	struct confirm_waitpoint *cwp =
-		(struct confirm_waitpoint *)trigger->data;
-	fiber_cond_signal(&cwp->confirm_cond);
-	return 0;
-}
-
-/**
- * Waiting for confirmation of all "sync" transactions
- * during confirm timeout or fail.
- */
-static int
-gc_wait_confirm(void)
-{
-	/* initialization of a waitpoint. */
-	struct confirm_waitpoint cwp;
-	fiber_cond_create(&cwp.confirm_cond);
-	cwp.is_confirm = false;
-
-	/* Set triggers for the last limbo transaction. */
-	struct trigger on_complete;
-	trigger_create(&on_complete, gc_txn_commit_cb, &cwp, NULL);
-	struct trigger on_rollback;
-	trigger_create(&on_rollback, gc_txn_rollback_cb, &cwp, NULL);
-	struct txn_limbo_entry *tle = txn_limbo_last_entry(&txn_limbo);
-	txn_on_commit(tle->txn, &on_complete);
-	txn_on_rollback(tle->txn, &on_rollback);
-
-	int rc = fiber_cond_wait_timeout(&cwp.confirm_cond,
-					 txn_limbo_confirm_timeout(&txn_limbo));
-	fiber_cond_destroy(&cwp.confirm_cond);
-	if (rc != 0) {
-		/* Clear the triggers if the timeout has been reached. */
-		trigger_clear(&on_complete);
-		trigger_clear(&on_rollback);
-		return -1;
-	}
-	if (!cwp.is_confirm) {
-		/* The transaction has been rollbacked. */
-		return -1;
-	}
-	return 0;
-}
-
 static int
 gc_do_checkpoint(bool is_scheduled)
 {
@@ -478,11 +403,9 @@ gc_do_checkpoint(bool is_scheduled)
 	 * Wait the confirms on all "sync" transactions before
 	 * create a snapshot.
 	 */
-	if (!txn_limbo_is_empty(&txn_limbo)) {
-		rc = gc_wait_confirm();
-		if (rc != 0)
-			goto out;
-	}
+	rc = txn_limbo_wait_confirm(&txn_limbo);
+	if (rc != 0)
+		goto out;
 
 	rc = engine_commit_checkpoint(&checkpoint.vclock);
 	if (rc != 0)
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 18a50d4d3..d3751a28b 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -339,6 +339,80 @@ txn_limbo_confirm_timeout(struct txn_limbo *limbo)
 	return replication_synchro_timeout;
 }
 
+/**
+ * Waitpoint stores information about the progress of confirmation.
+ * In the case of multimaster support, it will store a bitset
+ * or array instead of the boolean.
+ */
+struct confirm_waitpoint {
+	/**
+	 * Variable for wake up the fiber that is waiting for
+	 * the end of confirmation.
+	 */
+	struct fiber_cond confirm_cond;
+	/**
+	 * Result flag.
+	 */
+	bool is_confirm;
+};
+
+static int
+txn_commit_cb(struct trigger *trigger, void *event)
+{
+	(void)event;
+	struct confirm_waitpoint *cwp =
+		(struct confirm_waitpoint *)trigger->data;
+	cwp->is_confirm = true;
+	fiber_cond_signal(&cwp->confirm_cond);
+	return 0;
+}
+
+static int
+txn_rollback_cb(struct trigger *trigger, void *event)
+{
+	(void)event;
+	struct confirm_waitpoint *cwp =
+		(struct confirm_waitpoint *)trigger->data;
+	fiber_cond_signal(&cwp->confirm_cond);
+	return 0;
+}
+
+int
+txn_limbo_wait_confirm(struct txn_limbo *limbo)
+{
+	if (txn_limbo_is_empty(limbo))
+		return 0;
+
+	/* initialization of a waitpoint. */
+	struct confirm_waitpoint cwp;
+	fiber_cond_create(&cwp.confirm_cond);
+	cwp.is_confirm = false;
+
+	/* Set triggers for the last limbo transaction. */
+	struct trigger on_complete;
+	trigger_create(&on_complete, txn_commit_cb, &cwp, NULL);
+	struct trigger on_rollback;
+	trigger_create(&on_rollback, txn_rollback_cb, &cwp, NULL);
+	struct txn_limbo_entry *tle = txn_limbo_last_entry(limbo);
+	txn_on_commit(tle->txn, &on_complete);
+	txn_on_rollback(tle->txn, &on_rollback);
+
+	int rc = fiber_cond_wait_timeout(&cwp.confirm_cond,
+					 txn_limbo_confirm_timeout(limbo));
+	fiber_cond_destroy(&cwp.confirm_cond);
+	if (rc != 0) {
+		/* Clear the triggers if the timeout has been reached. */
+		trigger_clear(&on_complete);
+		trigger_clear(&on_rollback);
+		return -1;
+	}
+	if (!cwp.is_confirm) {
+		/* The transaction has been rolled back. */
+		return -1;
+	}
+	return 0;
+}
+
 void
 txn_limbo_init(void)
 {
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 987cf9271..138093c7c 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -198,6 +198,13 @@ txn_limbo_last_entry(struct txn_limbo *limbo)
 double
 txn_limbo_confirm_timeout(struct txn_limbo *limbo);
 
+/**
+ * Waiting for confirmation of all "sync" transactions
+ * during confirm timeout or fail.
+ */
+int
+txn_limbo_wait_confirm(struct txn_limbo *limbo);
+
 void
 txn_limbo_init();
 
-- 
2.24.3 (Apple Git-128)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Tarantool-patches] [PATCH 2/4] txn_limbo: add diag_set in txn_limbo_wait_confirm
  2020-06-29 15:32 [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 1/4] [tosquash] move wait_confirm from gc.c to txn_limbo.c Serge Petrenko
@ 2020-06-29 15:32 ` Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 3/4] replication: delay initial join until confirmation Serge Petrenko
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-06-29 15:32 UTC (permalink / raw)
  To: v.shpilevoy, gorcunov, sergos, lvasiliev; +Cc: tarantool-patches

Add failure reason to txn_limbo_wait_confirm

Prerequisite #5097
---
 src/box/txn_limbo.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index d3751a28b..abea26731 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -404,10 +404,12 @@ txn_limbo_wait_confirm(struct txn_limbo *limbo)
 		/* Clear the triggers if the timeout has been reached. */
 		trigger_clear(&on_complete);
 		trigger_clear(&on_rollback);
+		diag_set(ClientError, ER_SYNC_QUORUM_TIMEOUT);
 		return -1;
 	}
 	if (!cwp.is_confirm) {
 		/* The transaction has been rolled back. */
+		diag_set(ClientError, ER_SYNC_ROLLBACK);
 		return -1;
 	}
 	return 0;
-- 
2.24.3 (Apple Git-128)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Tarantool-patches] [PATCH 3/4] replication: delay initial join until confirmation
  2020-06-29 15:32 [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 1/4] [tosquash] move wait_confirm from gc.c to txn_limbo.c Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 2/4] txn_limbo: add diag_set in txn_limbo_wait_confirm Serge Petrenko
@ 2020-06-29 15:32 ` Serge Petrenko
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 4/4] replication: only send confirmed data during final join Serge Petrenko
  2020-06-29 21:14 ` [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Vladislav Shpilevoy
  4 siblings, 0 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-06-29 15:32 UTC (permalink / raw)
  To: v.shpilevoy, gorcunov, sergos, lvasiliev; +Cc: tarantool-patches

All the data that master sends during the join stage (both initial and
final) is embedded into the first snapshot created on replica, so this
data mustn't contain any unconfirmed or rolled back synchronous
transactions.

Make sure that master starts sending the initial data, which contains a
snapshot-like dump of all the spaces only after the latest synchronous
tx it has is confirmed. In case of rollback, the replica may retry
joining.

Part of #5097
---
 src/box/applier.cc | 9 +++++++++
 src/box/relay.cc   | 7 +++++++
 2 files changed, 16 insertions(+)

diff --git a/src/box/applier.cc b/src/box/applier.cc
index 2d1047d43..5895afc31 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -1183,6 +1183,15 @@ applier_f(va_list ap)
 				applier_log_error(applier, e);
 				applier_disconnect(applier, APPLIER_LOADING);
 				goto reconnect;
+			} else if (e->errcode() == ER_SYNC_QUORUM_TIMEOUT ||
+				   e->errcode() == ER_SYNC_ROLLBACK) {
+				/*
+				 * Join failure due to synchronous
+				 * transaction rollback.
+				 */
+				applier_log_error(applier, e);
+				applier_disconnect(applier, APPLIER_LOADING);
+				goto reconnect;
 			} else if (e->errcode() == ER_CFG ||
 				   e->errcode() == ER_ACCESS_DENIED ||
 				   e->errcode() == ER_NO_SUCH_USER ||
diff --git a/src/box/relay.cc b/src/box/relay.cc
index 29588b6ca..a7843a8c2 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -321,6 +321,13 @@ relay_initial_join(int fd, uint64_t sync, struct vclock *vclock)
 	if (wal_sync(vclock) != 0)
 		diag_raise();
 
+	/*
+	 * Start sending data only when the latest sync
+	 * transaction is confirmed.
+	 */
+	if (txn_limbo_wait_confirm(&txn_limbo) != 0)
+		diag_raise();
+
 	/* Respond to the JOIN request with the current vclock. */
 	struct xrow_header row;
 	xrow_encode_vclock_xc(&row, vclock);
-- 
2.24.3 (Apple Git-128)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Tarantool-patches] [PATCH 4/4] replication: only send confirmed data during final join
  2020-06-29 15:32 [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Serge Petrenko
                   ` (2 preceding siblings ...)
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 3/4] replication: delay initial join until confirmation Serge Petrenko
@ 2020-06-29 15:32 ` Serge Petrenko
  2020-06-29 21:14 ` [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Vladislav Shpilevoy
  4 siblings, 0 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-06-29 15:32 UTC (permalink / raw)
  To: v.shpilevoy, gorcunov, sergos, lvasiliev; +Cc: tarantool-patches

Final join (or register) stage is needed to deliver the replica its
_cluster registration. Since this stage is followed by a snapshot on
replica, the data received during this stage must be confirmed.

Make master check that there are no rollbacks for the data to be sent
during final join and that all the data is confirmed before final join
starts.

Closes #5097
---
 src/box/box.cc      | 33 +++++++++++++++++++++++++++++++++
 src/box/txn_limbo.c |  5 +++++
 src/box/txn_limbo.h | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

diff --git a/src/box/box.cc b/src/box/box.cc
index dfd7dcb5a..abf9d0b2a 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1712,6 +1712,11 @@ box_process_register(struct ev_io *io, struct xrow_header *header)
 	say_info("registering replica %s at %s",
 		 tt_uuid_str(&instance_uuid), sio_socketname(io->fd));
 
+	/* See box_process_join() */
+	txn_limbo_start_recording(&txn_limbo);
+	auto limbo_guard = make_scoped_guard([&] {
+		txn_limbo_stop_recording(&txn_limbo);
+	});
 	struct vclock start_vclock;
 	vclock_copy(&start_vclock, &replicaset.vclock);
 
@@ -1727,6 +1732,14 @@ box_process_register(struct ev_io *io, struct xrow_header *header)
 	struct vclock stop_vclock;
 	vclock_copy(&stop_vclock, &replicaset.vclock);
 
+	if (txn_limbo_got_rollback(&txn_limbo))
+		tnt_raise(ClientError, ER_SYNC_ROLLBACK);
+	txn_limbo_stop_recording(&txn_limbo);
+	limbo_guard.is_active  = false;
+
+	if (txn_limbo_wait_confirm(&txn_limbo) != 0)
+		diag_raise();
+
 	/*
 	 * Feed replica with WALs in range
 	 * (start_vclock, stop_vclock) so that it gets its
@@ -1848,6 +1861,18 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 	say_info("joining replica %s at %s",
 		 tt_uuid_str(&instance_uuid), sio_socketname(io->fd));
 
+	/*
+	 * In order to join a replica, master has to make sure it
+	 * doesn't send unconfirmed data. We have to check that
+	 * there are no rolled back transactions between
+	 * start_vclock and stop_vclock, and that the data right
+	 * before stop_vclock is confirmed, before we can proceed
+	 * to final join.
+	 */
+	txn_limbo_start_recording(&txn_limbo);
+	auto limbo_guard = make_scoped_guard([&] {
+		txn_limbo_stop_recording(&txn_limbo);
+	});
 	/*
 	 * Initial stream: feed replica with dirty data from engines.
 	 */
@@ -1869,6 +1894,14 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 	struct vclock stop_vclock;
 	vclock_copy(&stop_vclock, &replicaset.vclock);
 
+	if (txn_limbo_got_rollback(&txn_limbo))
+		tnt_raise(ClientError, ER_SYNC_ROLLBACK);
+	txn_limbo_stop_recording(&txn_limbo);
+	limbo_guard.is_active  = false;
+
+	if (txn_limbo_wait_confirm(&txn_limbo) != 0)
+		diag_raise();
+
 	/* Send end of initial stage data marker */
 	struct xrow_header row;
 	xrow_encode_vclock_xc(&row, &stop_vclock);
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index abea26731..fbe4dcecf 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -40,6 +40,8 @@ txn_limbo_create(struct txn_limbo *limbo)
 	rlist_create(&limbo->queue);
 	limbo->instance_id = REPLICA_ID_NIL;
 	vclock_create(&limbo->vclock);
+	limbo->is_recording = false;
+	limbo->got_rollback = false;
 }
 
 struct txn_limbo_entry *
@@ -90,8 +92,11 @@ txn_limbo_pop(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 	assert(!rlist_empty(&entry->in_queue));
 	assert(rlist_last_entry(&limbo->queue, struct txn_limbo_entry,
 				 in_queue) == entry);
+	assert(entry->is_rollback);
 	(void) limbo;
 	rlist_del_entry(entry, in_queue);
+	if (limbo->is_recording)
+		limbo->got_rollback = true;
 }
 
 void
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 138093c7c..a9fc83b0c 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -117,6 +117,13 @@ struct txn_limbo {
 	 * transactions, created on the limbo's owner node.
 	 */
 	struct vclock vclock;
+	/** Set to true when limbo records rollback occurrence. */
+	bool is_recording;
+	/**
+	 * Whether any rollbacks happened during the recording
+	 * period.
+	 */
+	bool got_rollback;
 };
 
 /**
@@ -126,6 +133,34 @@ struct txn_limbo {
  */
 extern struct txn_limbo txn_limbo;
 
+/**
+ * Make limbo remember the occurrence of rollbacks due to failed
+ * quorum collection.
+ */
+static inline void
+txn_limbo_start_recording(struct txn_limbo *limbo)
+{
+	limbo->is_recording = true;
+}
+
+/** Stop the recording of failed quorum collection events. */
+static inline void
+txn_limbo_stop_recording(struct txn_limbo *limbo)
+{
+	limbo->is_recording = false;
+	limbo->got_rollback = false;
+}
+
+/**
+ * Returns true in case the limbo rolled back any tx since the
+ * moment txn_limbo_start_recording() was called.
+ */
+static inline bool
+txn_limbo_got_rollback(struct txn_limbo *limbo)
+{
+	return limbo->got_rollback;
+}
+
 /**
  * Allocate, create, and append a new transaction to the limbo.
  * The limbo entry is allocated on the transaction's region.
-- 
2.24.3 (Apple Git-128)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join
  2020-06-29 15:32 [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Serge Petrenko
                   ` (3 preceding siblings ...)
  2020-06-29 15:32 ` [Tarantool-patches] [PATCH 4/4] replication: only send confirmed data during final join Serge Petrenko
@ 2020-06-29 21:14 ` Vladislav Shpilevoy
  4 siblings, 0 replies; 6+ messages in thread
From: Vladislav Shpilevoy @ 2020-06-29 21:14 UTC (permalink / raw)
  To: Serge Petrenko, gorcunov, sergos, lvasiliev; +Cc: tarantool-patches

Hi! Thanks for the patch!

All looks fine at a first glance. I propose to add any fixes as tosquash
commits if any will appear.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-06-29 21:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-29 15:32 [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Serge Petrenko
2020-06-29 15:32 ` [Tarantool-patches] [PATCH 1/4] [tosquash] move wait_confirm from gc.c to txn_limbo.c Serge Petrenko
2020-06-29 15:32 ` [Tarantool-patches] [PATCH 2/4] txn_limbo: add diag_set in txn_limbo_wait_confirm Serge Petrenko
2020-06-29 15:32 ` [Tarantool-patches] [PATCH 3/4] replication: delay initial join until confirmation Serge Petrenko
2020-06-29 15:32 ` [Tarantool-patches] [PATCH 4/4] replication: only send confirmed data during final join Serge Petrenko
2020-06-29 21:14 ` [Tarantool-patches] [PATCH 0/4] make master send only confirmed data during join Vladislav Shpilevoy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox