[server 1/5] replication: get rid of replica->pause_on_connect flag

Vladimir Davydov vdavydov.dev at gmail.com
Wed Jan 24 20:44:50 MSK 2018


replicaset_connect() leaves running those appliers that failed to
connect within the specified time period. To prevent them from entering
the 'subscribe' stage prematurely (i.e. before replicaset_follow() is
called), we set the replica->pause_on_connect flag, which forces them to
pause upon successful connection. We clear this flag in
replicaset_follow(). This juggling with flags looks ugly. Instead, let's
stop the failed appliers in replicaset_connect() and restart them in
replicaset_follow().

Follow-up #2958
---
 src/box/replication.cc | 20 +++++++++++++-------
 src/box/replication.h  |  5 -----
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/box/replication.cc b/src/box/replication.cc
index f901c365..498b9269 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -132,7 +132,6 @@ replica_new(void)
 	replica->gc = NULL;
 	rlist_create(&replica->in_anon);
 	trigger_create(&replica->on_connect, NULL, NULL, NULL);
-	replica->pause_on_connect = false;
 	return replica;
 }
 
@@ -227,7 +226,6 @@ replica_on_receive_uuid(struct trigger *trigger, void *event)
 
 	rlist_del_entry(replica, in_anon);
 
-	bool pause_on_connect = replica->pause_on_connect;
 	if (orig != NULL) {
 		/* Use existing struct replica */
 		orig->applier = applier;
@@ -237,8 +235,6 @@ replica_on_receive_uuid(struct trigger *trigger, void *event)
 		/* Add a new struct replica */
 		replicaset_insert(&replicaset, replica);
 	}
-	if (pause_on_connect)
-		applier_pause(applier);
 }
 
 /**
@@ -280,7 +276,6 @@ replicaset_update(struct applier **appliers, int count)
 			trigger_create(&replica->on_connect,
 				       replica_on_receive_uuid, NULL, NULL);
 			trigger_add(&applier->on_state, &replica->on_connect);
-			replica->pause_on_connect = true;
 			continue;
 		}
 
@@ -445,6 +440,14 @@ replicaset_connect(struct applier **appliers, int count, int quorum,
 	for (int i = 0; i < count; i++) {
 		/* Unregister the temporary trigger used to wake us up */
 		trigger_clear(&triggers[i].base);
+		/*
+		 * Stop appliers that failed to connect.
+		 * They will be restarted once we proceed
+		 * to 'subscribe', see replicaset_follow().
+		 */
+		struct applier *applier = appliers[i];
+		if (applier->state != APPLIER_CONNECTED)
+			applier_stop(applier);
 	}
 
 	/* Now all the appliers are connected, update the replica set. */
@@ -467,11 +470,14 @@ replicaset_follow(void)
 {
 	struct replica *replica;
 	replicaset_foreach(replica) {
+		/* Resume connected appliers. */
 		if (replica->applier != NULL)
 			applier_resume(replica->applier);
 	}
-	rlist_foreach_entry(replica, &anon_replicas, in_anon)
-		replica->pause_on_connect = false;
+	rlist_foreach_entry(replica, &anon_replicas, in_anon) {
+		/* Restart appliers that failed to connect. */
+		applier_start(replica->applier);
+	}
 }
 
 void
diff --git a/src/box/replication.h b/src/box/replication.h
index f2f113f3..5f192b20 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -175,11 +175,6 @@ struct replica {
 	 * the replica into the replica set.
 	 */
 	struct trigger on_connect;
-	/**
-	 * Set if the applier should be paused upon conecting
-	 * to the master.
-	 */
-	bool pause_on_connect;
 };
 
 enum {
-- 
2.11.0




More information about the Tarantool-patches mailing list