[PATCH v2 5/6] [RAW] swim: send one UDP packet per EV_WRITE event

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Dec 25 22:19:28 MSK 2018


Since the first commit of #3234, where the anti-entropy component
was introduced, a single SWIM message could be split into multiple
UDP packets. But so far these packets were being sent in a mere
'for' loop on a single EV_WRITE event. It is not a proper way of
using an event loop, but it is the simplest one, because it does
not require any externally stored positions in packet lists.

The previous commit introduced such a global list of UDP packets
to send, and now it is much simpler to send each packet on a
separate EV_WRITE event. This commit does it.

Part of #3234
---
 src/lib/swim/swim.c | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index f880066c5..ed0b323e5 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -191,6 +191,12 @@ struct swim_msg_part {
 	struct stailq_entry in_msg;
 	/** Real size. */
 	int size;
+	/**
+	 * True, if this message part carries a failure detection
+	 * component. Used to decide whether SWIM should wait for
+	 * an ACK.
+	 */
+	bool is_ack_required;
 	/** Packet body. */
 	char body[UDP_PACKET_SIZE];
 };
@@ -255,6 +261,7 @@ swim_msg_part_new(struct swim_msg *msg)
 	}
 	stailq_add_tail_entry(&msg->parts, res, in_msg);
 	res->size = 0;
+	res->is_ack_required = false;
 	return res;
 }
 
@@ -491,6 +498,8 @@ struct swim {
 	 * here, is dead too long and removed.
 	 */
 	struct swim_msg cached_round_msg;
+	/** Next to send position in cached_round_msg. */
+	struct swim_msg_part *cached_msg_pos;
 };
 
 static inline uint64_t
@@ -504,6 +513,7 @@ cached_round_msg_invalidate(struct swim *swim)
 {
 	swim_msg_destroy(&swim->cached_round_msg);
 	swim_msg_create(&swim->cached_round_msg);
+	swim->cached_msg_pos = NULL;
 }
 
 /**
@@ -1031,6 +1041,7 @@ swim_encode_failure_detection(struct swim *swim, struct swim_msg *msg,
 				  swim->self->incarnation);
 	memcpy(pos, &fd_header_bin, size);
 	swim_msg_part_advance(part, size);
+	part->is_ack_required = true;
 	return 1;
 }
 
@@ -1123,6 +1134,7 @@ swim_encode_round_msg(struct swim *swim)
 		struct swim_member *member = swim->shuffled_members[i];
 		member->is_being_sent_in_this_round = true;
 	}
+	swim->cached_msg_pos = swim_msg_first_part(msg);
 	return 0;
 error:
 	cached_round_msg_invalidate(swim);
@@ -1171,16 +1183,22 @@ swim_send_round_msg(struct swim_io_task *task)
 	say_verbose("SWIM: send to %s",
 		    sio_strfaddr((struct sockaddr *) &m->addr,
 				 sizeof(m->addr)));
-	for (struct swim_msg_part *part =
-	     swim_msg_first_part(&swim->cached_round_msg); part != NULL;
-	     part = swim_msg_part_next(part)) {
-		if (swim->transport.send_round_msg(swim->output.fd, part->body,
-						   part->size,
-						   (struct sockaddr *) &m->addr,
-						   sizeof(m->addr)) == -1)
-			diag_log();
+	struct swim_msg_part *part = swim->cached_msg_pos;
+	if (swim->transport.send_round_msg(swim->output.fd, part->body,
+					   part->size,
+					   (struct sockaddr *) &m->addr,
+					   sizeof(m->addr)) == -1)
+		diag_log();
+	if (part->is_ack_required)
+		swim_member_schedule_ack_wait(swim, m);
+	part = swim_msg_part_next(part);
+	if (part != NULL) {
+		swim->cached_msg_pos = part;
+		/* Push again until all parts are sent. */
+		swim_io_task_push(task);
+		return;
 	}
-	swim_member_schedule_ack_wait(swim, m);
+	swim->cached_msg_pos = swim_msg_first_part(&swim->cached_round_msg);
 	swim_decrease_events_ttl(swim);
 	rlist_del_entry(m, in_queue_round);
 next_round_step:
-- 
2.17.2 (Apple Git-113)




More information about the Tarantool-patches mailing list