From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladislav Shpilevoy Subject: [PATCH v2 5/6] [RAW] swim: send one UDP packet per EV_WRITE event Date: Tue, 25 Dec 2018 22:19:28 +0300 Message-Id: <8c88a8dc8863cb04cdf958f826033ae4aecc3b83.1545765055.git.v.shpilevoy@tarantool.org> In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org Cc: vdavydov.dev@gmail.com, kostja@tarantool.org List-ID: Since the first commit of #3234, where the anti-entropy component was introduced, a single SWIM message could be split into multiple UDP packets. But so far these packets were being sent in a mere 'for' loop on a single EV_WRITE event. It is not a proper way of using the event loop, but it is the simplest one, because it does not require any externally stored positions in packet lists. The previous commit introduced such a global list of UDP packets to send, and now it is much simpler to send each packet on a separate EV_WRITE event. This commit does it. Part of #3234 --- src/lib/swim/swim.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c index f880066c5..ed0b323e5 100644 --- a/src/lib/swim/swim.c +++ b/src/lib/swim/swim.c @@ -191,6 +191,12 @@ struct swim_msg_part { struct stailq_entry in_msg; /** Real size. */ int size; + /** + * True, if this message part carries a failure detection + * component. Used to decide whether SWIM should wait for + * an ACK. + */ + bool is_ack_required; /** Packet body. */ char body[UDP_PACKET_SIZE]; }; @@ -255,6 +261,7 @@ swim_msg_part_new(struct swim_msg *msg) } stailq_add_tail_entry(&msg->parts, res, in_msg); res->size = 0; + res->is_ack_required = false; return res; } @@ -491,6 +498,8 @@ struct swim { * here, is dead too long and removed. */ struct swim_msg cached_round_msg; + /** Next to send position in cached_round_msg. 
*/ + struct swim_msg_part *cached_msg_pos; }; static inline uint64_t @@ -504,6 +513,7 @@ cached_round_msg_invalidate(struct swim *swim) { swim_msg_destroy(&swim->cached_round_msg); swim_msg_create(&swim->cached_round_msg); + swim->cached_msg_pos = NULL; } /** @@ -1031,6 +1041,7 @@ swim_encode_failure_detection(struct swim *swim, struct swim_msg *msg, swim->self->incarnation); memcpy(pos, &fd_header_bin, size); swim_msg_part_advance(part, size); + part->is_ack_required = true; return 1; } @@ -1123,6 +1134,7 @@ swim_encode_round_msg(struct swim *swim) struct swim_member *member = swim->shuffled_members[i]; member->is_being_sent_in_this_round = true; } + swim->cached_msg_pos = swim_msg_first_part(msg); return 0; error: cached_round_msg_invalidate(swim); @@ -1171,16 +1183,22 @@ swim_send_round_msg(struct swim_io_task *task) say_verbose("SWIM: send to %s", sio_strfaddr((struct sockaddr *) &m->addr, sizeof(m->addr))); - for (struct swim_msg_part *part = - swim_msg_first_part(&swim->cached_round_msg); part != NULL; - part = swim_msg_part_next(part)) { - if (swim->transport.send_round_msg(swim->output.fd, part->body, - part->size, - (struct sockaddr *) &m->addr, - sizeof(m->addr)) == -1) - diag_log(); + struct swim_msg_part *part = swim->cached_msg_pos; + if (swim->transport.send_round_msg(swim->output.fd, part->body, + part->size, + (struct sockaddr *) &m->addr, + sizeof(m->addr)) == -1) + diag_log(); + if (part->is_ack_required) + swim_member_schedule_ack_wait(swim, m); + part = swim_msg_part_next(part); + if (part != NULL) { + swim->cached_msg_pos = part; + /* Push again until all parts are sent. */ + swim_io_task_push(task); + return; } - swim_member_schedule_ack_wait(swim, m); + swim->cached_msg_pos = swim_msg_first_part(&swim->cached_round_msg); swim_decrease_events_ttl(swim); rlist_del_entry(m, in_queue_round); next_round_step: -- 2.17.2 (Apple Git-113)