From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v3 2/6] [RAW] swim: introduce failure detection component References: <9b4bdddc30f9554b85c9890c0d6c70645c0c7930.1546077015.git.v.shpilevoy@tarantool.org> <20190109134809.GE20509@chai> From: Vladislav Shpilevoy Message-ID: <7af50e9b-3dd7-d959-958f-20420fe96294@tarantool.org> Date: Tue, 15 Jan 2019 17:42:16 +0300 MIME-Version: 1.0 In-Reply-To: <20190109134809.GE20509@chai> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Konstantin Osipov Cc: vdavydov.dev@gmail.com List-ID: On 09/01/2019 16:48, Konstantin Osipov wrote: > * Vladislav Shpilevoy [18/12/29 15:07]: >> enum { >> /** How often to send membership messages and pings. */ >> HEARTBEAT_RATE_DEFAULT = 1, >> + /** >> + * If a ping was sent, it is considered to be lost after >> + * this time without an ack. >> + */ >> + ACK_TIMEOUT = 1, > > The timeout should be configurable. A reasonable default looks > more close 30 seconds at least - we have many cases in > (malfunctioning) production when long requests stall the event > loop for 10-15 seconds, such requests should not lead to > membership storms. Done. diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c index 97395a3a9..1c0cc2cd4 100644 --- a/src/lib/swim/swim.c +++ b/src/lib/swim/swim.c @@ -96,7 +96,7 @@ enum { * If a ping was sent, it is considered to be lost after * this time without an ack. */ - ACK_TIMEOUT = 1, + ACK_TIMEOUT_DEFAULT = 30, /** * If a member has not been responding to pings this * number of times, it is considered to be dead. @@ -765,7 +765,7 @@ swim_schedule_ping(struct swim *swim, struct swim_member *member) /** * Check for failed pings. A ping is failed if an ack was not - * received during ACK_TIMEOUT. A failed ping is resent here. + * received during ACK timeout. A failed ping is resent here. */ static void swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events) @@ -775,10 +775,11 @@ swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events) (void) events; struct swim *swim = (struct swim *) p->data; struct swim_member *m, *tmp; + double ack_timeout = swim->wait_ack_tick.interval; double current_time = fiber_time(); rlist_foreach_entry_safe(m, &swim->queue_wait_ack, in_queue_wait_ack, tmp) { - if (current_time - m->ping_ts < ACK_TIMEOUT) + if (current_time - m->ping_ts < ack_timeout) break; ++m->failed_pings; if (m->failed_pings >= NO_ACKS_TO_GC) { @@ -1113,7 +1114,7 @@ swim_new(const struct swim_transport_vtab *transport_vtab) /* Failure detection component. */ rlist_create(&swim->queue_wait_ack); ev_init(&swim->wait_ack_tick, swim_check_acks); - ev_periodic_set(&swim->wait_ack_tick, 0, ACK_TIMEOUT, NULL); + ev_periodic_set(&swim->wait_ack_tick, 0, ACK_TIMEOUT_DEFAULT, NULL); swim->wait_ack_tick.data = (void *) swim; return swim; } @@ -1133,7 +1134,8 @@ swim_uri_to_addr(const char *uri, struct sockaddr_in *addr) } int -swim_cfg(struct swim *swim, const char *uri, double heartbeat_rate) +swim_cfg(struct swim *swim, const char *uri, double heartbeat_rate, + double ack_timeout) { struct sockaddr_in addr; if (swim_uri_to_addr(uri, &addr) != 0) @@ -1154,6 +1156,9 @@ swim_cfg(struct swim *swim, const char *uri, double heartbeat_rate) if (swim->round_tick.interval != heartbeat_rate && heartbeat_rate > 0) ev_periodic_set(&swim->round_tick, 0, heartbeat_rate, NULL); + if (swim->wait_ack_tick.interval != ack_timeout && ack_timeout > 0) + ev_periodic_set(&swim->wait_ack_tick, 0, ack_timeout, NULL); + swim->self = new_self; return 0; } diff --git a/src/lib/swim/swim.h b/src/lib/swim/swim.h index b54fc47b2..001900311 100644 --- a/src/lib/swim/swim.h +++ b/src/lib/swim/swim.h @@ -56,12 +56,15 @@ swim_new(const struct swim_transport_vtab *transport_vtab); * @heartbeat_rate seconds. It is rather the protocol * speed. Protocol period depends on member count and * @heartbeat_rate. + * @param ack_timeout Time in seconds after which a ping is + * considered to be unacknowledged. * * @retval 0 Success. * @retval -1 Error. */ int -swim_cfg(struct swim *swim, const char *uri, double heartbeat_rate); +swim_cfg(struct swim *swim, const char *uri, double heartbeat_rate, + double ack_timeout); /** * Stop listening and broadcasting messages, cleanup all internal diff --git a/src/lua/swim.c b/src/lua/swim.c index 6a78e5dd5..8b6f0b0b0 100644 --- a/src/lua/swim.c +++ b/src/lua/swim.c @@ -75,6 +75,28 @@ lua_swim_gc(struct lua_State *L) return 0; } +static inline double +lua_swim_get_timeout_field(struct lua_State *L, int ncfg, + const char *fieldname, const char *funcname) +{ + double timeout; + lua_getfield(L, ncfg, fieldname); + if (lua_isnumber(L, -1)) { + timeout = lua_tonumber(L, -1); + if (timeout <= 0) { + return luaL_error(L, "swim.%s: %s should be positive "\ + "number", funcname, fieldname); + } + } else if (! lua_isnil(L, -1)) { + return luaL_error(L, "swim.%s: %s should be positive number", + funcname, fieldname); + } else { + timeout = -1; + } + lua_pop(L, 1); + return timeout; +} + /** * Configure @a swim instance using a table stored in @a ncfg-th * position on Lua stack. @@ -106,23 +128,12 @@ lua_swim_cfg_impl(struct lua_State *L, int ncfg, struct swim *swim, } lua_pop(L, 1); - double heartbeat_rate; - lua_getfield(L, ncfg, "heartbeat"); - if (lua_isnumber(L, -1)) { - heartbeat_rate = lua_tonumber(L, -1); - if (heartbeat_rate <= 0) { - return luaL_error(L, "swim.%s: heartbeat should be "\ - "positive number", funcname); - } - } else if (! lua_isnil(L, -1)) { - return luaL_error(L, "swim.%s: heartbeat should be positive "\ - "number", funcname); - } else { - heartbeat_rate = -1; - } - lua_pop(L, 1); + double heartbeat_rate = + lua_swim_get_timeout_field(L, ncfg, "heartbeat", funcname); + double ack_timeout = + lua_swim_get_timeout_field(L, ncfg, "ack_timeout", funcname); - return swim_cfg(swim, server_uri, heartbeat_rate); + return swim_cfg(swim, server_uri, heartbeat_rate, ack_timeout); } static int > >> + /** >> + * If a member has not been responding to pings this >> + * number of times, it is considered to be dead. >> + */ >> + NO_ACKS_TO_DEAD = 3, >> + /** >> + * If a not pinned member confirmed to be dead, it is >> + * removed from the membership after at least this number >> + * of failed pings. >> + */ >> + NO_ACKS_TO_GC = NO_ACKS_TO_DEAD + 2, >> }; >> >> + bool is_pinned; >> + /** Growing number to refute old messages. */ > > reject, or perhaps ignore? > > refute is usually used for arguments in a heated conversation. No, exactly refute. Reject/ignore are almost the same in this context and mean 'do nothing'. Refute means, according to Oxford Dictionary: "Deny or contradict (a statement or accusation). Prove (a statement or theory) to be wrong or false; disprove." Here the swim instance refutes incorrect assumptions about its own incarnation emitted by other cluster members. > >> + /** >> + * How many pings did not receive an ack in a row. After >> + * a threshold the instance is marked as dead. After more >> + * it is removed from the table (if not pinned). >> + */ >> + int failed_pings; > > These are more like unacknowledged pings. Have they failed? Maybe. Done. diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c index 1c0cc2cd4..75f57f69f 100644 --- a/src/lib/swim/swim.c +++ b/src/lib/swim/swim.c @@ -105,7 +105,7 @@ enum { /** * If a not pinned member confirmed to be dead, it is * removed from the membership after at least this number - * of failed pings. + * of unacknowledged pings. */ NO_ACKS_TO_GC = NO_ACKS_TO_DEAD + 2, }; @@ -180,7 +180,7 @@ struct swim_member { * a threshold the instance is marked as dead. After more * it is removed from the table (if not pinned). */ - int failed_pings; + int unacknowledged_pings; /** When the latest ping was sent to this member. */ double ping_ts; /** Ready at hand regular ACK task. */ @@ -764,8 +764,9 @@ swim_schedule_ping(struct swim *swim, struct swim_member *member) } /** - * Check for failed pings. A ping is failed if an ack was not - * received during ACK timeout. A failed ping is resent here. + * Check for unacknowledged pings. A ping is unacknowledged if an + * ack was not received during ACK timeout. An unacknowledged ping + * is resent here. */ static void swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events) @@ -781,13 +782,13 @@ swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events) tmp) { if (current_time - m->ping_ts < ack_timeout) break; - ++m->failed_pings; - if (m->failed_pings >= NO_ACKS_TO_GC) { + ++m->unacknowledged_pings; + if (m->unacknowledged_pings >= NO_ACKS_TO_GC) { if (!m->is_pinned) swim_member_delete(swim, m); continue; } - if (m->failed_pings >= NO_ACKS_TO_DEAD) + if (m->unacknowledged_pings >= NO_ACKS_TO_DEAD) m->status = MEMBER_DEAD; swim_schedule_ping(swim, m); rlist_del_entry(m, in_queue_wait_ack); @@ -1040,7 +1041,7 @@ swim_process_failure_detection(struct swim *swim, const char **pos, } else { assert(type == SWIM_FD_MSG_ACK); if (incarnation >= sender->incarnation) { - sender->failed_pings = 0; + sender->unacknowledged_pings = 0; rlist_del_entry(sender, in_queue_wait_ack); } } > >> + /** When the latest ping was sent to this member. */ >> + double ping_ts; > > last_ping_time? Why use double and not ev_time_t? I did not use ev_time_t since fiber_time() returns double and ping_ts is initialized from fiber_time(), so I'll keep double. The name is changed. diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c index 75f57f69f..3e1b26356 100644 --- a/src/lib/swim/swim.c +++ b/src/lib/swim/swim.c @@ -182,7 +182,7 @@ struct swim_member { */ int unacknowledged_pings; /** When the latest ping was sent to this member. */ - double ping_ts; + double last_ping_time; /** Ready at hand regular ACK task. */ struct swim_task ack_task; struct swim_task ping_task; @@ -335,7 +335,7 @@ static void swim_member_schedule_ack_wait(struct swim *swim, struct swim_member *member) { if (rlist_empty(&member->in_queue_wait_ack)) { - member->ping_ts = fiber_time(); + member->last_ping_time = fiber_time(); rlist_add_tail_entry(&swim->queue_wait_ack, member, in_queue_wait_ack); } @@ -780,7 +780,7 @@ swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events) double current_time = fiber_time(); rlist_foreach_entry_safe(m, &swim->queue_wait_ack, in_queue_wait_ack, tmp) { - if (current_time - m->ping_ts < ack_timeout) + if (current_time - m->last_ping_time < ack_timeout) break; ++m->unacknowledged_pings; if (m->unacknowledged_pings >= NO_ACKS_TO_GC) { > >> + /** Type of the failure detection message: ping or ack. */ >> + SWIM_FD_MSG_TYPE, >> + /** >> + * Incarnation of the sender. To make the member alive if >> + * it was considered to be dead, but ping/ack with greater >> + * incarnation was received from it. >> + */ >> + SWIM_FD_INCARNATION, > > Uhm, FD is too commonly used for a file descriptor. Please use a > different name. Why not simply SWIM_PING and SWIM_ACK? Because I do not want to put all protocol constants into a single namespace, starting with SWIM_, so I used SWIM_FD_, SWIM_MEMBER_ etc. I tried to use SWIM_FAILURE_DETECTION_, but it is too long. Currently I have these names with 'fd': enum swim_fd_key, enum swim_fd_msg_type, const char *swim_fd_msg_type_strs[], struct swim_fd_header_bin. How about expanding 'fd' to 'fail_det', 'fail_detec'? Also, I could remove 'fd' from enum constants, but then it would be harder to determine to which component a constant belongs. Please, choose one of new suffixes, or approve removal of them at all. > >> struct swim_transport swim_udp_transport = { >> /* .send_round_msg = */ swim_udp_send_msg, >> + /* .send_ping = */ swim_udp_send_msg, >> + /* .send_ack = */ swim_udp_send_msg, > > Why do you need a separate transport api for ack/ping sends? > Shouldn't send/recv be enough? This is a transport layer, it > should be unaware of protocol details. > > Because this is what you asked me explicitly for. But honestly I prefer send/recv, without explicit send_ping/ack etc. So I fixed it in the first commit.