[PATCH] Break connection on timeout

Konstantin Belyavskiy k.belyavskiy at tarantool.org
Tue Feb 13 13:07:13 MSK 2018


In a replication scheme, if one of the instances is powered off, this is not
detected by the others and the connection hangs. Live machines still show the
'follow' state.
Add a read timeout to solve this issue. This is safe because the applier and
the relay both send messages every replication_timeout, so if nothing is read
within the timeout we can assume there is a problem with the connection.
For now, use replication_disconnect_timeout, which is replication_timeout * 4.

Closes #3025
---
branch: gh-3025-break-connection-timeout
 src/box/applier.cc               | 7 ++++++-
 test/replication/errinj.result   | 4 ----
 test/replication/errinj.test.lua | 1 -
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/box/applier.cc b/src/box/applier.cc
index f0073bada..106a728cd 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -418,7 +418,12 @@ applier_subscribe(struct applier *applier)
 			applier_set_state(applier, APPLIER_FOLLOW);
 		}
 
-		coio_read_xrow(coio, ibuf, &row);
+		if (applier->version_id < version_id(1, 7, 7))
+			coio_read_xrow(coio, ibuf, &row);
+		else {
+			double timeout = replication_disconnect_timeout();
+			coio_read_xrow_timeout_xc(coio, ibuf, &row, timeout);
+		}
 
 		if (iproto_type_is_error(row.type))
 			xrow_decode_error_xc(&row);  /* error */
diff --git a/test/replication/errinj.result b/test/replication/errinj.result
index d1f1dbe91..d3008c26c 100644
--- a/test/replication/errinj.result
+++ b/test/replication/errinj.result
@@ -426,10 +426,6 @@ test_run:cmd("switch replica_ack")
 ---
 - true
 ...
-box.info.replication[1].upstream.status
----
-- follow
-...
 test_run:cmd("stop server default")
 ---
 - true
diff --git a/test/replication/errinj.test.lua b/test/replication/errinj.test.lua
index ba83481fe..b65171579 100644
--- a/test/replication/errinj.test.lua
+++ b/test/replication/errinj.test.lua
@@ -175,7 +175,6 @@ for i = 0, 9999 do box.space.test:replace({i, 4, 5, 'test'}) end
 test_run:cmd("create server replica_ack with rpl_master=default, script='replication/replica_ack.lua'")
 test_run:cmd("start server replica_ack")
 test_run:cmd("switch replica_ack")
-box.info.replication[1].upstream.status
 
 test_run:cmd("stop server default")
 test_run:cmd("deploy server default")
-- 
2.14.3 (Apple Git-98)




More information about the Tarantool-patches mailing list