[tarantool-patches] [PATCH] Don't throw an exception in a replication handler
Georgy Kirichenko
georgy at tarantool.org
Mon Aug 27 16:28:09 MSK 2018
Throwing an exception out of a cbus message handler is a bug because
it breaks cbus message delivery. In the replication case, the thrown
exception prevents iproto from closing the replication socket.
Fixes #3642
---
Changes in v3:
- Move rlimit ffi bindings to separate file
Changes in v2:
- Test fixes (setrlimit, formatting)
- Test result file included
https://github.com/tarantool/tarantool/issues/3642
https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3642-fix-replication-socket-leak
src/box/iproto.cc | 2 +-
test/replication/lua/rlimit.lua | 33 ++++++++++++++
test/replication/misc.result | 76 +++++++++++++++++++++++++++++++++
test/replication/misc.test.lua | 33 ++++++++++++++
test/replication/suite.ini | 2 +-
5 files changed, 144 insertions(+), 2 deletions(-)
create mode 100644 test/replication/lua/rlimit.lua
diff --git a/src/box/iproto.cc b/src/box/iproto.cc
index ab7b42169..984c6df44 100644
--- a/src/box/iproto.cc
+++ b/src/box/iproto.cc
@@ -1424,7 +1424,7 @@ tx_process_join_subscribe(struct cmsg *m)
unreachable();
}
} catch (SocketError *e) {
- throw; /* don't write error response to prevent SIGPIPE */
+ return; /* don't write error response to prevent SIGPIPE */
} catch (Exception *e) {
iproto_write_error(con->input.fd, e, ::schema_version,
msg->header.sync);
diff --git a/test/replication/lua/rlimit.lua b/test/replication/lua/rlimit.lua
new file mode 100644
index 000000000..c61b18a07
--- /dev/null
+++ b/test/replication/lua/rlimit.lua
@@ -0,0 +1,33 @@
+
+local ffi = require('ffi')
+ffi.cdef([[
+typedef long rlim_t;
+struct rlimit {
+    rlim_t rlim_cur; /* Soft limit */
+    rlim_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
+};
+int getrlimit(int resource, struct rlimit *rlim);
+int setrlimit(int resource, const struct rlimit *rlim);
+]])
+-- Raise a Lua error when a libc call returns non-zero instead of ignoring it.
+local function check(rc, name)
+    if rc ~= 0 then error(name .. ' failed, errno ' .. ffi.errno()) end
+end
+return {
+    -- Resource ids use Linux numbering; NOTE(review): BSD/macOS values differ.
+    RLIMIT_CPU = 0, RLIMIT_FSIZE = 1, RLIMIT_DATA = 2, RLIMIT_STACK = 3,
+    RLIMIT_CORE = 4, RLIMIT_RSS = 5, RLIMIT_NPROC = 6, RLIMIT_NOFILE = 7,
+    RLIMIT_MEMLOCK = 8, RLIMIT_AS = 9,
+    -- Allocate a new struct rlimit to pass to getrlimit/setrlimit below.
+    limit = function()
+        return ffi.new('struct rlimit')
+    end,
+    -- Read the limits of resource `id` into `limit`; raises on failure.
+    getrlimit = function(id, limit)
+        check(ffi.C.getrlimit(id, limit), 'getrlimit')
+    end,
+    -- Apply `limit` to resource `id`; raises on failure, returns nothing.
+    setrlimit = function(id, limit)
+        check(ffi.C.setrlimit(id, limit), 'setrlimit')
+    end,
+}
diff --git a/test/replication/misc.result b/test/replication/misc.result
index 76e7fd5ee..a407abab5 100644
--- a/test/replication/misc.result
+++ b/test/replication/misc.result
@@ -229,6 +229,82 @@ test_run:cmd("switch default")
test_run:drop_cluster(SERVERS)
---
...
+rlimit = require('rlimit')
+---
+...
+lim = rlimit.limit()
+---
+...
+rlimit.getrlimit(rlimit.RLIMIT_NOFILE, lim)
+---
+...
+old_fno = lim.rlim_cur
+---
+...
+lim.rlim_cur = 64
+---
+...
+rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
+---
+...
+test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
+---
+- true
+...
+test_run:cmd(string.format('start server sock'))
+---
+- true
+...
+test_run:cmd('switch sock')
+---
+- true
+...
+test_run = require('test_run').new()
+---
+...
+fiber = require('fiber')
+---
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+for i = 1, 64 do
+ local replication = box.cfg.replication
+ box.cfg{replication = {}}
+ box.cfg{replication = replication}
+ while box.info.replication[1].upstream.status ~= 'follow' do
+ fiber.sleep(0.0001)
+ end
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+box.info.replication[1].upstream.status
+---
+- follow
+...
+test_run:cmd('switch default')
+---
+- true
+...
+lim.rlim_cur = old_fno
+---
+...
+rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
+---
+...
+test_run:cmd('stop server sock')
+---
+- true
+...
+test_run:cmd('cleanup server sock')
+---
+- true
+...
box.schema.user.revoke('guest', 'replication')
---
...
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index c60adf5a5..b23607eb8 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -90,6 +90,39 @@ box.space.space1:drop()
test_run:cmd("switch default")
test_run:drop_cluster(SERVERS)
+rlimit = require('rlimit')
+lim = rlimit.limit()
+rlimit.getrlimit(rlimit.RLIMIT_NOFILE, lim)
+old_fno = lim.rlim_cur
+lim.rlim_cur = 64
+rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
+
+test_run:cmd('create server sock with rpl_master=default, script="replication/replica.lua"')
+test_run:cmd(string.format('start server sock'))
+test_run:cmd('switch sock')
+test_run = require('test_run').new()
+fiber = require('fiber')
+test_run:cmd("setopt delimiter ';'")
+for i = 1, 64 do
+ local replication = box.cfg.replication
+ box.cfg{replication = {}}
+ box.cfg{replication = replication}
+ while box.info.replication[1].upstream.status ~= 'follow' do
+ fiber.sleep(0.0001)
+ end
+end;
+test_run:cmd("setopt delimiter ''");
+
+box.info.replication[1].upstream.status
+
+test_run:cmd('switch default')
+
+lim.rlim_cur = old_fno
+rlimit.setrlimit(rlimit.RLIMIT_NOFILE, lim)
+
+test_run:cmd('stop server sock')
+test_run:cmd('cleanup server sock')
+
box.schema.user.revoke('guest', 'replication')
--
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index b489add58..8b4db9c72 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -5,6 +5,6 @@ description = tarantool/box, replication
disabled = consistent.test.lua
release_disabled = catch.test.lua errinj.test.lua gc.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua
config = suite.cfg
-lua_libs = lua/fast_replica.lua
+lua_libs = lua/fast_replica.lua lua/rlimit.lua
long_run = prune.test.lua
is_parallel = False
--
2.18.0
More information about the Tarantool-patches
mailing list