[patches] [PATCH 2/2] netbox: do not clog log with repeating reconnect error messages
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Thu Feb 22 22:26:00 MSK 2018
When reconnect fails, it prints error to log. But reconnect can
fail again and again very many times until the connection is
closed.
Lets print the first error message under warn log level, and the
following ones under verbose log level until error will not be
changed.
Closes #3175
Signed-off-by: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
---
src/box/lua/net_box.lua | 10 +++++++++-
test/box/net.box.result | 29 ++++++++++++++++++++++++++++-
test/box/net.box.test.lua | 14 +++++++++++++-
3 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 041e6d1ce..29775b934 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -637,6 +637,7 @@ local space_metatable, index_metatable
local function connect(...)
local host, port, opts = parse_connect_params(...)
local user, password = opts.user, opts.password; opts.password = nil
+ local last_reconnect_error
local remote = {host = host, port = port, opts = opts, state = 'initial'}
local function callback(what, ...)
if what == 'state_changed' then
@@ -652,7 +653,14 @@ local function connect(...)
end
remote.state, remote.error = state, err
if state == 'error_reconnect' then
- log.warn('%s:%s: %s', host or "", port or "", err)
+ -- Repeat the same error in verbose log only.
+ -- Else the error clogs the log. See gh-3175.
+ if err ~= last_reconnect_error then
+ log.warn('%s:%s: %s', host or "", port or "", err)
+ last_reconnect_error = err
+ else
+ log.verbose('%s:%s: %s', host or "", port or "", err)
+ end
end
elseif what == 'handshake' then
local greeting = ...
diff --git a/test/box/net.box.result b/test/box/net.box.result
index 9e0571c63..2743b0553 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -2157,6 +2157,9 @@ weak.c:ping()
---
- true
...
+log.info(string.rep('a', 1000))
+---
+...
test_run:cmd('stop server connecter')
---
- true
@@ -2165,10 +2168,31 @@ test_run:cmd('cleanup server connecter')
---
- true
...
--- Check the connection tries to reconnect at least two times.
+-- For gh-3175 check that reconnect error messages are not
+-- repeated. At first, wait the first error message.
+while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
+---
+...
log.info(string.rep('a', 1000))
---
...
+-- Wait next reconnect attempt.
+fiber.sleep(0.6)
+---
+...
+-- The log must not contain the same error message.
+test_run:grep_log('default', 'Connection refused', 1000)
+---
+- null
+...
+-- Switch to verbose log level to see repeating error messages.
+old_log_level = box.cfg.log_level
+---
+...
+box.cfg{log_level = 6}
+---
+...
+-- Check the connection tries to reconnect at least two times.
while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
---
...
@@ -2178,6 +2202,9 @@ log.info(string.rep('a', 1000))
while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
---
...
+box.cfg{log_level = old_log_level}
+---
+...
collectgarbage('collect')
---
- 0
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 4b5f32426..b3667c641 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -883,13 +883,25 @@ weak = setmetatable({}, {__mode = 'v'})
strong = net.connect(connect_to, {reconnect_after = 0.5})
weak.c = strong
weak.c:ping()
+log.info(string.rep('a', 1000))
test_run:cmd('stop server connecter')
test_run:cmd('cleanup server connecter')
--- Check the connection tries to reconnect at least two times.
+-- For gh-3175 check that reconnect error messages are not
+-- repeated. At first, wait the first error message.
+while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
log.info(string.rep('a', 1000))
+-- Wait next reconnect attempt.
+fiber.sleep(0.6)
+-- The log must not contain the same error message.
+test_run:grep_log('default', 'Connection refused', 1000)
+-- Switch to verbose log level to see repeating error messages.
+old_log_level = box.cfg.log_level
+box.cfg{log_level = 6}
+-- Check the connection tries to reconnect at least two times.
while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
log.info(string.rep('a', 1000))
while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
+box.cfg{log_level = old_log_level}
collectgarbage('collect')
strong.state
strong == weak.c
--
2.14.3 (Apple Git-98)
More information about the Tarantool-patches
mailing list