[patches] [PATCH 2/2] netbox: do not clog log with repeating reconnect error messages

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Feb 22 22:26:00 MSK 2018


When reconnect fails, it prints error to log. But reconnect can
fail again and again very many times until the connection is
closed.

Lets print the first error message under warn log level, and the
following ones under verbose log level until error will not be
changed.

Closes #3175

Signed-off-by: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
---
 src/box/lua/net_box.lua   | 10 +++++++++-
 test/box/net.box.result   | 29 ++++++++++++++++++++++++++++-
 test/box/net.box.test.lua | 14 +++++++++++++-
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 041e6d1ce..29775b934 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -637,6 +637,7 @@ local space_metatable, index_metatable
 local function connect(...)
     local host, port, opts = parse_connect_params(...)
     local user, password = opts.user, opts.password; opts.password = nil
+    local last_reconnect_error
     local remote = {host = host, port = port, opts = opts, state = 'initial'}
     local function callback(what, ...)
         if what == 'state_changed' then
@@ -652,7 +653,14 @@ local function connect(...)
             end
             remote.state, remote.error = state, err
             if state == 'error_reconnect' then
-                log.warn('%s:%s: %s', host or "", port or "", err)
+                -- Repeat the same error in verbose log only.
+                -- Else the error clogs the log. See gh-3175.
+                if err ~= last_reconnect_error then
+                    log.warn('%s:%s: %s', host or "", port or "", err)
+                    last_reconnect_error = err
+                else
+                    log.verbose('%s:%s: %s', host or "", port or "", err)
+                end
             end
         elseif what == 'handshake' then
             local greeting = ...
diff --git a/test/box/net.box.result b/test/box/net.box.result
index 9e0571c63..2743b0553 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -2157,6 +2157,9 @@ weak.c:ping()
 ---
 - true
 ...
+log.info(string.rep('a', 1000))
+---
+...
 test_run:cmd('stop server connecter')
 ---
 - true
@@ -2165,10 +2168,31 @@ test_run:cmd('cleanup server connecter')
 ---
 - true
 ...
--- Check the connection tries to reconnect at least two times.
+-- For gh-3175 check that reconnect error messages are not
+-- repeated. At first, wait the first error message.
+while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
+---
+...
 log.info(string.rep('a', 1000))
 ---
 ...
+-- Wait next reconnect attempt.
+fiber.sleep(0.6)
+---
+...
+-- The log must not contain the same error message.
+test_run:grep_log('default', 'Connection refused', 1000)
+---
+- null
+...
+-- Switch to verbose log level to see repeating error messages.
+old_log_level = box.cfg.log_level
+---
+...
+box.cfg{log_level = 6}
+---
+...
+-- Check the connection tries to reconnect at least two times.
 while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
 ---
 ...
@@ -2178,6 +2202,9 @@ log.info(string.rep('a', 1000))
 while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
 ---
 ...
+box.cfg{log_level = old_log_level}
+---
+...
 collectgarbage('collect')
 ---
 - 0
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 4b5f32426..b3667c641 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -883,13 +883,25 @@ weak = setmetatable({}, {__mode = 'v'})
 strong = net.connect(connect_to, {reconnect_after = 0.5})
 weak.c = strong
 weak.c:ping()
+log.info(string.rep('a', 1000))
 test_run:cmd('stop server connecter')
 test_run:cmd('cleanup server connecter')
--- Check the connection tries to reconnect at least two times.
+-- For gh-3175 check that reconnect error messages are not
+-- repeated. At first, wait the first error message.
+while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
 log.info(string.rep('a', 1000))
+-- Wait next reconnect attempt.
+fiber.sleep(0.6)
+-- The log must not contain the same error message.
+test_run:grep_log('default', 'Connection refused', 1000)
+-- Switch to verbose log level to see repeating error messages.
+old_log_level = box.cfg.log_level
+box.cfg{log_level = 6}
+-- Check the connection tries to reconnect at least two times.
 while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
 log.info(string.rep('a', 1000))
 while test_run:grep_log('default', 'Connection refused', 1000) == nil do fiber.sleep(0.1) end
+box.cfg{log_level = old_log_level}
 collectgarbage('collect')
 strong.state
 strong == weak.c
-- 
2.14.3 (Apple Git-98)




More information about the Tarantool-patches mailing list