[tarantool-patches] [PATCH 2/3] Refactor error reporting system

AKhatskevich avkhatskevich at tarantool.org
Wed Jun 6 18:57:13 MSK 2018


This commit applies stricter rules on error reporting:
 * error may has only arguments specified in `args` field of
   `error_message_template`
 * arguments may be used in error message
 * arguments are saved in error object by names from `args`

Part of #100
---
 test/misc/check_uuid_on_connect.result  |   8 +-
 test/router/reroute_wrong_bucket.result |  10 +-
 test/router/router.result               |  42 ++++---
 test/storage/storage.result             |   6 +-
 test/unit/error.result                  |   6 +-
 test/unit/error.test.lua                |   2 +-
 vshard/error.lua                        | 209 +++++++++++++++++++++-----------
 vshard/replicaset.lua                   |   4 +-
 vshard/router/init.lua                  |  11 +-
 vshard/storage/init.lua                 |  17 +--
 10 files changed, 197 insertions(+), 118 deletions(-)

diff --git a/test/misc/check_uuid_on_connect.result b/test/misc/check_uuid_on_connect.result
index d54c367..2695665 100644
--- a/test/misc/check_uuid_on_connect.result
+++ b/test/misc/check_uuid_on_connect.result
@@ -182,10 +182,12 @@ test_run:switch('bad_uuid_router')
 vshard.router.bucket_discovery(1)
 ---
 - null
-- type: ShardingError
-  unreachable_uuid: ac522f65-aa94-4134-9f64-51ee384f1a54
+- bucket_id: 1
   code: 8
-  bucket_id: 1
+  unreachable_uuid: ac522f65-aa94-4134-9f64-51ee384f1a54
+  message: There is no active replicas in replicaset ac522f65-aa94-4134-9f64-51ee384f1a54
+  name: UNREACHABLE_REPLICASET
+  type: ShardingError
 ...
 -- Ok to work with correct replicasets.
 vshard.router.bucket_discovery(2).uuid
diff --git a/test/router/reroute_wrong_bucket.result b/test/router/reroute_wrong_bucket.result
index eda3bab..c7b980c 100644
--- a/test/router/reroute_wrong_bucket.result
+++ b/test/router/reroute_wrong_bucket.result
@@ -109,9 +109,11 @@ test_run:switch('router_1')
 vshard.router.call(100, 'read', 'customer_lookup', {1}, {timeout = 1})
 ---
 - null
-- type: ShardingError
-  bucket_id: 100
+- bucket_id: 100
   code: 9
+  type: ShardingError
+  name: NO_ROUTE_TO_BUCKET
+  message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":100}"'
 ...
 -- Wait reconfiguration durigin timeout, if a replicaset was not
 -- found by bucket.destination from WRONG_BUCKET or
@@ -188,7 +190,9 @@ test_run:grep_log('router_1', 'please update configuration')
 ...
 err
 ---
-- {'type': 'ShardingError', 'bucket_id': 100, 'code': 1, 'destination': 'ac522f65-aa94-4134-9f64-51ee384f1a54'}
+- {'bucket_id': 100, 'code': 1, 'type': 'ShardingError', 'destination': 'ac522f65-aa94-4134-9f64-51ee384f1a54',
+  'message': 'WRONG_BUCKET: "{\"bucket_id\":100,\"destination\":\"ac522f65-aa94-4134-9f64-51ee384f1a54\"}"',
+  'name': 'WRONG_BUCKET'}
 ...
 --
 -- Now try again, but update configuration during call(). It must
diff --git a/test/router/router.result b/test/router/router.result
index 5f3ceca..6955b59 100644
--- a/test/router/router.result
+++ b/test/router/router.result
@@ -195,15 +195,19 @@ vshard.router.bucket_count()
 util.check_error(vshard.router.call, 1, 'read', 'echo', {123})
 ---
 - null
-- type: ShardingError
-  bucket_id: 1
+- bucket_id: 1
   code: 9
+  type: ShardingError
+  name: NO_ROUTE_TO_BUCKET
+  message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":1}"'
 ...
 replicaset, err = vshard.router.bucket_discovery(1); return err == nil or err
 ---
-- type: ShardingError
-  bucket_id: 1
+- bucket_id: 1
   code: 9
+  type: ShardingError
+  name: NO_ROUTE_TO_BUCKET
+  message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":1}"'
 ...
 vshard.router.bootstrap()
 ---
@@ -214,8 +218,9 @@ vshard.router.bootstrap()
 ---
 - null
 - type: ShardingError
-  code: 10
   message: Cluster is already bootstrapped
+  code: 10
+  name: NON_EMPTY
 ...
 --
 -- gh-108: negative bucket count on discovery.
@@ -312,9 +317,11 @@ util.check_error(vshard.router.call, 0, 'read', 'echo', {123})
 ...
 replicaset, err = vshard.router.bucket_discovery(0); return err == nil or err
 ---
-- type: ShardingError
-  bucket_id: 0
+- bucket_id: 0
   code: 9
+  type: ShardingError
+  name: NO_ROUTE_TO_BUCKET
+  message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":0}"'
 ...
 replicaset, err = vshard.router.bucket_discovery(1); return err == nil or err
 ---
@@ -353,7 +360,9 @@ vshard.router.call(1, 'read', 'echo', {123})
 util.check_error(vshard.router.call, 1, 'write', 'echo', {123})
 ---
 - null
-- {'type': 'ShardingError', 'bucket_id': 1, 'code': 7, 'destination': '<replicaset_1>'}
+- {'bucket_id': 1, 'code': 7, 'type': 'ShardingError', 'destination': '<replicaset_1>',
+  'message': 'TRANSFER_IS_IN_PROGRESS: "{\"bucket_id\":1,\"destination\":\"<replicaset_1>\"}"',
+  'name': 'TRANSFER_IS_IN_PROGRESS'}
 ...
 test_run:cmd('switch storage_2_a')
 ---
@@ -397,10 +406,12 @@ test_run:cmd('stop server storage_2_a')
 util.check_error(vshard.router.call, 1, 'read', 'echo', {123})
 ---
 - null
-- type: ShardingError
-  unreachable_uuid: <replicaset_2>
+- bucket_id: 1
   code: 8
-  bucket_id: 1
+  unreachable_uuid: <replicaset_2>
+  message: There is no active replicas in replicaset <replicaset_2>
+  name: UNREACHABLE_REPLICASET
+  type: ShardingError
 ...
 vshard.router.buckets_info(0, 3)
 ---
@@ -420,9 +431,11 @@ test_run:cmd('start server storage_2_a')
 vshard.router.route(vshard.consts.DEFAULT_BUCKET_COUNT + 100)
 ---
 - null
-- type: ShardingError
-  bucket_id: 3100
+- bucket_id: 3100
   code: 9
+  type: ShardingError
+  name: NO_ROUTE_TO_BUCKET
+  message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":3100}"'
 ...
 util.check_error(vshard.router.route, 'asdfg')
 ---
@@ -956,7 +969,8 @@ test_run:cmd("switch router_1")
 util.check_error(vshard.router.call, 1, 'write', 'echo', { 'hello world' })
 ---
 - null
-- {'type': 'ShardingError', 'code': 2, 'bucket_id': 1}
+- {'bucket_id': 1, 'code': 2, 'type': 'ShardingError', 'name': 'NON_MASTER', 'message': 'NON_MASTER:
+    "{\"bucket_id\":1}"'}
 ...
 -- Reconfigure router and test that the WRITE request does work
 vshard.router.cfg(cfg)
diff --git a/test/storage/storage.result b/test/storage/storage.result
index e93ad24..aee9477 100644
--- a/test/storage/storage.result
+++ b/test/storage/storage.result
@@ -594,9 +594,11 @@ vshard.storage.call(1, 'read', 'customer_lookup', {1})
 vshard.storage.call(100500, 'read', 'customer_lookup', {1})
 ---
 - null
-- type: ShardingError
-  bucket_id: 100500
+- bucket_id: 100500
   code: 1
+  type: ShardingError
+  name: WRONG_BUCKET
+  message: 'WRONG_BUCKET: "{\"bucket_id\":100500}"'
 ...
 --
 -- Test not existing space in bucket data.
diff --git a/test/unit/error.result b/test/unit/error.result
index a46fb0d..ed9904a 100644
--- a/test/unit/error.result
+++ b/test/unit/error.result
@@ -20,13 +20,13 @@ tostring(box_error)
 ---
 - '{"type":"ClientError","code":78,"message":"Timeout exceeded","trace":[{"file":"[C]","line":4294967295}]}'
 ...
-vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, {uuid = 'uuid', reason = 'reason'}, 'message 1 2 3')
+vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, 'uuid', 'reason')
 ---
 ...
 tostring(vshard_error)
 ---
-- '{"reason":"reason","code":11,"type":"ShardingError","uuid":"uuid","message":"message
-  1 2 3"}'
+- '{"reason":"reason","code":11,"type":"ShardingError","message":"Master of replicaset
+  uuid is unreachable: reason","uuid":"uuid","name":"UNREACHABLE_MASTER"}'
 ...
 log = require('log')
 ---
diff --git a/test/unit/error.test.lua b/test/unit/error.test.lua
index 4b5c20f..7b89f64 100644
--- a/test/unit/error.test.lua
+++ b/test/unit/error.test.lua
@@ -9,7 +9,7 @@ ok, err = pcall(box.error, box.error.TIMEOUT)
 box_error = lerror.box(err)
 tostring(box_error)
 
-vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, {uuid = 'uuid', reason = 'reason'}, 'message 1 2 3')
+vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, 'uuid', 'reason')
 tostring(vshard_error)
 
 log = require('log')
diff --git a/vshard/error.lua b/vshard/error.lua
index 8d26d11..1271d16 100644
--- a/vshard/error.lua
+++ b/vshard/error.lua
@@ -1,6 +1,111 @@
 local ffi = require('ffi')
 local json = require('json')
 
+--
+-- Error messages description.
+-- * name -- Key by which an error code can be retrieved from
+--   exprted `code` dictionary.
+-- * msg -- Error message which can cantain parameters described
+--   in `string.format` notation.
+-- * args -- Names of arguments passed while constructing an
+--   error. After constructed, an error-object contains provided
+--   arguments by the names specified in the field.
+--
+local error_message_template = {
+    [1] = {
+        name = 'WRONG_BUCKET',
+        args = {'bucket_id', 'destination'}
+    },
+    [2] = {
+        name = 'NON_MASTER',
+        args = {'bucket_id', 'destination'}
+    },
+    [3] = {
+        name = 'BUCKET_ALREADY_EXISTS',
+        args = {'bucket_id'}
+    },
+    [4] = {
+        name = 'NO_SUCH_REPLICASET',
+        args = {'replicaset_uuid'}
+    },
+    [5] = {
+        name = 'MOVE_TO_SELF',
+        args = {'replicaset_uuid', 'bucket_id'}
+    },
+    [6] = {
+         name = 'MISSING_MASTER',
+         msg = 'Master is not configured for replicaset %s',
+         args = {'replicaset_uuid'}
+    },
+    [7] = {
+        name = 'TRANSFER_IS_IN_PROGRESS',
+        args = {'bucket_id', 'destination'}
+    },
+    [8] = {
+        name = 'UNREACHABLE_REPLICASET',
+        msg = 'There is no active replicas in replicaset %s',
+        args = {'unreachable_uuid', 'bucket_id'}
+    },
+    [9] = {
+        name = 'NO_ROUTE_TO_BUCKET',
+        args = {'bucket_id', 'unreachable_uuid'}
+    },
+    [10] = {
+        name = 'NON_EMPTY',
+        msg = 'Cluster is already bootstrapped'
+    },
+    [11] = {
+        name = 'UNREACHABLE_MASTER',
+        msg = 'Master of replicaset %s is unreachable: %s',
+        args = {'uuid', 'reason'}
+    },
+    [12] = {
+        name = 'OUT_OF_SYNC',
+        msg = 'Replica is out of sync'
+    },
+    [13] = {
+        name = 'HIGH_REPLICATION_LAG',
+        msg = 'High replication lag: %f',
+        args = {'lag'}
+    },
+    [14] = {
+        name = 'UNREACHABLE_REPLICA',
+        msg = "Replica %s isn't active",
+        args = {'unreachable_uuid'}
+    },
+    [15] = {
+        name = 'LOW_REDUNDANCY',
+        msg = 'Only one replica is active'
+    },
+    [16] = {
+        name = 'INVALID_REBALANCING',
+        msg = 'Sending and receiving buckets at same time is not allowed'
+    },
+    [17] = {
+        name = 'SUBOPTIMAL_REPLICA',
+        msg = 'A current read replica in replicaset %s is not optimal'
+    },
+    [18] = {
+        name = 'UNKNOWN_BUCKETS',
+        msg = '%d buckets are not discovered',
+        arg = {'not_discovered_cnt'}
+    },
+    [19] = {
+        name = 'REPLICASET_IS_LOCKED',
+        msg = 'Replicaset is locked'
+    }
+}
+
+--
+-- User-visible error_name -> error_number dictionary.
+--
+local error_code = {}
+for code, err in pairs(error_message_template) do
+    assert(type(code) == 'number')
+    assert(error_code[err.name] == nil, "Dublicate error name")
+    error_code[err.name] = code
+end
+
 --
 -- There are 2 error types:
 -- * box_error - it is created on tarantool errors: client error,
@@ -11,19 +116,43 @@ local json = require('json')
 --   'ShardingError', one of codes below and optional
 --   message.
 --
-
 local function box_error(original_error)
     return setmetatable(original_error:unpack(), {__tostring = json.encode})
 end
 
-local function vshard_error(code, args, msg)
-    local ret = setmetatable({type = 'ShardingError', code = code,
-                              message = msg}, {__tostring = json.encode})
-    if args then
-        for k, v in pairs(args) do
-            ret[k] = v
-        end
+--
+-- Construct an vstard error.
+-- @param code Number, one of error_code constants.
+-- @param ... Arguments from `error_message_template` `arg` field.
+--        Caller have to pass at least as many arguments as
+--        `msg` field requires.
+-- @retval ShardingError object.
+--
+local function vshard_error(code, ...)
+    local format = error_message_template[code]
+    assert(format, 'Error message format not found.')
+    local args_passed_cnt = select('#', ...)
+    local args = format.args or {}
+    assert(#args >= args_passed_cnt,
+           string.format('Wrong number of arguments passed to %s error',
+                         format.name))
+    local ret = setmetatable({}, {__tostring = json.encode})
+    -- save error arguments
+    for i = 1, #args do
+        ret[args[i]] = select(i, ...)
+    end
+    local msg
+    if format.msg then
+        local ok
+        ok, msg = pcall(string.format, format.msg, ...)
+        assert(ok)
+    else
+        msg = string.format('%s: %s', format.name, json.encode(tostring(ret)))
     end
+    ret.type = 'ShardingError'
+    ret.code = code
+    ret.name = format.name
+    ret.message = msg
     return ret
 end
 
@@ -45,70 +174,6 @@ local function make_error(e)
     end
 end
 
-local error_code = {
-    WRONG_BUCKET = 1,
-    NON_MASTER = 2,
-    BUCKET_ALREADY_EXISTS = 3,
-    NO_SUCH_REPLICASET = 4,
-    MOVE_TO_SELF = 5,
-    MISSING_MASTER = 6,
-    TRANSFER_IS_IN_PROGRESS = 7,
-    UNREACHABLE_REPLICASET = 8,
-    NO_ROUTE_TO_BUCKET = 9,
-    NON_EMPTY = 10,
-    UNREACHABLE_MASTER = 11,
-    OUT_OF_SYNC = 12,
-    HIGH_REPLICATION_LAG = 13,
-    UNREACHABLE_REPLICA = 14,
-    LOW_REDUNDANCY = 15,
-    INVALID_REBALANCING = 16,
-    SUBOPTIMAL_REPLICA = 17,
-    UNKNOWN_BUCKETS = 18,
-    REPLICASET_IS_LOCKED = 19,
-}
-
-local error_message_template = {
-    [error_code.MISSING_MASTER] = {
-         name = 'MISSING_MASTER',
-         msg = 'Master is not configured for replicaset %s'
-    },
-    [error_code.UNREACHABLE_MASTER] = {
-        name = 'UNREACHABLE_MASTER',
-        msg = 'Master of replicaset %s is unreachable: %s'
-    },
-    [error_code.OUT_OF_SYNC] = {
-        name = 'OUT_OF_SYNC',
-        msg = 'Replica is out of sync'
-    },
-    [error_code.HIGH_REPLICATION_LAG] = {
-        name = 'HIGH_REPLICATION_LAG',
-        msg = 'High replication lag: %f'
-    },
-    [error_code.UNREACHABLE_REPLICA] = {
-        name = 'UNREACHABLE_REPLICA',
-        msg = "Replica %s isn't active"
-    },
-    [error_code.UNREACHABLE_REPLICASET] = {
-        name = 'UNREACHABLE_REPLICASET',
-        msg = 'There is no active replicas in replicaset %s'
-    },
-    [error_code.LOW_REDUNDANCY] = {
-        name = 'LOW_REDUNDANCY',
-        msg = 'Only one replica is active'
-    },
-    [error_code.INVALID_REBALANCING] = {
-        name = 'INVALID_REBALANCING',
-        msg = 'Sending and receiving buckets at same time is not allowed'
-    },
-    [error_code.SUBOPTIMAL_REPLICA] = {
-        name = 'SUBOPTIMAL_REPLICA',
-        msg = 'A current read replica in replicaset %s is not optimal'
-    },
-    [error_code.UNKNOWN_BUCKETS] = {
-        name = 'UNKNOWN_BUCKETS',
-        msg = '%d buckets are not discovered',
-    }
-}
 
 local function make_alert(code, ...)
     local format = error_message_template[code]
diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
index 7fcf2df..99f59aa 100644
--- a/vshard/replicaset.lua
+++ b/vshard/replicaset.lua
@@ -114,8 +114,8 @@ end
 local function replicaset_connect_master(replicaset)
     local master = replicaset.master
     if master == nil then
-        return nil, lerror.vshard(lerror.code.MASTER_IS_MISSING,
-                                  {replicaset_uuid = replicaset.uuid})
+        return nil, lerror.vshard(lerror.code.MISSING_MASTER,
+                                  replicaset.uuid)
     end
     return replicaset_connect_to_replica(replicaset, master)
 end
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index 42e9c86..95ee8b3 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -85,8 +85,7 @@ local function bucket_discovery(bucket_id)
         if last_err.type == 'ClientError' and
            last_err.code == box.error.NO_CONNECTION then
             err = lerror.vshard(lerror.code.UNREACHABLE_REPLICASET,
-                                {bucket_id = bucket_id,
-                                 unreachable_uuid = unreachable_uuid})
+                                unreachable_uuid, bucket_id)
         else
             err = lerror.make(last_err)
         end
@@ -97,9 +96,8 @@ local function bucket_discovery(bucket_id)
         -- bucket was found to be RECEIVING on one replicaset, and
         -- was not found on other replicasets (it was sent during
         -- discovery).
-        err = lerror.vshard(lerror.code.NO_ROUTE_TO_BUCKET,
-                            {bucket_id = bucket_id,
-                             unreachable_uuid = unreachable_uuid})
+        err = lerror.vshard(lerror.code.NO_ROUTE_TO_BUCKET, bucket_id,
+                            unreachable_uuid)
     end
 
     return nil, err
@@ -519,8 +517,7 @@ local function cluster_bootstrap()
             return nil, err
         end
         if count > 0 then
-            return nil, lerror.vshard(lerror.code.NON_EMPTY, {},
-                                      'Cluster is already bootstrapped')
+            return nil, lerror.vshard(lerror.code.NON_EMPTY)
         end
     end
     lreplicaset.calculate_etalon_balance(M.replicasets, M.total_bucket_count)
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index d5e7b57..2d38ff9 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -452,8 +452,7 @@ local function bucket_check_state(bucket_id, mode)
            mode == 'write' and bucket_is_writable(bucket))
 ::finish::
     return bucket, errcode and
-           lerror.vshard(errcode, {bucket_id = bucket_id,
-                                   destination = bucket and bucket.destination})
+           lerror.vshard(errcode, bucket_id, bucket and bucket.destination)
 end
 
 --
@@ -514,8 +513,7 @@ local function bucket_recv(bucket_id, from, data)
 
     local bucket = box.space._bucket:get({bucket_id})
     if bucket ~= nil then
-        return nil, lerror.vshard(lerror.code.BUCKET_ALREADY_EXISTS,
-                                  {bucket_id = bucket_id})
+        return nil, lerror.vshard(lerror.code.BUCKET_ALREADY_EXISTS, bucket_id)
     end
 
     box.begin()
@@ -719,14 +717,12 @@ local function bucket_send(bucket_id, destination)
     end
     local replicaset = M.replicasets[destination]
     if replicaset == nil then
-        return nil, lerror.vshard(lerror.code.NO_SUCH_REPLICASET,
-                                  {replicaset_uuid = destination})
+        return nil, lerror.vshard(lerror.code.NO_SUCH_REPLICASET, destination)
     end
 
     if destination == box.info.cluster.uuid then
-        return nil, lerror.vshard(lerror.code.MOVE_TO_SELF,
-                                  {bucket_id = bucket_id,
-                                   replicaset_uuid = replicaset_uuid})
+        return nil, lerror.vshard(lerror.code.MOVE_TO_SELF, replicaset_uuid,
+                                  bucket_id)
     end
 
     local data = bucket_collect_internal(bucket_id)
@@ -1260,8 +1256,7 @@ end
 --
 local function rebalancer_apply_routes(routes)
     if is_this_replicaset_locked() then
-        return false, lerror.vshard(lerror.code.REPLICASET_IS_LOCKED, nil,
-                                    "Replicaset is locked");
+        return false, lerror.vshard(lerror.code.REPLICASET_IS_LOCKED);
     end
     assert(not rebalancing_is_in_progress())
     -- Can not apply routes here because of gh-946 in tarantool
-- 
2.14.1





More information about the Tarantool-patches mailing list