From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 3B0A8252CB for ; Wed, 6 Jun 2018 11:57:34 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id OGxw6Wze6Etm for ; Wed, 6 Jun 2018 11:57:34 -0400 (EDT) Received: from smtp38.i.mail.ru (smtp38.i.mail.ru [94.100.177.98]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 824512528A for ; Wed, 6 Jun 2018 11:57:33 -0400 (EDT) From: AKhatskevich Subject: [tarantool-patches] [PATCH 2/3] Refactor error reporting system Date: Wed, 6 Jun 2018 18:57:13 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org, v.shpilevoy@tarantool.org This commit applies stricter rules on error reporting: * error may has only arguments specified in `args` field of `error_message_template` * arguments may be used in error message * arguments are saved in error object by names from `args` Part of #100 --- test/misc/check_uuid_on_connect.result | 8 +- test/router/reroute_wrong_bucket.result | 10 +- test/router/router.result | 42 ++++--- test/storage/storage.result | 6 +- test/unit/error.result | 6 +- test/unit/error.test.lua | 2 +- vshard/error.lua | 209 +++++++++++++++++++++----------- vshard/replicaset.lua | 4 +- vshard/router/init.lua | 11 +- vshard/storage/init.lua | 17 +-- 10 files changed, 197 insertions(+), 118 deletions(-) diff --git a/test/misc/check_uuid_on_connect.result b/test/misc/check_uuid_on_connect.result index d54c367..2695665 100644 --- a/test/misc/check_uuid_on_connect.result +++ b/test/misc/check_uuid_on_connect.result @@ -182,10 +182,12 @@ test_run:switch('bad_uuid_router') vshard.router.bucket_discovery(1) --- - null -- type: ShardingError - unreachable_uuid: ac522f65-aa94-4134-9f64-51ee384f1a54 +- bucket_id: 1 code: 8 - bucket_id: 1 + unreachable_uuid: ac522f65-aa94-4134-9f64-51ee384f1a54 + message: There is no active replicas in replicaset ac522f65-aa94-4134-9f64-51ee384f1a54 + name: UNREACHABLE_REPLICASET + type: ShardingError ... -- Ok to work with correct replicasets. vshard.router.bucket_discovery(2).uuid diff --git a/test/router/reroute_wrong_bucket.result b/test/router/reroute_wrong_bucket.result index eda3bab..c7b980c 100644 --- a/test/router/reroute_wrong_bucket.result +++ b/test/router/reroute_wrong_bucket.result @@ -109,9 +109,11 @@ test_run:switch('router_1') vshard.router.call(100, 'read', 'customer_lookup', {1}, {timeout = 1}) --- - null -- type: ShardingError - bucket_id: 100 +- bucket_id: 100 code: 9 + type: ShardingError + name: NO_ROUTE_TO_BUCKET + message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":100}"' ... -- Wait reconfiguration durigin timeout, if a replicaset was not -- found by bucket.destination from WRONG_BUCKET or @@ -188,7 +190,9 @@ test_run:grep_log('router_1', 'please update configuration') ... err --- -- {'type': 'ShardingError', 'bucket_id': 100, 'code': 1, 'destination': 'ac522f65-aa94-4134-9f64-51ee384f1a54'} +- {'bucket_id': 100, 'code': 1, 'type': 'ShardingError', 'destination': 'ac522f65-aa94-4134-9f64-51ee384f1a54', + 'message': 'WRONG_BUCKET: "{\"bucket_id\":100,\"destination\":\"ac522f65-aa94-4134-9f64-51ee384f1a54\"}"', + 'name': 'WRONG_BUCKET'} ... -- -- Now try again, but update configuration during call(). It must diff --git a/test/router/router.result b/test/router/router.result index 5f3ceca..6955b59 100644 --- a/test/router/router.result +++ b/test/router/router.result @@ -195,15 +195,19 @@ vshard.router.bucket_count() util.check_error(vshard.router.call, 1, 'read', 'echo', {123}) --- - null -- type: ShardingError - bucket_id: 1 +- bucket_id: 1 code: 9 + type: ShardingError + name: NO_ROUTE_TO_BUCKET + message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":1}"' ... replicaset, err = vshard.router.bucket_discovery(1); return err == nil or err --- -- type: ShardingError - bucket_id: 1 +- bucket_id: 1 code: 9 + type: ShardingError + name: NO_ROUTE_TO_BUCKET + message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":1}"' ... vshard.router.bootstrap() --- @@ -214,8 +218,9 @@ vshard.router.bootstrap() --- - null - type: ShardingError - code: 10 message: Cluster is already bootstrapped + code: 10 + name: NON_EMPTY ... -- -- gh-108: negative bucket count on discovery. @@ -312,9 +317,11 @@ util.check_error(vshard.router.call, 0, 'read', 'echo', {123}) ... replicaset, err = vshard.router.bucket_discovery(0); return err == nil or err --- -- type: ShardingError - bucket_id: 0 +- bucket_id: 0 code: 9 + type: ShardingError + name: NO_ROUTE_TO_BUCKET + message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":0}"' ... replicaset, err = vshard.router.bucket_discovery(1); return err == nil or err --- @@ -353,7 +360,9 @@ vshard.router.call(1, 'read', 'echo', {123}) util.check_error(vshard.router.call, 1, 'write', 'echo', {123}) --- - null -- {'type': 'ShardingError', 'bucket_id': 1, 'code': 7, 'destination': ''} +- {'bucket_id': 1, 'code': 7, 'type': 'ShardingError', 'destination': '', + 'message': 'TRANSFER_IS_IN_PROGRESS: "{\"bucket_id\":1,\"destination\":\"\"}"', + 'name': 'TRANSFER_IS_IN_PROGRESS'} ... test_run:cmd('switch storage_2_a') --- @@ -397,10 +406,12 @@ test_run:cmd('stop server storage_2_a') util.check_error(vshard.router.call, 1, 'read', 'echo', {123}) --- - null -- type: ShardingError - unreachable_uuid: +- bucket_id: 1 code: 8 - bucket_id: 1 + unreachable_uuid: + message: There is no active replicas in replicaset + name: UNREACHABLE_REPLICASET + type: ShardingError ... vshard.router.buckets_info(0, 3) --- @@ -420,9 +431,11 @@ test_run:cmd('start server storage_2_a') vshard.router.route(vshard.consts.DEFAULT_BUCKET_COUNT + 100) --- - null -- type: ShardingError - bucket_id: 3100 +- bucket_id: 3100 code: 9 + type: ShardingError + name: NO_ROUTE_TO_BUCKET + message: 'NO_ROUTE_TO_BUCKET: "{\"bucket_id\":3100}"' ... util.check_error(vshard.router.route, 'asdfg') --- @@ -956,7 +969,8 @@ test_run:cmd("switch router_1") util.check_error(vshard.router.call, 1, 'write', 'echo', { 'hello world' }) --- - null -- {'type': 'ShardingError', 'code': 2, 'bucket_id': 1} +- {'bucket_id': 1, 'code': 2, 'type': 'ShardingError', 'name': 'NON_MASTER', 'message': 'NON_MASTER: + "{\"bucket_id\":1}"'} ... -- Reconfigure router and test that the WRITE request does work vshard.router.cfg(cfg) diff --git a/test/storage/storage.result b/test/storage/storage.result index e93ad24..aee9477 100644 --- a/test/storage/storage.result +++ b/test/storage/storage.result @@ -594,9 +594,11 @@ vshard.storage.call(1, 'read', 'customer_lookup', {1}) vshard.storage.call(100500, 'read', 'customer_lookup', {1}) --- - null -- type: ShardingError - bucket_id: 100500 +- bucket_id: 100500 code: 1 + type: ShardingError + name: WRONG_BUCKET + message: 'WRONG_BUCKET: "{\"bucket_id\":100500}"' ... -- -- Test not existing space in bucket data. diff --git a/test/unit/error.result b/test/unit/error.result index a46fb0d..ed9904a 100644 --- a/test/unit/error.result +++ b/test/unit/error.result @@ -20,13 +20,13 @@ tostring(box_error) --- - '{"type":"ClientError","code":78,"message":"Timeout exceeded","trace":[{"file":"[C]","line":4294967295}]}' ... -vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, {uuid = 'uuid', reason = 'reason'}, 'message 1 2 3') +vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, 'uuid', 'reason') --- ... tostring(vshard_error) --- -- '{"reason":"reason","code":11,"type":"ShardingError","uuid":"uuid","message":"message - 1 2 3"}' +- '{"reason":"reason","code":11,"type":"ShardingError","message":"Master of replicaset + uuid is unreachable: reason","uuid":"uuid","name":"UNREACHABLE_MASTER"}' ... log = require('log') --- diff --git a/test/unit/error.test.lua b/test/unit/error.test.lua index 4b5c20f..7b89f64 100644 --- a/test/unit/error.test.lua +++ b/test/unit/error.test.lua @@ -9,7 +9,7 @@ ok, err = pcall(box.error, box.error.TIMEOUT) box_error = lerror.box(err) tostring(box_error) -vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, {uuid = 'uuid', reason = 'reason'}, 'message 1 2 3') +vshard_error = lerror.vshard(lerror.code.UNREACHABLE_MASTER, 'uuid', 'reason') tostring(vshard_error) log = require('log') diff --git a/vshard/error.lua b/vshard/error.lua index 8d26d11..1271d16 100644 --- a/vshard/error.lua +++ b/vshard/error.lua @@ -1,6 +1,111 @@ local ffi = require('ffi') local json = require('json') +-- +-- Error messages description. +-- * name -- Key by which an error code can be retrieved from +-- exprted `code` dictionary. +-- * msg -- Error message which can cantain parameters described +-- in `string.format` notation. +-- * args -- Names of arguments passed while constructing an +-- error. After constructed, an error-object contains provided +-- arguments by the names specified in the field. +-- +local error_message_template = { + [1] = { + name = 'WRONG_BUCKET', + args = {'bucket_id', 'destination'} + }, + [2] = { + name = 'NON_MASTER', + args = {'bucket_id', 'destination'} + }, + [3] = { + name = 'BUCKET_ALREADY_EXISTS', + args = {'bucket_id'} + }, + [4] = { + name = 'NO_SUCH_REPLICASET', + args = {'replicaset_uuid'} + }, + [5] = { + name = 'MOVE_TO_SELF', + args = {'replicaset_uuid', 'bucket_id'} + }, + [6] = { + name = 'MISSING_MASTER', + msg = 'Master is not configured for replicaset %s', + args = {'replicaset_uuid'} + }, + [7] = { + name = 'TRANSFER_IS_IN_PROGRESS', + args = {'bucket_id', 'destination'} + }, + [8] = { + name = 'UNREACHABLE_REPLICASET', + msg = 'There is no active replicas in replicaset %s', + args = {'unreachable_uuid', 'bucket_id'} + }, + [9] = { + name = 'NO_ROUTE_TO_BUCKET', + args = {'bucket_id', 'unreachable_uuid'} + }, + [10] = { + name = 'NON_EMPTY', + msg = 'Cluster is already bootstrapped' + }, + [11] = { + name = 'UNREACHABLE_MASTER', + msg = 'Master of replicaset %s is unreachable: %s', + args = {'uuid', 'reason'} + }, + [12] = { + name = 'OUT_OF_SYNC', + msg = 'Replica is out of sync' + }, + [13] = { + name = 'HIGH_REPLICATION_LAG', + msg = 'High replication lag: %f', + args = {'lag'} + }, + [14] = { + name = 'UNREACHABLE_REPLICA', + msg = "Replica %s isn't active", + args = {'unreachable_uuid'} + }, + [15] = { + name = 'LOW_REDUNDANCY', + msg = 'Only one replica is active' + }, + [16] = { + name = 'INVALID_REBALANCING', + msg = 'Sending and receiving buckets at same time is not allowed' + }, + [17] = { + name = 'SUBOPTIMAL_REPLICA', + msg = 'A current read replica in replicaset %s is not optimal' + }, + [18] = { + name = 'UNKNOWN_BUCKETS', + msg = '%d buckets are not discovered', + arg = {'not_discovered_cnt'} + }, + [19] = { + name = 'REPLICASET_IS_LOCKED', + msg = 'Replicaset is locked' + } +} + +-- +-- User-visible error_name -> error_number dictionary. +-- +local error_code = {} +for code, err in pairs(error_message_template) do + assert(type(code) == 'number') + assert(error_code[err.name] == nil, "Dublicate error name") + error_code[err.name] = code +end + -- -- There are 2 error types: -- * box_error - it is created on tarantool errors: client error, @@ -11,19 +116,43 @@ local json = require('json') -- 'ShardingError', one of codes below and optional -- message. -- - local function box_error(original_error) return setmetatable(original_error:unpack(), {__tostring = json.encode}) end -local function vshard_error(code, args, msg) - local ret = setmetatable({type = 'ShardingError', code = code, - message = msg}, {__tostring = json.encode}) - if args then - for k, v in pairs(args) do - ret[k] = v - end +-- +-- Construct an vstard error. +-- @param code Number, one of error_code constants. +-- @param ... Arguments from `error_message_template` `arg` field. +-- Caller have to pass at least as many arguments as +-- `msg` field requires. +-- @retval ShardingError object. +-- +local function vshard_error(code, ...) + local format = error_message_template[code] + assert(format, 'Error message format not found.') + local args_passed_cnt = select('#', ...) + local args = format.args or {} + assert(#args >= args_passed_cnt, + string.format('Wrong number of arguments passed to %s error', + format.name)) + local ret = setmetatable({}, {__tostring = json.encode}) + -- save error arguments + for i = 1, #args do + ret[args[i]] = select(i, ...) + end + local msg + if format.msg then + local ok + ok, msg = pcall(string.format, format.msg, ...) + assert(ok) + else + msg = string.format('%s: %s', format.name, json.encode(tostring(ret))) end + ret.type = 'ShardingError' + ret.code = code + ret.name = format.name + ret.message = msg return ret end @@ -45,70 +174,6 @@ local function make_error(e) end end -local error_code = { - WRONG_BUCKET = 1, - NON_MASTER = 2, - BUCKET_ALREADY_EXISTS = 3, - NO_SUCH_REPLICASET = 4, - MOVE_TO_SELF = 5, - MISSING_MASTER = 6, - TRANSFER_IS_IN_PROGRESS = 7, - UNREACHABLE_REPLICASET = 8, - NO_ROUTE_TO_BUCKET = 9, - NON_EMPTY = 10, - UNREACHABLE_MASTER = 11, - OUT_OF_SYNC = 12, - HIGH_REPLICATION_LAG = 13, - UNREACHABLE_REPLICA = 14, - LOW_REDUNDANCY = 15, - INVALID_REBALANCING = 16, - SUBOPTIMAL_REPLICA = 17, - UNKNOWN_BUCKETS = 18, - REPLICASET_IS_LOCKED = 19, -} - -local error_message_template = { - [error_code.MISSING_MASTER] = { - name = 'MISSING_MASTER', - msg = 'Master is not configured for replicaset %s' - }, - [error_code.UNREACHABLE_MASTER] = { - name = 'UNREACHABLE_MASTER', - msg = 'Master of replicaset %s is unreachable: %s' - }, - [error_code.OUT_OF_SYNC] = { - name = 'OUT_OF_SYNC', - msg = 'Replica is out of sync' - }, - [error_code.HIGH_REPLICATION_LAG] = { - name = 'HIGH_REPLICATION_LAG', - msg = 'High replication lag: %f' - }, - [error_code.UNREACHABLE_REPLICA] = { - name = 'UNREACHABLE_REPLICA', - msg = "Replica %s isn't active" - }, - [error_code.UNREACHABLE_REPLICASET] = { - name = 'UNREACHABLE_REPLICASET', - msg = 'There is no active replicas in replicaset %s' - }, - [error_code.LOW_REDUNDANCY] = { - name = 'LOW_REDUNDANCY', - msg = 'Only one replica is active' - }, - [error_code.INVALID_REBALANCING] = { - name = 'INVALID_REBALANCING', - msg = 'Sending and receiving buckets at same time is not allowed' - }, - [error_code.SUBOPTIMAL_REPLICA] = { - name = 'SUBOPTIMAL_REPLICA', - msg = 'A current read replica in replicaset %s is not optimal' - }, - [error_code.UNKNOWN_BUCKETS] = { - name = 'UNKNOWN_BUCKETS', - msg = '%d buckets are not discovered', - } -} local function make_alert(code, ...) local format = error_message_template[code] diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua index 7fcf2df..99f59aa 100644 --- a/vshard/replicaset.lua +++ b/vshard/replicaset.lua @@ -114,8 +114,8 @@ end local function replicaset_connect_master(replicaset) local master = replicaset.master if master == nil then - return nil, lerror.vshard(lerror.code.MASTER_IS_MISSING, - {replicaset_uuid = replicaset.uuid}) + return nil, lerror.vshard(lerror.code.MISSING_MASTER, + replicaset.uuid) end return replicaset_connect_to_replica(replicaset, master) end diff --git a/vshard/router/init.lua b/vshard/router/init.lua index 42e9c86..95ee8b3 100644 --- a/vshard/router/init.lua +++ b/vshard/router/init.lua @@ -85,8 +85,7 @@ local function bucket_discovery(bucket_id) if last_err.type == 'ClientError' and last_err.code == box.error.NO_CONNECTION then err = lerror.vshard(lerror.code.UNREACHABLE_REPLICASET, - {bucket_id = bucket_id, - unreachable_uuid = unreachable_uuid}) + unreachable_uuid, bucket_id) else err = lerror.make(last_err) end @@ -97,9 +96,8 @@ local function bucket_discovery(bucket_id) -- bucket was found to be RECEIVING on one replicaset, and -- was not found on other replicasets (it was sent during -- discovery). - err = lerror.vshard(lerror.code.NO_ROUTE_TO_BUCKET, - {bucket_id = bucket_id, - unreachable_uuid = unreachable_uuid}) + err = lerror.vshard(lerror.code.NO_ROUTE_TO_BUCKET, bucket_id, + unreachable_uuid) end return nil, err @@ -519,8 +517,7 @@ local function cluster_bootstrap() return nil, err end if count > 0 then - return nil, lerror.vshard(lerror.code.NON_EMPTY, {}, - 'Cluster is already bootstrapped') + return nil, lerror.vshard(lerror.code.NON_EMPTY) end end lreplicaset.calculate_etalon_balance(M.replicasets, M.total_bucket_count) diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua index d5e7b57..2d38ff9 100644 --- a/vshard/storage/init.lua +++ b/vshard/storage/init.lua @@ -452,8 +452,7 @@ local function bucket_check_state(bucket_id, mode) mode == 'write' and bucket_is_writable(bucket)) ::finish:: return bucket, errcode and - lerror.vshard(errcode, {bucket_id = bucket_id, - destination = bucket and bucket.destination}) + lerror.vshard(errcode, bucket_id, bucket and bucket.destination) end -- @@ -514,8 +513,7 @@ local function bucket_recv(bucket_id, from, data) local bucket = box.space._bucket:get({bucket_id}) if bucket ~= nil then - return nil, lerror.vshard(lerror.code.BUCKET_ALREADY_EXISTS, - {bucket_id = bucket_id}) + return nil, lerror.vshard(lerror.code.BUCKET_ALREADY_EXISTS, bucket_id) end box.begin() @@ -719,14 +717,12 @@ local function bucket_send(bucket_id, destination) end local replicaset = M.replicasets[destination] if replicaset == nil then - return nil, lerror.vshard(lerror.code.NO_SUCH_REPLICASET, - {replicaset_uuid = destination}) + return nil, lerror.vshard(lerror.code.NO_SUCH_REPLICASET, destination) end if destination == box.info.cluster.uuid then - return nil, lerror.vshard(lerror.code.MOVE_TO_SELF, - {bucket_id = bucket_id, - replicaset_uuid = replicaset_uuid}) + return nil, lerror.vshard(lerror.code.MOVE_TO_SELF, replicaset_uuid, + bucket_id) end local data = bucket_collect_internal(bucket_id) @@ -1260,8 +1256,7 @@ end -- local function rebalancer_apply_routes(routes) if is_this_replicaset_locked() then - return false, lerror.vshard(lerror.code.REPLICASET_IS_LOCKED, nil, - "Replicaset is locked"); + return false, lerror.vshard(lerror.code.REPLICASET_IS_LOCKED); end assert(not rebalancing_is_in_progress()) -- Can not apply routes here because of gh-946 in tarantool -- 2.14.1