[Tarantool-patches] [PATCH v2 11/11] error: report ER_READONLY reason in message
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Fri Nov 12 02:54:22 MSK 2021
A previous commit added reason details as error fields. But error
objects are serialized as strings by default thus loosing all the
details.
This commit makes ER_READONLY message contain the reason why it
happened in a human-readable form. That way it will be well
visible in the logs at least.
Follow-up #5568
---
src/box/box.cc | 33 ++++++++++++++-----
src/box/errcode.h | 2 +-
src/lib/core/diag.c | 11 +++++++
src/lib/core/diag.h | 3 ++
.../gh_5568_read_only_reason_test.lua | 19 +++++++++++
test/replication/anon.result | 6 ++--
test/replication/catch.result | 4 +--
test/replication/election_qsync.result | 11 +++++--
test/replication/election_qsync.test.lua | 4 ++-
.../gh-6034-qsync-limbo-ownership.result | 22 ++++++++++---
.../gh-6034-qsync-limbo-ownership.test.lua | 8 +++--
test/vinyl/misc.result | 2 +-
12 files changed, 101 insertions(+), 24 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index 323982969..f5162e67c 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -181,6 +181,7 @@ box_check_writable(void)
return 0;
struct error *e = diag_set(ClientError, ER_READONLY);
struct raft *raft = box_raft();
+ error_append_msg(e, " - ");
/*
* In case of multiple reasons at the same time only one is reported.
* But the order is important. For example, if the instance has election
@@ -188,40 +189,56 @@ box_check_writable(void)
* and who is the leader than just see cfg 'read_only' is true.
*/
if (raft_is_ro(raft)) {
+ const char *state = raft_state_str(raft->state);
+ uint64_t term = raft->volatile_term;
error_set_str(e, "reason", "election");
- error_set_str(e, "state", raft_state_str(raft->state));
- error_set_uint(e, "term", raft->volatile_term);
+ error_set_str(e, "state", state);
+ error_set_uint(e, "term", term);
+ error_append_msg(e, "state is election %s with term %llu",
+ state, term);
uint32_t id = raft->leader;
if (id != REPLICA_ID_NIL) {
error_set_uint(e, "leader_id", id);
+ error_append_msg(e, ", leader is %u", id);
struct replica *r = replica_by_id(id);
/*
* XXX: when the leader is dropped from _cluster, it
* is not reported to Raft.
*/
- if (r != NULL)
+ if (r != NULL) {
error_set_uuid(e, "leader_uuid", &r->uuid);
+ error_append_msg(e, " (%s)",
+ tt_uuid_str(&r->uuid));
+ }
}
} else if (txn_limbo_is_ro(&txn_limbo)) {
error_set_str(e, "reason", "synchro");
uint32_t id = txn_limbo.owner_id;
+ uint64_t term = raft->volatile_term;
error_set_uint(e, "queue_owner_id", id);
- error_set_uint(e, "term", raft->volatile_term);
+ error_set_uint(e, "term", term);
+ error_append_msg(e, "synchro queue with term %llu belongs "
+ "to %u", term, id);
struct replica *r = replica_by_id(id);
/*
* XXX: when an instance is deleted from _cluster, its limbo's
* ownership is not cleared.
*/
- if (r != NULL)
+ if (r != NULL) {
error_set_uuid(e, "queue_owner_uuid", &r->uuid);
+ error_append_msg(e, " (%s)", tt_uuid_str(&r->uuid));
+ }
} else {
error_set_str(e, "reason", "state");
- if (is_ro)
+ if (is_ro) {
error_set_str(e, "state", "read_only");
- else if (is_orphan)
+ error_append_msg(e, "box.cfg.read_only is true");
+ } else if (is_orphan) {
error_set_str(e, "state", "orphan");
- else
+ error_append_msg(e, "it is an orphan");
+ } else {
assert(false);
+ }
}
diag_log();
return -1;
diff --git a/src/box/errcode.h b/src/box/errcode.h
index ac178f1f1..66c335c20 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -59,7 +59,7 @@ struct errcode_record {
/* 4 */_(ER_TUPLE_NOT_FOUND, "Tuple doesn't exist in index '%s' in space '%s'") \
/* 5 */_(ER_UNSUPPORTED, "%s does not support %s") \
/* 6 */_(ER_NONMASTER, "Can't modify data on a replication slave. My master is: %s") \
- /* 7 */_(ER_READONLY, "Can't modify data because this instance is in read-only mode.") \
+ /* 7 */_(ER_READONLY, "Can't modify data on a read-only instance") \
/* 8 */_(ER_INJECTION, "Error injection '%s'") \
/* 9 */_(ER_CREATE_SPACE, "Failed to create space '%s': %s") \
/* 10 */_(ER_SPACE_EXISTS, "Space '%s' already exists") \
diff --git a/src/lib/core/diag.c b/src/lib/core/diag.c
index b05f2793f..2eda31ae9 100644
--- a/src/lib/core/diag.c
+++ b/src/lib/core/diag.c
@@ -155,6 +155,17 @@ error_format_msg(struct error *e, const char *format, ...)
va_end(ap);
}
+void
+error_append_msg(struct error *e, const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ int prefix_len = strlen(e->errmsg);
+ char *msg = e->errmsg + prefix_len;
+ vsnprintf(msg, sizeof(e->errmsg) - prefix_len, format, ap);
+ va_end(ap);
+}
+
void
error_vformat_msg(struct error *e, const char *format, va_list ap)
{
diff --git a/src/lib/core/diag.h b/src/lib/core/diag.h
index be81689ce..0522d9e5a 100644
--- a/src/lib/core/diag.h
+++ b/src/lib/core/diag.h
@@ -273,6 +273,9 @@ error_format_msg(struct error *e, const char *format, ...);
void
error_vformat_msg(struct error *e, const char *format, va_list ap);
+void
+error_append_msg(struct error *e, const char *format, ...);
+
/**
* Diagnostics Area - a container for errors
*/
diff --git a/test/replication-luatest/gh_5568_read_only_reason_test.lua b/test/replication-luatest/gh_5568_read_only_reason_test.lua
index 8effa69e8..5e67029c0 100644
--- a/test/replication-luatest/gh_5568_read_only_reason_test.lua
+++ b/test/replication-luatest/gh_5568_read_only_reason_test.lua
@@ -47,6 +47,8 @@ local g = t.group('gh-5568-read-only-reason1')
g.before_all(make_create_cluster(g))
g.after_all(make_destroy_cluster(g))
+local read_only_msg = "Can't modify data on a read-only instance - "
+
--
-- Read-only because of box.cfg{read_only = true}.
--
@@ -57,6 +59,8 @@ g.test_read_only_reason_cfg = function(g)
return ok, err:unpack()
end)
t.assert(not ok, 'fail ddl')
+ t.assert_str_contains(err.message, read_only_msg..
+ 'box.cfg.read_only is true')
t.assert_covers(err, {
reason = 'state',
state = 'read_only',
@@ -88,6 +92,7 @@ g.test_read_only_reason_orphan = function(g)
return old_timeout, ok, err:unpack()
end, {fake_uri})
t.assert(not ok, 'fail ddl')
+ t.assert_str_contains(err.message, read_only_msg..'it is an orphan')
t.assert_covers(err, {
reason = 'state',
state = 'orphan',
@@ -117,6 +122,8 @@ g.test_read_only_reason_election_no_leader = function(g)
end)
t.assert(not ok, 'fail ddl')
t.assert(err.term, 'has term')
+ t.assert_str_contains(err.message, read_only_msg..('state is election '..
+ '%s with term %s'):format(err.state, err.term))
t.assert_covers(err, {
reason = 'election',
state = 'follower',
@@ -148,6 +155,9 @@ g.test_read_only_reason_election_has_leader = function(g)
end)
t.assert(not ok, 'fail ddl')
t.assert(err.term, 'has term')
+ t.assert_str_contains(err.message, read_only_msg..('state is election '..
+ '%s with term %s, leader is %s (%s)'):format(
+ err.state, err.term, err.leader_id, err.leader_uuid))
t.assert_covers(err, {
reason = 'election',
state = 'follower',
@@ -187,6 +197,9 @@ g.test_read_only_reason_synchro = function(g)
end)
t.assert(not ok, 'fail ddl')
t.assert(err.term, 'has term')
+ t.assert_str_contains(err.message, read_only_msg..('synchro queue with '..
+ 'term %s belongs to %s (%s)'):format(err.term,
+ err.queue_owner_id, err.queue_owner_uuid))
t.assert_covers(err, {
reason = 'synchro',
queue_owner_id = g.master:instance_id(),
@@ -242,6 +255,9 @@ g.test_read_only_reason_election_has_leader_no_uuid = function(g)
t.assert(not ok, 'fail ddl')
t.assert(err.term, 'has term')
t.assert(not err.leader_uuid, 'has no leader uuid')
+ t.assert_str_contains(err.message, read_only_msg..('state is election %s '..
+ 'with term %s, leader is %s'):format(err.state,
+ err.term, err.leader_id))
t.assert_covers(err, {
reason = 'election',
state = 'follower',
@@ -276,6 +292,9 @@ g.test_read_only_reason_synchro_no_uuid = function(g)
t.assert(not ok, 'fail ddl')
t.assert(err.term, 'has term')
t.assert(not err.queue_owner_uuid)
+ t.assert_str_contains(err.message, read_only_msg..('synchro queue with '..
+ 'term %s belongs to %s'):format(err.term,
+ err.queue_owner_id))
t.assert_covers(err, {
reason = 'synchro',
queue_owner_id = leader_id,
diff --git a/test/replication/anon.result b/test/replication/anon.result
index dd470797c..3359be1d1 100644
--- a/test/replication/anon.result
+++ b/test/replication/anon.result
@@ -136,16 +136,16 @@ box.cfg{read_only=false}
box.space.test:insert{2}
| ---
- | - error: Can't modify data because this instance is in read-only mode.
+ | - error: Can't modify data on a read-only instance - box.cfg.read_only is true
| ...
box.space.loc:drop()
| ---
- | - error: Can't modify data because this instance is in read-only mode.
+ | - error: Can't modify data on a read-only instance - box.cfg.read_only is true
| ...
box.space.loc:truncate()
| ---
- | - error: Can't modify data because this instance is in read-only mode.
+ | - error: Can't modify data on a read-only instance - box.cfg.read_only is true
| ...
test_run:cmd('switch default')
diff --git a/test/replication/catch.result b/test/replication/catch.result
index e1b2995ec..3fbc07fbd 100644
--- a/test/replication/catch.result
+++ b/test/replication/catch.result
@@ -92,7 +92,7 @@ box.space.test ~= nil
...
box.space.test:replace{1}
---
-- error: Can't modify data because this instance is in read-only mode.
+- error: Can't modify data on a read-only instance - it is an orphan
...
-- Case #2: replace tuple on replica by net.box.
test_run:cmd("switch default")
@@ -108,7 +108,7 @@ c = net_box.connect(r_uri)
...
d = c.space.test:replace{1}
---
-- error: Can't modify data because this instance is in read-only mode.
+- error: Can't modify data on a read-only instance - it is an orphan
...
-- Resume replication.
errinj.set('ERRINJ_RELAY_SEND_DELAY', false)
diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
index c6ec5e352..798f9d493 100644
--- a/test/replication/election_qsync.result
+++ b/test/replication/election_qsync.result
@@ -115,9 +115,16 @@ test_run:cmd('stop server replica')
| - true
| ...
-- Will fail - the node is not a leader.
-box.space.test:replace{2}
+ok, err = pcall(box.space.test.replace, box.space.test, {2})
| ---
- | - error: Can't modify data because this instance is in read-only mode.
+ | ...
+assert(not ok)
+ | ---
+ | - true
+ | ...
+assert(err.code == box.error.READONLY)
+ | ---
+ | - true
| ...
-- Set synchro timeout to a huge value to ensure, that when a leader is elected,
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
index f3c7c290b..3fb6a9be7 100644
--- a/test/replication/election_qsync.test.lua
+++ b/test/replication/election_qsync.test.lua
@@ -58,7 +58,9 @@ test_run:wait_lsn('default', 'replica')
test_run:switch('default')
test_run:cmd('stop server replica')
-- Will fail - the node is not a leader.
-box.space.test:replace{2}
+ok, err = pcall(box.space.test.replace, box.space.test, {2})
+assert(not ok)
+assert(err.code == box.error.READONLY)
-- Set synchro timeout to a huge value to ensure, that when a leader is elected,
-- it won't wait for this timeout.
diff --git a/test/replication/gh-6034-qsync-limbo-ownership.result b/test/replication/gh-6034-qsync-limbo-ownership.result
index b4f53cd2a..58acf7db2 100644
--- a/test/replication/gh-6034-qsync-limbo-ownership.result
+++ b/test/replication/gh-6034-qsync-limbo-ownership.result
@@ -94,9 +94,16 @@ assert(box.info.synchro.queue.owner == test_run:get_server_id('default'))
| ---
| - true
| ...
-box.space.async:insert{2} -- failure.
+ok, err = pcall(box.space.async.insert, box.space.async, {2})
| ---
- | - error: Can't modify data because this instance is in read-only mode.
+ | ...
+assert(not ok)
+ | ---
+ | - true
+ | ...
+assert(err.code == box.error.READONLY)
+ | ---
+ | - true
| ...
-- Promotion on the other node. Default should become ro.
@@ -131,9 +138,16 @@ assert(box.info.synchro.queue.owner == test_run:get_server_id('replica'))
| ---
| - true
| ...
-box.space.sync:insert{3} -- failure.
+ok, err = pcall(box.space.sync.insert, box.space.sync, {3})
| ---
- | - error: Can't modify data because this instance is in read-only mode.
+ | ...
+assert(not ok)
+ | ---
+ | - true
+ | ...
+assert(err.code == box.error.READONLY)
+ | ---
+ | - true
| ...
box.ctl.promote()
diff --git a/test/replication/gh-6034-qsync-limbo-ownership.test.lua b/test/replication/gh-6034-qsync-limbo-ownership.test.lua
index f9ef5ca41..0f62ba6a4 100644
--- a/test/replication/gh-6034-qsync-limbo-ownership.test.lua
+++ b/test/replication/gh-6034-qsync-limbo-ownership.test.lua
@@ -34,7 +34,9 @@ box.cfg{replication=rpl_listen}
test_run:switch('replica')
assert(box.info.ro)
assert(box.info.synchro.queue.owner == test_run:get_server_id('default'))
-box.space.async:insert{2} -- failure.
+ok, err = pcall(box.space.async.insert, box.space.async, {2})
+assert(not ok)
+assert(err.code == box.error.READONLY)
-- Promotion on the other node. Default should become ro.
box.ctl.promote()
@@ -46,7 +48,9 @@ test_run:switch('default')
test_run:wait_lsn('default', 'replica')
assert(box.info.ro)
assert(box.info.synchro.queue.owner == test_run:get_server_id('replica'))
-box.space.sync:insert{3} -- failure.
+ok, err = pcall(box.space.sync.insert, box.space.sync, {3})
+assert(not ok)
+assert(err.code == box.error.READONLY)
box.ctl.promote()
box.ctl.demote()
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index e5a56ee62..3bb4e379c 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -359,7 +359,7 @@ ch2:put(true)
...
ch1:get()
---
-- Can't modify data because this instance is in read-only mode.
+- Can't modify data on a read-only instance - box.cfg.read_only is true
...
ch1:get()
---
--
2.24.3 (Apple Git-128)
More information about the Tarantool-patches
mailing list