* [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring @ 2021-04-08 12:18 Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:21 ` Cyrill Gorcunov via Tarantool-patches ` (2 more replies) 0 siblings, 3 replies; 11+ messages in thread From: Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 12:18 UTC (permalink / raw) To: tml; +Cc: Vladislav Shpilevoy In commit 14fa5fd82 (cfg: support symbolic evaluation of replication_synchro_quorum) we implemented support of symbolic evaluation of `replication_synchro_quorum` parameter and there is no easy way to obtain it current run-time value, ie evaluated number value. Moreover we would like to fetch queue length on transaction limbo for tests and extend this statistics in future. Thus lets add them. Closes #5191 Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> @TarantoolBot document Title: Provide `box.info.synchro` interface The `box.info.synchro` leaf provides information about details of synchronous replication. In particular `quorum` represent the current value of synchronous replication quorum defined by `replication_synchro_quorum` configuration parameter because it can be set as dynamic formula such as `N/2+1` and the value depends on current number of replicas. Since synchronous replication does not commit data immediately but waits for its propagation to replicas the data sits in a queue gathering `commit` responses from remote nodes. Current number of entries waiting in the queue is shown via `queue.len` member. A typical output is the following ``` Lua tarantool> box.info.synchro --- - queue: len: 0 quorum: 1 ... ``` The `len` member shows current number of entries in the queue. And the `quorum` member shows an evaluated value of `replication_synchro_quorum` parameter. --- issue https://github.com/tarantool/tarantool/issues/5191 branch gorcunov/gh-5191-qsync-stat-4 changelogs/unreleased/box-info-limbo.md | 4 +++ src/box/lua/info.c | 22 ++++++++++++++ src/box/txn_limbo.c | 5 +++- src/box/txn_limbo.h | 4 +++ test/box/info.result | 1 + test/replication/qsync_basic.result | 40 +++++++++++++++++++++---- test/replication/qsync_basic.test.lua | 21 +++++++++++-- 7 files changed, 89 insertions(+), 8 deletions(-) create mode 100644 changelogs/unreleased/box-info-limbo.md diff --git a/changelogs/unreleased/box-info-limbo.md b/changelogs/unreleased/box-info-limbo.md new file mode 100644 index 000000000..0f75a911d --- /dev/null +++ b/changelogs/unreleased/box-info-limbo.md @@ -0,0 +1,4 @@ +## feature/core + +* Provide information about state of synchronous replication via + `box.info.synchro` interface (gh-5191). diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 8cd379756..7c63c8f5e 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -50,6 +50,7 @@ #include "version.h" #include "box/box.h" #include "box/raft.h" +#include "box/txn_limbo.h" #include "lua/utils.h" #include "fiber.h" #include "sio.h" @@ -599,6 +600,26 @@ lbox_info_election(struct lua_State *L) return 1; } +static int +lbox_info_synchro(struct lua_State *L) +{ + lua_createtable(L, 0, 2); + + /* Quorum value may be evaluated via formula */ + lua_pushinteger(L, replication_synchro_quorum); + lua_setfield(L, -2, "quorum"); + + /*Queue information. */ + struct txn_limbo *queue = &txn_limbo; + lua_createtable(L, 0, 1); + lua_pushnumber(L, queue->len); + lua_setfield(L, -2, "len"); + lua_setfield(L, -2, "queue"); + + return 1; +} + + static const struct luaL_Reg lbox_info_dynamic_meta[] = { {"id", lbox_info_id}, {"uuid", lbox_info_uuid}, @@ -618,6 +639,7 @@ static const struct luaL_Reg lbox_info_dynamic_meta[] = { {"sql", lbox_info_sql}, {"listen", lbox_info_listen}, {"election", lbox_info_election}, + {"synchro", lbox_info_synchro}, {NULL, NULL} }; diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index cf0ad9350..a22e0861a 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -41,6 +41,7 @@ static inline void txn_limbo_create(struct txn_limbo *limbo) { rlist_create(&limbo->queue); + limbo->len = 0; limbo->owner_id = REPLICA_ID_NIL; fiber_cond_create(&limbo->wait_cond); vclock_create(&limbo->vclock); @@ -118,6 +119,7 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn) e->is_commit = false; e->is_rollback = false; rlist_add_tail_entry(&limbo->queue, e, in_queue); + limbo->len++; /* * We added new entries from a remote instance to an empty limbo. * Time to make this instance read-only. @@ -132,8 +134,8 @@ txn_limbo_remove(struct txn_limbo *limbo, struct txn_limbo_entry *entry) { assert(!rlist_empty(&entry->in_queue)); assert(txn_limbo_first_entry(limbo) == entry); - (void) limbo; rlist_del_entry(entry, in_queue); + limbo->len--; } static inline void @@ -144,6 +146,7 @@ txn_limbo_pop(struct txn_limbo *limbo, struct txn_limbo_entry *entry) assert(entry->is_rollback); rlist_del_entry(entry, in_queue); + limbo->len--; ++limbo->rollback_count; } diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index af0addf8d..f2a98c8bb 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -94,6 +94,10 @@ struct txn_limbo { * them LSNs in the same order. */ struct rlist queue; + /** + * Number of entries in limbo queue. + */ + int64_t len; /** * Instance ID of the owner of all the transactions in the * queue. Strictly speaking, nothing prevents to store not diff --git a/test/box/info.result b/test/box/info.result index c8037818b..3ee653773 100644 --- a/test/box/info.result +++ b/test/box/info.result @@ -89,6 +89,7 @@ t - signature - sql - status + - synchro - uptime - uuid - vclock diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result index bd3c3cce1..3457d2cc9 100644 --- a/test/replication/qsync_basic.result +++ b/test/replication/qsync_basic.result @@ -637,12 +637,46 @@ box.space.sync:count() | - 0 | ... --- Cleanup. +-- +-- gh-5191: test box.info.synchro interface. For +-- this sake we stop the replica and initiate data +-- write in sync space which won't pass due to timeout. +-- While we're sitting in a wait cycle the queue should +-- not be empty. +-- +-- Make sure this test is the *LAST* one since we stop +-- the replica node and never restart it back before the +-- cleanup procedure, also we're spinning on default node +-- and do not switch to other nodes. +-- test_run:cmd('switch default') | --- | - true | ... +test_run:cmd('stop server replica') + | --- + | - true + | ... +assert(box.info.synchro.queue.len == 0) + | --- + | - true + | ... +box.cfg{replication_synchro_timeout = 2} + | --- + | ... +f = fiber.new(function() box.space.sync:insert{1024} end) + | --- + | ... +test_run:wait_cond(function() return box.info.synchro.queue.len == 1 end) + | --- + | - true + | ... +test_run:wait_cond(function() return box.info.synchro.queue.len == 0 end) + | --- + | - true + | ... +-- Cleanup box.cfg{ \ replication_synchro_quorum = old_synchro_quorum, \ replication_synchro_timeout = old_synchro_timeout, \ @@ -650,10 +684,6 @@ box.cfg{ } | --- | ... -test_run:cmd('stop server replica') - | --- - | - true - | ... test_run:cmd('delete server replica') | --- | - true diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua index 94235547d..a604d80ee 100644 --- a/test/replication/qsync_basic.test.lua +++ b/test/replication/qsync_basic.test.lua @@ -248,15 +248,32 @@ for i = 1, 100 do box.space.sync:delete{i} end test_run:cmd('switch replica') box.space.sync:count() --- Cleanup. +-- +-- gh-5191: test box.info.synchro interface. For +-- this sake we stop the replica and initiate data +-- write in sync space which won't pass due to timeout. +-- While we're sitting in a wait cycle the queue should +-- not be empty. +-- +-- Make sure this test is the *LAST* one since we stop +-- the replica node and never restart it back before the +-- cleanup procedure, also we're spinning on default node +-- and do not switch to other nodes. +-- test_run:cmd('switch default') +test_run:cmd('stop server replica') +assert(box.info.synchro.queue.len == 0) +box.cfg{replication_synchro_timeout = 2} +f = fiber.new(function() box.space.sync:insert{1024} end) +test_run:wait_cond(function() return box.info.synchro.queue.len == 1 end) +test_run:wait_cond(function() return box.info.synchro.queue.len == 0 end) +-- Cleanup box.cfg{ \ replication_synchro_quorum = old_synchro_quorum, \ replication_synchro_timeout = old_synchro_timeout, \ replication_timeout = old_timeout, \ } -test_run:cmd('stop server replica') test_run:cmd('delete server replica') box.space.test:drop() box.space.sync:drop() -- 2.30.2 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 12:18 [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 12:21 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:58 ` Serge Petrenko via Tarantool-patches 2021-04-12 5:59 ` [Tarantool-patches] [PATCH v4] " Kirill Yukhin via Tarantool-patches 2 siblings, 0 replies; 11+ messages in thread From: Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 12:21 UTC (permalink / raw) To: tml; +Cc: Vladislav Shpilevoy On Thu, Apr 08, 2021 at 03:18:13PM +0300, Cyrill Gorcunov wrote: > +static int > +lbox_info_synchro(struct lua_State *L) > +{ > + lua_createtable(L, 0, 2); > + > + /* Quorum value may be evaluated via formula */ > + lua_pushinteger(L, replication_synchro_quorum); > + lua_setfield(L, -2, "quorum"); > + > + /*Queue information. */ Missing whitespace after the opening comment. Force pushed update. Also what is changed since v3: - use lua_createtable - move test to qsync_basic ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 12:18 [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:21 ` Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 12:58 ` Serge Petrenko via Tarantool-patches 2021-04-08 13:13 ` Cyrill Gorcunov via Tarantool-patches 2021-04-12 5:59 ` [Tarantool-patches] [PATCH v4] " Kirill Yukhin via Tarantool-patches 2 siblings, 1 reply; 11+ messages in thread From: Serge Petrenko via Tarantool-patches @ 2021-04-08 12:58 UTC (permalink / raw) To: Cyrill Gorcunov, tml; +Cc: Vladislav Shpilevoy 08.04.2021 15:18, Cyrill Gorcunov пишет: > In commit 14fa5fd82 (cfg: support symbolic evaluation of > replication_synchro_quorum) we implemented support of > symbolic evaluation of `replication_synchro_quorum` parameter > and there is no easy way to obtain it current run-time value, > ie evaluated number value. > > Moreover we would like to fetch queue length on transaction > limbo for tests and extend this statistics in future. Thus > lets add them. > > Closes #5191 Thanks for the fixes! Please return `box.info.synchro.quorum` assertions to the test regarding quorum evaluation. Like you had it in the previous patch version. Other than that, LGTM. > > Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> > > @TarantoolBot document > Title: Provide `box.info.synchro` interface > > The `box.info.synchro` leaf provides information about details of > synchronous replication. > > In particular `quorum` represent the current value of synchronous > replication quorum defined by `replication_synchro_quorum` > configuration parameter because it can be set as dynamic formula > such as `N/2+1` and the value depends on current number of replicas. > > Since synchronous replication does not commit data immediately > but waits for its propagation to replicas the data sits in a queue > gathering `commit` responses from remote nodes. Current number of > entries waiting in the queue is shown via `queue.len` member. > > A typical output is the following > > ``` Lua > tarantool> box.info.synchro > --- > - queue: > len: 0 > quorum: 1 > ... > ``` > > The `len` member shows current number of entries in the queue. > And the `quorum` member shows an evaluated value of > `replication_synchro_quorum` parameter. > --- > issue https://github.com/tarantool/tarantool/issues/5191 > branch gorcunov/gh-5191-qsync-stat-4 > > changelogs/unreleased/box-info-limbo.md | 4 +++ > src/box/lua/info.c | 22 ++++++++++++++ > src/box/txn_limbo.c | 5 +++- > src/box/txn_limbo.h | 4 +++ > test/box/info.result | 1 + > test/replication/qsync_basic.result | 40 +++++++++++++++++++++---- > test/replication/qsync_basic.test.lua | 21 +++++++++++-- > 7 files changed, 89 insertions(+), 8 deletions(-) > create mode 100644 changelogs/unreleased/box-info-limbo.md > > diff --git a/changelogs/unreleased/box-info-limbo.md b/changelogs/unreleased/box-info-limbo.md > new file mode 100644 > index 000000000..0f75a911d > --- /dev/null > +++ b/changelogs/unreleased/box-info-limbo.md > @@ -0,0 +1,4 @@ > +## feature/core > + > +* Provide information about state of synchronous replication via > + `box.info.synchro` interface (gh-5191). > diff --git a/src/box/lua/info.c b/src/box/lua/info.c > index 8cd379756..7c63c8f5e 100644 > --- a/src/box/lua/info.c > +++ b/src/box/lua/info.c > @@ -50,6 +50,7 @@ > #include "version.h" > #include "box/box.h" > #include "box/raft.h" > +#include "box/txn_limbo.h" > #include "lua/utils.h" > #include "fiber.h" > #include "sio.h" > @@ -599,6 +600,26 @@ lbox_info_election(struct lua_State *L) > return 1; > } > > +static int > +lbox_info_synchro(struct lua_State *L) > +{ > + lua_createtable(L, 0, 2); > + > + /* Quorum value may be evaluated via formula */ > + lua_pushinteger(L, replication_synchro_quorum); > + lua_setfield(L, -2, "quorum"); > + > + /*Queue information. */ > + struct txn_limbo *queue = &txn_limbo; > + lua_createtable(L, 0, 1); > + lua_pushnumber(L, queue->len); > + lua_setfield(L, -2, "len"); > + lua_setfield(L, -2, "queue"); > + > + return 1; > +} > + > + > static const struct luaL_Reg lbox_info_dynamic_meta[] = { > {"id", lbox_info_id}, > {"uuid", lbox_info_uuid}, > @@ -618,6 +639,7 @@ static const struct luaL_Reg lbox_info_dynamic_meta[] = { > {"sql", lbox_info_sql}, > {"listen", lbox_info_listen}, > {"election", lbox_info_election}, > + {"synchro", lbox_info_synchro}, > {NULL, NULL} > }; > > diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c > index cf0ad9350..a22e0861a 100644 > --- a/src/box/txn_limbo.c > +++ b/src/box/txn_limbo.c > @@ -41,6 +41,7 @@ static inline void > txn_limbo_create(struct txn_limbo *limbo) > { > rlist_create(&limbo->queue); > + limbo->len = 0; > limbo->owner_id = REPLICA_ID_NIL; > fiber_cond_create(&limbo->wait_cond); > vclock_create(&limbo->vclock); > @@ -118,6 +119,7 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn) > e->is_commit = false; > e->is_rollback = false; > rlist_add_tail_entry(&limbo->queue, e, in_queue); > + limbo->len++; > /* > * We added new entries from a remote instance to an empty limbo. > * Time to make this instance read-only. > @@ -132,8 +134,8 @@ txn_limbo_remove(struct txn_limbo *limbo, struct txn_limbo_entry *entry) > { > assert(!rlist_empty(&entry->in_queue)); > assert(txn_limbo_first_entry(limbo) == entry); > - (void) limbo; > rlist_del_entry(entry, in_queue); > + limbo->len--; > } > > static inline void > @@ -144,6 +146,7 @@ txn_limbo_pop(struct txn_limbo *limbo, struct txn_limbo_entry *entry) > assert(entry->is_rollback); > > rlist_del_entry(entry, in_queue); > + limbo->len--; > ++limbo->rollback_count; > } > > diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h > index af0addf8d..f2a98c8bb 100644 > --- a/src/box/txn_limbo.h > +++ b/src/box/txn_limbo.h > @@ -94,6 +94,10 @@ struct txn_limbo { > * them LSNs in the same order. > */ > struct rlist queue; > + /** > + * Number of entries in limbo queue. > + */ > + int64_t len; > /** > * Instance ID of the owner of all the transactions in the > * queue. Strictly speaking, nothing prevents to store not > diff --git a/test/box/info.result b/test/box/info.result > index c8037818b..3ee653773 100644 > --- a/test/box/info.result > +++ b/test/box/info.result > @@ -89,6 +89,7 @@ t > - signature > - sql > - status > + - synchro > - uptime > - uuid > - vclock > diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result > index bd3c3cce1..3457d2cc9 100644 > --- a/test/replication/qsync_basic.result > +++ b/test/replication/qsync_basic.result > @@ -637,12 +637,46 @@ box.space.sync:count() > | - 0 > | ... > > --- Cleanup. > +-- > +-- gh-5191: test box.info.synchro interface. For > +-- this sake we stop the replica and initiate data > +-- write in sync space which won't pass due to timeout. > +-- While we're sitting in a wait cycle the queue should > +-- not be empty. > +-- > +-- Make sure this test is the *LAST* one since we stop > +-- the replica node and never restart it back before the > +-- cleanup procedure, also we're spinning on default node > +-- and do not switch to other nodes. > +-- > test_run:cmd('switch default') > | --- > | - true > | ... > +test_run:cmd('stop server replica') > + | --- > + | - true > + | ... > +assert(box.info.synchro.queue.len == 0) > + | --- > + | - true > + | ... > +box.cfg{replication_synchro_timeout = 2} > + | --- > + | ... > +f = fiber.new(function() box.space.sync:insert{1024} end) > + | --- > + | ... > +test_run:wait_cond(function() return box.info.synchro.queue.len == 1 end) > + | --- > + | - true > + | ... > +test_run:wait_cond(function() return box.info.synchro.queue.len == 0 end) > + | --- > + | - true > + | ... > > +-- Cleanup > box.cfg{ \ > replication_synchro_quorum = old_synchro_quorum, \ > replication_synchro_timeout = old_synchro_timeout, \ > @@ -650,10 +684,6 @@ box.cfg{ > } > | --- > | ... > -test_run:cmd('stop server replica') > - | --- > - | - true > - | ... > test_run:cmd('delete server replica') > | --- > | - true > diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua > index 94235547d..a604d80ee 100644 > --- a/test/replication/qsync_basic.test.lua > +++ b/test/replication/qsync_basic.test.lua > @@ -248,15 +248,32 @@ for i = 1, 100 do box.space.sync:delete{i} end > test_run:cmd('switch replica') > box.space.sync:count() > > --- Cleanup. > +-- > +-- gh-5191: test box.info.synchro interface. For > +-- this sake we stop the replica and initiate data > +-- write in sync space which won't pass due to timeout. > +-- While we're sitting in a wait cycle the queue should > +-- not be empty. > +-- > +-- Make sure this test is the *LAST* one since we stop > +-- the replica node and never restart it back before the > +-- cleanup procedure, also we're spinning on default node > +-- and do not switch to other nodes. > +-- > test_run:cmd('switch default') > +test_run:cmd('stop server replica') > +assert(box.info.synchro.queue.len == 0) > +box.cfg{replication_synchro_timeout = 2} > +f = fiber.new(function() box.space.sync:insert{1024} end) > +test_run:wait_cond(function() return box.info.synchro.queue.len == 1 end) > +test_run:wait_cond(function() return box.info.synchro.queue.len == 0 end) > > +-- Cleanup > box.cfg{ \ > replication_synchro_quorum = old_synchro_quorum, \ > replication_synchro_timeout = old_synchro_timeout, \ > replication_timeout = old_timeout, \ > } > -test_run:cmd('stop server replica') > test_run:cmd('delete server replica') > box.space.test:drop() > box.space.sync:drop() -- Serge Petrenko ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 12:58 ` Serge Petrenko via Tarantool-patches @ 2021-04-08 13:13 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 13:15 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 13:17 ` Serge Petrenko via Tarantool-patches 0 siblings, 2 replies; 11+ messages in thread From: Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 13:13 UTC (permalink / raw) To: Serge Petrenko; +Cc: tml, Vladislav Shpilevoy On Thu, Apr 08, 2021 at 03:58:38PM +0300, Serge Petrenko wrote: > 08.04.2021 15:18, Cyrill Gorcunov пишет: > > In commit 14fa5fd82 (cfg: support symbolic evaluation of > > replication_synchro_quorum) we implemented support of > > symbolic evaluation of `replication_synchro_quorum` parameter > > and there is no easy way to obtain it current run-time value, > > ie evaluated number value. > > > > Moreover we would like to fetch queue length on transaction > > limbo for tests and extend this statistics in future. Thus > > lets add them. > > > > Closes #5191 > > Thanks for the fixes! > > Please return `box.info.synchro.quorum` assertions to the > test regarding quorum evaluation. Like you had it in the > previous patch version. > Say like this? --- diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result index 3457d2cc9..985a5baff 100644 --- a/test/replication/qsync_basic.result +++ b/test/replication/qsync_basic.result @@ -653,6 +653,13 @@ test_run:cmd('switch default') | --- | - true | ... +box.cfg{ replication_synchro_quorum = "N/2+1" } + | --- + | ... +assert(box.info.synchro.quorum == 2) + | --- + | - true + | ... test_run:cmd('stop server replica') | --- | - true diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua index a604d80ee..a1787648f 100644 --- a/test/replication/qsync_basic.test.lua +++ b/test/replication/qsync_basic.test.lua @@ -261,6 +261,8 @@ box.space.sync:count() -- and do not switch to other nodes. -- test_run:cmd('switch default') +box.cfg{ replication_synchro_quorum = "N/2+1" } +assert(box.info.synchro.quorum == 2) test_run:cmd('stop server replica') assert(box.info.synchro.queue.len == 0) box.cfg{replication_synchro_timeout = 2} ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 13:13 ` Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 13:15 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 13:17 ` Serge Petrenko via Tarantool-patches 1 sibling, 0 replies; 11+ messages in thread From: Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 13:15 UTC (permalink / raw) To: Serge Petrenko; +Cc: tml, Vladislav Shpilevoy On Thu, Apr 08, 2021 at 04:13:09PM +0300, Cyrill Gorcunov wrote: > > Please return `box.info.synchro.quorum` assertions to the > > test regarding quorum evaluation. Like you had it in the > > previous patch version. > > > > Say like this? I force pushed an update ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 13:13 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 13:15 ` Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 13:17 ` Serge Petrenko via Tarantool-patches 2021-04-08 13:23 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 14:57 ` [Tarantool-patches] [PATCH v5] " Cyrill Gorcunov via Tarantool-patches 1 sibling, 2 replies; 11+ messages in thread From: Serge Petrenko via Tarantool-patches @ 2021-04-08 13:17 UTC (permalink / raw) To: Cyrill Gorcunov; +Cc: tml, Vladislav Shpilevoy 08.04.2021 16:13, Cyrill Gorcunov пишет: > On Thu, Apr 08, 2021 at 03:58:38PM +0300, Serge Petrenko wrote: >> 08.04.2021 15:18, Cyrill Gorcunov пишет: >>> In commit 14fa5fd82 (cfg: support symbolic evaluation of >>> replication_synchro_quorum) we implemented support of >>> symbolic evaluation of `replication_synchro_quorum` parameter >>> and there is no easy way to obtain it current run-time value, >>> ie evaluated number value. >>> >>> Moreover we would like to fetch queue length on transaction >>> limbo for tests and extend this statistics in future. Thus >>> lets add them. >>> >>> Closes #5191 >> Thanks for the fixes! >> >> Please return `box.info.synchro.quorum` assertions to the >> test regarding quorum evaluation. Like you had it in the >> previous patch version. >> > Say like this? Yes, but better do it in the test `replication/gh-5446-qsync-eval-quorum` > --- > diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result > index 3457d2cc9..985a5baff 100644 > --- a/test/replication/qsync_basic.result > +++ b/test/replication/qsync_basic.result > @@ -653,6 +653,13 @@ test_run:cmd('switch default') > | --- > | - true > | ... > +box.cfg{ replication_synchro_quorum = "N/2+1" } > + | --- > + | ... > +assert(box.info.synchro.quorum == 2) > + | --- > + | - true > + | ... > test_run:cmd('stop server replica') > | --- > | - true > diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua > index a604d80ee..a1787648f 100644 > --- a/test/replication/qsync_basic.test.lua > +++ b/test/replication/qsync_basic.test.lua > @@ -261,6 +261,8 @@ box.space.sync:count() > -- and do not switch to other nodes. > -- > test_run:cmd('switch default') > +box.cfg{ replication_synchro_quorum = "N/2+1" } > +assert(box.info.synchro.quorum == 2) > test_run:cmd('stop server replica') > assert(box.info.synchro.queue.len == 0) > box.cfg{replication_synchro_timeout = 2} -- Serge Petrenko ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 13:17 ` Serge Petrenko via Tarantool-patches @ 2021-04-08 13:23 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 14:57 ` [Tarantool-patches] [PATCH v5] " Cyrill Gorcunov via Tarantool-patches 1 sibling, 0 replies; 11+ messages in thread From: Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 13:23 UTC (permalink / raw) To: Serge Petrenko; +Cc: tml, Vladislav Shpilevoy On Thu, Apr 08, 2021 at 04:17:23PM +0300, Serge Petrenko wrote: > > > 08.04.2021 16:13, Cyrill Gorcunov пишет: > > On Thu, Apr 08, 2021 at 03:58:38PM +0300, Serge Petrenko wrote: > > > 08.04.2021 15:18, Cyrill Gorcunov пишет: > > > > In commit 14fa5fd82 (cfg: support symbolic evaluation of > > > > replication_synchro_quorum) we implemented support of > > > > symbolic evaluation of `replication_synchro_quorum` parameter > > > > and there is no easy way to obtain it current run-time value, > > > > ie evaluated number value. > > > > > > > > Moreover we would like to fetch queue length on transaction > > > > limbo for tests and extend this statistics in future. Thus > > > > lets add them. > > > > > > > > Closes #5191 > > > Thanks for the fixes! > > > > > > Please return `box.info.synchro.quorum` assertions to the > > > test regarding quorum evaluation. Like you had it in the > > > previous patch version. > > > > > Say like this? > > Yes, but better do it in the test `replication/gh-5446-qsync-eval-quorum` OK, will move it there and post the final diff ^ permalink raw reply [flat|nested] 11+ messages in thread
* [Tarantool-patches] [PATCH v5] qsync: provide box.info.synchro interface for monitoring 2021-04-08 13:17 ` Serge Petrenko via Tarantool-patches 2021-04-08 13:23 ` Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 14:57 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 15:24 ` Serge Petrenko via Tarantool-patches 2021-04-09 22:24 ` Vladislav Shpilevoy via Tarantool-patches 1 sibling, 2 replies; 11+ messages in thread From: Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 14:57 UTC (permalink / raw) To: Serge Petrenko, Vladislav Shpilevoy; +Cc: tml In commit 14fa5fd82 (cfg: support symbolic evaluation of replication_synchro_quorum) we implemented support of symbolic evaluation of `replication_synchro_quorum` parameter and there is no easy way to obtain it current run-time value, ie evaluated number value. Moreover we would like to fetch queue length on transaction limbo for tests and extend this statistics in future. Thus lets add them. Closes #5191 Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> @TarantoolBot document Title: Provide `box.info.synchro` interface The `box.info.synchro` leaf provides information about details of synchronous replication. In particular `quorum` represent the current value of synchronous replication quorum defined by `replication_synchro_quorum` configuration parameter because it can be set as dynamic formula such as `N/2+1` and the value depends on current number of replicas. Since synchronous replication does not commit data immediately but waits for its propagation to replicas the data sits in a queue gathering `commit` responses from remote nodes. Current number of entries waiting in the queue is shown via `queue.len` member. A typical output is the following ``` Lua tarantool> box.info.synchro --- - queue: len: 0 quorum: 1 ... ``` The `len` member shows current number of entries in the queue. And the `quorum` member shows an evaluated value of `replication_synchro_quorum` parameter. --- Guys, I pushed it into gorcunov/gh-5191-qsync-stat-5 because the patch became bigger as I supposed. changelogs/unreleased/box-info-limbo.md | 4 + src/box/lua/info.c | 22 ++++++ src/box/txn_limbo.c | 5 +- src/box/txn_limbo.h | 4 + test/box/info.result | 1 + .../gh-5446-qsync-eval-quorum.result | 74 ++++++++++++------- .../gh-5446-qsync-eval-quorum.test.lua | 48 ++++++++---- test/replication/qsync_basic.result | 40 ++++++++-- test/replication/qsync_basic.test.lua | 21 +++++- 9 files changed, 172 insertions(+), 47 deletions(-) create mode 100644 changelogs/unreleased/box-info-limbo.md diff --git a/changelogs/unreleased/box-info-limbo.md b/changelogs/unreleased/box-info-limbo.md new file mode 100644 index 000000000..0f75a911d --- /dev/null +++ b/changelogs/unreleased/box-info-limbo.md @@ -0,0 +1,4 @@ +## feature/core + +* Provide information about state of synchronous replication via + `box.info.synchro` interface (gh-5191). diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 8cd379756..0eb48b823 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -50,6 +50,7 @@ #include "version.h" #include "box/box.h" #include "box/raft.h" +#include "box/txn_limbo.h" #include "lua/utils.h" #include "fiber.h" #include "sio.h" @@ -599,6 +600,26 @@ lbox_info_election(struct lua_State *L) return 1; } +static int +lbox_info_synchro(struct lua_State *L) +{ + lua_createtable(L, 0, 2); + + /* Quorum value may be evaluated via formula */ + lua_pushinteger(L, replication_synchro_quorum); + lua_setfield(L, -2, "quorum"); + + /* Queue information. */ + struct txn_limbo *queue = &txn_limbo; + lua_createtable(L, 0, 1); + lua_pushnumber(L, queue->len); + lua_setfield(L, -2, "len"); + lua_setfield(L, -2, "queue"); + + return 1; +} + + static const struct luaL_Reg lbox_info_dynamic_meta[] = { {"id", lbox_info_id}, {"uuid", lbox_info_uuid}, @@ -618,6 +639,7 @@ static const struct luaL_Reg lbox_info_dynamic_meta[] = { {"sql", lbox_info_sql}, {"listen", lbox_info_listen}, {"election", lbox_info_election}, + {"synchro", lbox_info_synchro}, {NULL, NULL} }; diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index cf0ad9350..a22e0861a 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -41,6 +41,7 @@ static inline void txn_limbo_create(struct txn_limbo *limbo) { rlist_create(&limbo->queue); + limbo->len = 0; limbo->owner_id = REPLICA_ID_NIL; fiber_cond_create(&limbo->wait_cond); vclock_create(&limbo->vclock); @@ -118,6 +119,7 @@ txn_limbo_append(struct txn_limbo *limbo, uint32_t id, struct txn *txn) e->is_commit = false; e->is_rollback = false; rlist_add_tail_entry(&limbo->queue, e, in_queue); + limbo->len++; /* * We added new entries from a remote instance to an empty limbo. * Time to make this instance read-only. @@ -132,8 +134,8 @@ txn_limbo_remove(struct txn_limbo *limbo, struct txn_limbo_entry *entry) { assert(!rlist_empty(&entry->in_queue)); assert(txn_limbo_first_entry(limbo) == entry); - (void) limbo; rlist_del_entry(entry, in_queue); + limbo->len--; } static inline void @@ -144,6 +146,7 @@ txn_limbo_pop(struct txn_limbo *limbo, struct txn_limbo_entry *entry) assert(entry->is_rollback); rlist_del_entry(entry, in_queue); + limbo->len--; ++limbo->rollback_count; } diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index af0addf8d..f2a98c8bb 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -94,6 +94,10 @@ struct txn_limbo { * them LSNs in the same order. */ struct rlist queue; + /** + * Number of entries in limbo queue. + */ + int64_t len; /** * Instance ID of the owner of all the transactions in the * queue. Strictly speaking, nothing prevents to store not diff --git a/test/box/info.result b/test/box/info.result index c8037818b..3ee653773 100644 --- a/test/box/info.result +++ b/test/box/info.result @@ -89,6 +89,7 @@ t - signature - sql - status + - synchro - uptime - uuid - vclock diff --git a/test/replication/gh-5446-qsync-eval-quorum.result b/test/replication/gh-5446-qsync-eval-quorum.result index 1d13f26db..5f83b248c 100644 --- a/test/replication/gh-5446-qsync-eval-quorum.result +++ b/test/replication/gh-5446-qsync-eval-quorum.result @@ -72,6 +72,15 @@ cfg_set_pass_tmo() | --- | ... +-- gh-5191: we may validate the evaluated number, +-- we take a canonical formula here. +function assert_quorum_value(nr_replicas) \ + local v = math.floor(nr_replicas / 2) + 1 \ + assert(box.info.synchro.quorum == v) \ +end + | --- + | ... + -- Create a sync space we will operate on s = box.schema.space.create('sync', {is_sync = true, engine = engine}) | --- @@ -96,6 +105,9 @@ test_run:cmd('start server replica1 with wait=True, wait_load=True') | --- | - true | ... +assert_quorum_value(2) + | --- + | ... s:insert{2} -- should pass | --- | - [2] @@ -133,6 +145,9 @@ test_run:cmd('start server replica2 with wait=True, wait_load=True') | --- | - true | ... +assert_quorum_value(3) + | --- + | ... test_run:cmd('create server replica3 with rpl_master=default,\ script="replication/replica-quorum-3.lua"') @@ -143,6 +158,9 @@ test_run:cmd('start server replica3 with wait=True, wait_load=True') | --- | - true | ... +assert_quorum_value(4) + | --- + | ... test_run:cmd('create server replica4 with rpl_master=default,\ script="replication/replica-quorum-4.lua"') @@ -153,6 +171,9 @@ test_run:cmd('start server replica4 with wait=True, wait_load=True') | --- | - true | ... +assert_quorum_value(5) + | --- + | ... test_run:cmd('create server replica5 with rpl_master=default,\ script="replication/replica-quorum-5.lua"') @@ -163,6 +184,9 @@ test_run:cmd('start server replica5 with wait=True, wait_load=True') | --- | - true | ... +assert_quorum_value(6) + | --- + | ... test_run:cmd('create server replica6 with rpl_master=default,\ script="replication/replica-quorum-6.lua"') @@ -173,6 +197,9 @@ test_run:cmd('start server replica6 with wait=True, wait_load=True') | --- | - true | ... +assert_quorum_value(7) + | --- + | ... -- All replicas are up and running s:insert{4} -- should pass @@ -254,55 +281,52 @@ s:insert{11} -- should pass | - [11] | ... --- cleanup +-- Cleanup and test formula evaluation same time, +-- which requires _cluster modification though. +function delete_replica(name) \ + local id = test_run:eval(name, 'return box.info.id')[1] \ + test_run:cmd('stop server ' .. name) \ + test_run:cmd('delete server ' .. name) \ + box.space._cluster:delete(id) \ +end + | --- + | ... -test_run:cmd('stop server replica1') +delete_replica('replica1') | --- - | - true | ... -test_run:cmd('delete server replica1') +assert_quorum_value(6) | --- - | - true | ... -test_run:cmd('stop server replica2') +delete_replica('replica2') | --- - | - true | ... -test_run:cmd('delete server replica2') +assert_quorum_value(5) | --- - | - true | ... -test_run:cmd('stop server replica3') +delete_replica('replica3') | --- - | - true | ... -test_run:cmd('delete server replica3') +assert_quorum_value(4) | --- - | - true | ... -test_run:cmd('stop server replica4') +delete_replica('replica4') | --- - | - true | ... -test_run:cmd('delete server replica4') +assert_quorum_value(3) | --- - | - true | ... -test_run:cmd('stop server replica5') +delete_replica('replica5') | --- - | - true | ... -test_run:cmd('delete server replica5') +assert_quorum_value(2) | --- - | - true | ... -test_run:cmd('stop server replica6') +delete_replica('replica6') | --- - | - true | ... -test_run:cmd('delete server replica6') +assert_quorum_value(1) | --- - | - true | ... s:drop() diff --git a/test/replication/gh-5446-qsync-eval-quorum.test.lua b/test/replication/gh-5446-qsync-eval-quorum.test.lua index 62d87ddcb..6b9e324ed 100644 --- a/test/replication/gh-5446-qsync-eval-quorum.test.lua +++ b/test/replication/gh-5446-qsync-eval-quorum.test.lua @@ -27,6 +27,13 @@ function cfg_set_fail_tmo() box.cfg{replication_synchro_timeout = 0.5} end box.cfg{replication_synchro_quorum = "N/2+1"} cfg_set_pass_tmo() +-- gh-5191: we may validate the evaluated number, +-- we take a canonical formula here. +function assert_quorum_value(nr_replicas) \ + local v = math.floor(nr_replicas / 2) + 1 \ + assert(box.info.synchro.quorum == v) \ +end + -- Create a sync space we will operate on s = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = s:create_index('pk') @@ -38,6 +45,7 @@ s:insert{1} -- should pass test_run:cmd('create server replica1 with rpl_master=default,\ script="replication/replica-quorum-1.lua"') test_run:cmd('start server replica1 with wait=True, wait_load=True') +assert_quorum_value(2) s:insert{2} -- should pass cfg_set_fail_tmo() test_run:cmd('stop server replica1') @@ -50,22 +58,27 @@ s:insert{3} -- should pass test_run:cmd('create server replica2 with rpl_master=default,\ script="replication/replica-quorum-2.lua"') test_run:cmd('start server replica2 with wait=True, wait_load=True') +assert_quorum_value(3) test_run:cmd('create server replica3 with rpl_master=default,\ script="replication/replica-quorum-3.lua"') test_run:cmd('start server replica3 with wait=True, wait_load=True') +assert_quorum_value(4) test_run:cmd('create server replica4 with rpl_master=default,\ script="replication/replica-quorum-4.lua"') test_run:cmd('start server replica4 with wait=True, wait_load=True') +assert_quorum_value(5) test_run:cmd('create server replica5 with rpl_master=default,\ script="replication/replica-quorum-5.lua"') test_run:cmd('start server replica5 with wait=True, wait_load=True') +assert_quorum_value(6) test_run:cmd('create server replica6 with rpl_master=default,\ script="replication/replica-quorum-6.lua"') test_run:cmd('start server replica6 with wait=True, wait_load=True') +assert_quorum_value(7) -- All replicas are up and running s:insert{4} -- should pass @@ -92,20 +105,27 @@ s:insert{10} -- should pass test_run:cmd('start server replica1 with wait=True, wait_load=True') s:insert{11} -- should pass --- cleanup - -test_run:cmd('stop server replica1') -test_run:cmd('delete server replica1') -test_run:cmd('stop server replica2') -test_run:cmd('delete server replica2') -test_run:cmd('stop server replica3') -test_run:cmd('delete server replica3') -test_run:cmd('stop server replica4') -test_run:cmd('delete server replica4') -test_run:cmd('stop server replica5') -test_run:cmd('delete server replica5') -test_run:cmd('stop server replica6') -test_run:cmd('delete server replica6') +-- Cleanup and test formula evaluation same time, +-- which requires _cluster modification though. +function delete_replica(name) \ + local id = test_run:eval(name, 'return box.info.id')[1] \ + test_run:cmd('stop server ' .. name) \ + test_run:cmd('delete server ' .. name) \ + box.space._cluster:delete(id) \ +end + +delete_replica('replica1') +assert_quorum_value(6) +delete_replica('replica2') +assert_quorum_value(5) +delete_replica('replica3') +assert_quorum_value(4) +delete_replica('replica4') +assert_quorum_value(3) +delete_replica('replica5') +assert_quorum_value(2) +delete_replica('replica6') +assert_quorum_value(1) s:drop() diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result index bd3c3cce1..3457d2cc9 100644 --- a/test/replication/qsync_basic.result +++ b/test/replication/qsync_basic.result @@ -637,12 +637,46 @@ box.space.sync:count() | - 0 | ... --- Cleanup. +-- +-- gh-5191: test box.info.synchro interface. For +-- this sake we stop the replica and initiate data +-- write in sync space which won't pass due to timeout. +-- While we're sitting in a wait cycle the queue should +-- not be empty. +-- +-- Make sure this test is the *LAST* one since we stop +-- the replica node and never restart it back before the +-- cleanup procedure, also we're spinning on default node +-- and do not switch to other nodes. +-- test_run:cmd('switch default') | --- | - true | ... +test_run:cmd('stop server replica') + | --- + | - true + | ... +assert(box.info.synchro.queue.len == 0) + | --- + | - true + | ... +box.cfg{replication_synchro_timeout = 2} + | --- + | ... +f = fiber.new(function() box.space.sync:insert{1024} end) + | --- + | ... +test_run:wait_cond(function() return box.info.synchro.queue.len == 1 end) + | --- + | - true + | ... +test_run:wait_cond(function() return box.info.synchro.queue.len == 0 end) + | --- + | - true + | ... +-- Cleanup box.cfg{ \ replication_synchro_quorum = old_synchro_quorum, \ replication_synchro_timeout = old_synchro_timeout, \ @@ -650,10 +684,6 @@ box.cfg{ } | --- | ... -test_run:cmd('stop server replica') - | --- - | - true - | ... test_run:cmd('delete server replica') | --- | - true diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua index 94235547d..a604d80ee 100644 --- a/test/replication/qsync_basic.test.lua +++ b/test/replication/qsync_basic.test.lua @@ -248,15 +248,32 @@ for i = 1, 100 do box.space.sync:delete{i} end test_run:cmd('switch replica') box.space.sync:count() --- Cleanup. +-- +-- gh-5191: test box.info.synchro interface. For +-- this sake we stop the replica and initiate data +-- write in sync space which won't pass due to timeout. +-- While we're sitting in a wait cycle the queue should +-- not be empty. +-- +-- Make sure this test is the *LAST* one since we stop +-- the replica node and never restart it back before the +-- cleanup procedure, also we're spinning on default node +-- and do not switch to other nodes. +-- test_run:cmd('switch default') +test_run:cmd('stop server replica') +assert(box.info.synchro.queue.len == 0) +box.cfg{replication_synchro_timeout = 2} +f = fiber.new(function() box.space.sync:insert{1024} end) +test_run:wait_cond(function() return box.info.synchro.queue.len == 1 end) +test_run:wait_cond(function() return box.info.synchro.queue.len == 0 end) +-- Cleanup box.cfg{ \ replication_synchro_quorum = old_synchro_quorum, \ replication_synchro_timeout = old_synchro_timeout, \ replication_timeout = old_timeout, \ } -test_run:cmd('stop server replica') test_run:cmd('delete server replica') box.space.test:drop() box.space.sync:drop() -- 2.30.2 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v5] qsync: provide box.info.synchro interface for monitoring 2021-04-08 14:57 ` [Tarantool-patches] [PATCH v5] " Cyrill Gorcunov via Tarantool-patches @ 2021-04-08 15:24 ` Serge Petrenko via Tarantool-patches 2021-04-09 22:24 ` Vladislav Shpilevoy via Tarantool-patches 1 sibling, 0 replies; 11+ messages in thread From: Serge Petrenko via Tarantool-patches @ 2021-04-08 15:24 UTC (permalink / raw) To: Cyrill Gorcunov, Vladislav Shpilevoy; +Cc: tml 08.04.2021 17:57, Cyrill Gorcunov пишет: > In commit 14fa5fd82 (cfg: support symbolic evaluation of > replication_synchro_quorum) we implemented support of > symbolic evaluation of `replication_synchro_quorum` parameter > and there is no easy way to obtain it current run-time value, > ie evaluated number value. > > Moreover we would like to fetch queue length on transaction > limbo for tests and extend this statistics in future. Thus > lets add them. > > Closes #5191 Thanks for the fixes! LGTM. -- Serge Petrenko ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v5] qsync: provide box.info.synchro interface for monitoring 2021-04-08 14:57 ` [Tarantool-patches] [PATCH v5] " Cyrill Gorcunov via Tarantool-patches 2021-04-08 15:24 ` Serge Petrenko via Tarantool-patches @ 2021-04-09 22:24 ` Vladislav Shpilevoy via Tarantool-patches 1 sibling, 0 replies; 11+ messages in thread From: Vladislav Shpilevoy via Tarantool-patches @ 2021-04-09 22:24 UTC (permalink / raw) To: Cyrill Gorcunov, Serge Petrenko; +Cc: tml Hi! Thanks for the patch! LGTM. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring 2021-04-08 12:18 [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:21 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:58 ` Serge Petrenko via Tarantool-patches @ 2021-04-12 5:59 ` Kirill Yukhin via Tarantool-patches 2 siblings, 0 replies; 11+ messages in thread From: Kirill Yukhin via Tarantool-patches @ 2021-04-12 5:59 UTC (permalink / raw) To: Cyrill Gorcunov; +Cc: tml, Vladislav Shpilevoy Hello, On 08 апр 15:18, Cyrill Gorcunov wrote: > In commit 14fa5fd82 (cfg: support symbolic evaluation of > replication_synchro_quorum) we implemented support of > symbolic evaluation of `replication_synchro_quorum` parameter > and there is no easy way to obtain it current run-time value, > ie evaluated number value. > > Moreover we would like to fetch queue length on transaction > limbo for tests and extend this statistics in future. Thus > lets add them. > > Closes #5191 > > Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> > > @TarantoolBot document > Title: Provide `box.info.synchro` interface > > The `box.info.synchro` leaf provides information about details of > synchronous replication. > > In particular `quorum` represent the current value of synchronous > replication quorum defined by `replication_synchro_quorum` > configuration parameter because it can be set as dynamic formula > such as `N/2+1` and the value depends on current number of replicas. > > Since synchronous replication does not commit data immediately > but waits for its propagation to replicas the data sits in a queue > gathering `commit` responses from remote nodes. Current number of > entries waiting in the queue is shown via `queue.len` member. > > A typical output is the following > > ``` Lua > tarantool> box.info.synchro > --- > - queue: > len: 0 > quorum: 1 > ... > ``` > > The `len` member shows current number of entries in the queue. > And the `quorum` member shows an evaluated value of > `replication_synchro_quorum` parameter. > --- > issue https://github.com/tarantool/tarantool/issues/5191 > branch gorcunov/gh-5191-qsync-stat-4 I've checked your patch into master. -- Regards, Kirill Yukhin ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2021-04-12 5:59 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-04-08 12:18 [Tarantool-patches] [PATCH v4] qsync: provide box.info.synchro interface for monitoring Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:21 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 12:58 ` Serge Petrenko via Tarantool-patches 2021-04-08 13:13 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 13:15 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 13:17 ` Serge Petrenko via Tarantool-patches 2021-04-08 13:23 ` Cyrill Gorcunov via Tarantool-patches 2021-04-08 14:57 ` [Tarantool-patches] [PATCH v5] " Cyrill Gorcunov via Tarantool-patches 2021-04-08 15:24 ` Serge Petrenko via Tarantool-patches 2021-04-09 22:24 ` Vladislav Shpilevoy via Tarantool-patches 2021-04-12 5:59 ` [Tarantool-patches] [PATCH v4] " Kirill Yukhin via Tarantool-patches
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox