* [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote()
@ 2021-07-12 22:20 Vladislav Shpilevoy via Tarantool-patches
2021-07-13 10:01 ` Serge Petrenko via Tarantool-patches
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Vladislav Shpilevoy via Tarantool-patches @ 2021-07-12 22:20 UTC (permalink / raw)
To: tarantool-patches, gorcunov, sergepetrenko
box_promote(), when called manually, used to wait for the existing
transactions from a foreign limbo to end within a timeout, giving
them a chance to end on their own terms.
The waiting was done via polling like
while (!done)
sleep(small_timeout);
Polling is almost always super bad both for execution time and
for CPU usage. The patch replaces it with proper waiting based on
events happening in the limbo.
Closes #5190
---
Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5190-qsync-polling
Issue: https://github.com/tarantool/tarantool/issues/5190
src/box/box.cc | 7 +----
src/box/txn_limbo.c | 74 ++++++++++++++++++++++++++++++++++-----------
src/box/txn_limbo.h | 4 +++
3 files changed, 62 insertions(+), 23 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index ab7d983c9..eeb57b04e 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1627,12 +1627,7 @@ box_promote(void)
if (try_wait) {
/* Wait until pending confirmations/rollbacks reach us. */
double timeout = 2 * replication_synchro_timeout;
- double start_tm = fiber_clock();
- while (!txn_limbo_is_empty(&txn_limbo)) {
- if (fiber_clock() - start_tm > timeout)
- break;
- fiber_sleep(0.001);
- }
+ txn_limbo_wait_empty(&txn_limbo, timeout);
/*
* Our mission was to clear the limbo from former leader's
* transactions. Exit in case someone did that for us.
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 51dc2a186..fdea287c7 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -612,11 +612,14 @@ txn_rollback_cb(struct trigger *trigger, void *event)
return 0;
}
-int
-txn_limbo_wait_confirm(struct txn_limbo *limbo)
+/**
+ * Wait until the last transaction in the limbo is finished and get its result.
+ */
+static int
+txn_limbo_wait_last_txn(struct txn_limbo *limbo, bool *is_rollback,
+ double timeout)
{
- if (txn_limbo_is_empty(limbo))
- return 0;
+ assert(!txn_limbo_is_empty(limbo));
/* initialization of a waitpoint. */
struct confirm_waitpoint cwp;
@@ -632,27 +635,42 @@ txn_limbo_wait_confirm(struct txn_limbo *limbo)
struct txn_limbo_entry *tle = txn_limbo_last_entry(limbo);
txn_on_commit(tle->txn, &on_complete);
txn_on_rollback(tle->txn, &on_rollback);
- double start_time = fiber_clock();
+ double deadline = fiber_clock() + timeout;
+ int rc;
while (true) {
- double deadline = start_time + replication_synchro_timeout;
+ if (timeout < 0) {
+ rc = -1;
+ break;
+ }
bool cancellable = fiber_set_cancellable(false);
- double timeout = deadline - fiber_clock();
- int rc = fiber_cond_wait_timeout(&limbo->wait_cond, timeout);
+ rc = fiber_cond_wait_timeout(&limbo->wait_cond, timeout);
fiber_set_cancellable(cancellable);
- if (cwp.is_confirm || cwp.is_rollback)
- goto complete;
+ if (cwp.is_confirm || cwp.is_rollback) {
+ *is_rollback = cwp.is_rollback;
+ rc = 0;
+ break;
+ }
if (rc != 0)
- goto timed_out;
+ break;
+ timeout = deadline - fiber_clock();
}
-timed_out:
- /* Clear the triggers if the timeout has been reached. */
trigger_clear(&on_complete);
trigger_clear(&on_rollback);
- diag_set(ClientError, ER_SYNC_QUORUM_TIMEOUT);
- return -1;
+ return rc;
+}
-complete:
- if (!cwp.is_confirm) {
+int
+txn_limbo_wait_confirm(struct txn_limbo *limbo)
+{
+ if (txn_limbo_is_empty(limbo))
+ return 0;
+ bool is_rollback;
+ if (txn_limbo_wait_last_txn(limbo, &is_rollback,
+ replication_synchro_timeout) != 0) {
+ diag_set(ClientError, ER_SYNC_QUORUM_TIMEOUT);
+ return -1;
+ }
+ if (is_rollback) {
/* The transaction has been rolled back. */
diag_set(ClientError, ER_SYNC_ROLLBACK);
return -1;
@@ -660,6 +678,28 @@ complete:
return 0;
}
+int
+txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout)
+{
+ if (txn_limbo_is_empty(limbo))
+ return 0;
+ bool is_rollback;
+ double deadline = fiber_clock() + timeout;
+ /*
+ * Retry in the loop. More transactions might be added while waiting for
+ * the last one.
+ */
+ do {
+ if (txn_limbo_wait_last_txn(limbo, &is_rollback,
+ timeout) != 0) {
+ diag_set(ClientError, ER_TIMEOUT);
+ return -1;
+ }
+ timeout = deadline - fiber_clock();
+ } while (!txn_limbo_is_empty(limbo));
+ return 0;
+}
+
void
txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req)
{
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index e409ac657..7debbc0b9 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -311,6 +311,10 @@ txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req);
int
txn_limbo_wait_confirm(struct txn_limbo *limbo);
+/** Wait until the limbo is empty. Regardless of how its transactions end. */
+int
+txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout);
+
/**
* Write a PROMOTE request, which has the same effect as CONFIRM(@a lsn) and
* ROLLBACK(@a lsn + 1) combined.
--
2.24.3 (Apple Git-128)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote()
2021-07-12 22:20 [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote() Vladislav Shpilevoy via Tarantool-patches
@ 2021-07-13 10:01 ` Serge Petrenko via Tarantool-patches
2021-07-13 10:02 ` Serge Petrenko via Tarantool-patches
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Serge Petrenko via Tarantool-patches @ 2021-07-13 10:01 UTC (permalink / raw)
To: Vladislav Shpilevoy, tarantool-patches, gorcunov
13.07.2021 01:20, Vladislav Shpilevoy пишет:
> box_promote() when called manually used to wait for the existing
> transactions from a foreign limbo to end during a timeout. Giving
> them a chance to end on their terms.
>
> The waiting was done via polling like
>
> while (!done)
> sleep(small_timeout);
>
> Polling is almost always super bad both for execution time and
> for CPU usage. The patch replaces it with proper waiting based on
> events happening in the limbo.
>
> Closes #5190
> ---
> Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5190-qsync-polling
> Issue: https://github.com/tarantool/tarantool/issues/5190
>
> src/box/box.cc | 7 +----
> src/box/txn_limbo.c | 74 ++++++++++++++++++++++++++++++++++-----------
> src/box/txn_limbo.h | 4 +++
> 3 files changed, 62 insertions(+), 23 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index ab7d983c9..eeb57b04e 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1627,12 +1627,7 @@ box_promote(void)
> if (try_wait) {
> /* Wait until pending confirmations/rollbacks reach us. */
> double timeout = 2 * replication_synchro_timeout;
> - double start_tm = fiber_clock();
> - while (!txn_limbo_is_empty(&txn_limbo)) {
> - if (fiber_clock() - start_tm > timeout)
> - break;
> - fiber_sleep(0.001);
> - }
> + txn_limbo_wait_empty(&txn_limbo, timeout);
> /*
> * Our mission was to clear the limbo from former leader's
> * transactions. Exit in case someone did that for us.
> diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
> index 51dc2a186..fdea287c7 100644
> --- a/src/box/txn_limbo.c
> +++ b/src/box/txn_limbo.c
> @@ -612,11 +612,14 @@ txn_rollback_cb(struct trigger *trigger, void *event)
> return 0;
> }
>
> -int
> -txn_limbo_wait_confirm(struct txn_limbo *limbo)
> +/**
> + * Wait until the last transaction in the limbo is finished and get its result.
> + */
> +static int
> +txn_limbo_wait_last_txn(struct txn_limbo *limbo, bool *is_rollback,
> + double timeout)
> {
> - if (txn_limbo_is_empty(limbo))
> - return 0;
> + assert(!txn_limbo_is_empty(limbo));
>
> /* initialization of a waitpoint. */
> struct confirm_waitpoint cwp;
> @@ -632,27 +635,42 @@ txn_limbo_wait_confirm(struct txn_limbo *limbo)
> struct txn_limbo_entry *tle = txn_limbo_last_entry(limbo);
> txn_on_commit(tle->txn, &on_complete);
> txn_on_rollback(tle->txn, &on_rollback);
> - double start_time = fiber_clock();
> + double deadline = fiber_clock() + timeout;
> + int rc;
> while (true) {
> - double deadline = start_time + replication_synchro_timeout;
> + if (timeout < 0) {
> + rc = -1;
> + break;
> + }
> bool cancellable = fiber_set_cancellable(false);
> - double timeout = deadline - fiber_clock();
> - int rc = fiber_cond_wait_timeout(&limbo->wait_cond, timeout);
> + rc = fiber_cond_wait_timeout(&limbo->wait_cond, timeout);
> fiber_set_cancellable(cancellable);
> - if (cwp.is_confirm || cwp.is_rollback)
> - goto complete;
> + if (cwp.is_confirm || cwp.is_rollback) {
> + *is_rollback = cwp.is_rollback;
> + rc = 0;
> + break;
> + }
> if (rc != 0)
> - goto timed_out;
> + break;
> + timeout = deadline - fiber_clock();
> }
> -timed_out:
> - /* Clear the triggers if the timeout has been reached. */
> trigger_clear(&on_complete);
> trigger_clear(&on_rollback);
> - diag_set(ClientError, ER_SYNC_QUORUM_TIMEOUT);
> - return -1;
> + return rc;
> +}
>
> -complete:
> - if (!cwp.is_confirm) {
> +int
> +txn_limbo_wait_confirm(struct txn_limbo *limbo)
> +{
> + if (txn_limbo_is_empty(limbo))
> + return 0;
> + bool is_rollback;
> + if (txn_limbo_wait_last_txn(limbo, &is_rollback,
> + replication_synchro_timeout) != 0) {
> + diag_set(ClientError, ER_SYNC_QUORUM_TIMEOUT);
> + return -1;
> + }
> + if (is_rollback) {
> /* The transaction has been rolled back. */
> diag_set(ClientError, ER_SYNC_ROLLBACK);
> return -1;
> @@ -660,6 +678,28 @@ complete:
> return 0;
> }
>
> +int
> +txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout)
> +{
> + if (txn_limbo_is_empty(limbo))
> + return 0;
> + bool is_rollback;
> + double deadline = fiber_clock() + timeout;
> + /*
> + * Retry in the loop. More transactions might be added while waiting for
> + * the last one.
> + */
> + do {
> + if (txn_limbo_wait_last_txn(limbo, &is_rollback,
> + timeout) != 0) {
> + diag_set(ClientError, ER_TIMEOUT);
> + return -1;
> + }
> + timeout = deadline - fiber_clock();
> + } while (!txn_limbo_is_empty(limbo));
> + return 0;
> +}
> +
> void
> txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req)
> {
> diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
> index e409ac657..7debbc0b9 100644
> --- a/src/box/txn_limbo.h
> +++ b/src/box/txn_limbo.h
> @@ -311,6 +311,10 @@ txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req);
> int
> txn_limbo_wait_confirm(struct txn_limbo *limbo);
>
> +/** Wait until the limbo is empty. Regardless of how its transactions end. */
> +int
> +txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout);
> +
> /**
> * Write a PROMOTE request, which has the same effect as CONFIRM(@a lsn) and
> * ROLLBACK(@a lsn + 1) combined.
--
Serge Petrenko
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote()
2021-07-12 22:20 [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote() Vladislav Shpilevoy via Tarantool-patches
2021-07-13 10:01 ` Serge Petrenko via Tarantool-patches
@ 2021-07-13 10:02 ` Serge Petrenko via Tarantool-patches
2021-07-13 12:19 ` Cyrill Gorcunov via Tarantool-patches
2021-07-13 20:13 ` Vladislav Shpilevoy via Tarantool-patches
3 siblings, 0 replies; 5+ messages in thread
From: Serge Petrenko via Tarantool-patches @ 2021-07-13 10:02 UTC (permalink / raw)
To: Vladislav Shpilevoy, tarantool-patches, gorcunov
13.07.2021 01:20, Vladislav Shpilevoy пишет:
> box_promote() when called manually used to wait for the existing
> transactions from a foreign limbo to end during a timeout. Giving
> them a chance to end on their terms.
>
> The waiting was done via polling like
>
> while (!done)
> sleep(small_timeout);
>
> Polling is almost always super bad both for execution time and
> for CPU usage. The patch replaces it with proper waiting based on
> events happening in the limbo.
>
> Closes #5190
> ---
> Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5190-qsync-polling
> Issue: https://github.com/tarantool/tarantool/issues/5190
>
> src/box/box.cc | 7 +----
> src/box/txn_limbo.c | 74 ++++++++++++++++++++++++++++++++++-----------
> src/box/txn_limbo.h | 4 +++
> 3 files changed, 62 insertions(+), 23 deletions(-)
>
Hi! Thanks for the fix! LGTM.
--
Serge Petrenko
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote()
2021-07-12 22:20 [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote() Vladislav Shpilevoy via Tarantool-patches
2021-07-13 10:01 ` Serge Petrenko via Tarantool-patches
2021-07-13 10:02 ` Serge Petrenko via Tarantool-patches
@ 2021-07-13 12:19 ` Cyrill Gorcunov via Tarantool-patches
2021-07-13 20:13 ` Vladislav Shpilevoy via Tarantool-patches
3 siblings, 0 replies; 5+ messages in thread
From: Cyrill Gorcunov via Tarantool-patches @ 2021-07-13 12:19 UTC (permalink / raw)
To: Vladislav Shpilevoy; +Cc: tarantool-patches
On Tue, Jul 13, 2021 at 12:20:07AM +0200, Vladislav Shpilevoy wrote:
> box_promote() when called manually used to wait for the existing
> transactions from a foreign limbo to end during a timeout. Giving
> them a chance to end on their terms.
Ack
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote()
2021-07-12 22:20 [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote() Vladislav Shpilevoy via Tarantool-patches
` (2 preceding siblings ...)
2021-07-13 12:19 ` Cyrill Gorcunov via Tarantool-patches
@ 2021-07-13 20:13 ` Vladislav Shpilevoy via Tarantool-patches
3 siblings, 0 replies; 5+ messages in thread
From: Vladislav Shpilevoy via Tarantool-patches @ 2021-07-13 20:13 UTC (permalink / raw)
To: tarantool-patches, gorcunov, sergepetrenko
Pushed to master, 2.8, 2.7.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-07-13 20:13 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-12 22:20 [Tarantool-patches] [PATCH 1/1] qsync: remove polling from box_promote() Vladislav Shpilevoy via Tarantool-patches
2021-07-13 10:01 ` Serge Petrenko via Tarantool-patches
2021-07-13 10:02 ` Serge Petrenko via Tarantool-patches
2021-07-13 12:19 ` Cyrill Gorcunov via Tarantool-patches
2021-07-13 20:13 ` Vladislav Shpilevoy via Tarantool-patches
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox