Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
@ 2020-03-31 14:42 Nikita Pettik
  2020-03-31 14:53 ` Konstantin Osipov
  2020-04-21 23:13 ` Vladislav Shpilevoy
  0 siblings, 2 replies; 11+ messages in thread
From: Nikita Pettik @ 2020-03-31 14:42 UTC (permalink / raw)
  To: tarantool-patches; +Cc: v.shpilevoy

Before this patch box.snapshot() bails out immediately if it sees that
the scheduler is throttled due to errors. For instance:

box.error.injection.set('ERRINJ_VY_RUN_WRITE', true)
snapshot() -- fails due to ERRINJ_VY_RUN_WRITE
box.error.injection.set('ERRINJ_VY_RUN_WRITE', false)
snapshot() -- still fails despite the fact that injected error is unset

As a result, one has to wait up to a minute to make a snapshot. The
reason why throttling was introduced was to avoid flooding the log
in case of repeating disk errors.
On the other hand, checkpoint process is either called manually or on
schedule. What is more, to deal with schedule throttling in tests, we
had to introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT).
It reduces time duration during which the scheduler remains throttled,
which is ugly and race prone.
So, let's unthrottle scheduler when checkpoint process is launched.

Closes #3519
---
Note that VY_SCHED_TIMEOUT error injection is not completely removed
from tests, since at least one of them fails (instance crashes) without
it (vinyl/errinj_stat.test.lua). It's not a problem of this patch -
reproducer is extracted and filed in gh-4821.

Branch: https://github.com/tarantool/tarantool/tree/np/gh-3519-unthrottle-sched
Issue: https://github.com/tarantool/tarantool/issues/3519

 src/box/vy_scheduler.c           | 14 +++++---------
 test/box/errinj.result           |  8 --------
 test/box/errinj.test.lua         |  2 --
 test/vinyl/errinj.result         | 12 ++----------
 test/vinyl/errinj.test.lua       |  5 +----
 test/vinyl/errinj_vylog.result   | 14 --------------
 test/vinyl/errinj_vylog.test.lua |  4 ----
 7 files changed, 8 insertions(+), 51 deletions(-)

diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 9dba93d34..f57f10119 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -687,17 +687,13 @@ vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
 	assert(!scheduler->checkpoint_in_progress);
 
 	/*
-	 * If the scheduler is throttled due to errors, do not wait
-	 * until it wakes up as it may take quite a while. Instead
-	 * fail checkpoint immediately with the last error seen by
-	 * the scheduler.
+	 * It makes no sense throttling checkpoint process since
+	 * it can be either ran manually or due to timeout. At this
+	 * point let's ignore it.
 	 */
 	if (scheduler->is_throttled) {
-		struct error *e = diag_last_error(&scheduler->diag);
-		diag_add_error(diag_get(), e);
-		say_error("cannot checkpoint vinyl, "
-			  "scheduler is throttled with: %s", e->errmsg);
-		return -1;
+		say_info("scheduler is unthrottled due to checkpoint process");
+		scheduler->is_throttled = false;
 	}
 
 	if (!vy_scheduler_dump_in_progress(scheduler)) {
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8090deedc..34715b9ab 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -1561,10 +1561,6 @@ errinj.set('ERRINJ_VY_GC', true)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001)
----
-- ok
-...
 errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true)
 ---
 - ok
@@ -1601,10 +1597,6 @@ errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 errinj.set('ERRINJ_VY_GC', false)
 ---
 - ok
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index d6186726f..718cf516f 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -568,7 +568,6 @@ box.snapshot()
 errinj.set('ERRINJ_VY_LOG_FILE_RENAME', false)
 
 errinj.set('ERRINJ_VY_GC', true)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001)
 
 errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true)
 box.space.test:insert{1}
@@ -583,7 +582,6 @@ box.space.test:insert{2}
 box.snapshot() -- error
 errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false)
 
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 errinj.set('ERRINJ_VY_GC', false)
 
 #fio.glob(fio.pathjoin(box.cfg.vinyl_dir, '*.vylog.inprogress')) > 0
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 47c10493b..e33edc286 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -78,10 +78,10 @@ errinj.set("ERRINJ_VY_RUN_WRITE", false);
 ---
 - ok
 ...
--- fails due to scheduler timeout
+-- does not fail once error is unset
 box.snapshot();
 ---
-- error: Error injection 'vinyl dump'
+- ok
 ...
 fiber.sleep(0.06);
 ---
@@ -366,10 +366,6 @@ box.snapshot();
 s:drop()
 ---
 ...
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
----
-- ok
-...
 --
 -- Check that upsert squash fiber does not crash if index or
 -- in-memory tree is gone.
@@ -1056,10 +1052,6 @@ s.index.pk:stat().disk.compaction.count -- 1
 ---
 - 1
 ...
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
----
-- ok
-...
 box.snapshot() -- ok
 ---
 - ok
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 7b6118a8a..1e47c2607 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -38,7 +38,7 @@ num_rows = num_rows + range();
 -- fails due to error injection
 box.snapshot();
 errinj.set("ERRINJ_VY_RUN_WRITE", false);
--- fails due to scheduler timeout
+-- does not fail once error is unset
 box.snapshot();
 fiber.sleep(0.06);
 num_rows = num_rows + range();
@@ -128,8 +128,6 @@ box.snapshot();
 #s:select({1})
 s:drop()
 
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
-
 --
 -- Check that upsert squash fiber does not crash if index or
 -- in-memory tree is gone.
@@ -384,7 +382,6 @@ s.index.pk:stat().disk.compaction.count -- 0
 errinj.set("ERRINJ_WAL_IO", false)
 while s.index.pk:stat().disk.compaction.count == 0 do fiber.sleep(0.001) end
 s.index.pk:stat().disk.compaction.count -- 1
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
 
 box.snapshot() -- ok
 s:drop()
diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result
index 06cc68188..a642f2b16 100644
--- a/test/vinyl/errinj_vylog.result
+++ b/test/vinyl/errinj_vylog.result
@@ -52,13 +52,6 @@ _ = s:create_index('pk')
 _ = s:insert{1, 'x'}
 ---
 ...
-SCHED_TIMEOUT = 0.05
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
----
-- ok
-...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 ---
 - ok
@@ -71,13 +64,6 @@ box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 ---
 - ok
 ...
-fiber.sleep(2 * SCHED_TIMEOUT)
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 _ = s:insert{2, 'y'}
 ---
 ...
diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua
index 2936f879c..2ab200423 100644
--- a/test/vinyl/errinj_vylog.test.lua
+++ b/test/vinyl/errinj_vylog.test.lua
@@ -27,15 +27,11 @@ s = box.schema.space.create('test', {engine = 'vinyl'})
 _ = s:create_index('pk')
 _ = s:insert{1, 'x'}
 
-SCHED_TIMEOUT = 0.05
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 
 box.snapshot()
 
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
-fiber.sleep(2 * SCHED_TIMEOUT)
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 
 _ = s:insert{2, 'y'}
 
-- 
2.17.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-03-31 14:42 [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint Nikita Pettik
@ 2020-03-31 14:53 ` Konstantin Osipov
  2020-04-21 23:13 ` Vladislav Shpilevoy
  1 sibling, 0 replies; 11+ messages in thread
From: Konstantin Osipov @ 2020-03-31 14:53 UTC (permalink / raw)
  To: Nikita Pettik; +Cc: tarantool-patches, v.shpilevoy

* Nikita Pettik <korablev@tarantool.org> [20/03/31 17:45]:
> Before this patch box.snapshot() bails out immediately if it sees that
> the scheduler is throttled due to errors. For instance:
> 
> box.error.injection.set('ERRINJ_VY_RUN_WRITE', true)
> snapshot() -- fails due to ERRINJ_VY_RUN_WRITE
> box.error.injection.set('ERRINJ_VY_RUN_WRITE', false)
> snapshot() -- still fails despite the fact that injected error is unset
> 
> As a result, one has to wait up to a minute to make a snapshot. The
> reason why throttling was introduced was to avoid flooding the log
> in case of repeating disk errors.
> On the other hand, checkpoint process is either called manually or on
> schedule. What is more, to deal with schedule throttling in tests, we
> had to introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT).
> It reduces time duration during which the scheduler remains throttled,
> which is ugly and race prone.
> So, let's unthrottle scheduler when checkpoint process is launched.
> 
> Closes #3519

Can we do it only on manual checkpoint?


-- 
Konstantin Osipov, Moscow, Russia
https://scylladb.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-03-31 14:42 [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint Nikita Pettik
  2020-03-31 14:53 ` Konstantin Osipov
@ 2020-04-21 23:13 ` Vladislav Shpilevoy
  2020-04-22  0:32   ` Konstantin Osipov
  2020-04-27 12:17   ` Nikita Pettik
  1 sibling, 2 replies; 11+ messages in thread
From: Vladislav Shpilevoy @ 2020-04-21 23:13 UTC (permalink / raw)
  To: Nikita Pettik, tarantool-patches

Hi! Thanks for the patch!

On 31/03/2020 16:42, Nikita Pettik wrote:
> Before this patch box.snapshot() bails out immediately if it sees that
> the scheduler is throttled due to errors. For instance:
> 
> box.error.injection.set('ERRINJ_VY_RUN_WRITE', true)
> snapshot() -- fails due to ERRINJ_VY_RUN_WRITE
> box.error.injection.set('ERRINJ_VY_RUN_WRITE', false)
> snapshot() -- still fails despite the fact that injected error is unset
> 
> As a result, one has to wait up to a minute to make a snapshot. The
> reason why throttling was introduced was to avoid flooding the log
> in case of repeating disk errors.
> On the other hand, checkpoint process is either called manually or on
> schedule. What is more, to deal with schedule throttling in tests, we
> had to introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT).
> It reduces time duration during which the scheduler remains throttled,
> which is ugly and race prone.
> So, let's unthrottle scheduler when checkpoint process is launched.
> 
> Closes #3519
> ---
> Note that VY_SCHED_TIMEOUT error injection is not completely removed
> from tests, since at least one of them fails (instance crashes) without
> it (vinyl/errinj_stat.test.lua). It's not a problem of this patch -
> reproducer is extracted and filed in gh-4821.
> 
> Branch: https://github.com/tarantool/tarantool/tree/np/gh-3519-unthrottle-sched
> Issue: https://github.com/tarantool/tarantool/issues/3519
> 
>  src/box/vy_scheduler.c           | 14 +++++---------
>  test/box/errinj.result           |  8 --------
>  test/box/errinj.test.lua         |  2 --
>  test/vinyl/errinj.result         | 12 ++----------
>  test/vinyl/errinj.test.lua       |  5 +----
>  test/vinyl/errinj_vylog.result   | 14 --------------
>  test/vinyl/errinj_vylog.test.lua |  4 ----
>  7 files changed, 8 insertions(+), 51 deletions(-)
> 
> diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
> index 9dba93d34..f57f10119 100644
> --- a/src/box/vy_scheduler.c
> +++ b/src/box/vy_scheduler.c
> @@ -687,17 +687,13 @@ vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
>  	assert(!scheduler->checkpoint_in_progress);
>  
>  	/*
> -	 * If the scheduler is throttled due to errors, do not wait
> -	 * until it wakes up as it may take quite a while. Instead
> -	 * fail checkpoint immediately with the last error seen by
> -	 * the scheduler.
> +	 * It makes no sense throttling checkpoint process since
> +	 * it can be either ran manually or due to timeout. At this
> +	 * point let's ignore it.

Kostja's questio is fair. Can it be done for manual snapshots only?
Automated checkpoints already have problems with killing the disk,
when multiple instances on the same machine start them at the same
time. With unthrotled scheduler it is going to get worse.

However, if this is hard to detect, then it is ok. Just do a quick
check if it is possible.

>  	 */
>  	if (scheduler->is_throttled) {
> -		struct error *e = diag_last_error(&scheduler->diag);
> -		diag_add_error(diag_get(), e);
> -		say_error("cannot checkpoint vinyl, "
> -			  "scheduler is throttled with: %s", e->errmsg);
> -		return -1;
> +		say_info("scheduler is unthrottled due to checkpoint process");
> +		scheduler->is_throttled = false;

Is there a simple way to let the throttling continue after the
checkpoint is finished?

>  	}
>  
>  	if (!vy_scheduler_dump_in_progress(scheduler)) {

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-21 23:13 ` Vladislav Shpilevoy
@ 2020-04-22  0:32   ` Konstantin Osipov
  2020-04-27 12:17   ` Nikita Pettik
  1 sibling, 0 replies; 11+ messages in thread
From: Konstantin Osipov @ 2020-04-22  0:32 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

* Vladislav Shpilevoy <v.shpilevoy@tarantool.org> [20/04/22 02:17]:

Vinyl dump is based on memtable state, not a schedule.
If each dump is unthrottling the scheduler, then what's the point
of throttling? Having a nice infinite loop on ENOSPC?

> Hi! Thanks for the patch!
> 
> On 31/03/2020 16:42, Nikita Pettik wrote:
> > Before this patch box.snapshot() bails out immediately if it sees that
> > the scheduler is throttled due to errors. For instance:
> > 
> > box.error.injection.set('ERRINJ_VY_RUN_WRITE', true)
> > snapshot() -- fails due to ERRINJ_VY_RUN_WRITE
> > box.error.injection.set('ERRINJ_VY_RUN_WRITE', false)
> > snapshot() -- still fails despite the fact that injected error is unset
> > 
> > As a result, one has to wait up to a minute to make a snapshot. The
> > reason why throttling was introduced was to avoid flooding the log
> > in case of repeating disk errors.
> > On the other hand, checkpoint process is either called manually or on
> > schedule. What is more, to deal with schedule throttling in tests, we
> > had to introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT).
> > It reduces time duration during which the scheduler remains throttled,
> > which is ugly and race prone.
> > So, let's unthrottle scheduler when checkpoint process is launched.
> > 
> > Closes #3519
> > ---
> > Note that VY_SCHED_TIMEOUT error injection is not completely removed
> > from tests, since at least one of them fails (instance crashes) without
> > it (vinyl/errinj_stat.test.lua). It's not a problem of this patch -
> > reproducer is extracted and filed in gh-4821.
> > 
> > Branch: https://github.com/tarantool/tarantool/tree/np/gh-3519-unthrottle-sched
> > Issue: https://github.com/tarantool/tarantool/issues/3519
> > 
> >  src/box/vy_scheduler.c           | 14 +++++---------
> >  test/box/errinj.result           |  8 --------
> >  test/box/errinj.test.lua         |  2 --
> >  test/vinyl/errinj.result         | 12 ++----------
> >  test/vinyl/errinj.test.lua       |  5 +----
> >  test/vinyl/errinj_vylog.result   | 14 --------------
> >  test/vinyl/errinj_vylog.test.lua |  4 ----
> >  7 files changed, 8 insertions(+), 51 deletions(-)
> > 
> > diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
> > index 9dba93d34..f57f10119 100644
> > --- a/src/box/vy_scheduler.c
> > +++ b/src/box/vy_scheduler.c
> > @@ -687,17 +687,13 @@ vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
> >  	assert(!scheduler->checkpoint_in_progress);
> >  
> >  	/*
> > -	 * If the scheduler is throttled due to errors, do not wait
> > -	 * until it wakes up as it may take quite a while. Instead
> > -	 * fail checkpoint immediately with the last error seen by
> > -	 * the scheduler.
> > +	 * It makes no sense throttling checkpoint process since
> > +	 * it can be either ran manually or due to timeout. At this
> > +	 * point let's ignore it.
> 
> Kostja's questio is fair. Can it be done for manual snapshots only?
> Automated checkpoints already have problems with killing the disk,
> when multiple instances on the same machine start them at the same
> time. With unthrotled scheduler it is going to get worse.
> 
> However, if this is hard to detect, then it is ok. Just do a quick
> check if it is possible.
> 
> >  	 */
> >  	if (scheduler->is_throttled) {
> > -		struct error *e = diag_last_error(&scheduler->diag);
> > -		diag_add_error(diag_get(), e);
> > -		say_error("cannot checkpoint vinyl, "
> > -			  "scheduler is throttled with: %s", e->errmsg);
> > -		return -1;
> > +		say_info("scheduler is unthrottled due to checkpoint process");
> > +		scheduler->is_throttled = false;
> 
> Is there a simple way to let the throttling continue after the
> checkpoint is finished?
> 
> >  	}
> >  
> >  	if (!vy_scheduler_dump_in_progress(scheduler)) {

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-21 23:13 ` Vladislav Shpilevoy
  2020-04-22  0:32   ` Konstantin Osipov
@ 2020-04-27 12:17   ` Nikita Pettik
  2020-04-27 12:39     ` Konstantin Osipov
  1 sibling, 1 reply; 11+ messages in thread
From: Nikita Pettik @ 2020-04-27 12:17 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

On 22 Apr 01:13, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
> 
> >  
> >  	/*
> > -	 * If the scheduler is throttled due to errors, do not wait
> > -	 * until it wakes up as it may take quite a while. Instead
> > -	 * fail checkpoint immediately with the last error seen by
> > -	 * the scheduler.
> > +	 * It makes no sense throttling checkpoint process since
> > +	 * it can be either ran manually or due to timeout. At this
> > +	 * point let's ignore it.
> 
> Kostja's questio is fair. Can it be done for manual snapshots only?
> Automated checkpoints already have problems with killing the disk,
> when multiple instances on the same machine start them at the same
> time. With unthrotled scheduler it is going to get worse.
> 
> However, if this is hard to detect, then it is ok. Just do a quick
> check if it is possible.

Checkpoint daemon uses directly box.snapshot(), so now we can't tell
whether checkpoint is launched manually or automatically. To differ
these scenarious we can make checkpoint daemon call sort of
box.__scheduled_snapshot() (which won't be part of public API ofc).
Then we will be able to pass boolean parameter to begin_checkpoint()
indicating manual/auto mode. Or simply close issue as wont fix :)
 
> >  	 */
> >  	if (scheduler->is_throttled) {
> > -		struct error *e = diag_last_error(&scheduler->diag);
> > -		diag_add_error(diag_get(), e);
> > -		say_error("cannot checkpoint vinyl, "
> > -			  "scheduler is throttled with: %s", e->errmsg);
> > -		return -1;
> > +		say_info("scheduler is unthrottled due to checkpoint process");
> > +		scheduler->is_throttled = false;
> 
> Is there a simple way to let the throttling continue after the
> checkpoint is finished?

Hm, not simple but..For instance, we can introduce another one flag
"is_checkpoint_aborted" alonside with workaround (tests seem to pass):

diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index f57f10119..666d0a008 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -615,7 +615,8 @@ vy_scheduler_dump(struct vy_scheduler *scheduler)
 
        /* Wait for dump to complete. */
        while (vy_scheduler_dump_in_progress(scheduler)) {
-               if (scheduler->is_throttled) {
+               if (scheduler->is_throttled &&
+                   !scheduler->checkpoint_in_progress) {
                        /* Dump error occurred. */
                        struct error *e = diag_last_error(&scheduler->diag);
                        diag_add_error(diag_get(), e);
@@ -692,8 +693,7 @@ vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
         * point let's ignore it.
         */
        if (scheduler->is_throttled) {
-               say_info("scheduler is unthrottled due to checkpoint process");
-               scheduler->is_throttled = false;
+               say_info("scheduler is temporary unthrottled due to checkpoint process");
        }
 
        if (!vy_scheduler_dump_in_progress(scheduler)) {
@@ -707,6 +707,7 @@ vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
        }
        scheduler->generation++;
        scheduler->checkpoint_in_progress = true;
+       scheduler->is_checkpoint_aborted = false;
        fiber_cond_signal(&scheduler->scheduler_cond);
        say_info("vinyl checkpoint started");
        return 0;
@@ -723,7 +724,7 @@ vy_scheduler_wait_checkpoint(struct vy_scheduler *scheduler)
         * checkpoint started have been dumped.
         */
        while (vy_scheduler_dump_in_progress(scheduler)) {
-               if (scheduler->is_throttled) {
+               if (scheduler->is_checkpoint_aborted) {
                        /* A dump error occurred, abort checkpoint. */
                        struct error *e = diag_last_error(&scheduler->diag);
                        diag_add_error(diag_get(), e);
@@ -2104,7 +2105,10 @@ error:
                        scheduler->timeout = inj->dparam;
                say_warn("throttling scheduler for %.0f second(s)",
                         scheduler->timeout);
-               scheduler->is_throttled = true;
+               if (scheduler->checkpoint_in_progress)
+                       scheduler->is_checkpoint_aborted = true;
+               else
+                       scheduler->is_throttled = true;
                fiber_sleep(scheduler->timeout);
                scheduler->is_throttled = false;
        }

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-27 12:17   ` Nikita Pettik
@ 2020-04-27 12:39     ` Konstantin Osipov
  2020-04-27 15:18       ` Nikita Pettik
  0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Osipov @ 2020-04-27 12:39 UTC (permalink / raw)
  To: Nikita Pettik; +Cc: tarantool-patches, Vladislav Shpilevoy

* Nikita Pettik <korablev@tarantool.org> [20/04/27 15:19]:
> Checkpoint daemon uses directly box.snapshot(), so now we can't tell
> whether checkpoint is launched manually or automatically. To differ
> these scenarious we can make checkpoint daemon call sort of
> box.__scheduled_snapshot() (which won't be part of public API ofc).
> Then we will be able to pass boolean parameter to begin_checkpoint()
> indicating manual/auto mode. Or simply close issue as wont fix :)

Uhm, no, checkpoint daemon uses gc_do_checkpoint() and it doesn't
use box.snapshot().


-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-27 12:39     ` Konstantin Osipov
@ 2020-04-27 15:18       ` Nikita Pettik
  2020-04-27 15:20         ` Konstantin Osipov
  0 siblings, 1 reply; 11+ messages in thread
From: Nikita Pettik @ 2020-04-27 15:18 UTC (permalink / raw)
  To: Konstantin Osipov, Vladislav Shpilevoy, tarantool-patches

On 27 Apr 15:39, Konstantin Osipov wrote:
> * Nikita Pettik <korablev@tarantool.org> [20/04/27 15:19]:
> > Checkpoint daemon uses directly box.snapshot(), so now we can't tell
> > whether checkpoint is launched manually or automatically. To differ
> > these scenarious we can make checkpoint daemon call sort of
> > box.__scheduled_snapshot() (which won't be part of public API ofc).
> > Then we will be able to pass boolean parameter to begin_checkpoint()
> > indicating manual/auto mode. Or simply close issue as wont fix :)
> 
> Uhm, no, checkpoint daemon uses gc_do_checkpoint() and it doesn't
> use box.snapshot().

Seems we are looking at different branches: 1.10 still uses checkpoint
daemon written in Lua. If this patch should be pushed to master branch
only, then I guess it would be easy to patch engine_begin_checkpoint()
and make it accept argument responsible for checkpoint mode (scheduled
or manual).
 
> 
> -- 
> Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-27 15:18       ` Nikita Pettik
@ 2020-04-27 15:20         ` Konstantin Osipov
  2020-04-27 15:37           ` Nikita Pettik
  0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Osipov @ 2020-04-27 15:20 UTC (permalink / raw)
  To: Nikita Pettik; +Cc: tarantool-patches, Vladislav Shpilevoy

* Nikita Pettik <korablev@tarantool.org> [20/04/27 18:20]:
> > > Checkpoint daemon uses directly box.snapshot(), so now we can't tell
> > > whether checkpoint is launched manually or automatically. To differ
> > > these scenarious we can make checkpoint daemon call sort of
> > > box.__scheduled_snapshot() (which won't be part of public API ofc).
> > > Then we will be able to pass boolean parameter to begin_checkpoint()
> > > indicating manual/auto mode. Or simply close issue as wont fix :)
> > 
> > Uhm, no, checkpoint daemon uses gc_do_checkpoint() and it doesn't
> > use box.snapshot().
> 
> Seems we are looking at different branches: 1.10 still uses checkpoint
> daemon written in Lua. If this patch should be pushed to master branch
> only, then I guess it would be easy to patch engine_begin_checkpoint()
> and make it accept argument responsible for checkpoint mode (scheduled
> or manual).

Why do you need to fix 1.10?

-- 
Konstantin Osipov, Moscow, Russia
https://scylladb.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-27 15:20         ` Konstantin Osipov
@ 2020-04-27 15:37           ` Nikita Pettik
  2020-04-27 15:48             ` Konstantin Osipov
  0 siblings, 1 reply; 11+ messages in thread
From: Nikita Pettik @ 2020-04-27 15:37 UTC (permalink / raw)
  To: Konstantin Osipov, Vladislav Shpilevoy, tarantool-patches

On 27 Apr 18:20, Konstantin Osipov wrote:
> * Nikita Pettik <korablev@tarantool.org> [20/04/27 18:20]:
> > > > Checkpoint daemon uses directly box.snapshot(), so now we can't tell
> > > > whether checkpoint is launched manually or automatically. To differ
> > > > these scenarious we can make checkpoint daemon call sort of
> > > > box.__scheduled_snapshot() (which won't be part of public API ofc).
> > > > Then we will be able to pass boolean parameter to begin_checkpoint()
> > > > indicating manual/auto mode. Or simply close issue as wont fix :)
> > > 
> > > Uhm, no, checkpoint daemon uses gc_do_checkpoint() and it doesn't
> > > use box.snapshot().
> > 
> > Seems we are looking at different branches: 1.10 still uses checkpoint
> > daemon written in Lua. If this patch should be pushed to master branch
> > only, then I guess it would be easy to patch engine_begin_checkpoint()
> > and make it accept argument responsible for checkpoint mode (scheduled
> > or manual).
> 
> Why do you need to fix 1.10?

For instance, it would make test backporting process easier.
Otherwise we will get different behaviours while using
ERRINJ_VY_SCHED_TIMEOUT error injection.
 
> -- 
> Konstantin Osipov, Moscow, Russia
> https://scylladb.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-27 15:37           ` Nikita Pettik
@ 2020-04-27 15:48             ` Konstantin Osipov
  2020-04-27 15:52               ` Nikita Pettik
  0 siblings, 1 reply; 11+ messages in thread
From: Konstantin Osipov @ 2020-04-27 15:48 UTC (permalink / raw)
  To: Nikita Pettik; +Cc: tarantool-patches, Vladislav Shpilevoy

* Nikita Pettik <korablev@tarantool.org> [20/04/27 18:40]:
> > > > > Checkpoint daemon uses directly box.snapshot(), so now we can't tell
> > > > > whether checkpoint is launched manually or automatically. To differ
> > > > > these scenarious we can make checkpoint daemon call sort of
> > > > > box.__scheduled_snapshot() (which won't be part of public API ofc).
> > > > > Then we will be able to pass boolean parameter to begin_checkpoint()
> > > > > indicating manual/auto mode. Or simply close issue as wont fix :)
> > > > 
> > > > Uhm, no, checkpoint daemon uses gc_do_checkpoint() and it doesn't
> > > > use box.snapshot().
> > > 
> > > Seems we are looking at different branches: 1.10 still uses checkpoint
> > > daemon written in Lua. If this patch should be pushed to master branch
> > > only, then I guess it would be easy to patch engine_begin_checkpoint()
> > > and make it accept argument responsible for checkpoint mode (scheduled
> > > or manual).
> > 
> > Why do you need to fix 1.10?
> 
> For instance, it would make test backporting process easier.
> Otherwise we will get different behaviours while using
> ERRINJ_VY_SCHED_TIMEOUT error injection.

Well, I guess then 1.10 will require a separate fix. 

I don't see why you even consider a possibility of unthrottling
the scheduling in all cases in 1.10. It's a stable/lts release,
and what you're trying to do is not any customer's problem, you
simply shouldn't risk breaking ppl for no good reason.


-- 
Konstantin Osipov, Moscow, Russia
https://scylladb.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint
  2020-04-27 15:48             ` Konstantin Osipov
@ 2020-04-27 15:52               ` Nikita Pettik
  0 siblings, 0 replies; 11+ messages in thread
From: Nikita Pettik @ 2020-04-27 15:52 UTC (permalink / raw)
  To: Konstantin Osipov, Vladislav Shpilevoy, tarantool-patches

On 27 Apr 18:48, Konstantin Osipov wrote:
> * Nikita Pettik <korablev@tarantool.org> [20/04/27 18:40]:
> > > > > > Checkpoint daemon uses directly box.snapshot(), so now we can't tell
> > > > > > whether checkpoint is launched manually or automatically. To differ
> > > > > > these scenarious we can make checkpoint daemon call sort of
> > > > > > box.__scheduled_snapshot() (which won't be part of public API ofc).
> > > > > > Then we will be able to pass boolean parameter to begin_checkpoint()
> > > > > > indicating manual/auto mode. Or simply close issue as wont fix :)
> > > > > 
> > > > > Uhm, no, checkpoint daemon uses gc_do_checkpoint() and it doesn't
> > > > > use box.snapshot().
> > > > 
> > > > Seems we are looking at different branches: 1.10 still uses checkpoint
> > > > daemon written in Lua. If this patch should be pushed to master branch
> > > > only, then I guess it would be easy to patch engine_begin_checkpoint()
> > > > and make it accept argument responsible for checkpoint mode (scheduled
> > > > or manual).
> > > 
> > > Why do you need to fix 1.10?
> > 
> > For instance, it would make test backporting process easier.
> > Otherwise we will get different behaviours while using
> > ERRINJ_VY_SCHED_TIMEOUT error injection.
> 
> Well, I guess then 1.10 will require a separate fix. 
> 
> I don't see why you even consider a possibility of unthrottling
> the scheduling in all cases in 1.10. It's a stable/lts release,
> and what you're trying to do is not any customer's problem, you
> simply shouldn't risk breaking ppl for no good reason.

Ok, then I'm going to send v2 of this patch affected only 2.5
and unthrottling scheduler only in case of manual launched
checkpoint process.

> 
> -- 
> Konstantin Osipov, Moscow, Russia
> https://scylladb.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-04-27 15:52 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-31 14:42 [Tarantool-patches] [PATCH] vinyl: unthrottle scheduler on checkpoint Nikita Pettik
2020-03-31 14:53 ` Konstantin Osipov
2020-04-21 23:13 ` Vladislav Shpilevoy
2020-04-22  0:32   ` Konstantin Osipov
2020-04-27 12:17   ` Nikita Pettik
2020-04-27 12:39     ` Konstantin Osipov
2020-04-27 15:18       ` Nikita Pettik
2020-04-27 15:20         ` Konstantin Osipov
2020-04-27 15:37           ` Nikita Pettik
2020-04-27 15:48             ` Konstantin Osipov
2020-04-27 15:52               ` Nikita Pettik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox