[Tarantool-patches] [PATCH v2] recovery: make it yield when positioning in a WAL

Serge Petrenko sergepetrenko at tarantool.org
Thu May 13 13:11:41 MSK 2021



12.05.2021 23:36, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
>
> See 4 comments below.
>
> On 12.05.2021 13:29, Serge Petrenko wrote:

Thanks for the review!

>> We had various places in box.cc and relay.cc which counted processed
>> rows and yielded every now and then. These yields didn't cover cases,
>> when recovery has to position inside a long WAL file:
>>
>> For example, when tarantool exits without leaving an empty WAL file
>> which'll be used to recover instance vclock on restart. In this case
>> the instance freezes while processing the last availabe WAL in order
> 1. availabe -> available.

Fixed.
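
Just for context for anyone reading the thread: what the patch centralizes is
calling the stream's yield callback every fixed number of rows while scanning
a WAL, so the event loop stays responsive even when positioning inside a long
file. Below is a minimal, self-contained sketch of that loop shape only; the
names (ROWS_PER_YIELD, scan_wal(), let_loop_run()) are made up for
illustration, while the real code uses WAL_ROWS_PER_YIELD and xstream_yield()
as in the diff further down.

#include <stdint.h>
#include <stdio.h>

struct stream;
typedef void (*yield_f)(struct stream *);

struct stream {
	uint64_t row_count;
	yield_f yield;
};

/* Illustrative constant; the real code uses WAL_ROWS_PER_YIELD. */
enum { ROWS_PER_YIELD = 32000 };

/* Stand-in for fiber_sleep(0): hand control back to the event loop. */
static void
let_loop_run(struct stream *s)
{
	(void)s;
}

/* Scan WAL rows, yielding every ROWS_PER_YIELD rows so the loop stays live. */
static void
scan_wal(struct stream *s, uint64_t total_rows)
{
	for (uint64_t i = 0; i < total_rows; i++) {
		/* ... decode and apply (or skip) one xrow here ... */
		if (++s->row_count % ROWS_PER_YIELD == 0)
			s->yield(s);
	}
}

int
main(void)
{
	struct stream s = {.row_count = 0, .yield = let_loop_run};
	scan_wal(&s, 100000);
	printf("processed %llu rows\n", (unsigned long long)s.row_count);
	return 0;
}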

>
>> diff --git a/changelogs/unreleased/gh-5979-recovery-ligs.md b/changelogs/unreleased/gh-5979-recovery-ligs.md
>> new file mode 100644
>> index 000000000..86abfd66a
>> --- /dev/null
>> +++ b/changelogs/unreleased/gh-5979-recovery-ligs.md
>> @@ -0,0 +1,11 @@
>> +# bugfix/core
>> +
>> +* Now tarantool yields when scanning `.xlog` files for the latest applied vclock
>> +  and when finding the right place in `.xlog`s to start recovering. This means
>> +  that the instance is responsive right after `box.cfg` call even when an empty
>> +  `.xlog` was not created on previous exit.
>> +  Also this prevents relay from timing out when a freshly subscribed replica
>> +  needs rows from the end of a relatively long (hundreds of MBs) `.xlog`
>> +  (gh-5979).
> 2. Maybe an empty line here? Could simplify reading a bit.

Ok. I also just noticed that the file had a strange name, so I changed it as well.

>
>> +* The counter in `x.yM rows processed` log messages does not reset on each new
>> +  recovered `xlog` anymore.
>> diff --git a/src/box/box.cc b/src/box/box.cc
>> index 59925962d..8a7b8593d 100644
>> --- a/src/box/box.cc
>> +++ b/src/box/box.cc
>> @@ -610,15 +597,28 @@ end_error:
>>   	diag_raise();
>>   }
>>   
>> +static struct wal_stream wal_stream;
>> +
>> +/**
>> + * Plan a yield in recovery stream. Wal stream will execute it as soon as it's
>> + * ready.
>> + */
>> +static void
>> +wal_stream_schedule_yield(void)
> 3. Since now you have an object owning the callback, you
> could pass it as a first argument, like 'this' in C++,
> and like apply_row() does. Then you wouldn't need the
> global wal_stream.

Indeed. Fixed. (A small self-contained sketch of the container_of pattern
follows the quoted hunk below.)

>
>> +{
>> +	wal_stream.has_yield = true;
>> +	wal_stream_try_yield(&wal_stream);
>> +}
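
To make that concrete, here is a minimal, self-contained sketch of the
container_of / "this in C" approach comment 3 refers to. It is not the patch
itself: container_of below is a local stand-in for the macro Tarantool
already has, and wal_stream_try_yield() is omitted.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Local stand-in for the container_of macro Tarantool already provides. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct xstream {
	void (*yield)(struct xstream *);
};

struct wal_stream {
	struct xstream base;
	bool has_yield;
};

/* The callback gets the base pointer and recovers its owning wal_stream. */
static void
wal_stream_schedule_yield(struct xstream *base)
{
	struct wal_stream *stream =
		container_of(base, struct wal_stream, base);
	stream->has_yield = true;
	/* wal_stream_try_yield(stream) would follow in the real code. */
}

int
main(void)
{
	struct wal_stream ws = {
		.base = {.yield = wal_stream_schedule_yield},
		.has_yield = false,
	};
	ws.base.yield(&ws.base);
	printf("has_yield = %d\n", ws.has_yield);
	return 0;
}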
>> diff --git a/src/box/xstream.h b/src/box/xstream.h
>> index d29ff4213..d27de09a3 100644
>> --- a/src/box/xstream.h
>> +++ b/src/box/xstream.h
>> @@ -41,16 +41,40 @@ extern "C" {
>>   struct xrow_header;
>>   struct xstream;
>>   
>> +/**
>> + * A type for a callback invoked by recovery after some batch of rows is
>> + * processed. Is used mostly to unblock the event loop every now and then.
>> + */
>> +typedef void (*schedule_yield_f)(void);
> 4. How about moving closer to the consistency and calling it
> xstream_yield_f/xstream_schedule_yield_f? So it has xstream_ prefix
> like xstream_write_f does.

I thought of xstream_schedule_yield_f, but that's too long.
Let it be xstream_yield_f.

Here's the incremental diff:

==========================================

diff --git a/changelogs/unreleased/gh-5979-recovery-ligs.md b/changelogs/unreleased/gh-5979-recovery-yield.md
similarity index 99%
rename from changelogs/unreleased/gh-5979-recovery-ligs.md
rename to changelogs/unreleased/gh-5979-recovery-yield.md
index 86abfd66a..71bec5669 100644
--- a/changelogs/unreleased/gh-5979-recovery-ligs.md
+++ b/changelogs/unreleased/gh-5979-recovery-yield.md
@@ -7,5 +7,6 @@
    Also this prevents relay from timing out when a freshly subscribed replica
    needs rows from the end of a relatively long (hundreds of MBs) `.xlog`
    (gh-5979).
+
  * The counter in `x.yM rows processed` log messages does not reset on each new
    recovered `xlog` anymore.
diff --git a/src/box/box.cc b/src/box/box.cc
index 8a7b8593d..c10e0d8bf 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -597,17 +597,16 @@ end_error:
      diag_raise();
  }

-static struct wal_stream wal_stream;
-
  /**
   * Plan a yield in recovery stream. Wal stream will execute it as soon as it's
   * ready.
   */
  static void
-wal_stream_schedule_yield(void)
+wal_stream_schedule_yield(struct xstream *base)
  {
-    wal_stream.has_yield = true;
-    wal_stream_try_yield(&wal_stream);
+    struct wal_stream *stream = container_of(base, struct wal_stream, base);
+    stream->has_yield = true;
+    wal_stream_try_yield(stream);
  }

  static void
@@ -3124,6 +3123,7 @@ local_recovery(const struct tt_uuid *instance_uuid,

      say_info("instance uuid %s", tt_uuid_str(&INSTANCE_UUID));

+    struct wal_stream wal_stream;
      wal_stream_create(&wal_stream);
      auto stream_guard = make_scoped_guard([&]{
          wal_stream_abort(&wal_stream);
@@ -3131,7 +3131,8 @@ local_recovery(const struct tt_uuid *instance_uuid,

      struct recovery *recovery;
      bool is_force_recovery = cfg_geti("force_recovery");
-    recovery = recovery_new(wal_dir(), is_force_recovery, checkpoint_vclock);
+    recovery = recovery_new(wal_dir(), is_force_recovery,
+                checkpoint_vclock);

      /*
       * Make sure we report the actual recovery position
diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index e5b88fcc2..1476622c3 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -142,7 +142,7 @@ recovery_scan(struct recovery *r, struct vclock *end_vclock,
      while (xlog_cursor_next(&cursor, &row, true) == 0) {
          vclock_follow_xrow(end_vclock, &row);
          if (++stream->row_count % WAL_ROWS_PER_YIELD == 0)
-            xstream_schedule_yield(stream);
+            xstream_yield(stream);
      }
      xlog_cursor_close(&cursor, false);

@@ -250,7 +250,7 @@ recover_xlog(struct recovery *r, struct xstream *stream,
      while (xlog_cursor_next_xc(&r->cursor, &row,
                     r->wal_dir.force_recovery) == 0) {
          if (++stream->row_count % WAL_ROWS_PER_YIELD == 0) {
-            xstream_schedule_yield(stream);
+            xstream_yield(stream);
          }
          if (stream->row_count % 100000 == 0)
              say_info("%.1fM rows processed",
diff --git a/src/box/relay.cc b/src/box/relay.cc
index 81ac35bf2..efc201e80 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -262,8 +262,9 @@ relay_new(struct replica *replica)

  /** A callback recovery calls every now and then to unblock the event loop. */
  static void
-relay_yield(void)
+relay_yield(struct xstream *stream)
  {
+    (void) stream;
      fiber_sleep(0);
  }

diff --git a/src/box/xstream.h b/src/box/xstream.h
index d27de09a3..b3c771eba 100644
--- a/src/box/xstream.h
+++ b/src/box/xstream.h
@@ -45,36 +45,36 @@ struct xstream;
   * A type for a callback invoked by recovery after some batch of rows is
   * processed. Is used mostly to unblock the event loop every now and then.
   */
-typedef void (*schedule_yield_f)(void);
+typedef void (*xstream_yield_f)(struct xstream *);

  typedef void (*xstream_write_f)(struct xstream *, struct xrow_header *);

  struct xstream {
      xstream_write_f write;
-    schedule_yield_f schedule_yield;
+    xstream_yield_f yield;
      uint64_t row_count;
  };

  static inline void
  xstream_create(struct xstream *xstream, xstream_write_f write,
-           schedule_yield_f schedule_yield)
+           xstream_yield_f yield)
  {
      xstream->write = write;
-    xstream->schedule_yield = schedule_yield;
+    xstream->yield = yield;
      xstream->row_count = 0;
  }

  static inline void
-xstream_schedule_yield(struct xstream *stream)
+xstream_yield(struct xstream *stream)
  {
-    stream->schedule_yield();
+    stream->yield(stream);
  }

  static inline void
  xstream_reset(struct xstream *stream)
  {
      stream->row_count = 0;
-    xstream_schedule_yield(stream);
+    xstream_yield(stream);
  }

  int


==========================================

-- 
Serge Petrenko


