[Tarantool-patches] [PATCH 1/6] recovery: do not throw an error
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sat Nov 23 16:45:46 MSK 2019
Thanks for the patch!
See 9 comments below.
On 19/11/2019 17:04, Georgy Kirichenko wrote:
> Relaying from C-written wal requires recovery to be a C-compliant. So
> get rid of exception from recovery interface.
>
> Part of #980
> ---
> src/box/box.cc | 16 ++++++---
> src/box/recovery.cc | 87 +++++++++++++++++++++++++++------------------
> src/box/recovery.h | 14 ++++----
> src/box/relay.cc | 15 ++++----
> 4 files changed, 79 insertions(+), 53 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index b119c927b..a53b6e912 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1911,6 +1913,7 @@ local_recovery(const struct tt_uuid *instance_uuid,
> box_vclock = &recovery->vclock;
> auto guard = make_scoped_guard([&]{
> box_vclock = &replicaset.vclock;
> + recovery_stop_local(recovery);
1. AFAIU the patch is pure refactoring. Why is this functional change
here?
> recovery_delete(recovery);
> });
>
> diff --git a/src/box/recovery.cc b/src/box/recovery.cc
> index d122d618a..4693008f1 100644
> --- a/src/box/recovery.cc
> +++ b/src/box/recovery.cc
> @@ -87,14 +87,11 @@ recovery_new(const char *wal_dirname, bool force_recovery,
> calloc(1, sizeof(*r));
2. src/box/relay.cc:359 does not check for
recovery_new() == NULL.
>
> if (r == NULL) {
> - tnt_raise(OutOfMemory, sizeof(*r), "malloc",
> - "struct recovery");
> + diag_set(OutOfMemory, sizeof(*r), "malloc",
> + "struct recovery");
> + return NULL;
> }
>
> - auto guard = make_scoped_guard([=]{
> - free(r);
> - });
> -
> xdir_create(&r->wal_dir, wal_dirname, XLOG, &INSTANCE_UUID,
> &xlog_opts_default);
> r->wal_dir.force_recovery = force_recovery;
> @@ -156,19 +158,21 @@ recovery_close_log(struct recovery *r)
> r->cursor.name);
> }
> xlog_cursor_close(&r->cursor, false);
> - trigger_run_xc(&r->on_close_log, NULL);
> + /* Suppress a trigger error if happened. */
> + trigger_run(&r->on_close_log, NULL);
3. Why do you suppress it? It was not so before your
patch, and it has nothing to do with exceptions removal.
> }
>
> -static void
> +static int
> recovery_open_log(struct recovery *r, const struct vclock *vclock)
> {
> - XlogGapError *e;
> struct xlog_meta meta = r->cursor.meta;
> enum xlog_cursor_state state = r->cursor.state;
>
> recovery_close_log(r);
>
> - xdir_open_cursor_xc(&r->wal_dir, vclock_sum(vclock), &r->cursor);
4. xdir_open_cursor_xc() is now unused and can be dropped.
> + if (xdir_open_cursor(&r->wal_dir, vclock_sum(vclock),
> + &r->cursor) != 0)
> + return -1;
>
> if (state == XLOG_CURSOR_NEW &&
> vclock_compare(vclock, &r->vclock) > 0) {
> @@ -216,8 +220,9 @@ gap_error:
> void
> recovery_delete(struct recovery *r)
> {
> - recovery_stop_local(r);
> + /* Recovery should be stopped before deleting. */
5. It should be, perhaps. But how is it related to the
removal of exceptions? If that part is broken, then please
move it to a different commit and add a test.
>
> + assert(r->watcher == NULL);
> trigger_destroy(&r->on_close_log);
> xdir_destroy(&r->wal_dir);
> if (xlog_cursor_is_open(&r->cursor)) {
> @@ -237,25 +242,26 @@ recovery_delete(struct recovery *r)
> * The reading will be stopped on reaching stop_vclock.
> * Use NULL for boundless recover
> */
> -static void
> +static int
> recover_xlog(struct recovery *r, struct xstream *stream,
> const struct vclock *stop_vclock)
> {
> struct xrow_header row;
> uint64_t row_count = 0;
> - while (xlog_cursor_next_xc(&r->cursor, &row,
> - r->wal_dir.force_recovery) == 0) {
6. xlog_cursor_next_xc() is now unused and can be dropped.
> + int rc;
> + while ((rc = xlog_cursor_next(&r->cursor, &row,
> + r->wal_dir.force_recovery)) == 0) {
> /*
> * Read the next row from xlog file.
> *
> - * xlog_cursor_next_xc() returns 1 when
> + * xlog_cursor_next() returns 1 when
> * it can not read more rows. This doesn't mean
> * the file is fully read: it's fully read only
> * when EOF marker has been read, see i.eof_read
> */
> if (stop_vclock != NULL &&
> r->vclock.signature >= stop_vclock->signature)
> - return;
> + return 0;
> int64_t current_lsn = vclock_get(&r->vclock, row.replica_id);
> if (row.lsn <= current_lsn)
> continue; /* already applied, skip */
> @@ -279,13 +285,16 @@ recover_xlog(struct recovery *r, struct xstream *stream,
> row_count / 1000000.);
> } else {
> if (!r->wal_dir.force_recovery)
> - diag_raise();
> + return -1;
>
> say_error("skipping row {%u: %lld}",
> (unsigned)row.replica_id, (long long)row.lsn);
> diag_log();
> }
> }
> + if (rc < 0)
> + return -1;
> + return 0;
7. xlog_cursor_next() returns either -1 or 0. The loop won't
stop until it returns non-zero. It means that here rc can't be anything
except -1, and you can do 'return rc;' or 'return -1;' instead,
without the 'if'.
> }
>
> /**
> @@ -299,7 +308,7 @@ recover_xlog(struct recovery *r, struct xstream *stream,
> * This function will not close r->current_wal if
> * recovery was successful.
> */
> -void
> +int
> recover_remaining_wals(struct recovery *r, struct xstream *stream,
> const struct vclock *stop_vclock, bool scan_dir)
> {
8. The function still makes a non-exception-safe call to xdir_scan_xc().
After you drop xdir_scan_xc() from there, it will become unused,
so you can drop the whole xdir_scan_xc() function.
9. There is one more function using exceptions - hot_standby_f(). I
propose that you make it exception-safe, convert class
WalSubscription to a struct, and turn this file into .c (in a
separate commit).
More information about the Tarantool-patches
mailing list