[Tarantool-patches] [PATCH 1/6] recovery: do not throw an error

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Nov 23 16:45:46 MSK 2019


Thanks for the patch!

See 9 comments below.

On 19/11/2019 17:04, Georgy Kirichenko wrote:
> Relaying from C-written wal requires recovery to be a C-compliant. So
> get rid of exception from recovery interface.
> 
> Part of #980
> ---
>  src/box/box.cc      | 16 ++++++---
>  src/box/recovery.cc | 87 +++++++++++++++++++++++++++------------------
>  src/box/recovery.h  | 14 ++++----
>  src/box/relay.cc    | 15 ++++----
>  4 files changed, 79 insertions(+), 53 deletions(-)
> 
> diff --git a/src/box/box.cc b/src/box/box.cc
> index b119c927b..a53b6e912 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1911,6 +1913,7 @@ local_recovery(const struct tt_uuid *instance_uuid,
>  	box_vclock = &recovery->vclock;
>  	auto guard = make_scoped_guard([&]{
>  		box_vclock = &replicaset.vclock;
> +		recovery_stop_local(recovery);

1. AFAIU the patch is pure refactoring. Why is this functional change
here?

>  		recovery_delete(recovery);
>  	});
>  
> diff --git a/src/box/recovery.cc b/src/box/recovery.cc
> index d122d618a..4693008f1 100644
> --- a/src/box/recovery.cc
> +++ b/src/box/recovery.cc
> @@ -87,14 +87,11 @@ recovery_new(const char *wal_dirname, bool force_recovery,
>  			calloc(1, sizeof(*r));

2. src/box/relay.cc:359 does not check for
recovery_new() == NULL.

>  
>  	if (r == NULL) {
> -		tnt_raise(OutOfMemory, sizeof(*r), "malloc",
> -			  "struct recovery");
> +		diag_set(OutOfMemory, sizeof(*r), "malloc",
> +			 "struct recovery");
> +		return NULL;
>  	}
>  
> -	auto guard = make_scoped_guard([=]{
> -		free(r);
> -	});
> -
>  	xdir_create(&r->wal_dir, wal_dirname, XLOG, &INSTANCE_UUID,
>  		    &xlog_opts_default);
>  	r->wal_dir.force_recovery = force_recovery;
> @@ -156,19 +158,21 @@ recovery_close_log(struct recovery *r)
>  			 r->cursor.name);
>  	}
>  	xlog_cursor_close(&r->cursor, false);
> -	trigger_run_xc(&r->on_close_log, NULL);
> +	/* Suppress a trigger error if happened. */
> +	trigger_run(&r->on_close_log, NULL);

3. Why do you suppress it? The error was not suppressed before your
patch, and this has nothing to do with exception removal.

>  }
>  
> -static void
> +static int
>  recovery_open_log(struct recovery *r, const struct vclock *vclock)
>  {
> -	XlogGapError *e;
>  	struct xlog_meta meta = r->cursor.meta;
>  	enum xlog_cursor_state state = r->cursor.state;
>  
>  	recovery_close_log(r);
>  
> -	xdir_open_cursor_xc(&r->wal_dir, vclock_sum(vclock), &r->cursor);

4. xdir_open_cursor_xc() is now unused and can be dropped.

> +	if (xdir_open_cursor(&r->wal_dir, vclock_sum(vclock),
> +			     &r->cursor) != 0)
> +		return -1;
>  
>  	if (state == XLOG_CURSOR_NEW &&
>  	    vclock_compare(vclock, &r->vclock) > 0) {
> @@ -216,8 +220,9 @@ gap_error:
>  void
>  recovery_delete(struct recovery *r)
>  {
> -	recovery_stop_local(r);
> +	/* Recovery should be stopped before deleting. */

5. It should be, perhaps. But how is it related to exception
removal? If that part is broken, then please move it to a
different commit and add a test.

>  
> +	assert(r->watcher == NULL);
>  	trigger_destroy(&r->on_close_log);
>  	xdir_destroy(&r->wal_dir);
>  	if (xlog_cursor_is_open(&r->cursor)) {
> @@ -237,25 +242,26 @@ recovery_delete(struct recovery *r)
>   * The reading will be stopped on reaching stop_vclock.
>   * Use NULL for boundless recover
>   */
> -static void
> +static int
>  recover_xlog(struct recovery *r, struct xstream *stream,
>  	     const struct vclock *stop_vclock)
>  {
>  	struct xrow_header row;
>  	uint64_t row_count = 0;
> -	while (xlog_cursor_next_xc(&r->cursor, &row,
> -				   r->wal_dir.force_recovery) == 0) {

6. xlog_cursor_next_xc() is now unused and can be dropped.

> +	int rc;
> +	while ((rc = xlog_cursor_next(&r->cursor, &row,
> +				      r->wal_dir.force_recovery)) == 0) {
>  		/*
>  		 * Read the next row from xlog file.
>  		 *
> -		 * xlog_cursor_next_xc() returns 1 when
> +		 * xlog_cursor_next() returns 1 when
>  		 * it can not read more rows. This doesn't mean
>  		 * the file is fully read: it's fully read only
>  		 * when EOF marker has been read, see i.eof_read
>  		 */
>  		if (stop_vclock != NULL &&
>  		    r->vclock.signature >= stop_vclock->signature)
> -			return;
> +			return 0;
>  		int64_t current_lsn = vclock_get(&r->vclock, row.replica_id);
>  		if (row.lsn <= current_lsn)
>  			continue; /* already applied, skip */
> @@ -279,13 +285,16 @@ recover_xlog(struct recovery *r, struct xstream *stream,
>  					 row_count / 1000000.);
>  		} else {
>  			if (!r->wal_dir.force_recovery)
> -				diag_raise();
> +				return -1;
>  
>  			say_error("skipping row {%u: %lld}",
>  				  (unsigned)row.replica_id, (long long)row.lsn);
>  			diag_log();
>  		}
>  	}
> +	if (rc < 0)
> +		return -1;
> +	return 0;

7. xlog_cursor_next() returns either -1 or 0. The loop keeps running
while it returns 0 and stops only on a non-zero result. It means that
here rc can't be anything except -1, and you can do 'return rc;' or
'return -1;' instead, without the 'if'.

>  }
>  
>  /**
> @@ -299,7 +308,7 @@ recover_xlog(struct recovery *r, struct xstream *stream,
>   * This function will not close r->current_wal if
>   * recovery was successful.
>   */
> -void
> +int
>  recover_remaining_wals(struct recovery *r, struct xstream *stream,
>  		       const struct vclock *stop_vclock, bool scan_dir)
>  {

8. The function still makes a non-exception-safe call to xdir_scan_xc().
Once you drop the xdir_scan_xc() call from there, the function will be
unused, so you can drop the whole xdir_scan_xc() function as well.

9. There is one more function that uses exceptions: hot_standby_f(). I
propose that you make it exception-safe, convert class
WalSubscription to a struct, and turn this file into .c (in a
separate commit).


More information about the Tarantool-patches mailing list