[tarantool-patches] [PATCH v2 1/5] Do not promote wal vclock for failed writes

Vladimir Davydov vdavydov.dev at gmail.com
Mon Jan 28 14:20:18 MSK 2019


On Tue, Jan 22, 2019 at 01:31:09PM +0300, Georgy Kirichenko wrote:
> Increase replica lsn only if row was successfully written to disk. This
> prevents wal from lsn gaps in case of IO errors and enforces wal
> consistency.
> 
> Needs for #980
> ---
>  src/box/wal.c                     | 19 ++++++---
>  test/xlog/errinj.result           |  1 -
>  test/xlog/panic_on_lsn_gap.result | 65 +++++++++++++++----------------
>  3 files changed, 45 insertions(+), 40 deletions(-)
> 
> diff --git a/src/box/wal.c b/src/box/wal.c
> index 3b50d3629..a55b544aa 100644
> --- a/src/box/wal.c
> +++ b/src/box/wal.c
> @@ -901,16 +901,16 @@ wal_writer_begin_rollback(struct wal_writer *writer)
>  }
>  
>  static void
> -wal_assign_lsn(struct wal_writer *writer, struct xrow_header **row,
> +wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
>  	       struct xrow_header **end)
>  {
>  	/** Assign LSN to all local rows. */
>  	for ( ; row < end; row++) {
>  		if ((*row)->replica_id == 0) {
> -			(*row)->lsn = vclock_inc(&writer->vclock, instance_id);
> +			(*row)->lsn = vclock_inc(vclock, instance_id);
>  			(*row)->replica_id = instance_id;
>  		} else {
> -			vclock_follow_xrow(&writer->vclock, *row);
> +			vclock_follow_xrow(vclock, *row);
>  		}
>  	}
>  }
> @@ -922,6 +922,11 @@ wal_write_to_disk(struct cmsg *msg)
>  	struct wal_msg *wal_msg = (struct wal_msg *) msg;
>  	struct error *error;
>  
> +	/* Local vclock copy. */
> +	struct vclock vclock;
> +	vclock_create(&vclock);
> +	vclock_copy(&vclock, &writer->vclock);
> +
>  	struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL);
>  	while (inj != NULL && inj->bparam)
>  		usleep(10);
> @@ -974,14 +979,15 @@ wal_write_to_disk(struct cmsg *msg)
>  	struct journal_entry *entry;
>  	struct stailq_entry *last_committed = NULL;
>  	stailq_foreach_entry(entry, &wal_msg->commit, fifo) {
> -		wal_assign_lsn(writer, entry->rows, entry->rows + entry->n_rows);
> -		entry->res = vclock_sum(&writer->vclock);
> +		wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows);
> +		entry->res = vclock_sum(&vclock);
>  		rc = xlog_write_entry(l, entry);
>  		if (rc < 0)
>  			goto done;
>  		if (rc > 0) {
>  			writer->checkpoint_wal_size += rc;
>  			last_committed = &entry->fifo;
> +			vclock_copy(&writer->vclock, &vclock);

I don't like that you copy a vclock after applying each entry.
Currently, it should be pretty cheap, but in future, when we make
vclock store any number of ids, this might get pretty heavy.
Can we minimize the number of memcpys somehow, ideally do it only
on the rollback path?

>  		}
>  		/* rc == 0: the write is buffered in xlog_tx */
>  	}
> @@ -991,6 +997,7 @@ wal_write_to_disk(struct cmsg *msg)
>  
>  	writer->checkpoint_wal_size += rc;
>  	last_committed = stailq_last(&wal_msg->commit);
> +	vclock_copy(&writer->vclock, &vclock);
>  
>  	/*
>  	 * Notify TX if the checkpoint threshold has been exceeded.
> @@ -1185,7 +1192,7 @@ wal_write_in_wal_mode_none(struct journal *journal,
>  			   struct journal_entry *entry)
>  {
>  	struct wal_writer *writer = (struct wal_writer *) journal;
> -	wal_assign_lsn(writer, entry->rows, entry->rows + entry->n_rows);
> +	wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows);
>  	int64_t old_lsn = vclock_get(&replicaset.vclock, instance_id);
>  	int64_t new_lsn = vclock_get(&writer->vclock, instance_id);
>  	if (new_lsn > old_lsn) {
> diff --git a/test/xlog/errinj.result b/test/xlog/errinj.result
> index 390404b47..7f15bef35 100644
> --- a/test/xlog/errinj.result
> +++ b/test/xlog/errinj.result
> @@ -43,7 +43,6 @@ require('fio').glob(name .. "/*.xlog")
>  ---
>  - - xlog/00000000000000000000.xlog
>    - xlog/00000000000000000001.xlog
> -  - xlog/00000000000000000002.xlog
>  ...
>  test_run:cmd('restart server default with cleanup=1')
>  -- gh-881 iproto request with wal IO error
> diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result
> index 4dd1291f8..8054baab4 100644
> --- a/test/xlog/panic_on_lsn_gap.result
> +++ b/test/xlog/panic_on_lsn_gap.result
> @@ -105,7 +105,7 @@ test_run:cmd("restart server panic")
>  --
>  box.info.vclock
>  ---
> -- {1: 11}
> +- {1: 1}

After this patch the comments contradict the expected result of this
test. Please fix.



More information about the Tarantool-patches mailing list