[PATCH 10/13] wal: make wal_sync fail on write error

Vladimir Davydov vdavydov.dev at gmail.com
Sat Aug 10 13:03:37 MSK 2019


wal_sync() simply flushes the tx<->wal request queue; it doesn't
guarantee that all pending writes are successfully committed to disk.
This works for now, but in order to implement replica join off the
current read view, we need to make sure that all pending writes have
been persisted and won't be rolled back before we can use memtx
snapshot iterators. So this patch adds a return code to wal_sync():
from now on it returns -1 if a rollback is in progress and hence
some in-memory changes are going to be rolled back. We will use
this method after opening memtx snapshot iterators used for feeding
a consistent read view to a newly joined replica so as to ensure that
changes frozen by the iterators have made it to the disk.
---
 src/box/vinyl.c | 22 ++++++++++++++--------
 src/box/wal.c   | 29 ++++++++++++++++++++++++++---
 src/box/wal.h   |  5 +++--
 3 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index ed7c21dd..9e93153b 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1098,13 +1098,16 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format)
 	 * trigger so that changes made by newer transactions are checked
 	 * by the trigger callback.
 	 */
-	if (need_wal_sync)
-		wal_sync();
+	int rc;
+	if (need_wal_sync) {
+		rc = wal_sync();
+		if (rc != 0)
+			goto out;
+	}
 
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, pk->env->empty_key,
 			      &env->xm->p_committed_read_view);
-	int rc;
 	int loops = 0;
 	struct vy_entry entry;
 	while ((rc = vy_read_iterator_next(&itr, &entry)) == 0) {
@@ -1129,7 +1132,7 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format)
 			break;
 	}
 	vy_read_iterator_close(&itr);
-
+out:
 	diag_destroy(&ctx.diag);
 	trigger_clear(&on_replace);
 	txn_can_yield(txn, false);
@@ -4373,13 +4376,16 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 	 * trigger so that changes made by newer transactions are checked
 	 * by the trigger callback.
 	 */
-	if (need_wal_sync)
-		wal_sync();
+	int rc;
+	if (need_wal_sync) {
+		rc = wal_sync();
+		if (rc != 0)
+			goto out;
+	}
 
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, pk->env->empty_key,
 			      &env->xm->p_committed_read_view);
-	int rc;
 	int loops = 0;
 	struct vy_entry entry;
 	int64_t build_lsn = env->xm->lsn;
@@ -4443,7 +4449,7 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 		diag_move(&ctx.diag, diag_get());
 		rc = -1;
 	}
-
+out:
 	diag_destroy(&ctx.diag);
 	trigger_clear(&on_replace);
 	txn_can_yield(txn, false);
diff --git a/src/box/wal.c b/src/box/wal.c
index 58a58e5b..267cafed 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -524,13 +524,36 @@ wal_free(void)
 	wal_writer_destroy(writer);
 }
 
-void
+static int
+wal_sync_f(struct cbus_call_msg *msg)
+{
+	(void)msg;
+	struct wal_writer *writer = &wal_writer_singleton;
+	if (writer->in_rollback.route != NULL) {
+		/* We're rolling back a failed write. */
+		diag_set(ClientError, ER_WAL_IO);
+		return -1;
+	}
+	return 0;
+}
+
+int
 wal_sync(void)
 {
 	struct wal_writer *writer = &wal_writer_singleton;
 	if (writer->wal_mode == WAL_NONE)
-		return;
-	cbus_flush(&writer->wal_pipe, &writer->tx_prio_pipe, NULL);
+		return 0;
+	if (!stailq_empty(&writer->rollback)) {
+		/* We're rolling back a failed write. */
+		diag_set(ClientError, ER_WAL_IO);
+		return -1;
+	}
+	bool cancellable = fiber_set_cancellable(false);
+	struct cbus_call_msg msg;
+	int rc = cbus_call(&writer->wal_pipe, &writer->tx_prio_pipe,
+			   &msg, wal_sync_f, NULL, TIMEOUT_INFINITY);
+	fiber_set_cancellable(cancellable);
+	return rc;
 }
 
 static int
diff --git a/src/box/wal.h b/src/box/wal.h
index 4e500d2a..6725f26d 100644
--- a/src/box/wal.h
+++ b/src/box/wal.h
@@ -171,9 +171,10 @@ enum wal_mode
 wal_mode();
 
 /**
- * Wait till all pending changes to the WAL are flushed.
+ * Wait until all submitted writes are successfully flushed
+ * to disk. Returns 0 on success, -1 if write failed.
  */
-void
+int
 wal_sync(void);
 
 struct wal_checkpoint {
-- 
2.20.1




More information about the Tarantool-patches mailing list