[PATCH 1/5] vinyl: don't sync WAL on space alter if not necessary

Vladimir Davydov vdavydov.dev at gmail.com
Fri Jul 5 23:25:27 MSK 2019


Changes done to an altered space while a new index is being built or
the format is being checked are propagated via an on_replace trigger.
The problem is there may be transactions that started before the alter
request. Their working set can't be checked so we simply abort them.
We can't abort transactions that have reached WAL so we also call
wal_sync() to flush all pending WAL requests. This is a yielding
operation and we call it even if there's no transactions that need
to be flushed. As a result, vinyl space alter yields unconditionally,
even if the space is empty and there is no pending transactions
affecting it. This prevents us from implementing transactional DDL.
Let's call wal_sync() only if there's actually at least one pending
transaction affecting the altered space and waiting for WAL.
---
 src/box/vinyl.c | 51 ++++++++++++++++++++++++++++++---------------------
 src/box/vy_tx.c | 11 ++++++++++-
 src/box/vy_tx.h |  7 ++++++-
 3 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 128b1199..e0de65d0 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1052,25 +1052,8 @@ vinyl_space_invalidate(struct space *space)
 	 * soon as it's done reading disk, which will make the DML
 	 * request bail out early, without dereferencing the space.
 	 */
-	tx_manager_abort_writers_for_ddl(env->xm, space);
-}
-
-/**
- * This function is called after installing on_replace trigger
- * used for propagating changes done during DDL. It aborts all
- * rw transactions affecting the given space that began
- * before the trigger was installed so that DDL doesn't miss
- * their working set.
- */
-static void
-vy_abort_writers_for_ddl(struct vy_env *env, struct space *space)
-{
-	tx_manager_abort_writers_for_ddl(env->xm, space);
-	/*
-	 * Wait for prepared transactions to complete
-	 * (we can't abort them as they reached WAL).
-	 */
-	wal_sync();
+	bool unused;
+	tx_manager_abort_writers_for_ddl(env->xm, space, &unused);
 }
 
 /** Argument passed to vy_check_format_on_replace(). */
@@ -1131,6 +1114,13 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format)
 	 */
 	struct vy_lsm *pk = vy_lsm(space->index[0]);
 
+	/*
+	 * Transactions started before the space alter request can't
+	 * be checked with on_replace trigger so we abort them.
+	 */
+	bool need_wal_sync;
+	tx_manager_abort_writers_for_ddl(env->xm, space, &need_wal_sync);
+
 	struct trigger on_replace;
 	struct vy_check_format_ctx ctx;
 	ctx.format = format;
@@ -1139,7 +1129,13 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format)
 	trigger_create(&on_replace, vy_check_format_on_replace, &ctx, NULL);
 	trigger_add(&space->on_replace, &on_replace);
 
-	vy_abort_writers_for_ddl(env, space);
+	/*
+	 * Flush transactions waiting on WAL after installing on_replace
+	 * trigger so that changes made by newer transactions are checked
+	 * by the trigger callback.
+	 */
+	if (need_wal_sync)
+		wal_sync();
 
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, pk->env->empty_key,
@@ -4343,6 +4339,13 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 		return vy_build_recover(env, new_lsm, pk);
 
 	/*
+	 * Transactions started before the space alter request can't
+	 * be checked with on_replace trigger so we abort them.
+	 */
+	bool need_wal_sync;
+	tx_manager_abort_writers_for_ddl(env->xm, src_space, &need_wal_sync);
+
+	/*
 	 * Iterate over all tuples stored in the space and insert
 	 * each of them into the new LSM tree. Since read iterator
 	 * may yield, we install an on_replace trigger to forward
@@ -4359,7 +4362,13 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 	trigger_create(&on_replace, vy_build_on_replace, &ctx, NULL);
 	trigger_add(&src_space->on_replace, &on_replace);
 
-	vy_abort_writers_for_ddl(env, src_space);
+	/*
+	 * Flush transactions waiting on WAL after installing on_replace
+	 * trigger so that changes made by newer transactions are checked
+	 * by the trigger callback.
+	 */
+	if (need_wal_sync)
+		wal_sync();
 
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, pk->env->empty_key,
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index 56b9f292..ca8c57ad 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -1144,13 +1144,22 @@ vy_tx_set(struct vy_tx *tx, struct vy_lsm *lsm, struct tuple *stmt)
 }
 
 void
-tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct space *space)
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct space *space,
+				 bool *need_wal_sync)
 {
+	*need_wal_sync = false;
 	if (space->index_count == 0)
 		return; /* no indexes, no conflicts */
 	struct vy_lsm *lsm = vy_lsm(space->index[0]);
 	struct vy_tx *tx;
 	rlist_foreach_entry(tx, &xm->writers, in_writers) {
+		/*
+		 * We can't abort prepared transactions as they have
+		 * already reached WAL. The caller needs to sync WAL
+		 * to make sure they are gone.
+		 */
+		if (tx->state == VINYL_TX_COMMIT)
+			*need_wal_sync = true;
 		if (tx->state != VINYL_TX_READY)
 			continue;
 		if (tx->last_stmt_space == space ||
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index 9ee10755..376f4330 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -293,9 +293,14 @@ tx_manager_mem_used(struct tx_manager *xm);
  * Abort all rw transactions that affect the given space
  * and haven't reached WAL yet. Called before executing a DDL
  * operation.
+ *
+ * @need_wal_sync is set if at least one transaction can't be
+ * aborted, because it has reached WAL. The caller is supposed
+ * to call wal_sync() to flush them.
  */
 void
-tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct space *space);
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct space *space,
+				 bool *need_wal_sync);
 
 /**
  * Abort all local rw transactions that haven't reached WAL yet.
-- 
2.11.0




More information about the Tarantool-patches mailing list