Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH] vinyl: abort rw transactions before DDL
Date: Fri,  3 Aug 2018 18:21:45 +0300	[thread overview]
Message-ID: <e5c1b5214b2f48b7d8a5fee471d35527d9c9d650.1533309498.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <20180803151700.shaatbqui63xbjt3@esperanza>

When building a new index or checking a space format, we propagate
changes done to the space during the procedure with the aid of an
on_replace trigger. The problem is there may be transactions with a
non-empty write set when we install the trigger. Changes done by
those transactions will not be seen by the trigger and so they won't
make it to the new index, resulting in an inconsistency between the
primary and secondary indexes. To fix this issue, let's abort all
rw transactions after setting the trigger. Note, transactions that
reached WAL can't be aborted so we wait for them to complete.

Closes #3458
---
https://github.com/tarantool/tarantool/issues/3458
https://github.com/tarantool/tarantool/commits/dv/gh-3458-vy-abort-tx-before-ddl

 src/box/vinyl.c          | 36 +++++++++++++++++++++++++++---------
 src/box/vy_lsm.h         | 11 +++++++++++
 src/box/vy_tx.c          | 13 +++++++++++++
 src/box/vy_tx.h          | 13 +++++++++++++
 test/vinyl/ddl.result    |  1 -
 test/vinyl/ddl.test.lua  |  1 -
 test/vinyl/misc.result   |  5 +++++
 test/vinyl/misc.test.lua |  4 ++++
 8 files changed, 73 insertions(+), 11 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index a0d2078a..3283ae95 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1014,6 +1014,24 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space)
 	return 0;
 }
 
+/**
+ * This function is called after installing on_replace trigger
+ * used for propagating changes done during DDL. It aborts all
+ * rw transactions that began before the trigger was installed
+ * so that DDL doesn't miss their working set.
+ */
+static void
+vy_abort_writers_for_ddl(struct vy_env *env)
+{
+	tx_manager_abort_writers(env->xm);
+	/*
+	 * Wait for prepared transactions to complete
+	 * (we can't abort them as they reached WAL).
+	 */
+	struct vclock unused;
+	wal_checkpoint(&unused, false);
+}
+
 /** Argument passed to vy_check_format_on_replace(). */
 struct vy_check_format_ctx {
 	/** Format to check new tuples against. */
@@ -1084,9 +1102,11 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format)
 	trigger_create(&on_replace, vy_check_format_on_replace, &ctx, NULL);
 	trigger_add(&space->on_replace, &on_replace);
 
+	vy_abort_writers_for_ddl(env);
+
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, key,
-			      &env->xm->p_global_read_view);
+			      &env->xm->p_committed_read_view);
 	int rc;
 	int loops = 0;
 	struct tuple *tuple;
@@ -4169,10 +4189,8 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 {
 	struct vy_env *env = vy_env(src_space->engine);
 	struct vy_lsm *pk = vy_lsm(src_space->index[0]);
-	bool is_empty = (pk->stat.disk.count.rows == 0 &&
-			 pk->stat.memory.count.rows == 0);
 
-	if (new_index->def->iid == 0 && !is_empty) {
+	if (new_index->def->iid == 0 && !vy_lsm_is_empty(pk)) {
 		diag_set(ClientError, ER_UNSUPPORTED, "Vinyl",
 			 "rebuilding the primary index of a non-empty space");
 		return -1;
@@ -4189,9 +4207,6 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 	    env->status == VINYL_FINAL_RECOVERY_LOCAL)
 		return vy_build_recover(env, new_lsm, pk);
 
-	if (is_empty)
-		return 0;
-
 	/*
 	 * Iterate over all tuples stored in the space and insert
 	 * each of them into the new LSM tree. Since read iterator
@@ -4214,6 +4229,8 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 	trigger_create(&on_replace, vy_build_on_replace, &ctx, NULL);
 	trigger_add(&src_space->on_replace, &on_replace);
 
+	vy_abort_writers_for_ddl(env);
+
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, key,
 			      &env->xm->p_committed_read_view);
@@ -4264,9 +4281,10 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 
 	/*
 	 * Dump the new index upon build completion so that we don't
-	 * have to rebuild it on recovery.
+	 * have to rebuild it on recovery. No need to trigger dump if
+	 * the space happens to be empty.
 	 */
-	if (rc == 0)
+	if (rc == 0 && !vy_lsm_is_empty(new_lsm))
 		rc = vy_scheduler_dump(&env->scheduler);
 
 	if (rc == 0 && ctx.is_failed) {
diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h
index f0b7ec9c..08d30530 100644
--- a/src/box/vy_lsm.h
+++ b/src/box/vy_lsm.h
@@ -311,6 +311,17 @@ void
 vy_lsm_delete(struct vy_lsm *lsm);
 
 /**
+ * Return true if the LSM tree has no statements, neither on disk
+ * nor in memory.
+ */
+static inline bool
+vy_lsm_is_empty(struct vy_lsm *lsm)
+{
+	return (lsm->stat.disk.count.rows == 0 &&
+		lsm->stat.memory.count.rows == 0);
+}
+
+/**
  * Increment the reference counter of an LSM tree.
  * An LSM tree cannot be deleted if its reference
  * counter is elevated.
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index 5c186b87..117db68c 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -106,6 +106,7 @@ tx_manager_new(void)
 		return NULL;
 	}
 
+	rlist_create(&xm->transactions);
 	rlist_create(&xm->read_views);
 	vy_global_read_view_create((struct vy_read_view *)&xm->global_read_view,
 				   INT64_MAX);
@@ -288,6 +289,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx)
 	vy_tx_read_set_new(&tx->read_set);
 	tx->psn = 0;
 	rlist_create(&tx->on_destroy);
+	rlist_add_entry(&xm->transactions, tx, in_transactions);
 }
 
 void
@@ -306,6 +308,7 @@ vy_tx_destroy(struct vy_tx *tx)
 	}
 
 	vy_tx_read_set_iter(&tx->read_set, NULL, vy_tx_read_set_free_cb, NULL);
+	rlist_del_entry(tx, in_transactions);
 }
 
 /** Return true if the transaction is read-only. */
@@ -856,6 +859,16 @@ vy_tx_set(struct vy_tx *tx, struct vy_lsm *lsm, struct tuple *stmt)
 }
 
 void
+tx_manager_abort_writers(struct tx_manager *xm)
+{
+	struct vy_tx *tx;
+	rlist_foreach_entry(tx, &xm->transactions, in_transactions) {
+		if (tx->state == VINYL_TX_READY && !vy_tx_is_ro(tx))
+			tx->state = VINYL_TX_ABORT;
+	}
+}
+
+void
 vy_txw_iterator_open(struct vy_txw_iterator *itr,
 		     struct vy_txw_iterator_stat *stat,
 		     struct vy_tx *tx, struct vy_lsm *lsm,
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index dcf6a739..0378aee9 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -133,6 +133,8 @@ write_set_search_key(write_set_t *tree, struct vy_lsm *lsm,
 
 /** Transaction object. */
 struct vy_tx {
+	/** Link in tx_manager::transactions. */
+	struct rlist in_transactions;
 	/** Transaction manager. */
 	struct tx_manager *xm;
 	/**
@@ -209,6 +211,10 @@ struct tx_manager {
 	 */
 	struct vy_tx *last_prepared_tx;
 	/**
+	 * List of open transactions, linked by vy_tx::in_transactions.
+	 */
+	struct rlist transactions;
+	/**
 	 * The list of TXs with a read view in order of vlsn.
 	 */
 	struct rlist read_views;
@@ -262,6 +268,13 @@ tx_manager_new(void);
 void
 tx_manager_delete(struct tx_manager *xm);
 
+/**
+ * Abort all transactions that started before this point of time
+ * and haven't reached WAL yet.
+ */
+void
+tx_manager_abort_writers(struct tx_manager *xm);
+
 /** Initialize a tx object. */
 void
 vy_tx_create(struct tx_manager *xm, struct vy_tx *tx);
diff --git a/test/vinyl/ddl.result b/test/vinyl/ddl.result
index 3e65e232..bb4fc984 100644
--- a/test/vinyl/ddl.result
+++ b/test/vinyl/ddl.result
@@ -656,7 +656,6 @@ last_val = 1000;
 ---
 ...
 function gen_load()
-    fiber.sleep(0.001)
     local s = box.space.test
     for i = 1, 200 do
         local op = math.random(4)
diff --git a/test/vinyl/ddl.test.lua b/test/vinyl/ddl.test.lua
index 45c5cf8e..f7bf03bb 100644
--- a/test/vinyl/ddl.test.lua
+++ b/test/vinyl/ddl.test.lua
@@ -253,7 +253,6 @@ box.commit();
 last_val = 1000;
 
 function gen_load()
-    fiber.sleep(0.001)
     local s = box.space.test
     for i = 1, 200 do
         local op = math.random(4)
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index 409df3e0..d2a7ca68 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -140,6 +140,11 @@ i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned',
 i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
 ---
 ...
+-- space.create_index() does a lookup in the primary index
+-- so reset the stats before calling space.insert().
+box.stat.reset()
+---
+...
 s:insert{1, 1, 1, 1, 1, 1}
 ---
 - [1, 1, 1, 1, 1, 1]
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 3dbfdb54..9f61ca0a 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -62,6 +62,10 @@ i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned',
 i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
 i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
 
+-- space.create_index() does a lookup in the primary index
+-- so reset the stats before calling space.insert().
+box.stat.reset()
+
 s:insert{1, 1, 1, 1, 1, 1}
 
 i1:stat().lookup -- 1
-- 
2.11.0

  reply	other threads:[~2018-08-03 15:21 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-01 10:44 [PATCH] vinyl: flush transactions before setting trigger on altered space Vladimir Davydov
2018-08-03 15:17 ` Vladimir Davydov
2018-08-03 15:21   ` Vladimir Davydov [this message]
2018-08-23 21:10     ` [PATCH] vinyl: abort rw transactions before DDL Konstantin Osipov
2018-08-24  8:28       ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e5c1b5214b2f48b7d8a5fee471d35527d9c9d650.1533309498.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH] vinyl: abort rw transactions before DDL' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox