Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 1/4] vinyl: introduce hash of spaces affected by transaction
Date: Sun, 24 Mar 2019 00:07:21 +0300	[thread overview]
Message-ID: <77567cf07633e5b3f8c60772cbf7fc702cf15297.1553373792.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1553373792.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1553373792.git.vdavydov.dev@gmail.com>

We need to abort all transactions writing to an altered space when
a new index is built. Currently, we use the write set to look up such
transactions, but it isn't quite correct, because a transaction could
yield on disk read before inserting a statement into the write set.
To address this problem, this patch introduces a hash of spaces affected
by each transaction and makes tx_manager_abort_writers_for_ddl() use it
instead of the write set to abort transactions for DDL.

Needed for #3420
---
 src/box/vinyl.c | 12 ++++++------
 src/box/vy_tx.c | 26 ++++++++++++++++++++++----
 src/box/vy_tx.h | 44 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 3ef43e18..a6a5f187 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1034,14 +1034,14 @@ vinyl_space_prepare_alter(struct space *old_space, struct space *new_space)
 /**
  * This function is called after installing on_replace trigger
  * used for propagating changes done during DDL. It aborts all
- * rw transactions affecting the given LSM tree that began
+ * rw transactions affecting the given space that began
  * before the trigger was installed so that DDL doesn't miss
  * their working set.
  */
 static void
-vy_abort_writers_for_ddl(struct vy_env *env, struct vy_lsm *lsm)
+vy_abort_writers_for_ddl(struct vy_env *env, struct space *space)
 {
-	tx_manager_abort_writers_for_ddl(env->xm, lsm);
+	tx_manager_abort_writers_for_ddl(env->xm, space);
 	/*
 	 * Wait for prepared transactions to complete
 	 * (we can't abort them as they reached WAL).
@@ -1115,7 +1115,7 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format)
 	trigger_create(&on_replace, vy_check_format_on_replace, &ctx, NULL);
 	trigger_add(&space->on_replace, &on_replace);
 
-	vy_abort_writers_for_ddl(env, pk);
+	vy_abort_writers_for_ddl(env, space);
 
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, pk->env->empty_key,
@@ -2434,7 +2434,7 @@ vinyl_engine_begin_statement(struct engine *engine, struct txn *txn)
 	struct vy_tx *tx = txn->engine_tx;
 	struct txn_stmt *stmt = txn_current_stmt(txn);
 	assert(tx != NULL);
-	return vy_tx_begin_statement(tx, &stmt->engine_savepoint);
+	return vy_tx_begin_statement(tx, stmt->space, &stmt->engine_savepoint);
 }
 
 static void
@@ -4229,7 +4229,7 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index,
 	trigger_create(&on_replace, vy_build_on_replace, &ctx, NULL);
 	trigger_add(&src_space->on_replace, &on_replace);
 
-	vy_abort_writers_for_ddl(env, pk);
+	vy_abort_writers_for_ddl(env, src_space);
 
 	struct vy_read_iterator itr;
 	vy_read_iterator_open(&itr, pk, NULL, ITER_ALL, pk->env->empty_key,
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index ae660dd8..56d594e5 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -311,6 +311,7 @@ vy_tx_create(struct tx_manager *xm, struct vy_tx *tx)
 	tx->is_applier_session = false;
 	tx->read_view = (struct vy_read_view *)xm->p_global_read_view;
 	vy_tx_read_set_new(&tx->read_set);
+	vy_tx_space_hash_new(&tx->space_hash);
 	tx->psn = 0;
 	rlist_create(&tx->on_destroy);
 	rlist_create(&tx->in_writers);
@@ -849,13 +850,31 @@ vy_tx_rollback(struct vy_tx *tx)
 }
 
 int
-vy_tx_begin_statement(struct vy_tx *tx, void **savepoint)
+vy_tx_begin_statement(struct vy_tx *tx, struct space *space, void **savepoint)
 {
 	if (tx->state == VINYL_TX_ABORT) {
 		diag_set(ClientError, ER_TRANSACTION_CONFLICT);
 		return -1;
 	}
 	assert(tx->state == VINYL_TX_READY);
+
+	struct vy_tx_space_hash_entry *space_entry;
+	space_entry = vy_tx_space_hash_search(&tx->space_hash, space);
+	if (space_entry == NULL) {
+		/*
+		 * Allocate a slot for the modified space in the hash.
+		 * It will be freed automatically along with the region
+		 * when the transaction is committed or rolled back.
+		 */
+		space_entry = region_alloc(&fiber()->gc, sizeof(*space_entry));
+		if (space_entry == NULL) {
+			diag_set(OutOfMemory, sizeof(*space_entry),
+				 "region", "space hash entry");
+			return -1;
+		}
+		space_entry->space = space;
+		vy_tx_space_hash_insert(&tx->space_hash, space_entry);
+	}
 	if (stailq_empty(&tx->log))
 		rlist_add_entry(&tx->xm->writers, tx, in_writers);
 	*savepoint = stailq_last(&tx->log);
@@ -1086,13 +1105,12 @@ vy_tx_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm,
 }
 
 void
-tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm)
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct space *space)
 {
 	struct vy_tx *tx;
 	rlist_foreach_entry(tx, &xm->writers, in_writers) {
 		if (tx->state == VINYL_TX_READY &&
-		    write_set_search_key(&tx->write_set, lsm,
-					 lsm->env->empty_key) != NULL)
+		    vy_tx_space_hash_search(&tx->space_hash, space) != NULL)
 			vy_tx_abort(tx);
 	}
 }
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index aaa31bee..fada41f1 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -51,6 +51,7 @@
 extern "C" {
 #endif /* defined(__cplusplus) */
 
+struct space;
 struct tuple;
 struct tx_manager;
 struct vy_mem;
@@ -133,6 +134,38 @@ write_set_search_key(write_set_t *tree, struct vy_lsm *lsm,
 	return write_set_search(tree, &key);
 }
 
+/**
+ * Represents a space affected by a transaction.
+ * See vy_tx::space_hash.
+ */
+struct vy_tx_space_hash_entry {
+	struct space *space;
+	rb_node(struct vy_tx_space_hash_entry) in_hash;
+};
+
+static inline int
+vy_tx_space_hash_cmp(struct vy_tx_space_hash_entry *a,
+		     struct vy_tx_space_hash_entry *b)
+{
+	if (a->space != b->space)
+		return a->space < b->space ? -1 : 1;
+	return 0;
+}
+
+static inline int
+vy_tx_space_hash_key_cmp(struct space *space,
+			 struct vy_tx_space_hash_entry *entry)
+{
+	if (space != entry->space)
+		return space < entry->space ? -1 : 1;
+	return 0;
+}
+
+typedef rb_tree(struct vy_tx_space_hash_entry) vy_tx_space_hash_t;
+rb_gen_ext_key(MAYBE_UNUSED static inline, vy_tx_space_hash_,
+	       vy_tx_space_hash_t, struct vy_tx_space_hash_entry, in_hash, 
+	       vy_tx_space_hash_cmp, struct space *, vy_tx_space_hash_key_cmp);
+
 /** Transaction object. */
 struct vy_tx {
 	/** Link in tx_manager::writers. */
@@ -181,6 +214,11 @@ struct vy_tx {
 	 */
 	vy_tx_read_set_t read_set;
 	/**
+	 * Spaces affected by this transaction.
+	 * Used for aborting transactions on DDL.
+	 */
+	vy_tx_space_hash_t space_hash;
+	/**
 	 * Prepare sequence number or -1 if the transaction
 	 * is not prepared.
 	 */
@@ -277,12 +315,12 @@ size_t
 tx_manager_mem_used(struct tx_manager *xm);
 
 /**
- * Abort all rw transactions that affect the given LSM tree
+ * Abort all rw transactions that affect the given space
  * and haven't reached WAL yet. Called before executing a DDL
  * operation.
  */
 void
-tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct vy_lsm *lsm);
+tx_manager_abort_writers_for_ddl(struct tx_manager *xm, struct space *space);
 
 /**
  * Abort all local rw transactions that haven't reached WAL yet.
@@ -327,7 +365,7 @@ vy_tx_rollback(struct vy_tx *tx);
  * to a save point with vy_tx_rollback_statement().
  */
 int
-vy_tx_begin_statement(struct vy_tx *tx, void **savepoint);
+vy_tx_begin_statement(struct vy_tx *tx, struct space *space, void **savepoint);
 
 /**
  * Rollback a transaction statement.
-- 
2.11.0

  reply	other threads:[~2019-03-23 21:07 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-23 21:07 [PATCH 0/4] Fix DML vs DDL race Vladimir Davydov
2019-03-23 21:07 ` Vladimir Davydov [this message]
2019-03-28 13:25   ` [tarantool-patches] Re: [PATCH 1/4] vinyl: introduce hash of spaces affected by transaction Konstantin Osipov
2019-03-28 13:58     ` Vladimir Davydov
2019-03-23 21:07 ` [PATCH 2/4] vinyl: don't abort transactions that modify only local spaces for ro Vladimir Davydov
2019-03-25  5:27   ` [tarantool-patches] " Георгий Кириченко
2019-03-25  8:13     ` Vladimir Davydov
2019-03-25  8:58       ` Георгий Кириченко
2019-03-25  9:30         ` Vladimir Davydov
2019-03-23 21:07 ` [PATCH 3/4] vinyl: abort affected transactions when space is removed from cache Vladimir Davydov
2019-03-25  5:26   ` [tarantool-patches] " Георгий Кириченко
2019-03-25  8:17     ` Vladimir Davydov
     [not found]       ` <1564677.EMV258VVK2@home.lan>
2019-03-25  9:51         ` Vladimir Davydov
2019-03-25  5:45   ` Георгий Кириченко
2019-03-25  8:21     ` Vladimir Davydov
2019-03-25  9:03       ` Георгий Кириченко
2019-03-28 13:45   ` [tarantool-patches] " Konstantin Osipov
2019-03-28 14:02     ` Vladimir Davydov
2019-03-28 14:12       ` Konstantin Osipov
2019-03-23 21:07 ` [PATCH 4/4] Revert "test: skip ddl test for vinyl on travis" Vladimir Davydov
2019-03-28 13:46   ` [tarantool-patches] " Konstantin Osipov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=77567cf07633e5b3f8c60772cbf7fc702cf15297.1553373792.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 1/4] vinyl: introduce hash of spaces affected by transaction' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox