Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 4/4] vinyl: purge dropped indexes from vylog on garbage collection
Date: Wed, 23 May 2018 19:10:07 +0300	[thread overview]
Message-ID: <046324f80e8377bc4438b343da75ec99da958947.1527090319.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1527090319.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1527090319.git.vdavydov.dev@gmail.com>

Currently, when an index is dropped, we remove all ranges/slices
associated with it and mark all runs as dropped in vylog immediately.
To find ranges/slices/runs, we use vy_lsm struct, see vy_log_lsm_prune.

The problem is vy_lsm struct may be inconsistent with the state stored
in vylog if index drop races with compaction, because we first write
changes done by compaction task to vylog and only then update vy_lsm
struct, see vy_task_compact_complete. Since write to vylog yields, this
opens a time window during which the index can be dropped. If this
happens, objects that were created by compaction but haven't been logged
yet (such as new runs, slices, ranges) will be deleted from vylog by
index drop, and this will permanently break vylog, making recovery
impossible.

To fix this issue, let's rework garbage collection of objects associated
with dropped indexes as follows. Now when an index is dropped, we write
a single record to vylog, VY_LOG_DROP_LSM, i.e. just mark the index as
dropped without deleting associated objects. Actual index cleanup takes
place in the garbage collection procedure, see vy_gc, which purges all
ranges/slices linked to marked indexes from vylog and marks all their
runs as dropped. When all runs are actually deleted from disk and
"forgotten" in vylog, we remove the index record from vylog by writing
VY_LOG_FORGET_LSM record. Since garbage collection procedure uses vylog
itself instead of vy_lsm struct for iterating over vinyl objects, no
race between index drop and dump/compaction can now lead to broken
vylog.

Closes #3416
---
 src/box/vinyl.c        | 62 +++++++++++++++++++++---------------
 src/box/vy_log.c       | 86 ++++++++++++++++++++++++++++++--------------------
 src/box/vy_log.h       | 21 ++++++++++++
 test/vinyl/gc.result   |  5 +++
 test/vinyl/gc.test.lua |  3 ++
 5 files changed, 117 insertions(+), 60 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 63f1c5ff..f0d26874 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -903,31 +903,6 @@ vinyl_index_commit_modify(struct index *index, int64_t lsn)
 	vy_log_tx_try_commit();
 }
 
-/*
- * Delete all runs, ranges, and slices of a given LSM tree
- * from the metadata log.
- */
-static void
-vy_log_lsm_prune(struct vy_lsm *lsm, int64_t gc_lsn)
-{
-	int loops = 0;
-	for (struct vy_range *range = vy_range_tree_first(lsm->tree);
-	     range != NULL; range = vy_range_tree_next(lsm->tree, range)) {
-		struct vy_slice *slice;
-		rlist_foreach_entry(slice, &range->slices, in_range)
-			vy_log_delete_slice(slice->id);
-		vy_log_delete_range(range->id);
-		if (++loops % VY_YIELD_LOOPS == 0)
-			fiber_sleep(0);
-	}
-	struct vy_run *run;
-	rlist_foreach_entry(run, &lsm->runs, in_lsm) {
-		vy_log_drop_run(run->id, gc_lsn);
-		if (++loops % VY_YIELD_LOOPS == 0)
-			fiber_sleep(0);
-	}
-}
-
 static void
 vinyl_index_commit_drop(struct index *index, int64_t lsn)
 {
@@ -950,7 +925,6 @@ vinyl_index_commit_drop(struct index *index, int64_t lsn)
 	lsm->is_dropped = true;
 
 	vy_log_tx_begin();
-	vy_log_lsm_prune(lsm, checkpoint_last(NULL));
 	vy_log_drop_lsm(lsm->id, lsn);
 	vy_log_tx_try_commit();
 }
@@ -3352,6 +3326,38 @@ vy_gc_run(struct vy_env *env,
 }
 
 /**
+ * Given a dropped LSM tree, delete all its ranges and slices and
+ * mark all its runs as dropped. Forget the LSM tree if it has no
+ * associated objects.
+ */
+static void
+vy_gc_lsm(struct vy_lsm_recovery_info *lsm_info)
+{
+	assert(lsm_info->drop_lsn >= 0);
+
+	vy_log_tx_begin();
+	struct vy_range_recovery_info *range_info;
+	rlist_foreach_entry(range_info, &lsm_info->ranges, in_lsm) {
+		struct vy_slice_recovery_info *slice_info;
+		rlist_foreach_entry(slice_info, &range_info->slices, in_range)
+			vy_log_delete_slice(slice_info->id);
+		vy_log_delete_range(range_info->id);
+	}
+	struct vy_run_recovery_info *run_info;
+	rlist_foreach_entry(run_info, &lsm_info->runs, in_lsm) {
+		if (!run_info->is_dropped) {
+			run_info->is_dropped = true;
+			run_info->gc_lsn = lsm_info->drop_lsn;
+			vy_log_drop_run(run_info->id, run_info->gc_lsn);
+		}
+	}
+	if (rlist_empty(&lsm_info->ranges) &&
+	    rlist_empty(&lsm_info->runs))
+		vy_log_forget_lsm(lsm_info->id);
+	vy_log_tx_try_commit();
+}
+
+/**
  * Delete unused run files stored in the recovery context.
  * @param env      Vinyl environment.
  * @param recovery Recovery context.
@@ -3365,6 +3371,10 @@ vy_gc(struct vy_env *env, struct vy_recovery *recovery,
 	int loops = 0;
 	struct vy_lsm_recovery_info *lsm_info;
 	rlist_foreach_entry(lsm_info, &recovery->lsms, in_recovery) {
+		if ((gc_mask & VY_GC_DROPPED) != 0 &&
+		    lsm_info->drop_lsn >= 0)
+			vy_gc_lsm(lsm_info);
+
 		struct vy_run_recovery_info *run_info;
 		rlist_foreach_entry(run_info, &lsm_info->runs, in_lsm) {
 			if ((run_info->is_dropped &&
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 4e459459..4d561354 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -120,6 +120,7 @@ static const char *vy_log_type_name[] = {
 	[VY_LOG_SNAPSHOT]		= "snapshot",
 	[VY_LOG_TRUNCATE_LSM]		= "truncate_lsm",
 	[VY_LOG_MODIFY_LSM]		= "modify_lsm",
+	[VY_LOG_FORGET_LSM]		= "forget_lsm",
 };
 
 /** Metadata log object. */
@@ -1173,6 +1174,21 @@ vy_recovery_hash_index_id(struct vy_recovery *recovery,
 	return 0;
 }
 
+/**
+ * Remove the entry corresponding to the space_id/index_id
+ * from vy_recovery::index_id_hash.
+ */
+static void
+vy_recovery_unhash_index_id(struct vy_recovery *recovery,
+			    uint32_t space_id, uint32_t index_id)
+{
+	struct mh_i64ptr_t *h = recovery->index_id_hash;
+	int64_t key = vy_recovery_index_id_hash(space_id, index_id);
+	mh_int_t k = mh_i64ptr_find(h, key, NULL);
+	if (k != mh_end(h))
+		mh_i64ptr_del(h, k, NULL);
+}
+
 /** Lookup an LSM tree in vy_recovery::index_id_hash map. */
 struct vy_lsm_recovery_info *
 vy_recovery_lsm_by_index_id(struct vy_recovery *recovery,
@@ -1369,7 +1385,6 @@ vy_recovery_modify_lsm(struct vy_recovery *recovery, int64_t id,
 /**
  * Handle a VY_LOG_DROP_LSM log record.
  * This function marks the LSM tree with ID @id as dropped.
- * All ranges and runs of the LSM tree must have been deleted by now.
  * Returns 0 on success, -1 if ID not found or LSM tree is already marked.
  */
 static int
@@ -1389,23 +1404,44 @@ vy_recovery_drop_lsm(struct vy_recovery *recovery, int64_t id, int64_t drop_lsn)
 				    (long long)id));
 		return -1;
 	}
-	if (!rlist_empty(&lsm->ranges)) {
+	assert(drop_lsn >= 0);
+	lsm->drop_lsn = drop_lsn;
+	return 0;
+}
+
+/**
+ * Handle a VY_LOG_FORGET_LSM log record.
+ * This function removes the LSM tree with ID @id from the context.
+ * All ranges and runs of the LSM tree must have been deleted by now.
+ * Returns 0 on success, -1 if ID was not found or there are objects
+ * associated with the LSM tree.
+ */
+static int
+vy_recovery_forget_lsm(struct vy_recovery *recovery, int64_t id)
+{
+	struct mh_i64ptr_t *h = recovery->lsm_hash;
+	mh_int_t k = mh_i64ptr_find(h, id, NULL);
+	if (k == mh_end(h)) {
 		diag_set(ClientError, ER_INVALID_VYLOG_FILE,
-			 tt_sprintf("Dropped LSM tree %lld has ranges",
+			 tt_sprintf("LSM tree %lld forgotten but not registered",
 				    (long long)id));
 		return -1;
 	}
-	struct vy_run_recovery_info *run;
-	rlist_foreach_entry(run, &lsm->runs, in_lsm) {
-		if (!run->is_dropped && !run->is_incomplete) {
-			diag_set(ClientError, ER_INVALID_VYLOG_FILE,
-				 tt_sprintf("Dropped LSM tree %lld has active "
-					    "runs", (long long)id));
-			return -1;
-		}
+	struct vy_lsm_recovery_info *lsm = mh_i64ptr_node(h, k)->val;
+	if (!rlist_empty(&lsm->ranges) || !rlist_empty(&lsm->runs)) {
+		diag_set(ClientError, ER_INVALID_VYLOG_FILE,
+			 tt_sprintf("Forgotten LSM tree %lld has ranges/runs",
+				    (long long)id));
+		return -1;
 	}
-	assert(drop_lsn >= 0);
-	lsm->drop_lsn = drop_lsn;
+	mh_i64ptr_del(h, k, NULL);
+	rlist_del_entry(lsm, in_recovery);
+	if (lsm == vy_recovery_lsm_by_index_id(recovery,
+				lsm->space_id, lsm->index_id))
+		vy_recovery_unhash_index_id(recovery,
+				lsm->space_id, lsm->index_id);
+	free(lsm->key_parts);
+	free(lsm);
 	return 0;
 }
 
@@ -1427,12 +1463,6 @@ vy_recovery_dump_lsm(struct vy_recovery *recovery,
 				    (long long)id));
 		return -1;
 	}
-	if (lsm->drop_lsn >= 0) {
-		diag_set(ClientError, ER_INVALID_VYLOG_FILE,
-			 tt_sprintf("Dump of deleted LSM tree %lld",
-				    (long long)id));
-		return -1;
-	}
 	lsm->dump_lsn = dump_lsn;
 	return 0;
 }
@@ -1527,13 +1557,6 @@ vy_recovery_create_run(struct vy_recovery *recovery, int64_t lsm_id,
 				    (long long)lsm_id));
 		return -1;
 	}
-	if (lsm->drop_lsn >= 0) {
-		diag_set(ClientError, ER_INVALID_VYLOG_FILE,
-			 tt_sprintf("Run %lld created for deleted "
-				    "LSM tree %lld", (long long)run_id,
-				    (long long)lsm_id));
-		return -1;
-	}
 	struct vy_run_recovery_info *run;
 	run = vy_recovery_lookup_run(recovery, run_id);
 	if (run != NULL && run->is_dropped) {
@@ -1851,6 +1874,9 @@ vy_recovery_process_record(struct vy_recovery *recovery,
 		rc = vy_recovery_drop_lsm(recovery, record->lsm_id,
 					  record->drop_lsn);
 		break;
+	case VY_LOG_FORGET_LSM:
+		rc = vy_recovery_forget_lsm(recovery, record->lsm_id);
+		break;
 	case VY_LOG_INSERT_RANGE:
 		rc = vy_recovery_insert_range(recovery, record->lsm_id,
 				record->range_id, record->begin, record->end);
@@ -2178,14 +2204,6 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery)
 
 	struct vy_lsm_recovery_info *lsm;
 	rlist_foreach_entry(lsm, &recovery->lsms, in_recovery) {
-		/*
-		 * Purge dropped LSM trees that are not referenced by runs
-		 * (and thus not needed for garbage collection) from the
-		 * log on rotation.
-		 */
-		if (lsm->drop_lsn >= 0 && rlist_empty(&lsm->runs))
-			continue;
-
 		/* Create the log file on the first write. */
 		if (!xlog_is_open(&xlog) &&
 		    xdir_create_xlog(&vy_log.dir, &xlog, vclock) != 0)
diff --git a/src/box/vy_log.h b/src/box/vy_log.h
index 22c31825..d77cb298 100644
--- a/src/box/vy_log.h
+++ b/src/box/vy_log.h
@@ -169,6 +169,16 @@ enum vy_log_record_type {
 	 * Requires vy_log_record::lsm_id, key_def, modify_lsn.
 	 */
 	VY_LOG_MODIFY_LSM		= 13,
+	/**
+	 * Forget an LSM tree.
+	 * Requires vy_log_record::lsm_id.
+	 *
+	 * A record of this type indicates that all objects associated
+	 * with the given LSM tree have been cleaned up from vylog and
+	 * so the LSM tree is not needed any longer and can be removed
+	 * from vylog on the next rotation.
+	 */
+	VY_LOG_FORGET_LSM		= 14,
 
 	vy_log_record_type_MAX
 };
@@ -546,6 +556,17 @@ vy_log_modify_lsm(int64_t id, const struct key_def *key_def, int64_t modify_lsn)
 	vy_log_write(&record);
 }
 
+/** Helper to log  a vinyl LSM tree cleanup. */
+static inline void
+vy_log_forget_lsm(int64_t id)
+{
+	struct vy_log_record record;
+	vy_log_record_init(&record);
+	record.type = VY_LOG_FORGET_LSM;
+	record.lsm_id = id;
+	vy_log_write(&record);
+}
+
 /** Helper to log a vinyl LSM tree drop. */
 static inline void
 vy_log_drop_lsm(int64_t id, int64_t drop_lsn)
diff --git a/test/vinyl/gc.result b/test/vinyl/gc.result
index b709135c..bab6f31b 100644
--- a/test/vinyl/gc.result
+++ b/test/vinyl/gc.result
@@ -90,6 +90,11 @@ files = ls_data()
 --
 -- Check that vylog files are removed if vinyl is not used.
 --
+-- This purges records corresponding to dropped runs, but
+-- dropped index records are still stored in vylog.
+gc()
+---
+...
 files = ls_vylog()
 ---
 ...
diff --git a/test/vinyl/gc.test.lua b/test/vinyl/gc.test.lua
index 32078f00..eb56a1c6 100644
--- a/test/vinyl/gc.test.lua
+++ b/test/vinyl/gc.test.lua
@@ -44,6 +44,9 @@ files = ls_data()
 -- Check that vylog files are removed if vinyl is not used.
 --
 
+-- This purges records corresponding to dropped runs, but
+-- dropped index records are still stored in vylog.
+gc()
 files = ls_vylog()
 #files == 2 or {files, gc_info()}
 
-- 
2.11.0

      parent reply	other threads:[~2018-05-23 16:10 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-23 16:10 [PATCH 0/4] vinyl: fix index drop vs compaction race Vladimir Davydov
2018-05-23 16:10 ` [PATCH 1/4] vinyl: do not reuse lsm objects during recovery from vylog Vladimir Davydov
2018-05-23 16:10 ` [PATCH 2/4] alter: pass lsn of index drop record to engine Vladimir Davydov
2018-05-23 16:10 ` [PATCH 3/4] vinyl: store lsn of index drop record in vylog Vladimir Davydov
2018-05-23 16:10 ` Vladimir Davydov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=046324f80e8377bc4438b343da75ec99da958947.1527090319.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 4/4] vinyl: purge dropped indexes from vylog on garbage collection' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox