[PATCH v3 1/3] vinyl: do not yield on dump completion

Vladimir Davydov vdavydov.dev at gmail.com
Thu Jun 7 13:56:16 MSK 2018


The fact that we may yield after adding a new slice created by dump,
but before removing the dumped in-memory index from the LSM tree,
complicates the read iterator logic: it has to detect such a case and
filter out sources that contain duplicates. This logic relies on the
fact that no two slices of the same range intersect by LSN. For the
sake of ALTER we have to relax this limitation, because statements
inserted during index build can have arbitrary (not monotonically
growing) LSNs, so the no-LSN-intersection property will only hold for
individual keys, not for whole slices. Since there shouldn't be more
than 1000 ranges in the same LSM tree, yielding doesn't make much
sense: iterating over the whole range tree should be pretty fast.
Besides, dump isn't done frequently. So let's remove yielding
altogether.

Needed for #1653
---
 src/box/vy_read_iterator.c | 13 -------------
 src/box/vy_scheduler.c     | 39 ++++++---------------------------------
 2 files changed, 6 insertions(+), 46 deletions(-)

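A minimal sketch of the periodic-yield idiom removed here (hypothetical
and simplified; process_ranges() is an illustrative name, not a function
from vy_scheduler.c): a long loop running in the scheduler fiber calls
fiber_sleep(0) every VY_YIELD_LOOPS iterations to let other fibers run.

enum { VY_YIELD_LOOPS = 128 };

static void
process_ranges(struct vy_lsm *lsm, struct vy_range *begin_range,
	       struct vy_range *end_range)
{
	struct vy_range *range;
	int loops = 0;
	for (range = begin_range; range != end_range;
	     range = vy_range_tree_next(lsm->tree, range)) {
		/* ... per-range work, e.g. adding a new run slice ... */
		if (++loops % VY_YIELD_LOOPS == 0)
			fiber_sleep(0); /* let other fibers run */
	}
}

With the range count capped at about a thousand, such a loop finishes
quickly even without the yields, which is why they are dropped instead
of teaching the read iterator to tolerate the intermediate state.
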
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index 61c3b683..d8d66229 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -635,19 +635,6 @@ vy_read_iterator_add_disk(struct vy_read_iterator *itr)
 	 * format in vy_mem.
 	 */
 	rlist_foreach_entry(slice, &itr->curr_range->slices, in_range) {
-		/*
-		 * vy_task_dump_complete() may yield after adding
-		 * a new run slice to a range and before removing
-		 * dumped in-memory trees. We must not add both
-		 * the slice and the trees in this case, because
-		 * the read iterator can't deal with duplicates.
-		 * Since lsm->dump_lsn is bumped after deletion
-		 * of dumped in-memory trees, we can filter out
-		 * the run slice containing duplicates by LSN.
-		 */
-		if (slice->run->info.min_lsn > lsm->dump_lsn)
-			continue;
-		assert(slice->run->info.max_lsn <= lsm->dump_lsn);
 		struct vy_read_src *sub_src = vy_read_iterator_add_src(itr);
 		vy_run_iterator_open(&sub_src->run_iterator,
 				     &lsm->stat.disk.iterator, slice,
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 3631f644..f4746d68 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -57,16 +57,6 @@
 #include "trivia/util.h"
 #include "tt_pthread.h"
 
-/**
- * Yield after iterating over this many objects (e.g. ranges).
- * Yield more often in debug mode.
- */
-#if defined(NDEBUG)
-enum { VY_YIELD_LOOPS = 128 };
-#else
-enum { VY_YIELD_LOOPS = 2 };
-#endif
-
 /* Min and max values for vy_scheduler::timeout. */
 #define VY_SCHEDULER_TIMEOUT_MIN	1
 #define VY_SCHEDULER_TIMEOUT_MAX	60
@@ -712,7 +702,7 @@ vy_task_dump_complete(struct vy_scheduler *scheduler, struct vy_task *task)
 	struct vy_slice **new_slices, *slice;
 	struct vy_range *range, *begin_range, *end_range;
 	struct tuple *min_key, *max_key;
-	int i, loops = 0;
+	int i;
 
 	assert(lsm->is_dumping);
 
@@ -777,12 +767,6 @@ vy_task_dump_complete(struct vy_scheduler *scheduler, struct vy_task *task)
 
 		assert(i < lsm->range_count);
 		new_slices[i] = slice;
-		/*
-		 * It's OK to yield here for the range tree can only
-		 * be changed from the scheduler fiber.
-		 */
-		if (++loops % VY_YIELD_LOOPS == 0)
-			fiber_sleep(0);
 	}
 
 	/*
@@ -797,9 +781,6 @@ vy_task_dump_complete(struct vy_scheduler *scheduler, struct vy_task *task)
 		vy_log_insert_slice(range->id, new_run->id, slice->id,
 				    tuple_data_or_null(slice->begin),
 				    tuple_data_or_null(slice->end));
-
-		if (++loops % VY_YIELD_LOOPS == 0)
-			fiber_sleep(0); /* see comment above */
 	}
 	vy_log_dump_lsm(lsm->id, dump_lsn);
 	if (vy_log_tx_commit() < 0)
@@ -816,6 +797,11 @@ vy_task_dump_complete(struct vy_scheduler *scheduler, struct vy_task *task)
 
 	/*
 	 * Add new slices to ranges.
+	 *
+	 * Note, we must not yield after this point, because if we
+	 * do, a concurrent read iterator may see an inconsistent
+	 * LSM tree state, when the same statement is present twice,
+	 * in memory and on disk.
 	 */
 	for (range = begin_range, i = 0; range != end_range;
 	     range = vy_range_tree_next(lsm->tree, range), i++) {
@@ -829,17 +815,6 @@ vy_task_dump_complete(struct vy_scheduler *scheduler, struct vy_task *task)
 			vy_range_heap_update(&lsm->range_heap,
 					     &range->heap_node);
 		range->version++;
-		/*
-		 * If we yield here, a concurrent fiber will see
-		 * a range with a run slice containing statements
-		 * present in the in-memory indexes of the LSM tree.
-		 * This is OK, because read iterator won't use the
-		 * new run slice until lsm->dump_lsn is bumped,
-		 * which is only done after in-memory trees are
-		 * removed (see vy_read_iterator_add_disk()).
-		 */
-		if (++loops % VY_YIELD_LOOPS == 0)
-			fiber_sleep(0);
 	}
 	free(new_slices);
 
@@ -878,8 +853,6 @@ fail_free_slices:
 		slice = new_slices[i];
 		if (slice != NULL)
 			vy_slice_delete(slice);
-		if (++loops % VY_YIELD_LOOPS == 0)
-			fiber_sleep(0);
 	}
 	free(new_slices);
 fail:
-- 
2.11.0



