[PATCH 3/4] vinyl: fix discrepancy between vy_log.tx_size and actual tx len

Vladimir Davydov vdavydov.dev at gmail.com
Tue Mar 27 18:03:02 MSK 2018


When a vylog transaction is rolled back, we always reset vy_log.tx_size.
Generally speaking, this is incorrect as rollback doesn't necessarily
remove all pending records from the tx buffer - there still may be
records committed with vy_log_tx_try_commit() that were left in the
buffer due to write errors.  We don't rollback such records, but we
still reset tx_size, which leads to a discrepancy between vy_log.tx_size
and the actual length of vy_log.tx list, which further on results in an
assertion failure:

  src/box/vy_log.c:698: vy_log_flush: Assertion `i < vy_log.tx_size' failed.

We need vy_log.tx_size to allocate xrow_header array of a proper size so
that we can flush pending vylog records to disk. This isn't a hot path
there, because vylog operations are rare. Besides, we iterate over all
records anyway to fill the xrow_header array. That said, let's remove
vy_log.tx_size altogether and instead calculate the vy_log.tx list
length right in place.
---
 src/box/vy_log.c                 | 22 ++++++++++------------
 test/vinyl/errinj_vylog.result   | 22 ++++++++++++++++++++++
 test/vinyl/errinj_vylog.test.lua |  9 +++++++++
 3 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 720c9491..aeb31781 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -143,8 +143,6 @@ struct vy_log {
 	 * Linked by vy_log_record::in_tx;
 	 */
 	struct stailq tx;
-	/** Number of entries in the @tx list. */
-	int tx_size;
 	/** Start of the current transaction in the pool, for rollback */
 	size_t tx_svp;
 	/**
@@ -670,7 +668,7 @@ vy_log_init(const char *dir)
 static int
 vy_log_flush(void)
 {
-	if (vy_log.tx_size == 0)
+	if (stailq_empty(&vy_log.tx))
 		return 0; /* nothing to do */
 
 	ERROR_INJECT(ERRINJ_VY_LOG_FLUSH, {
@@ -678,13 +676,18 @@ vy_log_flush(void)
 		return -1;
 	});
 
-	struct journal_entry *entry = journal_entry_new(vy_log.tx_size);
+	int tx_size = 0;
+	struct vy_log_record *record;
+	stailq_foreach_entry(record, &vy_log.tx, in_tx)
+		tx_size++;
+
+	struct journal_entry *entry = journal_entry_new(tx_size);
 	if (entry == NULL)
 		return -1;
 
 	struct xrow_header *rows;
 	rows = region_aligned_alloc(&fiber()->gc,
-				    vy_log.tx_size * sizeof(struct xrow_header),
+				    tx_size * sizeof(struct xrow_header),
 				    alignof(struct xrow_header));
 	if (rows == NULL)
 		return -1;
@@ -693,16 +696,15 @@ vy_log_flush(void)
 	 * Encode buffered records.
 	 */
 	int i = 0;
-	struct vy_log_record *record;
 	stailq_foreach_entry(record, &vy_log.tx, in_tx) {
-		assert(i < vy_log.tx_size);
+		assert(i < tx_size);
 		struct xrow_header *row = &rows[i];
 		if (vy_log_record_encode(record, row) < 0)
 			return -1;
 		entry->rows[i] = row;
 		i++;
 	}
-	assert(i == vy_log.tx_size);
+	assert(i == tx_size);
 
 	/*
 	 * Do actual disk writes on behalf of the WAL
@@ -714,7 +716,6 @@ vy_log_flush(void)
 	/* Success. Free flushed records. */
 	region_reset(&vy_log.pool);
 	stailq_create(&vy_log.tx);
-	vy_log.tx_size = 0;
 	return 0;
 }
 
@@ -1057,7 +1058,6 @@ done:
 rollback:
 	stailq_cut_tail(&vy_log.tx, vy_log.tx_begin, &rollback);
 	region_truncate(&vy_log.pool, vy_log.tx_svp);
-	vy_log.tx_size = 0;
 	vy_log.tx_svp = 0;
 	say_verbose("rollback vylog transaction");
 	latch_unlock(&vy_log.latch);
@@ -1091,9 +1091,7 @@ vy_log_write(const struct vy_log_record *record)
 	}
 
 	say_verbose("write vylog record: %s", vy_log_record_str(tx_record));
-
 	stailq_add_tail_entry(&vy_log.tx, tx_record, in_tx);
-	vy_log.tx_size++;
 }
 
 /**
diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result
index 76288e25..f78201c9 100644
--- a/test/vinyl/errinj_vylog.result
+++ b/test/vinyl/errinj_vylog.result
@@ -70,6 +70,9 @@ s:drop()
 -- Check that an index drop/truncate/create record we failed to
 -- write to vylog is flushed along with the next record.
 --
+fiber = require 'fiber'
+---
+...
 s1 = box.schema.space.create('test1', {engine = 'vinyl'})
 ---
 ...
@@ -98,6 +101,13 @@ _ = s1:insert{3, 'c'}
 _ = s2:insert{4, 'd'}
 ---
 ...
+SCHED_TIMEOUT = 0.01
+---
+...
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
+---
+- ok
+...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 ---
 - ok
@@ -120,10 +130,22 @@ _ = s3:create_index('pk')
 _ = s3:insert{6, 'f'}
 ---
 ...
+-- pending records must not be rolled back on error
+box.snapshot()
+---
+- error: Error injection 'vinyl log flush'
+...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 ---
 - ok
 ...
+fiber.sleep(2 * SCHED_TIMEOUT) -- wait for scheduler to unthrottle
+---
+...
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
+---
+- ok
+...
 box.snapshot()
 ---
 - ok
diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua
index 64e51f35..36b3659d 100644
--- a/test/vinyl/errinj_vylog.test.lua
+++ b/test/vinyl/errinj_vylog.test.lua
@@ -35,6 +35,8 @@ s:drop()
 -- Check that an index drop/truncate/create record we failed to
 -- write to vylog is flushed along with the next record.
 --
+fiber = require 'fiber'
+
 s1 = box.schema.space.create('test1', {engine = 'vinyl'})
 _ = s1:create_index('pk')
 _ = s1:insert{1, 'a'}
@@ -48,6 +50,8 @@ box.snapshot()
 _ = s1:insert{3, 'c'}
 _ = s2:insert{4, 'd'}
 
+SCHED_TIMEOUT = 0.01
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 
 s1:drop()
@@ -58,7 +62,12 @@ s3 = box.schema.space.create('test3', {engine = 'vinyl'})
 _ = s3:create_index('pk')
 _ = s3:insert{6, 'f'}
 
+-- pending records must not be rolled back on error
+box.snapshot()
+
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
+fiber.sleep(2 * SCHED_TIMEOUT) -- wait for scheduler to unthrottle
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 
 box.snapshot()
 
-- 
2.11.0




More information about the Tarantool-patches mailing list