[PATCH 3/4] vinyl: fix discrepancy between vy_log.tx_size and actual tx len
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Mar 27 18:03:02 MSK 2018
When a vylog transaction is rolled back, we always reset vy_log.tx_size.
Generally speaking, this is incorrect as rollback doesn't necessarily
remove all pending records from the tx buffer - there still may be
records committed with vy_log_tx_try_commit() that were left in the
buffer due to write errors. We don't rollback such records, but we
still reset tx_size, which leads to a discrepancy between vy_log.tx_size
and the actual length of vy_log.tx list, which further on results in an
assertion failure:
src/box/vy_log.c:698: vy_log_flush: Assertion `i < vy_log.tx_size' failed.
We need vy_log.tx_size to allocate xrow_header array of a proper size so
that we can flush pending vylog records to disk. This isn't a hot path
there, because vylog operations are rare. Besides, we iterate over all
records anyway to fill the xrow_header array. That said, let's remove
vy_log.tx_size altogether and instead calculate the vy_log.tx list
length right in place.
---
src/box/vy_log.c | 22 ++++++++++------------
test/vinyl/errinj_vylog.result | 22 ++++++++++++++++++++++
test/vinyl/errinj_vylog.test.lua | 9 +++++++++
3 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 720c9491..aeb31781 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -143,8 +143,6 @@ struct vy_log {
* Linked by vy_log_record::in_tx;
*/
struct stailq tx;
- /** Number of entries in the @tx list. */
- int tx_size;
/** Start of the current transaction in the pool, for rollback */
size_t tx_svp;
/**
@@ -670,7 +668,7 @@ vy_log_init(const char *dir)
static int
vy_log_flush(void)
{
- if (vy_log.tx_size == 0)
+ if (stailq_empty(&vy_log.tx))
return 0; /* nothing to do */
ERROR_INJECT(ERRINJ_VY_LOG_FLUSH, {
@@ -678,13 +676,18 @@ vy_log_flush(void)
return -1;
});
- struct journal_entry *entry = journal_entry_new(vy_log.tx_size);
+ int tx_size = 0;
+ struct vy_log_record *record;
+ stailq_foreach_entry(record, &vy_log.tx, in_tx)
+ tx_size++;
+
+ struct journal_entry *entry = journal_entry_new(tx_size);
if (entry == NULL)
return -1;
struct xrow_header *rows;
rows = region_aligned_alloc(&fiber()->gc,
- vy_log.tx_size * sizeof(struct xrow_header),
+ tx_size * sizeof(struct xrow_header),
alignof(struct xrow_header));
if (rows == NULL)
return -1;
@@ -693,16 +696,15 @@ vy_log_flush(void)
* Encode buffered records.
*/
int i = 0;
- struct vy_log_record *record;
stailq_foreach_entry(record, &vy_log.tx, in_tx) {
- assert(i < vy_log.tx_size);
+ assert(i < tx_size);
struct xrow_header *row = &rows[i];
if (vy_log_record_encode(record, row) < 0)
return -1;
entry->rows[i] = row;
i++;
}
- assert(i == vy_log.tx_size);
+ assert(i == tx_size);
/*
* Do actual disk writes on behalf of the WAL
@@ -714,7 +716,6 @@ vy_log_flush(void)
/* Success. Free flushed records. */
region_reset(&vy_log.pool);
stailq_create(&vy_log.tx);
- vy_log.tx_size = 0;
return 0;
}
@@ -1057,7 +1058,6 @@ done:
rollback:
stailq_cut_tail(&vy_log.tx, vy_log.tx_begin, &rollback);
region_truncate(&vy_log.pool, vy_log.tx_svp);
- vy_log.tx_size = 0;
vy_log.tx_svp = 0;
say_verbose("rollback vylog transaction");
latch_unlock(&vy_log.latch);
@@ -1091,9 +1091,7 @@ vy_log_write(const struct vy_log_record *record)
}
say_verbose("write vylog record: %s", vy_log_record_str(tx_record));
-
stailq_add_tail_entry(&vy_log.tx, tx_record, in_tx);
- vy_log.tx_size++;
}
/**
diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result
index 76288e25..f78201c9 100644
--- a/test/vinyl/errinj_vylog.result
+++ b/test/vinyl/errinj_vylog.result
@@ -70,6 +70,9 @@ s:drop()
-- Check that an index drop/truncate/create record we failed to
-- write to vylog is flushed along with the next record.
--
+fiber = require 'fiber'
+---
+...
s1 = box.schema.space.create('test1', {engine = 'vinyl'})
---
...
@@ -98,6 +101,13 @@ _ = s1:insert{3, 'c'}
_ = s2:insert{4, 'd'}
---
...
+SCHED_TIMEOUT = 0.01
+---
+...
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
+---
+- ok
+...
box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
---
- ok
@@ -120,10 +130,22 @@ _ = s3:create_index('pk')
_ = s3:insert{6, 'f'}
---
...
+-- pending records must not be rolled back on error
+box.snapshot()
+---
+- error: Error injection 'vinyl log flush'
+...
box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
---
- ok
...
+fiber.sleep(2 * SCHED_TIMEOUT) -- wait for scheduler to unthrottle
+---
+...
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
+---
+- ok
+...
box.snapshot()
---
- ok
diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua
index 64e51f35..36b3659d 100644
--- a/test/vinyl/errinj_vylog.test.lua
+++ b/test/vinyl/errinj_vylog.test.lua
@@ -35,6 +35,8 @@ s:drop()
-- Check that an index drop/truncate/create record we failed to
-- write to vylog is flushed along with the next record.
--
+fiber = require 'fiber'
+
s1 = box.schema.space.create('test1', {engine = 'vinyl'})
_ = s1:create_index('pk')
_ = s1:insert{1, 'a'}
@@ -48,6 +50,8 @@ box.snapshot()
_ = s1:insert{3, 'c'}
_ = s2:insert{4, 'd'}
+SCHED_TIMEOUT = 0.01
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
s1:drop()
@@ -58,7 +62,12 @@ s3 = box.schema.space.create('test3', {engine = 'vinyl'})
_ = s3:create_index('pk')
_ = s3:insert{6, 'f'}
+-- pending records must not be rolled back on error
+box.snapshot()
+
box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
+fiber.sleep(2 * SCHED_TIMEOUT) -- wait for scheduler to unthrottle
+box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
box.snapshot()
--
2.11.0
More information about the Tarantool-patches
mailing list