From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 3/4] vinyl: fix discrepancy between vy_log.tx_size and actual tx len Date: Tue, 27 Mar 2018 18:03:02 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: When a vylog transaction is rolled back, we always reset vy_log.tx_size. Generally speaking, this is incorrect as rollback doesn't necessarily remove all pending records from the tx buffer - there still may be records committed with vy_log_tx_try_commit() that were left in the buffer due to write errors. We don't rollback such records, but we still reset tx_size, which leads to a discrepancy between vy_log.tx_size and the actual length of vy_log.tx list, which further on results in an assertion failure: src/box/vy_log.c:698: vy_log_flush: Assertion `i < vy_log.tx_size' failed. We need vy_log.tx_size to allocate xrow_header array of a proper size so that we can flush pending vylog records to disk. This isn't a hot path there, because vylog operations are rare. Besides, we iterate over all records anyway to fill the xrow_header array. That said, let's remove vy_log.tx_size altogether and instead calculate the vy_log.tx list length right in place. --- src/box/vy_log.c | 22 ++++++++++------------ test/vinyl/errinj_vylog.result | 22 ++++++++++++++++++++++ test/vinyl/errinj_vylog.test.lua | 9 +++++++++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/box/vy_log.c b/src/box/vy_log.c index 720c9491..aeb31781 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -143,8 +143,6 @@ struct vy_log { * Linked by vy_log_record::in_tx; */ struct stailq tx; - /** Number of entries in the @tx list. */ - int tx_size; /** Start of the current transaction in the pool, for rollback */ size_t tx_svp; /** @@ -670,7 +668,7 @@ vy_log_init(const char *dir) static int vy_log_flush(void) { - if (vy_log.tx_size == 0) + if (stailq_empty(&vy_log.tx)) return 0; /* nothing to do */ ERROR_INJECT(ERRINJ_VY_LOG_FLUSH, { @@ -678,13 +676,18 @@ vy_log_flush(void) return -1; }); - struct journal_entry *entry = journal_entry_new(vy_log.tx_size); + int tx_size = 0; + struct vy_log_record *record; + stailq_foreach_entry(record, &vy_log.tx, in_tx) + tx_size++; + + struct journal_entry *entry = journal_entry_new(tx_size); if (entry == NULL) return -1; struct xrow_header *rows; rows = region_aligned_alloc(&fiber()->gc, - vy_log.tx_size * sizeof(struct xrow_header), + tx_size * sizeof(struct xrow_header), alignof(struct xrow_header)); if (rows == NULL) return -1; @@ -693,16 +696,15 @@ vy_log_flush(void) * Encode buffered records. */ int i = 0; - struct vy_log_record *record; stailq_foreach_entry(record, &vy_log.tx, in_tx) { - assert(i < vy_log.tx_size); + assert(i < tx_size); struct xrow_header *row = &rows[i]; if (vy_log_record_encode(record, row) < 0) return -1; entry->rows[i] = row; i++; } - assert(i == vy_log.tx_size); + assert(i == tx_size); /* * Do actual disk writes on behalf of the WAL @@ -714,7 +716,6 @@ vy_log_flush(void) /* Success. Free flushed records. */ region_reset(&vy_log.pool); stailq_create(&vy_log.tx); - vy_log.tx_size = 0; return 0; } @@ -1057,7 +1058,6 @@ done: rollback: stailq_cut_tail(&vy_log.tx, vy_log.tx_begin, &rollback); region_truncate(&vy_log.pool, vy_log.tx_svp); - vy_log.tx_size = 0; vy_log.tx_svp = 0; say_verbose("rollback vylog transaction"); latch_unlock(&vy_log.latch); @@ -1091,9 +1091,7 @@ vy_log_write(const struct vy_log_record *record) } say_verbose("write vylog record: %s", vy_log_record_str(tx_record)); - stailq_add_tail_entry(&vy_log.tx, tx_record, in_tx); - vy_log.tx_size++; } /** diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result index 76288e25..f78201c9 100644 --- a/test/vinyl/errinj_vylog.result +++ b/test/vinyl/errinj_vylog.result @@ -70,6 +70,9 @@ s:drop() -- Check that an index drop/truncate/create record we failed to -- write to vylog is flushed along with the next record. -- +fiber = require 'fiber' +--- +... s1 = box.schema.space.create('test1', {engine = 'vinyl'}) --- ... @@ -98,6 +101,13 @@ _ = s1:insert{3, 'c'} _ = s2:insert{4, 'd'} --- ... +SCHED_TIMEOUT = 0.01 +--- +... +box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT) +--- +- ok +... box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true); --- - ok @@ -120,10 +130,22 @@ _ = s3:create_index('pk') _ = s3:insert{6, 'f'} --- ... +-- pending records must not be rolled back on error +box.snapshot() +--- +- error: Error injection 'vinyl log flush' +... box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false); --- - ok ... +fiber.sleep(2 * SCHED_TIMEOUT) -- wait for scheduler to unthrottle +--- +... +box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0) +--- +- ok +... box.snapshot() --- - ok diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua index 64e51f35..36b3659d 100644 --- a/test/vinyl/errinj_vylog.test.lua +++ b/test/vinyl/errinj_vylog.test.lua @@ -35,6 +35,8 @@ s:drop() -- Check that an index drop/truncate/create record we failed to -- write to vylog is flushed along with the next record. -- +fiber = require 'fiber' + s1 = box.schema.space.create('test1', {engine = 'vinyl'}) _ = s1:create_index('pk') _ = s1:insert{1, 'a'} @@ -48,6 +50,8 @@ box.snapshot() _ = s1:insert{3, 'c'} _ = s2:insert{4, 'd'} +SCHED_TIMEOUT = 0.01 +box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT) box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true); s1:drop() @@ -58,7 +62,12 @@ s3 = box.schema.space.create('test3', {engine = 'vinyl'}) _ = s3:create_index('pk') _ = s3:insert{6, 'f'} +-- pending records must not be rolled back on error +box.snapshot() + box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false); +fiber.sleep(2 * SCHED_TIMEOUT) -- wait for scheduler to unthrottle +box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0) box.snapshot() -- 2.11.0