[Tarantool-patches] [PATCH 1/1] [tosquash] txn_limbo: local WAL write rollback should start from end
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Wed Jul 8 02:32:41 MSK 2020
Transactions are always rolled back in reverse order. However, on
a local WAL write failure the limbo removed the rolled-back
transaction from its beginning instead of from its end.
Closes #5147
---
Branch: http://github.com/tarantool/tarantool/tree/gh-4842-sync-replication
Issue 1: https://github.com/tarantool/tarantool/issues/4842
Issue 2: https://github.com/tarantool/tarantool/issues/5147
src/box/txn_limbo.c | 8 ++-
test/replication/qsync_errinj.result | 76 ++++++++++++++++++++++++++
test/replication/qsync_errinj.test.lua | 25 +++++++++
3 files changed, 108 insertions(+), 1 deletion(-)
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 884c188b2..709bef9d3 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -109,7 +109,13 @@ void
txn_limbo_abort(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
{
entry->is_rollback = true;
- txn_limbo_remove(limbo, entry);
+ /*
+ * The simple rule about rollback/commit order applies
+ * here as well: commit always in the order of WAL write,
+ * rollback in the reversed order. Rolled back transaction
+ * is always the last.
+ */
+ txn_limbo_pop(limbo, entry);
}
void
diff --git a/test/replication/qsync_errinj.result b/test/replication/qsync_errinj.result
index 15dbc9bb2..49429cc80 100644
--- a/test/replication/qsync_errinj.result
+++ b/test/replication/qsync_errinj.result
@@ -138,6 +138,82 @@ box.space.sync:select{12}
| - - [12]
| ...
+--
+-- gh-5147: at local WAL write fail limbo entries should be
+-- deleted from the end of the limbo, not from the beginning.
+-- Otherwise it should crash.
+--
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 1000}
+ | ---
+ | ...
+box.error.injection.set("ERRINJ_WAL_DELAY", true)
+ | ---
+ | - ok
+ | ...
+ok1, err1 = nil
+ | ---
+ | ...
+f1 = fiber.create(function() \
+ ok1, err1 = pcall(box.space.sync.replace, box.space.sync, {13}) \
+end)
+ | ---
+ | ...
+box.error.injection.set("ERRINJ_WAL_IO", true)
+ | ---
+ | - ok
+ | ...
+box.space.sync:replace({14})
+ | ---
+ | - error: Failed to write to disk
+ | ...
+box.error.injection.set("ERRINJ_WAL_IO", false)
+ | ---
+ | - ok
+ | ...
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+ | ---
+ | - ok
+ | ...
+box.cfg{replication_synchro_quorum = 2}
+ | ---
+ | ...
+box.space.sync:replace({15})
+ | ---
+ | - [15]
+ | ...
+test_run:wait_cond(function() return f1:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+ok1, err1
+ | ---
+ | - true
+ | - [13]
+ | ...
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+ | ---
+ | - - [13]
+ | - []
+ | - - [15]
+ | ...
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+ | ---
+ | - - [13]
+ | - []
+ | - - [15]
+ | ...
+
test_run:cmd('switch default')
| ---
| - true
diff --git a/test/replication/qsync_errinj.test.lua b/test/replication/qsync_errinj.test.lua
index 82abf7999..fe8bb4387 100644
--- a/test/replication/qsync_errinj.test.lua
+++ b/test/replication/qsync_errinj.test.lua
@@ -52,6 +52,31 @@ box.error.injection.set('ERRINJ_WAL_IO', false)
test_run:cmd('restart server replica')
box.space.sync:select{12}
+--
+-- gh-5147: at local WAL write fail limbo entries should be
+-- deleted from the end of the limbo, not from the beginning.
+-- Otherwise it should crash.
+--
+test_run:switch('default')
+fiber = require('fiber')
+box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 1000}
+box.error.injection.set("ERRINJ_WAL_DELAY", true)
+ok1, err1 = nil
+f1 = fiber.create(function() \
+ ok1, err1 = pcall(box.space.sync.replace, box.space.sync, {13}) \
+end)
+box.error.injection.set("ERRINJ_WAL_IO", true)
+box.space.sync:replace({14})
+box.error.injection.set("ERRINJ_WAL_IO", false)
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+box.cfg{replication_synchro_quorum = 2}
+box.space.sync:replace({15})
+test_run:wait_cond(function() return f1:status() == 'dead' end)
+ok1, err1
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+test_run:switch('replica')
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+
test_run:cmd('switch default')
box.cfg{ \
--
2.21.1 (Apple Git-122.3)
More information about the Tarantool-patches
mailing list