From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id F34C521572 for ; Wed, 6 Feb 2019 03:27:59 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id XGOhpTloEpq6 for ; Wed, 6 Feb 2019 03:27:59 -0500 (EST) Received: from smtp18.mail.ru (smtp18.mail.ru [94.100.176.155]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 55922215F9 for ; Wed, 6 Feb 2019 03:27:59 -0500 (EST) From: Georgy Kirichenko Subject: [tarantool-patches] [PATCH 1/3] Do not promote wal vclock for failed writes Date: Wed, 6 Feb 2019 11:29:57 +0300 Message-Id: <10dda7d827c22bb8af33de31646f5ad34576f760.1549441084.git.georgy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Georgy Kirichenko Increase replica lsn only if row was successfully written to disk. This prevents wal from lsn gaps in case of IO errors. Obsoletes xlog/panic_on_lsn_gap.test. Needs for #2283 --- src/box/wal.c | 19 +- test/box/errinj.result | 23 ++ test/box/errinj.test.lua | 8 + test/xlog/errinj.result | 1 - test/xlog/panic_on_lsn_gap.result | 377 ---------------------------- test/xlog/panic_on_lsn_gap.test.lua | 136 ---------- 6 files changed, 44 insertions(+), 520 deletions(-) delete mode 100644 test/xlog/panic_on_lsn_gap.result delete mode 100644 test/xlog/panic_on_lsn_gap.test.lua diff --git a/src/box/wal.c b/src/box/wal.c index 3b50d3629..a55b544aa 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -901,16 +901,16 @@ wal_writer_begin_rollback(struct wal_writer *writer) } static void -wal_assign_lsn(struct wal_writer *writer, struct xrow_header **row, +wal_assign_lsn(struct vclock *vclock, struct xrow_header **row, struct xrow_header **end) { /** Assign LSN to all local rows. */ for ( ; row < end; row++) { if ((*row)->replica_id == 0) { - (*row)->lsn = vclock_inc(&writer->vclock, instance_id); + (*row)->lsn = vclock_inc(vclock, instance_id); (*row)->replica_id = instance_id; } else { - vclock_follow_xrow(&writer->vclock, *row); + vclock_follow_xrow(vclock, *row); } } } @@ -922,6 +922,11 @@ wal_write_to_disk(struct cmsg *msg) struct wal_msg *wal_msg = (struct wal_msg *) msg; struct error *error; + /* Local vclock copy. */ + struct vclock vclock; + vclock_create(&vclock); + vclock_copy(&vclock, &writer->vclock); + struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL); while (inj != NULL && inj->bparam) usleep(10); @@ -974,14 +979,15 @@ wal_write_to_disk(struct cmsg *msg) struct journal_entry *entry; struct stailq_entry *last_committed = NULL; stailq_foreach_entry(entry, &wal_msg->commit, fifo) { - wal_assign_lsn(writer, entry->rows, entry->rows + entry->n_rows); - entry->res = vclock_sum(&writer->vclock); + wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows); + entry->res = vclock_sum(&vclock); rc = xlog_write_entry(l, entry); if (rc < 0) goto done; if (rc > 0) { writer->checkpoint_wal_size += rc; last_committed = &entry->fifo; + vclock_copy(&writer->vclock, &vclock); } /* rc == 0: the write is buffered in xlog_tx */ } @@ -991,6 +997,7 @@ wal_write_to_disk(struct cmsg *msg) writer->checkpoint_wal_size += rc; last_committed = stailq_last(&wal_msg->commit); + vclock_copy(&writer->vclock, &vclock); /* * Notify TX if the checkpoint threshold has been exceeded. @@ -1185,7 +1192,7 @@ wal_write_in_wal_mode_none(struct journal *journal, struct journal_entry *entry) { struct wal_writer *writer = (struct wal_writer *) journal; - wal_assign_lsn(writer, entry->rows, entry->rows + entry->n_rows); + wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows); int64_t old_lsn = vclock_get(&replicaset.vclock, instance_id); int64_t new_lsn = vclock_get(&writer->vclock, instance_id); if (new_lsn > old_lsn) { diff --git a/test/box/errinj.result b/test/box/errinj.result index 12303670e..1d9a16d8d 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -141,6 +141,15 @@ errinj.set("ERRINJ_TESTING", false) --- - ok ... +env = require('test_run') +--- +... +test_run = env.new() +--- +... +lsn1 = box.info.vclock[box.info.id] +--- +... -- Check how well we handle a failed log write errinj.set("ERRINJ_WAL_IO", true) --- @@ -161,6 +170,11 @@ space:insert{1} --- - [1] ... +-- Check vclock was promoted only one time +box.info.vclock[box.info.id] == lsn1 + 1 +--- +- true +... errinj.set("ERRINJ_WAL_IO", true) --- - ok @@ -180,6 +194,15 @@ errinj.set("ERRINJ_WAL_IO", false) --- - ok ... +space:update(1, {{'=', 2, 2}}) +--- +- [1, 2] +... +-- Check vclock was promoted only two times +box.info.vclock[box.info.id] == lsn1 + 2 +--- +- true +... space:truncate() --- ... diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index 6f491d623..566630f67 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -12,17 +12,25 @@ errinj.set("ERRINJ_TESTING", true) space:select{222444} errinj.set("ERRINJ_TESTING", false) +env = require('test_run') +test_run = env.new() +lsn1 = box.info.vclock[box.info.id] -- Check how well we handle a failed log write errinj.set("ERRINJ_WAL_IO", true) space:insert{1} space:get{1} errinj.set("ERRINJ_WAL_IO", false) space:insert{1} +-- Check vclock was promoted only one time +box.info.vclock[box.info.id] == lsn1 + 1 errinj.set("ERRINJ_WAL_IO", true) space:update(1, {{'=', 2, 2}}) space:get{1} space:get{2} errinj.set("ERRINJ_WAL_IO", false) +space:update(1, {{'=', 2, 2}}) +-- Check vclock was promoted only two times +box.info.vclock[box.info.id] == lsn1 + 2 space:truncate() -- Check a failed log rotation diff --git a/test/xlog/errinj.result b/test/xlog/errinj.result index 390404b47..7f15bef35 100644 --- a/test/xlog/errinj.result +++ b/test/xlog/errinj.result @@ -43,7 +43,6 @@ require('fio').glob(name .. "/*.xlog") --- - - xlog/00000000000000000000.xlog - xlog/00000000000000000001.xlog - - xlog/00000000000000000002.xlog ... test_run:cmd('restart server default with cleanup=1') -- gh-881 iproto request with wal IO error diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result deleted file mode 100644 index 4dd1291f8..000000000 --- a/test/xlog/panic_on_lsn_gap.result +++ /dev/null @@ -1,377 +0,0 @@ --- --- we actually need to know what xlogs the server creates, --- so start from a clean state --- --- --- Check how the server is able to find the next --- xlog if there are failed writes (lsn gaps). --- -env = require('test_run') ---- -... -test_run = env.new() ---- -... -test_run:cmd("create server panic with script='xlog/panic.lua'") ---- -- true -... -test_run:cmd("start server panic") ---- -- true -... -test_run:cmd("switch panic") ---- -- true -... -box.info.vclock ---- -- {} -... -s = box.space._schema ---- -... --- we need to have at least one record in the --- xlog otherwise the server believes that there --- is an lsn gap during recovery. --- -s:replace{"key", 'test 1'} ---- -- ['key', 'test 1'] -... -box.info.vclock ---- -- {1: 1} -... -box.error.injection.set("ERRINJ_WAL_WRITE", true) ---- -- ok -... -t = {} ---- -... --- --- Try to insert rows, so that it's time to --- switch WALs. No switch will happen though, --- since no writes were made. --- -test_run:cmd("setopt delimiter ';'") ---- -- true -... -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; ---- -... -test_run:cmd("setopt delimiter ''"); ---- -- true -... -t ---- -- - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk -... --- --- Before restart: our LSN is 1, because --- LSN is promoted in tx only on successful --- WAL write. --- -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -box.info.vclock ---- -- {1: 1} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog -... -test_run:cmd("restart server panic") --- --- After restart: our LSN is the LSN of the --- last empty WAL created on shutdown, i.e. 11. --- -box.info.vclock ---- -- {1: 11} -... -box.space._schema:select{'key'} ---- -- - ['key', 'test 1'] -... -box.error.injection.set("ERRINJ_WAL_WRITE", true) ---- -- ok -... -t = {} ---- -... -s = box.space._schema ---- -... --- --- now do the same --- -test_run:cmd("setopt delimiter ';'") ---- -- true -... -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; ---- -... -test_run:cmd("setopt delimiter ''"); ---- -- true -... -t ---- -- - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk -... -box.info.vclock ---- -- {1: 11} -... -box.error.injection.set("ERRINJ_WAL_WRITE", false) ---- -- ok -... --- --- Write a good row after a series of failed --- rows. There is a gap in LSN, correct, --- but it's *inside* a single WAL, so doesn't --- affect WAL search in recover_remaining_wals() --- -s:replace{'key', 'test 2'} ---- -- ['key', 'test 2'] -... --- --- notice that vclock before and after --- server stop is the same -- because it's --- recorded in the last row --- -box.info.vclock ---- -- {1: 22} -... -test_run:cmd("restart server panic") -box.info.vclock ---- -- {1: 22} -... -box.space._schema:select{'key'} ---- -- - ['key', 'test 2'] -... --- list all the logs -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog -... --- now insert 10 rows - so that the next --- row will need to switch the WAL -test_run:cmd("setopt delimiter ';'") ---- -- true -... -for i=1,box.cfg.rows_per_wal do - box.space._schema:replace{"key", 'test 3'} -end; ---- -... -test_run:cmd("setopt delimiter ''"); ---- -- true -... --- the next insert should switch xlog, but aha - it fails --- a new xlog file is created but has 0 rows -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog -... -box.error.injection.set("ERRINJ_WAL_WRITE", true) ---- -- ok -... -box.space._schema:replace{"key", 'test 3'} ---- -- error: Failed to write to disk -... -box.info.vclock ---- -- {1: 32} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog -... --- and the next one (just to be sure -box.space._schema:replace{"key", 'test 3'} ---- -- error: Failed to write to disk -... -box.info.vclock ---- -- {1: 32} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog -... -box.error.injection.set("ERRINJ_WAL_WRITE", false) ---- -- ok -... --- then a success -box.space._schema:replace{"key", 'test 4'} ---- -- ['key', 'test 4'] -... -box.info.vclock ---- -- {1: 35} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog -... --- restart is ok -test_run:cmd("restart server panic") -box.space._schema:select{'key'} ---- -- - ['key', 'test 4'] -... --- --- Check that if there's an LSN gap between two WALs --- that appeared due to a disk error and no files is --- actually missing, we won't panic on recovery. --- -box.space._schema:replace{'key', 'test 4'} -- creates new WAL ---- -- ['key', 'test 4'] -... -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) ---- -- ok -... -box.space._schema:replace{'key', 'test 5'} -- fails, makes gap ---- -- error: Failed to write to disk -... -box.snapshot() -- fails, rotates WAL ---- -- error: Error injection 'xlog write injection' -... -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false) ---- -- ok -... -box.space._schema:replace{'key', 'test 5'} -- creates new WAL ---- -- ['key', 'test 5'] -... -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) ---- -- ok -... -box.space._schema:replace{'key', 'test 6'} -- fails, makes gap ---- -- error: Failed to write to disk -... -box.snapshot() -- fails, rotates WAL ---- -- error: Error injection 'xlog write injection' -... -box.space._schema:replace{'key', 'test 6'} -- fails, creates empty WAL ---- -- error: Failed to write to disk -... -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog - - panic/00000000000000000035.xlog - - panic/00000000000000000037.xlog - - panic/00000000000000000039.xlog -... -test_run:cmd("restart server panic") -box.space._schema:select{'key'} ---- -- - ['key', 'test 5'] -... --- Check that we don't create a WAL in the gap between the last two. -box.space._schema:replace{'key', 'test 6'} ---- -- ['key', 'test 6'] -... -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog - - panic/00000000000000000035.xlog - - panic/00000000000000000037.xlog - - panic/00000000000000000039.xlog - - panic/00000000000000000040.xlog -... -test_run:cmd('switch default') ---- -- true -... -test_run:cmd("stop server panic") ---- -- true -... -test_run:cmd("cleanup server panic") ---- -- true -... diff --git a/test/xlog/panic_on_lsn_gap.test.lua b/test/xlog/panic_on_lsn_gap.test.lua deleted file mode 100644 index 6221261a7..000000000 --- a/test/xlog/panic_on_lsn_gap.test.lua +++ /dev/null @@ -1,136 +0,0 @@ --- --- we actually need to know what xlogs the server creates, --- so start from a clean state --- --- --- Check how the server is able to find the next --- xlog if there are failed writes (lsn gaps). --- -env = require('test_run') -test_run = env.new() -test_run:cmd("create server panic with script='xlog/panic.lua'") -test_run:cmd("start server panic") -test_run:cmd("switch panic") -box.info.vclock -s = box.space._schema --- we need to have at least one record in the --- xlog otherwise the server believes that there --- is an lsn gap during recovery. --- -s:replace{"key", 'test 1'} -box.info.vclock -box.error.injection.set("ERRINJ_WAL_WRITE", true) -t = {} --- --- Try to insert rows, so that it's time to --- switch WALs. No switch will happen though, --- since no writes were made. --- -test_run:cmd("setopt delimiter ';'") -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; -test_run:cmd("setopt delimiter ''"); -t --- --- Before restart: our LSN is 1, because --- LSN is promoted in tx only on successful --- WAL write. --- -name = string.match(arg[0], "([^,]+)%.lua") -box.info.vclock -require('fio').glob(name .. "/*.xlog") -test_run:cmd("restart server panic") --- --- After restart: our LSN is the LSN of the --- last empty WAL created on shutdown, i.e. 11. --- -box.info.vclock -box.space._schema:select{'key'} -box.error.injection.set("ERRINJ_WAL_WRITE", true) -t = {} -s = box.space._schema --- --- now do the same --- -test_run:cmd("setopt delimiter ';'") -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; -test_run:cmd("setopt delimiter ''"); -t -box.info.vclock -box.error.injection.set("ERRINJ_WAL_WRITE", false) --- --- Write a good row after a series of failed --- rows. There is a gap in LSN, correct, --- but it's *inside* a single WAL, so doesn't --- affect WAL search in recover_remaining_wals() --- -s:replace{'key', 'test 2'} --- --- notice that vclock before and after --- server stop is the same -- because it's --- recorded in the last row --- -box.info.vclock -test_run:cmd("restart server panic") -box.info.vclock -box.space._schema:select{'key'} --- list all the logs -name = string.match(arg[0], "([^,]+)%.lua") -require('fio').glob(name .. "/*.xlog") --- now insert 10 rows - so that the next --- row will need to switch the WAL -test_run:cmd("setopt delimiter ';'") -for i=1,box.cfg.rows_per_wal do - box.space._schema:replace{"key", 'test 3'} -end; -test_run:cmd("setopt delimiter ''"); --- the next insert should switch xlog, but aha - it fails --- a new xlog file is created but has 0 rows -require('fio').glob(name .. "/*.xlog") -box.error.injection.set("ERRINJ_WAL_WRITE", true) -box.space._schema:replace{"key", 'test 3'} -box.info.vclock -require('fio').glob(name .. "/*.xlog") --- and the next one (just to be sure -box.space._schema:replace{"key", 'test 3'} -box.info.vclock -require('fio').glob(name .. "/*.xlog") -box.error.injection.set("ERRINJ_WAL_WRITE", false) --- then a success -box.space._schema:replace{"key", 'test 4'} -box.info.vclock -require('fio').glob(name .. "/*.xlog") --- restart is ok -test_run:cmd("restart server panic") -box.space._schema:select{'key'} --- --- Check that if there's an LSN gap between two WALs --- that appeared due to a disk error and no files is --- actually missing, we won't panic on recovery. --- -box.space._schema:replace{'key', 'test 4'} -- creates new WAL -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) -box.space._schema:replace{'key', 'test 5'} -- fails, makes gap -box.snapshot() -- fails, rotates WAL -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false) -box.space._schema:replace{'key', 'test 5'} -- creates new WAL -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) -box.space._schema:replace{'key', 'test 6'} -- fails, makes gap -box.snapshot() -- fails, rotates WAL -box.space._schema:replace{'key', 'test 6'} -- fails, creates empty WAL -name = string.match(arg[0], "([^,]+)%.lua") -require('fio').glob(name .. "/*.xlog") -test_run:cmd("restart server panic") -box.space._schema:select{'key'} --- Check that we don't create a WAL in the gap between the last two. -box.space._schema:replace{'key', 'test 6'} -name = string.match(arg[0], "([^,]+)%.lua") -require('fio').glob(name .. "/*.xlog") -test_run:cmd('switch default') -test_run:cmd("stop server panic") -test_run:cmd("cleanup server panic") -- 2.20.1