From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id D5466272A9 for ; Thu, 7 Feb 2019 12:25:31 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id bZmht8lCDRQz for ; Thu, 7 Feb 2019 12:25:31 -0500 (EST) Received: from smtp16.mail.ru (smtp16.mail.ru [94.100.176.153]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 6D67327278 for ; Thu, 7 Feb 2019 12:25:31 -0500 (EST) From: Georgy Kirichenko Subject: [tarantool-patches] [PATCH v2 1/2] Do not promote wal vclock for failed writes Date: Thu, 7 Feb 2019 20:27:30 +0300 Message-Id: <79a9eeb9870e15a826d7e64f186b8010ad836a64.1549556742.git.georgy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Georgy Kirichenko Wal used to promote vclock prior to write the row. This lead to a situation when master's row would be skipped forever in case there is an error trying to write it. However, some errors are transient, and we might be able to successfully apply the same row later. So we do not promote writer vclock in order to be able to restart replication from failing point. Obsoletes xlog/panic_on_lsn_gap.test. Needed for #2283 --- src/box/wal.c | 24 +- test/box/errinj.result | 23 ++ test/box/errinj.test.lua | 20 ++ test/xlog/errinj.result | 1 - test/xlog/panic_on_lsn_gap.result | 377 ---------------------------- test/xlog/panic_on_lsn_gap.test.lua | 136 ---------- 6 files changed, 61 insertions(+), 520 deletions(-) delete mode 100644 test/xlog/panic_on_lsn_gap.result delete mode 100644 test/xlog/panic_on_lsn_gap.test.lua diff --git a/src/box/wal.c b/src/box/wal.c index 3b50d3629..966f3bfb9 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -901,16 +901,16 @@ wal_writer_begin_rollback(struct wal_writer *writer) } static void -wal_assign_lsn(struct wal_writer *writer, struct xrow_header **row, +wal_assign_lsn(struct vclock *vclock, struct xrow_header **row, struct xrow_header **end) { /** Assign LSN to all local rows. */ for ( ; row < end; row++) { if ((*row)->replica_id == 0) { - (*row)->lsn = vclock_inc(&writer->vclock, instance_id); + (*row)->lsn = vclock_inc(vclock, instance_id); (*row)->replica_id = instance_id; } else { - vclock_follow_xrow(&writer->vclock, *row); + vclock_follow_xrow(vclock, *row); } } } @@ -922,6 +922,16 @@ wal_write_to_disk(struct cmsg *msg) struct wal_msg *wal_msg = (struct wal_msg *) msg; struct error *error; + /* + * In order to not to promote writer's vclock in case of an error we + * create a copy to assign lsn's before rows were actually written. + * After successful xlog flash we update writer vclock to the + * last written vclock value. + */ + struct vclock vclock; + vclock_create(&vclock); + vclock_copy(&vclock, &writer->vclock); + struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL); while (inj != NULL && inj->bparam) usleep(10); @@ -974,14 +984,15 @@ wal_write_to_disk(struct cmsg *msg) struct journal_entry *entry; struct stailq_entry *last_committed = NULL; stailq_foreach_entry(entry, &wal_msg->commit, fifo) { - wal_assign_lsn(writer, entry->rows, entry->rows + entry->n_rows); - entry->res = vclock_sum(&writer->vclock); + wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows); + entry->res = vclock_sum(&vclock); rc = xlog_write_entry(l, entry); if (rc < 0) goto done; if (rc > 0) { writer->checkpoint_wal_size += rc; last_committed = &entry->fifo; + vclock_copy(&writer->vclock, &vclock); } /* rc == 0: the write is buffered in xlog_tx */ } @@ -991,6 +1002,7 @@ wal_write_to_disk(struct cmsg *msg) writer->checkpoint_wal_size += rc; last_committed = stailq_last(&wal_msg->commit); + vclock_copy(&writer->vclock, &vclock); /* * Notify TX if the checkpoint threshold has been exceeded. @@ -1185,7 +1197,7 @@ wal_write_in_wal_mode_none(struct journal *journal, struct journal_entry *entry) { struct wal_writer *writer = (struct wal_writer *) journal; - wal_assign_lsn(writer, entry->rows, entry->rows + entry->n_rows); + wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows); int64_t old_lsn = vclock_get(&replicaset.vclock, instance_id); int64_t new_lsn = vclock_get(&writer->vclock, instance_id); if (new_lsn > old_lsn) { diff --git a/test/box/errinj.result b/test/box/errinj.result index 12303670e..1d9a16d8d 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -141,6 +141,15 @@ errinj.set("ERRINJ_TESTING", false) --- - ok ... +env = require('test_run') +--- +... +test_run = env.new() +--- +... +lsn1 = box.info.vclock[box.info.id] +--- +... -- Check how well we handle a failed log write errinj.set("ERRINJ_WAL_IO", true) --- @@ -161,6 +170,11 @@ space:insert{1} --- - [1] ... +-- Check vclock was promoted only one time +box.info.vclock[box.info.id] == lsn1 + 1 +--- +- true +... errinj.set("ERRINJ_WAL_IO", true) --- - ok @@ -180,6 +194,15 @@ errinj.set("ERRINJ_WAL_IO", false) --- - ok ... +space:update(1, {{'=', 2, 2}}) +--- +- [1, 2] +... +-- Check vclock was promoted only two times +box.info.vclock[box.info.id] == lsn1 + 2 +--- +- true +... space:truncate() --- ... diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index 6f491d623..39d73b57e 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -18,11 +18,31 @@ space:insert{1} space:get{1} errinj.set("ERRINJ_WAL_IO", false) space:insert{1} +-- Check vclock was promoted only one time errinj.set("ERRINJ_WAL_IO", true) space:update(1, {{'=', 2, 2}}) space:get{1} space:get{2} errinj.set("ERRINJ_WAL_IO", false) +space:update(1, {{'=', 2, 2}}) +-- Check vclock was promoted only two times +space:truncate() + +lsn1 = box.info.vclock[box.info.id] +-- Check how well we handle a failed log write +errinj.set("ERRINJ_WAL_WRITE_PARTIAL", 0) +space:insert{1} +errinj.set("ERRINJ_WAL_WRITE_PARTIAL", -1) +space:insert{1} +-- Check vclock was promoted only one time +box.info.vclock[box.info.id] == lsn1 + 1 +errinj.set("ERRINJ_WAL_WRITE_PARTIAL", 0) +space:update(1, {{'=', 2, 2}}) +space:get{1} +errinj.set("ERRINJ_WAL_WRITE_PARTIAL", -1) +space:update(1, {{'=', 2, 2}}) +-- Check vclock was promoted only two times +box.info.vclock[box.info.id] == lsn1 + 2 space:truncate() -- Check a failed log rotation diff --git a/test/xlog/errinj.result b/test/xlog/errinj.result index 390404b47..7f15bef35 100644 --- a/test/xlog/errinj.result +++ b/test/xlog/errinj.result @@ -43,7 +43,6 @@ require('fio').glob(name .. "/*.xlog") --- - - xlog/00000000000000000000.xlog - xlog/00000000000000000001.xlog - - xlog/00000000000000000002.xlog ... test_run:cmd('restart server default with cleanup=1') -- gh-881 iproto request with wal IO error diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result deleted file mode 100644 index 4dd1291f8..000000000 --- a/test/xlog/panic_on_lsn_gap.result +++ /dev/null @@ -1,377 +0,0 @@ --- --- we actually need to know what xlogs the server creates, --- so start from a clean state --- --- --- Check how the server is able to find the next --- xlog if there are failed writes (lsn gaps). --- -env = require('test_run') ---- -... -test_run = env.new() ---- -... -test_run:cmd("create server panic with script='xlog/panic.lua'") ---- -- true -... -test_run:cmd("start server panic") ---- -- true -... -test_run:cmd("switch panic") ---- -- true -... -box.info.vclock ---- -- {} -... -s = box.space._schema ---- -... --- we need to have at least one record in the --- xlog otherwise the server believes that there --- is an lsn gap during recovery. --- -s:replace{"key", 'test 1'} ---- -- ['key', 'test 1'] -... -box.info.vclock ---- -- {1: 1} -... -box.error.injection.set("ERRINJ_WAL_WRITE", true) ---- -- ok -... -t = {} ---- -... --- --- Try to insert rows, so that it's time to --- switch WALs. No switch will happen though, --- since no writes were made. --- -test_run:cmd("setopt delimiter ';'") ---- -- true -... -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; ---- -... -test_run:cmd("setopt delimiter ''"); ---- -- true -... -t ---- -- - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk -... --- --- Before restart: our LSN is 1, because --- LSN is promoted in tx only on successful --- WAL write. --- -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -box.info.vclock ---- -- {1: 1} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog -... -test_run:cmd("restart server panic") --- --- After restart: our LSN is the LSN of the --- last empty WAL created on shutdown, i.e. 11. --- -box.info.vclock ---- -- {1: 11} -... -box.space._schema:select{'key'} ---- -- - ['key', 'test 1'] -... -box.error.injection.set("ERRINJ_WAL_WRITE", true) ---- -- ok -... -t = {} ---- -... -s = box.space._schema ---- -... --- --- now do the same --- -test_run:cmd("setopt delimiter ';'") ---- -- true -... -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; ---- -... -test_run:cmd("setopt delimiter ''"); ---- -- true -... -t ---- -- - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk - - Failed to write to disk -... -box.info.vclock ---- -- {1: 11} -... -box.error.injection.set("ERRINJ_WAL_WRITE", false) ---- -- ok -... --- --- Write a good row after a series of failed --- rows. There is a gap in LSN, correct, --- but it's *inside* a single WAL, so doesn't --- affect WAL search in recover_remaining_wals() --- -s:replace{'key', 'test 2'} ---- -- ['key', 'test 2'] -... --- --- notice that vclock before and after --- server stop is the same -- because it's --- recorded in the last row --- -box.info.vclock ---- -- {1: 22} -... -test_run:cmd("restart server panic") -box.info.vclock ---- -- {1: 22} -... -box.space._schema:select{'key'} ---- -- - ['key', 'test 2'] -... --- list all the logs -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog -... --- now insert 10 rows - so that the next --- row will need to switch the WAL -test_run:cmd("setopt delimiter ';'") ---- -- true -... -for i=1,box.cfg.rows_per_wal do - box.space._schema:replace{"key", 'test 3'} -end; ---- -... -test_run:cmd("setopt delimiter ''"); ---- -- true -... --- the next insert should switch xlog, but aha - it fails --- a new xlog file is created but has 0 rows -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog -... -box.error.injection.set("ERRINJ_WAL_WRITE", true) ---- -- ok -... -box.space._schema:replace{"key", 'test 3'} ---- -- error: Failed to write to disk -... -box.info.vclock ---- -- {1: 32} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog -... --- and the next one (just to be sure -box.space._schema:replace{"key", 'test 3'} ---- -- error: Failed to write to disk -... -box.info.vclock ---- -- {1: 32} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog -... -box.error.injection.set("ERRINJ_WAL_WRITE", false) ---- -- ok -... --- then a success -box.space._schema:replace{"key", 'test 4'} ---- -- ['key', 'test 4'] -... -box.info.vclock ---- -- {1: 35} -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog -... --- restart is ok -test_run:cmd("restart server panic") -box.space._schema:select{'key'} ---- -- - ['key', 'test 4'] -... --- --- Check that if there's an LSN gap between two WALs --- that appeared due to a disk error and no files is --- actually missing, we won't panic on recovery. --- -box.space._schema:replace{'key', 'test 4'} -- creates new WAL ---- -- ['key', 'test 4'] -... -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) ---- -- ok -... -box.space._schema:replace{'key', 'test 5'} -- fails, makes gap ---- -- error: Failed to write to disk -... -box.snapshot() -- fails, rotates WAL ---- -- error: Error injection 'xlog write injection' -... -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false) ---- -- ok -... -box.space._schema:replace{'key', 'test 5'} -- creates new WAL ---- -- ['key', 'test 5'] -... -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) ---- -- ok -... -box.space._schema:replace{'key', 'test 6'} -- fails, makes gap ---- -- error: Failed to write to disk -... -box.snapshot() -- fails, rotates WAL ---- -- error: Error injection 'xlog write injection' -... -box.space._schema:replace{'key', 'test 6'} -- fails, creates empty WAL ---- -- error: Failed to write to disk -... -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog - - panic/00000000000000000035.xlog - - panic/00000000000000000037.xlog - - panic/00000000000000000039.xlog -... -test_run:cmd("restart server panic") -box.space._schema:select{'key'} ---- -- - ['key', 'test 5'] -... --- Check that we don't create a WAL in the gap between the last two. -box.space._schema:replace{'key', 'test 6'} ---- -- ['key', 'test 6'] -... -name = string.match(arg[0], "([^,]+)%.lua") ---- -... -require('fio').glob(name .. "/*.xlog") ---- -- - panic/00000000000000000000.xlog - - panic/00000000000000000011.xlog - - panic/00000000000000000022.xlog - - panic/00000000000000000032.xlog - - panic/00000000000000000035.xlog - - panic/00000000000000000037.xlog - - panic/00000000000000000039.xlog - - panic/00000000000000000040.xlog -... -test_run:cmd('switch default') ---- -- true -... -test_run:cmd("stop server panic") ---- -- true -... -test_run:cmd("cleanup server panic") ---- -- true -... diff --git a/test/xlog/panic_on_lsn_gap.test.lua b/test/xlog/panic_on_lsn_gap.test.lua deleted file mode 100644 index 6221261a7..000000000 --- a/test/xlog/panic_on_lsn_gap.test.lua +++ /dev/null @@ -1,136 +0,0 @@ --- --- we actually need to know what xlogs the server creates, --- so start from a clean state --- --- --- Check how the server is able to find the next --- xlog if there are failed writes (lsn gaps). --- -env = require('test_run') -test_run = env.new() -test_run:cmd("create server panic with script='xlog/panic.lua'") -test_run:cmd("start server panic") -test_run:cmd("switch panic") -box.info.vclock -s = box.space._schema --- we need to have at least one record in the --- xlog otherwise the server believes that there --- is an lsn gap during recovery. --- -s:replace{"key", 'test 1'} -box.info.vclock -box.error.injection.set("ERRINJ_WAL_WRITE", true) -t = {} --- --- Try to insert rows, so that it's time to --- switch WALs. No switch will happen though, --- since no writes were made. --- -test_run:cmd("setopt delimiter ';'") -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; -test_run:cmd("setopt delimiter ''"); -t --- --- Before restart: our LSN is 1, because --- LSN is promoted in tx only on successful --- WAL write. --- -name = string.match(arg[0], "([^,]+)%.lua") -box.info.vclock -require('fio').glob(name .. "/*.xlog") -test_run:cmd("restart server panic") --- --- After restart: our LSN is the LSN of the --- last empty WAL created on shutdown, i.e. 11. --- -box.info.vclock -box.space._schema:select{'key'} -box.error.injection.set("ERRINJ_WAL_WRITE", true) -t = {} -s = box.space._schema --- --- now do the same --- -test_run:cmd("setopt delimiter ';'") -for i=1,box.cfg.rows_per_wal do - status, msg = pcall(s.replace, s, {"key"}) - table.insert(t, msg) -end; -test_run:cmd("setopt delimiter ''"); -t -box.info.vclock -box.error.injection.set("ERRINJ_WAL_WRITE", false) --- --- Write a good row after a series of failed --- rows. There is a gap in LSN, correct, --- but it's *inside* a single WAL, so doesn't --- affect WAL search in recover_remaining_wals() --- -s:replace{'key', 'test 2'} --- --- notice that vclock before and after --- server stop is the same -- because it's --- recorded in the last row --- -box.info.vclock -test_run:cmd("restart server panic") -box.info.vclock -box.space._schema:select{'key'} --- list all the logs -name = string.match(arg[0], "([^,]+)%.lua") -require('fio').glob(name .. "/*.xlog") --- now insert 10 rows - so that the next --- row will need to switch the WAL -test_run:cmd("setopt delimiter ';'") -for i=1,box.cfg.rows_per_wal do - box.space._schema:replace{"key", 'test 3'} -end; -test_run:cmd("setopt delimiter ''"); --- the next insert should switch xlog, but aha - it fails --- a new xlog file is created but has 0 rows -require('fio').glob(name .. "/*.xlog") -box.error.injection.set("ERRINJ_WAL_WRITE", true) -box.space._schema:replace{"key", 'test 3'} -box.info.vclock -require('fio').glob(name .. "/*.xlog") --- and the next one (just to be sure -box.space._schema:replace{"key", 'test 3'} -box.info.vclock -require('fio').glob(name .. "/*.xlog") -box.error.injection.set("ERRINJ_WAL_WRITE", false) --- then a success -box.space._schema:replace{"key", 'test 4'} -box.info.vclock -require('fio').glob(name .. "/*.xlog") --- restart is ok -test_run:cmd("restart server panic") -box.space._schema:select{'key'} --- --- Check that if there's an LSN gap between two WALs --- that appeared due to a disk error and no files is --- actually missing, we won't panic on recovery. --- -box.space._schema:replace{'key', 'test 4'} -- creates new WAL -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) -box.space._schema:replace{'key', 'test 5'} -- fails, makes gap -box.snapshot() -- fails, rotates WAL -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false) -box.space._schema:replace{'key', 'test 5'} -- creates new WAL -box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) -box.space._schema:replace{'key', 'test 6'} -- fails, makes gap -box.snapshot() -- fails, rotates WAL -box.space._schema:replace{'key', 'test 6'} -- fails, creates empty WAL -name = string.match(arg[0], "([^,]+)%.lua") -require('fio').glob(name .. "/*.xlog") -test_run:cmd("restart server panic") -box.space._schema:select{'key'} --- Check that we don't create a WAL in the gap between the last two. -box.space._schema:replace{'key', 'test 6'} -name = string.match(arg[0], "([^,]+)%.lua") -require('fio').glob(name .. "/*.xlog") -test_run:cmd('switch default') -test_run:cmd("stop server panic") -test_run:cmd("cleanup server panic") -- 2.20.1