[Tarantool-patches] [PATCH] vinyl: rotate mem during index build on demand

Nikita Pettik korablev at tarantool.org
Thu Jun 4 20:49:03 MSK 2020


Meanwhile in 17f6af7dc the similar problem has been fixed, still it may
appear that in-memory level of secondary index being constructed has
lagging memory generation (in other words, it's values is less than
the value of current global generation). Such situation can be achieved
if yield which is required to check uniqueness of value being inserted
is too long. In this time gap other space may trigger dump process
bumping global memory generation counter, but dump itself is still not
yet scheduled.

So to get rid of generations mismatch, let's rotate in-memory level
after yield on demand.

Closes #5042
---
Branch: https://github.com/tarantool/tarantool/tree/np/gh-5042-rotate-mem-after-yield
Issue: https://github.com/tarantool/tarantool/issues/5042

@ChangeLog:
 * Fix crash due to triggered dump process during secondary index creation (gh-5042).

 src/box/vinyl.c                               |  14 ++
 src/errinj.h                                  |   1 +
 test/box/errinj.result                        |   1 +
 .../gh-5042-dump-during-index-build-2.result  | 189 ++++++++++++++++++
 ...gh-5042-dump-during-index-build-2.test.lua |  98 +++++++++
 test/vinyl/suite.ini                          |   2 +-
 6 files changed, 304 insertions(+), 1 deletion(-)
 create mode 100644 test/vinyl/gh-5042-dump-during-index-build-2.result
 create mode 100644 test/vinyl/gh-5042-dump-during-index-build-2.test.lua

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index a90b786a7..19e37947a 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -4083,6 +4083,14 @@ vy_build_insert_tuple(struct vy_env *env, struct vy_lsm *lsm,
 	vy_mem_pin(mem);
 	rc = vy_check_is_unique_secondary(NULL, &env->xm->p_committed_read_view,
 					  space_name, index_name, lsm, stmt);
+#ifndef NDEBUG
+	struct errinj *inj = errinj(ERRINJ_BUILD_UNIQUE_IDX_DELAY, ERRINJ_BOOL);
+	if (inj != NULL) {
+		while (inj->bparam) {
+			fiber_sleep(0.01);
+		}
+	}
+#endif
 	vy_mem_unpin(mem);
 	if (rc != 0) {
 		tuple_unref(stmt);
@@ -4097,6 +4105,12 @@ vy_build_insert_tuple(struct vy_env *env, struct vy_lsm *lsm,
 	 * Hence, to get right mem (during mem rotation it becomes
 	 * sealed i.e. read-only) we should fetch it from lsm again.
 	 */
+	if (lsm->mem->generation != *lsm->env->p_generation) {
+		if (vy_lsm_rotate_mem(lsm) != 0) {
+			tuple_unref(stmt);
+			return -1;
+		}
+	}
 	mem = lsm->mem;
 
 	/* Insert the new tuple into the in-memory index. */
diff --git a/src/errinj.h b/src/errinj.h
index 92bff5e4d..1adbb9e05 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -131,6 +131,7 @@ struct errinj {
 	_(ERRINJ_VY_READ_VIEW_MERGE_FAIL, ERRINJ_BOOL, {.bparam = false})\
 	_(ERRINJ_VY_WRITE_ITERATOR_START_FAIL, ERRINJ_BOOL, {.bparam = false})\
 	_(ERRINJ_VY_RUN_OPEN, ERRINJ_INT, {.iparam = -1})\
+	_(ERRINJ_BUILD_UNIQUE_IDX_DELAY, ERRINJ_BOOL, {.bparam = false})\
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 3f075a2d0..cf1e81072 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -33,6 +33,7 @@ end
 evals
 ---
 - - ERRINJ_BUILD_INDEX: -1
+  - ERRINJ_BUILD_UNIQUE_IDX_DELAY: false
   - ERRINJ_DYN_MODULE_COUNT: 0
   - ERRINJ_HTTPC_EXECUTE: false
   - ERRINJ_HTTP_RESPONSE_ADD_WAIT: false
diff --git a/test/vinyl/gh-5042-dump-during-index-build-2.result b/test/vinyl/gh-5042-dump-during-index-build-2.result
new file mode 100644
index 000000000..a4fbc56ec
--- /dev/null
+++ b/test/vinyl/gh-5042-dump-during-index-build-2.result
@@ -0,0 +1,189 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+test_run:cmd("create server test with script='vinyl/low_quota.lua'")
+ | ---
+ | - true
+ | ...
+test_run:cmd("start server test with args='13421772'")
+ | ---
+ | - true
+ | ...
+test_run:cmd('switch test')
+ | ---
+ | - true
+ | ...
+
+fiber = require 'fiber'
+ | ---
+ | ...
+errinj = box.error.injection
+ | ---
+ | ...
+
+--
+-- Test is quite similar to gh-4810, but data is inserted to
+-- the space different from one which builds the new index.
+-- Consider following scenario:
+-- 1. during building secondary index, we yield to scan disk
+-- in order to check tuple uniqueness;
+-- 2. insertion to another space bumps memory generation (owing to
+-- exceeding memory limit), but dump itself is not yet scheduled
+-- (e.g. scheduler fiber does not get time to execute, or it fails
+-- due for whatever reason);
+-- 3. we return from yield point and now generation of mem does not
+-- match with global one (which was bumped during second step). It may
+-- lead to unpredictable behavior (starting from fired assertions in
+-- debug mode to segfaults or lost data in release).
+-- To equalize memory generations, we must rotate mem of corresponding
+-- LSM tree before processing further operations with it.
+--
+s1 = box.schema.space.create('insert_to', {engine = 'vinyl'})
+ | ---
+ | ...
+_ = s1:create_index('pk', {parts = {1, 'unsigned'}})
+ | ---
+ | ...
+
+s2 = box.schema.space.create('build_index', {engine = 'vinyl'})
+ | ---
+ | ...
+_ = s2:create_index('pk', {parts = {1, 'unsigned'}})
+ | ---
+ | ...
+
+val = 0
+ | ---
+ | ...
+PADDING = string.rep('x', 100)
+ | ---
+ | ...
+
+test_run:cmd("setopt delimiter ';'")
+ | ---
+ | - true
+ | ...
+
+function gen_insert(space)
+    pcall(space.insert, space, {val, val, val, val, PADDING})
+    val = val + 1
+end;
+ | ---
+ | ...
+
+function fill_L0_without_dump(space, watermark)
+    while box.stat.vinyl().memory.level0 < watermark do
+        gen_insert(space)
+    end
+end;
+ | ---
+ | ...
+
+function insert_loop()
+    while not stop do
+        gen_insert(s1)
+    end
+    ch:put(true)
+end;
+ | ---
+ | ...
+
+function idx_build()
+    _ = s2:create_index('i1', {unique = true, parts = {2, 'unsigned', 3, 'unsigned'}})
+    ch:put(true)
+end;
+ | ---
+ | ...
+
+dump_watermark = box.cfg.vinyl_memory / 2;
+ | ---
+ | ...
+fill_L0_without_dump(s1, dump_watermark / 2);
+ | ---
+ | ...
+fill_L0_without_dump(s2, dump_watermark);
+ | ---
+ | ...
+
+box.snapshot();
+ | ---
+ | - ok
+ | ...
+
+fill_L0_without_dump(s1, dump_watermark / 2 * 3);
+ | ---
+ | ...
+
+function idx_build()
+    errinj.set("ERRINJ_BUILD_UNIQUE_IDX_DELAY", true)
+    _ = s2:create_index('i1', {unique = true, parts = {2, 'unsigned', 3, 'unsigned'}})
+    ch:put(true)
+end;
+ | ---
+ | ...
+
+stop = false;
+ | ---
+ | ...
+ch = fiber.channel(2);
+ | ---
+ | ...
+
+_ = fiber.create(idx_build);
+ | ---
+ | ...
+_ = fiber.create(insert_loop, s1);
+ | ---
+ | ...
+errinj.set("ERRINJ_VY_INDEX_DUMP", 1);
+ | ---
+ | - ok
+ | ...
+
+fiber.sleep(2);
+ | ---
+ | ...
+errinj.set("ERRINJ_BUILD_UNIQUE_IDX_DELAY", false);
+ | ---
+ | - ok
+ | ...
+errinj.set("ERRINJ_VY_INDEX_DUMP", -1);
+ | ---
+ | - ok
+ | ...
+
+stop = true;
+ | ---
+ | ...
+for i = 1, ch:size() do
+    ch:get()
+end;
+ | ---
+ | ...
+
+s1:drop();
+ | ---
+ | ...
+s2:drop();
+ | ---
+ | ...
+
+test_run:cmd("setopt delimiter ''");
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd("stop server test")
+ | ---
+ | - true
+ | ...
+test_run:cmd("cleanup server test")
+ | ---
+ | - true
+ | ...
diff --git a/test/vinyl/gh-5042-dump-during-index-build-2.test.lua b/test/vinyl/gh-5042-dump-during-index-build-2.test.lua
new file mode 100644
index 000000000..16cdfa668
--- /dev/null
+++ b/test/vinyl/gh-5042-dump-during-index-build-2.test.lua
@@ -0,0 +1,98 @@
+test_run = require('test_run').new()
+
+test_run:cmd("create server test with script='vinyl/low_quota.lua'")
+test_run:cmd("start server test with args='13421772'")
+test_run:cmd('switch test')
+
+fiber = require 'fiber'
+errinj = box.error.injection
+
+--
+-- Test is quite similar to gh-4810, but data is inserted to
+-- the space different from one which builds the new index.
+-- Consider following scenario:
+-- 1. during building secondary index, we yield to scan disk
+-- in order to check tuple uniqueness;
+-- 2. insertion to another space bumps memory generation (owing to
+-- exceeding memory limit), but dump itself is not yet scheduled
+-- (e.g. scheduler fiber does not get time to execute, or it fails
+-- due for whatever reason);
+-- 3. we return from yield point and now generation of mem does not
+-- match with global one (which was bumped during second step). It may
+-- lead to unpredictable behavior (starting from fired assertions in
+-- debug mode to segfaults or lost data in release).
+-- To equalize memory generations, we must rotate mem of corresponding
+-- LSM tree before processing further operations with it.
+--
+s1 = box.schema.space.create('insert_to', {engine = 'vinyl'})
+_ = s1:create_index('pk', {parts = {1, 'unsigned'}})
+
+s2 = box.schema.space.create('build_index', {engine = 'vinyl'})
+_ = s2:create_index('pk', {parts = {1, 'unsigned'}})
+
+val = 0
+PADDING = string.rep('x', 100)
+
+test_run:cmd("setopt delimiter ';'")
+
+function gen_insert(space)
+    pcall(space.insert, space, {val, val, val, val, PADDING})
+    val = val + 1
+end;
+
+function fill_L0_without_dump(space, watermark)
+    while box.stat.vinyl().memory.level0 < watermark do
+        gen_insert(space)
+    end
+end;
+
+function insert_loop()
+    while not stop do
+        gen_insert(s1)
+    end
+    ch:put(true)
+end;
+
+function idx_build()
+    _ = s2:create_index('i1', {unique = true, parts = {2, 'unsigned', 3, 'unsigned'}})
+    ch:put(true)
+end;
+
+dump_watermark = box.cfg.vinyl_memory / 2;
+fill_L0_without_dump(s1, dump_watermark / 2);
+fill_L0_without_dump(s2, dump_watermark);
+
+box.snapshot();
+
+fill_L0_without_dump(s1, dump_watermark / 2 * 3);
+
+function idx_build()
+    errinj.set("ERRINJ_BUILD_UNIQUE_IDX_DELAY", true)
+    _ = s2:create_index('i1', {unique = true, parts = {2, 'unsigned', 3, 'unsigned'}})
+    ch:put(true)
+end;
+
+stop = false;
+ch = fiber.channel(2);
+
+_ = fiber.create(idx_build);
+_ = fiber.create(insert_loop, s1);
+errinj.set("ERRINJ_VY_INDEX_DUMP", 1);
+
+fiber.sleep(2);
+errinj.set("ERRINJ_BUILD_UNIQUE_IDX_DELAY", false);
+errinj.set("ERRINJ_VY_INDEX_DUMP", -1);
+
+stop = true;
+for i = 1, ch:size() do
+    ch:get()
+end;
+
+s1:drop();
+s2:drop();
+
+test_run:cmd("setopt delimiter ''");
+
+test_run:cmd('switch default')
+test_run:cmd("stop server test")
+test_run:cmd("cleanup server test")
diff --git a/test/vinyl/suite.ini b/test/vinyl/suite.ini
index 4de29fccf..7f2a2896f 100644
--- a/test/vinyl/suite.ini
+++ b/test/vinyl/suite.ini
@@ -2,7 +2,7 @@
 core = tarantool
 description = vinyl integration tests
 script = vinyl.lua
-release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua gh-4864-stmt-alloc-fail-compact.test.lua gh-4805-open-run-err-recovery.test.lua
+release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua gh-4864-stmt-alloc-fail-compact.test.lua gh-4805-open-run-err-recovery.test.lua gh-5042-dump-during-index-build-2.test.lua
 config = suite.cfg
 lua_libs = suite.lua stress.lua large.lua txn_proxy.lua ../box/lua/utils.lua
 use_unix_sockets = True
-- 
2.17.1



More information about the Tarantool-patches mailing list