* [PATCH 1/4] vinyl: do not reuse lsm objects during recovery from vylog
2018-05-23 16:10 [PATCH 0/4] vinyl: fix index drop vs compaction race Vladimir Davydov
@ 2018-05-23 16:10 ` Vladimir Davydov
2018-05-23 16:10 ` [PATCH 2/4] alter: pass lsn of index drop record to engine Vladimir Davydov
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Vladimir Davydov @ 2018-05-23 16:10 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
If an index was dropped and then recreated, then while replaying vylog
we will reuse vy_lsm_recovery_info object corresponding to it. There's
no reason why we do that instead of simply allocating a new object -
amount of memory saved is negligible, but the code looks more complex.
Let's simplify the code - whenever we see VY_LOG_CREATE_LSM, create a
new vy_lsm_recovery_info object and replace the old incarnation if any
in the hash map.
---
src/box/vy_log.c | 189 +++++++++++++++++++++++------------------------
test/vinyl/layout.result | 4 +-
2 files changed, 96 insertions(+), 97 deletions(-)
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 172a1b01..d3c2bf6c 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -1130,6 +1130,31 @@ vy_recovery_index_id_hash(uint32_t space_id, uint32_t index_id)
return ((uint64_t)space_id << 32) + index_id;
}
+/**
+ * Insert an LSM tree into vy_recovery::index_id_hash.
+ *
+ * If there is an LSM tree with the same space_id/index_id
+ * present in the hash table, it will be silently replaced.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int
+vy_recovery_hash_index_id(struct vy_recovery *recovery,
+ struct vy_lsm_recovery_info *lsm)
+{
+ struct mh_i64ptr_t *h = recovery->index_id_hash;
+ struct mh_i64ptr_node_t node;
+
+ node.key = vy_recovery_index_id_hash(lsm->space_id, lsm->index_id);
+ node.val = lsm;
+
+ if (mh_i64ptr_put(h, &node, NULL, NULL) == mh_end(h)) {
+ diag_set(OutOfMemory, 0, "mh_i64ptr_put", "mh_i64ptr_node_t");
+ return -1;
+ }
+ return 0;
+}
+
/** Lookup an LSM tree in vy_recovery::index_id_hash map. */
struct vy_lsm_recovery_info *
vy_recovery_lsm_by_index_id(struct vy_recovery *recovery,
@@ -1188,6 +1213,53 @@ vy_recovery_lookup_slice(struct vy_recovery *recovery, int64_t slice_id)
}
/**
+ * Allocate a new LSM tree with the given ID and add it to
+ * the recovery context.
+ *
+ * Returns the new LSM tree on success, NULL on error.
+ */
+static struct vy_lsm_recovery_info *
+vy_recovery_do_create_lsm(struct vy_recovery *recovery, int64_t id,
+ uint32_t space_id, uint32_t index_id)
+{
+ struct vy_lsm_recovery_info *lsm = malloc(sizeof(*lsm));
+ if (lsm == NULL) {
+ diag_set(OutOfMemory, sizeof(*lsm),
+ "malloc", "struct vy_lsm_recovery_info");
+ return NULL;
+ }
+ struct mh_i64ptr_t *h = recovery->lsm_hash;
+ struct mh_i64ptr_node_t node = { id, lsm };
+ struct mh_i64ptr_node_t *old_node = NULL;
+ if (mh_i64ptr_put(h, &node, &old_node, NULL) == mh_end(h)) {
+ diag_set(OutOfMemory, 0, "mh_i64ptr_put", "mh_i64ptr_node_t");
+ free(lsm);
+ return NULL;
+ }
+ assert(old_node == NULL);
+ lsm->id = id;
+ lsm->space_id = space_id;
+ lsm->index_id = index_id;
+ lsm->key_parts = NULL;
+ lsm->key_part_count = 0;
+ lsm->is_dropped = false;
+ lsm->create_lsn = -1;
+ lsm->modify_lsn = -1;
+ lsm->dump_lsn = -1;
+ rlist_create(&lsm->ranges);
+ rlist_create(&lsm->runs);
+ /*
+ * Keep newer LSM trees closer to the tail of the list
+ * so that on log rotation we create/drop past incarnations
+ * before the final version.
+ */
+ rlist_add_tail_entry(&recovery->lsms, lsm, in_recovery);
+ if (recovery->max_id < id)
+ recovery->max_id = id;
+ return lsm;
+}
+
+/**
* Handle a VY_LOG_CREATE_LSM log record.
* This function allocates a new vinyl LSM tree with ID @id
* and inserts it to the hash.
@@ -1200,116 +1272,43 @@ vy_recovery_create_lsm(struct vy_recovery *recovery, int64_t id,
uint32_t key_part_count, int64_t create_lsn,
int64_t modify_lsn, int64_t dump_lsn)
{
- struct vy_lsm_recovery_info *lsm;
- struct key_part_def *key_parts_copy;
- struct mh_i64ptr_node_t node;
- struct mh_i64ptr_t *h;
- mh_int_t k;
-
- /*
- * Make a copy of the key definition to be used for
- * the new LSM tree incarnation.
- */
if (key_parts == NULL) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("Missing key definition for LSM tree %lld",
(long long)id));
return -1;
}
- key_parts_copy = malloc(sizeof(*key_parts) * key_part_count);
- if (key_parts_copy == NULL) {
- diag_set(OutOfMemory, sizeof(*key_parts) * key_part_count,
- "malloc", "struct key_part_def");
- return -1;
- }
- memcpy(key_parts_copy, key_parts, sizeof(*key_parts) * key_part_count);
-
- /*
- * Look up the LSM tree in the hash.
- */
- h = recovery->index_id_hash;
- node.key = vy_recovery_index_id_hash(space_id, index_id);
- k = mh_i64ptr_find(h, node.key, NULL);
- lsm = (k != mh_end(h)) ? mh_i64ptr_node(h, k)->val : NULL;
-
- if (lsm == NULL) {
- /*
- * This is the first time the LSM tree is created
- * (there's no previous incarnation in the context).
- * Allocate a node for the LSM tree and add it to
- * the hash.
- */
- lsm = malloc(sizeof(*lsm));
- if (lsm == NULL) {
- diag_set(OutOfMemory, sizeof(*lsm),
- "malloc", "struct vy_lsm_recovery_info");
- free(key_parts_copy);
- return -1;
- }
- lsm->index_id = index_id;
- lsm->space_id = space_id;
- rlist_create(&lsm->ranges);
- rlist_create(&lsm->runs);
-
- node.val = lsm;
- if (mh_i64ptr_put(h, &node, NULL, NULL) == mh_end(h)) {
- diag_set(OutOfMemory, 0, "mh_i64ptr_put",
- "mh_i64ptr_node_t");
- free(key_parts_copy);
- free(lsm);
- return -1;
- }
- rlist_add_entry(&recovery->lsms, lsm, in_recovery);
- } else {
- /*
- * The LSM tree was dropped and recreated with the
- * same ID. Update its key definition (because it
- * could have changed since the last time it was
- * used) and reset its state.
- */
- if (!lsm->is_dropped) {
- diag_set(ClientError, ER_INVALID_VYLOG_FILE,
- tt_sprintf("LSM tree %u/%u created twice",
- (unsigned)space_id,
- (unsigned)index_id));
- free(key_parts_copy);
- return -1;
- }
- assert(lsm->index_id == index_id);
- assert(lsm->space_id == space_id);
- free(lsm->key_parts);
- }
-
- lsm->id = id;
- lsm->key_parts = key_parts_copy;
- lsm->key_part_count = key_part_count;
- lsm->is_dropped = false;
- lsm->create_lsn = create_lsn;
- lsm->modify_lsn = modify_lsn;
- lsm->dump_lsn = dump_lsn;
-
- /*
- * Add the LSM tree to the hash.
- */
- h = recovery->lsm_hash;
- node.key = id;
- node.val = lsm;
- if (mh_i64ptr_find(h, id, NULL) != mh_end(h)) {
+ if (vy_recovery_lookup_lsm(recovery, id) != NULL) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("Duplicate LSM tree id %lld",
(long long)id));
return -1;
}
- if (mh_i64ptr_put(h, &node, NULL, NULL) == mh_end(h)) {
- diag_set(OutOfMemory, 0, "mh_i64ptr_put",
- "mh_i64ptr_node_t");
+ struct vy_lsm_recovery_info *lsm;
+ lsm = vy_recovery_lsm_by_index_id(recovery, space_id, index_id);
+ if (lsm != NULL && !lsm->is_dropped) {
+ diag_set(ClientError, ER_INVALID_VYLOG_FILE,
+ tt_sprintf("LSM tree %u/%u created twice",
+ (unsigned)space_id, (unsigned)index_id));
return -1;
}
+ lsm = vy_recovery_do_create_lsm(recovery, id, space_id, index_id);
+ if (lsm == NULL)
+ return -1;
- if (recovery->max_id < id)
- recovery->max_id = id;
+ lsm->key_parts = malloc(sizeof(*key_parts) * key_part_count);
+ if (lsm->key_parts == NULL) {
+ diag_set(OutOfMemory, sizeof(*key_parts) * key_part_count,
+ "malloc", "struct key_part_def");
+ return -1;
+ }
+ memcpy(lsm->key_parts, key_parts, sizeof(*key_parts) * key_part_count);
+ lsm->key_part_count = key_part_count;
+ lsm->create_lsn = create_lsn;
+ lsm->modify_lsn = modify_lsn;
+ lsm->dump_lsn = dump_lsn;
- return 0;
+ return vy_recovery_hash_index_id(recovery, lsm);
}
/**
diff --git a/test/vinyl/layout.result b/test/vinyl/layout.result
index e6294d3c..49826302 100644
--- a/test/vinyl/layout.result
+++ b/test/vinyl/layout.result
@@ -185,12 +185,12 @@ result
timestamp: <timestamp>
type: INSERT
BODY:
- tuple: [7, {2: 4}]
+ tuple: [7, {2: 5}]
- HEADER:
timestamp: <timestamp>
type: INSERT
BODY:
- tuple: [7, {2: 5}]
+ tuple: [7, {2: 4}]
- HEADER:
timestamp: <timestamp>
type: INSERT
--
2.11.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/4] alter: pass lsn of index drop record to engine
2018-05-23 16:10 [PATCH 0/4] vinyl: fix index drop vs compaction race Vladimir Davydov
2018-05-23 16:10 ` [PATCH 1/4] vinyl: do not reuse lsm objects during recovery from vylog Vladimir Davydov
@ 2018-05-23 16:10 ` Vladimir Davydov
2018-05-23 16:10 ` [PATCH 3/4] vinyl: store lsn of index drop record in vylog Vladimir Davydov
2018-05-23 16:10 ` [PATCH 4/4] vinyl: purge dropped indexes from vylog on garbage collection Vladimir Davydov
3 siblings, 0 replies; 5+ messages in thread
From: Vladimir Davydov @ 2018-05-23 16:10 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
We pass lsn of index alter/create records, let's pass lsn of drop record
for consistency. This is also needed by vinyl to store it in vylog (see
the next patch).
---
src/box/alter.cc | 8 ++++----
src/box/index.cc | 2 +-
src/box/index.h | 11 +++++++----
src/box/vinyl.c | 3 ++-
4 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/src/box/alter.cc b/src/box/alter.cc
index f0315ff7..6f6fcb09 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -1037,12 +1037,12 @@ DropIndex::prepare(struct alter_space *alter)
}
void
-DropIndex::commit(struct alter_space *alter, int64_t /* signature */)
+DropIndex::commit(struct alter_space *alter, int64_t signature)
{
struct index *index = space_index(alter->old_space,
old_index_def->iid);
assert(index != NULL);
- index_commit_drop(index);
+ index_commit_drop(index, signature);
}
/**
@@ -1298,7 +1298,7 @@ RebuildIndex::commit(struct alter_space *alter, int64_t signature)
struct index *old_index = space_index(alter->old_space,
old_index_def->iid);
assert(old_index != NULL);
- index_commit_drop(old_index);
+ index_commit_drop(old_index, signature);
assert(new_index != NULL);
index_commit_create(new_index, signature);
new_index = NULL;
@@ -1355,7 +1355,7 @@ TruncateIndex::commit(struct alter_space *alter, int64_t signature)
struct index *old_index = space_index(alter->old_space, iid);
struct index *new_index = space_index(alter->new_space, iid);
- index_commit_drop(old_index);
+ index_commit_drop(old_index, signature);
index_commit_create(new_index, signature);
}
diff --git a/src/box/index.cc b/src/box/index.cc
index 978935b3..f2274d23 100644
--- a/src/box/index.cc
+++ b/src/box/index.cc
@@ -562,7 +562,7 @@ generic_index_commit_modify(struct index *, int64_t)
}
void
-generic_index_commit_drop(struct index *)
+generic_index_commit_drop(struct index *, int64_t)
{
}
diff --git a/src/box/index.h b/src/box/index.h
index c7384b61..3bc41239 100644
--- a/src/box/index.h
+++ b/src/box/index.h
@@ -377,8 +377,11 @@ struct index_vtab {
/**
* Called after WAL write to commit index drop.
* Must not fail.
+ *
+ * @signature is the LSN that was assigned to the row
+ * that dropped the index.
*/
- void (*commit_drop)(struct index *);
+ void (*commit_drop)(struct index *index, int64_t signature);
/**
* Called after index definition update that did not
* require index rebuild.
@@ -522,9 +525,9 @@ index_commit_modify(struct index *index, int64_t signature)
}
static inline void
-index_commit_drop(struct index *index)
+index_commit_drop(struct index *index, int64_t signature)
{
- index->vtab->commit_drop(index);
+ index->vtab->commit_drop(index, signature);
}
static inline void
@@ -661,7 +664,7 @@ index_end_build(struct index *index)
void generic_index_commit_create(struct index *, int64_t);
void generic_index_abort_create(struct index *);
void generic_index_commit_modify(struct index *, int64_t);
-void generic_index_commit_drop(struct index *);
+void generic_index_commit_drop(struct index *, int64_t);
void generic_index_update_def(struct index *);
bool generic_index_depends_on_pk(struct index *);
ssize_t generic_index_size(struct index *);
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 2e2c145a..a423e95b 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -929,8 +929,9 @@ vy_log_lsm_prune(struct vy_lsm *lsm, int64_t gc_lsn)
}
static void
-vinyl_index_commit_drop(struct index *index)
+vinyl_index_commit_drop(struct index *index, int64_t lsn)
{
+ (void)lsn;
struct vy_env *env = vy_env(index->engine);
struct vy_lsm *lsm = vy_lsm(index);
--
2.11.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/4] vinyl: store lsn of index drop record in vylog
2018-05-23 16:10 [PATCH 0/4] vinyl: fix index drop vs compaction race Vladimir Davydov
2018-05-23 16:10 ` [PATCH 1/4] vinyl: do not reuse lsm objects during recovery from vylog Vladimir Davydov
2018-05-23 16:10 ` [PATCH 2/4] alter: pass lsn of index drop record to engine Vladimir Davydov
@ 2018-05-23 16:10 ` Vladimir Davydov
2018-05-23 16:10 ` [PATCH 4/4] vinyl: purge dropped indexes from vylog on garbage collection Vladimir Davydov
3 siblings, 0 replies; 5+ messages in thread
From: Vladimir Davydov @ 2018-05-23 16:10 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
This is required to rework garbage collection in vinyl.
---
src/box/vinyl.c | 7 +++----
src/box/vy_log.c | 43 ++++++++++++++++++++++++++++++++-----------
src/box/vy_log.h | 17 +++++++++++++----
src/box/vy_lsm.c | 2 +-
4 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index a423e95b..63f1c5ff 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -931,7 +931,6 @@ vy_log_lsm_prune(struct vy_lsm *lsm, int64_t gc_lsn)
static void
vinyl_index_commit_drop(struct index *index, int64_t lsn)
{
- (void)lsn;
struct vy_env *env = vy_env(index->engine);
struct vy_lsm *lsm = vy_lsm(index);
@@ -952,7 +951,7 @@ vinyl_index_commit_drop(struct index *index, int64_t lsn)
vy_log_tx_begin();
vy_log_lsm_prune(lsm, checkpoint_last(NULL));
- vy_log_drop_lsm(lsm->id);
+ vy_log_drop_lsm(lsm->id, lsn);
vy_log_tx_try_commit();
}
@@ -3138,7 +3137,7 @@ vy_send_lsm(struct vy_join_ctx *ctx, struct vy_lsm_recovery_info *lsm_info)
{
int rc = -1;
- if (lsm_info->is_dropped)
+ if (lsm_info->drop_lsn >= 0)
return 0;
/*
@@ -3429,7 +3428,7 @@ vinyl_engine_backup(struct engine *engine, struct vclock *vclock,
int loops = 0;
struct vy_lsm_recovery_info *lsm_info;
rlist_foreach_entry(lsm_info, &recovery->lsms, in_recovery) {
- if (lsm_info->is_dropped)
+ if (lsm_info->drop_lsn >= 0)
continue;
struct vy_run_recovery_info *run_info;
rlist_foreach_entry(run_info, &lsm_info->runs, in_lsm) {
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index d3c2bf6c..4e459459 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -82,6 +82,7 @@ enum vy_log_key {
VY_LOG_KEY_TRUNCATE_COUNT = 11,
VY_LOG_KEY_CREATE_LSN = 12,
VY_LOG_KEY_MODIFY_LSN = 13,
+ VY_LOG_KEY_DROP_LSN = 14,
};
/** vy_log_key -> human readable name. */
@@ -100,6 +101,7 @@ static const char *vy_log_key_name[] = {
[VY_LOG_KEY_TRUNCATE_COUNT] = "truncate_count",
[VY_LOG_KEY_CREATE_LSN] = "create_lsn",
[VY_LOG_KEY_MODIFY_LSN] = "modify_lsn",
+ [VY_LOG_KEY_DROP_LSN] = "drop_lsn",
};
/** vy_log_type -> human readable name. */
@@ -258,6 +260,10 @@ vy_log_record_snprint(char *buf, int size, const struct vy_log_record *record)
SNPRINT(total, snprintf, buf, size, "%s=%"PRIi64", ",
vy_log_key_name[VY_LOG_KEY_MODIFY_LSN],
record->modify_lsn);
+ if (record->drop_lsn > 0)
+ SNPRINT(total, snprintf, buf, size, "%s=%"PRIi64", ",
+ vy_log_key_name[VY_LOG_KEY_DROP_LSN],
+ record->drop_lsn);
if (record->dump_lsn > 0)
SNPRINT(total, snprintf, buf, size, "%s=%"PRIi64", ",
vy_log_key_name[VY_LOG_KEY_DUMP_LSN],
@@ -372,6 +378,11 @@ vy_log_record_encode(const struct vy_log_record *record,
size += mp_sizeof_uint(record->modify_lsn);
n_keys++;
}
+ if (record->drop_lsn > 0) {
+ size += mp_sizeof_uint(VY_LOG_KEY_DROP_LSN);
+ size += mp_sizeof_uint(record->drop_lsn);
+ n_keys++;
+ }
if (record->dump_lsn > 0) {
size += mp_sizeof_uint(VY_LOG_KEY_DUMP_LSN);
size += mp_sizeof_uint(record->dump_lsn);
@@ -448,6 +459,10 @@ vy_log_record_encode(const struct vy_log_record *record,
pos = mp_encode_uint(pos, VY_LOG_KEY_MODIFY_LSN);
pos = mp_encode_uint(pos, record->modify_lsn);
}
+ if (record->drop_lsn > 0) {
+ pos = mp_encode_uint(pos, VY_LOG_KEY_DROP_LSN);
+ pos = mp_encode_uint(pos, record->drop_lsn);
+ }
if (record->dump_lsn > 0) {
pos = mp_encode_uint(pos, VY_LOG_KEY_DUMP_LSN);
pos = mp_encode_uint(pos, record->dump_lsn);
@@ -571,6 +586,9 @@ vy_log_record_decode(struct vy_log_record *record,
case VY_LOG_KEY_MODIFY_LSN:
record->modify_lsn = mp_decode_uint(&pos);
break;
+ case VY_LOG_KEY_DROP_LSN:
+ record->drop_lsn = mp_decode_uint(&pos);
+ break;
case VY_LOG_KEY_DUMP_LSN:
record->dump_lsn = mp_decode_uint(&pos);
break;
@@ -1242,9 +1260,9 @@ vy_recovery_do_create_lsm(struct vy_recovery *recovery, int64_t id,
lsm->index_id = index_id;
lsm->key_parts = NULL;
lsm->key_part_count = 0;
- lsm->is_dropped = false;
lsm->create_lsn = -1;
lsm->modify_lsn = -1;
+ lsm->drop_lsn = -1;
lsm->dump_lsn = -1;
rlist_create(&lsm->ranges);
rlist_create(&lsm->runs);
@@ -1286,7 +1304,7 @@ vy_recovery_create_lsm(struct vy_recovery *recovery, int64_t id,
}
struct vy_lsm_recovery_info *lsm;
lsm = vy_recovery_lsm_by_index_id(recovery, space_id, index_id);
- if (lsm != NULL && !lsm->is_dropped) {
+ if (lsm != NULL && lsm->drop_lsn < 0) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("LSM tree %u/%u created twice",
(unsigned)space_id, (unsigned)index_id));
@@ -1329,7 +1347,7 @@ vy_recovery_modify_lsm(struct vy_recovery *recovery, int64_t id,
(long long)id));
return -1;
}
- if (lsm->is_dropped) {
+ if (lsm->drop_lsn >= 0) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("Update of deleted LSM tree %lld",
(long long)id));
@@ -1355,7 +1373,7 @@ vy_recovery_modify_lsm(struct vy_recovery *recovery, int64_t id,
* Returns 0 on success, -1 if ID not found or LSM tree is already marked.
*/
static int
-vy_recovery_drop_lsm(struct vy_recovery *recovery, int64_t id)
+vy_recovery_drop_lsm(struct vy_recovery *recovery, int64_t id, int64_t drop_lsn)
{
struct vy_lsm_recovery_info *lsm;
lsm = vy_recovery_lookup_lsm(recovery, id);
@@ -1365,7 +1383,7 @@ vy_recovery_drop_lsm(struct vy_recovery *recovery, int64_t id)
(long long)id));
return -1;
}
- if (lsm->is_dropped) {
+ if (lsm->drop_lsn >= 0) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("LSM tree %lld deleted twice",
(long long)id));
@@ -1386,7 +1404,8 @@ vy_recovery_drop_lsm(struct vy_recovery *recovery, int64_t id)
return -1;
}
}
- lsm->is_dropped = true;
+ assert(drop_lsn >= 0);
+ lsm->drop_lsn = drop_lsn;
return 0;
}
@@ -1408,7 +1427,7 @@ vy_recovery_dump_lsm(struct vy_recovery *recovery,
(long long)id));
return -1;
}
- if (lsm->is_dropped) {
+ if (lsm->drop_lsn >= 0) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("Dump of deleted LSM tree %lld",
(long long)id));
@@ -1508,7 +1527,7 @@ vy_recovery_create_run(struct vy_recovery *recovery, int64_t lsm_id,
(long long)lsm_id));
return -1;
}
- if (lsm->is_dropped) {
+ if (lsm->drop_lsn >= 0) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
tt_sprintf("Run %lld created for deleted "
"LSM tree %lld", (long long)run_id,
@@ -1829,7 +1848,8 @@ vy_recovery_process_record(struct vy_recovery *recovery,
record->modify_lsn);
break;
case VY_LOG_DROP_LSM:
- rc = vy_recovery_drop_lsm(recovery, record->lsm_id);
+ rc = vy_recovery_drop_lsm(recovery, record->lsm_id,
+ record->drop_lsn);
break;
case VY_LOG_INSERT_RANGE:
rc = vy_recovery_insert_range(recovery, record->lsm_id,
@@ -2132,10 +2152,11 @@ vy_log_append_lsm(struct xlog *xlog, struct vy_lsm_recovery_info *lsm)
}
}
- if (lsm->is_dropped) {
+ if (lsm->drop_lsn >= 0) {
vy_log_record_init(&record);
record.type = VY_LOG_DROP_LSM;
record.lsm_id = lsm->id;
+ record.drop_lsn = lsm->drop_lsn;
if (vy_log_append_record(xlog, &record) != 0)
return -1;
}
@@ -2162,7 +2183,7 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery)
* (and thus not needed for garbage collection) from the
* log on rotation.
*/
- if (lsm->is_dropped && rlist_empty(&lsm->runs))
+ if (lsm->drop_lsn >= 0 && rlist_empty(&lsm->runs))
continue;
/* Create the log file on the first write. */
diff --git a/src/box/vy_log.h b/src/box/vy_log.h
index 1b2b419f..22c31825 100644
--- a/src/box/vy_log.h
+++ b/src/box/vy_log.h
@@ -71,7 +71,7 @@ enum vy_log_record_type {
VY_LOG_CREATE_LSM = 0,
/**
* Drop an LSM tree.
- * Requires vy_log_record::lsm_id.
+ * Requires vy_log_record::lsm_id, drop_lsn.
*/
VY_LOG_DROP_LSM = 1,
/**
@@ -209,6 +209,11 @@ struct vy_log_record {
int64_t create_lsn;
/** LSN of the WAL row that last modified the LSM tree. */
int64_t modify_lsn;
+ /**
+ * LSN of the WAL row that dropped the LSM tree or -1
+ * if the tree is still active.
+ */
+ int64_t drop_lsn;
/** Max LSN stored on disk. */
int64_t dump_lsn;
/**
@@ -258,12 +263,15 @@ struct vy_lsm_recovery_info {
struct key_part_def *key_parts;
/** Number of key parts. */
uint32_t key_part_count;
- /** True if the LSM tree was dropped. */
- bool is_dropped;
/** LSN of the WAL row that created the LSM tree. */
int64_t create_lsn;
/** LSN of the WAL row that last modified the LSM tree. */
int64_t modify_lsn;
+ /**
+ * LSN of the WAL row that dropped the LSM tree or -1
+ * if the tree is still active.
+ */
+ int64_t drop_lsn;
/** LSN of the last LSM tree dump. */
int64_t dump_lsn;
/**
@@ -540,12 +548,13 @@ vy_log_modify_lsm(int64_t id, const struct key_def *key_def, int64_t modify_lsn)
/** Helper to log a vinyl LSM tree drop. */
static inline void
-vy_log_drop_lsm(int64_t id)
+vy_log_drop_lsm(int64_t id, int64_t drop_lsn)
{
struct vy_log_record record;
vy_log_record_init(&record);
record.type = VY_LOG_DROP_LSM;
record.lsm_id = id;
+ record.drop_lsn = drop_lsn;
vy_log_write(&record);
}
diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index edc3b1a4..289d5c40 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -543,7 +543,7 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
lsm->id = lsm_info->id;
lsm->commit_lsn = lsm_info->modify_lsn;
- if (lsn < lsm_info->create_lsn || lsm_info->is_dropped) {
+ if (lsn < lsm_info->create_lsn || lsm_info->drop_lsn >= 0) {
/*
* Loading a past incarnation of the LSM tree, i.e.
* the LSM tree is going to dropped during final
--
2.11.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 4/4] vinyl: purge dropped indexes from vylog on garbage collection
2018-05-23 16:10 [PATCH 0/4] vinyl: fix index drop vs compaction race Vladimir Davydov
` (2 preceding siblings ...)
2018-05-23 16:10 ` [PATCH 3/4] vinyl: store lsn of index drop record in vylog Vladimir Davydov
@ 2018-05-23 16:10 ` Vladimir Davydov
3 siblings, 0 replies; 5+ messages in thread
From: Vladimir Davydov @ 2018-05-23 16:10 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Currently, when an index is dropped, we remove all ranges/slices
associated with it and mark all runs as dropped in vylog immediately.
To find ranges/slices/runs, we use vy_lsm struct, see vy_log_lsm_prune.
The problem is vy_lsm struct may be inconsistent with the state stored
in vylog if index drop races with compaction, because we first write
changes done by compaction task to vylog and only then update vy_lsm
struct, see vy_task_compact_complete. Since write to vylog yields, this
opens a time window during which the index can be dropped. If this
happens, objects that were created by compaction but haven't been logged
yet (such as new runs, slices, ranges) will be deleted from vylog by
index drop, and this will permanently break vylog, making recovery
impossible.
To fix this issue, let's rework garbage collection of objects associated
with dropped indexes as follows. Now when an index is dropped, we write
a single record to vylog, VY_LOG_DROP_LSM, i.e. just mark the index as
dropped without deleting associated objects. Actual index cleanup takes
place in the garbage collection procedure, see vy_gc, which purges all
ranges/slices linked to marked indexes from vylog and marks all their
runs as dropped. When all runs are actually deleted from disk and
"forgotten" in vylog, we remove the index record from vylog by writing
VY_LOG_FORGET_LSM record. Since garbage collection procedure uses vylog
itself instead of vy_lsm struct for iterating over vinyl objects, no
race between index drop and dump/compaction can now lead to broken
vylog.
Closes #3416
---
src/box/vinyl.c | 62 +++++++++++++++++++++---------------
src/box/vy_log.c | 86 ++++++++++++++++++++++++++++++--------------------
src/box/vy_log.h | 21 ++++++++++++
test/vinyl/gc.result | 5 +++
test/vinyl/gc.test.lua | 3 ++
5 files changed, 117 insertions(+), 60 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 63f1c5ff..f0d26874 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -903,31 +903,6 @@ vinyl_index_commit_modify(struct index *index, int64_t lsn)
vy_log_tx_try_commit();
}
-/*
- * Delete all runs, ranges, and slices of a given LSM tree
- * from the metadata log.
- */
-static void
-vy_log_lsm_prune(struct vy_lsm *lsm, int64_t gc_lsn)
-{
- int loops = 0;
- for (struct vy_range *range = vy_range_tree_first(lsm->tree);
- range != NULL; range = vy_range_tree_next(lsm->tree, range)) {
- struct vy_slice *slice;
- rlist_foreach_entry(slice, &range->slices, in_range)
- vy_log_delete_slice(slice->id);
- vy_log_delete_range(range->id);
- if (++loops % VY_YIELD_LOOPS == 0)
- fiber_sleep(0);
- }
- struct vy_run *run;
- rlist_foreach_entry(run, &lsm->runs, in_lsm) {
- vy_log_drop_run(run->id, gc_lsn);
- if (++loops % VY_YIELD_LOOPS == 0)
- fiber_sleep(0);
- }
-}
-
static void
vinyl_index_commit_drop(struct index *index, int64_t lsn)
{
@@ -950,7 +925,6 @@ vinyl_index_commit_drop(struct index *index, int64_t lsn)
lsm->is_dropped = true;
vy_log_tx_begin();
- vy_log_lsm_prune(lsm, checkpoint_last(NULL));
vy_log_drop_lsm(lsm->id, lsn);
vy_log_tx_try_commit();
}
@@ -3352,6 +3326,38 @@ vy_gc_run(struct vy_env *env,
}
/**
+ * Given a dropped LSM tree, delete all its ranges and slices and
+ * mark all its runs as dropped. Forget the LSM tree if it has no
+ * associated objects.
+ */
+static void
+vy_gc_lsm(struct vy_lsm_recovery_info *lsm_info)
+{
+ assert(lsm_info->drop_lsn >= 0);
+
+ vy_log_tx_begin();
+ struct vy_range_recovery_info *range_info;
+ rlist_foreach_entry(range_info, &lsm_info->ranges, in_lsm) {
+ struct vy_slice_recovery_info *slice_info;
+ rlist_foreach_entry(slice_info, &range_info->slices, in_range)
+ vy_log_delete_slice(slice_info->id);
+ vy_log_delete_range(range_info->id);
+ }
+ struct vy_run_recovery_info *run_info;
+ rlist_foreach_entry(run_info, &lsm_info->runs, in_lsm) {
+ if (!run_info->is_dropped) {
+ run_info->is_dropped = true;
+ run_info->gc_lsn = lsm_info->drop_lsn;
+ vy_log_drop_run(run_info->id, run_info->gc_lsn);
+ }
+ }
+ if (rlist_empty(&lsm_info->ranges) &&
+ rlist_empty(&lsm_info->runs))
+ vy_log_forget_lsm(lsm_info->id);
+ vy_log_tx_try_commit();
+}
+
+/**
* Delete unused run files stored in the recovery context.
* @param env Vinyl environment.
* @param recovery Recovery context.
@@ -3365,6 +3371,10 @@ vy_gc(struct vy_env *env, struct vy_recovery *recovery,
int loops = 0;
struct vy_lsm_recovery_info *lsm_info;
rlist_foreach_entry(lsm_info, &recovery->lsms, in_recovery) {
+ if ((gc_mask & VY_GC_DROPPED) != 0 &&
+ lsm_info->drop_lsn >= 0)
+ vy_gc_lsm(lsm_info);
+
struct vy_run_recovery_info *run_info;
rlist_foreach_entry(run_info, &lsm_info->runs, in_lsm) {
if ((run_info->is_dropped &&
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 4e459459..4d561354 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -120,6 +120,7 @@ static const char *vy_log_type_name[] = {
[VY_LOG_SNAPSHOT] = "snapshot",
[VY_LOG_TRUNCATE_LSM] = "truncate_lsm",
[VY_LOG_MODIFY_LSM] = "modify_lsm",
+ [VY_LOG_FORGET_LSM] = "forget_lsm",
};
/** Metadata log object. */
@@ -1173,6 +1174,21 @@ vy_recovery_hash_index_id(struct vy_recovery *recovery,
return 0;
}
+/**
+ * Remove the entry corresponding to the space_id/index_id
+ * from vy_recovery::index_id_hash.
+ */
+static void
+vy_recovery_unhash_index_id(struct vy_recovery *recovery,
+ uint32_t space_id, uint32_t index_id)
+{
+ struct mh_i64ptr_t *h = recovery->index_id_hash;
+ int64_t key = vy_recovery_index_id_hash(space_id, index_id);
+ mh_int_t k = mh_i64ptr_find(h, key, NULL);
+ if (k != mh_end(h))
+ mh_i64ptr_del(h, k, NULL);
+}
+
/** Lookup an LSM tree in vy_recovery::index_id_hash map. */
struct vy_lsm_recovery_info *
vy_recovery_lsm_by_index_id(struct vy_recovery *recovery,
@@ -1369,7 +1385,6 @@ vy_recovery_modify_lsm(struct vy_recovery *recovery, int64_t id,
/**
* Handle a VY_LOG_DROP_LSM log record.
* This function marks the LSM tree with ID @id as dropped.
- * All ranges and runs of the LSM tree must have been deleted by now.
* Returns 0 on success, -1 if ID not found or LSM tree is already marked.
*/
static int
@@ -1389,23 +1404,44 @@ vy_recovery_drop_lsm(struct vy_recovery *recovery, int64_t id, int64_t drop_lsn)
(long long)id));
return -1;
}
- if (!rlist_empty(&lsm->ranges)) {
+ assert(drop_lsn >= 0);
+ lsm->drop_lsn = drop_lsn;
+ return 0;
+}
+
+/**
+ * Handle a VY_LOG_FORGET_LSM log record.
+ * This function removes the LSM tree with ID @id from the context.
+ * All ranges and runs of the LSM tree must have been deleted by now.
+ * Returns 0 on success, -1 if ID was not found or there are objects
+ * associated with the LSM tree.
+ */
+static int
+vy_recovery_forget_lsm(struct vy_recovery *recovery, int64_t id)
+{
+ struct mh_i64ptr_t *h = recovery->lsm_hash;
+ mh_int_t k = mh_i64ptr_find(h, id, NULL);
+ if (k == mh_end(h)) {
diag_set(ClientError, ER_INVALID_VYLOG_FILE,
- tt_sprintf("Dropped LSM tree %lld has ranges",
+ tt_sprintf("LSM tree %lld forgotten but not registered",
(long long)id));
return -1;
}
- struct vy_run_recovery_info *run;
- rlist_foreach_entry(run, &lsm->runs, in_lsm) {
- if (!run->is_dropped && !run->is_incomplete) {
- diag_set(ClientError, ER_INVALID_VYLOG_FILE,
- tt_sprintf("Dropped LSM tree %lld has active "
- "runs", (long long)id));
- return -1;
- }
+ struct vy_lsm_recovery_info *lsm = mh_i64ptr_node(h, k)->val;
+ if (!rlist_empty(&lsm->ranges) || !rlist_empty(&lsm->runs)) {
+ diag_set(ClientError, ER_INVALID_VYLOG_FILE,
+ tt_sprintf("Forgotten LSM tree %lld has ranges/runs",
+ (long long)id));
+ return -1;
}
- assert(drop_lsn >= 0);
- lsm->drop_lsn = drop_lsn;
+ mh_i64ptr_del(h, k, NULL);
+ rlist_del_entry(lsm, in_recovery);
+ if (lsm == vy_recovery_lsm_by_index_id(recovery,
+ lsm->space_id, lsm->index_id))
+ vy_recovery_unhash_index_id(recovery,
+ lsm->space_id, lsm->index_id);
+ free(lsm->key_parts);
+ free(lsm);
return 0;
}
@@ -1427,12 +1463,6 @@ vy_recovery_dump_lsm(struct vy_recovery *recovery,
(long long)id));
return -1;
}
- if (lsm->drop_lsn >= 0) {
- diag_set(ClientError, ER_INVALID_VYLOG_FILE,
- tt_sprintf("Dump of deleted LSM tree %lld",
- (long long)id));
- return -1;
- }
lsm->dump_lsn = dump_lsn;
return 0;
}
@@ -1527,13 +1557,6 @@ vy_recovery_create_run(struct vy_recovery *recovery, int64_t lsm_id,
(long long)lsm_id));
return -1;
}
- if (lsm->drop_lsn >= 0) {
- diag_set(ClientError, ER_INVALID_VYLOG_FILE,
- tt_sprintf("Run %lld created for deleted "
- "LSM tree %lld", (long long)run_id,
- (long long)lsm_id));
- return -1;
- }
struct vy_run_recovery_info *run;
run = vy_recovery_lookup_run(recovery, run_id);
if (run != NULL && run->is_dropped) {
@@ -1851,6 +1874,9 @@ vy_recovery_process_record(struct vy_recovery *recovery,
rc = vy_recovery_drop_lsm(recovery, record->lsm_id,
record->drop_lsn);
break;
+ case VY_LOG_FORGET_LSM:
+ rc = vy_recovery_forget_lsm(recovery, record->lsm_id);
+ break;
case VY_LOG_INSERT_RANGE:
rc = vy_recovery_insert_range(recovery, record->lsm_id,
record->range_id, record->begin, record->end);
@@ -2178,14 +2204,6 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery)
struct vy_lsm_recovery_info *lsm;
rlist_foreach_entry(lsm, &recovery->lsms, in_recovery) {
- /*
- * Purge dropped LSM trees that are not referenced by runs
- * (and thus not needed for garbage collection) from the
- * log on rotation.
- */
- if (lsm->drop_lsn >= 0 && rlist_empty(&lsm->runs))
- continue;
-
/* Create the log file on the first write. */
if (!xlog_is_open(&xlog) &&
xdir_create_xlog(&vy_log.dir, &xlog, vclock) != 0)
diff --git a/src/box/vy_log.h b/src/box/vy_log.h
index 22c31825..d77cb298 100644
--- a/src/box/vy_log.h
+++ b/src/box/vy_log.h
@@ -169,6 +169,16 @@ enum vy_log_record_type {
* Requires vy_log_record::lsm_id, key_def, modify_lsn.
*/
VY_LOG_MODIFY_LSM = 13,
+ /**
+ * Forget an LSM tree.
+ * Requires vy_log_record::lsm_id.
+ *
+ * A record of this type indicates that all objects associated
+ * with the given LSM tree have been cleaned up from vylog and
+ * so the LSM tree is not needed any longer and can be removed
+ * from vylog on the next rotation.
+ */
+ VY_LOG_FORGET_LSM = 14,
vy_log_record_type_MAX
};
@@ -546,6 +556,17 @@ vy_log_modify_lsm(int64_t id, const struct key_def *key_def, int64_t modify_lsn)
vy_log_write(&record);
}
+/** Helper to log a vinyl LSM tree cleanup. */
+static inline void
+vy_log_forget_lsm(int64_t id)
+{
+ struct vy_log_record record;
+ vy_log_record_init(&record);
+ record.type = VY_LOG_FORGET_LSM;
+ record.lsm_id = id;
+ vy_log_write(&record);
+}
+
/** Helper to log a vinyl LSM tree drop. */
static inline void
vy_log_drop_lsm(int64_t id, int64_t drop_lsn)
diff --git a/test/vinyl/gc.result b/test/vinyl/gc.result
index b709135c..bab6f31b 100644
--- a/test/vinyl/gc.result
+++ b/test/vinyl/gc.result
@@ -90,6 +90,11 @@ files = ls_data()
--
-- Check that vylog files are removed if vinyl is not used.
--
+-- This purges records corresponding to dropped runs, but
+-- dropped index records are still stored in vylog.
+gc()
+---
+...
files = ls_vylog()
---
...
diff --git a/test/vinyl/gc.test.lua b/test/vinyl/gc.test.lua
index 32078f00..eb56a1c6 100644
--- a/test/vinyl/gc.test.lua
+++ b/test/vinyl/gc.test.lua
@@ -44,6 +44,9 @@ files = ls_data()
-- Check that vylog files are removed if vinyl is not used.
--
+-- This purges records corresponding to dropped runs, but
+-- dropped index records are still stored in vylog.
+gc()
files = ls_vylog()
#files == 2 or {files, gc_info()}
--
2.11.0
^ permalink raw reply [flat|nested] 5+ messages in thread