[PATCH v2 11/14] vinyl: do not fill secondary tuples with nulls when decoded
Vladimir Davydov
vdavydov.dev at gmail.com
Wed Mar 13 11:52:57 MSK 2019
In contrast to a primary index, which stores full tuples, secondary
indexes only store extended (secondary + primary) keys on disk. To make
them look like tuples, we fill missing fields with nulls (aka tuple
surrogate). This isn't going to work nicely with multikey indexes
though: how would you make a surrogate array from a key? We could
special-case multikey index handling, but that would look cumbersome.
So this patch removes nulls from secondary tuples restored from disk
altogether. To achieve that, it's enough to use key_format for them -
then the comparators will detect that it's actually a key, not a tuple
and use the appropriate primitive.
---
src/box/key_def.c | 36 +++++++++++++++++++++++++++++++++++
src/box/key_def.h | 14 ++++++++++++++
src/box/vinyl.c | 46 +++++++++++++++++++++++++++++++++------------
src/box/vy_lsm.c | 30 ++++++++++++++++++++---------
src/box/vy_lsm.h | 13 +++++++++----
src/box/vy_run.c | 31 ++++++++++++++++++++----------
src/box/vy_stmt.c | 36 +++++++++++++++++------------------
test/unit/vy_point_lookup.c | 2 +-
8 files changed, 154 insertions(+), 54 deletions(-)
diff --git a/src/box/key_def.c b/src/box/key_def.c
index d4c979aa..c8993a06 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -709,6 +709,42 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
return new_def;
}
+struct key_def *
+key_def_extract(const struct key_def *cmp_def, const struct key_def *pk_def,
+ struct region *region)
+{
+ struct key_def *extracted_def = NULL;
+ size_t region_svp = region_used(region);
+
+ /* First, dump primary key parts as is. */
+ struct key_part_def *parts = region_alloc(region,
+ pk_def->part_count * sizeof(*parts));
+ if (parts == NULL) {
+ diag_set(OutOfMemory, pk_def->part_count * sizeof(*parts),
+ "region", "key def parts");
+ goto out;
+ }
+ if (key_def_dump_parts(pk_def, parts, region) != 0)
+ goto out;
+ /*
+ * Second, update field numbers to match the primary key
+ * parts in a secondary key.
+ */
+ for (uint32_t i = 0; i < pk_def->part_count; i++) {
+ const struct key_part *part = key_def_find(cmp_def,
+ &pk_def->parts[i]);
+ assert(part != NULL);
+ parts[i].fieldno = part - cmp_def->parts;
+ parts[i].path = NULL;
+ }
+
+ /* Finally, allocate the new key definition. */
+ extracted_def = key_def_new(parts, pk_def->part_count);
+out:
+ region_truncate(region, region_svp);
+ return extracted_def;
+}
+
int
key_validate_parts(const struct key_def *key_def, const char *key,
uint32_t part_count, bool allow_nullable)
diff --git a/src/box/key_def.h b/src/box/key_def.h
index e3a34ce1..21119a83 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -375,6 +375,20 @@ key_def_contains(const struct key_def *first, const struct key_def *second);
struct key_def *
key_def_merge(const struct key_def *first, const struct key_def *second);
+/**
+ * Create a key definition suitable for extracting primary key
+ * parts from an extended secondary key.
+ * @param cmp_def Extended secondary key definition
+ * (must include primary key parts).
+ * @param pk_def Primary key definition.
+ * @param region Region used for temporary allocations.
+ * @retval not NULL Pointer to the extracted key definition.
+ * @retval NULL Memory allocation error.
+ */
+struct key_def *
+key_def_extract(const struct key_def *cmp_def, const struct key_def *pk_def,
+ struct region *region);
+
/*
* Check that parts of the key match with the key definition.
* @param key_def Key definition.
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index ced0efe5..e9a4ae0d 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -719,10 +719,9 @@ vinyl_space_create_index(struct space *space, struct index_def *index_def)
pk = vy_lsm(space_index(space, 0));
assert(pk != NULL);
}
- struct vy_lsm *lsm = vy_lsm_new(&env->lsm_env, &env->stmt_env,
- &env->cache_env, &env->mem_env,
- index_def, space->format, pk,
- space_group_id(space));
+ struct vy_lsm *lsm = vy_lsm_new(&env->lsm_env, &env->cache_env,
+ &env->mem_env, index_def, space->format,
+ pk, space_group_id(space));
if (lsm == NULL) {
free(index);
return NULL;
@@ -1301,13 +1300,34 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
const struct vy_read_view **rv,
struct tuple *tuple, struct tuple **result)
{
+ int rc = 0;
assert(lsm->index_id > 0);
- if (vy_point_lookup(lsm->pk, tx, rv, tuple, result) != 0)
- return -1;
+ /*
+ * Lookup the full tuple by a secondary statement.
+ * There are two cases: the secondary statement may be
+ * a key, in which case we need to extract the primary
+ * key parts from it; or it may be a full tuple, which
+ * we may pass it immediately to the iterator.
+ */
+ struct tuple *key;
+ if (vy_stmt_is_key(tuple)) {
+ key = vy_stmt_extract_key(tuple, lsm->pk_in_cmp_def,
+ lsm->env->key_format);
+ if (key == NULL)
+ return -1;
+ } else {
+ key = tuple;
+ tuple_ref(key);
+ }
+
+ if (vy_point_lookup(lsm->pk, tx, rv, key, result) != 0) {
+ rc = -1;
+ goto out;
+ }
if (*result == NULL ||
- vy_stmt_compare(*result, tuple, lsm->key_def) != 0) {
+ vy_stmt_compare(*result, tuple, lsm->cmp_def) != 0) {
/*
* If a tuple read from a secondary index doesn't
* match the tuple corresponding to it in the
@@ -1327,7 +1347,7 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
* the tuple cache implementation.
*/
vy_cache_on_write(&lsm->cache, tuple, NULL);
- return 0;
+ goto out;
}
/*
@@ -1340,13 +1360,15 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
*/
if (tx != NULL && vy_tx_track_point(tx, lsm->pk, *result) != 0) {
tuple_unref(*result);
- return -1;
+ rc = -1;
+ goto out;
}
if ((*rv)->vlsn == INT64_MAX)
- vy_cache_add(&lsm->pk->cache, *result, NULL, tuple, ITER_EQ);
-
- return 0;
+ vy_cache_add(&lsm->pk->cache, *result, NULL, key, ITER_EQ);
+out:
+ tuple_unref(key);
+ return rc;
}
/**
diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 2dc8f639..28cd487c 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -118,10 +118,9 @@ vy_lsm_mem_tree_size(struct vy_lsm *lsm)
}
struct vy_lsm *
-vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env,
- struct vy_cache_env *cache_env, struct vy_mem_env *mem_env,
- struct index_def *index_def, struct tuple_format *format,
- struct vy_lsm *pk, uint32_t group_id)
+vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env,
+ struct vy_mem_env *mem_env, struct index_def *index_def,
+ struct tuple_format *format, struct vy_lsm *pk, uint32_t group_id)
{
static int64_t run_buckets[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 100,
@@ -156,10 +155,19 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env,
*/
lsm->disk_format = format;
} else {
- lsm->disk_format = vy_stmt_format_new(stmt_env, &cmp_def, 1,
- NULL, 0, 0, NULL);
- if (lsm->disk_format == NULL)
- goto fail_format;
+ /*
+ * To save disk space, we do not store full tuples
+ * in secondary index runs. Instead we only store
+ * extended keys (i.e. keys consisting of secondary
+ * and primary index parts). This is enough to look
+ * up a full tuple in the primary index.
+ */
+ lsm->disk_format = lsm_env->key_format;
+
+ lsm->pk_in_cmp_def = key_def_extract(lsm->cmp_def, pk->key_def,
+ &fiber()->gc);
+ if (lsm->pk_in_cmp_def == NULL)
+ goto fail_pk_in_cmp_def;
}
tuple_format_ref(lsm->disk_format);
@@ -208,7 +216,9 @@ fail_run_hist:
vy_lsm_stat_destroy(&lsm->stat);
fail_stat:
tuple_format_unref(lsm->disk_format);
-fail_format:
+ if (lsm->pk_in_cmp_def != NULL)
+ key_def_delete(lsm->pk_in_cmp_def);
+fail_pk_in_cmp_def:
key_def_delete(cmp_def);
fail_cmp_def:
key_def_delete(key_def);
@@ -262,6 +272,8 @@ vy_lsm_delete(struct vy_lsm *lsm)
tuple_format_unref(lsm->disk_format);
key_def_delete(lsm->cmp_def);
key_def_delete(lsm->key_def);
+ if (lsm->pk_in_cmp_def != NULL)
+ key_def_delete(lsm->pk_in_cmp_def);
histogram_delete(lsm->run_hist);
vy_lsm_stat_destroy(&lsm->stat);
vy_cache_destroy(&lsm->cache);
diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h
index b94e7a43..e969e45c 100644
--- a/src/box/vy_lsm.h
+++ b/src/box/vy_lsm.h
@@ -200,6 +200,12 @@ struct vy_lsm {
/** Key definition passed by the user. */
struct key_def *key_def;
/**
+ * Key definition to extract primary key parts from
+ * a secondary key. NULL if this LSM tree corresponds
+ * to a primary index.
+ */
+ struct key_def *pk_in_cmp_def;
+ /**
* If the following flag is set, the index this LSM tree
* is created for is unique and it must be checked for
* duplicates on INSERT. Otherwise, the check can be skipped,
@@ -333,10 +339,9 @@ vy_lsm_mem_tree_size(struct vy_lsm *lsm);
/** Allocate a new LSM tree object. */
struct vy_lsm *
-vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env,
- struct vy_cache_env *cache_env, struct vy_mem_env *mem_env,
- struct index_def *index_def, struct tuple_format *format,
- struct vy_lsm *pk, uint32_t group_id);
+vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env,
+ struct vy_mem_env *mem_env, struct index_def *index_def,
+ struct tuple_format *format, struct vy_lsm *pk, uint32_t group_id);
/** Free an LSM tree object. */
void
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index 76b2f94a..928ca57f 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -2139,7 +2139,8 @@ vy_run_writer_start_page(struct vy_run_writer *writer,
if (run->info.page_count >= writer->page_info_capacity &&
vy_run_alloc_page_info(run, &writer->page_info_capacity) != 0)
return -1;
- const char *key = tuple_extract_key(first_stmt, writer->cmp_def, NULL);
+ const char *key = vy_stmt_is_key(first_stmt) ? tuple_data(first_stmt) :
+ tuple_extract_key(first_stmt, writer->cmp_def, NULL);
if (key == NULL)
return -1;
if (run->info.page_count == 0) {
@@ -2289,7 +2290,9 @@ vy_run_writer_commit(struct vy_run_writer *writer)
}
assert(writer->last_stmt != NULL);
- const char *key = tuple_extract_key(writer->last_stmt,
+ const char *key = vy_stmt_is_key(writer->last_stmt) ?
+ tuple_data(writer->last_stmt) :
+ tuple_extract_key(writer->last_stmt,
writer->cmp_def, NULL);
if (key == NULL)
goto out;
@@ -2360,6 +2363,7 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
int64_t max_lsn = 0;
int64_t min_lsn = INT64_MAX;
struct tuple *prev_tuple = NULL;
+ char *page_min_key = NULL;
struct tuple_bloom_builder *bloom_builder = NULL;
if (opts->bloom_fpr < 1) {
@@ -2377,7 +2381,6 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
if (run->info.page_count == page_info_capacity &&
vy_run_alloc_page_info(run, &page_info_capacity) != 0)
goto close_err;
- const char *page_min_key = NULL;
uint32_t page_row_count = 0;
uint64_t page_row_index_offset = 0;
uint64_t row_offset = xlog_cursor_tx_pos(&cursor);
@@ -2400,7 +2403,8 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
tuple_unref(tuple);
goto close_err;
}
- key = tuple_extract_key(tuple, cmp_def, NULL);
+ key = vy_stmt_is_key(tuple) ? tuple_data(tuple) :
+ tuple_extract_key(tuple, cmp_def, NULL);
if (prev_tuple != NULL)
tuple_unref(prev_tuple);
prev_tuple = tuple;
@@ -2411,8 +2415,11 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
if (run->info.min_key == NULL)
goto close_err;
}
- if (page_min_key == NULL)
- page_min_key = key;
+ if (page_min_key == NULL) {
+ page_min_key = vy_key_dup(key);
+ if (page_min_key == NULL)
+ goto close_err;
+ }
if (xrow.lsn > max_lsn)
max_lsn = xrow.lsn;
if (xrow.lsn < min_lsn)
@@ -2429,11 +2436,9 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
info->row_index_offset = page_row_index_offset;
++run->info.page_count;
vy_run_acct_page(run, info);
- }
- if (prev_tuple != NULL) {
- tuple_unref(prev_tuple);
- prev_tuple = NULL;
+ free(page_min_key);
+ page_min_key = NULL;
}
if (key != NULL) {
@@ -2444,6 +2449,10 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
run->info.max_lsn = max_lsn;
run->info.min_lsn = min_lsn;
+ if (prev_tuple != NULL) {
+ tuple_unref(prev_tuple);
+ prev_tuple = NULL;
+ }
region_truncate(region, mem_used);
run->fd = cursor.fd;
xlog_cursor_close(&cursor, true);
@@ -2473,6 +2482,8 @@ close_err:
region_truncate(region, mem_used);
if (prev_tuple != NULL)
tuple_unref(prev_tuple);
+ if (page_min_key != NULL)
+ free(page_min_key);
if (bloom_builder != NULL)
tuple_bloom_builder_delete(bloom_builder);
if (xlog_cursor_is_open(&cursor))
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 229f8b41..fd7850cc 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -748,6 +748,8 @@ int
vy_stmt_encode_primary(const struct tuple *value, struct key_def *key_def,
uint32_t space_id, struct xrow_header *xrow)
{
+ assert(!vy_stmt_is_key(value));
+
memset(xrow, 0, sizeof(*xrow));
enum iproto_type type = vy_stmt_type(value);
xrow->type = type;
@@ -804,7 +806,9 @@ vy_stmt_encode_secondary(const struct tuple *value, struct key_def *cmp_def,
memset(&request, 0, sizeof(request));
request.type = type;
uint32_t size;
- const char *extracted = tuple_extract_key(value, cmp_def, &size);
+ const char *extracted = vy_stmt_is_key(value) ?
+ tuple_data_range(value, &size) :
+ tuple_extract_key(value, cmp_def, &size);
if (extracted == NULL)
return -1;
if (type == IPROTO_REPLACE || type == IPROTO_INSERT) {
@@ -834,27 +838,23 @@ vy_stmt_decode(struct xrow_header *xrow, const struct key_def *key_def,
if (xrow_decode_dml(xrow, &request, key_map) != 0)
return NULL;
struct tuple *stmt = NULL;
- const char *key;
- (void) key;
struct iovec ops;
switch (request.type) {
case IPROTO_DELETE:
- /* extract key */
- stmt = vy_stmt_new_surrogate_from_key(request.key,
- IPROTO_DELETE,
- key_def, format);
- break;
- case IPROTO_INSERT:
- case IPROTO_REPLACE:
- if (is_primary) {
- stmt = vy_stmt_new_with_ops(format, request.tuple,
- request.tuple_end,
- NULL, 0, request.type);
- } else {
- stmt = vy_stmt_new_surrogate_from_key(request.tuple,
- request.type,
+ if (is_primary)
+ stmt = vy_stmt_new_surrogate_from_key(request.key,
+ IPROTO_DELETE,
key_def, format);
- }
+ else
+ stmt = vy_stmt_new_with_ops(format, request.key,
+ request.key_end,
+ NULL, 0, IPROTO_DELETE);
+ break;
+ case IPROTO_INSERT:
+ case IPROTO_REPLACE:
+ stmt = vy_stmt_new_with_ops(format, request.tuple,
+ request.tuple_end,
+ NULL, 0, request.type);
break;
case IPROTO_UPSERT:
ops.iov_base = (char *)request.ops;
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index 38e9f403..f3cd84d4 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -94,7 +94,7 @@ test_basic()
index_def_new(512, 0, "primary", sizeof("primary") - 1, TREE,
&index_opts, key_def, NULL);
- struct vy_lsm *pk = vy_lsm_new(&lsm_env, &stmt_env, &cache_env, &mem_env,
+ struct vy_lsm *pk = vy_lsm_new(&lsm_env, &cache_env, &mem_env,
index_def, format, NULL, 0);
isnt(pk, NULL, "lsm is not NULL")
--
2.11.0
More information about the Tarantool-patches
mailing list