From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH v2 7/7] vinyl: incorporate tuple comparison hints into vinyl data structures Date: Sat, 6 Apr 2019 23:01:54 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org List-ID: Apart from speeding up statement comparisons and hence index lookups, this is also a prerequisite for multikey indexes, which will reuse tuple comparison hints as offsets in indexed arrays. Albeit huge, this patch is pretty straightforward - all it does is replace struct tuple with struct vy_entry (which is tuple + hint pair) practically everywhere in the code. Now statements are stored and compared without hints only in a few places, primarily at the very top level. Hints are also computed at the top level so it should be pretty easy to replace them with multikey offsets when the time comes. --- src/box/vinyl.c | 261 +++++++++++++++----------- src/box/vy_cache.c | 252 +++++++++++++------------ src/box/vy_cache.h | 46 ++--- src/box/vy_history.c | 42 +++-- src/box/vy_history.h | 25 ++- src/box/vy_lsm.c | 239 +++++++++++++----------- src/box/vy_lsm.h | 17 +- src/box/vy_mem.c | 211 ++++++++++----------- src/box/vy_mem.h | 56 +++--- src/box/vy_point_lookup.c | 44 ++--- src/box/vy_point_lookup.h | 7 +- src/box/vy_range.c | 75 ++++---- src/box/vy_range.h | 14 +- src/box/vy_read_iterator.c | 240 ++++++++++++------------ src/box/vy_read_iterator.h | 17 +- src/box/vy_read_set.c | 32 ++-- src/box/vy_read_set.h | 13 +- src/box/vy_run.c | 396 +++++++++++++++++++++------------------- src/box/vy_run.h | 37 ++-- src/box/vy_scheduler.c | 17 +- src/box/vy_stmt_stream.h | 4 +- src/box/vy_tx.c | 253 +++++++++++++------------ src/box/vy_tx.h | 31 ++-- src/box/vy_upsert.h | 16 +- src/box/vy_write_iterator.c | 280 ++++++++++++++-------------- test/unit/vy_cache.c | 19 +- test/unit/vy_iterators_helper.c | 93 +++++----- test/unit/vy_iterators_helper.h | 14 +- test/unit/vy_mem.c | 76 
++++---- test/unit/vy_point_lookup.c | 33 ++-- test/unit/vy_write_iterator.c | 15 +- test/vinyl/cache.result | 6 +- test/vinyl/stat.result | 26 +-- 33 files changed, 1530 insertions(+), 1377 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index eb331906..1e993761 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -192,7 +192,7 @@ struct vinyl_iterator { */ struct vy_tx *tx; /** Search key. */ - struct tuple *key; + struct vy_entry key; /** Vinyl read iterator. */ struct vy_read_iterator iterator; /** @@ -1142,8 +1142,8 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format) &env->xm->p_committed_read_view); int rc; int loops = 0; - struct tuple *tuple; - while ((rc = vy_read_iterator_next(&itr, &tuple)) == 0) { + struct vy_entry entry; + while ((rc = vy_read_iterator_next(&itr, &entry)) == 0) { /* * Read iterator yields only when it reads runs. * Yield periodically in order not to stall the @@ -1157,9 +1157,9 @@ vinyl_space_check_format(struct space *space, struct tuple_format *format) rc = -1; break; } - if (tuple == NULL) + if (entry.stmt == NULL) break; - rc = tuple_validate(format, tuple); + rc = tuple_validate(format, entry.stmt); if (rc != 0) break; } @@ -1308,7 +1308,7 @@ vy_is_committed(struct vy_env *env, struct space *space) * @param lsm LSM tree from which the tuple was read. * @param tx Current transaction. * @param rv Read view. - * @param tuple Tuple read from a secondary index. + * @param entry Tuple read from a secondary index. * @param[out] result The found tuple is stored here. Must be * unreferenced after usage. 
* @@ -1318,7 +1318,7 @@ vy_is_committed(struct vy_env *env, struct space *space) static int vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, - struct tuple *tuple, struct tuple **result) + struct vy_entry entry, struct vy_entry *result) { int rc = 0; assert(lsm->index_id > 0); @@ -1332,24 +1332,30 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, * the tuple cache or level 0, in which case we may pass * it immediately to the iterator. */ - struct tuple *key; - if (vy_stmt_is_key(tuple)) { - key = vy_stmt_extract_key(tuple, lsm->pk_in_cmp_def, - lsm->env->key_format); - if (key == NULL) + struct vy_entry key; + if (vy_stmt_is_key(entry.stmt)) { + key.stmt = vy_stmt_extract_key(entry.stmt, lsm->pk_in_cmp_def, + lsm->env->key_format); + if (key.stmt == NULL) return -1; } else { - key = tuple; - tuple_ref(key); + key.stmt = entry.stmt; + tuple_ref(key.stmt); } + key.hint = vy_stmt_hint(key.stmt, lsm->pk->cmp_def); if (vy_point_lookup(lsm->pk, tx, rv, key, result) != 0) { rc = -1; goto out; } - if (*result == NULL || - vy_stmt_compare(*result, tuple, lsm->cmp_def) != 0) { + /* + * Note, result stores a hint computed for the primary + * index while entry was read from a secondary index so + * we must not use vy_entry_compare() here. + */ + if (result->stmt == NULL || + vy_stmt_compare(result->stmt, entry.stmt, lsm->cmp_def) != 0) { /* * If a tuple read from a secondary index doesn't * match the tuple corresponding to it in the @@ -1358,9 +1364,9 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, * propagated to the secondary index yet. In this * case silently skip this tuple. 
*/ - if (*result != NULL) { - tuple_unref(*result); - *result = NULL; + if (result->stmt != NULL) { + tuple_unref(result->stmt); + *result = vy_entry_none(); } /* * We must purge stale tuples from the cache before @@ -1368,7 +1374,7 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, * chain intersections, which are not tolerated by * the tuple cache implementation. */ - vy_cache_on_write(&lsm->cache, tuple, NULL); + vy_cache_on_write(&lsm->cache, entry, NULL); goto out; } @@ -1381,15 +1387,20 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, * immediately. */ if (tx != NULL && vy_tx_track_point(tx, lsm->pk, *result) != 0) { - tuple_unref(*result); + tuple_unref(result->stmt); rc = -1; goto out; } - if ((*rv)->vlsn == INT64_MAX) - vy_cache_add(&lsm->pk->cache, *result, NULL, key, ITER_EQ); + if ((*rv)->vlsn == INT64_MAX) { + vy_cache_add(&lsm->pk->cache, *result, + vy_entry_none(), key, ITER_EQ); + } + + /* Inherit the hint from the secondary index entry. */ + result->hint = entry.hint; out: - tuple_unref(key); + tuple_unref(key.stmt); return rc; } @@ -1398,7 +1409,7 @@ out: * @param lsm LSM tree in which search. * @param tx Current transaction. * @param rv Read view. - * @param key Key statement. + * @param key_stmt Key statement. * @param[out] result The found tuple is stored here. Must be * unreferenced after usage. 
* @@ -1408,7 +1419,7 @@ out: static int vy_get(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, - struct tuple *key, struct tuple **result) + struct tuple *key_stmt, struct tuple **result) { /* * tx can be NULL, for example, if an user calls @@ -1417,46 +1428,54 @@ vy_get(struct vy_lsm *lsm, struct vy_tx *tx, assert(tx == NULL || tx->state == VINYL_TX_READY); int rc; - struct tuple *tuple; + struct vy_entry partial, entry; - if (vy_stmt_is_full_key(key, lsm->cmp_def)) { + struct vy_entry key; + key.stmt = key_stmt; + key.hint = vy_stmt_hint(key.stmt, lsm->cmp_def); + + if (vy_stmt_is_full_key(key.stmt, lsm->cmp_def)) { /* * Use point lookup for a full key. */ if (tx != NULL && vy_tx_track_point(tx, lsm, key) != 0) return -1; - if (vy_point_lookup(lsm, tx, rv, key, &tuple) != 0) + if (vy_point_lookup(lsm, tx, rv, key, &partial) != 0) return -1; - if (lsm->index_id > 0 && tuple != NULL) { + if (lsm->index_id > 0 && partial.stmt != NULL) { rc = vy_get_by_secondary_tuple(lsm, tx, rv, - tuple, result); - tuple_unref(tuple); + partial, &entry); + tuple_unref(partial.stmt); if (rc != 0) return -1; } else { - *result = tuple; + entry = partial; } - if ((*rv)->vlsn == INT64_MAX) - vy_cache_add(&lsm->cache, *result, NULL, key, ITER_EQ); + if ((*rv)->vlsn == INT64_MAX) { + vy_cache_add(&lsm->cache, entry, + vy_entry_none(), key, ITER_EQ); + } + *result = entry.stmt; return 0; } struct vy_read_iterator itr; vy_read_iterator_open(&itr, lsm, tx, ITER_EQ, key, rv); - while ((rc = vy_read_iterator_next(&itr, &tuple)) == 0) { - if (lsm->index_id == 0 || tuple == NULL) { - *result = tuple; - if (tuple != NULL) - tuple_ref(tuple); + while ((rc = vy_read_iterator_next(&itr, &partial)) == 0) { + if (lsm->index_id == 0 || partial.stmt == NULL) { + entry = partial; + if (entry.stmt != NULL) + tuple_ref(entry.stmt); break; } - rc = vy_get_by_secondary_tuple(lsm, tx, rv, tuple, result); - if (rc != 0 || *result != NULL) + rc = vy_get_by_secondary_tuple(lsm, tx, rv, 
partial, &entry); + if (rc != 0 || entry.stmt != NULL) break; } if (rc == 0) - vy_read_iterator_cache_add(&itr, *result); + vy_read_iterator_cache_add(&itr, entry); vy_read_iterator_close(&itr); + *result = entry.stmt; return rc; } @@ -1695,7 +1714,10 @@ static int vy_set_with_colmask(struct vy_tx *tx, struct vy_lsm *lsm, struct tuple *stmt, uint64_t column_mask) { - return vy_tx_set(tx, lsm, stmt, column_mask); + struct vy_entry entry; + entry.stmt = stmt; + entry.hint = vy_stmt_hint(stmt, lsm->cmp_def); + return vy_tx_set(tx, lsm, entry, column_mask); } static inline int @@ -2558,7 +2580,7 @@ vy_squash_queue_new(void); static void vy_squash_queue_delete(struct vy_squash_queue *q); static void -vy_squash_schedule(struct vy_lsm *lsm, struct tuple *stmt, +vy_squash_schedule(struct vy_lsm *lsm, struct vy_entry entry, void /* struct vy_env */ *arg); static struct vy_env * @@ -2976,24 +2998,28 @@ vy_prepare_send_slice(struct vy_join_ctx *ctx, { int rc = -1; struct vy_run *run = NULL; - struct tuple *begin = NULL, *end = NULL; + struct vy_entry begin = vy_entry_none(); + struct vy_entry end = vy_entry_none(); run = vy_run_new(&ctx->env->run_env, slice_info->run->id); if (run == NULL) goto out; - if (vy_run_recover(run, ctx->env->path, ctx->space_id, 0) != 0) + if (vy_run_recover(run, ctx->env->path, ctx->space_id, 0, + ctx->key_def) != 0) goto out; if (slice_info->begin != NULL) { - begin = vy_key_from_msgpack(ctx->env->lsm_env.key_format, - slice_info->begin); - if (begin == NULL) + begin = vy_entry_key_from_msgpack(ctx->env->lsm_env.key_format, + ctx->key_def, + slice_info->begin); + if (begin.stmt == NULL) goto out; } if (slice_info->end != NULL) { - end = vy_key_from_msgpack(ctx->env->lsm_env.key_format, - slice_info->end); - if (end == NULL) + end = vy_entry_key_from_msgpack(ctx->env->lsm_env.key_format, + ctx->key_def, + slice_info->end); + if (end.stmt == NULL) goto out; } @@ -3007,10 +3033,10 @@ vy_prepare_send_slice(struct vy_join_ctx *ctx, out: if (run != 
NULL) vy_run_unref(run); - if (begin != NULL) - tuple_unref(begin); - if (end != NULL) - tuple_unref(end); + if (begin.stmt != NULL) + tuple_unref(begin.stmt); + if (end.stmt != NULL) + tuple_unref(end.stmt); return rc; } @@ -3019,14 +3045,14 @@ vy_send_range_f(struct cbus_call_msg *cmsg) { struct vy_join_ctx *ctx = container_of(cmsg, struct vy_join_ctx, cmsg); - struct tuple *stmt; int rc = ctx->wi->iface->start(ctx->wi); if (rc != 0) goto err; - while ((rc = ctx->wi->iface->next(ctx->wi, &stmt)) == 0 && - stmt != NULL) { + struct vy_entry entry; + while ((rc = ctx->wi->iface->next(ctx->wi, &entry)) == 0 && + entry.stmt != NULL) { struct xrow_header xrow; - rc = vy_stmt_encode_primary(stmt, ctx->key_def, + rc = vy_stmt_encode_primary(entry.stmt, ctx->key_def, ctx->space_id, &xrow); if (rc != 0) break; @@ -3492,7 +3518,7 @@ struct vy_squash { /** LSM tree this request is for. */ struct vy_lsm *lsm; /** Key to squash upserts for. */ - struct tuple *stmt; + struct vy_entry entry; }; struct vy_squash_queue { @@ -3508,7 +3534,7 @@ struct vy_squash_queue { static struct vy_squash * vy_squash_new(struct mempool *pool, struct vy_env *env, - struct vy_lsm *lsm, struct tuple *stmt) + struct vy_lsm *lsm, struct vy_entry entry) { struct vy_squash *squash; squash = mempool_alloc(pool); @@ -3517,8 +3543,8 @@ vy_squash_new(struct mempool *pool, struct vy_env *env, squash->env = env; vy_lsm_ref(lsm); squash->lsm = lsm; - tuple_ref(stmt); - squash->stmt = stmt; + tuple_ref(entry.stmt); + squash->entry = entry; return squash; } @@ -3526,7 +3552,7 @@ static void vy_squash_delete(struct mempool *pool, struct vy_squash *squash) { vy_lsm_unref(squash->lsm); - tuple_unref(squash->stmt); + tuple_unref(squash->entry.stmt); mempool_free(pool, squash); } @@ -3547,11 +3573,11 @@ vy_squash_process(struct vy_squash *squash) * Use the committed read view to avoid squashing * prepared, but not committed statements. 
*/ - struct tuple *result; + struct vy_entry result; if (vy_point_lookup(lsm, NULL, &env->xm->p_committed_read_view, - squash->stmt, &result) != 0) + squash->entry, &result) != 0) return -1; - if (result == NULL) + if (result.stmt == NULL) return 0; /* @@ -3563,8 +3589,8 @@ vy_squash_process(struct vy_squash *squash) */ struct vy_mem *mem = lsm->mem; struct vy_mem_tree_key tree_key = { - .stmt = result, - .lsn = vy_stmt_lsn(result), + .entry = result, + .lsn = vy_stmt_lsn(result.stmt), }; struct vy_mem_tree_iterator mem_itr = vy_mem_tree_lower_bound(&mem->tree, &tree_key, NULL); @@ -3573,19 +3599,19 @@ vy_squash_process(struct vy_squash *squash) * The in-memory tree we are squashing an upsert * for was dumped, nothing to do. */ - tuple_unref(result); + tuple_unref(result.stmt); return 0; } vy_mem_tree_iterator_prev(&mem->tree, &mem_itr); uint8_t n_upserts = 0; while (!vy_mem_tree_iterator_is_invalid(&mem_itr)) { - struct tuple *mem_stmt; - mem_stmt = *vy_mem_tree_iterator_get_elem(&mem->tree, &mem_itr); - if (vy_stmt_compare(result, mem_stmt, lsm->cmp_def) != 0 || - vy_stmt_type(mem_stmt) != IPROTO_UPSERT) + struct vy_entry mem_entry; + mem_entry = *vy_mem_tree_iterator_get_elem(&mem->tree, &mem_itr); + if (vy_entry_compare(result, mem_entry, lsm->cmp_def) != 0 || + vy_stmt_type(mem_entry.stmt) != IPROTO_UPSERT) break; - assert(vy_stmt_lsn(mem_stmt) >= MAX_LSN); - vy_stmt_set_n_upserts(mem_stmt, n_upserts); + assert(vy_stmt_lsn(mem_entry.stmt) >= MAX_LSN); + vy_stmt_set_n_upserts(mem_entry.stmt, n_upserts); if (n_upserts <= VY_UPSERT_THRESHOLD) ++n_upserts; vy_mem_tree_iterator_prev(&mem->tree, &mem_itr); @@ -3600,7 +3626,8 @@ vy_squash_process(struct vy_squash *squash) size_t mem_used_before = lsregion_used(&env->mem_env.allocator); struct tuple *region_stmt = NULL; int rc = vy_lsm_set(lsm, mem, result, ®ion_stmt); - tuple_unref(result); + tuple_unref(result.stmt); + result.stmt = region_stmt; size_t mem_used_after = lsregion_used(&env->mem_env.allocator); 
assert(mem_used_after >= mem_used_before); if (rc == 0) { @@ -3608,7 +3635,7 @@ vy_squash_process(struct vy_squash *squash) * We don't modify the resulting statement, * so there's no need in invalidating the cache. */ - vy_mem_commit_stmt(mem, region_stmt); + vy_mem_commit_stmt(mem, result); vy_quota_force_use(&env->quota, VY_QUOTA_CONSUMER_TX, mem_used_after - mem_used_before); vy_regulator_check_dump_watermark(&env->regulator); @@ -3669,13 +3696,13 @@ vy_squash_queue_f(va_list va) * statement after it. Done in a background fiber. */ static void -vy_squash_schedule(struct vy_lsm *lsm, struct tuple *stmt, void *arg) +vy_squash_schedule(struct vy_lsm *lsm, struct vy_entry entry, void *arg) { struct vy_env *env = arg; struct vy_squash_queue *sq = env->squash_queue; say_verbose("%s: schedule upsert optimization for %s", - vy_lsm_name(lsm), vy_stmt_str(stmt)); + vy_lsm_name(lsm), vy_stmt_str(entry.stmt)); /* Start the upsert squashing fiber on demand. */ if (sq->fiber == NULL) { @@ -3685,7 +3712,7 @@ vy_squash_schedule(struct vy_lsm *lsm, struct tuple *stmt, void *arg) fiber_start(sq->fiber, sq); } - struct vy_squash *squash = vy_squash_new(&sq->pool, env, lsm, stmt); + struct vy_squash *squash = vy_squash_new(&sq->pool, env, lsm, entry); if (squash == NULL) goto fail; @@ -3722,8 +3749,8 @@ vinyl_iterator_close(struct vinyl_iterator *it) vy_read_iterator_close(&it->iterator); vy_lsm_unref(it->lsm); it->lsm = NULL; - tuple_unref(it->key); - it->key = NULL; + tuple_unref(it->key.stmt); + it->key = vy_entry_none(); if (it->tx == &it->tx_autocommit) { /* * Rollback the automatic transaction. 
@@ -3774,15 +3801,17 @@ vinyl_iterator_primary_next(struct iterator *base, struct tuple **ret) if (vinyl_iterator_check_tx(it) != 0) goto fail; - if (vy_read_iterator_next(&it->iterator, ret) != 0) + struct vy_entry entry; + if (vy_read_iterator_next(&it->iterator, &entry) != 0) goto fail; - vy_read_iterator_cache_add(&it->iterator, *ret); - if (*ret == NULL) { + vy_read_iterator_cache_add(&it->iterator, entry); + if (entry.stmt == NULL) { /* EOF. Close the iterator immediately. */ vinyl_iterator_close(it); } else { - tuple_bless(*ret); + tuple_bless(entry.stmt); } + *ret = entry.stmt; return 0; fail: vinyl_iterator_close(it); @@ -3795,18 +3824,18 @@ vinyl_iterator_secondary_next(struct iterator *base, struct tuple **ret) assert(base->next = vinyl_iterator_secondary_next); struct vinyl_iterator *it = (struct vinyl_iterator *)base; assert(it->lsm->index_id > 0); - struct tuple *tuple; + struct vy_entry partial, entry; next: if (vinyl_iterator_check_tx(it) != 0) goto fail; - if (vy_read_iterator_next(&it->iterator, &tuple) != 0) + if (vy_read_iterator_next(&it->iterator, &partial) != 0) goto fail; - if (tuple == NULL) { + if (partial.stmt == NULL) { /* EOF. Close the iterator immediately. */ - vy_read_iterator_cache_add(&it->iterator, NULL); + vy_read_iterator_cache_add(&it->iterator, vy_entry_none()); vinyl_iterator_close(it); *ret = NULL; return 0; @@ -3822,11 +3851,12 @@ next: /* Get the full tuple from the primary index. 
*/ if (vy_get_by_secondary_tuple(it->lsm, it->tx, vy_tx_read_view(it->tx), - tuple, ret) != 0) + partial, &entry) != 0) goto fail; - if (*ret == NULL) + if (entry.stmt == NULL) goto next; - vy_read_iterator_cache_add(&it->iterator, *ret); + vy_read_iterator_cache_add(&it->iterator, entry); + *ret = entry.stmt; tuple_bless(*ret); tuple_unref(*ret); return 0; @@ -3870,8 +3900,9 @@ vinyl_index_create_iterator(struct index *base, enum iterator_type type, "mempool", "struct vinyl_iterator"); return NULL; } - it->key = vy_key_new(lsm->env->key_format, key, part_count); - if (it->key == NULL) { + it->key = vy_entry_key_new(lsm->env->key_format, lsm->cmp_def, + key, part_count); + if (it->key.stmt == NULL) { mempool_free(&env->iterator_pool, it); return NULL; } @@ -4027,9 +4058,12 @@ vy_build_insert_stmt(struct vy_lsm *lsm, struct vy_mem *mem, if (region_stmt == NULL) return -1; vy_stmt_set_lsn(region_stmt, lsn); - if (vy_mem_insert(mem, region_stmt) != 0) + struct vy_entry entry; + entry.stmt = region_stmt; + entry.hint = vy_stmt_hint(region_stmt, lsm->cmp_def); + if (vy_mem_insert(mem, entry) != 0) return -1; - vy_mem_commit_stmt(mem, region_stmt); + vy_mem_commit_stmt(mem, entry); vy_stmt_counter_acct_tuple(&lsm->stat.memory.count, region_stmt); return 0; } @@ -4108,8 +4142,9 @@ vy_build_insert_tuple(struct vy_env *env, struct vy_lsm *lsm, */ static int vy_build_recover_stmt(struct vy_lsm *lsm, struct vy_lsm *pk, - struct tuple *mem_stmt) + struct vy_entry mem_entry) { + struct tuple *mem_stmt = mem_entry.stmt; int64_t lsn = vy_stmt_lsn(mem_stmt); if (lsn <= lsm->dump_lsn) return 0; /* statement was dumped, nothing to do */ @@ -4117,8 +4152,8 @@ vy_build_recover_stmt(struct vy_lsm *lsm, struct vy_lsm *pk, /* Lookup the tuple that was affected by this statement. 
*/ const struct vy_read_view rv = { .vlsn = lsn - 1 }; const struct vy_read_view *p_rv = &rv; - struct tuple *old_tuple; - if (vy_point_lookup(pk, NULL, &p_rv, mem_stmt, &old_tuple) != 0) + struct vy_entry old; + if (vy_point_lookup(pk, NULL, &p_rv, mem_entry, &old) != 0) return -1; /* * Create DELETE + INSERT statements corresponding to @@ -4126,6 +4161,7 @@ vy_build_recover_stmt(struct vy_lsm *lsm, struct vy_lsm *pk, */ struct tuple *delete = NULL; struct tuple *insert = NULL; + struct tuple *old_tuple = old.stmt; if (old_tuple != NULL) { delete = vy_stmt_extract_key(old_tuple, lsm->cmp_def, lsm->env->key_format); @@ -4187,9 +4223,9 @@ vy_build_recover_mem(struct vy_lsm *lsm, struct vy_lsm *pk, struct vy_mem *mem) struct vy_mem_tree_iterator itr; itr = vy_mem_tree_iterator_last(&mem->tree); while (!vy_mem_tree_iterator_is_invalid(&itr)) { - struct tuple *mem_stmt; - mem_stmt = *vy_mem_tree_iterator_get_elem(&mem->tree, &itr); - if (vy_build_recover_stmt(lsm, pk, mem_stmt) != 0) + struct vy_entry mem_entry; + mem_entry = *vy_mem_tree_iterator_get_elem(&mem->tree, &itr); + if (vy_build_recover_stmt(lsm, pk, mem_entry) != 0) return -1; vy_mem_tree_iterator_prev(&mem->tree, &itr); } @@ -4277,9 +4313,10 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index, &env->xm->p_committed_read_view); int rc; int loops = 0; - struct tuple *tuple; + struct vy_entry entry; int64_t build_lsn = env->xm->lsn; - while ((rc = vy_read_iterator_next(&itr, &tuple)) == 0) { + while ((rc = vy_read_iterator_next(&itr, &entry)) == 0) { + struct tuple *tuple = entry.stmt; if (tuple == NULL) break; /* @@ -4485,10 +4522,14 @@ vy_deferred_delete_on_replace(struct trigger *trigger, void *event) break; mem = lsm->mem; } - rc = vy_lsm_set(lsm, mem, delete, ®ion_stmt); + struct vy_entry entry; + entry.stmt = delete; + entry.hint = vy_stmt_hint(delete, lsm->cmp_def); + rc = vy_lsm_set(lsm, mem, entry, ®ion_stmt); if (rc != 0) break; - vy_lsm_commit_stmt(lsm, mem, region_stmt); + 
entry.stmt = region_stmt; + vy_lsm_commit_stmt(lsm, mem, entry); if (!is_first_statement) continue; diff --git a/src/box/vy_cache.c b/src/box/vy_cache.c index 79da73d7..be11b986 100644 --- a/src/box/vy_cache.c +++ b/src/box/vy_cache.c @@ -78,36 +78,37 @@ vy_cache_node_size(const struct vy_cache_node *node) * primary index tuples. */ if (node->cache->is_primary) - size += tuple_size(node->stmt); + size += tuple_size(node->entry.stmt); return size; } static struct vy_cache_node * vy_cache_node_new(struct vy_cache_env *env, struct vy_cache *cache, - struct tuple *stmt) + struct vy_entry entry) { struct vy_cache_node *node = mempool_alloc(&env->cache_node_mempool); if (node == NULL) return NULL; - tuple_ref(stmt); + tuple_ref(entry.stmt); node->cache = cache; - node->stmt = stmt; + node->entry = entry; node->flags = 0; node->left_boundary_level = cache->cmp_def->part_count; node->right_boundary_level = cache->cmp_def->part_count; rlist_add(&env->cache_lru, &node->in_lru); env->mem_used += vy_cache_node_size(node); - vy_stmt_counter_acct_tuple(&cache->stat.count, stmt); + vy_stmt_counter_acct_tuple(&cache->stat.count, entry.stmt); return node; } static void vy_cache_node_delete(struct vy_cache_env *env, struct vy_cache_node *node) { - vy_stmt_counter_unacct_tuple(&node->cache->stat.count, node->stmt); + vy_stmt_counter_unacct_tuple(&node->cache->stat.count, + node->entry.stmt); assert(env->mem_used >= vy_cache_node_size(node)); env->mem_used -= vy_cache_node_size(node); - tuple_unref(node->stmt); + tuple_unref(node->entry.stmt); rlist_del(&node->in_lru); TRASH(node); mempool_free(&env->cache_node_mempool, node); @@ -173,7 +174,7 @@ vy_cache_gc_step(struct vy_cache_env *env) if (node->flags & (VY_CACHE_LEFT_LINKED | VY_CACHE_RIGHT_LINKED)) { bool exact; struct vy_cache_tree_iterator itr = - vy_cache_tree_lower_bound(tree, node->stmt, &exact); + vy_cache_tree_lower_bound(tree, node->entry, &exact); assert(exact); if (node->flags & VY_CACHE_LEFT_LINKED) { struct 
vy_cache_tree_iterator prev = itr; @@ -194,7 +195,7 @@ vy_cache_gc_step(struct vy_cache_env *env) } } cache->version++; - vy_stmt_counter_acct_tuple(&cache->stat.evict, node->stmt); + vy_stmt_counter_acct_tuple(&cache->stat.evict, node->entry.stmt); vy_cache_tree_delete(&cache->cache_tree, node); vy_cache_node_delete(cache->env, node); } @@ -224,8 +225,8 @@ vy_cache_env_set_quota(struct vy_cache_env *env, size_t quota) } void -vy_cache_add(struct vy_cache *cache, struct tuple *stmt, - struct tuple *prev_stmt, struct tuple *key, +vy_cache_add(struct vy_cache *cache, struct vy_entry curr, + struct vy_entry prev, struct vy_entry key, enum iterator_type order) { if (cache->env->mem_quota == 0) { @@ -236,20 +237,20 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, /* Delete some entries if quota overused */ vy_cache_gc(cache->env); - if (stmt != NULL && vy_stmt_lsn(stmt) == INT64_MAX) { + if (curr.stmt != NULL && vy_stmt_lsn(curr.stmt) == INT64_MAX) { /* Do not store a statement from write set of a tx */ return; } /* The case of the first or the last result in key+order query */ - bool is_boundary = (stmt != NULL) != (prev_stmt != NULL); + bool is_boundary = (curr.stmt != NULL) != (prev.stmt != NULL); - if (prev_stmt != NULL && vy_stmt_lsn(prev_stmt) == INT64_MAX) { + if (prev.stmt != NULL && vy_stmt_lsn(prev.stmt) == INT64_MAX) { /* Previous statement is from tx write set, can't store it */ - prev_stmt = NULL; + prev = vy_entry_none(); } - if (prev_stmt == NULL && stmt == NULL) { + if (prev.stmt == NULL && curr.stmt == NULL) { /* Do not store empty ranges */ return; } @@ -260,48 +261,48 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, * in cache to be inserted. */ uint8_t boundary_level = cache->cmp_def->part_count; - if (stmt != NULL) { + if (curr.stmt != NULL) { if (is_boundary) { /** - * That means that the stmt is the first in a result. + * That means that the curr is the first in a result. 
* Regardless of order, the statement is the first in * sequence of statements that is equal to the key. */ - boundary_level = vy_stmt_key_part_count(key, + boundary_level = vy_stmt_key_part_count(key.stmt, cache->cmp_def); } } else { - assert(prev_stmt != NULL); + assert(prev.stmt != NULL); if (order == ITER_EQ || order == ITER_REQ) { /* that is the last statement that is equal to key */ - boundary_level = vy_stmt_key_part_count(key, + boundary_level = vy_stmt_key_part_count(key.stmt, cache->cmp_def); } else { /* that is the last statement */ boundary_level = 0; } /** - * That means that the search was ended, and prev_stmt was + * That means that the search was ended, and prev was * the last statement of the result. It is equivalent to * first found statement with a reverse order. Let's transform * to the equivalent case in order of further simplification. */ direction = -direction; - stmt = prev_stmt; - prev_stmt = NULL; + curr = prev; + prev = vy_entry_none(); } TRASH(&order); - assert(vy_stmt_type(stmt) == IPROTO_INSERT || - vy_stmt_type(stmt) == IPROTO_REPLACE); - assert(prev_stmt == NULL || - vy_stmt_type(prev_stmt) == IPROTO_INSERT || - vy_stmt_type(prev_stmt) == IPROTO_REPLACE); + assert(vy_stmt_type(curr.stmt) == IPROTO_INSERT || + vy_stmt_type(curr.stmt) == IPROTO_REPLACE); + assert(prev.stmt == NULL || + vy_stmt_type(prev.stmt) == IPROTO_INSERT || + vy_stmt_type(prev.stmt) == IPROTO_REPLACE); cache->version++; /* Insert/replace new node to the tree */ struct vy_cache_node *node = - vy_cache_node_new(cache->env, cache, stmt); + vy_cache_node_new(cache->env, cache, curr); if (node == NULL) { /* memory error, let's live without a cache */ return; @@ -326,10 +327,10 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, else if (direction < 0 && boundary_level < node->right_boundary_level) node->right_boundary_level = boundary_level; - vy_stmt_counter_acct_tuple(&cache->stat.put, stmt); + vy_stmt_counter_acct_tuple(&cache->stat.put, curr.stmt); /* Done 
if it's not a chain */ - if (prev_stmt == NULL) + if (prev.stmt == NULL) return; /* The flag that must be set in the inserted chain node */ @@ -338,21 +339,21 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, #ifndef NDEBUG /** - * Usually prev_stmt is already in the cache but there are cases + * Usually prev is already in the cache but there are cases * when it's not (see below). - * There must be no entries between (prev_stmt, stmt) interval in + * There must be no entries between (prev, curr) interval in * any case. (1) - * Farther, if the stmt node is already linked (in certain direction), - * it must be linked with prev_stmt (in that direction). (2) + * Farther, if the curr node is already linked (in certain direction), + * it must be linked with prev (in that direction). (2) * Let't check (1) and (2) for debug reasons. * - * There are two cases in which prev_stmt statement is absent + * There are two cases in which prev statement is absent * in the cache: * 1) The statement was in prepared state and then it was * committed or rollbacked. * 2) The node was popped out by vy_cache_gc. * - * Note that case when the prev_stmt is owerwritten by other TX + * Note that case when the prev is owerwritten by other TX * is impossible because this TX would be sent to read view and * wouldn't be able to add anything to the cache. */ @@ -366,17 +367,16 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, vy_cache_tree_iterator_get_elem(&cache->cache_tree, &inserted); assert(*prev_check_node != NULL); - struct tuple *prev_check_stmt = (*prev_check_node)->stmt; - int cmp = vy_stmt_compare(prev_stmt, prev_check_stmt, - cache->cmp_def); + struct vy_entry prev_check = (*prev_check_node)->entry; + int cmp = vy_entry_compare(prev, prev_check, cache->cmp_def); if (node->flags & flag) { - /* The found node must be exactly prev_stmt. (2) */ + /* The found node must be exactly prev. 
(2) */ assert(cmp == 0); } else { /* - * The found node must be exactly prev_stmt or lay - * farther than prev_stmt. (1) + * The found node must be exactly prev or lay + * farther than prev. (1) */ assert(cmp * direction >= 0); } @@ -391,7 +391,7 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, /* Insert/replace node with previous statement */ struct vy_cache_node *prev_node = - vy_cache_node_new(cache->env, cache, prev_stmt); + vy_cache_node_new(cache->env, cache, prev); if (prev_node == NULL) { /* memory error, let's live without a chain */ return; @@ -416,40 +416,40 @@ vy_cache_add(struct vy_cache *cache, struct tuple *stmt, VY_CACHE_RIGHT_LINKED) ^ flag; } -struct tuple * -vy_cache_get(struct vy_cache *cache, struct tuple *key) +struct vy_entry +vy_cache_get(struct vy_cache *cache, struct vy_entry key) { struct vy_cache_node **node = vy_cache_tree_find(&cache->cache_tree, key); if (node == NULL) - return NULL; - return (*node)->stmt; + return vy_entry_none(); + return (*node)->entry; } void -vy_cache_on_write(struct vy_cache *cache, struct tuple *stmt, - struct tuple **deleted) +vy_cache_on_write(struct vy_cache *cache, struct vy_entry entry, + struct vy_entry *deleted) { vy_cache_gc(cache->env); bool exact = false; struct vy_cache_tree_iterator itr; - itr = vy_cache_tree_lower_bound(&cache->cache_tree, stmt, &exact); + itr = vy_cache_tree_lower_bound(&cache->cache_tree, entry, &exact); struct vy_cache_node **node = vy_cache_tree_iterator_get_elem(&cache->cache_tree, &itr); assert(!exact || node != NULL); /* * There are three cases possible - * (1) there's a value in cache that is equal to stmt. + * (1) there's a value in cache that is equal to entry. * ('exact' == true, 'node' points the equal value in cache) - * (2) there's no value in cache that is equal to stmt, and lower_bound + * (2) there's no value in cache that is equal to entry, and lower_bound * returned the next record. 
* ('exact' == false, 'node' points to the equal value in cache) - * (3) there's no value in cache that is equal to stmt, and lower_bound + * (3) there's no value in cache that is equal to entry, and lower_bound * returned invalid iterator, so there's no bigger value. * ('exact' == false, 'node' == NULL) */ - if (vy_stmt_type(stmt) == IPROTO_DELETE && !exact) { + if (vy_stmt_type(entry.stmt) == IPROTO_DELETE && !exact) { /* there was nothing and there is nothing now */ return; } @@ -490,14 +490,14 @@ vy_cache_on_write(struct vy_cache *cache, struct tuple *stmt, assert(node != NULL); cache->version++; struct vy_cache_node *to_delete = *node; - assert(vy_stmt_type(to_delete->stmt) == IPROTO_INSERT || - vy_stmt_type(to_delete->stmt) == IPROTO_REPLACE); + assert(vy_stmt_type(to_delete->entry.stmt) == IPROTO_INSERT || + vy_stmt_type(to_delete->entry.stmt) == IPROTO_REPLACE); if (deleted != NULL) { - *deleted = to_delete->stmt; - tuple_ref(to_delete->stmt); + *deleted = to_delete->entry; + tuple_ref(to_delete->entry.stmt); } vy_stmt_counter_acct_tuple(&cache->stat.invalidate, - to_delete->stmt); + to_delete->entry.stmt); vy_cache_tree_delete(&cache->cache_tree, to_delete); vy_cache_node_delete(cache->env, to_delete); } @@ -506,13 +506,13 @@ vy_cache_on_write(struct vy_cache *cache, struct tuple *stmt, /** * Get a stmt by current position */ -static struct tuple * -vy_cache_iterator_curr_stmt(struct vy_cache_iterator *itr) +static struct vy_entry +vy_cache_iterator_curr(struct vy_cache_iterator *itr) { struct vy_cache_tree *tree = &itr->cache->cache_tree; struct vy_cache_node **node = vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos); - return node ? (*node)->stmt : NULL; + return node ? 
(*node)->entry : vy_entry_none(); } /** @@ -526,7 +526,7 @@ static inline bool vy_cache_iterator_is_stop(struct vy_cache_iterator *itr, struct vy_cache_node *node) { - uint8_t key_level = vy_stmt_key_part_count(itr->key, + uint8_t key_level = vy_stmt_key_part_count(itr->key.stmt, itr->cache->cmp_def); /* select{} is actually an EQ iterator with part_count == 0 */ bool iter_is_eq = itr->iterator_type == ITER_EQ || key_level == 0; @@ -556,7 +556,7 @@ static inline bool vy_cache_iterator_is_end_stop(struct vy_cache_iterator *itr, struct vy_cache_node *last_node) { - uint8_t key_level = vy_stmt_key_part_count(itr->key, + uint8_t key_level = vy_stmt_key_part_count(itr->key.stmt, itr->cache->cmp_def); /* select{} is actually an EQ iterator with part_count == 0 */ bool iter_is_eq = itr->iterator_type == ITER_EQ || key_level == 0; @@ -590,9 +590,9 @@ vy_cache_iterator_is_end_stop(struct vy_cache_iterator *itr, static inline bool vy_cache_iterator_step(struct vy_cache_iterator *itr) { - if (itr->curr_stmt != NULL) { - tuple_unref(itr->curr_stmt); - itr->curr_stmt = NULL; + if (itr->curr.stmt != NULL) { + tuple_unref(itr->curr.stmt); + itr->curr = vy_entry_none(); } struct vy_cache_tree *tree = &itr->cache->cache_tree; struct vy_cache_node *prev_node = @@ -607,11 +607,11 @@ vy_cache_iterator_step(struct vy_cache_iterator *itr) *vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos); if (itr->iterator_type == ITER_EQ && - vy_stmt_compare(itr->key, node->stmt, itr->cache->cmp_def)) { + vy_entry_compare(itr->key, node->entry, itr->cache->cmp_def)) { return vy_cache_iterator_is_end_stop(itr, prev_node); } - itr->curr_stmt = node->stmt; - tuple_ref(itr->curr_stmt); + itr->curr = node->entry; + tuple_ref(itr->curr.stmt); return vy_cache_iterator_is_stop(itr, node); } @@ -622,8 +622,8 @@ vy_cache_iterator_step(struct vy_cache_iterator *itr) static void vy_cache_iterator_skip_to_read_view(struct vy_cache_iterator *itr, bool *stop) { - while (itr->curr_stmt != NULL && - 
vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) { + while (itr->curr.stmt != NULL && + vy_stmt_lsn(itr->curr.stmt) > (**itr->read_view).vlsn) { /* * The cache stores the latest tuple of the key, * but there could be older tuples in runs. @@ -643,26 +643,26 @@ vy_cache_iterator_skip_to_read_view(struct vy_cache_iterator *itr, bool *stop) * and hence the caller doesn't need to scan mems and runs. */ static bool -vy_cache_iterator_seek(struct vy_cache_iterator *itr, struct tuple *last_key) +vy_cache_iterator_seek(struct vy_cache_iterator *itr, struct vy_entry last) { struct vy_cache_tree *tree = &itr->cache->cache_tree; - if (itr->curr_stmt != NULL) { - tuple_unref(itr->curr_stmt); - itr->curr_stmt = NULL; + if (itr->curr.stmt != NULL) { + tuple_unref(itr->curr.stmt); + itr->curr = vy_entry_none(); } itr->cache->stat.lookup++; - struct tuple *key = itr->key; + struct vy_entry key = itr->key; enum iterator_type iterator_type = itr->iterator_type; - if (last_key != NULL) { - key = last_key; + if (last.stmt != NULL) { + key = last; iterator_type = iterator_direction(itr->iterator_type) > 0 ? ITER_GT : ITER_LT; } bool exact; - if (!vy_stmt_is_empty_key(key)) { + if (!vy_stmt_is_empty_key(key.stmt)) { itr->curr_pos = iterator_type == ITER_EQ || iterator_type == ITER_GE || iterator_type == ITER_LT ? 
@@ -684,13 +684,13 @@ vy_cache_iterator_seek(struct vy_cache_iterator *itr, struct tuple *last_key) node = *vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos); if (itr->iterator_type == ITER_EQ && - ((last_key == NULL && !exact) || - (last_key != NULL && vy_stmt_compare(itr->key, node->stmt, - itr->cache->cmp_def) != 0))) + ((last.stmt == NULL && !exact) || + (last.stmt != NULL && vy_entry_compare(itr->key, node->entry, + itr->cache->cmp_def) != 0))) return false; - itr->curr_stmt = node->stmt; - tuple_ref(itr->curr_stmt); + itr->curr = node->entry; + tuple_ref(itr->curr.stmt); return vy_cache_iterator_is_stop(itr, node); } @@ -701,60 +701,59 @@ vy_cache_iterator_next(struct vy_cache_iterator *itr, vy_history_cleanup(history); if (!itr->search_started) { - assert(itr->curr_stmt == NULL); + assert(itr->curr.stmt == NULL); itr->search_started = true; itr->version = itr->cache->version; - *stop = vy_cache_iterator_seek(itr, NULL); + *stop = vy_cache_iterator_seek(itr, vy_entry_none()); } else { assert(itr->version == itr->cache->version); - if (itr->curr_stmt == NULL) + if (itr->curr.stmt == NULL) return 0; *stop = vy_cache_iterator_step(itr); } vy_cache_iterator_skip_to_read_view(itr, stop); - if (itr->curr_stmt != NULL) { + if (itr->curr.stmt != NULL) { vy_stmt_counter_acct_tuple(&itr->cache->stat.get, - itr->curr_stmt); - return vy_history_append_stmt(history, itr->curr_stmt); + itr->curr.stmt); + return vy_history_append_stmt(history, itr->curr); } return 0; } NODISCARD int -vy_cache_iterator_skip(struct vy_cache_iterator *itr, struct tuple *last_stmt, +vy_cache_iterator_skip(struct vy_cache_iterator *itr, struct vy_entry last, struct vy_history *history, bool *stop) { assert(!itr->search_started || itr->version == itr->cache->version); /* * Check if the iterator is already positioned - * at the statement following last_stmt. + * at the statement following last. 
*/ if (itr->search_started && - (itr->curr_stmt == NULL || last_stmt == NULL || + (itr->curr.stmt == NULL || last.stmt == NULL || iterator_direction(itr->iterator_type) * - vy_stmt_compare(itr->curr_stmt, last_stmt, - itr->cache->cmp_def) > 0)) + vy_entry_compare(itr->curr, last, itr->cache->cmp_def) > 0)) return 0; vy_history_cleanup(history); itr->search_started = true; itr->version = itr->cache->version; - *stop = vy_cache_iterator_seek(itr, last_stmt); + *stop = vy_cache_iterator_seek(itr, last); vy_cache_iterator_skip_to_read_view(itr, stop); - if (itr->curr_stmt != NULL) { + if (itr->curr.stmt != NULL) { vy_stmt_counter_acct_tuple(&itr->cache->stat.get, - itr->curr_stmt); - return vy_history_append_stmt(history, itr->curr_stmt); + itr->curr.stmt); + return vy_history_append_stmt(history, itr->curr); } return 0; } NODISCARD int -vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt, +vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct vy_entry last, struct vy_history *history, bool *stop) { if (!itr->search_started || itr->version == itr->cache->version) @@ -762,27 +761,27 @@ vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt bool pos_changed = false; itr->version = itr->cache->version; - if ((itr->curr_stmt == NULL && itr->iterator_type == ITER_EQ) || - (itr->curr_stmt != NULL && - itr->curr_stmt != vy_cache_iterator_curr_stmt(itr))) { + if ((itr->curr.stmt == NULL && itr->iterator_type == ITER_EQ) || + (itr->curr.stmt != NULL && + !vy_entry_is_equal(itr->curr, vy_cache_iterator_curr(itr)))) { /* * EQ search ended or the iterator was invalidated. * In either case the best we can do is restart the * search. 
*/ - *stop = vy_cache_iterator_seek(itr, last_stmt); + *stop = vy_cache_iterator_seek(itr, last); vy_cache_iterator_skip_to_read_view(itr, stop); pos_changed = true; } else { /* * The iterator position is still valid, but new - * statements may have appeared between last_stmt + * statements may have appeared between last * and the current statement. Reposition to the - * statement closiest to last_stmt. + * statement closest to last. */ bool key_belongs = false; - struct tuple *key = last_stmt; - if (key == NULL) { + struct vy_entry key = last; + if (key.stmt == NULL) { key = itr->key; key_belongs = (itr->iterator_type == ITER_EQ || itr->iterator_type == ITER_GE || @@ -792,7 +791,7 @@ vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt struct key_def *def = itr->cache->cmp_def; struct vy_cache_tree *tree = &itr->cache->cache_tree; struct vy_cache_tree_iterator pos = itr->curr_pos; - if (itr->curr_stmt == NULL) + if (itr->curr.stmt == NULL) pos = vy_cache_tree_invalid_iterator(); while (true) { if (dir > 0) @@ -803,15 +802,16 @@ vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt break; struct vy_cache_node *node = *vy_cache_tree_iterator_get_elem(tree, &pos); - int cmp = dir * vy_stmt_compare(node->stmt, key, def); + int cmp = dir * vy_entry_compare(node->entry, key, def); if (cmp < 0 || (cmp == 0 && !key_belongs)) break; - if (vy_stmt_lsn(node->stmt) <= (**itr->read_view).vlsn) { + if (vy_stmt_lsn(node->entry.stmt) <= + (**itr->read_view).vlsn) { itr->curr_pos = pos; - if (itr->curr_stmt != NULL) - tuple_unref(itr->curr_stmt); - itr->curr_stmt = node->stmt; - tuple_ref(itr->curr_stmt); + if (itr->curr.stmt != NULL) + tuple_unref(itr->curr.stmt); + itr->curr = node->entry; + tuple_ref(itr->curr.stmt); *stop = vy_cache_iterator_is_stop(itr, node); pos_changed = true; } @@ -823,10 +823,10 @@ vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt return 0; vy_history_cleanup(history); 
- if (itr->curr_stmt != NULL) { + if (itr->curr.stmt != NULL) { vy_stmt_counter_acct_tuple(&itr->cache->stat.get, - itr->curr_stmt); - if (vy_history_append_stmt(history, itr->curr_stmt) != 0) + itr->curr.stmt); + if (vy_history_append_stmt(history, itr->curr) != 0) return -1; } return 1; @@ -835,16 +835,14 @@ vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt void vy_cache_iterator_close(struct vy_cache_iterator *itr) { - if (itr->curr_stmt != NULL) { - tuple_unref(itr->curr_stmt); - itr->curr_stmt = NULL; - } + if (itr->curr.stmt != NULL) + tuple_unref(itr->curr.stmt); TRASH(itr); } void vy_cache_iterator_open(struct vy_cache_iterator *itr, struct vy_cache *cache, - enum iterator_type iterator_type, struct tuple *key, + enum iterator_type iterator_type, struct vy_entry key, const struct vy_read_view **rv) { itr->cache = cache; @@ -852,7 +850,7 @@ vy_cache_iterator_open(struct vy_cache_iterator *itr, struct vy_cache *cache, itr->key = key; itr->read_view = rv; - itr->curr_stmt = NULL; + itr->curr = vy_entry_none(); itr->curr_pos = vy_cache_tree_invalid_iterator(); itr->version = 0; diff --git a/src/box/vy_cache.h b/src/box/vy_cache.h index c6233f42..a0429047 100644 --- a/src/box/vy_cache.h +++ b/src/box/vy_cache.h @@ -55,7 +55,7 @@ struct vy_cache_node { /* Cache */ struct vy_cache *cache; /* Statement in cache */ - struct tuple *stmt; + struct vy_entry entry; /* Link in LRU list */ struct rlist in_lru; /* VY_CACHE_LEFT_LINKED and/or VY_CACHE_RIGHT_LINKED, see @@ -74,17 +74,17 @@ static inline int vy_cache_tree_cmp(struct vy_cache_node *a, struct vy_cache_node *b, struct key_def *cmp_def) { - return vy_stmt_compare(a->stmt, b->stmt, cmp_def); + return vy_entry_compare(a->entry, b->entry, cmp_def); } /** * Internal comparator (2) for BPS tree. 
*/ static inline int -vy_cache_tree_key_cmp(struct vy_cache_node *a, struct tuple *b, +vy_cache_tree_key_cmp(struct vy_cache_node *a, struct vy_entry b, struct key_def *cmp_def) { - return vy_stmt_compare(a->stmt, b, cmp_def); + return vy_entry_compare(a->entry, b, cmp_def); } #define VY_CACHE_TREE_EXTENT_SIZE (16 * 1024) @@ -95,7 +95,7 @@ vy_cache_tree_key_cmp(struct vy_cache_node *a, struct tuple *b, #define BPS_TREE_COMPARE(a, b, cmp_def) vy_cache_tree_cmp(a, b, cmp_def) #define BPS_TREE_COMPARE_KEY(a, b, cmp_def) vy_cache_tree_key_cmp(a, b, cmp_def) #define bps_tree_elem_t struct vy_cache_node * -#define bps_tree_key_t struct tuple * +#define bps_tree_key_t struct vy_entry #define bps_tree_arg_t struct key_def * #define BPS_TREE_NO_DEBUG @@ -193,35 +193,35 @@ vy_cache_destroy(struct vy_cache *cache); * Add a value to the cache. Can be used only if the reader read the latest * data (vlsn = INT64_MAX). * @param cache - pointer to tuple cache. - * @param stmt - statement that was recently read and should be added to the + * @param curr - statement that was recently read and should be added to the * cache. - * @param prev_stmt - previous statement that was read by the reader in one + * @param prev - previous statement that was read by the reader in one * sequence (by one iterator). * @param direction - direction in which the reader (iterator) observes data, * +1 - forward, -1 - backward. */ void -vy_cache_add(struct vy_cache *cache, struct tuple *stmt, - struct tuple *prev_stmt, struct tuple *key, +vy_cache_add(struct vy_cache *cache, struct vy_entry curr, + struct vy_entry prev, struct vy_entry key, enum iterator_type order); /** * Find value in cache. * @return A tuple equal to key or NULL if not found. */ -struct tuple * -vy_cache_get(struct vy_cache *cache, struct tuple *key); +struct vy_entry +vy_cache_get(struct vy_cache *cache, struct vy_entry key); /** * Invalidate possibly cached value due to its overwriting * @param cache - pointer to tuple cache. 
- * @param stmt - overwritten statement. + * @param entry - overwritten statement. * @param[out] deleted - If not NULL, then is set to deleted * statement. */ void -vy_cache_on_write(struct vy_cache *cache, struct tuple *stmt, - struct tuple **deleted); +vy_cache_on_write(struct vy_cache *cache, struct vy_entry entry, + struct vy_entry *deleted); /** @@ -237,8 +237,8 @@ struct vy_cache_iterator { * GE, LT to LE for beauty. */ enum iterator_type iterator_type; - /* Search key data in terms of vinyl, vy_stmt_compare argument */ - struct tuple *key; + /* Search key data in terms of vinyl, vy_entry_compare argument */ + struct vy_entry key; /* LSN visibility, iterator shows values with lsn <= vlsn */ const struct vy_read_view **read_view; @@ -246,7 +246,7 @@ struct vy_cache_iterator { /* Current position in tree */ struct vy_cache_tree_iterator curr_pos; /* stmt in current position in tree */ - struct tuple *curr_stmt; + struct vy_entry curr; /* Last version of cache */ uint32_t version; @@ -259,12 +259,12 @@ struct vy_cache_iterator { * @param itr - iterator to open. * @param cache - the cache. * @param iterator_type - iterator type (EQ, GT, GE, LT, LE or ALL) - * @param key - search key data in terms of vinyl, vy_stmt_compare argument + * @param key - search key data in terms of vinyl, vy_entry_compare argument * @param vlsn - LSN visibility, iterator shows values with lsn <= vlsn */ void vy_cache_iterator_open(struct vy_cache_iterator *itr, struct vy_cache *cache, - enum iterator_type iterator_type, struct tuple *key, + enum iterator_type iterator_type, struct vy_entry key, const struct vy_read_view **rv); /** @@ -280,22 +280,22 @@ vy_cache_iterator_next(struct vy_cache_iterator *itr, struct vy_history *history, bool *stop); /** - * Advance a cache iterator to the key following @last_stmt. + * Advance a cache iterator to the key following @last. * The key history is returned in @history (empty if EOF). * Returns 0 on success, -1 on memory allocation error. 
*/ NODISCARD int -vy_cache_iterator_skip(struct vy_cache_iterator *itr, struct tuple *last_stmt, +vy_cache_iterator_skip(struct vy_cache_iterator *itr, struct vy_entry last, struct vy_history *history, bool *stop); /** * Check if a cache iterator was invalidated and needs to be restored. * If it does, set the iterator position to the first key following - * @last_stmt and return 1, otherwise return 0. Returns -1 on memory + * @last and return 1, otherwise return 0. Returns -1 on memory * allocation error. */ NODISCARD int -vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct tuple *last_stmt, +vy_cache_iterator_restore(struct vy_cache_iterator *itr, struct vy_entry last, struct vy_history *history, bool *stop); /** diff --git a/src/box/vy_history.c b/src/box/vy_history.c index b45c6d51..c3717b28 100644 --- a/src/box/vy_history.c +++ b/src/box/vy_history.c @@ -43,7 +43,7 @@ #include "vy_upsert.h" int -vy_history_append_stmt(struct vy_history *history, struct tuple *stmt) +vy_history_append_stmt(struct vy_history *history, struct vy_entry entry) { assert(history->pool->objsize == sizeof(struct vy_history_node)); struct vy_history_node *node = mempool_alloc(history->pool); @@ -52,10 +52,10 @@ vy_history_append_stmt(struct vy_history *history, struct tuple *stmt) "struct vy_history_node"); return -1; } - node->is_refable = vy_stmt_is_refable(stmt); + node->is_refable = vy_stmt_is_refable(entry.stmt); if (node->is_refable) - tuple_ref(stmt); - node->stmt = stmt; + tuple_ref(entry.stmt); + node->entry = entry; rlist_add_tail_entry(&history->stmts, node, link); return 0; } @@ -66,7 +66,7 @@ vy_history_cleanup(struct vy_history *history) struct vy_history_node *node, *tmp; rlist_foreach_entry_safe(node, &history->stmts, link, tmp) { if (node->is_refable) - tuple_unref(node->stmt); + tuple_unref(node->entry.stmt); mempool_free(history->pool, node); } rlist_create(&history->stmts); @@ -74,41 +74,45 @@ vy_history_cleanup(struct vy_history *history) int 
vy_history_apply(struct vy_history *history, struct key_def *cmp_def, - bool keep_delete, int *upserts_applied, struct tuple **ret) + bool keep_delete, int *upserts_applied, struct vy_entry *ret) { - *ret = NULL; + *ret = vy_entry_none(); *upserts_applied = 0; if (rlist_empty(&history->stmts)) return 0; - struct tuple *curr_stmt = NULL; + struct vy_entry curr = vy_entry_none(); struct vy_history_node *node = rlist_last_entry(&history->stmts, struct vy_history_node, link); if (vy_history_is_terminal(history)) { - if (!keep_delete && vy_stmt_type(node->stmt) == IPROTO_DELETE) { + if (!keep_delete && + vy_stmt_type(node->entry.stmt) == IPROTO_DELETE) { /* * Ignore terminal delete unless the caller * explicitly asked to keep it. */ } else if (!node->is_refable) { - curr_stmt = vy_stmt_dup(node->stmt); + curr.hint = node->entry.hint; + curr.stmt = vy_stmt_dup(node->entry.stmt); + if (curr.stmt == NULL) + return -1; } else { - curr_stmt = node->stmt; - tuple_ref(curr_stmt); + curr = node->entry; + tuple_ref(curr.stmt); } node = rlist_prev_entry_safe(node, &history->stmts, link); } while (node != NULL) { - struct tuple *stmt = vy_apply_upsert(node->stmt, curr_stmt, - cmp_def, true); + struct vy_entry entry = vy_entry_apply_upsert(node->entry, curr, + cmp_def, true); ++*upserts_applied; - if (curr_stmt != NULL) - tuple_unref(curr_stmt); - if (stmt == NULL) + if (curr.stmt != NULL) + tuple_unref(curr.stmt); + if (entry.stmt == NULL) return -1; - curr_stmt = stmt; + curr = entry; node = rlist_prev_entry_safe(node, &history->stmts, link); } - *ret = curr_stmt; + *ret = curr; return 0; } diff --git a/src/box/vy_history.h b/src/box/vy_history.h index 458ea749..b25c27f7 100644 --- a/src/box/vy_history.h +++ b/src/box/vy_history.h @@ -43,9 +43,6 @@ extern "C" { #endif /* defined(__cplusplus) */ struct mempool; -struct key_def; -struct tuple; -struct tuple_format; /** Key history. */ struct vy_history { @@ -63,7 +60,7 @@ struct vy_history_node { /** Link in a history list. 
*/ struct rlist link; /** History statement. Referenced if @is_refable is set. */ - struct tuple *stmt; + struct vy_entry entry; /** * Set if the statement stored in this node is refable, * i.e. has a reference counter that can be incremented @@ -102,26 +99,26 @@ vy_history_is_terminal(struct vy_history *history) return false; struct vy_history_node *node = rlist_last_entry(&history->stmts, struct vy_history_node, link); - assert(vy_stmt_type(node->stmt) == IPROTO_REPLACE || - vy_stmt_type(node->stmt) == IPROTO_DELETE || - vy_stmt_type(node->stmt) == IPROTO_INSERT || - vy_stmt_type(node->stmt) == IPROTO_UPSERT); - return vy_stmt_type(node->stmt) != IPROTO_UPSERT; + assert(vy_stmt_type(node->entry.stmt) == IPROTO_REPLACE || + vy_stmt_type(node->entry.stmt) == IPROTO_DELETE || + vy_stmt_type(node->entry.stmt) == IPROTO_INSERT || + vy_stmt_type(node->entry.stmt) == IPROTO_UPSERT); + return vy_stmt_type(node->entry.stmt) != IPROTO_UPSERT; } /** * Return the last (newest, having max LSN) statement of the given * key history or NULL if the history is empty. */ -static inline struct tuple * +static inline struct vy_entry vy_history_last_stmt(struct vy_history *history) { if (rlist_empty(&history->stmts)) - return NULL; + return vy_entry_none(); /* Newest statement is at the head of the list. */ struct vy_history_node *node = rlist_first_entry(&history->stmts, struct vy_history_node, link); - return node->stmt; + return node->entry; } /** @@ -139,7 +136,7 @@ vy_history_splice(struct vy_history *dst, struct vy_history *src) * Returns 0 on success, -1 on memory allocation error. 
*/ int -vy_history_append_stmt(struct vy_history *history, struct tuple *stmt); +vy_history_append_stmt(struct vy_history *history, struct vy_entry entry); /** * Release all statements stored in the given history and @@ -155,7 +152,7 @@ vy_history_cleanup(struct vy_history *history); */ int vy_history_apply(struct vy_history *history, struct key_def *cmp_def, - bool keep_delete, int *upserts_applied, struct tuple **ret); + bool keep_delete, int *upserts_applied, struct vy_entry *ret); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index 07c4a929..e6557d7f 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -74,8 +74,9 @@ vy_lsm_env_create(struct vy_lsm_env *env, const char *path, vy_upsert_thresh_cb upsert_thresh_cb, void *upsert_thresh_arg) { - env->empty_key = vy_key_new(key_format, NULL, 0); - if (env->empty_key == NULL) + env->empty_key.hint = HINT_NONE; + env->empty_key.stmt = vy_key_new(key_format, NULL, 0); + if (env->empty_key.stmt == NULL) return -1; env->path = path; env->p_generation = p_generation; @@ -93,7 +94,7 @@ vy_lsm_env_create(struct vy_lsm_env *env, const char *path, void vy_lsm_env_destroy(struct vy_lsm_env *env) { - tuple_unref(env->empty_key); + tuple_unref(env->empty_key.stmt); tuple_format_unref(env->key_format); mempool_destroy(&env->history_node_pool); } @@ -325,8 +326,8 @@ vy_lsm_create(struct vy_lsm *lsm) int64_t id = vy_log_next_id(); /* Create the initial range. 
*/ - struct vy_range *range = vy_range_new(vy_log_next_id(), NULL, NULL, - lsm->cmp_def); + struct vy_range *range = vy_range_new(vy_log_next_id(), vy_entry_none(), + vy_entry_none(), lsm->cmp_def); if (range == NULL) return -1; assert(lsm->range_count == 0); @@ -365,8 +366,8 @@ vy_lsm_recover_run(struct vy_lsm *lsm, struct vy_run_recovery_info *run_info, run->dump_lsn = run_info->dump_lsn; run->dump_count = run_info->dump_count; - if (vy_run_recover(run, lsm->env->path, - lsm->space_id, lsm->index_id) != 0 && + if (vy_run_recover(run, lsm->env->path, lsm->space_id, lsm->index_id, + lsm->cmp_def) != 0 && (!force_recovery || vy_run_rebuild_index(run, lsm->env->path, lsm->space_id, lsm->index_id, @@ -395,24 +396,27 @@ vy_lsm_recover_slice(struct vy_lsm *lsm, struct vy_range *range, struct vy_slice_recovery_info *slice_info, struct vy_run_env *run_env, bool force_recovery) { - struct tuple *begin = NULL, *end = NULL; + struct vy_entry begin = vy_entry_none(); + struct vy_entry end = vy_entry_none(); struct vy_slice *slice = NULL; struct vy_run *run; if (slice_info->begin != NULL) { - begin = vy_key_from_msgpack(lsm->env->key_format, - slice_info->begin); - if (begin == NULL) + begin = vy_entry_key_from_msgpack(lsm->env->key_format, + lsm->cmp_def, + slice_info->begin); + if (begin.stmt == NULL) goto out; } if (slice_info->end != NULL) { - end = vy_key_from_msgpack(lsm->env->key_format, - slice_info->end); - if (end == NULL) + end = vy_entry_key_from_msgpack(lsm->env->key_format, + lsm->cmp_def, + slice_info->end); + if (end.stmt == NULL) goto out; } - if (begin != NULL && end != NULL && - vy_stmt_compare(begin, end, lsm->cmp_def) >= 0) { + if (begin.stmt != NULL && end.stmt != NULL && + vy_entry_compare(begin, end, lsm->cmp_def) >= 0) { diag_set(ClientError, ER_INVALID_VYLOG_FILE, tt_sprintf("begin >= end for slice %lld", (long long)slice_info->id)); @@ -430,10 +434,10 @@ vy_lsm_recover_slice(struct vy_lsm *lsm, struct vy_range *range, vy_range_add_slice(range, 
slice); out: - if (begin != NULL) - tuple_unref(begin); - if (end != NULL) - tuple_unref(end); + if (begin.stmt != NULL) + tuple_unref(begin.stmt); + if (end.stmt != NULL) + tuple_unref(end.stmt); return slice; } @@ -442,23 +446,26 @@ vy_lsm_recover_range(struct vy_lsm *lsm, struct vy_range_recovery_info *range_info, struct vy_run_env *run_env, bool force_recovery) { - struct tuple *begin = NULL, *end = NULL; + struct vy_entry begin = vy_entry_none(); + struct vy_entry end = vy_entry_none(); struct vy_range *range = NULL; if (range_info->begin != NULL) { - begin = vy_key_from_msgpack(lsm->env->key_format, - range_info->begin); - if (begin == NULL) + begin = vy_entry_key_from_msgpack(lsm->env->key_format, + lsm->cmp_def, + range_info->begin); + if (begin.stmt == NULL) goto out; } if (range_info->end != NULL) { - end = vy_key_from_msgpack(lsm->env->key_format, - range_info->end); - if (end == NULL) + end = vy_entry_key_from_msgpack(lsm->env->key_format, + lsm->cmp_def, + range_info->end); + if (end.stmt == NULL) goto out; } - if (begin != NULL && end != NULL && - vy_stmt_compare(begin, end, lsm->cmp_def) >= 0) { + if (begin.stmt != NULL && end.stmt != NULL && + vy_entry_compare(begin, end, lsm->cmp_def) >= 0) { diag_set(ClientError, ER_INVALID_VYLOG_FILE, tt_sprintf("begin >= end for range %lld", (long long)range_info->id)); @@ -485,10 +492,10 @@ vy_lsm_recover_range(struct vy_lsm *lsm, } vy_lsm_add_range(lsm, range); out: - if (begin != NULL) - tuple_unref(begin); - if (end != NULL) - tuple_unref(end); + if (begin.stmt != NULL) + tuple_unref(begin.stmt); + if (end.stmt != NULL) + tuple_unref(end.stmt); return range; } @@ -584,8 +591,9 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, * We need range tree initialized for all LSM trees, * even for dropped ones. 
*/ - struct vy_range *range = vy_range_new(vy_log_next_id(), - NULL, NULL, lsm->cmp_def); + struct vy_range *range; + range = vy_range_new(vy_log_next_id(), vy_entry_none(), + vy_entry_none(), lsm->cmp_def); if (range == NULL) return -1; vy_lsm_add_range(lsm, range); @@ -629,7 +637,7 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, struct vy_range *range, *prev = NULL; for (range = vy_range_tree_first(&lsm->range_tree); range != NULL; prev = range, range = vy_range_tree_next(&lsm->range_tree, range)) { - if (prev == NULL && range->begin != NULL) { + if (prev == NULL && range->begin.stmt != NULL) { diag_set(ClientError, ER_INVALID_VYLOG_FILE, tt_sprintf("Range %lld is leftmost but " "starts with a finite key", @@ -638,9 +646,9 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, } int cmp = 0; if (prev != NULL && - (prev->end == NULL || range->begin == NULL || - (cmp = vy_stmt_compare(prev->end, range->begin, - lsm->cmp_def)) != 0)) { + (prev->end.stmt == NULL || range->begin.stmt == NULL || + (cmp = vy_entry_compare(prev->end, range->begin, + lsm->cmp_def)) != 0)) { const char *errmsg = cmp > 0 ? 
"Nearby ranges %lld and %lld overlap" : "Keys between ranges %lld and %lld not spanned"; @@ -659,7 +667,7 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, (long long)lsm->id)); return -1; } - if (prev->end != NULL) { + if (prev->end.stmt != NULL) { diag_set(ClientError, ER_INVALID_VYLOG_FILE, tt_sprintf("Range %lld is rightmost but " "ends with a finite key", @@ -873,11 +881,11 @@ vy_lsm_delete_mem(struct vy_lsm *lsm, struct vy_mem *mem) int vy_lsm_set(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt, struct tuple **region_stmt) + struct vy_entry entry, struct tuple **region_stmt) { - uint32_t format_id = stmt->format_id; + uint32_t format_id = entry.stmt->format_id; - assert(vy_stmt_is_refable(stmt)); + assert(vy_stmt_is_refable(entry.stmt)); assert(*region_stmt == NULL || !vy_stmt_is_refable(*region_stmt)); /* @@ -889,25 +897,27 @@ vy_lsm_set(struct vy_lsm *lsm, struct vy_mem *mem, * while other LSM trees still use the old space format. */ if (*region_stmt == NULL || (*region_stmt)->format_id != format_id) { - *region_stmt = vy_stmt_dup_lsregion(stmt, &mem->env->allocator, + *region_stmt = vy_stmt_dup_lsregion(entry.stmt, + &mem->env->allocator, mem->generation); if (*region_stmt == NULL) return -1; } + entry.stmt = *region_stmt; /* We can't free region_stmt below, so let's add it to the stats */ - lsm->stat.memory.count.bytes += tuple_size(stmt); + lsm->stat.memory.count.bytes += tuple_size(entry.stmt); /* Abort transaction if format was changed by DDL */ - if (!vy_stmt_is_key(stmt) && + if (!vy_stmt_is_key(entry.stmt) && format_id != tuple_format_id(mem->format)) { diag_set(ClientError, ER_TRANSACTION_CONFLICT); return -1; } if (vy_stmt_type(*region_stmt) != IPROTO_UPSERT) - return vy_mem_insert(mem, *region_stmt); + return vy_mem_insert(mem, entry); else - return vy_mem_insert_upsert(mem, *region_stmt); + return vy_mem_insert_upsert(mem, entry); } /** @@ -918,23 +928,23 @@ vy_lsm_set(struct vy_lsm *lsm, struct vy_mem *mem, * * 
@param lsm LSM tree the statement was committed to. * @param mem In-memory tree where the statement was saved. - * @param stmt UPSERT statement to squash. + * @param entry UPSERT statement to squash. */ static void vy_lsm_commit_upsert(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt) + struct vy_entry entry) { - assert(vy_stmt_type(stmt) == IPROTO_UPSERT); - assert(vy_stmt_lsn(stmt) < MAX_LSN); + assert(vy_stmt_type(entry.stmt) == IPROTO_UPSERT); + assert(vy_stmt_lsn(entry.stmt) < MAX_LSN); /* * UPSERT is enabled only for the spaces with the single * index. */ assert(lsm->index_id == 0); - struct tuple *older; - int64_t lsn = vy_stmt_lsn(stmt); - uint8_t n_upserts = vy_stmt_n_upserts(stmt); + struct vy_entry older; + int64_t lsn = vy_stmt_lsn(entry.stmt); + uint8_t n_upserts = vy_stmt_n_upserts(entry.stmt); /* * If there are a lot of successive upserts for the same key, * select might take too long to squash them all. So once the @@ -959,20 +969,23 @@ vy_lsm_commit_upsert(struct vy_lsm *lsm, struct vy_mem *mem, * one-key continous UPSERTs sequence. */ #ifndef NDEBUG - older = vy_mem_older_lsn(mem, stmt); - assert(older != NULL && vy_stmt_type(older) == IPROTO_UPSERT && - vy_stmt_n_upserts(older) == VY_UPSERT_THRESHOLD - 1); + older = vy_mem_older_lsn(mem, entry); + assert(older.stmt != NULL && + vy_stmt_type(older.stmt) == IPROTO_UPSERT && + vy_stmt_n_upserts(older.stmt) == VY_UPSERT_THRESHOLD - 1); #endif if (lsm->env->upsert_thresh_cb == NULL) { /* Squash callback is not installed. 
*/ return; } - struct tuple *dup = vy_stmt_dup(stmt); - if (dup != NULL) { + struct vy_entry dup; + dup.hint = entry.hint; + dup.stmt = vy_stmt_dup(entry.stmt); + if (dup.stmt != NULL) { lsm->env->upsert_thresh_cb(lsm, dup, lsm->env->upsert_thresh_arg); - tuple_unref(dup); + tuple_unref(dup.stmt); } /* * Ignore dup == NULL, because the optimization is @@ -988,18 +1001,20 @@ vy_lsm_commit_upsert(struct vy_lsm *lsm, struct vy_mem *mem, if (n_upserts == 0 && lsm->stat.memory.count.rows == lsm->mem->count.rows && lsm->run_count == 0) { - older = vy_mem_older_lsn(mem, stmt); - assert(older == NULL || vy_stmt_type(older) != IPROTO_UPSERT); - struct tuple *upserted = - vy_apply_upsert(stmt, older, lsm->cmp_def, false); + older = vy_mem_older_lsn(mem, entry); + assert(older.stmt == NULL || + vy_stmt_type(older.stmt) != IPROTO_UPSERT); + struct vy_entry upserted; + upserted = vy_entry_apply_upsert(entry, older, + lsm->cmp_def, false); lsm->stat.upsert.applied++; - if (upserted == NULL) { + if (upserted.stmt == NULL) { /* OOM */ diag_clear(diag_get()); return; } - int64_t upserted_lsn = vy_stmt_lsn(upserted); + int64_t upserted_lsn = vy_stmt_lsn(upserted.stmt); if (upserted_lsn != lsn) { /** * This could only happen if the upsert completely @@ -1007,20 +1022,22 @@ vy_lsm_commit_upsert(struct vy_lsm *lsm, struct vy_mem *mem, * In this case we shouldn't insert the same replace * again. 
*/ - assert(older == NULL || - upserted_lsn == vy_stmt_lsn(older)); - tuple_unref(upserted); + assert(older.stmt == NULL || + upserted_lsn == vy_stmt_lsn(older.stmt)); + tuple_unref(upserted.stmt); return; } - assert(older == NULL || upserted_lsn != vy_stmt_lsn(older)); - assert(vy_stmt_type(upserted) == IPROTO_REPLACE); + assert(older.stmt == NULL || + upserted_lsn != vy_stmt_lsn(older.stmt)); + assert(vy_stmt_type(upserted.stmt) == IPROTO_REPLACE); struct tuple *region_stmt = - vy_stmt_dup_lsregion(upserted, &mem->env->allocator, + vy_stmt_dup_lsregion(upserted.stmt, + &mem->env->allocator, mem->generation); if (region_stmt == NULL) { /* OOM */ - tuple_unref(upserted); + tuple_unref(upserted.stmt); diag_clear(diag_get()); return; } @@ -1032,37 +1049,38 @@ vy_lsm_commit_upsert(struct vy_lsm *lsm, struct vy_mem *mem, * vy_lsm_set() cannot fail. */ assert(rc == 0); (void)rc; - tuple_unref(upserted); - vy_mem_commit_stmt(mem, region_stmt); + tuple_unref(upserted.stmt); + upserted.stmt = region_stmt; + vy_mem_commit_stmt(mem, upserted); lsm->stat.upsert.squashed++; } } void vy_lsm_commit_stmt(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt) + struct vy_entry entry) { - vy_mem_commit_stmt(mem, stmt); + vy_mem_commit_stmt(mem, entry); lsm->stat.memory.count.rows++; - if (vy_stmt_type(stmt) == IPROTO_UPSERT) - vy_lsm_commit_upsert(lsm, mem, stmt); + if (vy_stmt_type(entry.stmt) == IPROTO_UPSERT) + vy_lsm_commit_upsert(lsm, mem, entry); - vy_stmt_counter_acct_tuple(&lsm->stat.put, stmt); + vy_stmt_counter_acct_tuple(&lsm->stat.put, entry.stmt); /* Invalidate cache element. */ - vy_cache_on_write(&lsm->cache, stmt, NULL); + vy_cache_on_write(&lsm->cache, entry, NULL); } void vy_lsm_rollback_stmt(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt) + struct vy_entry entry) { - vy_mem_rollback_stmt(mem, stmt); + vy_mem_rollback_stmt(mem, entry); /* Invalidate cache element. 
*/ - vy_cache_on_write(&lsm->cache, stmt, NULL); + vy_cache_on_write(&lsm->cache, entry, NULL); } int @@ -1071,20 +1089,20 @@ vy_lsm_find_range_intersection(struct vy_lsm *lsm, struct vy_range **begin, struct vy_range **end) { struct tuple_format *key_format = lsm->env->key_format; - struct tuple *stmt; + struct vy_entry entry; - stmt = vy_key_from_msgpack(key_format, min_key); - if (stmt == NULL) + entry = vy_entry_key_from_msgpack(key_format, lsm->cmp_def, min_key); + if (entry.stmt == NULL) return -1; - *begin = vy_range_tree_psearch(&lsm->range_tree, stmt); - tuple_unref(stmt); + *begin = vy_range_tree_psearch(&lsm->range_tree, entry); + tuple_unref(entry.stmt); - stmt = vy_key_from_msgpack(key_format, max_key); - if (stmt == NULL) + entry = vy_entry_key_from_msgpack(key_format, lsm->cmp_def, max_key); + if (entry.stmt == NULL) return -1; - *end = vy_range_tree_psearch(&lsm->range_tree, stmt); + *end = vy_range_tree_psearch(&lsm->range_tree, entry); *end = vy_range_tree_next(&lsm->range_tree, *end); - tuple_unref(stmt); + tuple_unref(entry.stmt); return 0; } @@ -1105,12 +1123,13 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range) /* * Determine new ranges' boundaries. 
*/ - struct tuple *split_key = vy_key_from_msgpack(key_format, - split_key_raw); - if (split_key == NULL) + struct vy_entry split_key; + split_key = vy_entry_key_from_msgpack(key_format, lsm->cmp_def, + split_key_raw); + if (split_key.stmt == NULL) goto fail; - struct tuple *keys[3]; + struct vy_entry keys[3]; keys[0] = range->begin; keys[1] = split_key; keys[2] = range->end; @@ -1155,12 +1174,12 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range) for (int i = 0; i < n_parts; i++) { part = parts[i]; vy_log_insert_range(lsm->id, part->id, - tuple_data_or_null(part->begin), - tuple_data_or_null(part->end)); + tuple_data_or_null(part->begin.stmt), + tuple_data_or_null(part->end.stmt)); rlist_foreach_entry(slice, &part->slices, in_range) vy_log_insert_slice(part->id, slice->run->id, slice->id, - tuple_data_or_null(slice->begin), - tuple_data_or_null(slice->end)); + tuple_data_or_null(slice->begin.stmt), + tuple_data_or_null(slice->end.stmt)); } if (vy_log_tx_commit() < 0) goto fail; @@ -1179,20 +1198,20 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range) lsm->range_tree_version++; say_info("%s: split range %s by key %s", vy_lsm_name(lsm), - vy_range_str(range), tuple_str(split_key)); + vy_range_str(range), tuple_str(split_key.stmt)); rlist_foreach_entry(slice, &range->slices, in_range) vy_slice_wait_pinned(slice); vy_range_delete(range); - tuple_unref(split_key); + tuple_unref(split_key.stmt); return true; fail: for (int i = 0; i < n_parts; i++) { if (parts[i] != NULL) vy_range_delete(parts[i]); } - if (split_key != NULL) - tuple_unref(split_key); + if (split_key.stmt != NULL) + tuple_unref(split_key.stmt); diag_log(); say_error("%s: failed to split range %s", @@ -1221,8 +1240,8 @@ vy_lsm_coalesce_range(struct vy_lsm *lsm, struct vy_range *range) */ vy_log_tx_begin(); vy_log_insert_range(lsm->id, result->id, - tuple_data_or_null(result->begin), - tuple_data_or_null(result->end)); + tuple_data_or_null(result->begin.stmt), + 
tuple_data_or_null(result->end.stmt)); for (it = first; it != end; it = vy_range_tree_next(&lsm->range_tree, it)) { struct vy_slice *slice; @@ -1231,8 +1250,8 @@ vy_lsm_coalesce_range(struct vy_lsm *lsm, struct vy_range *range) vy_log_delete_range(it->id); rlist_foreach_entry(slice, &it->slices, in_range) { vy_log_insert_slice(result->id, slice->run->id, slice->id, - tuple_data_or_null(slice->begin), - tuple_data_or_null(slice->end)); + tuple_data_or_null(slice->begin.stmt), + tuple_data_or_null(slice->end.stmt)); } } if (vy_log_tx_commit() < 0) diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h index b61a4a52..0a8e9100 100644 --- a/src/box/vy_lsm.h +++ b/src/box/vy_lsm.h @@ -40,6 +40,7 @@ #include "index_def.h" #define HEAP_FORWARD_DECLARATION #include "salad/heap.h" +#include "vy_entry.h" #include "vy_cache.h" #include "vy_range.h" #include "vy_stat.h" @@ -61,7 +62,7 @@ struct vy_run; struct vy_run_env; typedef void -(*vy_upsert_thresh_cb)(struct vy_lsm *lsm, struct tuple *stmt, void *arg); +(*vy_upsert_thresh_cb)(struct vy_lsm *lsm, struct vy_entry entry, void *arg); /** Common LSM tree environment. */ struct vy_lsm_env { @@ -72,7 +73,7 @@ struct vy_lsm_env { /** Tuple format for keys (SELECT). */ struct tuple_format *key_format; /** Key (SELECT) with no parts. */ - struct tuple *empty_key; + struct vy_entry empty_key; /** * If read of a single statement takes longer than * the given value, warn about it in the log. @@ -589,7 +590,7 @@ vy_lsm_force_compaction(struct vy_lsm *lsm); * * @param lsm LSM tree the statement is for. * @param mem In-memory tree to insert the statement into. - * @param stmt Statement, allocated on malloc(). + * @param entry Statement, allocated on malloc(). * @param region_stmt NULL or the same statement, allocated on * lsregion. 
* @@ -598,7 +599,7 @@ vy_lsm_force_compaction(struct vy_lsm *lsm); */ int vy_lsm_set(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt, struct tuple **region_stmt); + struct vy_entry entry, struct tuple **region_stmt); /** * Confirm that the statement stays in the in-memory index of @@ -606,22 +607,22 @@ vy_lsm_set(struct vy_lsm *lsm, struct vy_mem *mem, * * @param lsm LSM tree the statement is for. * @param mem In-memory tree where the statement was saved. - * @param stmt Statement allocated from lsregion. + * @param entry Statement allocated from lsregion. */ void vy_lsm_commit_stmt(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt); + struct vy_entry entry); /** * Erase a statement from the in-memory index of an LSM tree. * * @param lsm LSM tree to erase from. * @param mem In-memory tree where the statement was saved. - * @param stmt Statement allocated from lsregion. + * @param entry Statement allocated from lsregion. */ void vy_lsm_rollback_stmt(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt); + struct vy_entry entry); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c index ad74e419..9513d5b8 100644 --- a/src/box/vy_mem.c +++ b/src/box/vy_mem.c @@ -131,43 +131,44 @@ vy_mem_delete(struct vy_mem *index) free(index); } -struct tuple * -vy_mem_older_lsn(struct vy_mem *mem, struct tuple *stmt) +struct vy_entry +vy_mem_older_lsn(struct vy_mem *mem, struct vy_entry entry) { struct vy_mem_tree_key tree_key; - tree_key.stmt = stmt; - tree_key.lsn = vy_stmt_lsn(stmt) - 1; + tree_key.entry = entry; + tree_key.lsn = vy_stmt_lsn(entry.stmt) - 1; bool exact = false; struct vy_mem_tree_iterator itr = vy_mem_tree_lower_bound(&mem->tree, &tree_key, &exact); if (vy_mem_tree_iterator_is_invalid(&itr)) - return NULL; + return vy_entry_none(); - struct tuple *result; + struct vy_entry result; result = *vy_mem_tree_iterator_get_elem(&mem->tree, &itr); - if (vy_stmt_compare(result, stmt, mem->cmp_def) != 
0) - return NULL; + if (vy_entry_compare(result, entry, mem->cmp_def) != 0) + return vy_entry_none(); return result; } int -vy_mem_insert_upsert(struct vy_mem *mem, struct tuple *stmt) +vy_mem_insert_upsert(struct vy_mem *mem, struct vy_entry entry) { - assert(vy_stmt_type(stmt) == IPROTO_UPSERT); + assert(vy_stmt_type(entry.stmt) == IPROTO_UPSERT); /* Check if the statement can be inserted in the vy_mem. */ - assert(stmt->format_id == tuple_format_id(mem->format)); + assert(entry.stmt->format_id == tuple_format_id(mem->format)); /* The statement must be from a lsregion. */ - assert(!vy_stmt_is_refable(stmt)); - size_t size = tuple_size(stmt); - struct tuple *replaced_stmt = NULL; + assert(!vy_stmt_is_refable(entry.stmt)); + size_t size = tuple_size(entry.stmt); + struct vy_entry replaced = vy_entry_none(); struct vy_mem_tree_iterator inserted; - if (vy_mem_tree_insert_get_iterator(&mem->tree, stmt, &replaced_stmt, + if (vy_mem_tree_insert_get_iterator(&mem->tree, entry, &replaced, &inserted) != 0) return -1; assert(! vy_mem_tree_iterator_is_invalid(&inserted)); - assert(*vy_mem_tree_iterator_get_elem(&mem->tree, &inserted) == stmt); - if (replaced_stmt == NULL) + assert(vy_entry_is_equal(entry, + *vy_mem_tree_iterator_get_elem(&mem->tree, &inserted))); + if (replaced.stmt == NULL) mem->count.rows++; mem->count.bytes += size; /* @@ -192,12 +193,12 @@ vy_mem_insert_upsert(struct vy_mem *mem, struct tuple *stmt) * UPSERTs subsequence. 
*/ vy_mem_tree_iterator_next(&mem->tree, &inserted); - struct tuple **older = vy_mem_tree_iterator_get_elem(&mem->tree, - &inserted); - if (older == NULL || vy_stmt_type(*older) != IPROTO_UPSERT || - vy_stmt_compare(stmt, *older, mem->cmp_def) != 0) + struct vy_entry *older = vy_mem_tree_iterator_get_elem(&mem->tree, + &inserted); + if (older == NULL || vy_stmt_type(older->stmt) != IPROTO_UPSERT || + vy_entry_compare(entry, *older, mem->cmp_def) != 0) return 0; - uint8_t n_upserts = vy_stmt_n_upserts(*older); + uint8_t n_upserts = vy_stmt_n_upserts(older->stmt); /* * Stop increment if the threshold is reached to avoid * creation of multiple squashing tasks. @@ -206,24 +207,24 @@ vy_mem_insert_upsert(struct vy_mem *mem, struct tuple *stmt) n_upserts++; else assert(n_upserts == VY_UPSERT_INF); - vy_stmt_set_n_upserts(stmt, n_upserts); + vy_stmt_set_n_upserts(entry.stmt, n_upserts); return 0; } int -vy_mem_insert(struct vy_mem *mem, struct tuple *stmt) +vy_mem_insert(struct vy_mem *mem, struct vy_entry entry) { - assert(vy_stmt_type(stmt) != IPROTO_UPSERT); + assert(vy_stmt_type(entry.stmt) != IPROTO_UPSERT); /* Check if the statement can be inserted in the vy_mem. */ - assert(vy_stmt_is_key(stmt) || - stmt->format_id == tuple_format_id(mem->format)); + assert(vy_stmt_is_key(entry.stmt) || + entry.stmt->format_id == tuple_format_id(mem->format)); /* The statement must be from a lsregion. 
*/ - assert(!vy_stmt_is_refable(stmt)); - size_t size = tuple_size(stmt); - struct tuple *replaced_stmt = NULL; - if (vy_mem_tree_insert(&mem->tree, stmt, &replaced_stmt)) + assert(!vy_stmt_is_refable(entry.stmt)); + size_t size = tuple_size(entry.stmt); + struct vy_entry replaced = vy_entry_none(); + if (vy_mem_tree_insert(&mem->tree, entry, &replaced)) return -1; - if (replaced_stmt == NULL) + if (replaced.stmt == NULL) mem->count.rows++; mem->count.bytes += size; /* @@ -235,11 +236,11 @@ vy_mem_insert(struct vy_mem *mem, struct tuple *stmt) } void -vy_mem_commit_stmt(struct vy_mem *mem, struct tuple *stmt) +vy_mem_commit_stmt(struct vy_mem *mem, struct vy_entry entry) { /* The statement must be from a lsregion. */ - assert(!vy_stmt_is_refable(stmt)); - int64_t lsn = vy_stmt_lsn(stmt); + assert(!vy_stmt_is_refable(entry.stmt)); + int64_t lsn = vy_stmt_lsn(entry.stmt); /* * Normally statement LSN grows monotonically, * but not in case of building an index on an @@ -257,11 +258,11 @@ vy_mem_commit_stmt(struct vy_mem *mem, struct tuple *stmt) } void -vy_mem_rollback_stmt(struct vy_mem *mem, struct tuple *stmt) +vy_mem_rollback_stmt(struct vy_mem *mem, struct vy_entry entry) { /* This is the statement we've inserted before. */ - assert(!vy_stmt_is_refable(stmt)); - int rc = vy_mem_tree_delete(&mem->tree, stmt); + assert(!vy_stmt_is_refable(entry.stmt)); + int rc = vy_mem_tree_delete(&mem->tree, entry); assert(rc == 0); (void) rc; /* We can't free memory in case of rollback. 
*/ @@ -287,8 +288,8 @@ vy_mem_iterator_step(struct vy_mem_iterator *itr) vy_mem_tree_iterator_next(&itr->mem->tree, &itr->curr_pos); if (vy_mem_tree_iterator_is_invalid(&itr->curr_pos)) return 1; - itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos); + itr->curr = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos); return 0; } @@ -305,15 +306,16 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr) { /* Skip to the first statement visible in the read view. */ assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); - assert(itr->curr_stmt == *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos)); + assert(vy_entry_is_equal(itr->curr, + *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos))); struct key_def *cmp_def = itr->mem->cmp_def; - while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn || - vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) { + while (vy_stmt_lsn(itr->curr.stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(itr->curr.stmt) & VY_STMT_SKIP_READ) { if (vy_mem_iterator_step(itr) != 0 || (itr->iterator_type == ITER_EQ && - vy_stmt_compare(itr->key, itr->curr_stmt, cmp_def))) { - itr->curr_stmt = NULL; + vy_entry_compare(itr->key, itr->curr, cmp_def))) { + itr->curr = vy_entry_none(); return 1; } } @@ -330,10 +332,10 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr) /* No more statements. */ return 0; } - struct tuple *prev_stmt; - prev_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &prev_pos); - if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn || - vy_stmt_compare(itr->curr_stmt, prev_stmt, cmp_def) != 0) { + struct vy_entry prev; + prev = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &prev_pos); + if (vy_stmt_lsn(prev.stmt) > (**itr->read_view).vlsn || + vy_entry_compare(itr->curr, prev, cmp_def) != 0) { /* * The next statement is either invisible in * the read view or for another key. 
@@ -348,20 +350,20 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr) * pretty cheap anyway. */ struct vy_mem_tree_key tree_key; - tree_key.stmt = itr->curr_stmt; + tree_key.entry = itr->curr; tree_key.lsn = (**itr->read_view).vlsn; itr->curr_pos = vy_mem_tree_lower_bound(&itr->mem->tree, &tree_key, NULL); assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); - itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos); + itr->curr = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos); /* Skip VY_STMT_SKIP_READ statements, if any. */ - while (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) { + while (vy_stmt_flags(itr->curr.stmt) & VY_STMT_SKIP_READ) { vy_mem_tree_iterator_next(&itr->mem->tree, &itr->curr_pos); assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); - itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos); + itr->curr = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos); } return 0; } @@ -375,27 +377,27 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr) * @retval 1 Not found */ static int -vy_mem_iterator_seek(struct vy_mem_iterator *itr, struct tuple *last_key) +vy_mem_iterator_seek(struct vy_mem_iterator *itr, struct vy_entry last) { itr->stat->lookup++; itr->search_started = true; itr->version = itr->mem->version; - itr->curr_stmt = NULL; + itr->curr = vy_entry_none(); - struct tuple *key = itr->key; + struct vy_entry key = itr->key; enum iterator_type iterator_type = itr->iterator_type; - if (last_key != NULL) { - key = last_key; + if (last.stmt != NULL) { + key = last; iterator_type = iterator_direction(itr->iterator_type) > 0 ? 
ITER_GT : ITER_LT; } bool exact; struct vy_mem_tree_key tree_key; - tree_key.stmt = key; + tree_key.entry = key; /* (lsn == INT64_MAX - 1) means that lsn is ignored in comparison */ tree_key.lsn = INT64_MAX - 1; - if (!vy_stmt_is_empty_key(key)) { + if (!vy_stmt_is_empty_key(key.stmt)) { if (iterator_type == ITER_LE || iterator_type == ITER_GT) { itr->curr_pos = vy_mem_tree_upper_bound(&itr->mem->tree, @@ -419,13 +421,13 @@ vy_mem_iterator_seek(struct vy_mem_iterator *itr, struct tuple *last_key) vy_mem_tree_iterator_prev(&itr->mem->tree, &itr->curr_pos); if (vy_mem_tree_iterator_is_invalid(&itr->curr_pos)) return 1; - itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos); + itr->curr = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos); if (itr->iterator_type == ITER_EQ && - ((last_key == NULL && !exact) || - (last_key != NULL && vy_stmt_compare(itr->key, itr->curr_stmt, - itr->mem->cmp_def) != 0))) { - itr->curr_stmt = NULL; + ((last.stmt == NULL && !exact) || + (last.stmt != NULL && + vy_entry_compare(itr->key, itr->curr, itr->mem->cmp_def) != 0))) { + itr->curr = vy_entry_none(); return 1; } return vy_mem_iterator_find_lsn(itr); @@ -438,11 +440,11 @@ vy_mem_iterator_seek(struct vy_mem_iterator *itr, struct tuple *last_key) void vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *stat, struct vy_mem *mem, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv) + struct vy_entry key, const struct vy_read_view **rv) { itr->stat = stat; - assert(key != NULL); + assert(key.stmt != NULL); itr->mem = mem; itr->iterator_type = iterator_type; @@ -450,7 +452,7 @@ vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *s itr->read_view = rv; itr->curr_pos = vy_mem_tree_invalid_iterator(); - itr->curr_stmt = NULL; + itr->curr = vy_entry_none(); itr->search_started = false; } @@ -464,18 +466,19 @@ static NODISCARD int vy_mem_iterator_next_key(struct 
vy_mem_iterator *itr) { if (!itr->search_started) - return vy_mem_iterator_seek(itr, NULL); - if (!itr->curr_stmt) /* End of search. */ + return vy_mem_iterator_seek(itr, vy_entry_none()); + if (itr->curr.stmt == NULL) /* End of search. */ return 1; assert(itr->mem->version == itr->version); assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); - assert(itr->curr_stmt == *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos)); + assert(vy_entry_is_equal(itr->curr, + *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos))); struct key_def *cmp_def = itr->mem->cmp_def; - struct tuple *prev_stmt = itr->curr_stmt; + struct vy_entry prev = itr->curr; if (vy_mem_iterator_step(itr) != 0) { - itr->curr_stmt = NULL; + itr->curr = vy_entry_none(); return 1; } /* @@ -484,12 +487,12 @@ vy_mem_iterator_next_key(struct vy_mem_iterator *itr) * for this key so instead of iterating further we simply * look up the next key - it's pretty cheap anyway. */ - if (vy_stmt_compare(prev_stmt, itr->curr_stmt, cmp_def) == 0) - return vy_mem_iterator_seek(itr, itr->curr_stmt); + if (vy_entry_compare(prev, itr->curr, cmp_def) == 0) + return vy_mem_iterator_seek(itr, itr->curr); if (itr->iterator_type == ITER_EQ && - vy_stmt_compare(itr->key, itr->curr_stmt, cmp_def) != 0) { - itr->curr_stmt = NULL; + vy_entry_compare(itr->key, itr->curr, cmp_def) != 0) { + itr->curr = vy_entry_none(); return 1; } return vy_mem_iterator_find_lsn(itr); @@ -504,12 +507,13 @@ static NODISCARD int vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr) { assert(itr->search_started); - if (!itr->curr_stmt) /* End of search. */ + if (itr->curr.stmt == NULL) /* End of search. 
*/ return 1; assert(itr->mem->version == itr->version); assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); - assert(itr->curr_stmt == *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &itr->curr_pos)); + assert(vy_entry_is_equal(itr->curr, + *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos))); struct key_def *cmp_def = itr->mem->cmp_def; struct vy_mem_tree_iterator next_pos = itr->curr_pos; @@ -518,14 +522,14 @@ next: if (vy_mem_tree_iterator_is_invalid(&next_pos)) return 1; /* EOF */ - struct tuple *next_stmt; - next_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &next_pos); - if (vy_stmt_compare(itr->curr_stmt, next_stmt, cmp_def) != 0) + struct vy_entry next; + next = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &next_pos); + if (vy_entry_compare(itr->curr, next, cmp_def) != 0) return 1; itr->curr_pos = next_pos; - itr->curr_stmt = next_stmt; - if (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) + itr->curr = next; + if (vy_stmt_flags(itr->curr.stmt) & VY_STMT_SKIP_READ) goto next; return 0; } @@ -540,9 +544,8 @@ vy_mem_iterator_get_history(struct vy_mem_iterator *itr, struct vy_history *history) { do { - struct tuple *stmt = itr->curr_stmt; - vy_stmt_counter_acct_tuple(&itr->stat->get, stmt); - if (vy_history_append_stmt(history, stmt) != 0) + vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr.stmt); + if (vy_history_append_stmt(history, itr->curr) != 0) return -1; if (vy_history_is_terminal(history)) break; @@ -561,38 +564,38 @@ vy_mem_iterator_next(struct vy_mem_iterator *itr, } NODISCARD int -vy_mem_iterator_skip(struct vy_mem_iterator *itr, struct tuple *last_stmt, +vy_mem_iterator_skip(struct vy_mem_iterator *itr, struct vy_entry last, struct vy_history *history) { assert(!itr->search_started || itr->version == itr->mem->version); /* * Check if the iterator is already positioned - * at the statement following last_stmt. + * at the statement following last. 
*/ if (itr->search_started && - (itr->curr_stmt == NULL || last_stmt == NULL || + (itr->curr.stmt == NULL || last.stmt == NULL || iterator_direction(itr->iterator_type) * - vy_stmt_compare(itr->curr_stmt, last_stmt, itr->mem->cmp_def) > 0)) + vy_entry_compare(itr->curr, last, itr->mem->cmp_def) > 0)) return 0; vy_history_cleanup(history); - if (vy_mem_iterator_seek(itr, last_stmt) == 0) + if (vy_mem_iterator_seek(itr, last) == 0) return vy_mem_iterator_get_history(itr, history); return 0; } NODISCARD int -vy_mem_iterator_restore(struct vy_mem_iterator *itr, struct tuple *last_stmt, +vy_mem_iterator_restore(struct vy_mem_iterator *itr, struct vy_entry last, struct vy_history *history) { if (!itr->search_started || itr->version == itr->mem->version) return 0; - vy_mem_iterator_seek(itr, last_stmt); + vy_mem_iterator_seek(itr, last); vy_history_cleanup(history); - if (itr->curr_stmt != NULL && + if (itr->curr.stmt != NULL && vy_mem_iterator_get_history(itr, history) != 0) return -1; return 1; @@ -605,16 +608,16 @@ vy_mem_iterator_close(struct vy_mem_iterator *itr) } static NODISCARD int -vy_mem_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret) +vy_mem_stream_next(struct vy_stmt_stream *virt_stream, struct vy_entry *ret) { assert(virt_stream->iface->next == vy_mem_stream_next); struct vy_mem_stream *stream = (struct vy_mem_stream *)virt_stream; - struct tuple **res = (struct tuple **) + struct vy_entry *res = vy_mem_tree_iterator_get_elem(&stream->mem->tree, &stream->curr_pos); if (res == NULL) { - *ret = NULL; + *ret = vy_entry_none(); } else { *ret = *res; vy_mem_tree_iterator_next(&stream->mem->tree, diff --git a/src/box/vy_mem.h b/src/box/vy_mem.h index ba9e76ab..7df9a181 100644 --- a/src/box/vy_mem.h +++ b/src/box/vy_mem.h @@ -79,7 +79,7 @@ vy_mem_env_destroy(struct vy_mem_env *env); /** @cond false */ struct vy_mem_tree_key { - struct tuple *stmt; + struct vy_entry entry; int64_t lsn; }; @@ -87,13 +87,13 @@ struct vy_mem_tree_key { * Internal. 
Extracted to speed up BPS tree. */ static int -vy_mem_tree_cmp(struct tuple *a, struct tuple *b, +vy_mem_tree_cmp(struct vy_entry a, struct vy_entry b, struct key_def *cmp_def) { - int res = vy_stmt_compare(a, b, cmp_def); + int res = vy_entry_compare(a, b, cmp_def); if (res) return res; - int64_t a_lsn = vy_stmt_lsn(a), b_lsn = vy_stmt_lsn(b); + int64_t a_lsn = vy_stmt_lsn(a.stmt), b_lsn = vy_stmt_lsn(b.stmt); return a_lsn > b_lsn ? -1 : a_lsn < b_lsn; } @@ -101,14 +101,14 @@ vy_mem_tree_cmp(struct tuple *a, struct tuple *b, * Internal. Extracted to speed up BPS tree. */ static int -vy_mem_tree_cmp_key(struct tuple *a, struct vy_mem_tree_key *key, +vy_mem_tree_cmp_key(struct vy_entry entry, struct vy_mem_tree_key *key, struct key_def *cmp_def) { - int res = vy_stmt_compare(a, key->stmt, cmp_def); + int res = vy_entry_compare(entry, key->entry, cmp_def); if (res == 0) { if (key->lsn == INT64_MAX - 1) return 0; - int64_t a_lsn = vy_stmt_lsn(a); + int64_t a_lsn = vy_stmt_lsn(entry.stmt); res = a_lsn > key->lsn ? -1 : a_lsn < key->lsn; } return res; @@ -121,7 +121,7 @@ vy_mem_tree_cmp_key(struct tuple *a, struct vy_mem_tree_key *key, #define BPS_TREE_EXTENT_SIZE VY_MEM_TREE_EXTENT_SIZE #define BPS_TREE_COMPARE(a, b, cmp_def) vy_mem_tree_cmp(a, b, cmp_def) #define BPS_TREE_COMPARE_KEY(a, b, cmp_def) vy_mem_tree_cmp_key(a, b, cmp_def) -#define bps_tree_elem_t struct tuple * +#define bps_tree_elem_t struct vy_entry #define bps_tree_key_t struct vy_mem_tree_key * #define bps_tree_arg_t struct key_def * #define BPS_TREE_NO_DEBUG @@ -275,47 +275,47 @@ vy_mem_delete(struct vy_mem *index); /* * Return the older statement for the given one. */ -struct tuple * -vy_mem_older_lsn(struct vy_mem *mem, struct tuple *stmt); +struct vy_entry +vy_mem_older_lsn(struct vy_mem *mem, struct vy_entry entry); /** * Insert a statement into the in-memory level. * @param mem vy_mem. - * @param stmt Vinyl statement. + * @param entry Vinyl statement. * * @retval 0 Success. 
* @retval -1 Memory error. */ int -vy_mem_insert(struct vy_mem *mem, struct tuple *stmt); +vy_mem_insert(struct vy_mem *mem, struct vy_entry entry); /** * Insert an upsert statement into the mem. * * @param mem Mem to insert to. - * @param stmt Upsert statement to insert. + * @param entry Upsert statement to insert. * * @retval 0 Success. * @retval -1 Memory error. */ int -vy_mem_insert_upsert(struct vy_mem *mem, struct tuple *stmt); +vy_mem_insert_upsert(struct vy_mem *mem, struct vy_entry entry); /** * Confirm insertion of a statement into the in-memory level. * @param mem vy_mem. - * @param stmt Vinyl statement. + * @param entry Vinyl statement. */ void -vy_mem_commit_stmt(struct vy_mem *mem, struct tuple *stmt); +vy_mem_commit_stmt(struct vy_mem *mem, struct vy_entry entry); /** * Remove a statement from the in-memory level. * @param mem vy_mem. - * @param stmt Vinyl statement. + * @param entry Vinyl statement. */ void -vy_mem_rollback_stmt(struct vy_mem *mem, struct tuple *stmt); +vy_mem_rollback_stmt(struct vy_mem *mem, struct vy_entry entry); /** * Iterator for in-memory level. @@ -345,7 +345,7 @@ struct vy_mem_iterator { */ enum iterator_type iterator_type; /** Key to search. */ - struct tuple *key; + struct vy_entry key; /* LSN visibility, iterator shows values with lsn <= than that */ const struct vy_read_view **read_view; @@ -354,11 +354,11 @@ struct vy_mem_iterator { struct vy_mem_tree_iterator curr_pos; /* * The pointer on a region allocated statement from vy_mem BPS tree. - * There is no guarantee that curr_pos points on curr_stmt in the tree. - * For example, cur_pos can be invalid but curr_stmt can point on a + * There is no guarantee that curr_pos points on curr in the tree. + * For example, cur_pos can be invalid but curr can point on a * valid statement. 
*/ - struct tuple *curr_stmt; + struct vy_entry curr; /* data version from vy_mem */ uint32_t version; @@ -372,7 +372,7 @@ struct vy_mem_iterator { void vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *stat, struct vy_mem *mem, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv); + struct vy_entry key, const struct vy_read_view **rv); /** * Advance a mem iterator to the next key. @@ -384,24 +384,22 @@ vy_mem_iterator_next(struct vy_mem_iterator *itr, struct vy_history *history); /** - * Advance a mem iterator to the key following @last_stmt. + * Advance a mem iterator to the key following @last. * The key history is returned in @history (empty if EOF). * Returns 0 on success, -1 on memory allocation error. */ NODISCARD int -vy_mem_iterator_skip(struct vy_mem_iterator *itr, - struct tuple *last_stmt, +vy_mem_iterator_skip(struct vy_mem_iterator *itr, struct vy_entry last, struct vy_history *history); /** * Check if a mem iterator was invalidated and needs to be restored. * If it does, set the iterator position to the newest statement for - * the key following @last_stmt and return 1, otherwise return 0. + * the key following @last and return 1, otherwise return 0. * Returns -1 on memory allocation error. 
*/ NODISCARD int -vy_mem_iterator_restore(struct vy_mem_iterator *itr, - struct tuple *last_stmt, +vy_mem_iterator_restore(struct vy_mem_iterator *itr, struct vy_entry last, struct vy_history *history); /** diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index 9e1f3ca7..39aad41f 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -53,7 +53,7 @@ */ static int vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx, - struct tuple *key, struct vy_history *history) + struct vy_entry key, struct vy_history *history) { if (tx == NULL) return 0; @@ -64,8 +64,8 @@ vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx, if (txv == NULL) return 0; vy_stmt_counter_acct_tuple(&lsm->stat.txw.iterator.get, - txv->stmt); - return vy_history_append_stmt(history, txv->stmt); + txv->entry.stmt); + return vy_history_append_stmt(history, txv->entry); } /** @@ -74,16 +74,16 @@ vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx, */ static int vy_point_lookup_scan_cache(struct vy_lsm *lsm, const struct vy_read_view **rv, - struct tuple *key, struct vy_history *history) + struct vy_entry key, struct vy_history *history) { lsm->cache.stat.lookup++; - struct tuple *stmt = vy_cache_get(&lsm->cache, key); + struct vy_entry entry = vy_cache_get(&lsm->cache, key); - if (stmt == NULL || vy_stmt_lsn(stmt) > (*rv)->vlsn) + if (entry.stmt == NULL || vy_stmt_lsn(entry.stmt) > (*rv)->vlsn) return 0; - vy_stmt_counter_acct_tuple(&lsm->cache.stat.get, stmt); - return vy_history_append_stmt(history, stmt); + vy_stmt_counter_acct_tuple(&lsm->cache.stat.get, entry.stmt); + return vy_history_append_stmt(history, entry); } /** @@ -93,7 +93,7 @@ vy_point_lookup_scan_cache(struct vy_lsm *lsm, const struct vy_read_view **rv, static int vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem, const struct vy_read_view **rv, - struct tuple *key, struct vy_history *history) + struct vy_entry key, struct vy_history *history) { struct 
vy_mem_iterator mem_itr; vy_mem_iterator_open(&mem_itr, &lsm->stat.memory.iterator, @@ -113,7 +113,7 @@ vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem, */ static int vy_point_lookup_scan_mems(struct vy_lsm *lsm, const struct vy_read_view **rv, - struct tuple *key, struct vy_history *history) + struct vy_entry key, struct vy_history *history) { assert(lsm->mem != NULL); int rc = vy_point_lookup_scan_mem(lsm, lsm->mem, rv, key, history); @@ -133,7 +133,7 @@ vy_point_lookup_scan_mems(struct vy_lsm *lsm, const struct vy_read_view **rv, */ static int vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice, - const struct vy_read_view **rv, struct tuple *key, + const struct vy_read_view **rv, struct vy_entry key, struct vy_history *history) { /* @@ -161,7 +161,7 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice, */ static int vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv, - struct tuple *key, struct vy_history *history) + struct vy_entry key, struct vy_history *history) { struct vy_range *range = vy_range_tree_find_by_key(&lsm->range_tree, ITER_EQ, key); @@ -194,13 +194,13 @@ vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv, int vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, - struct tuple *key, struct tuple **ret) + struct vy_entry key, struct vy_entry *ret) { /* All key parts must be set for a point lookup. 
*/ - assert(vy_stmt_is_full_key(key, lsm->cmp_def)); + assert(vy_stmt_is_full_key(key.stmt, lsm->cmp_def)); assert(tx == NULL || tx->state == VINYL_TX_READY); - *ret = NULL; + *ret = vy_entry_none(); double start_time = ev_monotonic_now(loop()); int rc = 0; @@ -295,8 +295,8 @@ done: if (rc != 0) return -1; - if (*ret != NULL) - vy_stmt_counter_acct_tuple(&lsm->stat.get, *ret); + if (ret->stmt != NULL) + vy_stmt_counter_acct_tuple(&lsm->stat.get, ret->stmt); double latency = ev_monotonic_now(loop()) - start_time; latency_collect(&lsm->stat.latency, latency); @@ -304,17 +304,17 @@ done: if (latency > lsm->env->too_long_threshold) { say_warn_ratelimited("%s: get(%s) => %s " "took too long: %.3f sec", - vy_lsm_name(lsm), tuple_str(key), - vy_stmt_str(*ret), latency); + vy_lsm_name(lsm), tuple_str(key.stmt), + vy_stmt_str(ret->stmt), latency); } return 0; } int vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv, - struct tuple *key, struct tuple **ret) + struct vy_entry key, struct vy_entry *ret) { - assert(vy_stmt_is_full_key(key, lsm->cmp_def)); + assert(vy_stmt_is_full_key(key.stmt, lsm->cmp_def)); int rc; struct vy_history history; @@ -328,7 +328,7 @@ vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv, if (rc != 0 || vy_history_is_terminal(&history)) goto done; - *ret = NULL; + *ret = vy_entry_none(); goto out; done: if (rc == 0) { diff --git a/src/box/vy_point_lookup.h b/src/box/vy_point_lookup.h index 6d77ce9c..b4092ee2 100644 --- a/src/box/vy_point_lookup.h +++ b/src/box/vy_point_lookup.h @@ -45,7 +45,7 @@ * and, if the result is the latest version of the key, adds it to cache. 
*/ -#include +#include "vy_entry.h" #if defined(__cplusplus) extern "C" { @@ -54,7 +54,6 @@ extern "C" { struct vy_lsm; struct vy_tx; struct vy_read_view; -struct tuple; /** * Given a key that has all index parts (including primary index @@ -69,7 +68,7 @@ struct tuple; int vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, - struct tuple *key, struct tuple **ret); + struct vy_entry key, struct vy_entry *ret); /** * Look up a tuple by key in memory. @@ -87,7 +86,7 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, */ int vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv, - struct tuple *key, struct tuple **ret); + struct vy_entry key, struct vy_entry *ret); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/vy_range.c b/src/box/vy_range.c index 12573436..3c36d345 100644 --- a/src/box/vy_range.c +++ b/src/box/vy_range.c @@ -57,31 +57,31 @@ vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b) return 0; /* Any key > -inf. */ - if (range_a->begin == NULL) + if (range_a->begin.stmt == NULL) return -1; - if (range_b->begin == NULL) + if (range_b->begin.stmt == NULL) return 1; assert(range_a->cmp_def == range_b->cmp_def); - return vy_stmt_compare(range_a->begin, range_b->begin, - range_a->cmp_def); + return vy_entry_compare(range_a->begin, range_b->begin, + range_a->cmp_def); } int -vy_range_tree_key_cmp(struct tuple *stmt, struct vy_range *range) +vy_range_tree_key_cmp(struct vy_entry entry, struct vy_range *range) { /* Any key > -inf. 
*/ - if (range->begin == NULL) + if (range->begin.stmt == NULL) return 1; - return vy_stmt_compare(stmt, range->begin, range->cmp_def); + return vy_entry_compare(entry, range->begin, range->cmp_def); } struct vy_range * vy_range_tree_find_by_key(vy_range_tree_t *tree, enum iterator_type iterator_type, - struct tuple *key) + struct vy_entry key) { - if (vy_stmt_is_empty_key(key)) { + if (vy_stmt_is_empty_key(key.stmt)) { switch (iterator_type) { case ITER_LT: case ITER_LE: @@ -123,9 +123,9 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, */ range = vy_range_tree_psearch(tree, key); /* switch to previous for case (4) */ - if (range != NULL && range->begin != NULL && - !vy_stmt_is_full_key(key, range->cmp_def) && - vy_stmt_compare(key, range->begin, range->cmp_def) == 0) + if (range != NULL && range->begin.stmt != NULL && + !vy_stmt_is_full_key(key.stmt, range->cmp_def) && + vy_entry_compare(key, range->begin, range->cmp_def) == 0) range = vy_range_tree_prev(tree, range); /* for case 5 or subcase of case 4 */ if (range == NULL) @@ -158,9 +158,9 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, range = vy_range_tree_nsearch(tree, key); if (range != NULL) { /* fix curr_range for cases 2 and 3 */ - if (range->begin != NULL && - vy_stmt_compare(key, range->begin, - range->cmp_def) != 0) { + if (range->begin.stmt != NULL && + vy_entry_compare(key, range->begin, + range->cmp_def) != 0) { struct vy_range *prev; prev = vy_range_tree_prev(tree, range); if (prev != NULL) @@ -175,7 +175,7 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, } struct vy_range * -vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, +vy_range_new(int64_t id, struct vy_entry begin, struct vy_entry end, struct key_def *cmp_def) { struct vy_range *range = calloc(1, sizeof(*range)); @@ -185,14 +185,12 @@ vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, return NULL; } range->id = id; - if (begin != NULL) { - tuple_ref(begin); - range->begin = begin; - } - if (end != NULL) 
{ - tuple_ref(end); - range->end = end; - } + range->begin = begin; + if (begin.stmt != NULL) + tuple_ref(begin.stmt); + range->end = end; + if (end.stmt != NULL) + tuple_ref(end.stmt); range->cmp_def = cmp_def; rlist_create(&range->slices); heap_node_create(&range->heap_node); @@ -202,10 +200,10 @@ vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, void vy_range_delete(struct vy_range *range) { - if (range->begin != NULL) - tuple_unref(range->begin); - if (range->end != NULL) - tuple_unref(range->end); + if (range->begin.stmt != NULL) + tuple_unref(range->begin.stmt); + if (range->end.stmt != NULL) + tuple_unref(range->end.stmt); struct vy_slice *slice, *next_slice; rlist_foreach_entry_safe(slice, &range->slices, in_range, next_slice) @@ -220,13 +218,13 @@ vy_range_snprint(char *buf, int size, const struct vy_range *range) { int total = 0; SNPRINT(total, snprintf, buf, size, "("); - if (range->begin != NULL) - SNPRINT(total, tuple_snprint, buf, size, range->begin); + if (range->begin.stmt != NULL) + SNPRINT(total, tuple_snprint, buf, size, range->begin.stmt); else SNPRINT(total, snprintf, buf, size, "-inf"); SNPRINT(total, snprintf, buf, size, ".."); - if (range->end != NULL) - SNPRINT(total, tuple_snprint, buf, size, range->end); + if (range->end.stmt != NULL) + SNPRINT(total, tuple_snprint, buf, size, range->end.stmt); else SNPRINT(total, snprintf, buf, size, "inf"); SNPRINT(total, snprintf, buf, size, ")"); @@ -499,16 +497,19 @@ vy_range_needs_split(struct vy_range *range, int64_t range_size, * * In such cases there's no point in splitting the range. */ - if (slice->begin != NULL && key_compare(mid_page->min_key, - tuple_data(slice->begin), range->cmp_def) <= 0) + if (slice->begin.stmt != NULL && + vy_entry_compare_with_raw_key(slice->begin, mid_page->min_key, + mid_page->min_key_hint, + range->cmp_def) >= 0) return false; /* * The median key can't be >= the end of the slice as we * take the min key of a page for the median key. 
*/ - assert(slice->end == NULL || key_compare(mid_page->min_key, - tuple_data(slice->end), range->cmp_def) < 0); - + assert(slice->end.stmt == NULL || + vy_entry_compare_with_raw_key(slice->end, mid_page->min_key, + mid_page->min_key_hint, + range->cmp_def) > 0); *p_split_key = mid_page->min_key; return true; } diff --git a/src/box/vy_range.h b/src/box/vy_range.h index 0f1a50fc..2eb843b3 100644 --- a/src/box/vy_range.h +++ b/src/box/vy_range.h @@ -42,6 +42,7 @@ #define HEAP_FORWARD_DECLARATION #include "salad/heap.h" #include "trivia/util.h" +#include "vy_entry.h" #include "vy_stat.h" #if defined(__cplusplus) @@ -50,7 +51,6 @@ extern "C" { struct index_opts; struct key_def; -struct tuple; struct vy_slice; /** @@ -64,9 +64,9 @@ struct vy_range { * Both 'begin' and 'end' statements have SELECT type with * the full idexed key. */ - struct tuple *begin; + struct vy_entry begin; /** Range upper bound. NULL if range is rightmost. */ - struct tuple *end; + struct vy_entry end; /** Key definition for comparing range boundaries. * Contains secondary and primary key parts for secondary * keys, to ensure an always distinct result for @@ -167,12 +167,12 @@ vy_range_is_scheduled(struct vy_range *range) int vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b); int -vy_range_tree_key_cmp(struct tuple *stmt, struct vy_range *range); +vy_range_tree_key_cmp(struct vy_entry entry, struct vy_range *range); typedef rb_tree(struct vy_range) vy_range_tree_t; rb_gen_ext_key(MAYBE_UNUSED static inline, vy_range_tree_, vy_range_tree_t, struct vy_range, tree_node, vy_range_tree_cmp, - struct tuple *, vy_range_tree_key_cmp); + struct vy_entry, vy_range_tree_key_cmp); /** * Find the first range in which a given key should be looked up. 
@@ -186,7 +186,7 @@ rb_gen_ext_key(MAYBE_UNUSED static inline, vy_range_tree_, vy_range_tree_t, struct vy_range * vy_range_tree_find_by_key(vy_range_tree_t *tree, enum iterator_type iterator_type, - struct tuple *key); + struct vy_entry key); /** * Allocate and initialize a range (either a new one or for @@ -201,7 +201,7 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, * @retval NULL Out of memory. */ struct vy_range * -vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, +vy_range_new(int64_t id, struct vy_entry begin, struct vy_entry end, struct key_def *cmp_def); /** diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c index 4e4a3243..d6a5c65d 100644 --- a/src/box/vy_read_iterator.c +++ b/src/box/vy_read_iterator.c @@ -141,24 +141,24 @@ vy_read_iterator_unpin_slices(struct vy_read_iterator *itr) */ static bool vy_read_iterator_range_is_done(struct vy_read_iterator *itr, - struct tuple *next_key) + struct vy_entry next) { struct vy_range *range = itr->curr_range; struct key_def *cmp_def = itr->lsm->cmp_def; int dir = iterator_direction(itr->iterator_type); - if (dir > 0 && range->end != NULL && - (next_key == NULL || vy_stmt_compare(next_key, range->end, - cmp_def) >= 0) && + if (dir > 0 && range->end.stmt != NULL && + (next.stmt == NULL || vy_entry_compare(next, range->end, + cmp_def) >= 0) && (itr->iterator_type != ITER_EQ || - vy_stmt_compare(itr->key, range->end, cmp_def) >= 0)) + vy_entry_compare(itr->key, range->end, cmp_def) >= 0)) return true; - if (dir < 0 && range->begin != NULL && - (next_key == NULL || vy_stmt_compare(next_key, range->begin, - cmp_def) < 0) && + if (dir < 0 && range->begin.stmt != NULL && + (next.stmt == NULL || vy_entry_compare(next, range->begin, + cmp_def) < 0) && (itr->iterator_type != ITER_REQ || - vy_stmt_compare(itr->key, range->begin, cmp_def) <= 0)) + vy_entry_compare(itr->key, range->begin, cmp_def) <= 0)) return true; return false; @@ -176,16 +176,16 @@ vy_read_iterator_range_is_done(struct 
vy_read_iterator *itr, */ static inline int vy_read_iterator_cmp_stmt(struct vy_read_iterator *itr, - struct tuple *a, struct tuple *b) + struct vy_entry a, struct vy_entry b) { - if (a == NULL && b != NULL) + if (a.stmt == NULL && b.stmt != NULL) return 1; - if (a != NULL && b == NULL) + if (a.stmt != NULL && b.stmt == NULL) return -1; - if (a == NULL && b == NULL) + if (a.stmt == NULL && b.stmt == NULL) return 0; return iterator_direction(itr->iterator_type) * - vy_stmt_compare(a, b, itr->lsm->cmp_def); + vy_entry_compare(a, b, itr->lsm->cmp_def); } /** @@ -194,9 +194,8 @@ vy_read_iterator_cmp_stmt(struct vy_read_iterator *itr, */ static bool vy_read_iterator_is_exact_match(struct vy_read_iterator *itr, - struct tuple *stmt) + struct vy_entry entry) { - struct tuple *key = itr->key; enum iterator_type type = itr->iterator_type; struct key_def *cmp_def = itr->lsm->cmp_def; @@ -205,31 +204,31 @@ vy_read_iterator_is_exact_match(struct vy_read_iterator *itr, * we can avoid disk accesses on the first iteration * in case the key is found in memory. */ - return itr->last_stmt == NULL && stmt != NULL && + return itr->last.stmt == NULL && entry.stmt != NULL && (type == ITER_EQ || type == ITER_REQ || type == ITER_GE || type == ITER_LE) && - vy_stmt_is_full_key(key, cmp_def) && - vy_stmt_compare(stmt, key, cmp_def) == 0; + vy_stmt_is_full_key(itr->key.stmt, cmp_def) && + vy_entry_compare(entry, itr->key, cmp_def) == 0; } /** * Check if the statement at which the given read source * is positioned precedes the current candidate for the - * next key ('next_key') and update the latter if so. + * next key ('next') and update the latter if so. * The 'stop' flag is set if the next key is found and * older sources don't need to be evaluated. 
*/ static void vy_read_iterator_evaluate_src(struct vy_read_iterator *itr, struct vy_read_src *src, - struct tuple **next_key, bool *stop) + struct vy_entry *next, bool *stop) { uint32_t src_id = src - itr->src; - struct tuple *stmt = vy_history_last_stmt(&src->history); - int cmp = vy_read_iterator_cmp_stmt(itr, stmt, *next_key); + struct vy_entry entry = vy_history_last_stmt(&src->history); + int cmp = vy_read_iterator_cmp_stmt(itr, entry, *next); if (cmp < 0) { - assert(stmt != NULL); - *next_key = stmt; + assert(entry.stmt != NULL); + *next = entry; itr->front_id++; } if (cmp <= 0) @@ -238,7 +237,7 @@ vy_read_iterator_evaluate_src(struct vy_read_iterator *itr, itr->skipped_src = MAX(itr->skipped_src, src_id + 1); if (cmp < 0 && vy_history_is_terminal(&src->history) && - vy_read_iterator_is_exact_match(itr, stmt)) { + vy_read_iterator_is_exact_match(itr, entry)) { itr->skipped_src = src_id + 1; *stop = true; } @@ -261,7 +260,7 @@ vy_read_iterator_evaluate_src(struct vy_read_iterator *itr, * front_id of the read iterator were used on the previous * iteration and hence need to be advanced. * - * 2. Update the candidate for the next key ('next_key') if the + * 2. Update the candidate for the next key ('next') if the * statement at which the source is positioned precedes it. * The 'stop' flag is set if older sources do not need to be * scanned (e.g. because a chain was found in the cache). 
@@ -270,7 +269,7 @@ vy_read_iterator_evaluate_src(struct vy_read_iterator *itr, static NODISCARD int vy_read_iterator_scan_txw(struct vy_read_iterator *itr, - struct tuple **next_key, bool *stop) + struct vy_entry *next, bool *stop) { struct vy_read_src *src = &itr->src[itr->txw_src]; struct vy_txw_iterator *src_itr = &src->txw_iterator; @@ -280,11 +279,10 @@ vy_read_iterator_scan_txw(struct vy_read_iterator *itr, assert(itr->txw_src < itr->skipped_src); - int rc = vy_txw_iterator_restore(src_itr, itr->last_stmt, - &src->history); + int rc = vy_txw_iterator_restore(src_itr, itr->last, &src->history); if (rc == 0) { if (!src->is_started) { - rc = vy_txw_iterator_skip(src_itr, itr->last_stmt, + rc = vy_txw_iterator_skip(src_itr, itr->last, &src->history); } else if (src->front_id == itr->prev_front_id) { rc = vy_txw_iterator_next(src_itr, &src->history); @@ -294,23 +292,23 @@ vy_read_iterator_scan_txw(struct vy_read_iterator *itr, if (rc < 0) return -1; - vy_read_iterator_evaluate_src(itr, src, next_key, stop); + vy_read_iterator_evaluate_src(itr, src, next, stop); return 0; } static NODISCARD int vy_read_iterator_scan_cache(struct vy_read_iterator *itr, - struct tuple **next_key, bool *stop) + struct vy_entry *next, bool *stop) { bool is_interval = false; struct vy_read_src *src = &itr->src[itr->cache_src]; struct vy_cache_iterator *src_itr = &src->cache_iterator; - int rc = vy_cache_iterator_restore(src_itr, itr->last_stmt, + int rc = vy_cache_iterator_restore(src_itr, itr->last, &src->history, &is_interval); if (rc == 0) { if (!src->is_started || itr->cache_src >= itr->skipped_src) { - rc = vy_cache_iterator_skip(src_itr, itr->last_stmt, + rc = vy_cache_iterator_skip(src_itr, itr->last, &src->history, &is_interval); } else if (src->front_id == itr->prev_front_id) { rc = vy_cache_iterator_next(src_itr, &src->history, @@ -321,7 +319,7 @@ vy_read_iterator_scan_cache(struct vy_read_iterator *itr, if (rc < 0) return -1; - vy_read_iterator_evaluate_src(itr, src, 
next_key, stop); + vy_read_iterator_evaluate_src(itr, src, next, stop); if (is_interval) { itr->skipped_src = itr->cache_src + 1; *stop = true; @@ -331,7 +329,7 @@ vy_read_iterator_scan_cache(struct vy_read_iterator *itr, static NODISCARD int vy_read_iterator_scan_mem(struct vy_read_iterator *itr, uint32_t mem_src, - struct tuple **next_key, bool *stop) + struct vy_entry *next, bool *stop) { int rc; struct vy_read_src *src = &itr->src[mem_src]; @@ -339,10 +337,10 @@ vy_read_iterator_scan_mem(struct vy_read_iterator *itr, uint32_t mem_src, assert(mem_src >= itr->mem_src && mem_src < itr->disk_src); - rc = vy_mem_iterator_restore(src_itr, itr->last_stmt, &src->history); + rc = vy_mem_iterator_restore(src_itr, itr->last, &src->history); if (rc == 0) { if (!src->is_started || mem_src >= itr->skipped_src) { - rc = vy_mem_iterator_skip(src_itr, itr->last_stmt, + rc = vy_mem_iterator_skip(src_itr, itr->last, &src->history); } else if (src->front_id == itr->prev_front_id) { rc = vy_mem_iterator_next(src_itr, &src->history); @@ -352,13 +350,13 @@ vy_read_iterator_scan_mem(struct vy_read_iterator *itr, uint32_t mem_src, if (rc < 0) return -1; - vy_read_iterator_evaluate_src(itr, src, next_key, stop); + vy_read_iterator_evaluate_src(itr, src, next, stop); return 0; } static NODISCARD int vy_read_iterator_scan_disk(struct vy_read_iterator *itr, uint32_t disk_src, - struct tuple **next_key, bool *stop) + struct vy_entry *next, bool *stop) { int rc = 0; struct vy_read_src *src = &itr->src[disk_src]; @@ -367,7 +365,7 @@ vy_read_iterator_scan_disk(struct vy_read_iterator *itr, uint32_t disk_src, assert(disk_src >= itr->disk_src && disk_src < itr->src_count); if (!src->is_started || disk_src >= itr->skipped_src) - rc = vy_run_iterator_skip(src_itr, itr->last_stmt, + rc = vy_run_iterator_skip(src_itr, itr->last, &src->history); else if (src->front_id == itr->prev_front_id) rc = vy_run_iterator_next(src_itr, &src->history); @@ -376,32 +374,32 @@ vy_read_iterator_scan_disk(struct 
vy_read_iterator *itr, uint32_t disk_src, if (rc < 0) return -1; - vy_read_iterator_evaluate_src(itr, src, next_key, stop); + vy_read_iterator_evaluate_src(itr, src, next, stop); return 0; } /** * Restore the position of the active in-memory tree iterator - * after a yield caused by a disk read and update 'next_key' + * after a yield caused by a disk read and update 'next' * if necessary. */ static NODISCARD int vy_read_iterator_restore_mem(struct vy_read_iterator *itr, - struct tuple **next_key) + struct vy_entry *next) { int rc; int cmp; struct vy_read_src *src = &itr->src[itr->mem_src]; rc = vy_mem_iterator_restore(&src->mem_iterator, - itr->last_stmt, &src->history); + itr->last, &src->history); if (rc < 0) return -1; /* memory allocation error */ if (rc == 0) return 0; /* nothing changed */ - struct tuple *stmt = vy_history_last_stmt(&src->history); - cmp = vy_read_iterator_cmp_stmt(itr, stmt, *next_key); + struct vy_entry entry = vy_history_last_stmt(&src->history); + cmp = vy_read_iterator_cmp_stmt(itr, entry, *next); if (cmp > 0) { /* * Memory trees are append-only so if the @@ -416,7 +414,7 @@ vy_read_iterator_restore_mem(struct vy_read_iterator *itr, * The new statement precedes the current * candidate for the next key. */ - *next_key = stmt; + *next = entry; itr->front_id++; } else { /* @@ -445,9 +443,9 @@ vy_read_iterator_next_range(struct vy_read_iterator *itr); static NODISCARD int vy_read_iterator_advance(struct vy_read_iterator *itr) { - if (itr->last_stmt != NULL && (itr->iterator_type == ITER_EQ || + if (itr->last.stmt != NULL && (itr->iterator_type == ITER_EQ || itr->iterator_type == ITER_REQ) && - vy_stmt_is_full_key(itr->key, itr->lsm->cmp_def)) { + vy_stmt_is_full_key(itr->key.stmt, itr->lsm->cmp_def)) { /* * There may be one statement at max satisfying * EQ with a full key. 
@@ -459,7 +457,7 @@ vy_read_iterator_advance(struct vy_read_iterator *itr) * Restore the iterator position if the LSM tree has changed * since the last iteration or this is the first iteration. */ - if (itr->last_stmt == NULL || + if (itr->last.stmt == NULL || itr->mem_list_version != itr->lsm->mem_list_version || itr->range_tree_version != itr->lsm->range_tree_version || itr->range_version != itr->curr_range->version) { @@ -474,18 +472,18 @@ restart: * from the one that stores newest data. */ bool stop = false; - struct tuple *next_key = NULL; - if (vy_read_iterator_scan_txw(itr, &next_key, &stop) != 0) + struct vy_entry next = vy_entry_none(); + if (vy_read_iterator_scan_txw(itr, &next, &stop) != 0) return -1; if (stop) goto done; - if (vy_read_iterator_scan_cache(itr, &next_key, &stop) != 0) + if (vy_read_iterator_scan_cache(itr, &next, &stop) != 0) return -1; if (stop) goto done; for (uint32_t i = itr->mem_src; i < itr->disk_src; i++) { - if (vy_read_iterator_scan_mem(itr, i, &next_key, &stop) != 0) + if (vy_read_iterator_scan_mem(itr, i, &next, &stop) != 0) return -1; if (stop) goto done; @@ -494,7 +492,7 @@ rescan_disk: /* The following code may yield as it needs to access disk. */ vy_read_iterator_pin_slices(itr); for (uint32_t i = itr->disk_src; i < itr->src_count; i++) { - if (vy_read_iterator_scan_disk(itr, i, &next_key, &stop) != 0) { + if (vy_read_iterator_scan_disk(itr, i, &next, &stop) != 0) { vy_read_iterator_unpin_slices(itr); return -1; } @@ -531,22 +529,21 @@ rescan_disk: * as it is owned exclusively by the current fiber so the only * source to check is the active in-memory tree. */ - if (vy_read_iterator_restore_mem(itr, &next_key) != 0) + if (vy_read_iterator_restore_mem(itr, &next) != 0) return -1; /* * Scan the next range in case we transgressed the current * range's boundaries. 
*/ - if (vy_read_iterator_range_is_done(itr, next_key)) { + if (vy_read_iterator_range_is_done(itr, next)) { vy_read_iterator_next_range(itr); goto rescan_disk; } done: #ifndef NDEBUG /* Check that the statement meets search criteria. */ - if (next_key != NULL) { - int cmp = vy_stmt_compare(next_key, itr->key, - itr->lsm->cmp_def); + if (next.stmt != NULL) { + int cmp = vy_entry_compare(next, itr->key, itr->lsm->cmp_def); cmp *= iterator_direction(itr->iterator_type); if (itr->iterator_type == ITER_GT || itr->iterator_type == ITER_LT) @@ -558,13 +555,12 @@ done: * Ensure the read iterator does not return duplicates * and respects statement order. */ - if (itr->last_stmt != NULL && next_key != NULL) { - assert(vy_read_iterator_cmp_stmt(itr, next_key, - itr->last_stmt) > 0); + if (itr->last.stmt != NULL && next.stmt != NULL) { + assert(vy_read_iterator_cmp_stmt(itr, next, itr->last) > 0); } #endif - if (itr->need_check_eq && next_key != NULL && - vy_stmt_compare(next_key, itr->key, itr->lsm->cmp_def) != 0) + if (itr->need_check_eq && next.stmt != NULL && + vy_entry_compare(next, itr->key, itr->lsm->cmp_def) != 0) itr->front_id++; return 0; } @@ -680,7 +676,7 @@ vy_read_iterator_cleanup(struct vy_read_iterator *itr) void vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_lsm *lsm, struct vy_tx *tx, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv) + struct vy_entry key, const struct vy_read_view **rv) { memset(itr, 0, sizeof(*itr)); @@ -689,8 +685,10 @@ vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_lsm *lsm, itr->iterator_type = iterator_type; itr->key = key; itr->read_view = rv; + itr->last = vy_entry_none(); + itr->last_cached = vy_entry_none(); - if (vy_stmt_is_empty_key(key)) { + if (vy_stmt_is_empty_key(key.stmt)) { /* * Strictly speaking, a GT/LT iterator should return * nothing if the key is empty, because every key is @@ -733,7 +731,9 @@ vy_read_iterator_restore(struct vy_read_iterator *itr) 
itr->mem_list_version = itr->lsm->mem_list_version; itr->range_tree_version = itr->lsm->range_tree_version; itr->curr_range = vy_range_tree_find_by_key(&itr->lsm->range_tree, - itr->iterator_type, itr->last_stmt ?: itr->key); + itr->iterator_type, + itr->last.stmt != NULL ? + itr->last : itr->key); itr->range_version = itr->curr_range->version; if (itr->tx != NULL) { @@ -768,19 +768,19 @@ vy_read_iterator_next_range(struct vy_read_iterator *itr) vy_range_tree_prev(&itr->lsm->range_tree, range); assert(range != NULL); - if (itr->last_stmt == NULL) + if (itr->last.stmt == NULL) break; /* * We could skip an entire range due to the cache. * Make sure the next statement falls in the range. */ - if (dir > 0 && (range->end == NULL || - vy_stmt_compare(itr->last_stmt, range->end, - cmp_def) < 0)) + if (dir > 0 && (range->end.stmt == NULL || + vy_entry_compare(itr->last, range->end, + cmp_def) < 0)) break; - if (dir < 0 && (range->begin == NULL || - vy_stmt_compare(itr->last_stmt, range->begin, - cmp_def) > 0)) + if (dir < 0 && (range->begin.stmt == NULL || + vy_entry_compare(itr->last, range->begin, + cmp_def) > 0)) break; } itr->curr_range = range; @@ -801,7 +801,7 @@ vy_read_iterator_next_range(struct vy_read_iterator *itr) */ static NODISCARD int vy_read_iterator_apply_history(struct vy_read_iterator *itr, - struct tuple **ret) + struct vy_entry *ret) { struct vy_lsm *lsm = itr->lsm; struct vy_history history; @@ -829,54 +829,54 @@ vy_read_iterator_apply_history(struct vy_read_iterator *itr, * Track a read in the conflict manager. */ static int -vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt) +vy_read_iterator_track_read(struct vy_read_iterator *itr, struct vy_entry entry) { if (itr->tx == NULL) return 0; - if (stmt == NULL) { - stmt = (itr->iterator_type == ITER_EQ || - itr->iterator_type == ITER_REQ ? 
- itr->key : itr->lsm->env->empty_key); + if (entry.stmt == NULL) { + entry = (itr->iterator_type == ITER_EQ || + itr->iterator_type == ITER_REQ ? + itr->key : itr->lsm->env->empty_key); } int rc; if (iterator_direction(itr->iterator_type) >= 0) { rc = vy_tx_track(itr->tx, itr->lsm, itr->key, itr->iterator_type != ITER_GT, - stmt, true); + entry, true); } else { - rc = vy_tx_track(itr->tx, itr->lsm, stmt, true, + rc = vy_tx_track(itr->tx, itr->lsm, entry, true, itr->key, itr->iterator_type != ITER_LT); } return rc; } NODISCARD int -vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) +vy_read_iterator_next(struct vy_read_iterator *itr, struct vy_entry *result) { assert(itr->tx == NULL || itr->tx->state == VINYL_TX_READY); ev_tstamp start_time = ev_monotonic_now(loop()); struct vy_lsm *lsm = itr->lsm; - struct tuple *stmt; + struct vy_entry entry; - if (itr->last_stmt == NULL) + if (itr->last.stmt == NULL) lsm->stat.lookup++; /* first iteration */ next_key: if (vy_read_iterator_advance(itr) != 0) return -1; - if (vy_read_iterator_apply_history(itr, &stmt) != 0) + if (vy_read_iterator_apply_history(itr, &entry) != 0) return -1; - if (vy_read_iterator_track_read(itr, stmt) != 0) + if (vy_read_iterator_track_read(itr, entry) != 0) return -1; - if (itr->last_stmt != NULL) - tuple_unref(itr->last_stmt); - itr->last_stmt = stmt; + if (itr->last.stmt != NULL) + tuple_unref(itr->last.stmt); + itr->last = entry; - if (stmt != NULL && vy_stmt_type(stmt) == IPROTO_DELETE) { + if (entry.stmt != NULL && vy_stmt_type(entry.stmt) == IPROTO_DELETE) { /* * We don't return DELETEs so skip to the next key. * If the DELETE was read from TX write set, there @@ -884,19 +884,19 @@ next_key: * the deleted key and hence we must not consider * previous + current tuple as an unbroken chain. 
*/ - if (vy_stmt_lsn(stmt) == INT64_MAX) { - if (itr->last_cached_stmt != NULL) - tuple_unref(itr->last_cached_stmt); - itr->last_cached_stmt = NULL; + if (vy_stmt_lsn(entry.stmt) == INT64_MAX) { + if (itr->last_cached.stmt != NULL) + tuple_unref(itr->last_cached.stmt); + itr->last_cached = vy_entry_none(); } goto next_key; } - assert(stmt == NULL || - vy_stmt_type(stmt) == IPROTO_INSERT || - vy_stmt_type(stmt) == IPROTO_REPLACE); + assert(entry.stmt == NULL || + vy_stmt_type(entry.stmt) == IPROTO_INSERT || + vy_stmt_type(entry.stmt) == IPROTO_REPLACE); - if (stmt != NULL) - vy_stmt_counter_acct_tuple(&lsm->stat.get, stmt); + if (entry.stmt != NULL) + vy_stmt_counter_acct_tuple(&lsm->stat.get, entry.stmt); ev_tstamp latency = ev_monotonic_now(loop()) - start_time; latency_collect(&lsm->stat.latency, latency); @@ -904,31 +904,31 @@ next_key: if (latency > lsm->env->too_long_threshold) { say_warn_ratelimited("%s: select(%s, %s) => %s " "took too long: %.3f sec", - vy_lsm_name(lsm), tuple_str(itr->key), + vy_lsm_name(lsm), tuple_str(itr->key.stmt), iterator_type_strs[itr->iterator_type], - vy_stmt_str(stmt), latency); + vy_stmt_str(entry.stmt), latency); } - *result = stmt; + *result = entry; return 0; } void -vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct tuple *stmt) +vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct vy_entry entry) { if ((**itr->read_view).vlsn != INT64_MAX) { - if (itr->last_cached_stmt != NULL) - tuple_unref(itr->last_cached_stmt); - itr->last_cached_stmt = NULL; + if (itr->last_cached.stmt != NULL) + tuple_unref(itr->last_cached.stmt); + itr->last_cached = vy_entry_none(); return; } - vy_cache_add(&itr->lsm->cache, stmt, itr->last_cached_stmt, + vy_cache_add(&itr->lsm->cache, entry, itr->last_cached, itr->key, itr->iterator_type); - if (stmt != NULL) - tuple_ref(stmt); - if (itr->last_cached_stmt != NULL) - tuple_unref(itr->last_cached_stmt); - itr->last_cached_stmt = stmt; + if (entry.stmt != NULL) + 
tuple_ref(entry.stmt); + if (itr->last_cached.stmt != NULL) + tuple_unref(itr->last_cached.stmt); + itr->last_cached = entry; } /** @@ -937,10 +937,10 @@ vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct tuple *stmt) void vy_read_iterator_close(struct vy_read_iterator *itr) { - if (itr->last_stmt != NULL) - tuple_unref(itr->last_stmt); - if (itr->last_cached_stmt != NULL) - tuple_unref(itr->last_cached_stmt); + if (itr->last.stmt != NULL) + tuple_unref(itr->last.stmt); + if (itr->last_cached.stmt != NULL) + tuple_unref(itr->last_cached.stmt); vy_read_iterator_cleanup(itr); free(itr->src); TRASH(itr); diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h index baab8859..06e7f41c 100644 --- a/src/box/vy_read_iterator.h +++ b/src/box/vy_read_iterator.h @@ -36,6 +36,7 @@ #include "iterator_type.h" #include "trivia/util.h" +#include "vy_entry.h" #if defined(__cplusplus) extern "C" { @@ -54,7 +55,7 @@ struct vy_read_iterator { /** Iterator type. */ enum iterator_type iterator_type; /** Search key. */ - struct tuple *key; + struct vy_entry key; /** Read view the iterator lives in. */ const struct vy_read_view **read_view; /** @@ -63,12 +64,12 @@ struct vy_read_iterator { */ bool need_check_eq; /** Last statement returned by vy_read_iterator_next(). */ - struct tuple *last_stmt; + struct vy_entry last; /** * Last statement added to the tuple cache by * vy_read_iterator_cache_add(). */ - struct tuple *last_cached_stmt; + struct vy_entry last_cached; /** * Copy of lsm->range_tree_version. * Used for detecting range tree changes. 
@@ -132,7 +133,7 @@ struct vy_read_iterator { void vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_lsm *lsm, struct vy_tx *tx, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv); + struct vy_entry key, const struct vy_read_view **rv); /** * Get the next statement with another key, or start the iterator, @@ -144,12 +145,12 @@ vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_lsm *lsm, * @retval -1 Read error. */ NODISCARD int -vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result); +vy_read_iterator_next(struct vy_read_iterator *itr, struct vy_entry *result); /** * Add the last tuple returned by the read iterator to the cache. - * @param itr Read iterator - * @param stmt Last tuple returned by the iterator. + * @param itr Read iterator + * @param entry Last tuple returned by the iterator. * * We use a separate function for populating the cache rather than * doing that right in vy_read_iterator_next() so that we can store @@ -163,7 +164,7 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result); * the result to the cache. */ void -vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct tuple *stmt); +vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct vy_entry entry); /** * Close the iterator and free resources. 
diff --git a/src/box/vy_read_set.c b/src/box/vy_read_set.c index b95d2e4e..431b24fc 100644 --- a/src/box/vy_read_set.c +++ b/src/box/vy_read_set.c @@ -46,15 +46,15 @@ vy_read_interval_cmpl(const struct vy_read_interval *a, { assert(a->lsm == b->lsm); struct key_def *cmp_def = a->lsm->cmp_def; - int cmp = vy_stmt_compare(a->left, b->left, cmp_def); + int cmp = vy_entry_compare(a->left, b->left, cmp_def); if (cmp != 0) return cmp; if (a->left_belongs && !b->left_belongs) return -1; if (!a->left_belongs && b->left_belongs) return 1; - uint32_t a_parts = vy_stmt_key_part_count(a->left, cmp_def); - uint32_t b_parts = vy_stmt_key_part_count(b->left, cmp_def); + uint32_t a_parts = vy_stmt_key_part_count(a->left.stmt, cmp_def); + uint32_t b_parts = vy_stmt_key_part_count(b->left.stmt, cmp_def); if (a->left_belongs) return a_parts < b_parts ? -1 : a_parts > b_parts; else @@ -67,15 +67,15 @@ vy_read_interval_cmpr(const struct vy_read_interval *a, { assert(a->lsm == b->lsm); struct key_def *cmp_def = a->lsm->cmp_def; - int cmp = vy_stmt_compare(a->right, b->right, cmp_def); + int cmp = vy_entry_compare(a->right, b->right, cmp_def); if (cmp != 0) return cmp; if (a->right_belongs && !b->right_belongs) return 1; if (!a->right_belongs && b->right_belongs) return -1; - uint32_t a_parts = vy_stmt_key_part_count(a->right, cmp_def); - uint32_t b_parts = vy_stmt_key_part_count(b->right, cmp_def); + uint32_t a_parts = vy_stmt_key_part_count(a->right.stmt, cmp_def); + uint32_t b_parts = vy_stmt_key_part_count(b->right.stmt, cmp_def); if (a->right_belongs) return a_parts > b_parts ? 
-1 : a_parts < b_parts; else @@ -89,7 +89,7 @@ vy_read_interval_should_merge(const struct vy_read_interval *l, assert(l->lsm == r->lsm); assert(vy_read_interval_cmpl(l, r) <= 0); struct key_def *cmp_def = l->lsm->cmp_def; - int cmp = vy_stmt_compare(l->right, r->left, cmp_def); + int cmp = vy_entry_compare(l->right, r->left, cmp_def); if (cmp > 0) return true; if (cmp < 0) @@ -98,8 +98,8 @@ vy_read_interval_should_merge(const struct vy_read_interval *l, return true; if (!l->right_belongs && !r->left_belongs) return false; - uint32_t l_parts = vy_stmt_key_part_count(l->right, cmp_def); - uint32_t r_parts = vy_stmt_key_part_count(r->left, cmp_def); + uint32_t l_parts = vy_stmt_key_part_count(l->right.stmt, cmp_def); + uint32_t r_parts = vy_stmt_key_part_count(r->left.stmt, cmp_def); if (l->right_belongs) return l_parts <= r_parts; else @@ -118,7 +118,8 @@ vy_tx_conflict_iterator_next(struct vy_tx_conflict_iterator *it) assert(left == NULL || left->lsm == curr->lsm); assert(right == NULL || right->lsm == curr->lsm); - int cmp_right = vy_stmt_compare(it->stmt, last->right, cmp_def); + int cmp_right = vy_entry_compare(it->key, last->right, + cmp_def); if (cmp_right == 0 && !last->right_belongs) cmp_right = 1; @@ -133,11 +134,12 @@ vy_tx_conflict_iterator_next(struct vy_tx_conflict_iterator *it) } int cmp_left; - if (curr->left == last->right) { + if (vy_entry_is_equal(curr->left, last->right)) { /* Optimize comparison out. */ cmp_left = cmp_right; } else { - cmp_left = vy_stmt_compare(it->stmt, curr->left, cmp_def); + cmp_left = vy_entry_compare(it->key, curr->left, + cmp_def); if (cmp_left == 0 && !curr->left_belongs) cmp_left = -1; } @@ -160,12 +162,12 @@ vy_tx_conflict_iterator_next(struct vy_tx_conflict_iterator *it) /* * Check if the point is within the current interval. */ - if (curr->left == curr->right) { + if (vy_entry_is_equal(curr->left, curr->right)) { /* Optimize comparison out. 
*/ cmp_right = cmp_left; } else if (curr != last) { - cmp_right = vy_stmt_compare(it->stmt, curr->right, - cmp_def); + cmp_right = vy_entry_compare(it->key, curr->right, + cmp_def); if (cmp_right == 0 && !curr->right_belongs) cmp_right = 1; } diff --git a/src/box/vy_read_set.h b/src/box/vy_read_set.h index b297a477..f163d647 100644 --- a/src/box/vy_read_set.h +++ b/src/box/vy_read_set.h @@ -41,12 +41,12 @@ #include "salad/stailq.h" #include "trivia/util.h" +#include "vy_entry.h" #if defined(__cplusplus) extern "C" { #endif /* defined(__cplusplus) */ -struct tuple; struct vy_tx; struct vy_lsm; @@ -59,9 +59,9 @@ struct vy_read_interval { /** LSM tree that the transaction read from. */ struct vy_lsm *lsm; /** Left boundary of the interval. */ - struct tuple *left; + struct vy_entry left; /** Right boundary of the interval. */ - struct tuple *right; + struct vy_entry right; /** Set if the left boundary belongs to the interval. */ bool left_belongs; /** Set if the right boundary belongs to the interval. */ @@ -187,7 +187,7 @@ rb_gen_aug(MAYBE_UNUSED static inline, vy_lsm_read_set_, vy_lsm_read_set_t, */ struct vy_tx_conflict_iterator { /** The statement. */ - struct tuple *stmt; + struct vy_entry key; /** * Iterator over the interval tree checked * for intersections with the statement. 
@@ -202,12 +202,11 @@ struct vy_tx_conflict_iterator { static inline void vy_tx_conflict_iterator_init(struct vy_tx_conflict_iterator *it, - vy_lsm_read_set_t *read_set, - struct tuple *stmt) + vy_lsm_read_set_t *read_set, struct vy_entry key) { vy_lsm_read_set_walk_init(&it->tree_walk, read_set); it->tree_dir = 0; - it->stmt = stmt; + it->key = key; } /** diff --git a/src/box/vy_run.c b/src/box/vy_run.c index c0e63700..03db2f50 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -201,12 +201,16 @@ vy_run_env_enable_coio(struct vy_run_env *env) */ static int vy_page_info_create(struct vy_page_info *page_info, uint64_t offset, - const char *min_key) + const char *min_key, struct key_def *cmp_def) { memset(page_info, 0, sizeof(*page_info)); page_info->offset = offset; page_info->unpacked_size = 0; page_info->min_key = vy_key_dup(min_key); + if (page_info->min_key == NULL) + return -1; + uint32_t part_count = mp_decode_array(&min_key); + page_info->min_key_hint = key_hint(min_key, part_count, cmp_def); return page_info->min_key == NULL ? -1 : 0; } @@ -297,7 +301,7 @@ vy_run_bloom_size(struct vy_run *run) * there no pages fulfilling the conditions. 
*/ static uint32_t -vy_page_index_find_page(struct vy_run *run, struct tuple *key, +vy_page_index_find_page(struct vy_run *run, struct vy_entry key, struct key_def *cmp_def, enum iterator_type itype, bool *equal_key) { @@ -338,8 +342,9 @@ vy_page_index_find_page(struct vy_run *run, struct tuple *key, do { int32_t mid = range[0] + (range[1] - range[0]) / 2; struct vy_page_info *info = vy_run_page_info(run, mid); - int cmp = vy_stmt_compare_with_raw_key(key, info->min_key, - cmp_def); + int cmp = vy_entry_compare_with_raw_key(key, info->min_key, + info->min_key_hint, + cmp_def); if (is_lower_bound) range[cmp <= 0] = mid; else @@ -361,8 +366,8 @@ vy_page_index_find_page(struct vy_run *run, struct tuple *key, } struct vy_slice * -vy_slice_new(int64_t id, struct vy_run *run, struct tuple *begin, - struct tuple *end, struct key_def *cmp_def) +vy_slice_new(int64_t id, struct vy_run *run, struct vy_entry begin, + struct vy_entry end, struct key_def *cmp_def) { struct vy_slice *slice = malloc(sizeof(*slice)); if (slice == NULL) { @@ -376,11 +381,11 @@ vy_slice_new(int64_t id, struct vy_run *run, struct tuple *begin, slice->seed = rand(); vy_run_ref(run); run->slice_count++; - if (begin != NULL) - tuple_ref(begin); + if (begin.stmt != NULL) + tuple_ref(begin.stmt); slice->begin = begin; - if (end != NULL) - tuple_ref(end); + if (end.stmt != NULL) + tuple_ref(end.stmt); slice->end = end; rlist_create(&slice->in_range); fiber_cond_create(&slice->pin_cond); @@ -390,7 +395,7 @@ vy_slice_new(int64_t id, struct vy_run *run, struct tuple *begin, } /** Lookup the first and the last pages spanned by the slice. 
*/ bool unused; - if (slice->begin == NULL) { + if (slice->begin.stmt == NULL) { slice->first_page_no = 0; } else { slice->first_page_no = @@ -398,7 +403,7 @@ vy_slice_new(int64_t id, struct vy_run *run, struct tuple *begin, ITER_GE, &unused); assert(slice->first_page_no < run->info.page_count); } - if (slice->end == NULL) { + if (slice->end.stmt == NULL) { slice->last_page_no = run->info.page_count - 1; } else { slice->last_page_no = @@ -432,38 +437,40 @@ vy_slice_delete(struct vy_slice *slice) assert(slice->run->slice_count > 0); slice->run->slice_count--; vy_run_unref(slice->run); - if (slice->begin != NULL) - tuple_unref(slice->begin); - if (slice->end != NULL) - tuple_unref(slice->end); + if (slice->begin.stmt != NULL) + tuple_unref(slice->begin.stmt); + if (slice->end.stmt != NULL) + tuple_unref(slice->end.stmt); fiber_cond_destroy(&slice->pin_cond); TRASH(slice); free(slice); } int -vy_slice_cut(struct vy_slice *slice, int64_t id, struct tuple *begin, - struct tuple *end, struct key_def *cmp_def, +vy_slice_cut(struct vy_slice *slice, int64_t id, struct vy_entry begin, + struct vy_entry end, struct key_def *cmp_def, struct vy_slice **result) { *result = NULL; - if (begin != NULL && slice->end != NULL && - vy_stmt_compare(begin, slice->end, cmp_def) >= 0) + if (begin.stmt != NULL && slice->end.stmt != NULL && + vy_entry_compare(begin, slice->end, cmp_def) >= 0) return 0; /* no intersection: begin >= slice->end */ - if (end != NULL && slice->begin != NULL && - vy_stmt_compare(end, slice->begin, cmp_def) <= 0) + if (end.stmt != NULL && slice->begin.stmt != NULL && + vy_entry_compare(end, slice->begin, cmp_def) <= 0) return 0; /* no intersection: end <= slice->begin */ /* begin = MAX(begin, slice->begin) */ - if (slice->begin != NULL && - (begin == NULL || vy_stmt_compare(begin, slice->begin, cmp_def) < 0)) + if (slice->begin.stmt != NULL && + (begin.stmt == NULL || vy_entry_compare(begin, slice->begin, + cmp_def) < 0)) begin = slice->begin; /* end = MIN(end, 
slice->end) */ - if (slice->end != NULL && - (end == NULL || vy_stmt_compare(end, slice->end, cmp_def) > 0)) + if (slice->end.stmt != NULL && + (end.stmt == NULL || vy_entry_compare(end, slice->end, + cmp_def) > 0)) end = slice->end; *result = vy_slice_new(id, slice->run, begin, end, cmp_def); @@ -478,6 +485,7 @@ vy_slice_cut(struct vy_slice *slice, int64_t id, struct tuple *begin, * * @param[out] page Page information. * @param xrow Xrow to decode. + * @param cmp_def Definition of keys stored in the page. * @param filename Filename for error reporting. * * @retval 0 Success. @@ -485,7 +493,7 @@ vy_slice_cut(struct vy_slice *slice, int64_t id, struct tuple *begin, */ static int vy_page_info_decode(struct vy_page_info *page, const struct xrow_header *xrow, - const char *filename) + struct key_def *cmp_def, const char *filename) { assert(xrow->type == VY_INDEX_PAGE_INFO); const char *pos = xrow->body->iov_base; @@ -494,6 +502,7 @@ vy_page_info_decode(struct vy_page_info *page, const struct xrow_header *xrow, uint32_t map_size = mp_decode_map(&pos); uint32_t map_item; const char *key_beg; + uint32_t part_count; for (map_item = 0; map_item < map_size; ++map_item) { uint32_t key = mp_decode_uint(&pos); key_map &= ~(1ULL << key); @@ -513,6 +522,9 @@ vy_page_info_decode(struct vy_page_info *page, const struct xrow_header *xrow, page->min_key = vy_key_dup(key_beg); if (page->min_key == NULL) return -1; + part_count = mp_decode_array(&key_beg); + page->min_key_hint = key_hint(key_beg, part_count, + cmp_def); break; case VY_PAGE_INFO_UNPACKED_SIZE: page->unpacked_size = mp_decode_uint(&pos); @@ -707,19 +719,25 @@ vy_page_xrow(struct vy_page *page, uint32_t stmt_no, * Read raw stmt data from the page * @param page Page. * @param stmt_no Statement position in the page. + * @param cmp_def Definition of keys stored in the page. * @param format Format for REPLACE/DELETE tuples. * * @retval not NULL Statement read from page. * @retval NULL Memory error. 
*/ -static struct tuple * +static struct vy_entry vy_page_stmt(struct vy_page *page, uint32_t stmt_no, - struct tuple_format *format) + struct key_def *cmp_def, struct tuple_format *format) { struct xrow_header xrow; if (vy_page_xrow(page, stmt_no, &xrow) != 0) - return NULL; - return vy_stmt_decode(&xrow, format); + return vy_entry_none(); + struct vy_entry entry; + entry.stmt = vy_stmt_decode(&xrow, format); + if (entry.stmt == NULL) + return vy_entry_none(); + entry.hint = vy_stmt_hint(entry.stmt, cmp_def); + return entry; } /** @@ -728,9 +746,9 @@ vy_page_stmt(struct vy_page *page, uint32_t stmt_no, static void vy_run_iterator_stop(struct vy_run_iterator *itr) { - if (itr->curr_stmt != NULL) { - tuple_unref(itr->curr_stmt); - itr->curr_stmt = NULL; + if (itr->curr.stmt != NULL) { + tuple_unref(itr->curr.stmt); + itr->curr = vy_entry_none(); } if (itr->curr_page != NULL) { vy_page_delete(itr->curr_page); @@ -1015,14 +1033,14 @@ vy_run_iterator_load_page(struct vy_run_iterator *itr, uint32_t page_no, static NODISCARD int vy_run_iterator_read(struct vy_run_iterator *itr, struct vy_run_iterator_pos pos, - struct tuple **stmt) + struct vy_entry *ret) { struct vy_page *page; int rc = vy_run_iterator_load_page(itr, pos.page_no, &page); if (rc != 0) return rc; - *stmt = vy_page_stmt(page, pos.pos_in_page, itr->format); - if (*stmt == NULL) + *ret = vy_page_stmt(page, pos.pos_in_page, itr->cmp_def, itr->format); + if (ret->stmt == NULL) return -1; return 0; } @@ -1037,7 +1055,7 @@ vy_run_iterator_read(struct vy_run_iterator *itr, static uint32_t vy_run_iterator_search_in_page(struct vy_run_iterator *itr, enum iterator_type iterator_type, - struct tuple *key, + struct vy_entry key, struct vy_page *page, bool *equal_key) { uint32_t beg = 0; @@ -1047,17 +1065,18 @@ vy_run_iterator_search_in_page(struct vy_run_iterator *itr, iterator_type == ITER_LE ? 
-1 : 0); while (beg != end) { uint32_t mid = beg + (end - beg) / 2; - struct tuple *fnd_key = vy_page_stmt(page, mid, itr->format); - if (fnd_key == NULL) + struct vy_entry fnd_key = vy_page_stmt(page, mid, itr->cmp_def, + itr->format); + if (fnd_key.stmt == NULL) return end; - int cmp = vy_stmt_compare(fnd_key, key, itr->cmp_def); + int cmp = vy_entry_compare(fnd_key, key, itr->cmp_def); cmp = cmp ? cmp : zero_cmp; *equal_key = *equal_key || cmp == 0; if (cmp < 0) beg = mid + 1; else end = mid; - tuple_unref(fnd_key); + tuple_unref(fnd_key.stmt); } return end; } @@ -1075,7 +1094,7 @@ vy_run_iterator_search_in_page(struct vy_run_iterator *itr, */ static NODISCARD int vy_run_iterator_search(struct vy_run_iterator *itr, - enum iterator_type iterator_type, struct tuple *key, + enum iterator_type iterator_type, struct vy_entry key, struct vy_run_iterator_pos *pos, bool *equal_key) { pos->page_no = vy_page_index_find_page(itr->slice->run, key, @@ -1155,31 +1174,30 @@ vy_run_iterator_next_pos(struct vy_run_iterator *itr, * Affects: curr_loaded_page, curr_pos */ static NODISCARD int -vy_run_iterator_find_lsn(struct vy_run_iterator *itr, struct tuple **ret) +vy_run_iterator_find_lsn(struct vy_run_iterator *itr, struct vy_entry *ret) { struct vy_slice *slice = itr->slice; struct key_def *cmp_def = itr->cmp_def; - *ret = NULL; + *ret = vy_entry_none(); assert(itr->search_started); - assert(itr->curr_stmt != NULL); + assert(itr->curr.stmt != NULL); assert(itr->curr_pos.page_no < slice->run->info.page_count); - while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn || - vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) { + while (vy_stmt_lsn(itr->curr.stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(itr->curr.stmt) & VY_STMT_SKIP_READ) { if (vy_run_iterator_next_pos(itr, itr->iterator_type, &itr->curr_pos) != 0) { vy_run_iterator_stop(itr); return 0; } - tuple_unref(itr->curr_stmt); - itr->curr_stmt = NULL; - if (vy_run_iterator_read(itr, itr->curr_pos, - 
&itr->curr_stmt) != 0) + tuple_unref(itr->curr.stmt); + itr->curr = vy_entry_none(); + if (vy_run_iterator_read(itr, itr->curr_pos, &itr->curr) != 0) return -1; if (itr->iterator_type == ITER_EQ && - vy_stmt_compare(itr->curr_stmt, itr->key, cmp_def) != 0) { + vy_entry_compare(itr->curr, itr->key, cmp_def) != 0) { vy_run_iterator_stop(itr); return 0; } @@ -1188,26 +1206,24 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, struct tuple **ret) struct vy_run_iterator_pos test_pos; while (vy_run_iterator_next_pos(itr, itr->iterator_type, &test_pos) == 0) { - struct tuple *test_stmt; - if (vy_run_iterator_read(itr, test_pos, - &test_stmt) != 0) + struct vy_entry test; + if (vy_run_iterator_read(itr, test_pos, &test) != 0) return -1; - if (vy_stmt_lsn(test_stmt) > (**itr->read_view).vlsn || - vy_stmt_flags(test_stmt) & VY_STMT_SKIP_READ || - vy_stmt_compare(itr->curr_stmt, test_stmt, - cmp_def) != 0) { - tuple_unref(test_stmt); + if (vy_stmt_lsn(test.stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(test.stmt) & VY_STMT_SKIP_READ || + vy_entry_compare(itr->curr, test, cmp_def) != 0) { + tuple_unref(test.stmt); break; } - tuple_unref(itr->curr_stmt); - itr->curr_stmt = test_stmt; + tuple_unref(itr->curr.stmt); + itr->curr = test; itr->curr_pos = test_pos; } } /* Check if the result is within the slice boundaries. 
*/ if (itr->iterator_type == ITER_LE || itr->iterator_type == ITER_LT) { - if (slice->begin != NULL && - vy_stmt_compare(itr->curr_stmt, slice->begin, cmp_def) < 0) { + if (slice->begin.stmt != NULL && + vy_entry_compare(itr->curr, slice->begin, cmp_def) < 0) { vy_run_iterator_stop(itr); return 0; } @@ -1215,14 +1231,14 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, struct tuple **ret) assert(itr->iterator_type == ITER_GE || itr->iterator_type == ITER_GT || itr->iterator_type == ITER_EQ); - if (slice->end != NULL && - vy_stmt_compare(itr->curr_stmt, slice->end, cmp_def) >= 0) { + if (slice->end.stmt != NULL && + vy_entry_compare(itr->curr, slice->end, cmp_def) >= 0) { vy_run_iterator_stop(itr); return 0; } } - vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr_stmt); - *ret = itr->curr_stmt; + vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr.stmt); + *ret = itr->curr; return 0; } @@ -1233,7 +1249,7 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, struct tuple **ret) * rightmost for LE) of a series of statements matching the given * search criteria. * - * Updates itr->curr_pos. Doesn't affect itr->curr_stmt. + * Updates itr->curr_pos. Doesn't affect itr->curr. * * @retval 0 success * @retval 1 EOF @@ -1241,12 +1257,12 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, struct tuple **ret) */ static NODISCARD int vy_run_iterator_do_seek(struct vy_run_iterator *itr, - enum iterator_type iterator_type, struct tuple *key) + enum iterator_type iterator_type, struct vy_entry key) { struct vy_run *run = itr->slice->run; struct vy_run_iterator_pos end_pos = {run->info.page_count, 0}; bool equal_found = false; - if (!vy_stmt_is_empty_key(key)) { + if (!vy_stmt_is_empty_key(key.stmt)) { int rc = vy_run_iterator_search(itr, iterator_type, key, &itr->curr_pos, &equal_found); if (rc != 0) @@ -1293,22 +1309,22 @@ vy_run_iterator_do_seek(struct vy_run_iterator *itr, * (pass NULL to start iteration). 
*/ static NODISCARD int -vy_run_iterator_seek(struct vy_run_iterator *itr, struct tuple *last_key, - struct tuple **ret) +vy_run_iterator_seek(struct vy_run_iterator *itr, struct vy_entry last, + struct vy_entry *ret) { struct key_def *cmp_def = itr->cmp_def; struct vy_slice *slice = itr->slice; struct tuple_bloom *bloom = slice->run->info.bloom; - struct tuple *key = itr->key; + struct vy_entry key = itr->key; enum iterator_type iterator_type = itr->iterator_type; - *ret = NULL; + *ret = vy_entry_none(); assert(itr->search_started); /* Check the bloom filter on the first iteration. */ bool check_bloom = (itr->iterator_type == ITER_EQ && - itr->curr_stmt == NULL && bloom != NULL); - if (check_bloom && !vy_stmt_bloom_maybe_has(bloom, itr->key, + itr->curr.stmt == NULL && bloom != NULL); + if (check_bloom && !vy_stmt_bloom_maybe_has(bloom, itr->key.stmt, itr->key_def)) { vy_run_iterator_stop(itr); itr->stat->bloom_hit++; @@ -1326,16 +1342,16 @@ vy_run_iterator_seek(struct vy_run_iterator *itr, struct tuple *last_key, * Modify iterator type and key so as to position it to * the first statement following the given key. */ - if (last_key != NULL) { + if (last.stmt != NULL) { if (iterator_type == ITER_EQ) check_eq = true; iterator_type = iterator_direction(iterator_type) > 0 ? ITER_GT : ITER_LT; - key = last_key; + key = last; } /* Take slice boundaries into account. 
*/ - if (slice->begin != NULL && + if (slice->begin.stmt != NULL && (iterator_type == ITER_GT || iterator_type == ITER_GE || iterator_type == ITER_EQ)) { /* @@ -1351,7 +1367,7 @@ vy_run_iterator_seek(struct vy_run_iterator *itr, struct tuple *last_key, * | ge | begin | ge | * | eq | stop | */ - int cmp = vy_stmt_compare(key, slice->begin, cmp_def); + int cmp = vy_entry_compare(key, slice->begin, cmp_def); if (cmp < 0 && iterator_type == ITER_EQ) { vy_run_iterator_stop(itr); return 0; @@ -1363,7 +1379,7 @@ vy_run_iterator_seek(struct vy_run_iterator *itr, struct tuple *last_key, key = slice->begin; } } - if (slice->end != NULL && + if (slice->end.stmt != NULL && (iterator_type == ITER_LT || iterator_type == ITER_LE)) { /* * original | start @@ -1376,7 +1392,7 @@ vy_run_iterator_seek(struct vy_run_iterator *itr, struct tuple *last_key, * > end | lt | end | lt | * | le | end | lt | */ - int cmp = vy_stmt_compare(key, slice->end, cmp_def); + int cmp = vy_entry_compare(key, slice->end, cmp_def); if (cmp > 0 || (cmp == 0 && iterator_type != ITER_LT)) { iterator_type = ITER_LT; key = slice->end; @@ -1392,16 +1408,16 @@ vy_run_iterator_seek(struct vy_run_iterator *itr, struct tuple *last_key, goto not_found; /* Load the found statement. */ - if (itr->curr_stmt != NULL) { - tuple_unref(itr->curr_stmt); - itr->curr_stmt = NULL; + if (itr->curr.stmt != NULL) { + tuple_unref(itr->curr.stmt); + itr->curr = vy_entry_none(); } - if (vy_run_iterator_read(itr, itr->curr_pos, &itr->curr_stmt) != 0) + if (vy_run_iterator_read(itr, itr->curr_pos, &itr->curr) != 0) return -1; /* Check EQ constraint if necessary. */ - if (check_eq && vy_stmt_compare(itr->curr_stmt, itr->key, - itr->cmp_def) != 0) + if (check_eq && vy_entry_compare(itr->curr, itr->key, + itr->cmp_def) != 0) goto not_found; /* Skip statements invisible from the iterator read view. 
*/ @@ -1422,7 +1438,7 @@ void vy_run_iterator_open(struct vy_run_iterator *itr, struct vy_run_iterator_stat *stat, struct vy_slice *slice, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv, + struct vy_entry key, const struct vy_read_view **rv, struct key_def *cmp_def, struct key_def *key_def, struct tuple_format *format) { @@ -1436,7 +1452,7 @@ vy_run_iterator_open(struct vy_run_iterator *itr, itr->key = key; itr->read_view = rv; - itr->curr_stmt = NULL; + itr->curr = vy_entry_none(); itr->curr_pos.page_no = slice->run->info.page_count; itr->curr_page = NULL; itr->prev_page = NULL; @@ -1459,38 +1475,38 @@ vy_run_iterator_open(struct vy_run_iterator *itr, * Returns 0 on success, -1 on memory allocation or IO error. */ static NODISCARD int -vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret) +vy_run_iterator_next_key(struct vy_run_iterator *itr, struct vy_entry *ret) { - *ret = NULL; + *ret = vy_entry_none(); if (!itr->search_started) { itr->search_started = true; - return vy_run_iterator_seek(itr, NULL, ret); + return vy_run_iterator_seek(itr, vy_entry_none(), ret); } - if (itr->curr_stmt == NULL) + if (itr->curr.stmt == NULL) return 0; assert(itr->curr_pos.page_no < itr->slice->run->info.page_count); - struct tuple *next_key = NULL; + struct vy_entry next = vy_entry_none(); do { - if (next_key != NULL) - tuple_unref(next_key); + if (next.stmt != NULL) + tuple_unref(next.stmt); if (vy_run_iterator_next_pos(itr, itr->iterator_type, &itr->curr_pos) != 0) { vy_run_iterator_stop(itr); return 0; } - if (vy_run_iterator_read(itr, itr->curr_pos, &next_key) != 0) + if (vy_run_iterator_read(itr, itr->curr_pos, &next) != 0) return -1; - } while (vy_stmt_compare(itr->curr_stmt, next_key, itr->cmp_def) == 0); + } while (vy_entry_compare(itr->curr, next, itr->cmp_def) == 0); - tuple_unref(itr->curr_stmt); - itr->curr_stmt = next_key; + tuple_unref(itr->curr.stmt); + itr->curr = next; if (itr->iterator_type == ITER_EQ && 
- vy_stmt_compare(next_key, itr->key, itr->cmp_def) != 0) { + vy_entry_compare(next, itr->key, itr->cmp_def) != 0) { vy_run_iterator_stop(itr); return 0; } @@ -1498,17 +1514,17 @@ vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret) } /** - * Advance a run iterator to the newest statement for the first key - * following @last_stmt. The statement is returned in @ret (NULL if EOF). + * Advance a run iterator to the next (older) statement for the + * current key. The statement is returned in @ret (NULL if EOF). * Returns 0 on success, -1 on memory allocation or IO error. */ static NODISCARD int -vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret) +vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct vy_entry *ret) { - *ret = NULL; + *ret = vy_entry_none(); assert(itr->search_started); - assert(itr->curr_stmt != NULL); + assert(itr->curr.stmt != NULL); assert(itr->curr_pos.page_no < itr->slice->run->info.page_count); struct vy_run_iterator_pos next_pos; @@ -1518,23 +1534,23 @@ next: return 0; } - struct tuple *next_key; - if (vy_run_iterator_read(itr, next_pos, &next_key) != 0) + struct vy_entry next; + if (vy_run_iterator_read(itr, next_pos, &next) != 0) return -1; - if (vy_stmt_compare(itr->curr_stmt, next_key, itr->cmp_def) != 0) { - tuple_unref(next_key); + if (vy_entry_compare(itr->curr, next, itr->cmp_def) != 0) { + tuple_unref(next.stmt); return 0; } - tuple_unref(itr->curr_stmt); - itr->curr_stmt = next_key; + tuple_unref(itr->curr.stmt); + itr->curr = next; itr->curr_pos = next_pos; - if (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) + if (vy_stmt_flags(itr->curr.stmt) & VY_STMT_SKIP_READ) goto next; - vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr_stmt); - *ret = itr->curr_stmt; + vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr.stmt); + *ret = itr->curr; return 0; } @@ -1543,47 +1559,47 @@ vy_run_iterator_next(struct vy_run_iterator *itr, struct vy_history *history) { 
vy_history_cleanup(history); - struct tuple *stmt; - if (vy_run_iterator_next_key(itr, &stmt) != 0) + struct vy_entry entry; + if (vy_run_iterator_next_key(itr, &entry) != 0) return -1; - while (stmt != NULL) { - if (vy_history_append_stmt(history, stmt) != 0) + while (entry.stmt != NULL) { + if (vy_history_append_stmt(history, entry) != 0) return -1; if (vy_history_is_terminal(history)) break; - if (vy_run_iterator_next_lsn(itr, &stmt) != 0) + if (vy_run_iterator_next_lsn(itr, &entry) != 0) return -1; } return 0; } NODISCARD int -vy_run_iterator_skip(struct vy_run_iterator *itr, struct tuple *last_stmt, +vy_run_iterator_skip(struct vy_run_iterator *itr, struct vy_entry last, struct vy_history *history) { /* * Check if the iterator is already positioned - * at the statement following last_stmt. + * at the statement following last. */ if (itr->search_started && - (itr->curr_stmt == NULL || last_stmt == NULL || + (itr->curr.stmt == NULL || last.stmt == NULL || iterator_direction(itr->iterator_type) * - vy_stmt_compare(itr->curr_stmt, last_stmt, itr->cmp_def) > 0)) + vy_entry_compare(itr->curr, last, itr->cmp_def) > 0)) return 0; vy_history_cleanup(history); itr->search_started = true; - struct tuple *stmt; - if (vy_run_iterator_seek(itr, last_stmt, &stmt) != 0) + struct vy_entry entry; + if (vy_run_iterator_seek(itr, last, &entry) != 0) return -1; - while (stmt != NULL) { - if (vy_history_append_stmt(history, stmt) != 0) + while (entry.stmt != NULL) { + if (vy_history_append_stmt(history, entry) != 0) return -1; if (vy_history_is_terminal(history)) break; - if (vy_run_iterator_next_lsn(itr, &stmt) != 0) + if (vy_run_iterator_next_lsn(itr, &entry) != 0) return -1; } return 0; @@ -1615,7 +1631,7 @@ vy_run_acct_page(struct vy_run *run, struct vy_page_info *page) int vy_run_recover(struct vy_run *run, const char *dir, - uint32_t space_id, uint32_t iid) + uint32_t space_id, uint32_t iid, struct key_def *cmp_def) { char path[PATH_MAX]; vy_run_snprint_path(path, 
sizeof(path), dir, @@ -1701,7 +1717,7 @@ vy_run_recover(struct vy_run *run, const char *dir, goto fail_close; } struct vy_page_info *page = run->page_info + page_no; - if (vy_page_info_decode(page, &xrow, path) < 0) { + if (vy_page_info_decode(page, &xrow, cmp_def, path) < 0) { /** * Limit the count of pages to successfully * created pages @@ -1741,14 +1757,14 @@ fail: /* dump statement to the run page buffers (stmt header and data) */ static int -vy_run_dump_stmt(struct tuple *value, struct xlog *data_xlog, +vy_run_dump_stmt(struct vy_entry entry, struct xlog *data_xlog, struct vy_page_info *info, struct key_def *key_def, bool is_primary) { struct xrow_header xrow; int rc = (is_primary ? - vy_stmt_encode_primary(value, key_def, 0, &xrow) : - vy_stmt_encode_secondary(value, key_def, &xrow)); + vy_stmt_encode_primary(entry.stmt, key_def, 0, &xrow) : + vy_stmt_encode_secondary(entry.stmt, key_def, &xrow)); if (rc != 0) return -1; @@ -2119,22 +2135,25 @@ vy_run_writer_create_xlog(struct vy_run_writer *writer) } /** - * Start a new page with a min_key stored in @a first_stmt. + * Start a new page with a min_key stored in @a first_entry. * @param writer Run writer. - * @param first_stmt First statement of a page. + * @param first_entry First statement of a page. * * @retval -1 Memory error. * @retval 0 Success. */ static int -vy_run_writer_start_page(struct vy_run_writer *writer, struct tuple *first_stmt) +vy_run_writer_start_page(struct vy_run_writer *writer, + struct vy_entry first_entry) { struct vy_run *run = writer->run; if (run->info.page_count >= writer->page_info_capacity && vy_run_alloc_page_info(run, &writer->page_info_capacity) != 0) return -1; - const char *key = vy_stmt_is_key(first_stmt) ? tuple_data(first_stmt) : - tuple_extract_key(first_stmt, writer->cmp_def, NULL); + const char *key = vy_stmt_is_key(first_entry.stmt) ? 
+ tuple_data(first_entry.stmt) : + tuple_extract_key(first_entry.stmt, + writer->cmp_def, NULL); if (key == NULL) return -1; if (run->info.page_count == 0) { @@ -2144,7 +2163,8 @@ vy_run_writer_start_page(struct vy_run_writer *writer, struct tuple *first_stmt) return -1; } struct vy_page_info *page = run->page_info + run->info.page_count; - if (vy_page_info_create(page, writer->data_xlog.offset, key) != 0) + if (vy_page_info_create(page, writer->data_xlog.offset, + key, writer->cmp_def) != 0) return -1; xlog_tx_begin(&writer->data_xlog); return 0; @@ -2153,22 +2173,22 @@ vy_run_writer_start_page(struct vy_run_writer *writer, struct tuple *first_stmt) /** * Write @a stmt into a current page. * @param writer Run writer. - * @param stmt Statement to write. + * @param entry Statement to write. * * @retval -1 Memory or IO error. * @retval 0 Success. */ static int -vy_run_writer_write_to_page(struct vy_run_writer *writer, struct tuple *stmt) +vy_run_writer_write_to_page(struct vy_run_writer *writer, struct vy_entry entry) { if (writer->bloom != NULL && - vy_stmt_bloom_builder_add(writer->bloom, stmt, + vy_stmt_bloom_builder_add(writer->bloom, entry.stmt, writer->key_def) != 0) return -1; - if (writer->last_stmt != NULL) - vy_stmt_unref_if_possible(writer->last_stmt); - writer->last_stmt = stmt; - vy_stmt_ref_if_possible(stmt); + if (writer->last.stmt != NULL) + vy_stmt_unref_if_possible(writer->last.stmt); + writer->last = entry; + vy_stmt_ref_if_possible(entry.stmt); struct vy_run *run = writer->run; struct vy_page_info *page = run->page_info + run->info.page_count; uint32_t *offset = (uint32_t *)ibuf_alloc(&writer->row_index_buf, @@ -2178,13 +2198,13 @@ vy_run_writer_write_to_page(struct vy_run_writer *writer, struct tuple *stmt) return -1; } *offset = page->unpacked_size; - if (vy_run_dump_stmt(stmt, &writer->data_xlog, page, + if (vy_run_dump_stmt(entry, &writer->data_xlog, page, writer->cmp_def, writer->iid == 0) != 0) return -1; - int64_t lsn = vy_stmt_lsn(stmt); + 
int64_t lsn = vy_stmt_lsn(entry.stmt); run->info.min_lsn = MIN(run->info.min_lsn, lsn); run->info.max_lsn = MAX(run->info.max_lsn, lsn); - vy_stmt_stat_acct(&run->info.stmt_stat, vy_stmt_type(stmt)); + vy_stmt_stat_acct(&run->info.stmt_stat, vy_stmt_type(entry.stmt)); return 0; } @@ -2227,7 +2247,7 @@ vy_run_writer_end_page(struct vy_run_writer *writer) } int -vy_run_writer_append_stmt(struct vy_run_writer *writer, struct tuple *stmt) +vy_run_writer_append_stmt(struct vy_run_writer *writer, struct vy_entry entry) { int rc = -1; size_t region_svp = region_used(&fiber()->gc); @@ -2235,9 +2255,9 @@ vy_run_writer_append_stmt(struct vy_run_writer *writer, struct tuple *stmt) vy_run_writer_create_xlog(writer) != 0) goto out; if (ibuf_used(&writer->row_index_buf) == 0 && - vy_run_writer_start_page(writer, stmt) != 0) + vy_run_writer_start_page(writer, entry) != 0) goto out; - if (vy_run_writer_write_to_page(writer, stmt) != 0) + if (vy_run_writer_write_to_page(writer, entry) != 0) goto out; if (obuf_size(&writer->data_xlog.obuf) >= writer->page_size && vy_run_writer_end_page(writer) != 0) @@ -2257,8 +2277,8 @@ out: static void vy_run_writer_destroy(struct vy_run_writer *writer, bool reuse_fd) { - if (writer->last_stmt != NULL) - vy_stmt_unref_if_possible(writer->last_stmt); + if (writer->last.stmt != NULL) + vy_stmt_unref_if_possible(writer->last.stmt); if (xlog_is_open(&writer->data_xlog)) xlog_close(&writer->data_xlog, reuse_fd); if (writer->bloom != NULL) @@ -2283,10 +2303,10 @@ vy_run_writer_commit(struct vy_run_writer *writer) goto out; } - assert(writer->last_stmt != NULL); - const char *key = vy_stmt_is_key(writer->last_stmt) ? - tuple_data(writer->last_stmt) : - tuple_extract_key(writer->last_stmt, + assert(writer->last.stmt != NULL); + const char *key = vy_stmt_is_key(writer->last.stmt) ? 
+ tuple_data(writer->last.stmt) : + tuple_extract_key(writer->last.stmt, writer->cmp_def, NULL); if (key == NULL) goto out; @@ -2421,7 +2441,8 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, } struct vy_page_info *info; info = run->page_info + run->info.page_count; - if (vy_page_info_create(info, page_offset, page_min_key) != 0) + if (vy_page_info_create(info, page_offset, + page_min_key, cmp_def) != 0) goto close_err; info->row_count = page_row_count; info->size = next_page_offset - page_offset; @@ -2548,7 +2569,7 @@ vy_slice_stream_search(struct vy_stmt_stream *virt_stream) assert(virt_stream->iface->start == vy_slice_stream_search); struct vy_slice_stream *stream = (struct vy_slice_stream *)virt_stream; assert(stream->page == NULL); - if (stream->slice->begin == NULL) { + if (stream->slice->begin.stmt == NULL) { /* Already at the beginning */ assert(stream->page_no == 0); assert(stream->pos_in_page == 0); @@ -2566,17 +2587,18 @@ vy_slice_stream_search(struct vy_stmt_stream *virt_stream) uint32_t end = stream->page->row_count; while (beg != end) { uint32_t mid = beg + (end - beg) / 2; - struct tuple *fnd_key = vy_page_stmt(stream->page, mid, - stream->format); - if (fnd_key == NULL) + struct vy_entry fnd_key = vy_page_stmt(stream->page, mid, + stream->cmp_def, + stream->format); + if (fnd_key.stmt == NULL) return -1; - int cmp = vy_stmt_compare(fnd_key, stream->slice->begin, - stream->cmp_def); + int cmp = vy_entry_compare(fnd_key, stream->slice->begin, + stream->cmp_def); if (cmp < 0) beg = mid + 1; else end = mid; - tuple_unref(fnd_key); + tuple_unref(fnd_key.stmt); } stream->pos_in_page = end; @@ -2594,15 +2616,15 @@ vy_slice_stream_search(struct vy_stmt_stream *virt_stream) * Get the value from the stream and move to the next position. * Set *ret to the value or NULL if EOF. * @param virt_stream - virtual stream. - * @param ret - pointer to pointer to the result. + * @param ret - pointer to the result. 
* @return 0 on success, -1 on memory or read error. */ static NODISCARD int -vy_slice_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret) +vy_slice_stream_next(struct vy_stmt_stream *virt_stream, struct vy_entry *ret) { assert(virt_stream->iface->next == vy_slice_stream_next); struct vy_slice_stream *stream = (struct vy_slice_stream *)virt_stream; - *ret = NULL; + *ret = vy_entry_none(); /* If the slice is ended, return EOF */ if (stream->page_no > stream->slice->last_page_no) @@ -2613,24 +2635,24 @@ vy_slice_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret) return -1; /* Read current tuple from the page */ - struct tuple *tuple = vy_page_stmt(stream->page, stream->pos_in_page, - stream->format); - if (tuple == NULL) /* Read or memory error */ + struct vy_entry entry = vy_page_stmt(stream->page, stream->pos_in_page, + stream->cmp_def, stream->format); + if (entry.stmt == NULL) /* Read or memory error */ return -1; /* Check that the tuple is not out of slice bounds = */ - if (stream->slice->end != NULL && + if (stream->slice->end.stmt != NULL && stream->page_no >= stream->slice->last_page_no && - vy_stmt_compare(tuple, stream->slice->end, stream->cmp_def) >= 0) { - tuple_unref(tuple); + vy_entry_compare(entry, stream->slice->end, stream->cmp_def) >= 0) { + tuple_unref(entry.stmt); return 0; } /* We definitely has the next non-null tuple. 
Save it in stream */ - if (stream->tuple != NULL) - tuple_unref(stream->tuple); - stream->tuple = tuple; - *ret = tuple; + if (stream->entry.stmt != NULL) + tuple_unref(stream->entry.stmt); + stream->entry = entry; + *ret = entry; /* Increment position */ stream->pos_in_page++; @@ -2664,9 +2686,9 @@ vy_slice_stream_stop(struct vy_stmt_stream *virt_stream) vy_page_delete(stream->page); stream->page = NULL; } - if (stream->tuple != NULL) { - tuple_unref(stream->tuple); - stream->tuple = NULL; + if (stream->entry.stmt != NULL) { + tuple_unref(stream->entry.stmt); + stream->entry = vy_entry_none(); } } @@ -2694,7 +2716,7 @@ vy_slice_stream_open(struct vy_slice_stream *stream, struct vy_slice *slice, stream->page_no = slice->first_page_no; stream->pos_in_page = 0; /* We'll find it later */ stream->page = NULL; - stream->tuple = NULL; + stream->entry = vy_entry_none(); stream->slice = slice; stream->cmp_def = cmp_def; diff --git a/src/box/vy_run.h b/src/box/vy_run.h index ae007478..aedae959 100644 --- a/src/box/vy_run.h +++ b/src/box/vy_run.h @@ -36,7 +36,7 @@ #include "fiber_cond.h" #include "iterator_type.h" -#include "vy_stmt.h" /* for comparators */ +#include "vy_entry.h" #include "vy_stmt_stream.h" #include "vy_read_view.h" #include "vy_stat.h" @@ -105,6 +105,8 @@ struct vy_page_info { uint32_t row_count; /** Minimal key stored in the page. */ char *min_key; + /** Comparison hint of the min key. */ + hint_t min_key_hint; /** Offset of the row index in the page. */ uint32_t row_index_offset; }; @@ -182,8 +184,8 @@ struct vy_slice { * of the run. If @end is NULL, the slice ends at the end * of the run. */ - struct tuple *begin; - struct tuple *end; + struct vy_entry begin; + struct vy_entry end; /** * Random seed used for compaction randomization. * Lays in range [0, RAND_MAX]. @@ -259,7 +261,7 @@ struct vy_run_iterator { */ enum iterator_type iterator_type; /** Key to search. 
*/ - struct tuple *key; + struct vy_entry key; /* LSN visibility, iterator shows values with lsn <= vlsn */ const struct vy_read_view **read_view; @@ -267,7 +269,7 @@ struct vy_run_iterator { /** Position of the current record */ struct vy_run_iterator_pos curr_pos; /** Statement at curr_pos. */ - struct tuple *curr_stmt; + struct vy_entry curr; /** * Last two pages read by the iterator. We keep two pages * rather than just one, because we often probe a page for @@ -374,11 +376,12 @@ vy_run_unref(struct vy_run *run) * @param dir - path to the vinyl directory * @param space_id - space id * @param iid - index id + * @param cmp_def - definition of keys stored in the run * @return - 0 on sucess, -1 on fail */ int vy_run_recover(struct vy_run *run, const char *dir, - uint32_t space_id, uint32_t iid); + uint32_t space_id, uint32_t iid, struct key_def *cmp_def); /** * Rebuild run index @@ -452,8 +455,8 @@ vy_run_remove_files(const char *dir, uint32_t space_id, * This function increments @run->refs. */ struct vy_slice * -vy_slice_new(int64_t id, struct vy_run *run, struct tuple *begin, - struct tuple *end, struct key_def *cmp_def); +vy_slice_new(int64_t id, struct vy_run *run, struct vy_entry begin, + struct vy_entry end, struct key_def *cmp_def); /** * Free a run slice. @@ -503,8 +506,8 @@ vy_slice_wait_pinned(struct vy_slice *slice) * with [@begin, @end), @result is set to NULL. 
*/ int -vy_slice_cut(struct vy_slice *slice, int64_t id, struct tuple *begin, - struct tuple *end, struct key_def *cmp_def, +vy_slice_cut(struct vy_slice *slice, int64_t id, struct vy_entry begin, + struct vy_entry end, struct key_def *cmp_def, struct vy_slice **result); /** @@ -517,7 +520,7 @@ void vy_run_iterator_open(struct vy_run_iterator *itr, struct vy_run_iterator_stat *stat, struct vy_slice *slice, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv, + struct vy_entry key, const struct vy_read_view **rv, struct key_def *cmp_def, struct key_def *key_def, struct tuple_format *format); @@ -531,12 +534,12 @@ vy_run_iterator_next(struct vy_run_iterator *itr, struct vy_history *history); /** - * Advance a run iterator to the key following @last_stmt. + * Advance a run iterator to the key following @last. * The key history is returned in @history (empty if EOF). * Returns 0 on success, -1 on memory allocation or IO error. */ NODISCARD int -vy_run_iterator_skip(struct vy_run_iterator *itr, struct tuple *last_stmt, +vy_run_iterator_skip(struct vy_run_iterator *itr, struct vy_entry last, struct vy_history *history); /** @@ -558,7 +561,7 @@ struct vy_slice_stream { /** Last page read */ struct vy_page *page; /** The last tuple returned to user */ - struct tuple *tuple; + struct vy_entry entry; /** Members needed for memory allocation and disk access */ /** Slice to stream */ @@ -621,7 +624,7 @@ struct vy_run_writer { * Remember a last written statement to use it as a source * of max key of a finished run. */ - struct tuple *last_stmt; + struct vy_entry last; }; /** Create a run writer to fill a run with statements. */ @@ -634,13 +637,13 @@ vy_run_writer_create(struct vy_run_writer *writer, struct vy_run *run, /** * Write a specified statement into a run. * @param writer Writer to write a statement. - * @param stmt Statement to write. + * @param entry Statement to write. * * @retval -1 Memory error. * @retval 0 Success. 
*/ int -vy_run_writer_append_stmt(struct vy_run_writer *writer, struct tuple *stmt); +vy_run_writer_append_stmt(struct vy_run_writer *writer, struct vy_entry entry); /** * Finalize run writing by writing run index into file. The writer diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index aaae1a89..8f6279dc 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -1054,13 +1054,13 @@ vy_task_write_run(struct vy_task *task) goto fail_abort_writer; int rc; int loops = 0; - struct tuple *stmt = NULL; - while ((rc = wi->iface->next(wi, &stmt)) == 0 && stmt != NULL) { + struct vy_entry entry = vy_entry_none(); + while ((rc = wi->iface->next(wi, &entry)) == 0 && entry.stmt != NULL) { inj = errinj(ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT, ERRINJ_DOUBLE); if (inj != NULL && inj->dparam > 0) usleep(inj->dparam * 1000000); - rc = vy_run_writer_append_stmt(&writer, stmt); + rc = vy_run_writer_append_stmt(&writer, entry); if (rc != 0) break; @@ -1165,8 +1165,8 @@ vy_task_dump_complete(struct vy_task *task) assert(i < lsm->range_count); slice = new_slices[i]; vy_log_insert_slice(range->id, new_run->id, slice->id, - tuple_data_or_null(slice->begin), - tuple_data_or_null(slice->end)); + tuple_data_or_null(slice->begin.stmt), + tuple_data_or_null(slice->end.stmt)); } vy_log_dump_lsm(lsm->id, dump_lsn); if (vy_log_tx_commit() < 0) @@ -1458,7 +1458,8 @@ vy_task_compaction_complete(struct vy_task *task) */ if (!vy_run_is_empty(new_run)) { new_slice = vy_slice_new(vy_log_next_id(), new_run, - NULL, NULL, lsm->cmp_def); + vy_entry_none(), vy_entry_none(), + lsm->cmp_def); if (new_slice == NULL) return -1; } @@ -1498,8 +1499,8 @@ vy_task_compaction_complete(struct vy_task *task) vy_log_create_run(lsm->id, new_run->id, new_run->dump_lsn, new_run->dump_count); vy_log_insert_slice(range->id, new_run->id, new_slice->id, - tuple_data_or_null(new_slice->begin), - tuple_data_or_null(new_slice->end)); + tuple_data_or_null(new_slice->begin.stmt), + 
tuple_data_or_null(new_slice->end.stmt)); } if (vy_log_tx_commit() < 0) { if (new_slice != NULL) diff --git a/src/box/vy_stmt_stream.h b/src/box/vy_stmt_stream.h index 098cc8eb..08e4d5ff 100644 --- a/src/box/vy_stmt_stream.h +++ b/src/box/vy_stmt_stream.h @@ -37,7 +37,7 @@ extern "C" { #endif /* defined(__cplusplus) */ -struct tuple; +struct vy_entry; /** * The stream is a very simple iterator (generally over a mem or a run) @@ -55,7 +55,7 @@ typedef NODISCARD int * Get next tuple from a stream. */ typedef NODISCARD int -(*vy_stream_next_f)(struct vy_stmt_stream *virt_stream, struct tuple **ret); +(*vy_stream_next_f)(struct vy_stmt_stream *virt_stream, struct vy_entry *ret); /** * Close the stream. diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c index 7ead3648..100d9bc7 100644 --- a/src/box/vy_tx.c +++ b/src/box/vy_tx.c @@ -68,7 +68,7 @@ write_set_cmp(struct txv *a, struct txv *b) { int rc = a->lsm < b->lsm ? -1 : a->lsm > b->lsm; if (rc == 0) - return vy_stmt_compare(a->stmt, b->stmt, a->lsm->cmp_def); + return vy_entry_compare(a->entry, b->entry, a->lsm->cmp_def); return rc; } @@ -77,7 +77,7 @@ write_set_key_cmp(struct write_set_key *a, struct txv *b) { int rc = a->lsm < b->lsm ? 
-1 : a->lsm > b->lsm; if (rc == 0) - return vy_stmt_compare(a->stmt, b->stmt, a->lsm->cmp_def); + return vy_entry_compare(a->entry, b->entry, a->lsm->cmp_def); return rc; } @@ -213,7 +213,7 @@ tx_manager_destroy_read_view(struct tx_manager *xm, static struct txv * txv_new(struct vy_tx *tx, struct vy_lsm *lsm, - struct tuple *stmt, uint64_t column_mask) + struct vy_entry entry, uint64_t column_mask) { struct tx_manager *xm = tx->xm; struct txv *v = mempool_alloc(&xm->txv_mempool); @@ -224,15 +224,15 @@ txv_new(struct vy_tx *tx, struct vy_lsm *lsm, v->lsm = lsm; vy_lsm_ref(v->lsm); v->mem = NULL; - v->stmt = stmt; - tuple_ref(stmt); + v->entry = entry; + tuple_ref(entry.stmt); v->region_stmt = NULL; v->column_mask = column_mask; v->tx = tx; v->is_first_insert = false; v->is_overwritten = false; v->overwritten = NULL; - xm->write_set_size += tuple_size(stmt); + xm->write_set_size += tuple_size(entry.stmt); return v; } @@ -240,8 +240,8 @@ static void txv_delete(struct txv *v) { struct tx_manager *xm = v->tx->xm; - xm->write_set_size -= tuple_size(v->stmt); - tuple_unref(v->stmt); + xm->write_set_size -= tuple_size(v->entry.stmt); + tuple_unref(v->entry.stmt); vy_lsm_unref(v->lsm); mempool_free(&xm->txv_mempool, v); } @@ -253,9 +253,9 @@ static void vy_read_interval_acct(struct vy_read_interval *interval) { struct tx_manager *xm = interval->tx->xm; - xm->read_set_size += tuple_size(interval->left); - if (interval->left != interval->right) - xm->read_set_size += tuple_size(interval->right); + xm->read_set_size += tuple_size(interval->left.stmt); + if (interval->left.stmt != interval->right.stmt) + xm->read_set_size += tuple_size(interval->right.stmt); } /** @@ -265,15 +265,15 @@ static void vy_read_interval_unacct(struct vy_read_interval *interval) { struct tx_manager *xm = interval->tx->xm; - xm->read_set_size -= tuple_size(interval->left); - if (interval->left != interval->right) - xm->read_set_size -= tuple_size(interval->right); + xm->read_set_size -= 
tuple_size(interval->left.stmt); + if (interval->left.stmt != interval->right.stmt) + xm->read_set_size -= tuple_size(interval->right.stmt); } static struct vy_read_interval * vy_read_interval_new(struct vy_tx *tx, struct vy_lsm *lsm, - struct tuple *left, bool left_belongs, - struct tuple *right, bool right_belongs) + struct vy_entry left, bool left_belongs, + struct vy_entry right, bool right_belongs) { struct tx_manager *xm = tx->xm; struct vy_read_interval *interval; @@ -286,10 +286,10 @@ vy_read_interval_new(struct vy_tx *tx, struct vy_lsm *lsm, interval->tx = tx; vy_lsm_ref(lsm); interval->lsm = lsm; - tuple_ref(left); + tuple_ref(left.stmt); interval->left = left; interval->left_belongs = left_belongs; - tuple_ref(right); + tuple_ref(right.stmt); interval->right = right; interval->right_belongs = right_belongs; interval->subtree_last = NULL; @@ -303,8 +303,8 @@ vy_read_interval_delete(struct vy_read_interval *interval) struct tx_manager *xm = interval->tx->xm; vy_read_interval_unacct(interval); vy_lsm_unref(interval->lsm); - tuple_unref(interval->left); - tuple_unref(interval->right); + tuple_unref(interval->left.stmt); + tuple_unref(interval->right.stmt); mempool_free(&xm->read_interval_mempool, interval); } @@ -348,7 +348,7 @@ vy_tx_destroy(struct vy_tx *tx) struct txv *v, *tmp; stailq_foreach_entry_safe(v, tmp, &tx->log, next_in_log) { vy_stmt_counter_unacct_tuple(&v->lsm->stat.txw.count, - v->stmt); + v->entry.stmt); txv_delete(v); } @@ -387,7 +387,7 @@ static int vy_tx_send_to_read_view(struct vy_tx *tx, struct txv *v) { struct vy_tx_conflict_iterator it; - vy_tx_conflict_iterator_init(&it, &v->lsm->read_set, v->stmt); + vy_tx_conflict_iterator_init(&it, &v->lsm->read_set, v->entry); struct vy_tx *abort; while ((abort = vy_tx_conflict_iterator_next(&it)) != NULL) { /* Don't abort self. 
*/ @@ -415,7 +415,7 @@ static void vy_tx_abort_readers(struct vy_tx *tx, struct txv *v) { struct vy_tx_conflict_iterator it; - vy_tx_conflict_iterator_init(&it, &v->lsm->read_set, v->stmt); + vy_tx_conflict_iterator_init(&it, &v->lsm->read_set, v->entry); struct vy_tx *abort; while ((abort = vy_tx_conflict_iterator_next(&it)) != NULL) { /* Don't abort self. */ @@ -482,7 +482,7 @@ vy_tx_write_prepare(struct txv *v) * * @param lsm LSM tree to write to. * @param mem In-memory tree to write to. - * @param stmt Statement allocated with malloc(). + * @param entry Statement allocated with malloc(). * @param region_stmt NULL or the same statement as stmt, * but allocated on lsregion. * @@ -491,9 +491,9 @@ vy_tx_write_prepare(struct txv *v) */ static int vy_tx_write(struct vy_lsm *lsm, struct vy_mem *mem, - struct tuple *stmt, struct tuple **region_stmt) + struct vy_entry entry, struct tuple **region_stmt) { - assert(vy_stmt_is_refable(stmt)); + assert(vy_stmt_is_refable(entry.stmt)); assert(*region_stmt == NULL || !vy_stmt_is_refable(*region_stmt)); /* @@ -503,19 +503,21 @@ vy_tx_write(struct vy_lsm *lsm, struct vy_mem *mem, * applied to the cached statement, can be inserted * instead of the original UPSERT. */ - if (vy_stmt_type(stmt) == IPROTO_UPSERT) { - struct tuple *deleted = NULL; + if (vy_stmt_type(entry.stmt) == IPROTO_UPSERT) { + struct vy_entry deleted = vy_entry_none(); /* Invalidate cache element. 
*/ - vy_cache_on_write(&lsm->cache, stmt, &deleted); - if (deleted != NULL) { - struct tuple *applied = vy_apply_upsert(stmt, deleted, + vy_cache_on_write(&lsm->cache, entry, &deleted); + if (deleted.stmt != NULL) { + struct vy_entry applied; + applied = vy_entry_apply_upsert(entry, deleted, mem->cmp_def, false); - tuple_unref(deleted); - if (applied != NULL) { - assert(vy_stmt_type(applied) == IPROTO_REPLACE); + tuple_unref(deleted.stmt); + if (applied.stmt != NULL) { + assert(vy_stmt_type(applied.stmt) == + IPROTO_REPLACE); int rc = vy_lsm_set(lsm, mem, applied, region_stmt); - tuple_unref(applied); + tuple_unref(applied.stmt); return rc; } /* @@ -525,9 +527,9 @@ vy_tx_write(struct vy_lsm *lsm, struct vy_mem *mem, } } else { /* Invalidate cache element. */ - vy_cache_on_write(&lsm->cache, stmt, NULL); + vy_cache_on_write(&lsm->cache, entry, NULL); } - return vy_lsm_set(lsm, mem, stmt, region_stmt); + return vy_lsm_set(lsm, mem, entry, region_stmt); } /** @@ -545,7 +547,7 @@ vy_tx_write(struct vy_lsm *lsm, struct vy_mem *mem, * if we run out of memory, we won't be able to schedule another * dump to free some. * - * Affects @tx->log, @v->stmt. + * Affects @tx->log, @v->entry. * * Returns 0 on success, -1 on memory allocation error. */ @@ -553,7 +555,7 @@ static int vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v) { struct vy_lsm *pk = v->lsm; - struct tuple *stmt = v->stmt; + struct tuple *stmt = v->entry.stmt; uint8_t flags = vy_stmt_flags(stmt); assert(pk->index_id == 0); @@ -569,12 +571,12 @@ vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v) } /* Look up the tuple overwritten by this statement. 
*/ - struct tuple *tuple; + struct vy_entry overwritten; if (vy_point_lookup_mem(pk, &tx->xm->p_global_read_view, - stmt, &tuple) != 0) + v->entry, &overwritten) != 0) return -1; - if (tuple == NULL) { + if (overwritten.stmt == NULL) { /* * Nothing's found, but there still may be * matching statements stored on disk so we @@ -590,15 +592,16 @@ vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v) */ vy_stmt_set_flags(stmt, flags & ~VY_STMT_DEFERRED_DELETE); - if (vy_stmt_type(tuple) == IPROTO_DELETE) { + if (vy_stmt_type(overwritten.stmt) == IPROTO_DELETE) { /* The tuple's already deleted, nothing to do. */ - tuple_unref(tuple); + tuple_unref(overwritten.stmt); return 0; } struct tuple *delete_stmt; - delete_stmt = vy_stmt_new_surrogate_delete(pk->mem_format, tuple); - tuple_unref(tuple); + delete_stmt = vy_stmt_new_surrogate_delete(pk->mem_format, + overwritten.stmt); + tuple_unref(overwritten.stmt); if (delete_stmt == NULL) return -1; @@ -614,7 +617,7 @@ vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v) tx->xm->write_set_size += tuple_size(delete_stmt); vy_stmt_counter_acct_tuple(&pk->stat.txw.count, delete_stmt); vy_stmt_counter_unacct_tuple(&pk->stat.txw.count, stmt); - v->stmt = delete_stmt; + v->entry.stmt = delete_stmt; tuple_ref(delete_stmt); tuple_unref(stmt); } @@ -626,7 +629,10 @@ vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v) int rc = 0; for (uint32_t i = 1; i < space->index_count; i++) { struct vy_lsm *lsm = vy_lsm(space->index[i]); - struct txv *delete_txv = txv_new(tx, lsm, delete_stmt, + struct vy_entry delete_entry; + delete_entry.stmt = delete_stmt; + delete_entry.hint = vy_stmt_hint(delete_stmt, lsm->cmp_def); + struct txv *delete_txv = txv_new(tx, lsm, delete_entry, UINT64_MAX); if (delete_txv == NULL) { rc = -1; @@ -719,7 +725,7 @@ vy_tx_prepare(struct vy_tx *tx) continue; } - enum iproto_type type = vy_stmt_type(v->stmt); + enum iproto_type type = vy_stmt_type(v->entry.stmt); /* Optimize out INSERT + DELETE 
for the same key. */ if (v->is_first_insert && type == IPROTO_DELETE) @@ -732,15 +738,15 @@ vy_tx_prepare(struct vy_tx *tx) * so we can turn REPLACE into INSERT. */ type = IPROTO_INSERT; - vy_stmt_set_type(v->stmt, type); + vy_stmt_set_type(v->entry.stmt, type); /* * In case of INSERT, no statement was actually * overwritten so no need to generate a deferred * DELETE for secondary indexes. */ - uint8_t flags = vy_stmt_flags(v->stmt); + uint8_t flags = vy_stmt_flags(v->entry.stmt); if (flags & VY_STMT_DEFERRED_DELETE) { - vy_stmt_set_flags(v->stmt, flags & + vy_stmt_set_flags(v->entry.stmt, flags & ~VY_STMT_DEFERRED_DELETE); } } @@ -751,7 +757,7 @@ vy_tx_prepare(struct vy_tx *tx) * turn it into REPLACE. */ type = IPROTO_REPLACE; - vy_stmt_set_type(v->stmt, type); + vy_stmt_set_type(v->entry.stmt, type); } if (vy_tx_write_prepare(v) != 0) @@ -759,15 +765,15 @@ vy_tx_prepare(struct vy_tx *tx) assert(v->mem != NULL); if (lsm->index_id == 0 && - vy_stmt_flags(v->stmt) & VY_STMT_DEFERRED_DELETE && + vy_stmt_flags(v->entry.stmt) & VY_STMT_DEFERRED_DELETE && vy_tx_handle_deferred_delete(tx, v) != 0) return -1; /* In secondary indexes only REPLACE/DELETE can be written. */ - vy_stmt_set_lsn(v->stmt, MAX_LSN + tx->psn); + vy_stmt_set_lsn(v->entry.stmt, MAX_LSN + tx->psn); struct tuple **region_stmt = (type == IPROTO_DELETE) ? 
&delete : &repsert; - if (vy_tx_write(lsm, v->mem, v->stmt, region_stmt) != 0) + if (vy_tx_write(lsm, v->mem, v->entry, region_stmt) != 0) return -1; v->region_stmt = *region_stmt; } @@ -796,8 +802,11 @@ vy_tx_commit(struct vy_tx *tx, int64_t lsn) struct txv *v; stailq_foreach_entry(v, &tx->log, next_in_log) { if (v->region_stmt != NULL) { + struct vy_entry entry; + entry.stmt = v->region_stmt; + entry.hint = v->entry.hint; vy_stmt_set_lsn(v->region_stmt, lsn); - vy_lsm_commit_stmt(v->lsm, v->mem, v->region_stmt); + vy_lsm_commit_stmt(v->lsm, v->mem, entry); } if (v->mem != NULL) vy_mem_unpin(v->mem); @@ -841,9 +850,12 @@ vy_tx_rollback_after_prepare(struct vy_tx *tx) struct txv *v; stailq_foreach_entry(v, &tx->log, next_in_log) { - if (v->region_stmt != NULL) - vy_lsm_rollback_stmt(v->lsm, v->mem, - v->region_stmt); + if (v->region_stmt != NULL) { + struct vy_entry entry; + entry.stmt = v->region_stmt; + entry.hint = v->entry.hint; + vy_lsm_rollback_stmt(v->lsm, v->mem, entry); + } if (v->mem != NULL) vy_mem_unpin(v->mem); } @@ -914,8 +926,8 @@ vy_tx_rollback_statement(struct vy_tx *tx, void *svp) int vy_tx_track(struct vy_tx *tx, struct vy_lsm *lsm, - struct tuple *left, bool left_belongs, - struct tuple *right, bool right_belongs) + struct vy_entry left, bool left_belongs, + struct vy_entry right, bool right_belongs) { if (vy_tx_is_in_read_view(tx)) { /* No point in tracking reads. 
*/ @@ -969,16 +981,16 @@ vy_tx_track(struct vy_tx *tx, struct vy_lsm *lsm, interval = stailq_first_entry(&merge, struct vy_read_interval, in_merge); if (vy_read_interval_cmpl(new_interval, interval) > 0) { - tuple_ref(interval->left); - tuple_unref(new_interval->left); + tuple_ref(interval->left.stmt); + tuple_unref(new_interval->left.stmt); new_interval->left = interval->left; new_interval->left_belongs = interval->left_belongs; } interval = stailq_last_entry(&merge, struct vy_read_interval, in_merge); if (vy_read_interval_cmpr(new_interval, interval) < 0) { - tuple_ref(interval->right); - tuple_unref(new_interval->right); + tuple_ref(interval->right.stmt); + tuple_unref(new_interval->right.stmt); new_interval->right = interval->right; new_interval->right_belongs = interval->right_belongs; } @@ -998,73 +1010,74 @@ vy_tx_track(struct vy_tx *tx, struct vy_lsm *lsm, } int -vy_tx_track_point(struct vy_tx *tx, struct vy_lsm *lsm, struct tuple *stmt) +vy_tx_track_point(struct vy_tx *tx, struct vy_lsm *lsm, struct vy_entry entry) { - assert(vy_stmt_is_full_key(stmt, lsm->cmp_def)); + assert(vy_stmt_is_full_key(entry.stmt, lsm->cmp_def)); if (vy_tx_is_in_read_view(tx)) { /* No point in tracking reads. */ return 0; } - struct txv *v = write_set_search_key(&tx->write_set, lsm, stmt); - if (v != NULL && vy_stmt_type(v->stmt) != IPROTO_UPSERT) { + struct txv *v = write_set_search_key(&tx->write_set, lsm, entry); + if (v != NULL && vy_stmt_type(v->entry.stmt) != IPROTO_UPSERT) { /* Reading from own write set is serializable. 
*/ return 0; } - return vy_tx_track(tx, lsm, stmt, true, stmt, true); + return vy_tx_track(tx, lsm, entry, true, entry, true); } int vy_tx_set(struct vy_tx *tx, struct vy_lsm *lsm, - struct tuple *stmt, uint64_t column_mask) + struct vy_entry entry, uint64_t column_mask) { - assert(vy_stmt_type(stmt) != 0); + assert(vy_stmt_type(entry.stmt) != 0); /** * A statement in write set must have and unique lsn * in order to differ it from cachable statements in mem and run. */ - vy_stmt_set_lsn(stmt, INT64_MAX); - struct tuple *applied = NULL; + vy_stmt_set_lsn(entry.stmt, INT64_MAX); + struct vy_entry applied = vy_entry_none(); - struct txv *old = write_set_search_key(&tx->write_set, lsm, stmt); + struct txv *old = write_set_search_key(&tx->write_set, lsm, entry); /* Found a match of the previous action of this transaction */ - if (old != NULL && vy_stmt_type(stmt) == IPROTO_UPSERT) { + if (old != NULL && vy_stmt_type(entry.stmt) == IPROTO_UPSERT) { assert(lsm->index_id == 0); - uint8_t old_type = vy_stmt_type(old->stmt); + uint8_t old_type = vy_stmt_type(old->entry.stmt); assert(old_type == IPROTO_UPSERT || old_type == IPROTO_INSERT || old_type == IPROTO_REPLACE || old_type == IPROTO_DELETE); (void) old_type; - applied = vy_apply_upsert(stmt, old->stmt, lsm->cmp_def, true); + applied = vy_entry_apply_upsert(entry, old->entry, + lsm->cmp_def, true); lsm->stat.upsert.applied++; - if (applied == NULL) + if (applied.stmt == NULL) return -1; - stmt = applied; - assert(vy_stmt_type(stmt) != 0); + entry = applied; + assert(vy_stmt_type(entry.stmt) != 0); lsm->stat.upsert.squashed++; } /* Allocate a MVCC container. 
*/ - struct txv *v = txv_new(tx, lsm, stmt, column_mask); - if (applied != NULL) - tuple_unref(applied); + struct txv *v = txv_new(tx, lsm, entry, column_mask); + if (applied.stmt != NULL) + tuple_unref(applied.stmt); if (v == NULL) return -1; if (old != NULL) { /* Leave the old txv in TX log but remove it from write set */ - assert(tx->write_size >= tuple_size(old->stmt)); - tx->write_size -= tuple_size(old->stmt); + assert(tx->write_size >= tuple_size(old->entry.stmt)); + tx->write_size -= tuple_size(old->entry.stmt); write_set_remove(&tx->write_set, old); old->is_overwritten = true; v->is_first_insert = old->is_first_insert; } - if (old == NULL && vy_stmt_type(stmt) == IPROTO_INSERT) + if (old == NULL && vy_stmt_type(entry.stmt) == IPROTO_INSERT) v->is_first_insert = true; if (old != NULL) { @@ -1075,8 +1088,8 @@ vy_tx_set(struct vy_tx *tx, struct vy_lsm *lsm, v->column_mask |= old->column_mask; } - if (lsm->index_id > 0 && vy_stmt_type(stmt) == IPROTO_REPLACE && - old != NULL && vy_stmt_type(old->stmt) == IPROTO_DELETE) { + if (lsm->index_id > 0 && vy_stmt_type(entry.stmt) == IPROTO_REPLACE && + old != NULL && vy_stmt_type(old->entry.stmt) == IPROTO_DELETE) { /* * The column mask of an update operation may have a bit * set even if the corresponding field doesn't actually @@ -1104,8 +1117,8 @@ vy_tx_set(struct vy_tx *tx, struct vy_lsm *lsm, v->overwritten = old; write_set_insert(&tx->write_set, v); tx->write_set_version++; - tx->write_size += tuple_size(stmt); - vy_stmt_counter_acct_tuple(&lsm->stat.txw.count, stmt); + tx->write_size += tuple_size(entry.stmt); + vy_stmt_counter_acct_tuple(&lsm->stat.txw.count, entry.stmt); stailq_add_tail_entry(&tx->log, v, next_in_log); return 0; } @@ -1142,7 +1155,7 @@ void vy_txw_iterator_open(struct vy_txw_iterator *itr, struct vy_txw_iterator_stat *stat, struct vy_tx *tx, struct vy_lsm *lsm, - enum iterator_type iterator_type, struct tuple *key) + enum iterator_type iterator_type, struct vy_entry key) { itr->stat = stat; 
itr->tx = tx; @@ -1160,16 +1173,16 @@ vy_txw_iterator_open(struct vy_txw_iterator *itr, * given key (pass NULL to start iteration). */ static void -vy_txw_iterator_seek(struct vy_txw_iterator *itr, struct tuple *last_key) +vy_txw_iterator_seek(struct vy_txw_iterator *itr, struct vy_entry last) { itr->stat->lookup++; itr->version = itr->tx->write_set_version; itr->curr_txv = NULL; - struct tuple *key = itr->key; + struct vy_entry key = itr->key; enum iterator_type iterator_type = itr->iterator_type; - if (last_key != NULL) { - key = last_key; + if (last.stmt != NULL) { + key = last; iterator_type = iterator_direction(iterator_type) > 0 ? ITER_GT : ITER_LT; } @@ -1177,7 +1190,7 @@ vy_txw_iterator_seek(struct vy_txw_iterator *itr, struct tuple *last_key) struct vy_lsm *lsm = itr->lsm; struct write_set_key k = { lsm, key }; struct txv *txv; - if (!vy_stmt_is_empty_key(key)) { + if (!vy_stmt_is_empty_key(key.stmt)) { if (iterator_type == ITER_EQ) txv = write_set_search(&itr->tx->write_set, &k); else if (iterator_type == ITER_GE || iterator_type == ITER_GT) @@ -1186,7 +1199,7 @@ vy_txw_iterator_seek(struct vy_txw_iterator *itr, struct tuple *last_key) txv = write_set_psearch(&itr->tx->write_set, &k); if (txv == NULL || txv->lsm != lsm) return; - if (vy_stmt_compare(key, txv->stmt, lsm->cmp_def) == 0) { + if (vy_entry_compare(key, txv->entry, lsm->cmp_def) == 0) { while (true) { struct txv *next; if (iterator_type == ITER_LE || @@ -1196,8 +1209,8 @@ vy_txw_iterator_seek(struct vy_txw_iterator *itr, struct tuple *last_key) next = write_set_prev(&itr->tx->write_set, txv); if (next == NULL || next->lsm != lsm) break; - if (vy_stmt_compare(key, next->stmt, - lsm->cmp_def) != 0) + if (vy_entry_compare(key, next->entry, + lsm->cmp_def) != 0) break; txv = next; } @@ -1214,8 +1227,8 @@ vy_txw_iterator_seek(struct vy_txw_iterator *itr, struct tuple *last_key) } if (txv == NULL || txv->lsm != lsm) return; - if (itr->iterator_type == ITER_EQ && last_key != NULL && - 
vy_stmt_compare(itr->key, txv->stmt, lsm->cmp_def) != 0) + if (itr->iterator_type == ITER_EQ && last.stmt != NULL && + vy_entry_compare(itr->key, txv->entry, lsm->cmp_def) != 0) return; itr->curr_txv = txv; } @@ -1227,7 +1240,7 @@ vy_txw_iterator_next(struct vy_txw_iterator *itr, vy_history_cleanup(history); if (!itr->search_started) { itr->search_started = true; - vy_txw_iterator_seek(itr, NULL); + vy_txw_iterator_seek(itr, vy_entry_none()); goto out; } assert(itr->version == itr->tx->write_set_version); @@ -1240,20 +1253,20 @@ vy_txw_iterator_next(struct vy_txw_iterator *itr, if (itr->curr_txv != NULL && itr->curr_txv->lsm != itr->lsm) itr->curr_txv = NULL; if (itr->curr_txv != NULL && itr->iterator_type == ITER_EQ && - vy_stmt_compare(itr->key, itr->curr_txv->stmt, - itr->lsm->cmp_def) != 0) + vy_entry_compare(itr->key, itr->curr_txv->entry, + itr->lsm->cmp_def) != 0) itr->curr_txv = NULL; out: if (itr->curr_txv != NULL) { vy_stmt_counter_acct_tuple(&itr->stat->get, - itr->curr_txv->stmt); - return vy_history_append_stmt(history, itr->curr_txv->stmt); + itr->curr_txv->entry.stmt); + return vy_history_append_stmt(history, itr->curr_txv->entry); } return 0; } NODISCARD int -vy_txw_iterator_skip(struct vy_txw_iterator *itr, struct tuple *last_stmt, +vy_txw_iterator_skip(struct vy_txw_iterator *itr, struct vy_entry last, struct vy_history *history) { assert(!itr->search_started || @@ -1261,42 +1274,42 @@ vy_txw_iterator_skip(struct vy_txw_iterator *itr, struct tuple *last_stmt, /* * Check if the iterator is already positioned - * at the statement following last_stmt. + * at the statement following last. 
*/ if (itr->search_started && - (itr->curr_txv == NULL || last_stmt == NULL || + (itr->curr_txv == NULL || last.stmt == NULL || iterator_direction(itr->iterator_type) * - vy_stmt_compare(itr->curr_txv->stmt, last_stmt, - itr->lsm->cmp_def) > 0)) + vy_entry_compare(itr->curr_txv->entry, last, + itr->lsm->cmp_def) > 0)) return 0; vy_history_cleanup(history); itr->search_started = true; - vy_txw_iterator_seek(itr, last_stmt); + vy_txw_iterator_seek(itr, last); if (itr->curr_txv != NULL) { vy_stmt_counter_acct_tuple(&itr->stat->get, - itr->curr_txv->stmt); - return vy_history_append_stmt(history, itr->curr_txv->stmt); + itr->curr_txv->entry.stmt); + return vy_history_append_stmt(history, itr->curr_txv->entry); } return 0; } NODISCARD int -vy_txw_iterator_restore(struct vy_txw_iterator *itr, struct tuple *last_stmt, +vy_txw_iterator_restore(struct vy_txw_iterator *itr, struct vy_entry last, struct vy_history *history) { if (!itr->search_started || itr->version == itr->tx->write_set_version) return 0; - vy_txw_iterator_seek(itr, last_stmt); + vy_txw_iterator_seek(itr, last); vy_history_cleanup(history); if (itr->curr_txv != NULL) { vy_stmt_counter_acct_tuple(&itr->stat->get, - itr->curr_txv->stmt); - if (vy_history_append_stmt(history, itr->curr_txv->stmt) != 0) + itr->curr_txv->entry.stmt); + if (vy_history_append_stmt(history, itr->curr_txv->entry) != 0) return -1; } return 1; diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h index 6877090d..203d4f1e 100644 --- a/src/box/vy_tx.h +++ b/src/box/vy_tx.h @@ -42,6 +42,7 @@ #include "iterator_type.h" #include "salad/stailq.h" #include "trivia/util.h" +#include "vy_entry.h" #include "vy_lsm.h" #include "vy_stat.h" #include "vy_read_set.h" @@ -83,7 +84,7 @@ struct txv { /** In-memory tree to insert the statement into. */ struct vy_mem *mem; /** Statement of this operation. */ - struct tuple *stmt; + struct vy_entry entry; /** Statement allocated on vy_mem->allocator. 
*/ struct tuple *region_stmt; /** Mask of columns modified by this operation. */ @@ -114,7 +115,7 @@ struct txv { */ struct write_set_key { struct vy_lsm *lsm; - struct tuple *stmt; + struct vy_entry entry; }; int @@ -128,9 +129,9 @@ rb_gen_ext_key(MAYBE_UNUSED static inline, write_set_, write_set_t, struct txv, static inline struct txv * write_set_search_key(write_set_t *tree, struct vy_lsm *lsm, - struct tuple *stmt) + struct vy_entry entry) { - struct write_set_key key = { .lsm = lsm, .stmt = stmt }; + struct write_set_key key = { .lsm = lsm, .entry = entry }; return write_set_search(tree, &key); } @@ -369,15 +370,15 @@ vy_tx_rollback_statement(struct vy_tx *tx, void *svp); */ int vy_tx_track(struct vy_tx *tx, struct vy_lsm *lsm, - struct tuple *left, bool left_belongs, - struct tuple *right, bool right_belongs); + struct vy_entry left, bool left_belongs, + struct vy_entry right, bool right_belongs); /** * Remember a point read in the conflict manager index. * * @param tx Transaction that invoked the read. * @param lsm LSM tree that was read from. - * @param stmt Key that was read. + * @param entry Key that was read. * * @retval 0 Success. * @retval -1 Memory error. @@ -389,12 +390,12 @@ vy_tx_track(struct vy_tx *tx, struct vy_lsm *lsm, * transaction read it from its own write set. */ int -vy_tx_track_point(struct vy_tx *tx, struct vy_lsm *lsm, struct tuple *stmt); +vy_tx_track_point(struct vy_tx *tx, struct vy_lsm *lsm, struct vy_entry entry); /** Add a statement to a transaction. */ int vy_tx_set(struct vy_tx *tx, struct vy_lsm *lsm, - struct tuple *stmt, uint64_t column_mask); + struct vy_entry entry, uint64_t column_mask); /** * Iterator over the write set of a transaction. @@ -414,7 +415,7 @@ struct vy_txw_iterator { */ enum iterator_type iterator_type; /** Search key. */ - struct tuple *key; + struct vy_entry key; /* Last seen value of the write set version. */ uint32_t version; /* Current position in the write set. 
*/ @@ -430,7 +431,7 @@ void vy_txw_iterator_open(struct vy_txw_iterator *itr, struct vy_txw_iterator_stat *stat, struct vy_tx *tx, struct vy_lsm *lsm, - enum iterator_type iterator_type, struct tuple *key); + enum iterator_type iterator_type, struct vy_entry key); /** * Advance a txw iterator to the next key. @@ -442,22 +443,22 @@ vy_txw_iterator_next(struct vy_txw_iterator *itr, struct vy_history *history); /** - * Advance a txw iterator to the key following @last_stmt. + * Advance a txw iterator to the key following @last. * The key history is returned in @history (empty if EOF). * Returns 0 on success, -1 on memory allocation error. */ NODISCARD int -vy_txw_iterator_skip(struct vy_txw_iterator *itr, struct tuple *last_stmt, +vy_txw_iterator_skip(struct vy_txw_iterator *itr, struct vy_entry last, struct vy_history *history); /** * Check if a txw iterator was invalidated and needs to be restored. * If it does, set the iterator position to the first key following - * @last_stmt and return 1, otherwise return 0. Returns -1 on memory + * @last and return 1, otherwise return 0. Returns -1 on memory * allocation error. */ int -vy_txw_iterator_restore(struct vy_txw_iterator *itr, struct tuple *last_stmt, +vy_txw_iterator_restore(struct vy_txw_iterator *itr, struct vy_entry entry, struct vy_history *history); /** diff --git a/src/box/vy_upsert.h b/src/box/vy_upsert.h index d546a2a2..9b585e0b 100644 --- a/src/box/vy_upsert.h +++ b/src/box/vy_upsert.h @@ -34,13 +34,14 @@ #include #include +#include "vy_entry.h" + #if defined(__cplusplus) extern "C" { #endif /* defined(__cplusplus) */ -struct vy_stat; struct key_def; -struct tuple_format; +struct tuple; /** * Apply the UPSERT statement to the REPLACE, UPSERT or DELETE statement. 
@@ -66,6 +67,17 @@ struct tuple * vy_apply_upsert(struct tuple *new_stmt, struct tuple *old_stmt, struct key_def *cmp_def, bool suppress_error); +static inline struct vy_entry +vy_entry_apply_upsert(struct vy_entry new_entry, struct vy_entry old_entry, + struct key_def *cmp_def, bool suppress_error) +{ + struct vy_entry result; + result.hint = old_entry.stmt != NULL ? old_entry.hint : new_entry.hint; + result.stmt = vy_apply_upsert(new_entry.stmt, old_entry.stmt, + cmp_def, suppress_error); + return result.stmt != NULL ? result : vy_entry_none(); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c index 6818a31c..e7bb6f06 100644 --- a/src/box/vy_write_iterator.c +++ b/src/box/vy_write_iterator.c @@ -46,7 +46,7 @@ struct vy_write_src { /* Node in vy_write_iterator::src_heap */ struct heap_node heap_node; /* Current tuple in the source (with minimal key and maximal LSN) */ - struct tuple *tuple; + struct vy_entry entry; /** * If this flag is set, this is a so called "virtual" * source. A virtual source does not stand for any mem or @@ -77,13 +77,13 @@ heap_less(heap_t *heap, struct vy_write_src *src1, struct vy_write_src *src2); /** * A sequence of versions of a key, sorted by LSN in ascending order. - * (history->tuple.lsn < history->next->tuple.lsn). + * (history->entry.stmt.lsn < history->next->entry.stmt.lsn). */ struct vy_write_history { /** Next version with greater LSN. */ struct vy_write_history *next; /** Key. */ - struct tuple *tuple; + struct vy_entry entry; }; /** @@ -92,24 +92,24 @@ struct vy_write_history { * reverses key LSN order from newest first to oldest first, i.e. * orders statements on the same key chronologically. * - * @param tuple Key version. + * @param entry Key version. * @param next Next version of the key. * * @retval not NULL Created object. * @retval NULL Memory error. 
*/ static inline struct vy_write_history * -vy_write_history_new(struct tuple *tuple, struct vy_write_history *next) +vy_write_history_new(struct vy_entry entry, struct vy_write_history *next) { struct vy_write_history *h; h = region_alloc_object(&fiber()->gc, struct vy_write_history); if (h == NULL) return NULL; - h->tuple = tuple; - assert(next == NULL || (next->tuple != NULL && - vy_stmt_lsn(next->tuple) > vy_stmt_lsn(tuple))); + h->entry = entry; + assert(next == NULL || (next->entry.stmt != NULL && + vy_stmt_lsn(next->entry.stmt) > vy_stmt_lsn(entry.stmt))); h->next = next; - vy_stmt_ref_if_possible(tuple); + vy_stmt_ref_if_possible(entry.stmt); return h; } @@ -122,8 +122,8 @@ static inline void vy_write_history_destroy(struct vy_write_history *history) { do { - if (history->tuple != NULL) - vy_stmt_unref_if_possible(history->tuple); + if (history->entry.stmt != NULL) + vy_stmt_unref_if_possible(history->entry.stmt); history = history->next; } while (history != NULL); } @@ -133,11 +133,11 @@ struct vy_read_view_stmt { /** Read view LSN. */ int64_t vlsn; /** Result key version, visible to this @vlsn. */ - struct tuple *tuple; + struct vy_entry entry; /** * A history of changes building up to this read * view. Once built, it is merged into a single - * @tuple. + * @entry. */ struct vy_write_history *history; }; @@ -150,9 +150,9 @@ struct vy_read_view_stmt { static inline void vy_read_view_stmt_destroy(struct vy_read_view_stmt *rv) { - if (rv->tuple != NULL) - vy_stmt_unref_if_possible(rv->tuple); - rv->tuple = NULL; + if (rv->entry.stmt != NULL) + vy_stmt_unref_if_possible(rv->entry.stmt); + rv->entry = vy_entry_none(); if (rv->history != NULL) vy_write_history_destroy(rv->history); rv->history = NULL; @@ -183,7 +183,7 @@ struct vy_write_iterator { * inserted into the primary index without deletion * of the old tuple from secondary indexes. */ - struct tuple *deferred_delete_stmt; + struct vy_entry deferred_delete; /** Length of the @read_views. 
*/ int rv_count; /** @@ -200,7 +200,7 @@ struct vy_write_iterator { /** * Last statement returned to the caller, pinned in memory. */ - struct tuple *last_stmt; + struct vy_entry last; /** * Read views of the same key sorted by LSN in descending * order, starting from INT64_MAX. @@ -223,7 +223,7 @@ heap_less(heap_t *heap, struct vy_write_src *src1, struct vy_write_src *src2) struct vy_write_iterator *stream = container_of(heap, struct vy_write_iterator, src_heap); - int cmp = vy_stmt_compare(src1->tuple, src2->tuple, stream->cmp_def); + int cmp = vy_entry_compare(src1->entry, src2->entry, stream->cmp_def); if (cmp != 0) return cmp < 0; @@ -232,8 +232,8 @@ heap_less(heap_t *heap, struct vy_write_src *src1, struct vy_write_src *src2) * Virtual sources use 0 for LSN, so they are ordered * last automatically. */ - int64_t lsn1 = src1->is_end_of_key ? 0 : vy_stmt_lsn(src1->tuple); - int64_t lsn2 = src2->is_end_of_key ? 0 : vy_stmt_lsn(src2->tuple); + int64_t lsn1 = src1->is_end_of_key ? 0 : vy_stmt_lsn(src1->entry.stmt); + int64_t lsn2 = src2->is_end_of_key ? 0 : vy_stmt_lsn(src2->entry.stmt); if (lsn1 != lsn2) return lsn1 > lsn2; @@ -244,8 +244,8 @@ heap_less(heap_t *heap, struct vy_write_src *src1, struct vy_write_src *src2) * overwrote it. Discard the deferred DELETE as the overwritten * tuple will be (or has already been) purged by the REPLACE. */ - return (vy_stmt_type(src1->tuple) == IPROTO_DELETE ? 1 : 0) < - (vy_stmt_type(src2->tuple) == IPROTO_DELETE ? 1 : 0); + return (vy_stmt_type(src1->entry.stmt) == IPROTO_DELETE ? 1 : 0) < + (vy_stmt_type(src2->entry.stmt) == IPROTO_DELETE ? 
1 : 0); } @@ -264,6 +264,7 @@ vy_write_iterator_new_src(struct vy_write_iterator *stream) return NULL; } heap_node_create(&res->heap_node); + res->entry = vy_entry_none(); res->is_end_of_key = false; rlist_add(&stream->src_list, &res->in_src_list); return res; @@ -298,8 +299,8 @@ vy_write_iterator_add_src(struct vy_write_iterator *stream, if (rc != 0) return rc; } - int rc = src->stream.iface->next(&src->stream, &src->tuple); - if (rc != 0 || src->tuple == NULL) + int rc = src->stream.iface->next(&src->stream, &src->entry); + if (rc != 0 || src->entry.stmt == NULL) goto stop; rc = vy_source_heap_insert(&stream->src_heap, src); @@ -364,11 +365,16 @@ vy_write_iterator_new(struct key_def *cmp_def, bool is_primary, stream->stmt_i = -1; stream->rv_count = count; stream->read_views[0].vlsn = INT64_MAX; + stream->read_views[0].entry = vy_entry_none(); count--; struct vy_read_view *rv; /* Descending order. */ - rlist_foreach_entry(rv, read_views, in_read_views) - stream->read_views[count--].vlsn = rv->vlsn; + rlist_foreach_entry(rv, read_views, in_read_views) { + struct vy_read_view_stmt *p; + p = &stream->read_views[count--]; + p->vlsn = rv->vlsn; + p->entry = vy_entry_none(); + } assert(count == 0); stream->base.iface = &vy_slice_stream_iface; @@ -378,6 +384,8 @@ vy_write_iterator_new(struct key_def *cmp_def, bool is_primary, stream->is_primary = is_primary; stream->is_last_level = is_last_level; stream->deferred_delete_handler = handler; + stream->deferred_delete = vy_entry_none(); + stream->last = vy_entry_none(); return &stream->base; } @@ -416,13 +424,13 @@ vy_write_iterator_stop(struct vy_stmt_stream *vstream) struct vy_write_src *src; rlist_foreach_entry(src, &stream->src_list, in_src_list) vy_write_iterator_remove_src(stream, src); - if (stream->last_stmt != NULL) { - vy_stmt_unref_if_possible(stream->last_stmt); - stream->last_stmt = NULL; + if (stream->last.stmt != NULL) { + vy_stmt_unref_if_possible(stream->last.stmt); + stream->last = vy_entry_none(); } - if 
(stream->deferred_delete_stmt != NULL) { - vy_stmt_unref_if_possible(stream->deferred_delete_stmt); - stream->deferred_delete_stmt = NULL; + if (stream->deferred_delete.stmt != NULL) { + vy_stmt_unref_if_possible(stream->deferred_delete.stmt); + stream->deferred_delete = vy_entry_none(); } struct vy_deferred_delete_handler *handler = stream->deferred_delete_handler; @@ -489,10 +497,10 @@ vy_write_iterator_merge_step(struct vy_write_iterator *stream) { struct vy_write_src *src = vy_source_heap_top(&stream->src_heap); assert(src != NULL); - int rc = src->stream.iface->next(&src->stream, &src->tuple); + int rc = src->stream.iface->next(&src->stream, &src->entry); if (rc != 0) return rc; - if (src->tuple != NULL) + if (src->entry.stmt != NULL) vy_source_heap_update(&stream->src_heap, src); else vy_write_iterator_remove_src(stream, src); @@ -519,11 +527,9 @@ vy_write_iterator_get_vlsn(struct vy_write_iterator *stream, int rv_i) } /** - * Remember the current tuple of the @src as a part of the - * current read view. - * @param History objects allocator. + * Remember a statement as a part of the current read view. * @param stream Write iterator. - * @param src Source of the wanted tuple. + * @param entry The statement. * @param current_rv_i Index of the current read view. * * @retval 0 Success. 
@@ -531,13 +537,13 @@ vy_write_iterator_get_vlsn(struct vy_write_iterator *stream, int rv_i) */ static inline int vy_write_iterator_push_rv(struct vy_write_iterator *stream, - struct tuple *tuple, int current_rv_i) + struct vy_entry entry, int current_rv_i) { assert(current_rv_i < stream->rv_count); struct vy_read_view_stmt *rv = &stream->read_views[current_rv_i]; - assert(rv->vlsn >= vy_stmt_lsn(tuple)); + assert(rv->vlsn >= vy_stmt_lsn(entry.stmt)); struct vy_write_history *h = - vy_write_history_new(tuple, rv->history); + vy_write_history_new(entry, rv->history); if (h == NULL) return -1; rv->history = h; @@ -556,26 +562,26 @@ vy_write_iterator_push_rv(struct vy_write_iterator *stream, * @retval not NULL Next statement of the current key. * @retval NULL End of the key (not the end of the sources). */ -static inline struct tuple * +static inline struct vy_entry vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream) { struct vy_read_view_stmt *rv; if (stream->rv_used_count == 0) - return NULL; + return vy_entry_none(); /* Find a next non-empty history element. */ do { assert(stream->stmt_i + 1 < stream->rv_count); stream->stmt_i++; rv = &stream->read_views[stream->stmt_i]; assert(rv->history == NULL); - } while (rv->tuple == NULL); + } while (rv->entry.stmt == NULL); assert(stream->rv_used_count > 0); stream->rv_used_count--; - if (stream->last_stmt != NULL) - vy_stmt_unref_if_possible(stream->last_stmt); - stream->last_stmt = rv->tuple; - rv->tuple = NULL; - return stream->last_stmt; + if (stream->last.stmt != NULL) + vy_stmt_unref_if_possible(stream->last.stmt); + stream->last = rv->entry; + rv->entry = vy_entry_none(); + return stream->last; } /** @@ -583,15 +589,16 @@ vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream) * deletion from secondary indexes was deferred. * * @param stream Write iterator. - * @param stmt Current statement. + * @param entry Current statement. * * @retval 0 Success. * @retval -1 Error. 
*/ static int vy_write_iterator_deferred_delete(struct vy_write_iterator *stream, - struct tuple *stmt) + struct vy_entry entry) { + struct tuple *stmt = entry.stmt; /* * UPSERTs cannot change secondary index parts neither * can they produce deferred DELETEs, so we skip them. @@ -604,15 +611,15 @@ vy_write_iterator_deferred_delete(struct vy_write_iterator *stream, * Invoke the callback to generate a deferred DELETE * in case the current tuple was overwritten. */ - if (stream->deferred_delete_stmt != NULL) { + if (stream->deferred_delete.stmt != NULL) { struct vy_deferred_delete_handler *handler = stream->deferred_delete_handler; if (handler != NULL && vy_stmt_type(stmt) != IPROTO_DELETE && handler->iface->process(handler, stmt, - stream->deferred_delete_stmt) != 0) + stream->deferred_delete.stmt) != 0) return -1; - vy_stmt_unref_if_possible(stream->deferred_delete_stmt); - stream->deferred_delete_stmt = NULL; + vy_stmt_unref_if_possible(stream->deferred_delete.stmt); + stream->deferred_delete = vy_entry_none(); } /* * Remember the current statement if it is marked with @@ -624,7 +631,7 @@ vy_write_iterator_deferred_delete(struct vy_write_iterator *stream, assert(vy_stmt_type(stmt) == IPROTO_DELETE || vy_stmt_type(stmt) == IPROTO_REPLACE); vy_stmt_ref_if_possible(stmt); - stream->deferred_delete_stmt = stmt; + stream->deferred_delete = entry; } return 0; } @@ -654,12 +661,12 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, *count = 0; *is_first_insert = false; assert(stream->stmt_i == -1); - assert(stream->deferred_delete_stmt == NULL); + assert(stream->deferred_delete.stmt == NULL); struct vy_write_src *src = vy_source_heap_top(&stream->src_heap); if (src == NULL) return 0; /* no more data */ /* Search must have been started already. */ - assert(src->tuple != NULL); + assert(src->entry.stmt != NULL); /* * A virtual source instance which represents the end on * the current key in the source heap. 
It is greater @@ -671,14 +678,14 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, */ struct vy_write_src end_of_key_src; end_of_key_src.is_end_of_key = true; - end_of_key_src.tuple = src->tuple; + end_of_key_src.entry = src->entry; int rc = vy_source_heap_insert(&stream->src_heap, &end_of_key_src); if (rc) { diag_set(OutOfMemory, sizeof(void *), "malloc", "vinyl write stream heap"); return rc; } - vy_stmt_ref_if_possible(src->tuple); + vy_stmt_ref_if_possible(src->entry.stmt); /* * For each pair (merge_until_lsn, current_rv_lsn] build * a history in the corresponding read view. @@ -689,10 +696,10 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, int64_t merge_until_lsn = vy_write_iterator_get_vlsn(stream, 1); while (true) { - *is_first_insert = vy_stmt_type(src->tuple) == IPROTO_INSERT; + *is_first_insert = vy_stmt_type(src->entry.stmt) == IPROTO_INSERT; if (!stream->is_primary && - (vy_stmt_flags(src->tuple) & VY_STMT_UPDATE) != 0) { + (vy_stmt_flags(src->entry.stmt) & VY_STMT_UPDATE) != 0) { /* * If a REPLACE stored in a secondary index was * generated by an update operation, it can be @@ -713,12 +720,12 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, */ if (stream->is_primary) { rc = vy_write_iterator_deferred_delete(stream, - src->tuple); + src->entry); if (rc != 0) break; } - if (vy_stmt_lsn(src->tuple) > current_rv_lsn) { + if (vy_stmt_lsn(src->entry.stmt) > current_rv_lsn) { /* * Skip statements invisible to the current read * view but older than the previous read view, @@ -726,10 +733,10 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, */ goto next_lsn; } - while (vy_stmt_lsn(src->tuple) <= merge_until_lsn) { + while (vy_stmt_lsn(src->entry.stmt) <= merge_until_lsn) { /* * Skip read views which see the same - * version of the key, until src->tuple is + * version of the key, until src->entry is * between merge_until_lsn and * current_rv_lsn. 
*/ @@ -745,13 +752,13 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, * @sa vy_write_iterator for details about this * and other optimizations. */ - if (vy_stmt_type(src->tuple) == IPROTO_DELETE && + if (vy_stmt_type(src->entry.stmt) == IPROTO_DELETE && stream->is_last_level && merge_until_lsn == 0) { current_rv_lsn = 0; /* Force skip */ goto next_lsn; } - rc = vy_write_iterator_push_rv(stream, src->tuple, + rc = vy_write_iterator_push_rv(stream, src->entry, current_rv_i); if (rc != 0) break; @@ -761,9 +768,9 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream, * Optimization 2: skip statements overwritten * by a REPLACE or DELETE. */ - if (vy_stmt_type(src->tuple) == IPROTO_REPLACE || - vy_stmt_type(src->tuple) == IPROTO_INSERT || - vy_stmt_type(src->tuple) == IPROTO_DELETE) { + if (vy_stmt_type(src->entry.stmt) == IPROTO_REPLACE || + vy_stmt_type(src->entry.stmt) == IPROTO_INSERT || + vy_stmt_type(src->entry.stmt) == IPROTO_DELETE) { current_rv_i++; current_rv_lsn = merge_until_lsn; merge_until_lsn = @@ -776,7 +783,7 @@ next_lsn: break; src = vy_source_heap_top(&stream->src_heap); assert(src != NULL); - assert(src->tuple != NULL); + assert(src->entry.stmt != NULL); if (src->is_end_of_key) break; } @@ -787,13 +794,13 @@ next_lsn: * there's no tuple it could overwrite. */ if (rc == 0 && stream->is_last_level && - stream->deferred_delete_stmt != NULL) { - vy_stmt_unref_if_possible(stream->deferred_delete_stmt); - stream->deferred_delete_stmt = NULL; + stream->deferred_delete.stmt != NULL) { + vy_stmt_unref_if_possible(stream->deferred_delete.stmt); + stream->deferred_delete = vy_entry_none(); } vy_source_heap_delete(&stream->src_heap, &end_of_key_src); - vy_stmt_unref_if_possible(end_of_key_src.tuple); + vy_stmt_unref_if_possible(end_of_key_src.entry.stmt); return rc; } @@ -803,7 +810,7 @@ next_lsn: * one statement. * * @param stream Write iterator. - * @param prev_tuple Tuple from the previous read view (can be NULL). 
+ * @param prev Statement from the previous read view (can be NULL). * @param rv Read view to merge. * @param is_first_insert Set if the oldest statement for the * current key among all sources is an INSERT. @@ -812,11 +819,11 @@ next_lsn: * @retval -1 Memory error. */ static NODISCARD int -vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, +vy_read_view_merge(struct vy_write_iterator *stream, struct vy_entry prev, struct vy_read_view_stmt *rv, bool is_first_insert) { assert(rv != NULL); - assert(rv->tuple == NULL); + assert(rv->entry.stmt == NULL); assert(rv->history != NULL); struct vy_write_history *h = rv->history; /* @@ -824,9 +831,9 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, * by a read view if it is preceded by another DELETE for * the same key. */ - if (prev_tuple != NULL && - vy_stmt_type(prev_tuple) == IPROTO_DELETE && - vy_stmt_type(h->tuple) == IPROTO_DELETE) { + if (prev.stmt != NULL && + vy_stmt_type(prev.stmt) == IPROTO_DELETE && + vy_stmt_type(h->entry.stmt) == IPROTO_DELETE) { vy_write_history_destroy(h); rv->history = NULL; return 0; @@ -840,32 +847,34 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, * REPLACE, then the current UPSERT can be applied to * it, whether is_last_level is true or not. 
*/ - if (vy_stmt_type(h->tuple) == IPROTO_UPSERT && - (stream->is_last_level || (prev_tuple != NULL && - vy_stmt_type(prev_tuple) != IPROTO_UPSERT))) { - assert(!stream->is_last_level || prev_tuple == NULL || - vy_stmt_type(prev_tuple) != IPROTO_UPSERT); - struct tuple *applied = vy_apply_upsert(h->tuple, prev_tuple, - stream->cmp_def, false); - if (applied == NULL) + if (vy_stmt_type(h->entry.stmt) == IPROTO_UPSERT && + (stream->is_last_level || (prev.stmt != NULL && + vy_stmt_type(prev.stmt) != IPROTO_UPSERT))) { + assert(!stream->is_last_level || prev.stmt == NULL || + vy_stmt_type(prev.stmt) != IPROTO_UPSERT); + struct vy_entry applied; + applied = vy_entry_apply_upsert(h->entry, prev, + stream->cmp_def, false); + if (applied.stmt == NULL) return -1; - vy_stmt_unref_if_possible(h->tuple); - h->tuple = applied; + vy_stmt_unref_if_possible(h->entry.stmt); + h->entry = applied; } /* Squash the rest of UPSERTs. */ struct vy_write_history *result = h; h = h->next; while (h != NULL) { - assert(h->tuple != NULL && - vy_stmt_type(h->tuple) == IPROTO_UPSERT); - assert(result->tuple != NULL); - struct tuple *applied = vy_apply_upsert(h->tuple, result->tuple, - stream->cmp_def, false); - if (applied == NULL) + assert(h->entry.stmt != NULL && + vy_stmt_type(h->entry.stmt) == IPROTO_UPSERT); + assert(result->entry.stmt != NULL); + struct vy_entry applied; + applied = vy_entry_apply_upsert(h->entry, result->entry, + stream->cmp_def, false); + if (applied.stmt == NULL) return -1; - vy_stmt_unref_if_possible(result->tuple); - result->tuple = applied; - vy_stmt_unref_if_possible(h->tuple); + vy_stmt_unref_if_possible(result->entry.stmt); + result->entry = applied; + vy_stmt_unref_if_possible(h->entry.stmt); /* * Don't bother freeing 'h' since it's * allocated on a region. 
@@ -873,9 +882,9 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, h = h->next; result->next = h; } - rv->tuple = result->tuple; + rv->entry = result->entry; rv->history = NULL; - result->tuple = NULL; + result->entry = vy_entry_none(); assert(result->next == NULL); /* * The write iterator generates deferred DELETEs for all @@ -884,21 +893,22 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, * statements so as not to generate the same DELETEs on * the next compaction. */ - uint8_t flags = vy_stmt_flags(rv->tuple); + uint8_t flags = vy_stmt_flags(rv->entry.stmt); if ((flags & VY_STMT_DEFERRED_DELETE) != 0 && - rv->tuple != stream->deferred_delete_stmt) { - if (!vy_stmt_is_refable(rv->tuple)) { - rv->tuple = vy_stmt_dup(rv->tuple); - if (rv->tuple == NULL) + !vy_entry_is_equal(rv->entry, stream->deferred_delete)) { + if (!vy_stmt_is_refable(rv->entry.stmt)) { + rv->entry.stmt = vy_stmt_dup(rv->entry.stmt); + if (rv->entry.stmt == NULL) return -1; } - vy_stmt_set_flags(rv->tuple, flags & ~VY_STMT_DEFERRED_DELETE); + vy_stmt_set_flags(rv->entry.stmt, + flags & ~VY_STMT_DEFERRED_DELETE); } - if (prev_tuple != NULL) { + if (prev.stmt != NULL) { /* Not the first statement. */ return 0; } - if (is_first_insert && vy_stmt_type(rv->tuple) == IPROTO_DELETE) { + if (is_first_insert && vy_stmt_type(rv->entry.stmt) == IPROTO_DELETE) { /* * Optimization 5: discard the first DELETE if * the oldest statement for the current key among @@ -906,12 +916,12 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, * statements for this key in older runs or the * last statement is a DELETE. 
*/ - vy_stmt_unref_if_possible(rv->tuple); - rv->tuple = NULL; + vy_stmt_unref_if_possible(rv->entry.stmt); + rv->entry = vy_entry_none(); } else if ((is_first_insert && - vy_stmt_type(rv->tuple) == IPROTO_REPLACE) || + vy_stmt_type(rv->entry.stmt) == IPROTO_REPLACE) || (!is_first_insert && - vy_stmt_type(rv->tuple) == IPROTO_INSERT)) { + vy_stmt_type(rv->entry.stmt) == IPROTO_INSERT)) { /* * If the oldest statement among all sources is an * INSERT, convert the first REPLACE to an INSERT @@ -923,16 +933,16 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple, * so as not to trigger optimization #5 on the next * compaction. */ - struct tuple *copy = vy_stmt_dup(rv->tuple); + struct tuple *copy = vy_stmt_dup(rv->entry.stmt); if (is_first_insert) vy_stmt_set_type(copy, IPROTO_INSERT); else vy_stmt_set_type(copy, IPROTO_REPLACE); if (copy == NULL) return -1; - vy_stmt_set_lsn(copy, vy_stmt_lsn(rv->tuple)); - vy_stmt_unref_if_possible(rv->tuple); - rv->tuple = copy; + vy_stmt_set_lsn(copy, vy_stmt_lsn(rv->entry.stmt)); + vy_stmt_unref_if_possible(rv->entry.stmt); + rv->entry.stmt = copy; } return 0; } @@ -975,19 +985,18 @@ vy_write_iterator_build_read_views(struct vy_write_iterator *stream, int *count) * here > 0. */ assert(rv >= &stream->read_views[0] && rv->history != NULL); - struct tuple *prev_tuple = NULL; + struct vy_entry prev = vy_entry_none(); for (; rv >= &stream->read_views[0]; --rv) { if (rv->history == NULL) continue; - if (vy_read_view_merge(stream, prev_tuple, rv, - is_first_insert) != 0) + if (vy_read_view_merge(stream, prev, rv, is_first_insert) != 0) goto error; assert(rv->history == NULL); - if (rv->tuple == NULL) + if (rv->entry.stmt == NULL) continue; stream->rv_used_count++; ++*count; - prev_tuple = rv->tuple; + prev = rv->entry; } region_truncate(region, used); return 0; @@ -1006,8 +1015,7 @@ error: * @return 0 on success or not 0 on error (diag is set). 
*/ static NODISCARD int -vy_write_iterator_next(struct vy_stmt_stream *vstream, - struct tuple **ret) +vy_write_iterator_next(struct vy_stmt_stream *vstream, struct vy_entry *ret) { assert(vstream->iface->next == vy_write_iterator_next); struct vy_write_iterator *stream = (struct vy_write_iterator *)vstream; @@ -1016,7 +1024,7 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream, * read view statements sequence. */ *ret = vy_write_iterator_pop_read_view_stmt(stream); - if (*ret != NULL) + if (ret->stmt != NULL) return 0; /* * If we didn't generate a deferred DELETE corresponding to @@ -1024,19 +1032,19 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream, * include it into the output, because there still might be * an overwritten tuple in an older source. */ - if (stream->deferred_delete_stmt != NULL) { - if (stream->deferred_delete_stmt == stream->last_stmt) { + if (stream->deferred_delete.stmt != NULL) { + if (vy_entry_is_equal(stream->deferred_delete, stream->last)) { /* * The statement was returned via a read view. * Nothing to do. 
*/ - vy_stmt_unref_if_possible(stream->deferred_delete_stmt); - stream->deferred_delete_stmt = NULL; + vy_stmt_unref_if_possible(stream->deferred_delete.stmt); + stream->deferred_delete = vy_entry_none(); } else { - if (stream->last_stmt != NULL) - vy_stmt_unref_if_possible(stream->last_stmt); - *ret = stream->last_stmt = stream->deferred_delete_stmt; - stream->deferred_delete_stmt = NULL; + if (stream->last.stmt != NULL) + vy_stmt_unref_if_possible(stream->last.stmt); + *ret = stream->last = stream->deferred_delete; + stream->deferred_delete = vy_entry_none(); return 0; } } diff --git a/test/unit/vy_cache.c b/test/unit/vy_cache.c index d46d6c3f..7c4292cb 100644 --- a/test/unit/vy_cache.c +++ b/test/unit/vy_cache.c @@ -18,7 +18,8 @@ test_basic() struct tuple_format *format; create_test_cache(fields, types, lengthof(fields), &cache, &key_def, &format); - struct tuple *select_all = vy_new_simple_stmt(format, &key_template); + struct vy_entry select_all = vy_new_simple_stmt(format, key_def, + &key_template); struct mempool history_node_pool; mempool_create(&history_node_pool, cord_slab_cache(), @@ -88,14 +89,14 @@ test_basic() vy_cache_iterator_open(&itr, &cache, ITER_GE, select_all, &rv_p); /* Start iterator and make several steps. */ - struct tuple *ret; + struct vy_entry ret; bool unused; struct vy_history history; vy_history_create(&history, &history_node_pool); for (int i = 0; i < 4; ++i) vy_cache_iterator_next(&itr, &history, &unused); ret = vy_history_last_stmt(&history); - ok(vy_stmt_are_same(ret, &chain1[3], format), + ok(vy_stmt_are_same(ret, &chain1[3], format, key_def), "next_key * 4"); /* @@ -111,22 +112,22 @@ test_basic() /* * Restore after the cache had changed. Restoration * makes position of the iterator be one statement after - * the last_stmt. So restore on chain1[0], but the result + * the last. So restore on chain1[0], but the result * must be chain1[1]. 
*/ - struct tuple *last_stmt = vy_new_simple_stmt(format, &chain1[0]); - ok(vy_cache_iterator_restore(&itr, last_stmt, &history, &unused) >= 0, + struct vy_entry last = vy_new_simple_stmt(format, key_def, &chain1[0]); + ok(vy_cache_iterator_restore(&itr, last, &history, &unused) >= 0, "restore"); ret = vy_history_last_stmt(&history); - ok(vy_stmt_are_same(ret, &chain1[1], format), + ok(vy_stmt_are_same(ret, &chain1[1], format, key_def), "restore on position after last"); - tuple_unref(last_stmt); + tuple_unref(last.stmt); vy_history_cleanup(&history); vy_cache_iterator_close(&itr); mempool_destroy(&history_node_pool); - tuple_unref(select_all); + tuple_unref(select_all.stmt); destroy_test_cache(&cache, key_def, format); check_plan(); footer(); diff --git a/test/unit/vy_iterators_helper.c b/test/unit/vy_iterators_helper.c index 23702bcb..0d20f19e 100644 --- a/test/unit/vy_iterators_helper.c +++ b/test/unit/vy_iterators_helper.c @@ -37,12 +37,12 @@ vy_iterator_C_test_finish() memory_free(); } -struct tuple * -vy_new_simple_stmt(struct tuple_format *format, +struct vy_entry +vy_new_simple_stmt(struct tuple_format *format, struct key_def *key_def, const struct vy_stmt_template *templ) { if (templ == NULL) - return NULL; + return vy_entry_none(); /* Calculate binary size. 
*/ int i = 0; size_t size = 0; @@ -127,22 +127,27 @@ vy_new_simple_stmt(struct tuple_format *format, free(buf); vy_stmt_set_lsn(ret, templ->lsn); vy_stmt_set_flags(ret, templ->flags); - return ret; + struct vy_entry entry; + entry.stmt = ret; + entry.hint = vy_stmt_hint(ret, key_def); + return entry; } -struct tuple * +struct vy_entry vy_mem_insert_template(struct vy_mem *mem, const struct vy_stmt_template *templ) { - struct tuple *stmt = vy_new_simple_stmt(mem->format, templ); - struct tuple *region_stmt = vy_stmt_dup_lsregion(stmt, + struct vy_entry entry = vy_new_simple_stmt(mem->format, + mem->cmp_def, templ); + struct tuple *region_stmt = vy_stmt_dup_lsregion(entry.stmt, &mem->env->allocator, mem->generation); assert(region_stmt != NULL); - tuple_unref(stmt); + tuple_unref(entry.stmt); + entry.stmt = region_stmt; if (templ->type == IPROTO_UPSERT) - vy_mem_insert_upsert(mem, region_stmt); + vy_mem_insert_upsert(mem, entry); else - vy_mem_insert(mem, region_stmt); - return region_stmt; + vy_mem_insert(mem, entry); + return entry; } void @@ -153,30 +158,32 @@ vy_cache_insert_templates_chain(struct vy_cache *cache, const struct vy_stmt_template *key_templ, enum iterator_type order) { - struct tuple *key = vy_new_simple_stmt(format, key_templ); - struct tuple *prev_stmt = NULL; - struct tuple *stmt = NULL; + struct vy_entry key = vy_new_simple_stmt(format, cache->cmp_def, + key_templ); + struct vy_entry prev_entry = vy_entry_none(); + struct vy_entry entry = vy_entry_none(); for (uint i = 0; i < length; ++i) { - stmt = vy_new_simple_stmt(format, &chain[i]); - vy_cache_add(cache, stmt, prev_stmt, key, order); + entry = vy_new_simple_stmt(format, cache->cmp_def, &chain[i]); + vy_cache_add(cache, entry, prev_entry, key, order); if (i != 0) - tuple_unref(prev_stmt); - prev_stmt = stmt; - stmt = NULL; + tuple_unref(prev_entry.stmt); + prev_entry = entry; + entry = vy_entry_none(); } - tuple_unref(key); - if (prev_stmt != NULL) - tuple_unref(prev_stmt); + 
tuple_unref(key.stmt); + if (prev_entry.stmt != NULL) + tuple_unref(prev_entry.stmt); } void vy_cache_on_write_template(struct vy_cache *cache, struct tuple_format *format, const struct vy_stmt_template *templ) { - struct tuple *written = vy_new_simple_stmt(format, templ); + struct vy_entry written = vy_new_simple_stmt(format, cache->cmp_def, + templ); vy_cache_on_write(cache, written, NULL); - tuple_unref(written); + tuple_unref(written.stmt); } void @@ -226,39 +233,43 @@ destroy_test_cache(struct vy_cache *cache, struct key_def *def, } bool -vy_stmt_are_same(struct tuple *actual, +vy_stmt_are_same(struct vy_entry actual, const struct vy_stmt_template *expected, - struct tuple_format *format) + struct tuple_format *format, struct key_def *key_def) { - if (vy_stmt_type(actual) != expected->type) + if (vy_stmt_type(actual.stmt) != expected->type) return false; - struct tuple *tmp = vy_new_simple_stmt(format, expected); - fail_if(tmp == NULL); + struct vy_entry tmp = vy_new_simple_stmt(format, key_def, expected); + fail_if(tmp.stmt == NULL); + if (actual.hint != tmp.hint) { + tuple_unref(tmp.stmt); + return false; + } uint32_t a_len, b_len; const char *a, *b; - if (vy_stmt_type(actual) == IPROTO_UPSERT) { - a = vy_upsert_data_range(actual, &a_len); + if (vy_stmt_type(actual.stmt) == IPROTO_UPSERT) { + a = vy_upsert_data_range(actual.stmt, &a_len); } else { - a = tuple_data_range(actual, &a_len); + a = tuple_data_range(actual.stmt, &a_len); } - if (vy_stmt_type(tmp) == IPROTO_UPSERT) { - b = vy_upsert_data_range(tmp, &b_len); + if (vy_stmt_type(tmp.stmt) == IPROTO_UPSERT) { + b = vy_upsert_data_range(tmp.stmt, &b_len); } else { - b = tuple_data_range(tmp, &b_len); + b = tuple_data_range(tmp.stmt, &b_len); } if (a_len != b_len) { - tuple_unref(tmp); + tuple_unref(tmp.stmt); return false; } - if (vy_stmt_lsn(actual) != expected->lsn) { - tuple_unref(tmp); + if (vy_stmt_lsn(actual.stmt) != expected->lsn) { + tuple_unref(tmp.stmt); return false; } - if 
(vy_stmt_flags(actual) != expected->flags) { - tuple_unref(tmp); + if (vy_stmt_flags(actual.stmt) != expected->flags) { + tuple_unref(tmp.stmt); return false; } bool rc = memcmp(a, b, a_len) == 0; - tuple_unref(tmp); + tuple_unref(tmp.stmt); return rc; } diff --git a/test/unit/vy_iterators_helper.h b/test/unit/vy_iterators_helper.h index 3ee6cee6..1e69f7fa 100644 --- a/test/unit/vy_iterators_helper.h +++ b/test/unit/vy_iterators_helper.h @@ -94,12 +94,13 @@ struct vy_stmt_template { * Create a new vinyl statement using the specified template. * * @param format + * @param key_def Key definition (for computing hint). * @param templ Statement template. * * @return Created statement. */ -struct tuple * -vy_new_simple_stmt(struct tuple_format *format, +struct vy_entry +vy_new_simple_stmt(struct tuple_format *format, struct key_def *key_def, const struct vy_stmt_template *templ); /** @@ -111,7 +112,7 @@ vy_new_simple_stmt(struct tuple_format *format, * * @retval Lsregion allocated statement. */ -struct tuple * +struct vy_entry vy_mem_insert_template(struct vy_mem *mem, const struct vy_stmt_template *templ); @@ -195,16 +196,17 @@ destroy_test_cache(struct vy_cache *cache, struct key_def *def, * Check that the template specifies completely the same statement * as @stmt. * - * @param stmt Actual value. + * @param actual Actual value. * @param templ Expected value. * @param format Template statement format. + * @param key_def Key definition (for computing hint). * * @retval stmt === template. 
*/ bool -vy_stmt_are_same(struct tuple *actual, +vy_stmt_are_same(struct vy_entry actual, const struct vy_stmt_template *expected, - struct tuple_format *format); + struct tuple_format *format, struct key_def *key_def); #if defined(__cplusplus) } diff --git a/test/unit/vy_mem.c b/test/unit/vy_mem.c index acd024dc..2e461f31 100644 --- a/test/unit/vy_mem.c +++ b/test/unit/vy_mem.c @@ -26,30 +26,34 @@ test_basic(void) }; /* Check dump lsn */ - struct tuple *stmt = vy_mem_insert_template(mem, &stmts[0]); + struct vy_entry entry = vy_mem_insert_template(mem, &stmts[0]); is(mem->dump_lsn, -1, "mem->dump_lsn after prepare"); - vy_mem_commit_stmt(mem, stmt); + vy_mem_commit_stmt(mem, entry); is(mem->dump_lsn, 100, "mem->dump_lsn after commit"); /* Check vy_mem_older_lsn */ - struct tuple *older = stmt; - stmt = vy_mem_insert_template(mem, &stmts[1]); - is(vy_mem_older_lsn(mem, stmt), older, "vy_mem_older_lsn 1"); - is(vy_mem_older_lsn(mem, older), NULL, "vy_mem_older_lsn 2"); - vy_mem_commit_stmt(mem, stmt); + struct vy_entry older = entry; + entry = vy_mem_insert_template(mem, &stmts[1]); + ok(vy_entry_is_equal(vy_mem_older_lsn(mem, entry), older), + "vy_mem_older_lsn 1"); + ok(vy_entry_is_equal(vy_mem_older_lsn(mem, older), vy_entry_none()), + "vy_mem_older_lsn 2"); + vy_mem_commit_stmt(mem, entry); /* Check rollback */ - struct tuple *olderolder = stmt; + struct vy_entry olderolder = entry; older = vy_mem_insert_template(mem, &stmts[2]); - stmt = vy_mem_insert_template(mem, &stmts[3]); - is(vy_mem_older_lsn(mem, stmt), older, "vy_mem_rollback 1"); + entry = vy_mem_insert_template(mem, &stmts[3]); + ok(vy_entry_is_equal(vy_mem_older_lsn(mem, entry), older), + "vy_mem_rollback 1"); vy_mem_rollback_stmt(mem, older); - is(vy_mem_older_lsn(mem, stmt), olderolder, "vy_mem_rollback 2"); + ok(vy_entry_is_equal(vy_mem_older_lsn(mem, entry), olderolder), + "vy_mem_rollback 2"); /* Check version */ - stmt = vy_mem_insert_template(mem, &stmts[4]); + entry = 
vy_mem_insert_template(mem, &stmts[4]); is(mem->version, 8, "vy_mem->version") - vy_mem_commit_stmt(mem, stmt); + vy_mem_commit_stmt(mem, entry); is(mem->version, 9, "vy_mem->version") /* Clean up */ @@ -86,7 +90,8 @@ test_iterator_restore_after_insertion() struct slab_cache *slab_cache = cord_slab_cache(); lsregion_create(&lsregion, slab_cache->arena); - struct tuple *select_key = vy_key_new(stmt_env.key_format, NULL, 0); + struct vy_entry select_key = vy_entry_key_new(stmt_env.key_format, + key_def, NULL, 0); struct mempool history_node_pool; mempool_create(&history_node_pool, cord_slab_cache(), @@ -98,13 +103,18 @@ test_iterator_restore_after_insertion() char *end = data; end = mp_encode_array(end, 1); end = mp_encode_uint(end, restore_on_value); - struct tuple *restore_on_key = vy_stmt_new_replace(format, data, end); - vy_stmt_set_lsn(restore_on_key, 100); + struct vy_entry restore_on_key; + restore_on_key.stmt = vy_stmt_new_replace(format, data, end); + restore_on_key.hint = vy_stmt_hint(restore_on_key.stmt, key_def); + vy_stmt_set_lsn(restore_on_key.stmt, 100); end = data; end = mp_encode_array(end, 1); end = mp_encode_uint(end, restore_on_value_reverse); - struct tuple *restore_on_key_reverse = vy_stmt_new_replace(format, data, end); - vy_stmt_set_lsn(restore_on_key_reverse, 100); + struct vy_entry restore_on_key_reverse; + restore_on_key_reverse.stmt = vy_stmt_new_replace(format, data, end); + restore_on_key_reverse.hint = vy_stmt_hint(restore_on_key_reverse.stmt, + key_def); + vy_stmt_set_lsn(restore_on_key_reverse.stmt, 100); bool wrong_output = false; int i_fail = 0; @@ -189,22 +199,22 @@ test_iterator_restore_after_insertion() vy_mem_iterator_open(&itr, &stats, mem, direct ? 
ITER_GE : ITER_LE, select_key, &prv); - struct tuple *t; + struct vy_entry e; struct vy_history history; vy_history_create(&history, &history_node_pool); int rc = vy_mem_iterator_next(&itr, &history); - t = vy_history_last_stmt(&history); + e = vy_history_last_stmt(&history); assert(rc == 0); size_t j = 0; - while (t != NULL) { + while (e.stmt != NULL) { if (j >= expected_count) { wrong_output = true; break; } uint32_t val = 42; - tuple_field_u32(t, 0, &val); + tuple_field_u32(e.stmt, 0, &val); if (val != expected_values[j] || - vy_stmt_lsn(t) != expected_lsns[j]) { + vy_stmt_lsn(e.stmt) != expected_lsns[j]) { wrong_output = true; break; } @@ -214,10 +224,10 @@ test_iterator_restore_after_insertion() else if(!direct && val <= middle_value) break; int rc = vy_mem_iterator_next(&itr, &history); - t = vy_history_last_stmt(&history); + e = vy_history_last_stmt(&history); assert(rc == 0); } - if (t == NULL && j != expected_count) + if (e.stmt == NULL && j != expected_count) wrong_output = true; if (wrong_output) { i_fail = i; @@ -269,24 +279,24 @@ test_iterator_restore_after_insertion() rc = vy_mem_iterator_restore(&itr, restore_on_key, &history); else rc = vy_mem_iterator_restore(&itr, restore_on_key_reverse, &history); - t = vy_history_last_stmt(&history); + e = vy_history_last_stmt(&history); j = 0; - while (t != NULL) { + while (e.stmt != NULL) { if (j >= expected_count) { wrong_output = true; break; } uint32_t val = 42; - tuple_field_u32(t, 0, &val); + tuple_field_u32(e.stmt, 0, &val); if (val != expected_values[j] || - vy_stmt_lsn(t) != expected_lsns[j]) { + vy_stmt_lsn(e.stmt) != expected_lsns[j]) { wrong_output = true; break; } j++; int rc = vy_mem_iterator_next(&itr, &history); - t = vy_history_last_stmt(&history); + e = vy_history_last_stmt(&history); assert(rc == 0); } if (j != expected_count) @@ -306,9 +316,9 @@ test_iterator_restore_after_insertion() /* Clean up */ mempool_destroy(&history_node_pool); - tuple_unref(select_key); - 
tuple_unref(restore_on_key); - tuple_unref(restore_on_key_reverse); + tuple_unref(select_key.stmt); + tuple_unref(restore_on_key.stmt); + tuple_unref(restore_on_key_reverse.stmt); tuple_format_unref(format); lsregion_destroy(&lsregion); diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index f3cd84d4..7471693c 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -30,9 +30,9 @@ write_run(struct vy_run *run, const char *dir_name, if (wi->iface->start(wi) != 0) goto fail_abort_writer; int rc; - struct tuple *stmt = NULL; - while ((rc = wi->iface->next(wi, &stmt)) == 0 && stmt != NULL) { - rc = vy_run_writer_append_stmt(&writer, stmt); + struct vy_entry entry = vy_entry_none(); + while ((rc = wi->iface->next(wi, &entry)) == 0 && entry.stmt != NULL) { + rc = vy_run_writer_append_stmt(&writer, entry); if (rc != 0) break; } @@ -98,7 +98,8 @@ test_basic() index_def, format, NULL, 0); isnt(pk, NULL, "lsm is not NULL") - struct vy_range *range = vy_range_new(1, NULL, NULL, pk->cmp_def); + struct vy_range *range = vy_range_new(1, vy_entry_none(), + vy_entry_none(), pk->cmp_def); isnt(pk, NULL, "range is not NULL") vy_lsm_add_range(pk, range); @@ -205,7 +206,8 @@ test_basic() vy_mem_delete(run_mem); vy_lsm_add_run(pk, run); - struct vy_slice *slice = vy_slice_new(1, run, NULL, NULL, pk->cmp_def); + struct vy_slice *slice = vy_slice_new(1, run, vy_entry_none(), + vy_entry_none(), pk->cmp_def); vy_range_add_slice(range, slice); vy_run_unref(run); @@ -235,7 +237,8 @@ test_basic() vy_mem_delete(run_mem); vy_lsm_add_run(pk, run); - slice = vy_slice_new(1, run, NULL, NULL, pk->cmp_def); + slice = vy_slice_new(1, run, vy_entry_none(), vy_entry_none(), + pk->cmp_def); vy_range_add_slice(range, slice); vy_run_unref(run); @@ -269,31 +272,31 @@ test_basic() struct vy_stmt_template tmpl_key = STMT_TEMPLATE(0, SELECT, i); - struct tuple *key = vy_new_simple_stmt(format, - &tmpl_key); - struct tuple *res; + struct vy_entry key = 
vy_new_simple_stmt(format, key_def, + &tmpl_key); + struct vy_entry res; rc = vy_point_lookup(pk, NULL, &prv, key, &res); - tuple_unref(key); + tuple_unref(key.stmt); if (rc != 0) { has_errors = true; continue; } if (expect == 0) { /* No value expected. */ - if (res != NULL) + if (res.stmt != NULL) results_ok = false; continue; } else { - if (res == NULL) { + if (res.stmt == NULL) { results_ok = false; continue; } } uint32_t got = 0; - tuple_field_u32(res, 1, &got); - if (got != expect && expect_lsn != vy_stmt_lsn(res)) + tuple_field_u32(res.stmt, 1, &got); + if (got != expect && expect_lsn != vy_stmt_lsn(res.stmt)) results_ok = false; - tuple_unref(res); + tuple_unref(res.stmt); } } diff --git a/test/unit/vy_write_iterator.c b/test/unit/vy_write_iterator.c index ecbc6281..97fb2df3 100644 --- a/test/unit/vy_write_iterator.c +++ b/test/unit/vy_write_iterator.c @@ -110,24 +110,27 @@ compare_write_iterator_results(const struct vy_stmt_template *content, fail_if(wi == NULL); fail_if(vy_write_iterator_new_mem(wi, mem) != 0); - struct tuple *ret; + struct vy_entry ret; fail_if(wi->iface->start(wi) != 0); int i = 0; do { fail_if(wi->iface->next(wi, &ret) != 0); - if (ret == NULL) + if (ret.stmt == NULL) break; fail_if(i >= expected_count); - ok(vy_stmt_are_same(ret, &expected[i], mem->format), + ok(vy_stmt_are_same(ret, &expected[i], mem->format, key_def), "stmt %d is correct", i); ++i; - } while (ret != NULL); + } while (ret.stmt != NULL); ok(i == expected_count, "correct results count"); for (i = 0; i < handler.count; i++) { fail_if(i >= deferred_count); - ok(vy_stmt_are_same(handler.stmt[i], &deferred[i], - handler.format), + struct vy_entry entry; + entry.stmt = handler.stmt[i]; + entry.hint = vy_stmt_hint(entry.stmt, key_def); + ok(vy_stmt_are_same(entry, &deferred[i], + handler.format, key_def), "deferred stmt %d is correct", i); } if (deferred != NULL) { diff --git a/test/vinyl/cache.result b/test/vinyl/cache.result index 85741604..49d2bcc7 100644 --- 
a/test/vinyl/cache.result +++ b/test/vinyl/cache.result @@ -1033,14 +1033,14 @@ for i = 1, 100 do s:get{i} end ... box.stat.vinyl().memory.tuple_cache --- -- 107700 +- 108500 ... box.cfg{vinyl_cache = 50 * 1000} --- ... box.stat.vinyl().memory.tuple_cache --- -- 49542 +- 49910 ... box.cfg{vinyl_cache = 0} --- @@ -1116,7 +1116,7 @@ s.index.i2:count() ... box.stat.vinyl().memory.tuple_cache -- should be about 200 KB --- -- 216800 +- 219200 ... s:drop() --- diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result index ff73d42a..f49394f7 100644 --- a/test/vinyl/stat.result +++ b/test/vinyl/stat.result @@ -323,7 +323,7 @@ stat_diff(istat(), st) bytes_compressed: rows: 25 bytes: 26049 - index_size: 294 + index_size: 350 pages: 7 bytes_compressed: bloom_size: 70 @@ -370,7 +370,7 @@ stat_diff(istat(), st) bytes_compressed: rows: 50 bytes: 26042 - index_size: 252 + index_size: 300 pages: 6 bytes_compressed: compaction: @@ -769,7 +769,7 @@ _ = s:get(1) ... stat_diff(gstat(), st, 'memory.tuple_cache') --- -- 1101 +- 1109 ... s:delete(1) --- @@ -1046,7 +1046,7 @@ istat() bytes: 0 count: 0 bloom_size: 140 - index_size: 1050 + index_size: 1250 iterator: read: bytes_compressed: @@ -1122,15 +1122,15 @@ gstat() gap_locks: 0 read_views: 0 memory: - tuple_cache: 14313 + tuple_cache: 14417 tx: 0 level0: 262583 - page_index: 1050 + page_index: 1250 bloom_filter: 140 disk: data_compacted: 104300 data: 104300 - index: 1190 + index: 1390 scheduler: tasks_inprogress: 0 dump_output: 0 @@ -1338,8 +1338,8 @@ i1:len(), i2:len() ... i1:bsize(), i2:bsize() --- -- 364 -- 920 +- 420 +- 928 ... s:bsize() == st1.disk.bytes --- @@ -1400,8 +1400,8 @@ i1:len(), i2:len() ... i1:bsize(), i2:bsize() --- -- 49516 -- 50072 +- 49572 +- 50080 ... s:bsize() == st1.memory.bytes + st1.disk.bytes --- @@ -1465,8 +1465,8 @@ i1:len(), i2:len() ... i1:bsize(), i2:bsize() --- -- 364 -- 920 +- 420 +- 928 ... s:bsize() == st1.disk.bytes --- -- 2.11.0