From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH 3/3] vinyl: generate deferred DELETEs on tx commit Date: Fri, 13 Jul 2018 13:53:54 +0300 [thread overview] Message-ID: <42827440bfc02363666638674f3ae5301148e44b.1531478108.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1531478108.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1531478108.git.vdavydov.dev@gmail.com> We don't need to postpone generation of secondary index DELETEs until compaction in case the overwritten tuple is present in memory or in cache. Instead we can produce the DELETEs when the transaction is committed. This should significantly decrease the number of deferred DELETEs and hence speed up lookups in secondary indexes. Follow-up #2129 --- src/box/vy_point_lookup.c | 32 ++++++++++++++++ src/box/vy_point_lookup.h | 18 +++++++++ src/box/vy_tx.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/quota.result | 2 +- 4 files changed, 148 insertions(+), 1 deletion(-) diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index 5e43340b..7b704b84 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -293,3 +293,35 @@ done: } return 0; } + +int +vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv, + struct tuple *key, struct tuple **ret) +{ + assert(tuple_field_count(key) >= lsm->cmp_def->part_count); + + int rc; + struct vy_history history; + vy_history_create(&history, &lsm->env->history_node_pool); + + rc = vy_point_lookup_scan_cache(lsm, rv, key, &history); + if (rc != 0 || vy_history_is_terminal(&history)) + goto done; + + rc = vy_point_lookup_scan_mems(lsm, rv, key, &history); + if (rc != 0 || vy_history_is_terminal(&history)) + goto done; + + *ret = NULL; + goto out; +done: + if (rc == 0) { + int upserts_applied; + rc = vy_history_apply(&history, lsm->cmp_def, lsm->mem_format, + true, &upserts_applied, ret); + lsm->stat.upsert.applied += upserts_applied; 
+ } +out: + vy_history_cleanup(&history); + return rc; +} diff --git a/src/box/vy_point_lookup.h b/src/box/vy_point_lookup.h index 3b7c5a04..6d77ce9c 100644 --- a/src/box/vy_point_lookup.h +++ b/src/box/vy_point_lookup.h @@ -71,6 +71,24 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, struct tuple *key, struct tuple **ret); +/** + * Look up a tuple by key in memory. + * + * This function works just like vy_point_lookup() except: + * + * - It only scans in-memory level and cache and hence doesn't yield. + * - It doesn't turn DELETE into NULL so it returns NULL if and only + * if no terminal statement matching the key is present in memory + * (there still may be statements stored on disk though). + * - It doesn't account the lookup to LSM tree stats (as it never + * descends to lower levels). + * + * The function returns 0 on success, -1 on memory allocation error. + */ +int +vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv, + struct tuple *key, struct tuple **ret); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c index bfef1ada..1421cb84 100644 --- a/src/box/vy_tx.c +++ b/src/box/vy_tx.c @@ -58,6 +58,7 @@ #include "vy_history.h" #include "vy_read_set.h" #include "vy_read_view.h" +#include "vy_point_lookup.h" int write_set_cmp(struct txv *a, struct txv *b) @@ -483,6 +484,97 @@ vy_tx_write(struct vy_lsm *lsm, struct vy_mem *mem, return vy_lsm_set(lsm, mem, stmt, region_stmt); } +/** + * Try to generate a deferred DELETE statement on tx commit. + * + * This function is supposed to be called for a primary index + * statement which was executed without deletion of the overwritten + * tuple from secondary indexes. It looks up the overwritten tuple + * in memory and, if found, produces the deferred DELETEs and + * inserts them into the transaction log. + * + * Affects @tx->log, @v->stmt. 
+ * + * Returns 0 on success, -1 on memory allocation error. + */ +static int +vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v) +{ + struct vy_lsm *pk = v->lsm; + struct tuple *stmt = v->stmt; + uint8_t flags = vy_stmt_flags(stmt); + + assert(pk->index_id == 0); + assert(flags & VY_STMT_DEFERRED_DELETE); + + /* Look up the tuple overwritten by this statement. */ + struct tuple *tuple; + if (vy_point_lookup_mem(pk, &tx->xm->p_global_read_view, + stmt, &tuple) != 0) + return -1; + + if (tuple == NULL) { + /* + * Nothing's found, but there still may be + * matching statements stored on disk so we + * have to defer generation of DELETE until + * compaction. + */ + return 0; + } + + /* + * If a terminal statement is found, we can produce + * DELETE right away so clear the flag now. + */ + vy_stmt_set_flags(stmt, flags & ~VY_STMT_DEFERRED_DELETE); + + if (vy_stmt_type(tuple) == IPROTO_DELETE) { + /* The tuple's already deleted, nothing to do. */ + tuple_unref(tuple); + return 0; + } + + struct tuple *delete_stmt; + delete_stmt = vy_stmt_new_surrogate_delete(pk->mem_format, tuple); + tuple_unref(tuple); + if (delete_stmt == NULL) + return -1; + + if (vy_stmt_type(stmt) == IPROTO_DELETE) { + /* + * Since primary and secondary indexes of the + * same space share in-memory statements, we + * need to use the new DELETE in the primary + * index, because the original DELETE doesn't + * contain secondary key parts. + */ + vy_stmt_counter_acct_tuple(&pk->stat.txw.count, delete_stmt); + vy_stmt_counter_unacct_tuple(&pk->stat.txw.count, stmt); + v->stmt = delete_stmt; + tuple_ref(delete_stmt); + tuple_unref(stmt); + } + + /* + * Make DELETE statements for secondary indexes and + * insert them into the transaction log. 
+ */ + int rc = 0; + struct vy_lsm *lsm; + rlist_foreach_entry(lsm, &pk->list, list) { + struct txv *delete_txv = txv_new(tx, lsm, delete_stmt); + if (delete_txv == NULL) { + rc = -1; + break; + } + stailq_insert_entry(&tx->log, delete_txv, v, next_in_log); + vy_stmt_counter_acct_tuple(&lsm->stat.txw.count, delete_stmt); + } + tuple_unref(delete_stmt); + return rc; +} + int vy_tx_prepare(struct vy_tx *tx) { @@ -591,6 +683,11 @@ vy_tx_prepare(struct vy_tx *tx) return -1; assert(v->mem != NULL); + if (lsm->index_id == 0 && + vy_stmt_flags(v->stmt) & VY_STMT_DEFERRED_DELETE && + vy_tx_handle_deferred_delete(tx, v) != 0) + return -1; + /* In secondary indexes only REPLACE/DELETE can be written. */ vy_stmt_set_lsn(v->stmt, MAX_LSN + tx->psn); const struct tuple **region_stmt = diff --git a/test/vinyl/quota.result b/test/vinyl/quota.result index e323bc4e..48042185 100644 --- a/test/vinyl/quota.result +++ b/test/vinyl/quota.result @@ -89,7 +89,7 @@ _ = space:replace{1, 1, string.rep('a', 1024 * 1024 * 5)} ... box.stat.vinyl().quota.used --- -- 5341228 +- 5341267 ... space:drop() --- -- 2.11.0
prev parent reply other threads:[~2018-07-13 10:53 UTC|newest] Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-07-08 16:48 [RFC PATCH 02/23] vinyl: always get full tuple from pk after reading from secondary index Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 00/23] vinyl: eliminate read on REPLACE/DELETE Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 01/23] vinyl: do not turn REPLACE into INSERT when processing DML request Vladimir Davydov 2018-07-10 12:15 ` Konstantin Osipov 2018-07-10 12:19 ` Vladimir Davydov 2018-07-10 18:39 ` Konstantin Osipov 2018-07-11 7:57 ` Vladimir Davydov 2018-07-11 10:25 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 03/23] vinyl: use vy_mem_iterator for point lookup Vladimir Davydov 2018-07-17 10:14 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 04/23] vinyl: make point lookup always return the latest tuple version Vladimir Davydov 2018-07-10 16:19 ` Konstantin Osipov 2018-07-10 16:43 ` Vladimir Davydov 2018-07-11 16:33 ` Vladimir Davydov 2018-07-31 19:17 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 05/23] vinyl: fold vy_replace_one and vy_replace_impl Vladimir Davydov 2018-07-31 20:28 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 06/23] vinyl: fold vy_delete_impl Vladimir Davydov 2018-07-31 20:28 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 07/23] vinyl: refactor unique check Vladimir Davydov 2018-07-31 20:28 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 08/23] vinyl: check key uniqueness before modifying tx write set Vladimir Davydov 2018-07-31 20:34 ` Konstantin Osipov 2018-08-01 10:42 ` Vladimir Davydov 2018-08-09 20:26 ` Konstantin Osipov 2018-08-10 8:26 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 09/23] vinyl: remove env argument of vy_check_is_unique_{primary,secondary} Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 10/23] vinyl: store full tuples in secondary index cache Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 11/23] xrow: allow to store flags in DML requests 
Vladimir Davydov 2018-07-31 20:36 ` Konstantin Osipov 2018-08-01 14:10 ` Vladimir Davydov 2018-08-17 13:34 ` Vladimir Davydov 2018-08-17 13:34 ` [PATCH 1/2] xrow: allow to store tuple metadata in request Vladimir Davydov 2018-08-17 13:34 ` [PATCH 2/2] vinyl: introduce statement flags Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 12/23] vinyl: do not pass region explicitly to write iterator functions Vladimir Davydov 2018-07-17 10:16 ` Vladimir Davydov 2018-07-31 20:38 ` Konstantin Osipov 2018-08-01 14:14 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 13/23] vinyl: fix potential use-after-free in vy_read_view_merge Vladimir Davydov 2018-07-17 10:16 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 14/23] test: unit/vy_write_iterator: minor refactoring Vladimir Davydov 2018-07-17 10:17 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 15/23] vinyl: teach write iterator to return overwritten tuples Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 16/23] vinyl: allow to skip certain statements on read Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 17/23] vinyl: do not free pending tasks on shutdown Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 18/23] vinyl: store pointer to scheduler in struct vy_task Vladimir Davydov 2018-07-31 20:39 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 19/23] vinyl: rename some members of vy_scheduler and vy_task struct Vladimir Davydov 2018-07-31 20:40 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 20/23] vinyl: use cbus for communication between scheduler and worker threads Vladimir Davydov 2018-07-31 20:43 ` Konstantin Osipov 2018-08-01 14:26 ` Vladimir Davydov 2018-07-08 16:48 ` [RFC PATCH 21/23] vinyl: zap vy_scheduler::is_worker_pool_running Vladimir Davydov 2018-07-31 20:43 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 22/23] vinyl: rename vy_task::status to is_failed Vladimir Davydov 2018-07-31 20:44 ` Konstantin Osipov 2018-07-08 16:48 ` [RFC PATCH 23/23] vinyl: eliminate read on REPLACE/DELETE Vladimir Davydov 2018-07-13 
10:53 ` Vladimir Davydov 2018-07-13 10:53 ` [PATCH 1/3] stailq: add stailq_insert function Vladimir Davydov 2018-07-15 7:02 ` Konstantin Osipov 2018-07-15 13:17 ` Vladimir Davydov 2018-07-15 18:40 ` Konstantin Osipov 2018-07-17 10:18 ` Vladimir Davydov 2018-07-13 10:53 ` [PATCH 2/3] vinyl: link all indexes of the same space Vladimir Davydov 2018-07-13 10:53 ` Vladimir Davydov [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=42827440bfc02363666638674f3ae5301148e44b.1531478108.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 3/3] vinyl: generate deferred DELETEs on tx commit' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.