From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 4/9] gc: run garbage collection in background
Date: Wed, 28 Nov 2018 19:14:42 +0300 [thread overview]
Message-ID: <79d04c42c94507cdab57ab4e0621001a818bf90c.1543419109.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1543419109.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1543419109.git.vdavydov.dev@gmail.com>
Currently, garbage collection is executed synchronously by functions
that may trigger it, such as gc_consumer_advance or gc_add_checkpoint.
As a result, one has to be very cautious when using those functions as
they may yield at their will. For example, we can't shoot off stale
consumers right in tx_prio handler - we have to use rather clumsy WAL
watcher interface instead. Besides, in future, when the garbage
collector state is persisted, we will need to call those functions from
on_commit trigger callback, where yielding is not normally allowed.
Actually, there's no reason to remove old files synchronously - we could
as well do it in the background. So this patch introduces a background
garbage collection fiber that executes gc_run when woken up. Now all
functions that might trigger garbage collection wake up this fiber
instead of executing gc_run directly.
---
src/box/box.cc | 12 +++++++++
src/box/gc.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++------------
src/box/gc.h | 35 +++++++++++++++++++++-----
3 files changed, 104 insertions(+), 22 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index a5e1f07a..bb7c1bb9 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -2203,6 +2203,18 @@ end:
latch_unlock(&schema_lock);
box_checkpoint_is_in_progress = false;
+
+ /*
+ * Wait for background garbage collection that might
+ * have been triggered by this checkpoint to complete.
+ * Strictly speaking, it isn't necessary, but it
+ * simplifies testing as it guarantees that by the
+ * time box.snapshot() returns, all outdated checkpoint
+ * files have been removed.
+ */
+ if (!rc)
+ gc_wait();
+
return rc;
}
diff --git a/src/box/gc.c b/src/box/gc.c
index 05773b91..9c049977 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -45,8 +45,9 @@
#include <small/rlist.h>
#include "diag.h"
+#include "fiber.h"
+#include "fiber_cond.h"
#include "say.h"
-#include "latch.h"
#include "vclock.h"
#include "cbus.h"
#include "engine.h" /* engine_collect_garbage() */
@@ -54,6 +55,9 @@
struct gc_state gc;
+static int
+gc_fiber_f(va_list);
+
/**
* Comparator used for ordering gc_consumer objects by signature
* in a binary tree.
@@ -100,7 +104,13 @@ gc_init(void)
vclock_create(&gc.vclock);
rlist_create(&gc.checkpoints);
gc_tree_new(&gc.consumers);
- latch_create(&gc.latch);
+ fiber_cond_create(&gc.cond);
+
+ gc.fiber = fiber_new("gc", gc_fiber_f);
+ if (gc.fiber == NULL)
+ panic("failed to start garbage collection fiber");
+
+ fiber_start(gc.fiber);
}
static void
@@ -202,13 +212,6 @@ gc_run(void)
return; /* nothing to do */
/*
- * Engine callbacks may sleep, because they use coio for
- * removing files. Make sure we won't try to remove the
- * same file multiple times by serializing concurrent gc
- * executions.
- */
- latch_lock(&gc.latch);
- /*
* Run garbage collection.
*
* The order is important here: we must invoke garbage
@@ -220,7 +223,51 @@ gc_run(void)
rc = engine_collect_garbage(&checkpoint->vclock);
if (run_wal_gc && rc == 0)
wal_collect_garbage(vclock);
- latch_unlock(&gc.latch);
+}
+
+static int
+gc_fiber_f(va_list ap)
+{
+ (void)ap;
+ while (!fiber_is_cancelled()) {
+ int delta = gc.scheduled - gc.completed;
+ if (delta == 0) {
+ /* No pending garbage collection. */
+ fiber_sleep(TIMEOUT_INFINITY);
+ continue;
+ }
+ assert(delta > 0);
+ gc_run();
+ gc.completed += delta;
+ fiber_cond_signal(&gc.cond);
+ }
+ return 0;
+}
+
+/**
+ * Trigger asynchronous garbage collection.
+ */
+static void
+gc_schedule(void)
+{
+ /*
+ * Do not wake up the background fiber if it's executing
+ * the garbage collection procedure right now, because
+ * it may be waiting for a cbus message, which doesn't
+ * tolerate spurious wakeups. Just increment the counter
+ * then - it will rerun garbage collection as soon as
+ * the current round completes.
+ */
+ if (gc.scheduled++ == gc.completed)
+ fiber_wakeup(gc.fiber);
+}
+
+void
+gc_wait(void)
+{
+ unsigned scheduled = gc.scheduled;
+ while (gc.completed < scheduled)
+ fiber_cond_wait(&gc.cond);
}
/**
@@ -255,7 +302,7 @@ gc_process_wal_event(struct wal_watcher_msg *msg)
consumer = next;
}
- gc_run();
+ gc_schedule();
}
void
@@ -276,7 +323,7 @@ gc_add_checkpoint(const struct vclock *vclock)
* Rerun the garbage collector in this case, just
* in case box.cfg.checkpoint_count has changed.
*/
- gc_run();
+ gc_schedule();
return;
}
assert(last_checkpoint == NULL ||
@@ -295,7 +342,7 @@ gc_add_checkpoint(const struct vclock *vclock)
rlist_add_tail_entry(&gc.checkpoints, checkpoint, in_checkpoints);
gc.checkpoint_count++;
- gc_run();
+ gc_schedule();
}
void
@@ -314,7 +361,7 @@ void
gc_unref_checkpoint(struct gc_checkpoint_ref *ref)
{
rlist_del_entry(ref, in_refs);
- gc_run();
+ gc_schedule();
}
struct gc_consumer *
@@ -342,7 +389,7 @@ gc_consumer_unregister(struct gc_consumer *consumer)
{
if (!consumer->is_inactive) {
gc_tree_remove(&gc.consumers, consumer);
- gc_run();
+ gc_schedule();
}
gc_consumer_delete(consumer);
}
@@ -376,7 +423,7 @@ gc_consumer_advance(struct gc_consumer *consumer, const struct vclock *vclock)
if (update_tree)
gc_tree_insert(&gc.consumers, consumer);
- gc_run();
+ gc_schedule();
}
struct gc_consumer *
diff --git a/src/box/gc.h b/src/box/gc.h
index eab19ba3..6e96d7bb 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -34,8 +34,8 @@
#include <stddef.h>
#include <small/rlist.h>
+#include "fiber_cond.h"
#include "vclock.h"
-#include "latch.h"
#include "wal.h"
#include "trivia/util.h"
@@ -43,6 +43,7 @@
extern "C" {
#endif /* defined(__cplusplus) */
+struct fiber;
struct gc_consumer;
enum { GC_NAME_MAX = 64 };
@@ -122,15 +123,30 @@ struct gc_state {
/** Registered consumers, linked by gc_consumer::node. */
gc_tree_t consumers;
/**
- * Latch serializing concurrent invocations of engine
- * garbage collection callbacks.
- */
- struct latch latch;
- /**
* WAL event watcher. Needed to shoot off stale consumers
* when a WAL file is deleted due to ENOSPC.
*/
struct wal_watcher wal_watcher;
+ /** Fiber that removes old files in the background. */
+ struct fiber *fiber;
+ /**
+ * Condition variable signaled by the background fiber
+ * whenever it completes a round of garbage collection.
+ * Used to wait for garbage collection to complete.
+ */
+ struct fiber_cond cond;
+ /**
+ * The following two members are used for scheduling
+ * background garbage collection and waiting for it to
+ * complete. To trigger background garbage collection,
+ * @scheduled is incremented. Whenever a round of garbage
+ * collection completes, @completed is incremented. Thus
+ * to wait for background garbage collection scheduled
+ * at a particular moment of time to complete, one should
+ * sleep until @completed reaches the value of @scheduled
+ * taken at that moment of time.
+ */
+ unsigned completed, scheduled;
};
extern struct gc_state gc;
@@ -188,6 +204,13 @@ void
gc_free(void);
/**
+ * Wait for background garbage collection scheduled prior
+ * to this point to complete.
+ */
+void
+gc_wait(void);
+
+/**
* Update the minimal number of checkpoints to preserve.
* Called when box.cfg.checkpoint_count is updated.
*
--
2.11.0
next prev parent reply other threads:[~2018-11-28 16:14 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-28 16:14 [PATCH 0/9] Allow to limit size of WAL files Vladimir Davydov
2018-11-28 16:14 ` [PATCH 1/9] wal: separate checkpoint and flush paths Vladimir Davydov
2018-11-29 16:24 ` [tarantool-patches] " Konstantin Osipov
2018-11-28 16:14 ` [PATCH 2/9] wal: remove files needed for recovery from backup checkpoints on ENOSPC Vladimir Davydov
2018-11-29 16:31 ` [tarantool-patches] " Konstantin Osipov
2018-11-29 17:42 ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 3/9] recovery: restore garbage collector vclock after restart Vladimir Davydov
2018-11-29 16:37 ` [tarantool-patches] " Konstantin Osipov
2018-11-29 17:42 ` Vladimir Davydov
2018-11-28 16:14 ` Vladimir Davydov [this message]
2018-11-29 16:42 ` [tarantool-patches] Re: [PATCH 4/9] gc: run garbage collection in background Konstantin Osipov
2018-11-29 17:43 ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 5/9] gc: do not use WAL watcher API for deactivating stale consumers Vladimir Davydov
2018-11-29 17:02 ` [tarantool-patches] " Konstantin Osipov
2018-11-28 16:14 ` [PATCH 6/9] wal: simplify watcher API Vladimir Davydov
2018-11-29 17:33 ` [tarantool-patches] " Konstantin Osipov
2018-11-28 16:14 ` [PATCH 7/9] box: rewrite checkpoint daemon in C Vladimir Davydov
2018-11-30 8:58 ` [tarantool-patches] " Konstantin Osipov
2018-11-30 9:41 ` Vladimir Davydov
2018-12-05 16:21 ` Vladimir Davydov
2018-11-28 16:14 ` [PATCH 8/9] wal: pass struct instead of vclock to checkpoint methods Vladimir Davydov
2018-11-30 9:00 ` [tarantool-patches] " Konstantin Osipov
2018-11-30 9:43 ` Vladimir Davydov
2018-12-03 20:20 ` Konstantin Osipov
2018-11-28 16:14 ` [PATCH 9/9] wal: trigger checkpoint if there are too many WALs Vladimir Davydov
2018-12-03 20:34 ` [tarantool-patches] " Konstantin Osipov
2018-12-04 11:25 ` Vladimir Davydov
2018-12-04 12:53 ` Konstantin Osipov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=79d04c42c94507cdab57ab4e0621001a818bf90c.1543419109.git.vdavydov.dev@gmail.com \
--to=vdavydov.dev@gmail.com \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='Re: [PATCH 4/9] gc: run garbage collection in background' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox