[Tarantool-patches] [PATCH v3 1/3] fiber.top() refactor clock and cpu time calculation
Serge Petrenko
sergepetrenko at tarantool.org
Mon Nov 18 19:05:03 MSK 2019
Unify all the members related to fiber's clock statistics into struct
clock_stat and all the members related to cord's knowledge of cpu state
and clocks to struct cpu_stat.
Reset stats of all alive fibers on fiber.top_enable().
Follow-up #2694
---
src/lib/core/fiber.c | 179 ++++++++++++++++++++++++++-----------------
src/lib/core/fiber.h | 109 ++++++++++++++++++--------
src/lua/fiber.c | 20 +++--
3 files changed, 199 insertions(+), 109 deletions(-)
diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index aebaba7f0..1e08d0ec9 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -88,6 +88,9 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
#if ENABLE_FIBER_TOP
static __thread bool fiber_top_enabled = false;
+
+uint64_t
+cpu_stat_on_csw(struct cpu_stat *stat);
#endif /* ENABLE_FIBER_TOP */
/**
@@ -103,18 +106,10 @@ clock_set_on_csw(struct fiber *caller)
if (!fiber_top_enabled)
return;
- uint64_t clock;
- uint32_t cpu_id;
- clock = __rdtscp(&cpu_id);
+ uint64_t delta = cpu_stat_on_csw(&cord()->cpu_stat);
- if (cpu_id == cord()->cpu_id_last) {
- caller->clock_delta += clock - cord()->clock_last;
- cord()->clock_delta += clock - cord()->clock_last;
- } else {
- cord()->cpu_id_last = cpu_id;
- cord()->cpu_miss_count++;
- }
- cord()->clock_last = clock;
+ clock_stat_add_delta(&cord()->clock_stat, delta);
+ clock_stat_add_delta(&caller->clock_stat, delta);
#endif /* ENABLE_FIBER_TOP */
}
@@ -695,9 +690,7 @@ fiber_reset(struct fiber *fiber)
rlist_create(&fiber->on_stop);
fiber->flags = FIBER_DEFAULT_FLAGS;
#if ENABLE_FIBER_TOP
- fiber->cputime = 0;
- fiber->clock_acc = 0;
- fiber->clock_delta = 0;
+ clock_stat_reset(&fiber->clock_stat);
#endif /* ENABLE_FIBER_TOP */
}
@@ -1095,9 +1088,52 @@ loop_on_iteration_start(ev_loop *loop, ev_check *watcher, int revents)
(void) watcher;
(void) revents;
- cord()->clock_last = __rdtscp(&cord()->cpu_id_last);
- cord()->cpu_miss_count = 0;
+ cpu_stat_start(&cord()->cpu_stat);
+}
+
+/**
+ * Calculate the exponential moving average for the clock deltas
+ * per loop iteration. The coefficient is 1/16.
+ */
+static inline uint64_t
+clock_diff_accumulate(uint64_t acc, uint64_t delta)
+{
+ if (acc > 0) {
+ return delta / 16 + 15 * acc / 16;
+ } else {
+ return delta;
+ }
+}
+inline void
+clock_stat_add_delta(struct clock_stat *stat, uint64_t clock_delta)
+{
+ stat->delta += clock_delta;
+}
+
+void
+clock_stat_update(struct clock_stat *stat, double nsec_per_clock)
+{
+ stat->acc = clock_diff_accumulate(stat->acc, stat->delta);
+ stat->prev_delta = stat->delta;
+ stat->cputime += stat->delta * nsec_per_clock;
+ stat->delta = 0;
+}
+
+void
+clock_stat_reset(struct clock_stat *stat)
+{
+ stat->acc = 0;
+ stat->delta = 0;
+ stat->prev_delta = 0;
+ stat->cputime = 0;
+}
+
+void
+cpu_stat_start(struct cpu_stat *stat)
+{
+ stat->prev_clock = __rdtscp(&stat->prev_cpu_id);
+ stat->cpu_miss_count = 0;
/*
* We want to measure thread cpu time here to calculate
* each fiber's cpu time, so don't use libev's ev_now() or
@@ -1106,27 +1142,59 @@ loop_on_iteration_start(ev_loop *loop, ev_check *watcher, int revents)
*/
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
- say_debug("clock_gettime(): failed to get this"
- "thread's cpu time.");
+ say_debug("clock_gettime(): failed to get this thread's"
+ " cpu time.");
return;
}
- cord()->cputime_last = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
- ts.tv_nsec;
+ stat->prev_cputime = (uint64_t) ts.tv_sec * FIBER_TIME_RES + ts.tv_nsec;
}
+void
+cpu_stat_reset(struct cpu_stat *stat)
+{
+ stat->prev_cpu_miss_count = 0;
+ cpu_stat_start(stat);
+}
-/**
- * Calculate the exponential moving average for the clock deltas
- * per loop iteration. The coeffitient is 1/16.
- */
-static inline uint64_t
-clock_diff_accumulate(uint64_t acc, uint64_t delta)
+uint64_t
+cpu_stat_on_csw(struct cpu_stat *stat)
{
- if (acc > 0) {
- return delta / 16 + 15 * acc / 16;
+ uint32_t cpu_id;
+ uint64_t delta = 0;
+ uint64_t clock = __rdtscp(&cpu_id);
+
+ if (cpu_id == stat->prev_cpu_id) {
+ delta = clock - stat->prev_clock;
} else {
- return delta;
+ stat->prev_cpu_id = cpu_id;
+ stat->cpu_miss_count++;
+ }
+ stat->prev_clock = clock;
+
+ return delta;
+}
+
+/**
+ * Finish cpu accounting for the current event loop iteration.
+ *
+ * Saves the cpu miss counter of the iteration into
+ * prev_cpu_miss_count and zeroes the running counter, then
+ * reads this thread's cpu time to work out how many nanoseconds
+ * one clock tick of the iteration took.
+ *
+ * @param stat cpu state of the cord.
+ * @param cord_clock_stat clock statistics whose delta holds the
+ *        clock ticks counted during the iteration.
+ *
+ * @retval nanoseconds per clock tick, or 0 when clock_gettime()
+ *         fails, the thread cpu time has not advanced, or no
+ *         clock ticks were counted.
+ */
+double
+cpu_stat_end(struct cpu_stat *stat, struct clock_stat *cord_clock_stat)
+{
+	stat->prev_cpu_miss_count = stat->cpu_miss_count;
+	stat->cpu_miss_count = 0;
+
+	struct timespec ts;
+	uint64_t delta_time;
+	double nsec_per_clock = 0;
+	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
+		say_debug("clock_gettime(): failed to get this thread's"
+			  " cpu time.");
+	} else {
+		delta_time = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
+			     ts.tv_nsec;
+		if (delta_time > stat->prev_cputime &&
+		    cord_clock_stat->delta > 0) {
+			delta_time -= stat->prev_cputime;
+			/*
+			 * Divide by the very delta that was just
+			 * checked for being non-zero, not by a
+			 * value fetched from the global cord.
+			 */
+			nsec_per_clock = (double) delta_time /
+					 cord_clock_stat->delta;
+		}
 	}
+	return nsec_per_clock;
 }
static void
@@ -1145,40 +1213,14 @@ loop_on_iteration_end(ev_loop *loop, ev_prepare *watcher, int revents)
*/
clock_set_on_csw(&cord()->sched);
- cord()->cpu_miss_count_last = cord()->cpu_miss_count;
- cord()->cpu_miss_count = 0;
+ double nsec_per_clock = cpu_stat_end(&cord()->cpu_stat,
+ &cord()->clock_stat);
- struct timespec ts;
- uint64_t delta_time;
- double nsec_per_clock = 0;
-
- if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
- say_debug("clock_gettime(): failed to get this"
- "thread's cpu time.");
- } else {
- delta_time = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
- ts.tv_nsec;
- assert(delta_time > cord()->cputime_last);
- delta_time -= cord()->cputime_last;
-
- if (cord()->clock_delta > 0)
- nsec_per_clock = (double) delta_time / cord()->clock_delta;
- }
-
- cord()->clock_acc = clock_diff_accumulate(cord()->clock_acc, cord()->clock_delta);
- cord()->clock_delta_last = cord()->clock_delta;
- cord()->clock_delta = 0;
-
- cord()->sched.clock_acc = clock_diff_accumulate(cord()->sched.clock_acc, cord()->sched.clock_delta);
- cord()->sched.clock_delta_last = cord()->sched.clock_delta;
- cord()->sched.cputime += cord()->sched.clock_delta * nsec_per_clock;
- cord()->sched.clock_delta = 0;
+ clock_stat_update(&cord()->clock_stat, nsec_per_clock);
+ clock_stat_update(&cord()->sched.clock_stat, nsec_per_clock);
rlist_foreach_entry(fiber, &cord()->alive, link) {
- fiber->clock_acc = clock_diff_accumulate(fiber->clock_acc, fiber->clock_delta);
- fiber->clock_delta_last = fiber->clock_delta;
- fiber->cputime += fiber->clock_delta * nsec_per_clock;
- fiber->clock_delta = 0;
+ clock_stat_update(&fiber->clock_stat, nsec_per_clock);
}
}
@@ -1203,17 +1245,14 @@ fiber_top_enable()
ev_check_start(cord()->loop, &cord()->check_event);
fiber_top_enabled = true;
- cord()->clock_acc = 0;
- cord()->cpu_miss_count_last = 0;
- cord()->clock_delta_last = 0;
- struct timespec ts;
- if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
- say_debug("clock_gettime(): failed to get this"
- "thread's cpu time.");
- return;
+ cpu_stat_reset(&cord()->cpu_stat);
+ clock_stat_reset(&cord()->clock_stat);
+ clock_stat_reset(&cord()->sched.clock_stat);
+
+ struct fiber *fiber;
+ rlist_foreach_entry(fiber, &cord()->alive, link) {
+ clock_stat_reset(&fiber->clock_stat);
}
- cord()->cputime_last = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
- ts.tv_nsec;
}
}
diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h
index ab313a050..06ce28bb1 100644
--- a/src/lib/core/fiber.h
+++ b/src/lib/core/fiber.h
@@ -56,14 +56,82 @@
#define ENABLE_FIBER_TOP 1
#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
#if ENABLE_FIBER_TOP
/* A fiber reports used up CPU time with nanosecond resolution. */
#define FIBER_TIME_RES 1000000000
-#endif /* ENABLE_FIBER_TOP */
-#if defined(__cplusplus)
-extern "C" {
-#endif /* defined(__cplusplus) */
+/**
+ * A struct containing all the info gathered for current fiber or
+ * thread as a whole when fiber.top() is enabled.
+ */
+struct clock_stat {
+ /**
+ * Accumulated clock value calculated using exponential
+ * moving average.
+ */
+ uint64_t acc;
+ /**
+ * Clock delta counter used on current event loop
+ * iteration.
+ */
+ uint64_t delta;
+ /**
+ * Clock delta calculated on previous event loop
+ * iteration.
+ */
+ uint64_t prev_delta;
+ /**
+ * Total processor time this fiber (or cord as a whole)
+ * has spent with 1 / FIBER_TIME_RES second precision.
+ */
+ uint64_t cputime;
+};
+
+void
+clock_stat_add_delta(struct clock_stat *stat, uint64_t clock_delta);
+
+void
+clock_stat_update(struct clock_stat *stat, double nsec_per_clock);
+
+void
+clock_stat_reset(struct clock_stat *stat);
+
+/**
+ * A struct encapsulating all knowledge this cord has about cpu
+ * clocks and their state.
+ */
+struct cpu_stat {
+ uint64_t prev_clock;
+ /**
+ * This thread's CPU time at the beginning of event loop
+ * iteration. Used to calculate how much cpu time each
+ * loop iteration has consumed and update fiber cpu
+ * times proportionally. The resolution is
+ * 1 / FIBER_TIME_RES seconds.
+ */
+ uint64_t prev_cputime;
+ uint32_t prev_cpu_id;
+ uint32_t cpu_miss_count;
+ uint32_t prev_cpu_miss_count;
+};
+
+void
+cpu_stat_start(struct cpu_stat *stat);
+
+void
+cpu_stat_reset(struct cpu_stat *stat);
+
+uint64_t
+cpu_stat_on_csw(struct cpu_stat *stat);
+
+double
+cpu_stat_end(struct cpu_stat *stat, struct clock_stat *cord_clock_stat);
+
+#endif /* ENABLE_FIBER_TOP */
enum { FIBER_NAME_MAX = 32 };
@@ -402,21 +470,7 @@ struct fiber {
/** Fiber flags */
uint32_t flags;
#if ENABLE_FIBER_TOP
- /**
- * Accumulated clock value calculated using exponential
- * moving average.
- */
- uint64_t clock_acc;
- /**
- * Total processor time this fiber has spent with
- * 1 / FIBER_TIME_RES second precision.
- */
- uint64_t cputime;
- /**
- * Clock delta calculated on previous event loop iteration.
- */
- uint64_t clock_delta_last;
- uint64_t clock_delta;
+ struct clock_stat clock_stat;
#endif /* ENABLE_FIBER_TOP */
/** Link in cord->alive or cord->dead list. */
struct rlist link;
@@ -490,21 +544,8 @@ struct cord {
*/
uint32_t max_fid;
#if ENABLE_FIBER_TOP
- uint64_t clock_acc;
- uint64_t clock_delta;
- uint64_t clock_delta_last;
- uint64_t clock_last;
- uint32_t cpu_id_last;
- uint32_t cpu_miss_count;
- uint32_t cpu_miss_count_last;
- /**
- * This thread's CPU time at the beginning of event loop
- * iteration. Used to calculate how much cpu time has
- * each loop iteration consumed and update fiber cpu
- * times propotionally. The resolution is
- * 1 / FIBER_TIME_RES seconds.
- */
- uint64_t cputime_last;
+ struct clock_stat clock_stat;
+ struct cpu_stat cpu_stat;
#endif /* ENABLE_FIBER_TOP */
pthread_t id;
const struct cord_on_exit *on_exit;
diff --git a/src/lua/fiber.c b/src/lua/fiber.c
index 8b3b22e55..53ebec9aa 100644
--- a/src/lua/fiber.c
+++ b/src/lua/fiber.c
@@ -278,7 +278,7 @@ lbox_fiber_statof(struct fiber *f, void *cb_ctx, bool backtrace)
lua_settable(L, -3);
lua_pushliteral(L, "time");
- lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
+ lua_pushnumber(L, f->clock_stat.cputime / (double) FIBER_TIME_RES);
lua_settable(L, -3);
lua_pushliteral(L, "memory");
@@ -335,13 +335,23 @@ lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
lua_newtable(L);
lua_pushliteral(L, "average");
- lua_pushnumber(L, f->clock_acc / (double)cord()->clock_acc * 100);
+ if (cord()->clock_stat.acc != 0) {
+ lua_pushnumber(L, f->clock_stat.acc /
+ (double)cord()->clock_stat.acc * 100);
+ } else {
+ lua_pushnumber(L, 0);
+ }
lua_settable(L, -3);
lua_pushliteral(L, "instant");
- lua_pushnumber(L, f->clock_delta_last / (double)cord()->clock_delta_last * 100);
+ if (cord()->clock_stat.prev_delta != 0) {
+ lua_pushnumber(L, f->clock_stat.prev_delta /
+ (double)cord()->clock_stat.prev_delta * 100);
+ } else {
+ lua_pushnumber(L, 0);
+ }
lua_settable(L, -3);
lua_pushliteral(L, "time");
- lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
+ lua_pushnumber(L, f->clock_stat.cputime / (double) FIBER_TIME_RES);
lua_settable(L, -3);
lua_settable(L, -3);
@@ -357,7 +367,7 @@ lbox_fiber_top(struct lua_State *L)
}
lua_newtable(L);
lua_pushliteral(L, "cpu_misses");
- lua_pushnumber(L, cord()->cpu_miss_count_last);
+ lua_pushnumber(L, cord()->cpu_stat.prev_cpu_miss_count);
lua_settable(L, -3);
lua_pushliteral(L, "cpu");
--
2.21.0 (Apple Git-122)
More information about the Tarantool-patches
mailing list