[Tarantool-patches] [PATCH v3 1/3] fiber.top() refactor clock and cpu time calculation

Serge Petrenko sergepetrenko at tarantool.org
Mon Nov 18 19:05:03 MSK 2019


Unify all the members related to fiber's clock statistics into struct
clock_stat and all the members related to cord's knowledge of cpu state
and clocks to struct cpu_stat.
Reset stats of all alive fibers on fiber.top_enable().

Follow-up #2694
---
 src/lib/core/fiber.c | 179 ++++++++++++++++++++++++++-----------------
 src/lib/core/fiber.h | 109 ++++++++++++++++++--------
 src/lua/fiber.c      |  20 +++--
 3 files changed, 199 insertions(+), 109 deletions(-)

diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index aebaba7f0..1e08d0ec9 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -88,6 +88,9 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
 
 #if ENABLE_FIBER_TOP
 static __thread bool fiber_top_enabled = false;
+
+uint64_t
+cpu_stat_on_csw(struct cpu_stat *stat);
 #endif /* ENABLE_FIBER_TOP */
 
 /**
@@ -103,18 +106,10 @@ clock_set_on_csw(struct fiber *caller)
 	if (!fiber_top_enabled)
 		return;
 
-	uint64_t clock;
-	uint32_t cpu_id;
-	clock = __rdtscp(&cpu_id);
+	uint64_t delta = cpu_stat_on_csw(&cord()->cpu_stat);
 
-	if (cpu_id == cord()->cpu_id_last) {
-		caller->clock_delta += clock - cord()->clock_last;
-		cord()->clock_delta += clock - cord()->clock_last;
-	} else {
-		cord()->cpu_id_last = cpu_id;
-		cord()->cpu_miss_count++;
-	}
-	cord()->clock_last = clock;
+	clock_stat_add_delta(&cord()->clock_stat, delta);
+	clock_stat_add_delta(&caller->clock_stat, delta);
 #endif /* ENABLE_FIBER_TOP */
 
 }
@@ -695,9 +690,7 @@ fiber_reset(struct fiber *fiber)
 	rlist_create(&fiber->on_stop);
 	fiber->flags = FIBER_DEFAULT_FLAGS;
 #if ENABLE_FIBER_TOP
-	fiber->cputime = 0;
-	fiber->clock_acc = 0;
-	fiber->clock_delta = 0;
+	clock_stat_reset(&fiber->clock_stat);
 #endif /* ENABLE_FIBER_TOP */
 }
 
@@ -1095,9 +1088,52 @@ loop_on_iteration_start(ev_loop *loop, ev_check *watcher, int revents)
 	(void) watcher;
 	(void) revents;
 
-	cord()->clock_last = __rdtscp(&cord()->cpu_id_last);
-	cord()->cpu_miss_count = 0;
+	cpu_stat_start(&cord()->cpu_stat);
+}
+
+/**
+ * Calculate the exponential moving average for the clock deltas
+ * per loop iteration. The coeffitient is 1/16.
+ */
+static inline uint64_t
+clock_diff_accumulate(uint64_t acc, uint64_t delta)
+{
+	if (acc > 0) {
+		return delta / 16 + 15 * acc / 16;
+	} else {
+		return delta;
+	}
+}
 
+inline void
+clock_stat_add_delta(struct clock_stat *stat, uint64_t clock_delta)
+{
+	stat->delta += clock_delta;
+}
+
+void
+clock_stat_update(struct clock_stat *stat, double nsec_per_clock)
+{
+	stat->acc = clock_diff_accumulate(stat->acc, stat->delta);
+	stat->prev_delta = stat->delta;
+	stat->cputime += stat->delta * nsec_per_clock;
+	stat->delta = 0;
+}
+
+void
+clock_stat_reset(struct clock_stat *stat)
+{
+	stat->acc = 0;
+	stat->delta = 0;
+	stat->prev_delta = 0;
+	stat->cputime = 0;
+}
+
+void
+cpu_stat_start(struct cpu_stat *stat)
+{
+	stat->prev_clock = __rdtscp(&stat->prev_cpu_id);
+	stat->cpu_miss_count = 0;
 	/*
 	 * We want to measure thread cpu time here to calculate
 	 * each fiber's cpu time, so don't use libev's ev_now() or
@@ -1106,27 +1142,59 @@ loop_on_iteration_start(ev_loop *loop, ev_check *watcher, int revents)
 	 */
 	struct timespec ts;
 	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
-		say_debug("clock_gettime(): failed to get this"
-			  "thread's cpu time.");
+		say_debug("clock_gettime(): failed to get this thread's"
+			  " cpu time.");
 		return;
 	}
-	cord()->cputime_last = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
-					  ts.tv_nsec;
+	stat->prev_cputime = (uint64_t) ts.tv_sec * FIBER_TIME_RES + ts.tv_nsec;
 }
 
+void
+cpu_stat_reset(struct cpu_stat *stat)
+{
+	stat->prev_cpu_miss_count = 0;
+	cpu_stat_start(stat);
+}
 
-/**
- * Calculate the exponential moving average for the clock deltas
- * per loop iteration. The coeffitient is 1/16.
- */
-static inline uint64_t
-clock_diff_accumulate(uint64_t acc, uint64_t delta)
+uint64_t
+cpu_stat_on_csw(struct cpu_stat *stat)
 {
-	if (acc > 0) {
-		return delta / 16 + 15 * acc / 16;
+	uint32_t cpu_id;
+	uint64_t delta = 0;
+	uint64_t clock = __rdtscp(&cpu_id);
+
+	if (cpu_id == stat->prev_cpu_id) {
+		delta = clock - stat->prev_clock;
 	} else {
-		return delta;
+		stat->prev_cpu_id = cpu_id;
+		stat->cpu_miss_count++;
+	}
+	stat->prev_clock = clock;
+
+	return delta;
+}
+
+double
+cpu_stat_end(struct cpu_stat *stat, struct clock_stat *cord_clock_stat)
+{
+	stat->prev_cpu_miss_count = stat->cpu_miss_count;
+	stat->cpu_miss_count = 0;
+
+	struct timespec ts;
+	uint64_t delta_time;
+	double nsec_per_clock = 0;
+	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
+		say_debug("clock_gettime(): failed to get this thread's"
+			  " cpu time.");
+	} else {
+		delta_time = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
+			     ts.tv_nsec;
+		if (delta_time > stat->prev_cputime && cord_clock_stat->delta > 0) {
+			delta_time -= stat->prev_cputime;
+			nsec_per_clock = (double) delta_time / cord()->clock_stat.delta;
+		}
 	}
+	return nsec_per_clock;
 }
 
 static void
@@ -1145,40 +1213,14 @@ loop_on_iteration_end(ev_loop *loop, ev_prepare *watcher, int revents)
 	 */
 	clock_set_on_csw(&cord()->sched);
 
-	cord()->cpu_miss_count_last = cord()->cpu_miss_count;
-	cord()->cpu_miss_count = 0;
+	double nsec_per_clock = cpu_stat_end(&cord()->cpu_stat,
+					     &cord()->clock_stat);
 
-	struct timespec ts;
-	uint64_t delta_time;
-	double nsec_per_clock = 0;
-
-	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
-		say_debug("clock_gettime(): failed to get this"
-			  "thread's cpu time.");
-	} else {
-		delta_time = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
-			     ts.tv_nsec;
-		assert(delta_time > cord()->cputime_last);
-		delta_time -= cord()->cputime_last;
-
-		if (cord()->clock_delta > 0)
-			nsec_per_clock = (double) delta_time / cord()->clock_delta;
-	}
-
-	cord()->clock_acc = clock_diff_accumulate(cord()->clock_acc, cord()->clock_delta);
-	cord()->clock_delta_last = cord()->clock_delta;
-	cord()->clock_delta = 0;
-
-	cord()->sched.clock_acc = clock_diff_accumulate(cord()->sched.clock_acc, cord()->sched.clock_delta);
-	cord()->sched.clock_delta_last = cord()->sched.clock_delta;
-	cord()->sched.cputime += cord()->sched.clock_delta * nsec_per_clock;
-	cord()->sched.clock_delta = 0;
+	clock_stat_update(&cord()->clock_stat, nsec_per_clock);
+	clock_stat_update(&cord()->sched.clock_stat, nsec_per_clock);
 
 	rlist_foreach_entry(fiber, &cord()->alive, link) {
-		fiber->clock_acc = clock_diff_accumulate(fiber->clock_acc, fiber->clock_delta);
-		fiber->clock_delta_last = fiber->clock_delta;
-		fiber->cputime += fiber->clock_delta * nsec_per_clock;
-		fiber->clock_delta = 0;
+		clock_stat_update(&fiber->clock_stat, nsec_per_clock);
 	}
 }
 
@@ -1203,17 +1245,14 @@ fiber_top_enable()
 		ev_check_start(cord()->loop, &cord()->check_event);
 		fiber_top_enabled = true;
 
-		cord()->clock_acc = 0;
-		cord()->cpu_miss_count_last = 0;
-		cord()->clock_delta_last = 0;
-		struct timespec ts;
-		if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
-			say_debug("clock_gettime(): failed to get this"
-				  "thread's cpu time.");
-			return;
+		cpu_stat_reset(&cord()->cpu_stat);
+		clock_stat_reset(&cord()->clock_stat);
+		clock_stat_reset(&cord()->sched.clock_stat);
+
+		struct fiber *fiber;
+		rlist_foreach_entry(fiber, &cord()->alive, link) {
+			clock_stat_reset(&fiber->clock_stat);
 		}
-		cord()->cputime_last = (uint64_t) ts.tv_sec * FIBER_TIME_RES +
-						  ts.tv_nsec;
 	}
 }
 
diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h
index ab313a050..06ce28bb1 100644
--- a/src/lib/core/fiber.h
+++ b/src/lib/core/fiber.h
@@ -56,14 +56,82 @@
 #define ENABLE_FIBER_TOP 1
 #endif
 
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
 #if ENABLE_FIBER_TOP
 /* A fiber reports used up CPU time with nanosecond resolution. */
 #define FIBER_TIME_RES 1000000000
-#endif /* ENABLE_FIBER_TOP */
 
-#if defined(__cplusplus)
-extern "C" {
-#endif /* defined(__cplusplus) */
+/**
+ * A struct containing all the info gathered for current fiber or
+ * thread as a whole when fiber.top() is enabled.
+ */
+struct clock_stat {
+	/**
+	 * Accumulated clock value calculated using exponential
+	 * moving average.
+	 */
+	uint64_t acc;
+	/**
+	 * Clock delta counter used on current event loop
+	 * iteration.
+	 */
+	uint64_t delta;
+	/**
+	 * Clock delta calculated on previous event loop
+	 * iteration.
+	 */
+	uint64_t prev_delta;
+	/**
+	 * Total processor time this fiber (or cord as a whole)
+	 * has spent with 1 / FIBER_TIME_RES second precision.
+	 */
+	uint64_t cputime;
+};
+
+void
+clock_stat_add_delta(struct clock_stat *stat, uint64_t clock_delta);
+
+void
+clock_stat_update(struct clock_stat *stat, double nsec_per_clock);
+
+void
+clock_stat_reset(struct clock_stat *stat);
+
+/**
+ * A struct encapsulating all knowledge this cord has about cpu
+ * clocks and their state.
+ */
+struct cpu_stat {
+	uint64_t prev_clock;
+	/**
+	 * This thread's CPU time at the beginning of event loop
+	 * iteration. Used to calculate how much cpu time has
+	 * each loop iteration consumed and update fiber cpu
+	 * times propotionally. The resolution is
+	 * 1 / FIBER_TIME_RES seconds.
+	 */
+	uint64_t prev_cputime;
+	uint32_t prev_cpu_id;
+	uint32_t cpu_miss_count;
+	uint32_t prev_cpu_miss_count;
+};
+
+void
+cpu_stat_start(struct cpu_stat *stat);
+
+void
+cpu_stat_reset(struct cpu_stat *stat);
+
+uint64_t
+cpu_stat_on_csw(struct cpu_stat *stat);
+
+double
+cpu_stat_end(struct cpu_stat *stat, struct clock_stat *cord_clock_stat);
+
+#endif /* ENABLE_FIBER_TOP */
 
 enum { FIBER_NAME_MAX = 32 };
 
@@ -402,21 +470,7 @@ struct fiber {
 	/** Fiber flags */
 	uint32_t flags;
 #if ENABLE_FIBER_TOP
-	/**
-	 * Accumulated clock value calculated using exponential
-	 * moving average.
-	 */
-	uint64_t clock_acc;
-	/**
-	 * Total processor time this fiber has spent with
-	 * 1 / FIBER_TIME_RES second precision.
-	 */
-	uint64_t cputime;
-	/**
-	 * Clock delta calculated on previous event loop iteration.
-	 */
-	uint64_t clock_delta_last;
-	uint64_t clock_delta;
+	struct clock_stat clock_stat;
 #endif /* ENABLE_FIBER_TOP */
 	/** Link in cord->alive or cord->dead list. */
 	struct rlist link;
@@ -490,21 +544,8 @@ struct cord {
 	 */
 	uint32_t max_fid;
 #if ENABLE_FIBER_TOP
-	uint64_t clock_acc;
-	uint64_t clock_delta;
-	uint64_t clock_delta_last;
-	uint64_t clock_last;
-	uint32_t cpu_id_last;
-	uint32_t cpu_miss_count;
-	uint32_t cpu_miss_count_last;
-	/**
-	 * This thread's CPU time at the beginning of event loop
-	 * iteration. Used to calculate how much cpu time has
-	 * each loop iteration consumed and update fiber cpu
-	 * times propotionally. The resolution is
-	 * 1 / FIBER_TIME_RES seconds.
-	 */
-	uint64_t cputime_last;
+	struct clock_stat clock_stat;
+	struct cpu_stat cpu_stat;
 #endif /* ENABLE_FIBER_TOP */
 	pthread_t id;
 	const struct cord_on_exit *on_exit;
diff --git a/src/lua/fiber.c b/src/lua/fiber.c
index 8b3b22e55..53ebec9aa 100644
--- a/src/lua/fiber.c
+++ b/src/lua/fiber.c
@@ -278,7 +278,7 @@ lbox_fiber_statof(struct fiber *f, void *cb_ctx, bool backtrace)
 	lua_settable(L, -3);
 
 	lua_pushliteral(L, "time");
-	lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
+	lua_pushnumber(L, f->clock_stat.cputime / (double) FIBER_TIME_RES);
 	lua_settable(L, -3);
 
 	lua_pushliteral(L, "memory");
@@ -335,13 +335,23 @@ lbox_fiber_top_entry(struct fiber *f, void *cb_ctx)
 	lua_newtable(L);
 
 	lua_pushliteral(L, "average");
-	lua_pushnumber(L, f->clock_acc / (double)cord()->clock_acc * 100);
+	if (cord()->clock_stat.acc != 0) {
+		lua_pushnumber(L, f->clock_stat.acc /
+				  (double)cord()->clock_stat.acc * 100);
+	} else {
+		lua_pushnumber(L, 0);
+	}
 	lua_settable(L, -3);
 	lua_pushliteral(L, "instant");
-	lua_pushnumber(L, f->clock_delta_last / (double)cord()->clock_delta_last * 100);
+	if (cord()->clock_stat.prev_delta != 0) {
+		lua_pushnumber(L, f->clock_stat.prev_delta /
+				  (double)cord()->clock_stat.prev_delta * 100);
+	} else {
+		lua_pushnumber(L, 0);
+	}
 	lua_settable(L, -3);
 	lua_pushliteral(L, "time");
-	lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES);
+	lua_pushnumber(L, f->clock_stat.cputime / (double) FIBER_TIME_RES);
 	lua_settable(L, -3);
 	lua_settable(L, -3);
 
@@ -357,7 +367,7 @@ lbox_fiber_top(struct lua_State *L)
 	}
 	lua_newtable(L);
 	lua_pushliteral(L, "cpu_misses");
-	lua_pushnumber(L, cord()->cpu_miss_count_last);
+	lua_pushnumber(L, cord()->cpu_stat.prev_cpu_miss_count);
 	lua_settable(L, -3);
 
 	lua_pushliteral(L, "cpu");
-- 
2.21.0 (Apple Git-122)



More information about the Tarantool-patches mailing list