[PATCH 1/2] xlog: cleanup setting of write options

Vladimir Davydov vdavydov.dev at gmail.com
Wed Apr 10 17:22:53 MSK 2019


The way xlog write options (sync_interval and others) are set is a mess:
if an xlog is created with xlog_create(), we overwrite them explicitly;
if an xlog is created with xdir_create_xlog(), we inherit parameters
from the xdir, which sets them depending on the xdir type (SNAP, XLOG,
or VYLOG), but sometimes we overwrite them explicitly as well. The more
options we add, the worse it gets.

To clean it up, let's add an auxiliary structure combining all xlog
write options and pass it to xlog_create() and xdir_create() everywhere.
---
https://github.com/tarantool/tarantool/issues/2389
https://github.com/tarantool/tarantool/commits/dv/gh-2389-vy-disable-l1-compression

 src/box/memtx_engine.c | 19 ++++++++-----
 src/box/recovery.cc    |  3 +-
 src/box/vy_log.c       |  5 ++--
 src/box/vy_run.c       | 16 +++++++----
 src/box/wal.c          |  6 ++--
 src/box/xlog.c         | 56 ++++++++++++++++++--------------------
 src/box/xlog.h         | 74 ++++++++++++++++++++++++++------------------------
 7 files changed, 96 insertions(+), 83 deletions(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 924f8bbc..48f700a0 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -49,6 +49,9 @@
 #include "schema.h"
 #include "gc.h"
 
+/* sync snapshot every 16MB */
+#define SNAP_SYNC_INTERVAL	(1 << 24)
+
 /*
  * Memtx yield-in-transaction trigger: roll back the effects
  * of the transaction and mark the transaction as aborted.
@@ -557,7 +560,6 @@ struct checkpoint {
 	 * read view iterators.
 	 */
 	struct rlist entries;
-	uint64_t snap_io_rate_limit;
 	struct cord cord;
 	bool waiting_for_snap_thread;
 	/** The vclock of the snapshot file. */
@@ -581,8 +583,11 @@ checkpoint_new(const char *snap_dirname, uint64_t snap_io_rate_limit)
 	}
 	rlist_create(&ckpt->entries);
 	ckpt->waiting_for_snap_thread = false;
-	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID);
-	ckpt->snap_io_rate_limit = snap_io_rate_limit;
+	struct xlog_opts opts = xlog_opts_default;
+	opts.rate_limit = snap_io_rate_limit;
+	opts.sync_interval = SNAP_SYNC_INTERVAL;
+	opts.free_cache = true;
+	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID, &opts);
 	vclock_create(&ckpt->vclock);
 	ckpt->touch = false;
 	return ckpt;
@@ -647,8 +652,6 @@ checkpoint_f(va_list ap)
 	if (xdir_create_xlog(&ckpt->dir, &snap, &ckpt->vclock) != 0)
 		return -1;
 
-	snap.rate_limit = ckpt->snap_io_rate_limit;
-
 	say_info("saving snapshot `%s'", snap.filename);
 	struct checkpoint_entry *entry;
 	rlist_foreach_entry(entry, &ckpt->entries, link) {
@@ -843,7 +846,8 @@ memtx_initial_join_f(va_list ap)
 	 * snap_dirname and INSTANCE_UUID don't change after start,
 	 * safe to use in another thread.
 	 */
-	xdir_create(&dir, snap_dirname, SNAP, &INSTANCE_UUID);
+	xdir_create(&dir, snap_dirname, SNAP, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	struct xlog_cursor cursor;
 	int rc = xdir_open_cursor(&dir, checkpoint_lsn, &cursor);
 	xdir_destroy(&dir);
@@ -997,7 +1001,8 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		return NULL;
 	}
 
-	xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID);
+	xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	memtx->snap_dir.force_recovery = force_recovery;
 
 	if (xdir_scan(&memtx->snap_dir) != 0)
diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index cf3a707a..d122d618 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -95,7 +95,8 @@ recovery_new(const char *wal_dirname, bool force_recovery,
 		free(r);
 	});
 
-	xdir_create(&r->wal_dir, wal_dirname, XLOG, &INSTANCE_UUID);
+	xdir_create(&r->wal_dir, wal_dirname, XLOG, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	r->wal_dir.force_recovery = force_recovery;
 
 	vclock_copy(&r->vclock, vclock);
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 85b61a84..be97cdbb 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -734,7 +734,8 @@ err:
 void
 vy_log_init(const char *dir)
 {
-	xdir_create(&vy_log.dir, dir, VYLOG, &INSTANCE_UUID);
+	xdir_create(&vy_log.dir, dir, VYLOG, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	latch_create(&vy_log.latch);
 	region_create(&vy_log.pool, cord_slab_cache());
 	stailq_create(&vy_log.tx);
@@ -830,7 +831,7 @@ vy_log_open(struct xlog *xlog)
 	 */
 	const char *path = vy_log_filename(vclock_sum(&vy_log.last_checkpoint));
 	if (access(path, F_OK) == 0)
-		return xlog_open(xlog, path);
+		return xlog_open(xlog, path, &vy_log.dir.opts);
 
 	if (errno != ENOENT) {
 		diag_set(SystemError, "failed to access file '%s'", path);
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index 03db2f50..faa28f47 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -71,6 +71,9 @@ const char *vy_file_suffix[] = {
 	"run" inprogress_suffix, 	/* VY_FILE_RUN_INPROGRESS */
 };
 
+/* sync run and index files very 16 MB */
+#define VY_RUN_SYNC_INTERVAL (1 << 24)
+
 /**
  * We read runs in background threads so as not to stall tx.
  * This structure represents such a thread.
@@ -2032,11 +2035,12 @@ vy_run_write_index(struct vy_run *run, const char *dirpath,
 	struct xlog_meta meta;
 	xlog_meta_create(&meta, XLOG_META_TYPE_INDEX, &INSTANCE_UUID,
 			 NULL, NULL);
-	if (xlog_create(&index_xlog, path, 0, &meta) < 0)
+	struct xlog_opts opts = xlog_opts_default;
+	opts.rate_limit = run->env->snap_io_rate_limit;
+	opts.sync_interval = VY_RUN_SYNC_INTERVAL;
+	if (xlog_create(&index_xlog, path, 0, &meta, &opts) < 0)
 		return -1;
 
-	index_xlog.rate_limit = run->env->snap_io_rate_limit;
-
 	xlog_tx_begin(&index_xlog);
 	struct region *region = &fiber()->gc;
 	size_t mem_used = region_used(region);
@@ -2128,9 +2132,11 @@ vy_run_writer_create_xlog(struct vy_run_writer *writer)
 	struct xlog_meta meta;
 	xlog_meta_create(&meta, XLOG_META_TYPE_RUN, &INSTANCE_UUID,
 			 NULL, NULL);
-	if (xlog_create(&writer->data_xlog, path, 0, &meta) != 0)
+	struct xlog_opts opts = xlog_opts_default;
+	opts.rate_limit = writer->run->env->snap_io_rate_limit;
+	opts.sync_interval = VY_RUN_SYNC_INTERVAL;
+	if (xlog_create(&writer->data_xlog, path, 0, &meta, &opts) != 0)
 		return -1;
-	writer->data_xlog.rate_limit = writer->run->env->snap_io_rate_limit;
 	return 0;
 }
 
diff --git a/src/box/wal.c b/src/box/wal.c
index 3faad9c3..ad8ff7c6 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -353,7 +353,9 @@ wal_writer_create(struct wal_writer *writer, enum wal_mode wal_mode,
 	journal_create(&writer->base, wal_mode == WAL_NONE ?
 		       wal_write_in_wal_mode_none : wal_write, NULL);
 
-	xdir_create(&writer->wal_dir, wal_dirname, XLOG, instance_uuid);
+	struct xlog_opts opts = xlog_opts_default;
+	opts.sync_is_async = true;
+	xdir_create(&writer->wal_dir, wal_dirname, XLOG, instance_uuid, &opts);
 	xlog_clear(&writer->current_wal);
 	if (wal_mode == WAL_FSYNC)
 		writer->wal_dir.open_wflags |= O_SYNC;
@@ -392,7 +394,7 @@ wal_open_f(struct cbus_call_msg *msg)
 	const char *path = xdir_format_filename(&writer->wal_dir,
 				vclock_sum(&writer->vclock), NONE);
 	assert(!xlog_is_open(&writer->current_wal));
-	return xlog_open(&writer->current_wal, path);
+	return xlog_open(&writer->current_wal, path, &writer->wal_dir.opts);
 }
 
 /**
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 692b5f5e..1e423ecc 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -89,6 +89,13 @@ enum {
 	XLOG_TX_COMPRESS_THRESHOLD = 2 * 1024,
 };
 
+const struct xlog_opts xlog_opts_default = {
+	.rate_limit = 0,
+	.sync_interval = 0,
+	.free_cache = false,
+	.sync_is_async = false,
+};
+
 /* {{{ struct xlog_meta */
 
 enum {
@@ -317,14 +324,12 @@ xlog_meta_parse(struct xlog_meta *meta, const char **data,
 
 /* {{{ struct xdir */
 
-/* sync snapshot every 16MB */
-#define SNAP_SYNC_INTERVAL	(1 << 24)
-
 void
-xdir_create(struct xdir *dir, const char *dirname,
-	    enum xdir_type type, const struct tt_uuid *instance_uuid)
+xdir_create(struct xdir *dir, const char *dirname, enum xdir_type type,
+	    const struct tt_uuid *instance_uuid, const struct xlog_opts *opts)
 {
 	memset(dir, 0, sizeof(*dir));
+	dir->opts = *opts;
 	vclockset_new(&dir->index);
 	/* Default mode. */
 	dir->mode = 0660;
@@ -336,10 +341,8 @@ xdir_create(struct xdir *dir, const char *dirname,
 		dir->filetype = "SNAP";
 		dir->filename_ext = ".snap";
 		dir->suffix = INPROGRESS;
-		dir->sync_interval = SNAP_SYNC_INTERVAL;
 		break;
 	case XLOG:
-		dir->sync_is_async = true;
 		dir->filetype = "XLOG";
 		dir->filename_ext = ".xlog";
 		dir->suffix = NONE;
@@ -761,10 +764,10 @@ xlog_rename(struct xlog *l)
 }
 
 static int
-xlog_init(struct xlog *xlog)
+xlog_init(struct xlog *xlog, const struct xlog_opts *opts)
 {
 	memset(xlog, 0, sizeof(*xlog));
-	xlog->sync_interval = SNAP_SYNC_INTERVAL;
+	xlog->opts = *opts;
 	xlog->sync_time = ev_monotonic_time();
 	xlog->is_autocommit = true;
 	obuf_create(&xlog->obuf, &cord()->slabc, XLOG_TX_AUTOCOMMIT_THRESHOLD);
@@ -799,7 +802,7 @@ xlog_destroy(struct xlog *xlog)
 
 int
 xlog_create(struct xlog *xlog, const char *name, int flags,
-	    const struct xlog_meta *meta)
+	    const struct xlog_meta *meta, const struct xlog_opts *opts)
 {
 	char meta_buf[XLOG_META_LEN_MAX];
 	int meta_len;
@@ -814,7 +817,7 @@ xlog_create(struct xlog *xlog, const char *name, int flags,
 		goto err;
 	}
 
-	if (xlog_init(xlog) != 0)
+	if (xlog_init(xlog, opts) != 0)
 		goto err;
 
 	xlog->meta = *meta;
@@ -867,7 +870,7 @@ err:
 }
 
 int
-xlog_open(struct xlog *xlog, const char *name)
+xlog_open(struct xlog *xlog, const char *name, const struct xlog_opts *opts)
 {
 	char magic[sizeof(log_magic_t)];
 	char meta_buf[XLOG_META_LEN_MAX];
@@ -875,7 +878,7 @@ xlog_open(struct xlog *xlog, const char *name)
 	int meta_len;
 	int rc;
 
-	if (xlog_init(xlog) != 0)
+	if (xlog_init(xlog, opts) != 0)
 		goto err;
 
 	strncpy(xlog->filename, name, PATH_MAX);
@@ -982,17 +985,10 @@ xdir_create_xlog(struct xdir *dir, struct xlog *xlog,
 			 vclock, prev_vclock);
 
 	char *filename = xdir_format_filename(dir, signature, NONE);
-	if (xlog_create(xlog, filename, dir->open_wflags, &meta) != 0)
+	if (xlog_create(xlog, filename, dir->open_wflags, &meta,
+			&dir->opts) != 0)
 		return -1;
 
-	/* Inherit xdir settings. */
-	xlog->sync_is_async = dir->sync_is_async;
-	xlog->sync_interval = dir->sync_interval;
-
-	/* free file cache if dir should be synced */
-	xlog->free_cache = dir->sync_interval != 0 ? true: false;
-	xlog->rate_limit = 0;
-
 	/* Rename xlog file */
 	if (dir->suffix != INPROGRESS && xlog_rename(xlog)) {
 		int save_errno = errno;
@@ -1238,16 +1234,16 @@ xlog_tx_write(struct xlog *log)
 	log->offset += written;
 	log->rows += log->tx_rows;
 	log->tx_rows = 0;
-	if ((log->sync_interval && log->offset >=
-	    (off_t)(log->synced_size + log->sync_interval)) ||
-	    (log->rate_limit && log->offset >=
-	    (off_t)(log->synced_size + log->rate_limit))) {
+	if ((log->opts.sync_interval && log->offset >=
+	    (off_t)(log->synced_size + log->opts.sync_interval)) ||
+	    (log->opts.rate_limit && log->offset >=
+	    (off_t)(log->synced_size + log->opts.rate_limit))) {
 		off_t sync_from = SYNC_ROUND_DOWN(log->synced_size);
 		size_t sync_len = SYNC_ROUND_UP(log->offset) -
 				  sync_from;
-		if (log->rate_limit > 0) {
+		if (log->opts.rate_limit > 0) {
 			double throttle_time;
-			throttle_time = (double)sync_len / log->rate_limit -
+			throttle_time = (double)sync_len / log->opts.rate_limit -
 					(ev_monotonic_time() - log->sync_time);
 			if (throttle_time > 0)
 				ev_sleep(throttle_time);
@@ -1262,7 +1258,7 @@ xlog_tx_write(struct xlog *log)
 		fdatasync(log->fd);
 #endif /* HAVE_SYNC_FILE_RANGE */
 		log->sync_time = ev_monotonic_time();
-		if (log->free_cache) {
+		if (log->opts.free_cache) {
 #ifdef HAVE_POSIX_FADVISE
 			/** free page cache */
 			if (posix_fadvise(log->fd, sync_from, sync_len,
@@ -1411,7 +1407,7 @@ sync_cb(eio_req *req)
 int
 xlog_sync(struct xlog *l)
 {
-	if (l->sync_is_async) {
+	if (l->opts.sync_is_async) {
 		int fd = dup(l->fd);
 		if (fd == -1) {
 			say_syserror("%s: dup() failed", l->filename);
diff --git a/src/box/xlog.h b/src/box/xlog.h
index 18c513be..46826573 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -49,6 +49,35 @@ struct xrow_header;
 extern "C" {
 #endif /* defined(__cplusplus) */
 
+/**
+ * This structure combines all xlog write options set on xlog
+ * creation.
+ */
+struct xlog_opts {
+	/** Write rate limit, in bytes per second. */
+	uint64_t rate_limit;
+	/** Sync interval, in bytes. */
+	uint64_t sync_interval;
+	/**
+	 * If this flag is set and sync interval is greater than 0,
+	 * page cache will be freed after each sync.
+	 *
+	 * This option is useful for memtx snapshots, which won't
+	 * be reread soon and hence shouldn't stay cached in memory.
+	 */
+	bool free_cache;
+	/**
+	 * If this flag is set, xlog file will be synced in a coio
+	 * thread on close.
+	 *
+	 * This option is useful for WAL files as it allows not to
+	 * block writers when an xlog is rotated.
+	 */
+	bool sync_is_async;
+};
+
+extern const struct xlog_opts xlog_opts_default;
+
 /* {{{ log dir */
 
 /**
@@ -81,6 +110,8 @@ enum log_suffix { NONE, INPROGRESS };
  * through all logs, create a new log.
  */
 struct xdir {
+	/** Xlog write options. */
+	struct xlog_opts opts;
 	/**
 	 * Allow partial recovery from a damaged/incorrect
 	 * data directory. Suppresses exceptions when scanning
@@ -89,14 +120,6 @@ struct xdir {
 	 * are skipped.
 	 */
 	bool force_recovery;
-
-	/**
-	 * true if a log file in this directory can by fsync()ed
-	 * at close in a separate thread (we use this technique to
-	 * speed up sync of write ahead logs, but not snapshots).
-	 */
-	bool sync_is_async;
-
 	/* Default filename suffix for a new file. */
 	enum log_suffix suffix;
 	/**
@@ -134,12 +157,6 @@ struct xdir {
 	char dirname[PATH_MAX+1];
 	/** Snapshots or xlogs */
 	enum xdir_type type;
-	/**
-	 * Sync interval in bytes.
-	 * xlog file will be synced every sync_interval bytes,
-	 * corresponding file cache will be marked as free
-	 */
-	uint64_t sync_interval;
 };
 
 /**
@@ -147,7 +164,7 @@ struct xdir {
  */
 void
 xdir_create(struct xdir *dir, const char *dirname, enum xdir_type type,
-	    const struct tt_uuid *instance_uuid);
+	    const struct tt_uuid *instance_uuid, const struct xlog_opts *opts);
 
 /**
  * Destroy a log dir object.
@@ -306,10 +323,10 @@ xlog_meta_create(struct xlog_meta *meta, const char *filetype,
  * A single log file - a snapshot, a vylog or a write ahead log.
  */
 struct xlog {
+	/** Xlog write options. */
+	struct xlog_opts opts;
 	/** xlog meta header */
 	struct xlog_meta meta;
-	/** do sync in async mode */
-	bool sync_is_async;
 	/** File handle. */
 	int fd;
 	/**
@@ -363,26 +380,9 @@ struct xlog {
 	 */
 	struct obuf zbuf;
 	/**
-	 * Sync interval in bytes.
-	 * xlog file will be synced every sync_interval bytes,
-	 * corresponding file cache will be marked as free
-	 */
-	uint64_t sync_interval;
-	/**
 	 * Synced file size
 	 */
 	uint64_t synced_size;
-	/**
-	 * If xlog file was synced corresponding cache will be freed if true.
-	 * This can be significant for memtx snapshots (that wouldn't
-	 * be read in normal cases) and vinyl data files (that can be read
-	 * after writing)
-	 */
-	bool free_cache;
-	/**
-	 * Write rate limit
-	 */
-	uint64_t rate_limit;
 	/** Time when xlog wast synced last time */
 	double sync_time;
 };
@@ -424,6 +424,7 @@ xdir_create_xlog(struct xdir *dir, struct xlog *xlog,
  * @param name          the assiciated name
  * @param flags		flags to open the file or 0 for defaults
  * @param meta          xlog meta
+ * @param opts          write options
  *
  * @retval 0 for success
  * @retvl -1 if error
@@ -431,18 +432,19 @@ xdir_create_xlog(struct xdir *dir, struct xlog *xlog,
 
 int
 xlog_create(struct xlog *xlog, const char *name, int flags,
-	    const struct xlog_meta *meta);
+	    const struct xlog_meta *meta, const struct xlog_opts *opts);
 
 /**
  * Open an existing xlog file for appending.
  * @param xlog          xlog descriptor
  * @param name          file name
+ * @param opts          write options
  *
  * @retval 0 success
  * @retval -1 error
  */
 int
-xlog_open(struct xlog *xlog, const char *name);
+xlog_open(struct xlog *xlog, const char *name, const struct xlog_opts *opts);
 
 
 /**
-- 
2.11.0




More information about the Tarantool-patches mailing list