Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH 1/2] xlog: cleanup setting of write options
@ 2019-04-10 14:22 Vladimir Davydov
  2019-04-10 14:22 ` [PATCH 2/2] vinyl: don't compress L1 runs Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Vladimir Davydov @ 2019-04-10 14:22 UTC (permalink / raw)
  To: kostja; +Cc: tarantool-patches

The way xlog write options (sync_interval and others) are set is a mess:
if an xlog is created with xlog_create(), we overwrite them explicitly;
if an xlog is created with xdir_create_xlog(), we inherit parameters
from the xdir, which sets them depending on the xdir type (SNAP, XLOG,
or VYLOG), but sometimes we overwrite them explicitly as well. The more
options we add, the worse it gets.

To clean it up, let's add an auxiliary structure combining all xlog
write options and pass it to xlog_create() and xdir_create() everywhere.
---
https://github.com/tarantool/tarantool/issues/2389
https://github.com/tarantool/tarantool/commits/dv/gh-2389-vy-disable-l1-compression

 src/box/memtx_engine.c | 19 ++++++++-----
 src/box/recovery.cc    |  3 +-
 src/box/vy_log.c       |  5 ++--
 src/box/vy_run.c       | 16 +++++++----
 src/box/wal.c          |  6 ++--
 src/box/xlog.c         | 56 ++++++++++++++++++--------------------
 src/box/xlog.h         | 74 ++++++++++++++++++++++++++------------------------
 7 files changed, 96 insertions(+), 83 deletions(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 924f8bbc..48f700a0 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -49,6 +49,9 @@
 #include "schema.h"
 #include "gc.h"
 
+/* sync snapshot every 16MB */
+#define SNAP_SYNC_INTERVAL	(1 << 24)
+
 /*
  * Memtx yield-in-transaction trigger: roll back the effects
  * of the transaction and mark the transaction as aborted.
@@ -557,7 +560,6 @@ struct checkpoint {
 	 * read view iterators.
 	 */
 	struct rlist entries;
-	uint64_t snap_io_rate_limit;
 	struct cord cord;
 	bool waiting_for_snap_thread;
 	/** The vclock of the snapshot file. */
@@ -581,8 +583,11 @@ checkpoint_new(const char *snap_dirname, uint64_t snap_io_rate_limit)
 	}
 	rlist_create(&ckpt->entries);
 	ckpt->waiting_for_snap_thread = false;
-	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID);
-	ckpt->snap_io_rate_limit = snap_io_rate_limit;
+	struct xlog_opts opts = xlog_opts_default;
+	opts.rate_limit = snap_io_rate_limit;
+	opts.sync_interval = SNAP_SYNC_INTERVAL;
+	opts.free_cache = true;
+	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID, &opts);
 	vclock_create(&ckpt->vclock);
 	ckpt->touch = false;
 	return ckpt;
@@ -647,8 +652,6 @@ checkpoint_f(va_list ap)
 	if (xdir_create_xlog(&ckpt->dir, &snap, &ckpt->vclock) != 0)
 		return -1;
 
-	snap.rate_limit = ckpt->snap_io_rate_limit;
-
 	say_info("saving snapshot `%s'", snap.filename);
 	struct checkpoint_entry *entry;
 	rlist_foreach_entry(entry, &ckpt->entries, link) {
@@ -843,7 +846,8 @@ memtx_initial_join_f(va_list ap)
 	 * snap_dirname and INSTANCE_UUID don't change after start,
 	 * safe to use in another thread.
 	 */
-	xdir_create(&dir, snap_dirname, SNAP, &INSTANCE_UUID);
+	xdir_create(&dir, snap_dirname, SNAP, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	struct xlog_cursor cursor;
 	int rc = xdir_open_cursor(&dir, checkpoint_lsn, &cursor);
 	xdir_destroy(&dir);
@@ -997,7 +1001,8 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		return NULL;
 	}
 
-	xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID);
+	xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	memtx->snap_dir.force_recovery = force_recovery;
 
 	if (xdir_scan(&memtx->snap_dir) != 0)
diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index cf3a707a..d122d618 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -95,7 +95,8 @@ recovery_new(const char *wal_dirname, bool force_recovery,
 		free(r);
 	});
 
-	xdir_create(&r->wal_dir, wal_dirname, XLOG, &INSTANCE_UUID);
+	xdir_create(&r->wal_dir, wal_dirname, XLOG, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	r->wal_dir.force_recovery = force_recovery;
 
 	vclock_copy(&r->vclock, vclock);
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 85b61a84..be97cdbb 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -734,7 +734,8 @@ err:
 void
 vy_log_init(const char *dir)
 {
-	xdir_create(&vy_log.dir, dir, VYLOG, &INSTANCE_UUID);
+	xdir_create(&vy_log.dir, dir, VYLOG, &INSTANCE_UUID,
+		    &xlog_opts_default);
 	latch_create(&vy_log.latch);
 	region_create(&vy_log.pool, cord_slab_cache());
 	stailq_create(&vy_log.tx);
@@ -830,7 +831,7 @@ vy_log_open(struct xlog *xlog)
 	 */
 	const char *path = vy_log_filename(vclock_sum(&vy_log.last_checkpoint));
 	if (access(path, F_OK) == 0)
-		return xlog_open(xlog, path);
+		return xlog_open(xlog, path, &vy_log.dir.opts);
 
 	if (errno != ENOENT) {
 		diag_set(SystemError, "failed to access file '%s'", path);
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index 03db2f50..faa28f47 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -71,6 +71,9 @@ const char *vy_file_suffix[] = {
 	"run" inprogress_suffix, 	/* VY_FILE_RUN_INPROGRESS */
 };
 
+/* sync run and index files very 16 MB */
+#define VY_RUN_SYNC_INTERVAL (1 << 24)
+
 /**
  * We read runs in background threads so as not to stall tx.
  * This structure represents such a thread.
@@ -2032,11 +2035,12 @@ vy_run_write_index(struct vy_run *run, const char *dirpath,
 	struct xlog_meta meta;
 	xlog_meta_create(&meta, XLOG_META_TYPE_INDEX, &INSTANCE_UUID,
 			 NULL, NULL);
-	if (xlog_create(&index_xlog, path, 0, &meta) < 0)
+	struct xlog_opts opts = xlog_opts_default;
+	opts.rate_limit = run->env->snap_io_rate_limit;
+	opts.sync_interval = VY_RUN_SYNC_INTERVAL;
+	if (xlog_create(&index_xlog, path, 0, &meta, &opts) < 0)
 		return -1;
 
-	index_xlog.rate_limit = run->env->snap_io_rate_limit;
-
 	xlog_tx_begin(&index_xlog);
 	struct region *region = &fiber()->gc;
 	size_t mem_used = region_used(region);
@@ -2128,9 +2132,11 @@ vy_run_writer_create_xlog(struct vy_run_writer *writer)
 	struct xlog_meta meta;
 	xlog_meta_create(&meta, XLOG_META_TYPE_RUN, &INSTANCE_UUID,
 			 NULL, NULL);
-	if (xlog_create(&writer->data_xlog, path, 0, &meta) != 0)
+	struct xlog_opts opts = xlog_opts_default;
+	opts.rate_limit = writer->run->env->snap_io_rate_limit;
+	opts.sync_interval = VY_RUN_SYNC_INTERVAL;
+	if (xlog_create(&writer->data_xlog, path, 0, &meta, &opts) != 0)
 		return -1;
-	writer->data_xlog.rate_limit = writer->run->env->snap_io_rate_limit;
 	return 0;
 }
 
diff --git a/src/box/wal.c b/src/box/wal.c
index 3faad9c3..ad8ff7c6 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -353,7 +353,9 @@ wal_writer_create(struct wal_writer *writer, enum wal_mode wal_mode,
 	journal_create(&writer->base, wal_mode == WAL_NONE ?
 		       wal_write_in_wal_mode_none : wal_write, NULL);
 
-	xdir_create(&writer->wal_dir, wal_dirname, XLOG, instance_uuid);
+	struct xlog_opts opts = xlog_opts_default;
+	opts.sync_is_async = true;
+	xdir_create(&writer->wal_dir, wal_dirname, XLOG, instance_uuid, &opts);
 	xlog_clear(&writer->current_wal);
 	if (wal_mode == WAL_FSYNC)
 		writer->wal_dir.open_wflags |= O_SYNC;
@@ -392,7 +394,7 @@ wal_open_f(struct cbus_call_msg *msg)
 	const char *path = xdir_format_filename(&writer->wal_dir,
 				vclock_sum(&writer->vclock), NONE);
 	assert(!xlog_is_open(&writer->current_wal));
-	return xlog_open(&writer->current_wal, path);
+	return xlog_open(&writer->current_wal, path, &writer->wal_dir.opts);
 }
 
 /**
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 692b5f5e..1e423ecc 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -89,6 +89,13 @@ enum {
 	XLOG_TX_COMPRESS_THRESHOLD = 2 * 1024,
 };
 
+const struct xlog_opts xlog_opts_default = {
+	.rate_limit = 0,
+	.sync_interval = 0,
+	.free_cache = false,
+	.sync_is_async = false,
+};
+
 /* {{{ struct xlog_meta */
 
 enum {
@@ -317,14 +324,12 @@ xlog_meta_parse(struct xlog_meta *meta, const char **data,
 
 /* {{{ struct xdir */
 
-/* sync snapshot every 16MB */
-#define SNAP_SYNC_INTERVAL	(1 << 24)
-
 void
-xdir_create(struct xdir *dir, const char *dirname,
-	    enum xdir_type type, const struct tt_uuid *instance_uuid)
+xdir_create(struct xdir *dir, const char *dirname, enum xdir_type type,
+	    const struct tt_uuid *instance_uuid, const struct xlog_opts *opts)
 {
 	memset(dir, 0, sizeof(*dir));
+	dir->opts = *opts;
 	vclockset_new(&dir->index);
 	/* Default mode. */
 	dir->mode = 0660;
@@ -336,10 +341,8 @@ xdir_create(struct xdir *dir, const char *dirname,
 		dir->filetype = "SNAP";
 		dir->filename_ext = ".snap";
 		dir->suffix = INPROGRESS;
-		dir->sync_interval = SNAP_SYNC_INTERVAL;
 		break;
 	case XLOG:
-		dir->sync_is_async = true;
 		dir->filetype = "XLOG";
 		dir->filename_ext = ".xlog";
 		dir->suffix = NONE;
@@ -761,10 +764,10 @@ xlog_rename(struct xlog *l)
 }
 
 static int
-xlog_init(struct xlog *xlog)
+xlog_init(struct xlog *xlog, const struct xlog_opts *opts)
 {
 	memset(xlog, 0, sizeof(*xlog));
-	xlog->sync_interval = SNAP_SYNC_INTERVAL;
+	xlog->opts = *opts;
 	xlog->sync_time = ev_monotonic_time();
 	xlog->is_autocommit = true;
 	obuf_create(&xlog->obuf, &cord()->slabc, XLOG_TX_AUTOCOMMIT_THRESHOLD);
@@ -799,7 +802,7 @@ xlog_destroy(struct xlog *xlog)
 
 int
 xlog_create(struct xlog *xlog, const char *name, int flags,
-	    const struct xlog_meta *meta)
+	    const struct xlog_meta *meta, const struct xlog_opts *opts)
 {
 	char meta_buf[XLOG_META_LEN_MAX];
 	int meta_len;
@@ -814,7 +817,7 @@ xlog_create(struct xlog *xlog, const char *name, int flags,
 		goto err;
 	}
 
-	if (xlog_init(xlog) != 0)
+	if (xlog_init(xlog, opts) != 0)
 		goto err;
 
 	xlog->meta = *meta;
@@ -867,7 +870,7 @@ err:
 }
 
 int
-xlog_open(struct xlog *xlog, const char *name)
+xlog_open(struct xlog *xlog, const char *name, const struct xlog_opts *opts)
 {
 	char magic[sizeof(log_magic_t)];
 	char meta_buf[XLOG_META_LEN_MAX];
@@ -875,7 +878,7 @@ xlog_open(struct xlog *xlog, const char *name)
 	int meta_len;
 	int rc;
 
-	if (xlog_init(xlog) != 0)
+	if (xlog_init(xlog, opts) != 0)
 		goto err;
 
 	strncpy(xlog->filename, name, PATH_MAX);
@@ -982,17 +985,10 @@ xdir_create_xlog(struct xdir *dir, struct xlog *xlog,
 			 vclock, prev_vclock);
 
 	char *filename = xdir_format_filename(dir, signature, NONE);
-	if (xlog_create(xlog, filename, dir->open_wflags, &meta) != 0)
+	if (xlog_create(xlog, filename, dir->open_wflags, &meta,
+			&dir->opts) != 0)
 		return -1;
 
-	/* Inherit xdir settings. */
-	xlog->sync_is_async = dir->sync_is_async;
-	xlog->sync_interval = dir->sync_interval;
-
-	/* free file cache if dir should be synced */
-	xlog->free_cache = dir->sync_interval != 0 ? true: false;
-	xlog->rate_limit = 0;
-
 	/* Rename xlog file */
 	if (dir->suffix != INPROGRESS && xlog_rename(xlog)) {
 		int save_errno = errno;
@@ -1238,16 +1234,16 @@ xlog_tx_write(struct xlog *log)
 	log->offset += written;
 	log->rows += log->tx_rows;
 	log->tx_rows = 0;
-	if ((log->sync_interval && log->offset >=
-	    (off_t)(log->synced_size + log->sync_interval)) ||
-	    (log->rate_limit && log->offset >=
-	    (off_t)(log->synced_size + log->rate_limit))) {
+	if ((log->opts.sync_interval && log->offset >=
+	    (off_t)(log->synced_size + log->opts.sync_interval)) ||
+	    (log->opts.rate_limit && log->offset >=
+	    (off_t)(log->synced_size + log->opts.rate_limit))) {
 		off_t sync_from = SYNC_ROUND_DOWN(log->synced_size);
 		size_t sync_len = SYNC_ROUND_UP(log->offset) -
 				  sync_from;
-		if (log->rate_limit > 0) {
+		if (log->opts.rate_limit > 0) {
 			double throttle_time;
-			throttle_time = (double)sync_len / log->rate_limit -
+			throttle_time = (double)sync_len / log->opts.rate_limit -
 					(ev_monotonic_time() - log->sync_time);
 			if (throttle_time > 0)
 				ev_sleep(throttle_time);
@@ -1262,7 +1258,7 @@ xlog_tx_write(struct xlog *log)
 		fdatasync(log->fd);
 #endif /* HAVE_SYNC_FILE_RANGE */
 		log->sync_time = ev_monotonic_time();
-		if (log->free_cache) {
+		if (log->opts.free_cache) {
 #ifdef HAVE_POSIX_FADVISE
 			/** free page cache */
 			if (posix_fadvise(log->fd, sync_from, sync_len,
@@ -1411,7 +1407,7 @@ sync_cb(eio_req *req)
 int
 xlog_sync(struct xlog *l)
 {
-	if (l->sync_is_async) {
+	if (l->opts.sync_is_async) {
 		int fd = dup(l->fd);
 		if (fd == -1) {
 			say_syserror("%s: dup() failed", l->filename);
diff --git a/src/box/xlog.h b/src/box/xlog.h
index 18c513be..46826573 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -49,6 +49,35 @@ struct xrow_header;
 extern "C" {
 #endif /* defined(__cplusplus) */
 
+/**
+ * This structure combines all xlog write options set on xlog
+ * creation.
+ */
+struct xlog_opts {
+	/** Write rate limit, in bytes per second. */
+	uint64_t rate_limit;
+	/** Sync interval, in bytes. */
+	uint64_t sync_interval;
+	/**
+	 * If this flag is set and sync interval is greater than 0,
+	 * page cache will be freed after each sync.
+	 *
+	 * This option is useful for memtx snapshots, which won't
+	 * be reread soon and hence shouldn't stay cached in memory.
+	 */
+	bool free_cache;
+	/**
+	 * If this flag is set, xlog file will be synced in a coio
+	 * thread on close.
+	 *
+	 * This option is useful for WAL files as it allows not to
+	 * block writers when an xlog is rotated.
+	 */
+	bool sync_is_async;
+};
+
+extern const struct xlog_opts xlog_opts_default;
+
 /* {{{ log dir */
 
 /**
@@ -81,6 +110,8 @@ enum log_suffix { NONE, INPROGRESS };
  * through all logs, create a new log.
  */
 struct xdir {
+	/** Xlog write options. */
+	struct xlog_opts opts;
 	/**
 	 * Allow partial recovery from a damaged/incorrect
 	 * data directory. Suppresses exceptions when scanning
@@ -89,14 +120,6 @@ struct xdir {
 	 * are skipped.
 	 */
 	bool force_recovery;
-
-	/**
-	 * true if a log file in this directory can by fsync()ed
-	 * at close in a separate thread (we use this technique to
-	 * speed up sync of write ahead logs, but not snapshots).
-	 */
-	bool sync_is_async;
-
 	/* Default filename suffix for a new file. */
 	enum log_suffix suffix;
 	/**
@@ -134,12 +157,6 @@ struct xdir {
 	char dirname[PATH_MAX+1];
 	/** Snapshots or xlogs */
 	enum xdir_type type;
-	/**
-	 * Sync interval in bytes.
-	 * xlog file will be synced every sync_interval bytes,
-	 * corresponding file cache will be marked as free
-	 */
-	uint64_t sync_interval;
 };
 
 /**
@@ -147,7 +164,7 @@ struct xdir {
  */
 void
 xdir_create(struct xdir *dir, const char *dirname, enum xdir_type type,
-	    const struct tt_uuid *instance_uuid);
+	    const struct tt_uuid *instance_uuid, const struct xlog_opts *opts);
 
 /**
  * Destroy a log dir object.
@@ -306,10 +323,10 @@ xlog_meta_create(struct xlog_meta *meta, const char *filetype,
  * A single log file - a snapshot, a vylog or a write ahead log.
  */
 struct xlog {
+	/** Xlog write options. */
+	struct xlog_opts opts;
 	/** xlog meta header */
 	struct xlog_meta meta;
-	/** do sync in async mode */
-	bool sync_is_async;
 	/** File handle. */
 	int fd;
 	/**
@@ -363,26 +380,9 @@ struct xlog {
 	 */
 	struct obuf zbuf;
 	/**
-	 * Sync interval in bytes.
-	 * xlog file will be synced every sync_interval bytes,
-	 * corresponding file cache will be marked as free
-	 */
-	uint64_t sync_interval;
-	/**
 	 * Synced file size
 	 */
 	uint64_t synced_size;
-	/**
-	 * If xlog file was synced corresponding cache will be freed if true.
-	 * This can be significant for memtx snapshots (that wouldn't
-	 * be read in normal cases) and vinyl data files (that can be read
-	 * after writing)
-	 */
-	bool free_cache;
-	/**
-	 * Write rate limit
-	 */
-	uint64_t rate_limit;
 	/** Time when xlog wast synced last time */
 	double sync_time;
 };
@@ -424,6 +424,7 @@ xdir_create_xlog(struct xdir *dir, struct xlog *xlog,
  * @param name          the assiciated name
  * @param flags		flags to open the file or 0 for defaults
  * @param meta          xlog meta
+ * @param opts          write options
  *
  * @retval 0 for success
  * @retvl -1 if error
@@ -431,18 +432,19 @@ xdir_create_xlog(struct xdir *dir, struct xlog *xlog,
 
 int
 xlog_create(struct xlog *xlog, const char *name, int flags,
-	    const struct xlog_meta *meta);
+	    const struct xlog_meta *meta, const struct xlog_opts *opts);
 
 /**
  * Open an existing xlog file for appending.
  * @param xlog          xlog descriptor
  * @param name          file name
+ * @param opts          write options
  *
  * @retval 0 success
  * @retval -1 error
  */
 int
-xlog_open(struct xlog *xlog, const char *name);
+xlog_open(struct xlog *xlog, const char *name, const struct xlog_opts *opts);
 
 
 /**
-- 
2.11.0

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/2] vinyl: don't compress L1 runs
  2019-04-10 14:22 [PATCH 1/2] xlog: cleanup setting of write options Vladimir Davydov
@ 2019-04-10 14:22 ` Vladimir Davydov
  2019-04-11  7:28   ` [tarantool-patches] " Konstantin Osipov
  0 siblings, 1 reply; 4+ messages in thread
From: Vladimir Davydov @ 2019-04-10 14:22 UTC (permalink / raw)
  To: kostja; +Cc: tarantool-patches

L1 runs are usually the most frequently read and smallest runs at the
same time so we gain nothing by compressing them.

Closes #2389
---
https://github.com/tarantool/tarantool/issues/2389
https://github.com/tarantool/tarantool/commits/dv/gh-2389-vy-disable-l1-compression

 src/box/vy_run.c            |  4 +++-
 src/box/vy_run.h            |  4 +++-
 src/box/vy_scheduler.c      | 14 +++++++++----
 src/box/xlog.c              | 16 +++++++++------
 src/box/xlog.h              |  8 ++++++++
 test/unit/vy_point_lookup.c |  2 +-
 test/vinyl/misc.result      | 50 +++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/misc.test.lua    | 18 ++++++++++++++++
 8 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index faa28f47..ddb375bf 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -2089,7 +2089,7 @@ int
 vy_run_writer_create(struct vy_run_writer *writer, struct vy_run *run,
 		     const char *dirpath, uint32_t space_id, uint32_t iid,
 		     struct key_def *cmp_def, struct key_def *key_def,
-		     uint64_t page_size, double bloom_fpr)
+		     uint64_t page_size, double bloom_fpr, bool no_compression)
 {
 	memset(writer, 0, sizeof(*writer));
 	writer->run = run;
@@ -2100,6 +2100,7 @@ vy_run_writer_create(struct vy_run_writer *writer, struct vy_run *run,
 	writer->key_def = key_def;
 	writer->page_size = page_size;
 	writer->bloom_fpr = bloom_fpr;
+	writer->no_compression = no_compression;
 	if (bloom_fpr < 1) {
 		writer->bloom = tuple_bloom_builder_new(key_def->part_count);
 		if (writer->bloom == NULL)
@@ -2135,6 +2136,7 @@ vy_run_writer_create_xlog(struct vy_run_writer *writer)
 	struct xlog_opts opts = xlog_opts_default;
 	opts.rate_limit = writer->run->env->snap_io_rate_limit;
 	opts.sync_interval = VY_RUN_SYNC_INTERVAL;
+	opts.no_compression = writer->no_compression;
 	if (xlog_create(&writer->data_xlog, path, 0, &meta, &opts) != 0)
 		return -1;
 	return 0;
diff --git a/src/box/vy_run.h b/src/box/vy_run.h
index aedae959..9618d856 100644
--- a/src/box/vy_run.h
+++ b/src/box/vy_run.h
@@ -612,6 +612,8 @@ struct vy_run_writer {
 	 * Current page info capacity. Can grow with page number.
 	 */
 	uint32_t page_info_capacity;
+	/** Don't use compression while writing xlog files. */
+	bool no_compression;
 	/** Xlog to write data. */
 	struct xlog data_xlog;
 	/** Bloom filter false positive rate. */
@@ -632,7 +634,7 @@ int
 vy_run_writer_create(struct vy_run_writer *writer, struct vy_run *run,
 		     const char *dirpath, uint32_t space_id, uint32_t iid,
 		     struct key_def *cmp_def, struct key_def *key_def,
-		     uint64_t page_size, double bloom_fpr);
+		     uint64_t page_size, double bloom_fpr, bool no_compression);
 
 /**
  * Write a specified statement into a run.
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 8f6279dc..d56a505a 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -1026,7 +1026,7 @@ vy_task_deferred_delete_iface = {
 };
 
 static int
-vy_task_write_run(struct vy_task *task)
+vy_task_write_run(struct vy_task *task, bool no_compression)
 {
 	enum { YIELD_LOOPS = 32 };
 
@@ -1047,7 +1047,8 @@ vy_task_write_run(struct vy_task *task)
 	if (vy_run_writer_create(&writer, task->new_run, lsm->env->path,
 				 lsm->space_id, lsm->index_id,
 				 task->cmp_def, task->key_def,
-				 task->page_size, task->bloom_fpr) != 0)
+				 task->page_size, task->bloom_fpr,
+				 no_compression) != 0)
 		goto fail;
 
 	if (wi->iface->start(wi) != 0)
@@ -1090,7 +1091,12 @@ fail:
 static int
 vy_task_dump_execute(struct vy_task *task)
 {
-	return vy_task_write_run(task);
+	/*
+	 * Don't compress L1 runs as they are most frequently read
+	 * and smallest runs at the same time and so we would gain
+	 * nothing by compressing them.
+	 */
+	return vy_task_write_run(task, true);
 }
 
 static int
@@ -1431,7 +1437,7 @@ vy_task_compaction_execute(struct vy_task *task)
 		while (errinj->bparam)
 			fiber_sleep(0.01);
 	}
-	return vy_task_write_run(task);
+	return vy_task_write_run(task, false);
 }
 
 static int
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 1e423ecc..b70e8e25 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -94,6 +94,7 @@ const struct xlog_opts xlog_opts_default = {
 	.sync_interval = 0,
 	.free_cache = false,
 	.sync_is_async = false,
+	.no_compression = false,
 };
 
 /* {{{ struct xlog_meta */
@@ -772,11 +773,13 @@ xlog_init(struct xlog *xlog, const struct xlog_opts *opts)
 	xlog->is_autocommit = true;
 	obuf_create(&xlog->obuf, &cord()->slabc, XLOG_TX_AUTOCOMMIT_THRESHOLD);
 	obuf_create(&xlog->zbuf, &cord()->slabc, XLOG_TX_AUTOCOMMIT_THRESHOLD);
-	xlog->zctx = ZSTD_createCCtx();
-	if (xlog->zctx == NULL) {
-		diag_set(ClientError, ER_COMPRESSION,
-			 "failed to create context");
-		return -1;
+	if (!opts->no_compression) {
+		xlog->zctx = ZSTD_createCCtx();
+		if (xlog->zctx == NULL) {
+			diag_set(ClientError, ER_COMPRESSION,
+				 "failed to create context");
+			return -1;
+		}
 	}
 	return 0;
 }
@@ -1204,7 +1207,8 @@ xlog_tx_write(struct xlog *log)
 		return 0;
 	ssize_t written;
 
-	if (obuf_size(&log->obuf) >= XLOG_TX_COMPRESS_THRESHOLD) {
+	if (!log->opts.no_compression &&
+	    obuf_size(&log->obuf) >= XLOG_TX_COMPRESS_THRESHOLD) {
 		written = xlog_tx_write_zstd(log);
 	} else {
 		written = xlog_tx_write_plain(log);
diff --git a/src/box/xlog.h b/src/box/xlog.h
index 46826573..6539d1b0 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -74,6 +74,14 @@ struct xlog_opts {
 	 * block writers when an xlog is rotated.
 	 */
 	bool sync_is_async;
+	/**
+	 * If this flag is set, the xlog writer won't use zstd
+	 * compression.
+	 *
+	 * This option is useful for xlog files that are intended
+	 * to be read frequently, e.g. L1 run files in Vinyl.
+	 */
+	bool no_compression;
 };
 
 extern const struct xlog_opts xlog_opts_default;
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index 7471693c..9961a0f7 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -24,7 +24,7 @@ write_run(struct vy_run *run, const char *dir_name,
 	if (vy_run_writer_create(&writer, run, dir_name,
 				 lsm->space_id, lsm->index_id,
 				 lsm->cmp_def, lsm->key_def,
-				 4096, 0.1) != 0)
+				 4096, 0.1, false) != 0)
 		goto fail;
 
 	if (wi->iface->start(wi) != 0)
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index 4f613cb0..139edb55 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -382,3 +382,53 @@ s:select()
 s:drop()
 ---
 ...
+--
+-- gh-2389: L1 runs are not compressed.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+i = s:create_index('pk', {page_size = 100 * 1000, range_size = 1000 * 1000})
+---
+...
+pad = string.rep('x', 1000)
+---
+...
+for k = 1, 10 do s:replace{k, pad} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+stat = i:stat().disk
+---
+...
+stat.bytes_compressed >= stat.bytes
+---
+- true
+...
+for k = 1, 10 do s:replace{k, pad} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+i:compact()
+---
+...
+test_run:wait_cond(function() return i:stat().disk.compaction.count > 0 end)
+---
+- true
+...
+stat = i:stat().disk
+---
+...
+stat.bytes_compressed < stat.bytes / 10
+---
+- true
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index bffaaf16..f8da578d 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -164,3 +164,21 @@ ch2:get()
 box.cfg{read_only = false}
 s:select()
 s:drop()
+
+--
+-- gh-2389: L1 runs are not compressed.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+i = s:create_index('pk', {page_size = 100 * 1000, range_size = 1000 * 1000})
+pad = string.rep('x', 1000)
+for k = 1, 10 do s:replace{k, pad} end
+box.snapshot()
+stat = i:stat().disk
+stat.bytes_compressed >= stat.bytes
+for k = 1, 10 do s:replace{k, pad} end
+box.snapshot()
+i:compact()
+test_run:wait_cond(function() return i:stat().disk.compaction.count > 0 end)
+stat = i:stat().disk
+stat.bytes_compressed < stat.bytes / 10
+s:drop()
-- 
2.11.0

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tarantool-patches] Re: [PATCH 2/2] vinyl: don't compress L1 runs
  2019-04-10 14:22 ` [PATCH 2/2] vinyl: don't compress L1 runs Vladimir Davydov
@ 2019-04-11  7:28   ` Konstantin Osipov
  2019-04-11 10:24     ` Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Konstantin Osipov @ 2019-04-11  7:28 UTC (permalink / raw)
  To: tarantool-patches

* Vladimir Davydov <vdavydov.dev@gmail.com> [19/04/10 18:12]:
> L1 runs are usually the most frequently read and smallest runs at the
> same time so we gain nothing by compressing them.

OK to push.
> 
> Closes #2389

I might want to open another ticket which would ensure all levels
except the last two are left uncompressed.


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [tarantool-patches] Re: [PATCH 2/2] vinyl: don't compress L1 runs
  2019-04-11  7:28   ` [tarantool-patches] " Konstantin Osipov
@ 2019-04-11 10:24     ` Vladimir Davydov
  0 siblings, 0 replies; 4+ messages in thread
From: Vladimir Davydov @ 2019-04-11 10:24 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

On Thu, Apr 11, 2019 at 10:28:44AM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [19/04/10 18:12]:
> > L1 runs are usually the most frequently read and smallest runs at the
> > same time so we gain nothing by compressing them.
> 
> OK to push.

Pushed to master, 2.1, and 1.10.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-04-11 10:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-10 14:22 [PATCH 1/2] xlog: cleanup setting of write options Vladimir Davydov
2019-04-10 14:22 ` [PATCH 2/2] vinyl: don't compress L1 runs Vladimir Davydov
2019-04-11  7:28   ` [tarantool-patches] " Konstantin Osipov
2019-04-11 10:24     ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox