Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH v3 0/2] replication: force gc to clean xdir on ENOSPC err
@ 2018-06-28 15:07 Konstantin Belyavskiy
  2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 1/2] replication: rename thread from tx to tx_prio Konstantin Belyavskiy
  2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
  0 siblings, 2 replies; 6+ messages in thread
From: Konstantin Belyavskiy @ 2018-06-28 15:07 UTC (permalink / raw)
  To: tarantool-patches

ticket: https://github.com/tarantool/tarantool/issues/3397
branch: https://github.com/tarantool/tarantool/tree/kbelyavs/gh-3397-force-del-logs-on-no-disk-space


This is the third version of the patch, with the rename split out into a separate patch:
  replication: rename thread from tx to tx_prio
  replication: force gc to clean xdir on ENOSPC err

 src/box/box.cc                                    |   1 +
 src/box/gc.c                                      |  51 +++++++++++
 src/box/gc.h                                      |  16 ++++
 src/box/relay.cc                                  |   1 +
 src/box/wal.cc                                    |  40 ++++++--
 src/errinj.h                                      |   1 +
 src/fio.c                                         |   7 ++
 test/box/errinj.result                            |   2 +
 test/replication/force_gc_on_err_nospace.result   | 107 ++++++++++++++++++++++
 test/replication/force_gc_on_err_nospace.test.lua |  38 ++++++++
 test/replication/suite.ini                        |   2 +-
 11 files changed, 257 insertions(+), 9 deletions(-)
 create mode 100644 test/replication/force_gc_on_err_nospace.result
 create mode 100644 test/replication/force_gc_on_err_nospace.test.lua

--
2.14.3 (Apple Git-98)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tarantool-patches] [PATCH v3 1/2] replication: rename thread from tx to tx_prio
  2018-06-28 15:07 [tarantool-patches] [PATCH v3 0/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
@ 2018-06-28 15:07 ` Konstantin Belyavskiy
  2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
  1 sibling, 0 replies; 6+ messages in thread
From: Konstantin Belyavskiy @ 2018-06-28 15:07 UTC (permalink / raw)
  To: tarantool-patches

There are two different threads: 'tx' and 'tx_prio', the latter
does not support yield(). Rename to avoid misunderstanding.

Needed for #3397
---
 src/box/wal.cc | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/box/wal.cc b/src/box/wal.cc
index 099c70caa..93c350e1f 100644
--- a/src/box/wal.cc
+++ b/src/box/wal.cc
@@ -59,8 +59,11 @@ struct wal_thread {
 	struct cord cord;
 	/** A pipe from 'tx' thread to 'wal' */
 	struct cpipe wal_pipe;
-	/** Return pipe from 'wal' to tx' */
-	struct cpipe tx_pipe;
+	/**
+	 * Return pipe from 'wal' to tx'. This is a
+	 * priority pipe and DOES NOT support yield.
+	 */
+	struct cpipe tx_prio_pipe;
 };
 
 /*
@@ -154,7 +157,7 @@ static void
 tx_schedule_commit(struct cmsg *msg);
 
 static struct cmsg_hop wal_request_route[] = {
-	{wal_write_to_disk, &wal_thread.tx_pipe},
+	{wal_write_to_disk, &wal_thread.tx_prio_pipe},
 	{tx_schedule_commit, NULL},
 };
 
@@ -414,7 +417,7 @@ wal_checkpoint(struct vclock *vclock, bool rotate)
 		return 0;
 	}
 	static struct cmsg_hop wal_checkpoint_route[] = {
-		{wal_checkpoint_f, &wal_thread.tx_pipe},
+		{wal_checkpoint_f, &wal_thread.tx_prio_pipe},
 		{wal_checkpoint_done_f, NULL},
 	};
 	vclock_create(vclock);
@@ -453,7 +456,7 @@ wal_collect_garbage(int64_t lsn)
 	struct wal_gc_msg msg;
 	msg.lsn = lsn;
 	bool cancellable = fiber_set_cancellable(false);
-	cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_pipe, &msg,
+	cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe, &msg,
 		  wal_collect_garbage_f, NULL, TIMEOUT_INFINITY);
 	fiber_set_cancellable(cancellable);
 }
@@ -544,7 +547,7 @@ wal_writer_begin_rollback(struct wal_writer *writer)
 		 * list.
 		 */
 		{ wal_writer_clear_bus, &wal_thread.wal_pipe },
-		{ wal_writer_clear_bus, &wal_thread.tx_pipe },
+		{ wal_writer_clear_bus, &wal_thread.tx_prio_pipe },
 		/*
 		 * Step 2: writer->rollback queue contains all
 		 * messages which need to be rolled back,
@@ -562,7 +565,7 @@ wal_writer_begin_rollback(struct wal_writer *writer)
 	 * all input until rollback mode is off.
 	 */
 	cmsg_init(&writer->in_rollback, rollback_route);
-	cpipe_push(&wal_thread.tx_pipe, &writer->in_rollback);
+	cpipe_push(&wal_thread.tx_prio_pipe, &writer->in_rollback);
 }
 
 static void
@@ -691,7 +694,7 @@ wal_thread_f(va_list ap)
 	 * endpoint, to ensure that WAL messages are delivered
 	 * even when tx fiber pool is used up by net messages.
 	 */
-	cpipe_create(&wal_thread.tx_pipe, "tx_prio");
+	cpipe_create(&wal_thread.tx_prio_pipe, "tx_prio");
 
 	cbus_loop(&endpoint);
 
@@ -703,7 +706,7 @@ wal_thread_f(va_list ap)
 	if (xlog_is_open(&vy_log_writer.xlog))
 		xlog_close(&vy_log_writer.xlog, false);
 
-	cpipe_destroy(&wal_thread.tx_pipe);
+	cpipe_destroy(&wal_thread.tx_prio_pipe);
 	return 0;
 }
 
@@ -843,7 +846,7 @@ wal_write_vy_log(struct journal_entry *entry)
 	struct wal_write_vy_log_msg msg;
 	msg.entry= entry;
 	bool cancellable = fiber_set_cancellable(false);
-	int rc = cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_pipe, &msg,
+	int rc = cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe, &msg,
 			   wal_write_vy_log_f, NULL, TIMEOUT_INFINITY);
 	fiber_set_cancellable(cancellable);
 	return rc;
@@ -863,7 +866,7 @@ wal_rotate_vy_log()
 {
 	struct cbus_call_msg msg;
 	bool cancellable = fiber_set_cancellable(false);
-	cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_pipe, &msg,
+	cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_prio_pipe, &msg,
 		  wal_rotate_vy_log_f, NULL, TIMEOUT_INFINITY);
 	fiber_set_cancellable(cancellable);
 }
-- 
2.14.3 (Apple Git-98)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tarantool-patches] [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err
  2018-06-28 15:07 [tarantool-patches] [PATCH v3 0/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
  2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 1/2] replication: rename thread from tx to tx_prio Konstantin Belyavskiy
@ 2018-06-28 15:07 ` Konstantin Belyavskiy
  2018-06-28 15:25   ` [tarantool-patches] " Konstantin Osipov
  1 sibling, 1 reply; 6+ messages in thread
From: Konstantin Belyavskiy @ 2018-06-28 15:07 UTC (permalink / raw)
  To: tarantool-patches

The garbage collector does not delete xlogs until the replica
notifies the master with a newer vclock. This can lead to a
running-out-of-disk-space error, which is not the right behaviour
since it will stop the master.
Fix it by forcing gc to clean xlogs for the replica with the highest lag.
Add an error injection and a test.

Changes in V2:
- Promoting error from 'wal' thread to 'tx' thread via 'cpipe'.
Changes in V3:
- Exclude 'tx' to 'tx_prio' rename with a separate patch.
- Delete consumers and only for replicas (but not backup process).
- Use a timestamp to prevent gc from running more than once in
  case of multiple failures.
- other small fixes according to code review.

Closes #3397
---
 src/box/box.cc                                    |   1 +
 src/box/gc.c                                      |  51 +++++++++++
 src/box/gc.h                                      |  16 ++++
 src/box/relay.cc                                  |   1 +
 src/box/wal.cc                                    |  21 +++++
 src/errinj.h                                      |   1 +
 src/fio.c                                         |   7 ++
 test/box/errinj.result                            |   2 +
 test/replication/force_gc_on_err_nospace.result   | 107 ++++++++++++++++++++++
 test/replication/force_gc_on_err_nospace.test.lua |  38 ++++++++
 test/replication/suite.ini                        |   2 +-
 11 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 test/replication/force_gc_on_err_nospace.result
 create mode 100644 test/replication/force_gc_on_err_nospace.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index e3eb2738f..ba894c33a 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1370,6 +1370,7 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 	replica = replica_by_uuid(&instance_uuid);
 	assert(replica != NULL);
 	replica->gc = gc;
+	gc_consumer_set_replica(gc, replica);
 	gc_guard.is_active = false;

 	/* Remember master's vclock after the last request */
diff --git a/src/box/gc.c b/src/box/gc.c
index 12e68f3dc..904496366 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -35,6 +35,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>

 #define RB_COMPACT 1
 #include <small/rb.h>
@@ -61,6 +62,8 @@ struct gc_consumer {
 	char *name;
 	/** The vclock signature tracked by this consumer. */
 	int64_t signature;
+	/** Replica associated with consumer (if any). */
+	struct replica *replica;
 };

 typedef rb_tree(struct gc_consumer) gc_tree_t;
@@ -123,10 +126,18 @@ gc_consumer_new(const char *name, int64_t signature)
 	return consumer;
 }

+void
+gc_consumer_set_replica(struct gc_consumer *gc, struct replica *replica)
+{
+	gc->replica = replica;
+}
+
 /** Free a consumer object. */
 static void
 gc_consumer_delete(struct gc_consumer *consumer)
 {
+	if (consumer->replica != NULL)
+		consumer->replica->gc = NULL;
 	free(consumer->name);
 	TRASH(consumer);
 	free(consumer);
@@ -216,6 +227,46 @@ gc_set_checkpoint_count(int checkpoint_count)
 	gc.checkpoint_count = checkpoint_count;
 }

+void
+gc_xdir_clean_notify()
+{
+	/*
+	 * Compare the current time with the time of the last run.
+	 * This is needed in case of multiple failures to prevent
+	 * from deleting all replicas.
+	 */
+	static int prev_time = 0;
+	int cur_time = time(NULL);
+	if (cur_time - prev_time < 1)
+		return;
+	prev_time = cur_time;
+	/**
+	 * Mark consumer with least recent vclock as "dead" and
+	 * invoke garbage collection. If nothing to delete find
+	 * next alive consumer etc. Originally created for
+	 * cases with running out of disk space because of
+	 * disconnected replica.
+	 */
+	struct gc_consumer *leftmost =
+	    gc_tree_first(&gc.consumers);
+	/*
+	 * Exit if no consumers left or if this consumer is
+	 * not associated with replica (backup for example).
+	 */
+	if (leftmost == NULL || leftmost->replica == NULL)
+		return;
+	int64_t signature = leftmost->signature;
+	while (true) {
+		gc_consumer_unregister(leftmost);
+		leftmost = gc_tree_first(&gc.consumers);
+		if (leftmost == NULL || leftmost->replica == NULL ||
+		    leftmost->signature > signature) {
+			gc_run();
+			return;
+		}
+	}
+}
+
 struct gc_consumer *
 gc_consumer_register(const char *name, int64_t signature)
 {
diff --git a/src/box/gc.h b/src/box/gc.h
index 634ce6d38..15d966f54 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -31,9 +31,12 @@
  * SUCH DAMAGE.
  */

+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>

+#include "replication.h"
+
 #if defined(__cplusplus)
 extern "C" {
 #endif /* defined(__cplusplus) */
@@ -81,6 +84,12 @@ gc_set_checkpoint_count(int checkpoint_count);
 struct gc_consumer *
 gc_consumer_register(const char *name, int64_t signature);

+/**
+ * Bind consumer with associated replica (if any).
+ */
+void
+gc_consumer_set_replica(struct gc_consumer *gc, struct replica *replica);
+
 /**
  * Unregister a consumer and invoke garbage collection
  * if needed.
@@ -88,6 +97,13 @@ gc_consumer_register(const char *name, int64_t signature);
 void
 gc_consumer_unregister(struct gc_consumer *consumer);

+/**
+ * Notify gc to clean xdir because of running out
+ * of disk space.
+ */
+void
+gc_xdir_clean_notify();
+
 /**
  * Advance the vclock signature tracked by a consumer and
  * invoke garbage collection if needed.
diff --git a/src/box/relay.cc b/src/box/relay.cc
index d2ceaf110..c317775a4 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -535,6 +535,7 @@ relay_subscribe(int fd, uint64_t sync, struct replica *replica,
 			vclock_sum(replica_clock));
 		if (replica->gc == NULL)
 			diag_raise();
+		gc_consumer_set_replica(replica->gc, replica);
 	}

 	struct relay relay;
diff --git a/src/box/wal.cc b/src/box/wal.cc
index 93c350e1f..8dc973de8 100644
--- a/src/box/wal.cc
+++ b/src/box/wal.cc
@@ -41,6 +41,7 @@
 #include "cbus.h"
 #include "coio_task.h"
 #include "replication.h"
+#include "gc.h"


 const char *wal_mode_STRS[] = { "none", "write", "fsync", NULL };
@@ -64,6 +65,8 @@ struct wal_thread {
 	 * priority pipe and DOES NOT support yield.
 	 */
 	struct cpipe tx_prio_pipe;
+	/** Return pipe from 'wal' to tx' */
+	struct cpipe tx_pipe;
 };

 /*
@@ -584,6 +587,13 @@ wal_assign_lsn(struct wal_writer *writer, struct xrow_header **row,
 	}
 }

+static void
+gc_status_update(struct cmsg *msg)
+{
+	gc_xdir_clean_notify();
+	delete(msg);
+}
+
 static void
 wal_write_to_disk(struct cmsg *msg)
 {
@@ -655,6 +665,15 @@ done:
 		/* Until we can pass the error to tx, log it and clear. */
 		error_log(error);
 		diag_clear(diag_get());
+		if (errno == ENOSPC) {
+			struct cmsg *msg =
+			    (struct cmsg*)calloc(1, sizeof(struct cmsg));
+			static const struct cmsg_hop route[] = {
+				{gc_status_update, NULL}
+			};
+			cmsg_init(msg, route);
+			cpipe_push(&wal_thread.tx_pipe, msg);
+		}
 	}
 	/*
 	 * We need to start rollback from the first request
@@ -695,6 +714,7 @@ wal_thread_f(va_list ap)
 	 * even when tx fiber pool is used up by net messages.
 	 */
 	cpipe_create(&wal_thread.tx_prio_pipe, "tx_prio");
+	cpipe_create(&wal_thread.tx_pipe, "tx");

 	cbus_loop(&endpoint);

@@ -707,6 +727,7 @@ wal_thread_f(va_list ap)
 		xlog_close(&vy_log_writer.xlog, false);

 	cpipe_destroy(&wal_thread.tx_prio_pipe);
+	cpipe_destroy(&wal_thread.tx_pipe);
 	return 0;
 }

diff --git a/src/errinj.h b/src/errinj.h
index 895d938d5..11f1b7fdc 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -112,6 +112,7 @@ struct errinj {
 	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
 	_(ERRINJ_SNAP_WRITE_ROW_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
+	_(ERRINJ_NO_DISK_SPACE, ERRINJ_BOOL, {.bparam = false}) \

 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/src/fio.c b/src/fio.c
index b79d3d058..cdea11e87 100644
--- a/src/fio.c
+++ b/src/fio.c
@@ -29,6 +29,7 @@
  * SUCH DAMAGE.
  */
 #include "fio.h"
+#include "errinj.h"

 #include <sys/types.h>

@@ -141,6 +142,12 @@ fio_writev(int fd, struct iovec *iov, int iovcnt)
 	ssize_t nwr;
 restart:
 	nwr = writev(fd, iov, iovcnt);
+	/* Simulate running out of disk space to force the gc to clean logs. */
+	struct errinj *inj = errinj(ERRINJ_NO_DISK_SPACE, ERRINJ_BOOL);
+	if (inj != NULL && inj->bparam) {
+		errno = ENOSPC;
+		nwr = -1;
+	}
 	if (nwr < 0) {
 		if (errno == EINTR) {
 			errno = 0;
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 21a949965..a28688436 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -56,6 +56,8 @@ errinj.info()
     state: false
   ERRINJ_VY_RUN_WRITE:
     state: false
+  ERRINJ_NO_DISK_SPACE:
+    state: false
   ERRINJ_VY_LOG_FLUSH_DELAY:
     state: false
   ERRINJ_SNAP_COMMIT_DELAY:
diff --git a/test/replication/force_gc_on_err_nospace.result b/test/replication/force_gc_on_err_nospace.result
new file mode 100644
index 000000000..904a36ddc
--- /dev/null
+++ b/test/replication/force_gc_on_err_nospace.result
@@ -0,0 +1,107 @@
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+---
+...
+s = box.schema.space.create('test', {engine = engine});
+---
+...
+index = s:create_index('primary', {type = 'tree'})
+---
+...
+errinj = box.error.injection
+---
+...
+fio = require('fio')
+---
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+repl = box.cfg.replication
+---
+...
+box.cfg{replication = ""}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+for i = 1, 5 do s:insert{i} box.snapshot() end
+---
+...
+s:select()
+---
+- - [1]
+  - [2]
+  - [3]
+  - [4]
+  - [5]
+...
+#fio.glob(fio.pathjoin(fio.abspath("."), 'master/*.xlog'))
+---
+- 6
+...
+errinj.set("ERRINJ_NO_DISK_SPACE", true)
+---
+- ok
+...
+function insert(a) s:insert(a) end
+---
+...
+_, err = pcall(insert, {6})
+---
+...
+err:match("ailed to write")
+---
+- ailed to write
+...
+-- add a little timeout so gc could finish job
+require('fiber').sleep(0.01)
+---
+...
+#fio.glob(fio.pathjoin(fio.abspath("."), 'master/*.xlog'))
+---
+- 2
+...
+-- cleanup
+test_run:cmd("switch replica")
+---
+- true
+...
+test_run:cmd("restart server default with cleanup=1")
+---
+- true
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
diff --git a/test/replication/force_gc_on_err_nospace.test.lua b/test/replication/force_gc_on_err_nospace.test.lua
new file mode 100644
index 000000000..fea4aa73b
--- /dev/null
+++ b/test/replication/force_gc_on_err_nospace.test.lua
@@ -0,0 +1,38 @@
+env = require('test_run')
+test_run = env.new()
+engine = test_run:get_cfg('engine')
+
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+
+s = box.schema.space.create('test', {engine = engine});
+index = s:create_index('primary', {type = 'tree'})
+
+errinj = box.error.injection
+fio = require('fio')
+
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+test_run:cmd("switch replica")
+repl = box.cfg.replication
+box.cfg{replication = ""}
+test_run:cmd("switch default")
+
+for i = 1, 5 do s:insert{i} box.snapshot() end
+s:select()
+#fio.glob(fio.pathjoin(fio.abspath("."), 'master/*.xlog'))
+errinj.set("ERRINJ_NO_DISK_SPACE", true)
+function insert(a) s:insert(a) end
+_, err = pcall(insert, {6})
+err:match("ailed to write")
+-- add a little timeout so gc could finish job
+require('fiber').sleep(0.01)
+#fio.glob(fio.pathjoin(fio.abspath("."), 'master/*.xlog'))
+
+-- cleanup
+test_run:cmd("switch replica")
+test_run:cmd("restart server default with cleanup=1")
+test_run:cmd("switch default")
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index b489add58..1706cf697 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua
+release_disabled = catch.test.lua errinj.test.lua force_gc_on_err_nospace.test.lua gc.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua
 long_run = prune.test.lua
--
2.14.3 (Apple Git-98)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tarantool-patches] Re: [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err
  2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
@ 2018-06-28 15:25   ` Konstantin Osipov
  2018-06-28 21:55     ` [tarantool-patches] " Konstantin Belyavskiy
  0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2018-06-28 15:25 UTC (permalink / raw)
  To: tarantool-patches

* Konstantin Belyavskiy <k.belyavskiy@tarantool.org> [18/06/28 18:13]:
> 
> +void
> +gc_xdir_clean_notify()
> +{
> +	/*
> +	 * Compare the current time with the time of the last run.
> +	 * This is needed in case of multiple failures to prevent
> +	 * from deleting all replicas.
> +	 */
> +	static int prev_time = 0;
> +	int cur_time = time(NULL);
> +	if (cur_time - prev_time < 1)

Please use fiber time.

> +	/**
> +	 * Mark consumer with least recent vclock as "dead" and
> +	 * invoke garbage collection. If nothing to delete find
> +	 * next alive consumer etc. Originally created for
> +	 * cases with running out of disk space because of
> +	 * disconnected replica.
> +	 */
> +	struct gc_consumer *leftmost =
> +	    gc_tree_first(&gc.consumers);
> +	/*
> +	 * Exit if no consumers left or if this consumer is
> +	 * not associated with replica (backup for example).
> +	 */

You should delete the consumer *only* if it's going to help with
garbage. Running out of space should not lead to deletion of all
replicas as consumers if they are not lagging behind.

> +	if (leftmost == NULL || leftmost->replica == NULL)
> +		return;
> +	int64_t signature = leftmost->signature;
> +	while (true) {
> +		gc_consumer_unregister(leftmost);
> +		leftmost = gc_tree_first(&gc.consumers);
> +		if (leftmost == NULL || leftmost->replica == NULL ||
> +		    leftmost->signature > signature) {
> +			gc_run();
> +			return;
> +		}
> +	}
> +}
> +

<cut>

>  const char *wal_mode_STRS[] = { "none", "write", "fsync", NULL };
> @@ -64,6 +65,8 @@ struct wal_thread {
>  	 * priority pipe and DOES NOT support yield.
>  	 */
>  	struct cpipe tx_prio_pipe;
> +	/** Return pipe from 'wal' to tx' */
> +	struct cpipe tx_pipe;

Why do you need a new pipe?

> 
> +static void
> +gc_status_update(struct cmsg *msg)
> +{
> +	gc_xdir_clean_notify();
> +	delete(msg);

delete()?!


>  {
> @@ -655,6 +665,15 @@ done:
>  		/* Until we can pass the error to tx, log it and clear. */
>  		error_log(error);
>  		diag_clear(diag_get());
> +		if (errno == ENOSPC) {
> +			struct cmsg *msg =
> +			    (struct cmsg*)calloc(1, sizeof(struct cmsg));

Please check calloc() return value and do nothing 
in case it's NULL.

> +			static const struct cmsg_hop route[] = {
> +				{gc_status_update, NULL}
> +			};
> +			cmsg_init(msg, route);
> +			cpipe_push(&wal_thread.tx_pipe, msg);
> +		}
> 
> index 895d938d5..11f1b7fdc 100644
> +...
> +-- add a little timeout so gc could finish job
> +require('fiber').sleep(0.01)

Please never sleep in a test case unconditionally. It doesn't work
reliably, especially in parallel runs.

The test case should test that replicas which are severely behind 
are abandoned, and replicas which are running well are not.

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tarantool-patches] Re: [tarantool-patches] Re: [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err
  2018-06-28 15:25   ` [tarantool-patches] " Konstantin Osipov
@ 2018-06-28 21:55     ` Konstantin Belyavskiy
  2018-07-03  8:56       ` Kirill Yukhin
  0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Belyavskiy @ 2018-06-28 21:55 UTC (permalink / raw)
  To: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 3593 bytes --]


Thank you for the review.

>Четверг, 28 июня 2018, 18:25 +03:00 от Konstantin Osipov <kostja@tarantool.org>:
>
>* Konstantin Belyavskiy < k.belyavskiy@tarantool.org > [18/06/28 18:13]:
>> 
>> +void
>> +gc_xdir_clean_notify()
>> +{
>> +	/*
>> +	 * Compare the current time with the time of the last run.
>> +	 * This is needed in case of multiple failures to prevent
>> +	 * from deleting all replicas.
>> +	 */
>> +	static int prev_time = 0;
>> +	int cur_time = time(NULL);
>> +	if (cur_time - prev_time < 1)
>
>Please use fiber time. 
Fixed.
>
>
>> +	/**
>> +	 * Mark consumer with least recent vclock as "dead" and
>> +	 * invoke garbage collection. If nothing to delete find
>> +	 * next alive consumer etc. Originally created for
>> +	 * cases with running out of disk space because of
>> +	 * disconnected replica.
>> +	 */
>> +	struct gc_consumer *leftmost =
>> +	    gc_tree_first(&gc.consumers);
>> +	/*
>> +	 * Exit if no consumers left or if this consumer is
>> +	 * not associated with replica (backup for example).
>> +	 */
>
>You should delete the consumer *only* if it's going to help with
>garbage. Running out of space should not lead to deletion of all
>replicas as consumers if they are not lagging behind. 
Here I delete only the consumer with the least recent vclock
(or consumers if they share the same value), since the gc_tree uses the
vclock signature for comparison (see gc_consumer_cmp).
>
>> +	if (leftmost == NULL || leftmost->replica == NULL)
>> +		return;
>> +	int64_t signature = leftmost->signature;
>> +	while (true) {
>> +		gc_consumer_unregister(leftmost);
>> +		leftmost = gc_tree_first(&gc.consumers);
>> +		if (leftmost == NULL || leftmost->replica == NULL ||
>> +		    leftmost->signature > signature) {
>> +			gc_run();
>> +			return;
>> +		}
>> +	}
>> +}
>> +
>
><cut>
>
>>  const char *wal_mode_STRS[] = { "none", "write", "fsync", NULL };
>> @@ -64,6 +65,8 @@ struct wal_thread {
>>  	 * priority pipe and DOES NOT support yield.
>>  	 */
>>  	struct cpipe tx_prio_pipe;
>> +	/** Return pipe from 'wal' to tx' */
>> +	struct cpipe tx_pipe;
>
>Why do you need a new pipe? 
Since operations with files (like deleting old xlogs) require yield, and
tx_prio does not support it.
'tx' is a fiber pool, but 'tx_prio' is an endpoint and supports only callbacks
without 'yield'.
>
>> 
>> +static void
>> +gc_status_update(struct cmsg *msg)
>> +{
>> +	gc_xdir_clean_notify();
>> +	delete(msg);
>
>delete()?! 
Sorry, my fault.
>
>
>>  {
>> @@ -655,6 +665,15 @@ done:
>>  		/* Until we can pass the error to tx, log it and clear. */
>>  		error_log(error);
>>  		diag_clear(diag_get());
>> +		if (errno == ENOSPC) {
>> +			struct cmsg *msg =
>> +			    (struct cmsg*)calloc(1, sizeof(struct cmsg));
>
>Please check calloc() return value and do nothing 
>in case it's NULL. 
Done.
>
>> +			static const struct cmsg_hop route[] = {
>> +				{gc_status_update, NULL}
>> +			};
>> +			cmsg_init(msg, route);
>> +			cpipe_push(&wal_thread.tx_pipe, msg);
>> +		}
>> 
>> index 895d938d5..11f1b7fdc 100644
>> +...
>> +-- add a little timeout so gc could finish job
>> +require('fiber').sleep(0.01)
>
>Please never sleep in a test case unconditionally. It doesn't work
>reliably, especially in parallel runs.
>
>The test case should test that replicas which are severely behind 
>are abandoned, and replicas which are running well are not. 
Ok, will replace it with new test case.
>
>
>-- 
>Konstantin Osipov, Moscow, Russia,  +7 903 626 22 32
>http://tarantool.io -  www.twitter.com/kostja_osipov
>


Best regards,
Konstantin Belyavskiy
k.belyavskiy@tarantool.org

[-- Attachment #2: Type: text/html, Size: 5959 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tarantool-patches] Re: [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err
  2018-06-28 21:55     ` [tarantool-patches] " Konstantin Belyavskiy
@ 2018-07-03  8:56       ` Kirill Yukhin
  0 siblings, 0 replies; 6+ messages in thread
From: Kirill Yukhin @ 2018-07-03  8:56 UTC (permalink / raw)
  To: tarantool-patches

Hello,
On 29 июн 00:55, Konstantin Belyavskiy wrote:
> 
> Thank you for the review.
Could you pls post updated patch along w/ your answers.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-07-03  8:56 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-28 15:07 [tarantool-patches] [PATCH v3 0/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 1/2] replication: rename thread from tx to tx_prio Konstantin Belyavskiy
2018-06-28 15:07 ` [tarantool-patches] [PATCH v3 2/2] replication: force gc to clean xdir on ENOSPC err Konstantin Belyavskiy
2018-06-28 15:25   ` [tarantool-patches] " Konstantin Osipov
2018-06-28 21:55     ` [tarantool-patches] " Konstantin Belyavskiy
2018-07-03  8:56       ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox