Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH 1/1] iproto: get iproto obuf only right before usage
       [not found] <20180316135103.usjz6veqe5mpste6@esperanza>
@ 2018-03-16 14:41 ` Vladislav Shpilevoy
  2018-03-20 13:07   ` Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Vladislav Shpilevoy @ 2018-03-16 14:41 UTC (permalink / raw)
  To: tarantool-patches; +Cc: vdavydov.dev, Vladislav Shpilevoy

--- Branch: https://github.com/tarantool/tarantool/tree/fix-iproto-buffers
--- There is no issue, since the fix is simple, and does not
--- affect external behavior.

--- Changes: do not rely on buffers number and rotation. Just
--- send multiple fast read/write requests with a small delay.
--- With no patch the test hangs or crashes since some responses
--- are corrupted or discarded.

It is possible to discard non-sent responses using a special
sequence of requests and yields. In details: if DML requests
yield on commit too long, there are fast read requests, and a
network is saturated, then some non-sent DML responses are
discarded.

Signed-off-by: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
---
 src/box/iproto.cc        | 22 ++++++++++++++++++----
 src/errinj.h             |  1 +
 test/box/errinj.result   | 42 +++++++++++++++++++++++++++++++++++++++++-
 test/box/errinj.test.lua | 16 ++++++++++++++++
 4 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/src/box/iproto.cc b/src/box/iproto.cc
index 37313fa52..41c60c318 100644
--- a/src/box/iproto.cc
+++ b/src/box/iproto.cc
@@ -59,6 +59,7 @@
 #include "replication.h" /* instance_uuid */
 #include "iproto_constants.h"
 #include "rmean.h"
+#include "errinj.h"
 
 /* The number of iproto messages in flight */
 enum { IPROTO_MSG_MAX = 768 };
@@ -1167,7 +1168,6 @@ static void
 tx_process1(struct cmsg *m)
 {
 	struct iproto_msg *msg = tx_accept_msg(m);
-	struct obuf *out = msg->connection->tx.p_obuf;
 
 	tx_fiber_init(msg->connection->session, msg->header.sync);
 	if (tx_check_schema(msg->header.schema_version))
@@ -1175,8 +1175,15 @@ tx_process1(struct cmsg *m)
 
 	struct tuple *tuple;
 	struct obuf_svp svp;
-	if (box_process1(&msg->dml, &tuple) ||
-	    iproto_prepare_select(out, &svp))
+	ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, {
+		if (rand() % 100 < 10)
+			fiber_sleep(0.001);
+	});
+	struct obuf *out;
+	if (box_process1(&msg->dml, &tuple) != 0)
+		goto error;
+	out = msg->connection->tx.p_obuf;
+	if (iproto_prepare_select(out, &svp) != 0)
 		goto error;
 	if (tuple && tuple_to_obuf(tuple, out))
 		goto error;
@@ -1192,7 +1199,7 @@ static void
 tx_process_select(struct cmsg *m)
 {
 	struct iproto_msg *msg = tx_accept_msg(m);
-	struct obuf *out = msg->connection->tx.p_obuf;
+	struct obuf *out;
 	struct obuf_svp svp;
 	struct port port;
 	int count;
@@ -1204,11 +1211,18 @@ tx_process_select(struct cmsg *m)
 	if (tx_check_schema(msg->header.schema_version))
 		goto error;
 
+	ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, {
+		/* Select can yield, for example, on vinyl. */
+		if (rand() % 100 < 10)
+			fiber_sleep(0.001);
+	});
 	rc = box_select(req->space_id, req->index_id,
 			req->iterator, req->offset, req->limit,
 			req->key, req->key_end, &port);
 	if (rc < 0)
 		goto error;
+
+	out = msg->connection->tx.p_obuf;
 	if (iproto_prepare_select(out, &svp) != 0) {
 		port_destroy(&port);
 		goto error;
diff --git a/src/errinj.h b/src/errinj.h
index 352a3c3c9..a5417baf6 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -107,6 +107,7 @@ struct errinj {
 	_(ERRINJ_RELAY_EXIT_DELAY, ERRINJ_DOUBLE, {.dparam = 0}) \
 	_(ERRINJ_VY_DELAY_PK_LOOKUP, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
+	_(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index db27dd4b3..3ccf196f8 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -60,9 +60,11 @@ errinj.info()
     state: false
   ERRINJ_WAL_ROTATE:
     state: false
+  ERRINJ_VY_POINT_ITER_WAIT:
+    state: false
   ERRINJ_RELAY_EXIT_DELAY:
     state: 0
-  ERRINJ_VY_POINT_ITER_WAIT:
+  ERRINJ_IPROTO_TX_DELAY:
     state: false
   ERRINJ_TUPLE_FIELD:
     state: false
@@ -1057,3 +1059,41 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1)
 s:drop()
 ---
 ...
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+---
+...
+s = box.schema.space.create('test')
+---
+...
+_ = s:create_index('pk')
+---
+...
+c = net_box.connect(box.cfg.listen)
+---
+...
+ch = fiber.channel(200)
+---
+...
+errinj.set("ERRINJ_IPROTO_TX_DELAY", true)
+---
+- ok
+...
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end
+---
+...
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end
+---
+...
+for i = 1, 200 do ch:get() end
+---
+...
+errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
+---
+- ok
+...
+s:drop()
+---
+...
+box.schema.user.revoke('guest', 'read,write,execute','universe')
+---
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index 9bcf53847..a24b357e6 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -348,3 +348,19 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', sk.id)
 sk:alter({parts = {2, 'number'}})
 box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1)
 s:drop()
+
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+s = box.schema.space.create('test')
+_ = s:create_index('pk')
+
+c = net_box.connect(box.cfg.listen)
+
+ch = fiber.channel(200)
+errinj.set("ERRINJ_IPROTO_TX_DELAY", true)
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end
+for i = 1, 200 do ch:get() end
+errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
+
+s:drop()
+box.schema.user.revoke('guest', 'read,write,execute','universe')
-- 
2.14.3 (Apple Git-98)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] iproto: get iproto obuf only right before usage
  2018-03-16 14:41 ` [PATCH 1/1] iproto: get iproto obuf only right before usage Vladislav Shpilevoy
@ 2018-03-20 13:07   ` Vladimir Davydov
  2018-03-20 13:43     ` [PATCH v3 " Vladislav Shpilevoy
  0 siblings, 1 reply; 4+ messages in thread
From: Vladimir Davydov @ 2018-03-20 13:07 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

On Fri, Mar 16, 2018 at 05:41:42PM +0300, Vladislav Shpilevoy wrote:
> --- Branch: https://github.com/tarantool/tarantool/tree/fix-iproto-buffers
> --- There is no issue, since the fix is simple, and does not
> --- affect external behavior.

Please open a Github ticket. Just describe the problem there. A test
case is not necessary. This will be useful for release changelog.
(FYI I asked both Kostja and Kirill - they both agree).

> 
> --- Changes: do not rely on buffers number and rotation. Just
> --- send multiple fast read/write requests with a small delay.
> --- With no patch the test hangs or crashes since some responses
> --- are corrupted or discarded.

When I talked about extra info you don't want to be included into the
commit message, I meant the '---' generated by git-format-patch, as
everything following '---' is ignored by git-am.

Also, please don't forget to add the version to the subject line
(--subject-prefix='PATCH v2').

> 
> It is possible to discard non-sent responses using a special
> sequence of requests and yields. In details: if DML requests
> yield on commit too long, there are fast read requests, and a
> network is saturated, then some non-sent DML responses are
> discarded.
> 
> Signed-off-by: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>

We don't sign patches, not yet. Please fix your git config.

> @@ -1175,8 +1175,15 @@ tx_process1(struct cmsg *m)
>  
>  	struct tuple *tuple;
>  	struct obuf_svp svp;
> -	if (box_process1(&msg->dml, &tuple) ||
> -	    iproto_prepare_select(out, &svp))

> +	ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, {
> +		if (rand() % 100 < 10)
> +			fiber_sleep(0.001);
> +	});
> +	struct obuf *out;
> +	if (box_process1(&msg->dml, &tuple) != 0)
> +		goto error;
> +	out = msg->connection->tx.p_obuf;
> +	if (iproto_prepare_select(out, &svp) != 0)
>  		goto error;
>  	if (tuple && tuple_to_obuf(tuple, out))
>  		goto error;
> @@ -1192,7 +1199,7 @@ static void
>  tx_process_select(struct cmsg *m)
>  {
>  	struct iproto_msg *msg = tx_accept_msg(m);
> -	struct obuf *out = msg->connection->tx.p_obuf;
> +	struct obuf *out;
>  	struct obuf_svp svp;
>  	struct port port;
>  	int count;
> @@ -1204,11 +1211,18 @@ tx_process_select(struct cmsg *m)
>  	if (tx_check_schema(msg->header.schema_version))
>  		goto error;
>  
> +	ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, {
> +		/* Select can yield, for example, on vinyl. */
> +		if (rand() % 100 < 10)
> +			fiber_sleep(0.001);
> +	});

There are quite a few constants in this error injection so
copying-and-pasting doesn't look good. I'd wrap it into an inline
function (tx_inject_delay or something, think of the name pls).

> diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
> index 9bcf53847..a24b357e6 100644
> --- a/test/box/errinj.test.lua
> +++ b/test/box/errinj.test.lua
> @@ -348,3 +348,19 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', sk.id)
>  sk:alter({parts = {2, 'number'}})
>  box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1)
>  s:drop()

Please add a brief comment explaining what and why we are testing here.

> +
> +box.schema.user.grant('guest', 'read,write,execute', 'universe')
> +s = box.schema.space.create('test')
> +_ = s:create_index('pk')
> +
> +c = net_box.connect(box.cfg.listen)
> +
> +ch = fiber.channel(200)
> +errinj.set("ERRINJ_IPROTO_TX_DELAY", true)
> +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end
> +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end
> +for i = 1, 200 do ch:get() end
> +errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
> +
> +s:drop()
> +box.schema.user.revoke('guest', 'read,write,execute','universe')

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3 1/1] iproto: get iproto obuf only right before usage
  2018-03-20 13:07   ` Vladimir Davydov
@ 2018-03-20 13:43     ` Vladislav Shpilevoy
  2018-03-20 16:49       ` Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Vladislav Shpilevoy @ 2018-03-20 13:43 UTC (permalink / raw)
  To: tarantool-patches; +Cc: vdavydov.dev, Vladislav Shpilevoy

It is possible to discard non-sent responses using a special
sequence of requests and yields. In details: if DML requests
yield on commit too long, there are fast read requests, and a
network is saturated, then some non-sent DML responses are
discarded.

Closes #3255
---
 src/box/iproto.cc        | 25 +++++++++++++++++++++----
 src/errinj.h             |  1 +
 test/box/errinj.result   | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 test/box/errinj.test.lua | 21 +++++++++++++++++++++
 4 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/src/box/iproto.cc b/src/box/iproto.cc
index 37313fa52..db5820806 100644
--- a/src/box/iproto.cc
+++ b/src/box/iproto.cc
@@ -59,6 +59,7 @@
 #include "replication.h" /* instance_uuid */
 #include "iproto_constants.h"
 #include "rmean.h"
+#include "errinj.h"
 
 /* The number of iproto messages in flight */
 enum { IPROTO_MSG_MAX = 768 };
@@ -1163,11 +1164,20 @@ tx_reply_iproto_error(struct cmsg *m)
 	iproto_wpos_create(&msg->wpos, out);
 }
 
+/** Inject a short delay on tx request processing for testing. */
+static inline void
+tx_inject_delay()
+{
+	ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, {
+		if (rand() % 100 < 10)
+			fiber_sleep(0.001);
+	});
+}
+
 static void
 tx_process1(struct cmsg *m)
 {
 	struct iproto_msg *msg = tx_accept_msg(m);
-	struct obuf *out = msg->connection->tx.p_obuf;
 
 	tx_fiber_init(msg->connection->session, msg->header.sync);
 	if (tx_check_schema(msg->header.schema_version))
@@ -1175,8 +1185,12 @@ tx_process1(struct cmsg *m)
 
 	struct tuple *tuple;
 	struct obuf_svp svp;
-	if (box_process1(&msg->dml, &tuple) ||
-	    iproto_prepare_select(out, &svp))
+	struct obuf *out;
+	tx_inject_delay();
+	if (box_process1(&msg->dml, &tuple) != 0)
+		goto error;
+	out = msg->connection->tx.p_obuf;
+	if (iproto_prepare_select(out, &svp) != 0)
 		goto error;
 	if (tuple && tuple_to_obuf(tuple, out))
 		goto error;
@@ -1192,7 +1206,7 @@ static void
 tx_process_select(struct cmsg *m)
 {
 	struct iproto_msg *msg = tx_accept_msg(m);
-	struct obuf *out = msg->connection->tx.p_obuf;
+	struct obuf *out;
 	struct obuf_svp svp;
 	struct port port;
 	int count;
@@ -1204,11 +1218,14 @@ tx_process_select(struct cmsg *m)
 	if (tx_check_schema(msg->header.schema_version))
 		goto error;
 
+	tx_inject_delay();
 	rc = box_select(req->space_id, req->index_id,
 			req->iterator, req->offset, req->limit,
 			req->key, req->key_end, &port);
 	if (rc < 0)
 		goto error;
+
+	out = msg->connection->tx.p_obuf;
 	if (iproto_prepare_select(out, &svp) != 0) {
 		port_destroy(&port);
 		goto error;
diff --git a/src/errinj.h b/src/errinj.h
index 352a3c3c9..a5417baf6 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -107,6 +107,7 @@ struct errinj {
 	_(ERRINJ_RELAY_EXIT_DELAY, ERRINJ_DOUBLE, {.dparam = 0}) \
 	_(ERRINJ_VY_DELAY_PK_LOOKUP, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \
+	_(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index db27dd4b3..cd0c35c2f 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -60,9 +60,11 @@ errinj.info()
     state: false
   ERRINJ_WAL_ROTATE:
     state: false
+  ERRINJ_VY_POINT_ITER_WAIT:
+    state: false
   ERRINJ_RELAY_EXIT_DELAY:
     state: 0
-  ERRINJ_VY_POINT_ITER_WAIT:
+  ERRINJ_IPROTO_TX_DELAY:
     state: false
   ERRINJ_TUPLE_FIELD:
     state: false
@@ -1057,3 +1059,45 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1)
 s:drop()
 ---
 ...
+--
+-- gh-3255: iproto can crash and discard responses, if a network
+-- is saturated, and DML yields too long on commit.
+--
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+---
+...
+s = box.schema.space.create('test')
+---
+...
+_ = s:create_index('pk')
+---
+...
+c = net_box.connect(box.cfg.listen)
+---
+...
+ch = fiber.channel(200)
+---
+...
+errinj.set("ERRINJ_IPROTO_TX_DELAY", true)
+---
+- ok
+...
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end
+---
+...
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end
+---
+...
+for i = 1, 200 do ch:get() end
+---
+...
+errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
+---
+- ok
+...
+s:drop()
+---
+...
+box.schema.user.revoke('guest', 'read,write,execute','universe')
+---
+...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index 9bcf53847..3af1b74dc 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -348,3 +348,24 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', sk.id)
 sk:alter({parts = {2, 'number'}})
 box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1)
 s:drop()
+
+--
+-- gh-3255: iproto can crash and discard responses, if a network
+-- is saturated, and DML yields too long on commit.
+--
+
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+s = box.schema.space.create('test')
+_ = s:create_index('pk')
+
+c = net_box.connect(box.cfg.listen)
+
+ch = fiber.channel(200)
+errinj.set("ERRINJ_IPROTO_TX_DELAY", true)
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end
+for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end
+for i = 1, 200 do ch:get() end
+errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
+
+s:drop()
+box.schema.user.revoke('guest', 'read,write,execute','universe')
-- 
2.14.3 (Apple Git-98)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 1/1] iproto: get iproto obuf only right before usage
  2018-03-20 13:43     ` [PATCH v3 " Vladislav Shpilevoy
@ 2018-03-20 16:49       ` Vladimir Davydov
  0 siblings, 0 replies; 4+ messages in thread
From: Vladimir Davydov @ 2018-03-20 16:49 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

On Tue, Mar 20, 2018 at 04:43:05PM +0300, Vladislav Shpilevoy wrote:
> It is possible to discard non-sent responses using a special
> sequence of requests and yields. In details: if DML requests
> yield on commit too long, there are fast read requests, and a
> network is saturated, then some non-sent DML responses are
> discarded.
> 
> Closes #3255
> ---
>  src/box/iproto.cc        | 25 +++++++++++++++++++++----
>  src/errinj.h             |  1 +
>  test/box/errinj.result   | 46 +++++++++++++++++++++++++++++++++++++++++++++-
>  test/box/errinj.test.lua | 21 +++++++++++++++++++++
>  4 files changed, 88 insertions(+), 5 deletions(-)

Looks good to me.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-03-20 16:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20180316135103.usjz6veqe5mpste6@esperanza>
2018-03-16 14:41 ` [PATCH 1/1] iproto: get iproto obuf only right before usage Vladislav Shpilevoy
2018-03-20 13:07   ` Vladimir Davydov
2018-03-20 13:43     ` [PATCH v3 " Vladislav Shpilevoy
2018-03-20 16:49       ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox