* [PATCH 1/1] iproto: get iproto obuf only right before usage [not found] <20180316135103.usjz6veqe5mpste6@esperanza> @ 2018-03-16 14:41 ` Vladislav Shpilevoy 2018-03-20 13:07 ` Vladimir Davydov 0 siblings, 1 reply; 4+ messages in thread From: Vladislav Shpilevoy @ 2018-03-16 14:41 UTC (permalink / raw) To: tarantool-patches; +Cc: vdavydov.dev, Vladislav Shpilevoy --- Branch: https://github.com/tarantool/tarantool/tree/fix-iproto-buffers --- There is no issue, since the fix is simple, and does not --- affect external behavior. --- Changes: do not rely on buffers number and rotation. Just --- send multiple fast read/write requests with a small delay. --- With no patch the test hangs or crashes since some responses --- are corrupted or discarded. It is possible to discard non-sent responses using a special sequence of requests and yields. In details: if DML requests yield on commit too long, there are fast read requests, and a network is saturated, then some non-sent DML responses are discarded. Signed-off-by: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> --- src/box/iproto.cc | 22 ++++++++++++++++++---- src/errinj.h | 1 + test/box/errinj.result | 42 +++++++++++++++++++++++++++++++++++++++++- test/box/errinj.test.lua | 16 ++++++++++++++++ 4 files changed, 76 insertions(+), 5 deletions(-) diff --git a/src/box/iproto.cc b/src/box/iproto.cc index 37313fa52..41c60c318 100644 --- a/src/box/iproto.cc +++ b/src/box/iproto.cc @@ -59,6 +59,7 @@ #include "replication.h" /* instance_uuid */ #include "iproto_constants.h" #include "rmean.h" +#include "errinj.h" /* The number of iproto messages in flight */ enum { IPROTO_MSG_MAX = 768 }; @@ -1167,7 +1168,6 @@ static void tx_process1(struct cmsg *m) { struct iproto_msg *msg = tx_accept_msg(m); - struct obuf *out = msg->connection->tx.p_obuf; tx_fiber_init(msg->connection->session, msg->header.sync); if (tx_check_schema(msg->header.schema_version)) @@ -1175,8 +1175,15 @@ tx_process1(struct cmsg *m) struct tuple *tuple; struct obuf_svp svp; - if (box_process1(&msg->dml, &tuple) || - iproto_prepare_select(out, &svp)) + ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, { + if (rand() % 100 < 10) + fiber_sleep(0.001); + }); + struct obuf *out; + if (box_process1(&msg->dml, &tuple) != 0) + goto error; + out = msg->connection->tx.p_obuf; + if (iproto_prepare_select(out, &svp) != 0) goto error; if (tuple && tuple_to_obuf(tuple, out)) goto error; @@ -1192,7 +1199,7 @@ static void tx_process_select(struct cmsg *m) { struct iproto_msg *msg = tx_accept_msg(m); - struct obuf *out = msg->connection->tx.p_obuf; + struct obuf *out; struct obuf_svp svp; struct port port; int count; @@ -1204,11 +1211,18 @@ tx_process_select(struct cmsg *m) if (tx_check_schema(msg->header.schema_version)) goto error; + ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, { + /* Select can yield, for example, on vinyl. */ + if (rand() % 100 < 10) + fiber_sleep(0.001); + }); rc = box_select(req->space_id, req->index_id, req->iterator, req->offset, req->limit, req->key, req->key_end, &port); if (rc < 0) goto error; + + out = msg->connection->tx.p_obuf; if (iproto_prepare_select(out, &svp) != 0) { port_destroy(&port); goto error; diff --git a/src/errinj.h b/src/errinj.h index 352a3c3c9..a5417baf6 100644 --- a/src/errinj.h +++ b/src/errinj.h @@ -107,6 +107,7 @@ struct errinj { _(ERRINJ_RELAY_EXIT_DELAY, ERRINJ_DOUBLE, {.dparam = 0}) \ _(ERRINJ_VY_DELAY_PK_LOOKUP, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \ + _(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index db27dd4b3..3ccf196f8 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -60,9 +60,11 @@ errinj.info() state: false ERRINJ_WAL_ROTATE: state: false + ERRINJ_VY_POINT_ITER_WAIT: + state: false ERRINJ_RELAY_EXIT_DELAY: state: 0 - ERRINJ_VY_POINT_ITER_WAIT: + ERRINJ_IPROTO_TX_DELAY: state: false ERRINJ_TUPLE_FIELD: state: false @@ -1057,3 +1059,41 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1) s:drop() --- ... +box.schema.user.grant('guest', 'read,write,execute', 'universe') +--- +... +s = box.schema.space.create('test') +--- +... +_ = s:create_index('pk') +--- +... +c = net_box.connect(box.cfg.listen) +--- +... +ch = fiber.channel(200) +--- +... +errinj.set("ERRINJ_IPROTO_TX_DELAY", true) +--- +- ok +... +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end +--- +... +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end +--- +... +for i = 1, 200 do ch:get() end +--- +... +errinj.set("ERRINJ_IPROTO_TX_DELAY", false) +--- +- ok +... +s:drop() +--- +... +box.schema.user.revoke('guest', 'read,write,execute','universe') +--- +... diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index 9bcf53847..a24b357e6 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -348,3 +348,19 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', sk.id) sk:alter({parts = {2, 'number'}}) box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1) s:drop() + +box.schema.user.grant('guest', 'read,write,execute', 'universe') +s = box.schema.space.create('test') +_ = s:create_index('pk') + +c = net_box.connect(box.cfg.listen) + +ch = fiber.channel(200) +errinj.set("ERRINJ_IPROTO_TX_DELAY", true) +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end +for i = 1, 200 do ch:get() end +errinj.set("ERRINJ_IPROTO_TX_DELAY", false) + +s:drop() +box.schema.user.revoke('guest', 'read,write,execute','universe') -- 2.14.3 (Apple Git-98) ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/1] iproto: get iproto obuf only right before usage 2018-03-16 14:41 ` [PATCH 1/1] iproto: get iproto obuf only right before usage Vladislav Shpilevoy @ 2018-03-20 13:07 ` Vladimir Davydov 2018-03-20 13:43 ` [PATCH v3 " Vladislav Shpilevoy 0 siblings, 1 reply; 4+ messages in thread From: Vladimir Davydov @ 2018-03-20 13:07 UTC (permalink / raw) To: Vladislav Shpilevoy; +Cc: tarantool-patches On Fri, Mar 16, 2018 at 05:41:42PM +0300, Vladislav Shpilevoy wrote: > --- Branch: https://github.com/tarantool/tarantool/tree/fix-iproto-buffers > --- There is no issue, since the fix is simple, and does not > --- affect external behavior. Please open a Github ticket. Just describe the problem there. A test case is not necessary. This will be useful for release changelog. (FYI I asked both Kostja and Kirill - they both agree). > > --- Changes: do not rely on buffers number and rotation. Just > --- send multiple fast read/write requests with a small delay. > --- With no patch the test hangs or crashes since some responses > --- are corrupted or discarded. When I talked about extra info you don't want to be included into the commit message, I meant the '---' generated by git-format-patch, as everything following '---' is ignored by git-am. Also, please don't forget to add the version to the subject line (--subject-prefix='PATCH v2'). > > It is possible to discard non-sent responses using a special > sequence of requests and yields. In details: if DML requests > yield on commit too long, there are fast read requests, and a > network is saturated, then some non-sent DML responses are > discarded. > > Signed-off-by: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> We don't sign patches, not yet. Please fix your git config. > @@ -1175,8 +1175,15 @@ tx_process1(struct cmsg *m) > > struct tuple *tuple; > struct obuf_svp svp; > - if (box_process1(&msg->dml, &tuple) || > - iproto_prepare_select(out, &svp)) > + ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, { > + if (rand() % 100 < 10) > + fiber_sleep(0.001); > + }); > + struct obuf *out; > + if (box_process1(&msg->dml, &tuple) != 0) > + goto error; > + out = msg->connection->tx.p_obuf; > + if (iproto_prepare_select(out, &svp) != 0) > goto error; > if (tuple && tuple_to_obuf(tuple, out)) > goto error; > @@ -1192,7 +1199,7 @@ static void > tx_process_select(struct cmsg *m) > { > struct iproto_msg *msg = tx_accept_msg(m); > - struct obuf *out = msg->connection->tx.p_obuf; > + struct obuf *out; > struct obuf_svp svp; > struct port port; > int count; > @@ -1204,11 +1211,18 @@ tx_process_select(struct cmsg *m) > if (tx_check_schema(msg->header.schema_version)) > goto error; > > + ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, { > + /* Select can yield, for example, on vinyl. */ > + if (rand() % 100 < 10) > + fiber_sleep(0.001); > + }); There are quite a few constants in this error injection so copying-and-pasting doesn't look good. I'd wrap it into an inline function (tx_inject_delay or something, think of the name pls). > diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua > index 9bcf53847..a24b357e6 100644 > --- a/test/box/errinj.test.lua > +++ b/test/box/errinj.test.lua > @@ -348,3 +348,19 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', sk.id) > sk:alter({parts = {2, 'number'}}) > box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1) > s:drop() Please add a brief comment explaining what and why we are testing here. > + > +box.schema.user.grant('guest', 'read,write,execute', 'universe') > +s = box.schema.space.create('test') > +_ = s:create_index('pk') > + > +c = net_box.connect(box.cfg.listen) > + > +ch = fiber.channel(200) > +errinj.set("ERRINJ_IPROTO_TX_DELAY", true) > +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end > +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end > +for i = 1, 200 do ch:get() end > +errinj.set("ERRINJ_IPROTO_TX_DELAY", false) > + > +s:drop() > +box.schema.user.revoke('guest', 'read,write,execute','universe') ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v3 1/1] iproto: get iproto obuf only right before usage 2018-03-20 13:07 ` Vladimir Davydov @ 2018-03-20 13:43 ` Vladislav Shpilevoy 2018-03-20 16:49 ` Vladimir Davydov 0 siblings, 1 reply; 4+ messages in thread From: Vladislav Shpilevoy @ 2018-03-20 13:43 UTC (permalink / raw) To: tarantool-patches; +Cc: vdavydov.dev, Vladislav Shpilevoy It is possible to discard non-sent responses using a special sequence of requests and yields. In details: if DML requests yield on commit too long, there are fast read requests, and a network is saturated, then some non-sent DML responses are discarded. Closes #3255 --- src/box/iproto.cc | 25 +++++++++++++++++++++---- src/errinj.h | 1 + test/box/errinj.result | 46 +++++++++++++++++++++++++++++++++++++++++++++- test/box/errinj.test.lua | 21 +++++++++++++++++++++ 4 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/box/iproto.cc b/src/box/iproto.cc index 37313fa52..db5820806 100644 --- a/src/box/iproto.cc +++ b/src/box/iproto.cc @@ -59,6 +59,7 @@ #include "replication.h" /* instance_uuid */ #include "iproto_constants.h" #include "rmean.h" +#include "errinj.h" /* The number of iproto messages in flight */ enum { IPROTO_MSG_MAX = 768 }; @@ -1163,11 +1164,20 @@ tx_reply_iproto_error(struct cmsg *m) iproto_wpos_create(&msg->wpos, out); } +/** Inject a short delay on tx request processing for testing. */ +static inline void +tx_inject_delay() +{ + ERROR_INJECT(ERRINJ_IPROTO_TX_DELAY, { + if (rand() % 100 < 10) + fiber_sleep(0.001); + }); +} + static void tx_process1(struct cmsg *m) { struct iproto_msg *msg = tx_accept_msg(m); - struct obuf *out = msg->connection->tx.p_obuf; tx_fiber_init(msg->connection->session, msg->header.sync); if (tx_check_schema(msg->header.schema_version)) @@ -1175,8 +1185,12 @@ tx_process1(struct cmsg *m) struct tuple *tuple; struct obuf_svp svp; - if (box_process1(&msg->dml, &tuple) || - iproto_prepare_select(out, &svp)) + struct obuf *out; + tx_inject_delay(); + if (box_process1(&msg->dml, &tuple) != 0) + goto error; + out = msg->connection->tx.p_obuf; + if (iproto_prepare_select(out, &svp) != 0) goto error; if (tuple && tuple_to_obuf(tuple, out)) goto error; @@ -1192,7 +1206,7 @@ static void tx_process_select(struct cmsg *m) { struct iproto_msg *msg = tx_accept_msg(m); - struct obuf *out = msg->connection->tx.p_obuf; + struct obuf *out; struct obuf_svp svp; struct port port; int count; @@ -1204,11 +1218,14 @@ tx_process_select(struct cmsg *m) if (tx_check_schema(msg->header.schema_version)) goto error; + tx_inject_delay(); rc = box_select(req->space_id, req->index_id, req->iterator, req->offset, req->limit, req->key, req->key_end, &port); if (rc < 0) goto error; + + out = msg->connection->tx.p_obuf; if (iproto_prepare_select(out, &svp) != 0) { port_destroy(&port); goto error; diff --git a/src/errinj.h b/src/errinj.h index 352a3c3c9..a5417baf6 100644 --- a/src/errinj.h +++ b/src/errinj.h @@ -107,6 +107,7 @@ struct errinj { _(ERRINJ_RELAY_EXIT_DELAY, ERRINJ_DOUBLE, {.dparam = 0}) \ _(ERRINJ_VY_DELAY_PK_LOOKUP, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT, ERRINJ_DOUBLE, {.dparam = 0}) \ + _(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index db27dd4b3..cd0c35c2f 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -60,9 +60,11 @@ errinj.info() state: false ERRINJ_WAL_ROTATE: state: false + ERRINJ_VY_POINT_ITER_WAIT: + state: false ERRINJ_RELAY_EXIT_DELAY: state: 0 - ERRINJ_VY_POINT_ITER_WAIT: + ERRINJ_IPROTO_TX_DELAY: state: false ERRINJ_TUPLE_FIELD: state: false @@ -1057,3 +1059,45 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1) s:drop() --- ... +-- +-- gh-3255: iproto can crash and discard responses, if a network +-- is saturated, and DML yields too long on commit. +-- +box.schema.user.grant('guest', 'read,write,execute', 'universe') +--- +... +s = box.schema.space.create('test') +--- +... +_ = s:create_index('pk') +--- +... +c = net_box.connect(box.cfg.listen) +--- +... +ch = fiber.channel(200) +--- +... +errinj.set("ERRINJ_IPROTO_TX_DELAY", true) +--- +- ok +... +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end +--- +... +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end +--- +... +for i = 1, 200 do ch:get() end +--- +... +errinj.set("ERRINJ_IPROTO_TX_DELAY", false) +--- +- ok +... +s:drop() +--- +... +box.schema.user.revoke('guest', 'read,write,execute','universe') +--- +... diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index 9bcf53847..3af1b74dc 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -348,3 +348,24 @@ box.error.injection.set('ERRINJ_BUILD_SECONDARY', sk.id) sk:alter({parts = {2, 'number'}}) box.error.injection.set('ERRINJ_BUILD_SECONDARY', -1) s:drop() + +-- +-- gh-3255: iproto can crash and discard responses, if a network +-- is saturated, and DML yields too long on commit. +-- + +box.schema.user.grant('guest', 'read,write,execute', 'universe') +s = box.schema.space.create('test') +_ = s:create_index('pk') + +c = net_box.connect(box.cfg.listen) + +ch = fiber.channel(200) +errinj.set("ERRINJ_IPROTO_TX_DELAY", true) +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:replace{1} end ch:put(true) end) end +for i = 1, 100 do fiber.create(function() for j = 1, 10 do c.space.test:select() end ch:put(true) end) end +for i = 1, 200 do ch:get() end +errinj.set("ERRINJ_IPROTO_TX_DELAY", false) + +s:drop() +box.schema.user.revoke('guest', 'read,write,execute','universe') -- 2.14.3 (Apple Git-98) ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v3 1/1] iproto: get iproto obuf only right before usage 2018-03-20 13:43 ` [PATCH v3 " Vladislav Shpilevoy @ 2018-03-20 16:49 ` Vladimir Davydov 0 siblings, 0 replies; 4+ messages in thread From: Vladimir Davydov @ 2018-03-20 16:49 UTC (permalink / raw) To: Vladislav Shpilevoy; +Cc: tarantool-patches On Tue, Mar 20, 2018 at 04:43:05PM +0300, Vladislav Shpilevoy wrote: > It is possible to discard non-sent responses using a special > sequence of requests and yields. In details: if DML requests > yield on commit too long, there are fast read requests, and a > network is saturated, then some non-sent DML responses are > discarded. > > Closes #3255 > --- > src/box/iproto.cc | 25 +++++++++++++++++++++---- > src/errinj.h | 1 + > test/box/errinj.result | 46 +++++++++++++++++++++++++++++++++++++++++++++- > test/box/errinj.test.lua | 21 +++++++++++++++++++++ > 4 files changed, 88 insertions(+), 5 deletions(-) Looks good to me. ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2018-03-20 16:49 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <20180316135103.usjz6veqe5mpste6@esperanza> 2018-03-16 14:41 ` [PATCH 1/1] iproto: get iproto obuf only right before usage Vladislav Shpilevoy 2018-03-20 13:07 ` Vladimir Davydov 2018-03-20 13:43 ` [PATCH v3 " Vladislav Shpilevoy 2018-03-20 16:49 ` Vladimir Davydov
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox