Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
@ 2019-11-26 13:13 Leonid
  2019-11-26 21:05 ` Konstantin Osipov
  2019-12-11 22:21 ` Alexander Turenko
  0 siblings, 2 replies; 24+ messages in thread
From: Leonid @ 2019-11-26 13:13 UTC (permalink / raw)
  To: alexander.turenko; +Cc: tarantool-patches

https://github.com/tarantool/tarantool/issues/4427
https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api

---
 src/box/CMakeLists.txt       |   2 +
 src/box/lua/init.c           |   4 +
 src/box/lua/schema.lua       |  71 -----------------
 src/box/lua/txn.c            | 145 +++++++++++++++++++++++++++++++++++
 src/box/lua/txn.h            |  49 ++++++++++++
 src/box/lua/txn.lua          |  53 +++++++++++++
 src/box/txn.c                |   2 +
 test/box/misc.result         |   1 +
 test/engine/savepoint.result |  12 +--
 9 files changed, 262 insertions(+), 77 deletions(-)
 create mode 100644 src/box/lua/txn.c
 create mode 100644 src/box/lua/txn.h
 create mode 100644 src/box/lua/txn.lua

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 5cd5cba81..71d691542 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -15,6 +15,7 @@ lua_source(lua_sources lua/serpent.lua)
 lua_source(lua_sources lua/xlog.lua)
 lua_source(lua_sources lua/key_def.lua)
 lua_source(lua_sources lua/merger.lua)
+lua_source(lua_sources lua/txn.lua)
 set(bin_sources)
 bin_source(bin_sources bootstrap.snap bootstrap.h)
 
@@ -143,6 +144,7 @@ add_library(box STATIC
     lua/info.c
     lua/stat.c
     lua/ctl.c
+    lua/txn.c
     lua/error.cc
     lua/session.c
     lua/net_box.c
diff --git a/src/box/lua/init.c b/src/box/lua/init.c
index 7ffed409d..0d0b0e0b9 100644
--- a/src/box/lua/init.c
+++ b/src/box/lua/init.c
@@ -53,6 +53,7 @@
 #include "box/lua/stat.h"
 #include "box/lua/info.h"
 #include "box/lua/ctl.h"
+#include "box/lua/txn.h"
 #include "box/lua/session.h"
 #include "box/lua/net_box.h"
 #include "box/lua/cfg.h"
@@ -67,6 +68,7 @@ extern char session_lua[],
 	tuple_lua[],
 	key_def_lua[],
 	schema_lua[],
+	txn_lua[],
 	load_cfg_lua[],
 	xlog_lua[],
 	feedback_daemon_lua[],
@@ -79,6 +81,7 @@ static const char *lua_sources[] = {
 	"box/session", session_lua,
 	"box/tuple", tuple_lua,
 	"box/schema", schema_lua,
+	"box/txn", txn_lua,
 	"box/feedback_daemon", feedback_daemon_lua,
 	"box/upgrade", upgrade_lua,
 	"box/net_box", net_box_lua,
@@ -312,6 +315,7 @@ box_lua_init(struct lua_State *L)
 	box_lua_info_init(L);
 	box_lua_stat_init(L);
 	box_lua_ctl_init(L);
+	box_lua_txn_init(L);
 	box_lua_session_init(L);
 	box_lua_xlog_init(L);
 	box_lua_execute_init(L);
diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
index e898c3aa6..d3c07b042 100644
--- a/src/box/lua/schema.lua
+++ b/src/box/lua/schema.lua
@@ -64,21 +64,6 @@ ffi.cdef[[
     box_index_count(uint32_t space_id, uint32_t index_id, int type,
                     const char *key, const char *key_end);
     /** \endcond public */
-    /** \cond public */
-    bool
-    box_txn();
-    int64_t
-    box_txn_id();
-    int
-    box_txn_begin();
-    /** \endcond public */
-    typedef struct txn_savepoint box_txn_savepoint_t;
-
-    box_txn_savepoint_t *
-    box_txn_savepoint();
-
-    int
-    box_txn_rollback_to_savepoint(box_txn_savepoint_t *savepoint);
 
     struct port_tuple_entry {
         struct port_tuple_entry *next;
@@ -123,7 +108,6 @@ ffi.cdef[[
         PRIV_REVOKE = 16384,
         PRIV_ALL  = 4294967295
     };
-
 ]]
 
 box.priv = {
@@ -318,61 +302,6 @@ local function update_param_table(table, defaults)
     return new_table
 end
 
-box.begin = function()
-    if builtin.box_txn_begin() == -1 then
-        box.error()
-    end
-end
-
-box.is_in_txn = builtin.box_txn
-
-box.savepoint = function()
-    local csavepoint = builtin.box_txn_savepoint()
-    if csavepoint == nil then
-        box.error()
-    end
-    return { csavepoint=csavepoint, txn_id=builtin.box_txn_id() }
-end
-
-local savepoint_type = ffi.typeof('box_txn_savepoint_t')
-
-local function check_savepoint(savepoint)
-    if savepoint == nil or savepoint.txn_id == nil or
-       savepoint.csavepoint == nil or
-       type(tonumber(savepoint.txn_id)) ~= 'number' or
-       type(savepoint.csavepoint) ~= 'cdata' or
-       not ffi.istype(savepoint_type, savepoint.csavepoint) then
-        error("Usage: box.rollback_to_savepoint(savepoint)")
-    end
-end
-
-box.rollback_to_savepoint = function(savepoint)
-    check_savepoint(savepoint)
-    if savepoint.txn_id ~= builtin.box_txn_id() then
-        box.error(box.error.NO_SUCH_SAVEPOINT)
-    end
-    if builtin.box_txn_rollback_to_savepoint(savepoint.csavepoint) == -1 then
-        box.error()
-    end
-end
-
-local function atomic_tail(status, ...)
-    if not status then
-        box.rollback()
-        error((...), 2)
-     end
-     box.commit()
-     return ...
-end
-
-box.atomic = function(fun, ...)
-    box.begin()
-    return atomic_tail(pcall(fun, ...))
-end
-
--- box.commit yields, so it's defined as Lua/C binding
--- box.rollback yields as well
-
 function update_format(format)
     local result = {}
     for i, given in ipairs(format) do
diff --git a/src/box/lua/txn.c b/src/box/lua/txn.c
new file mode 100644
index 000000000..1d07ff41e
--- /dev/null
+++ b/src/box/lua/txn.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+
+#include <tarantool_ev.h>
+
+#include "box/box.h"
+#include "box/schema.h"
+#include "box/txn.h"
+
+#include "box/lua/txn.h"
+
+#include "lua/utils.h"
+#include "lua/trigger.h"
+
+
+static uint32_t CTID_STRUCT_TXN_SAVEPOINT_REF = 0;
+
+
+static int
+lbox_txn_begin(struct lua_State *L)
+{
+	int res = box_txn_begin();
+	if (res == -1)
+		return luaT_push_nil_and_error(L);
+
+	lua_pushnumber(L, 0);
+	return 1;
+}
+
+static int
+lbox_txn_is_in_txn(struct lua_State *L)
+{
+	bool res = box_txn();
+	lua_pushboolean(L, res);
+	return 1;
+}
+
+static int
+lbox_txn_id(struct lua_State *L)
+{
+	int64_t res = box_txn_id();
+
+	if (res == -1)
+		return luaT_push_nil_and_error(L);
+
+	lua_pushnumber(L, res);
+	return 1;
+}
+
+struct txn_savepoint*
+luaT_check_txn_savepoint(struct lua_State *L, int idx)
+{
+	if (lua_type(L, idx) != LUA_TCDATA)
+		return NULL;
+
+	uint32_t cdata_type;
+	struct txn_savepoint **sp_ptr = luaL_checkcdata(L, idx, &cdata_type);
+
+	if (sp_ptr == NULL || cdata_type != CTID_STRUCT_TXN_SAVEPOINT_REF)
+		return NULL;
+
+	return *sp_ptr;
+}
+
+static int 
+lbox_txn_rollback_to_savepoint(struct lua_State *L)
+{
+	struct txn_savepoint *sp;
+	if (lua_gettop(L) != 1
+	    || (sp = luaT_check_txn_savepoint(L, 1)) == NULL) {
+		 luaL_error(L, "Usage: txn:rollback to savepoint(savepoint)");
+	}
+
+	int rc = box_txn_rollback_to_savepoint(sp);
+	if (rc != 0)
+		return luaT_push_nil_and_error(L);
+
+	lua_pushnumber(L, 0);
+	return 1;
+}
+
+static int
+lbox_txn_savepoint(struct lua_State *L)
+{
+	struct txn_savepoint *sp = box_txn_savepoint();
+	if (sp == NULL)
+		return luaT_push_nil_and_error(L);
+
+	lua_pushlightuserdata(L, sp);
+	*(struct txn_savepoint **)luaL_pushcdata(L, CTID_STRUCT_TXN_SAVEPOINT_REF) = sp;
+	return 1;
+}
+
+static const struct luaL_Reg lbox_txn_lib[] = {
+	{ "begin",			lbox_txn_begin			},
+	{ "is_in_txn",			lbox_txn_is_in_txn		},
+	{ "txn_id",			lbox_txn_id			},
+	{ "savepoint",			lbox_txn_savepoint		},
+	{ "rollback_to_savepoint",	lbox_txn_rollback_to_savepoint	},
+	{ NULL,				NULL				}
+};
+
+void
+box_lua_txn_init(struct lua_State *L)
+{
+	luaL_cdef(L, "struct txn_savepoint;");
+	CTID_STRUCT_TXN_SAVEPOINT_REF = luaL_ctypeid(L, "struct txn_savepoint&");
+
+	luaL_register_module(L, "box.txn", lbox_txn_lib);
+	lua_pop(L, 1);
+}
diff --git a/src/box/lua/txn.h b/src/box/lua/txn.h
new file mode 100644
index 000000000..d9b643961
--- /dev/null
+++ b/src/box/lua/txn.h
@@ -0,0 +1,49 @@
+#ifndef INCLUDES_TARANTOOL_LUA_TXN_H
+#define INCLUDES_TARANTOOL_LUA_TXN_H
+
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct lua_State;
+
+void
+box_lua_txn_init(struct lua_State *L);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* INCLUDES_TARANTOOL_LUA_TXN_H */
+
diff --git a/src/box/lua/txn.lua b/src/box/lua/txn.lua
new file mode 100644
index 000000000..15e5290d8
--- /dev/null
+++ b/src/box/lua/txn.lua
@@ -0,0 +1,53 @@
+-- txn.lua (internal file)
+-- luacheck: ignore box
+
+box.begin = function()
+    local res, err = box.txn.begin()
+    if res == nil then
+        error(err)
+    end
+end
+
+box.is_in_txn = function()
+    return box.txn.is_in_txn()
+end
+
+box.savepoint = function()
+    local res, err = box.txn.savepoint()
+    if res == nil then
+        error(err)
+    end
+    return { csavepoint = res, txn_id = box.txn.txn_id() }
+end
+
+box.rollback_to_savepoint = function(savepoint)
+    if savepoint == nil then
+        error("Usage: txn:rollback to savepoint(savepoint)")
+    end
+
+    if savepoint.txn_id ~= box.txn.txn_id() then
+        box.error(box.error.NO_SUCH_SAVEPOINT)
+    end
+
+    local res, err = box.txn.rollback_to_savepoint(savepoint.csavepoint)
+    if res == nil then
+            error(err)
+    end
+end
+
+local function atomic_tail(status, ...)
+    if not status then
+        box.rollback()
+        error((...), 2)
+     end
+     box.commit()
+     return ...
+end
+
+box.atomic = function(fun, ...)
+    box.begin()
+    return atomic_tail(pcall(fun, ...))
+end
+
+-- box.commit yields, so it's defined as Lua/C binding
+-- box.rollback yields as well
diff --git a/src/box/txn.c b/src/box/txn.c
index 963ec8eeb..54a0436ad 100644
--- a/src/box/txn.c
+++ b/src/box/txn.c
@@ -33,6 +33,7 @@
 #include "tuple.h"
 #include "journal.h"
 #include <fiber.h>
+#include <lauxlib.h>
 #include "xrow.h"
 
 double too_long_threshold;
@@ -877,3 +878,4 @@ txn_on_yield(struct trigger *trigger, void *event)
 	txn_set_flag(txn, TXN_IS_ABORTED_BY_YIELD);
 	return 0;
 }
+
diff --git a/test/box/misc.result b/test/box/misc.result
index d2a20307a..3eb27fbdc 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -86,6 +86,7 @@ t
   - space
   - stat
   - tuple
+  - txn
 ...
 t = nil
 ---
diff --git a/test/engine/savepoint.result b/test/engine/savepoint.result
index 86a2d0be2..501ba80c2 100644
--- a/test/engine/savepoint.result
+++ b/test/engine/savepoint.result
@@ -18,7 +18,7 @@ s1 = box.savepoint()
 ...
 box.rollback_to_savepoint(s1)
 ---
-- error: 'builtin/box/schema.lua: Usage: box.rollback_to_savepoint(savepoint)'
+- error: 'builtin/box/txn.lua: Usage: txn:rollback to savepoint(savepoint)'
 ...
 box.begin() s1 = box.savepoint()
 ---
@@ -327,27 +327,27 @@ test_run:cmd("setopt delimiter ''");
 ok1, errmsg1
 ---
 - false
-- 'builtin/box/schema.lua: Usage: box.rollback_to_savepoint(savepoint)'
+- 'builtin/box/txn.lua: Usage: txn:rollback to savepoint(savepoint)'
 ...
 ok2, errmsg2
 ---
 - false
-- 'builtin/box/schema.lua: Usage: box.rollback_to_savepoint(savepoint)'
+- 'builtin/box/txn.lua: Usage: txn:rollback to savepoint(savepoint)'
 ...
 ok3, errmsg3
 ---
 - false
-- 'builtin/box/schema.lua: Usage: box.rollback_to_savepoint(savepoint)'
+- 'builtin/box/txn.lua: Usage: txn:rollback to savepoint(savepoint)'
 ...
 ok4, errmsg4
 ---
 - false
-- 'builtin/box/schema.lua: Usage: box.rollback_to_savepoint(savepoint)'
+- 'builtin/box/txn.lua: Usage: txn:rollback to savepoint(savepoint)'
 ...
 ok5, errmsg5
 ---
 - false
-- 'builtin/box/schema.lua: Usage: box.rollback_to_savepoint(savepoint)'
+- 'Can not rollback to savepoint: the savepoint does not exist'
 ...
 s:select{}
 ---
-- 
2.17.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-26 13:13 [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI) Leonid
@ 2019-11-26 21:05 ` Konstantin Osipov
  2019-11-26 21:17   ` Alexander Turenko
  2019-12-11 22:21 ` Alexander Turenko
  1 sibling, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-26 21:05 UTC (permalink / raw)
  To: Leonid; +Cc: tarantool-patches

* Leonid <lvasiliev@tarantool.org> [19/11/26 16:17]:

> https://github.com/tarantool/tarantool/issues/4427

> https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api

Please add a test case. And an explanation of how the fix solves
the issue. The patch is an overkill - the trace is going through
box_txn_rollback_to_savepoint, so moving it off ffi to C api
should be sufficient.

Taking time to set up your mail agent so that your real name is
present would be nice, too.


-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-26 21:05 ` Konstantin Osipov
@ 2019-11-26 21:17   ` Alexander Turenko
  2019-11-27  8:31     ` Konstantin Osipov
  0 siblings, 1 reply; 24+ messages in thread
From: Alexander Turenko @ 2019-11-26 21:17 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

On Wed, Nov 27, 2019 at 12:05:20AM +0300, Konstantin Osipov wrote:
> * Leonid <lvasiliev@tarantool.org> [19/11/26 16:17]:
> 
> > https://github.com/tarantool/tarantool/issues/4427
> 
> > https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api
> 
> Please add a test case. And an explanation of how the fix solves
> the issue. The patch is an overkill - the trace is going through
> box_txn_rollback_to_savepoint, so moving it off ffi to C api
> should be sufficient.

I would add a bit more context here. The original patch was made by
Sergey O. and I asked for extracting all related functions into its own
'module'. See links below.

https://lists.tarantool.org/pipermail/tarantool-patches/2019-September/006747.html
https://lists.tarantool.org/pipermail/tarantool-patches/2019-September/000734.html

The moving of just one function should be sufficient, you're right.
However it is not much more work then extracting all those related
function into tnx.{c,h,lua}. So I think it worth to do just here.

Are you agree that box/lua/txn.* is more proper place for those
functions then box/lua/schema.lua?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-26 21:17   ` Alexander Turenko
@ 2019-11-27  8:31     ` Konstantin Osipov
  2019-11-28  8:10       ` Leonid Vasiliev
  0 siblings, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-27  8:31 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches

* Alexander Turenko <alexander.turenko@tarantool.org> [19/11/27 00:17]:
> > > https://github.com/tarantool/tarantool/issues/4427
> > 
> > > https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api
> > 
> > Please add a test case. And an explanation of how the fix solves
> > the issue. The patch is an overkill - the trace is going through
> > box_txn_rollback_to_savepoint, so moving it off ffi to C api
> > should be sufficient.
> 
> I would add a bit more context here. The original patch was made by
> Sergey O. and I asked for extracting all related functions into its own
> 'module'. See links below.
> 
> https://lists.tarantool.org/pipermail/tarantool-patches/2019-September/006747.html
> https://lists.tarantool.org/pipermail/tarantool-patches/2019-September/000734.html
> 
> The moving of just one function should be sufficient, you're right.
> However it is not much more work then extracting all those related
> function into tnx.{c,h,lua}. So I think it worth to do just here.
> 
> Are you agree that box/lua/txn.* is more proper place for those
> functions then box/lua/schema.lua?

The move also converts these functions from ffi to plain C. 
This is what I am objecting against.

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-27  8:31     ` Konstantin Osipov
@ 2019-11-28  8:10       ` Leonid Vasiliev
  2019-11-28 12:34         ` Konstantin Osipov
  0 siblings, 1 reply; 24+ messages in thread
From: Leonid Vasiliev @ 2019-11-28  8:10 UTC (permalink / raw)
  To: Konstantin Osipov, Alexander Turenko, tarantool-patches

On 11/27/19 11:31 AM, Konstantin Osipov wrote:
> * Alexander Turenko <alexander.turenko@tarantool.org> [19/11/27 00:17]:
>>>> https://github.com/tarantool/tarantool/issues/4427
>>>
>>>> https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api
>>>
>>> Please add a test case. And an explanation of how the fix solves
>>> the issue. The patch is an overkill - the trace is going through
>>> box_txn_rollback_to_savepoint, so moving it off ffi to C api
>>> should be sufficient.
>>
>> I would add a bit more context here. The original patch was made by
>> Sergey O. and I asked for extracting all related functions into its own
>> 'module'. See links below.
>>
>> https://lists.tarantool.org/pipermail/tarantool-patches/2019-September/006747.html
>> https://lists.tarantool.org/pipermail/tarantool-patches/2019-September/000734.html
>>
>> The moving of just one function should be sufficient, you're right.
>> However it is not much more work then extracting all those related
>> function into tnx.{c,h,lua}. So I think it worth to do just here.
>>
>> Are you agree that box/lua/txn.* is more proper place for those
>> functions then box/lua/schema.lua?
> 
> The move also converts these functions from ffi to plain C.
> This is what I am objecting against.
> 

Hi, thanks for your review.
1) About a test case - the bug has been detected on already exist test.
2) The problem was caused by sandwich stack Lua-FFI-> C-API-> Lua (a 
term of Vyacheslav Egorov). So, no FFI - no problem. We should not use 
FFI in box_txn_rollback_to_savepoint.
3) Alexandr point: Move all txn stuff from schema to a separate module, 
IMHO it's a good idea. Why not?
4) About converts others txn functions from FFI to C-API:
I think it's a good practice, use one or the other (FFI or C-API) in module

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28  8:10       ` Leonid Vasiliev
@ 2019-11-28 12:34         ` Konstantin Osipov
  2019-11-28 13:00           ` Igor Munkin
  0 siblings, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-28 12:34 UTC (permalink / raw)
  To: Leonid Vasiliev; +Cc: tarantool-patches

* Leonid Vasiliev <lvasiliev@tarantool.org> [19/11/28 11:10]:
> 4) About converts others txn functions from FFI to C-API:
> I think it's a good practice, use one or the other (FFI or C-API) in module

My complaint is about this part. Jit trace can go through FFI but
can't go through Lua/C. This is why many of these functions were
in FFI in the first place. 

We could make a conscious choice to make all box API Lua/C - but
this will literally kill JIT, so then why not just move to
plain Lua 5.3 and forget about grievances with LuaJIT altogether.

Nick Zavaritsky had a patch that would detect sandwich stacks in
runtime and assert. Nobody had time to look at it back then -
everyone was busy with vinyl and sql.

Why not dig it up to protect from future erosion of the code base? 

This would be more valuable contribution than just falling back to
Lua/C for everything.

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 12:34         ` Konstantin Osipov
@ 2019-11-28 13:00           ` Igor Munkin
  2019-11-28 13:18             ` Konstantin Osipov
  0 siblings, 1 reply; 24+ messages in thread
From: Igor Munkin @ 2019-11-28 13:00 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

Kostja,

On 28.11.19, Konstantin Osipov wrote:
> * Leonid Vasiliev <lvasiliev@tarantool.org> [19/11/28 11:10]:
> > 4) About converts others txn functions from FFI to C-API:
> > I think it's a good practice, use one or the other (FFI or C-API) in module
> 
> My complaint is about this part. Jit trace can go through FFI but
> can't go through Lua/C. This is why many of these functions were
> in FFI in the first place. 
> 
> We could make a conscious choice to make all box API Lua/C - but
> this will literally kill JIT, so then why not just move to

LuaJIT v2.1 provides trace stitching feature (for more info see
here[1]), so strictly saying, it doesn't kill JIT but yes, performance
is nerfed comparing to traces recorded for an FFI code. I have no
proofs/benchmarks for now, so it sounds kinda bullshit, but I look
forward to make some in the nearest future.

Furthermore, FFI is not a silver bullet considering this issue[2].

> plain Lua 5.3 and forget about grievances with LuaJIT altogether.

JIT is not the only killing feature provided by LuaJIT and
infrastructure for Lua 5.1 is much richer.

> 
> Nick Zavaritsky had a patch that would detect sandwich stacks in
> runtime and assert. Nobody had time to look at it back then -

Could you please provide the issue/link for this changeset, I'll take a
look on it with pleasure.

> everyone was busy with vinyl and sql.
> 
> Why not dig it up to protect from future erosion of the code base? 
> 
> This would be more valuable contribution than just falling back to
> Lua/C for everything.

I have a tiny patch for JIT in my branch[3], respecting most remarks
given by Mr Egorov, however it's not yet fully tested and there're
still some pitfalls not being fixed. I'm working on it for now.

> 
> -- 
> Konstantin Osipov, Moscow, Russia

[1]: http://wiki.luajit.org/NYI
[2]: https://github.com/tarantool/tarantool/issues/4630
[3]: https://github.com/tarantool/luajit/commit/a3a47015842c7e7c1bee2f4fc30345aa7d4e5dba

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 13:00           ` Igor Munkin
@ 2019-11-28 13:18             ` Konstantin Osipov
  2019-11-28 14:03               ` Igor Munkin
  2020-03-20 18:48               ` Igor Munkin
  0 siblings, 2 replies; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-28 13:18 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

* Igor Munkin <imun@tarantool.org> [19/11/28 16:03]:
> LuaJIT v2.1 provides trace stitching feature (for more info see
> here[1]), so strictly saying, it doesn't kill JIT but yes, performance
> is nerfed comparing to traces recorded for an FFI code. I have no
> proofs/benchmarks for now, so it sounds kinda bullshit, but I look
> forward to make some in the nearest future.
> 
> Furthermore, FFI is not a silver bullet considering this issue[2].

I fully agree on all points, there is some buggy trace stitching,
there is an overhead of switching to Lua/C and back, 
(FFI is slower in Lua/C partly for this reason), and
FFI is not a silver bullet.

Despite all of the above we should be aiming at using FFI more,
not less, going forward, don't you agree?

What should be the rule of thumb in your opinion, ffi or
lua/c? 

> > plain Lua 5.3 and forget about grievances with LuaJIT altogether.
> 
> JIT is not the only killing feature provided by LuaJIT and
> infrastructure for Lua 5.1 is much richer.
> 
> > 
> > Nick Zavaritsky had a patch that would detect sandwich stacks in
> > runtime and assert. Nobody had time to look at it back then -
> 
> Could you please provide the issue/link for this changeset, I'll take a
> look on it with pleasure.

Nick Zavaritsky himself is the only link here, unfortunately.
Perhaps he has this patch in one of his personal branches. I
suggest someone contacts him :)

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 13:18             ` Konstantin Osipov
@ 2019-11-28 14:03               ` Igor Munkin
  2019-11-28 15:58                 ` Konstantin Osipov
  2020-03-20 18:48               ` Igor Munkin
  1 sibling, 1 reply; 24+ messages in thread
From: Igor Munkin @ 2019-11-28 14:03 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

Kostja,

On 28.11.19, Konstantin Osipov wrote:
> * Igor Munkin <imun@tarantool.org> [19/11/28 16:03]:
> > LuaJIT v2.1 provides trace stitching feature (for more info see
> > here[1]), so strictly saying, it doesn't kill JIT but yes, performance
> > is nerfed comparing to traces recorded for an FFI code. I have no
> > proofs/benchmarks for now, so it sounds kinda bullshit, but I look
> > forward to make some in the nearest future.
> > 
> > Furthermore, FFI is not a silver bullet considering this issue[2].
> 
> I fully agree on all points, there is some buggy trace stitching,
> there is an overhead of switching to Lua/C and back, 
> (FFI is slower in Lua/C partly for this reason), and
> FFI is not a silver bullet.

We faced several platform failures recently in FFI machinery, thus I
can't claim which one LuaJIT part from these two is much more buggy.

> 
> Despite all of the above we should be aiming at using FFI more,
> not less, going forward, don't you agree?
> 

Why should we be aiming at using FFI more? The root cause is that
current fiber machinery (as well as some parts of triggers mechanism)
doesn't respect the Lua coroutine switch semantics, thereby breaking
trace recording. Lua-C API implicitly (or non-intentionally) prevents
breakage by JIT trace aborts when recording FUNCC.

Therefore, I guess we should be aiming either at changing fiber
switching to the one respecting the LuaJIT runtime or at tuning JIT
compiler way more regarding the Lua-C usage.

Besides, we can't fully prevent platform failures if there is an FFI
misusage in users code.

> What should be the rule of thumb in your opinion, ffi or
> lua/c? 

If you want to know my rule of thumb: FFI is for external existing
libraries to be used in Lua code (and all compiler related benefits are
nothing more than a godsend consequence, since all guest stack
manipulations are implemented in LuaJIT runtime, not in an external
code) and Lua-C is a well-designed and well-documented API for embedding
Lua into a host application / extending Lua with external low-lewel
libs. I totally do not insist on my point of view, since everyone has
it's own vision on LuaJIT features.

> 
> > > plain Lua 5.3 and forget about grievances with LuaJIT altogether.
> > 
> > JIT is not the only killing feature provided by LuaJIT and
> > infrastructure for Lua 5.1 is much richer.
> > 
> > > 
> > > Nick Zavaritsky had a patch that would detect sandwich stacks in
> > > runtime and assert. Nobody had time to look at it back then -
> > 
> > Could you please provide the issue/link for this changeset, I'll take a
> > look on it with pleasure.
> 
> Nick Zavaritsky himself is the only link here, unfortunately.
> Perhaps he has this patch in one of his personal branches. I
> suggest someone contacts him :)
> 
> -- 
> Konstantin Osipov, Moscow, Russia

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 14:03               ` Igor Munkin
@ 2019-11-28 15:58                 ` Konstantin Osipov
  2019-11-28 18:36                   ` Igor Munkin
  0 siblings, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-28 15:58 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

* Igor Munkin <imun@tarantool.org> [19/11/28 17:08]:

> Why should we be aiming at using FFI more? The root cause is that
> current fiber machinery (as well as some parts of triggers mechanism)
> doesn't respect the Lua coroutine switch semantics, thereby breaking
> trace recording. Lua-C API implicitly (or non-intentionally) prevents
> breakage by JIT trace aborts when recording FUNCC.

It's not correct. The current FFI functions were carefully crafted
to never lead to sandwich code: only those functions which can not
trigger a return to Lua were implemented as FFI. 
There was one regression between 1.10 and in 2.3 because we
started firing rollback triggers when rolling back to a savepoint, 
which was spotted by a failing tests.

One more time: When FFI bindings were written we were aware of NYI
and took it into account.

> Therefore, I guess we should be aiming either at changing fiber
> switching to the one respecting the LuaJIT runtime or at tuning JIT
> compiler way more regarding the Lua-C usage.

This is actually quite simple - we could easily call a LuaJIT hook
whenever switching a fiber, to make sure that it carefully
switches the internals as well. Mike Pall refused to cooperate on
the matter, but now we (you) control our own destiny.

> Besides, we can't fully prevent platform failures if there is an FFI
> misusage in users code.

Tarantool has never been claiming that it prevents people from
shooting themselves in the foot. Performance is the ultimate
design goal, at the cost of safety at times.


> > What should be the rule of thumb in your opinion, ffi or
> > lua/c? 
> 
> If you want to know my rule of thumb: FFI is for external existing
> libraries to be used in Lua code (and all compiler related benefits are
> nothing more than a godsend consequence, since all guest stack
> manipulations are implemented in LuaJIT runtime, not in an external
> code) and Lua-C is a well-designed and well-documented API for embedding
> Lua into a host application / extending Lua with external low-lewel
> libs. I totally do not insist on my point of view, since everyone has
> it's own vision on LuaJIT features.

OK, but there must be a single policy though. So far it was:
everything that doesn't yield and doesn't call back to Lua
uses FFI. Everything else *has* to use Lua/C API, UNTIL  there
is a way to safely sandwich FFI calls.

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 15:58                 ` Konstantin Osipov
@ 2019-11-28 18:36                   ` Igor Munkin
  2019-11-29  5:30                     ` Konstantin Osipov
                                       ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Igor Munkin @ 2019-11-28 18:36 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

Kostja,

On 28.11.19, Konstantin Osipov wrote:
> * Igor Munkin <imun@tarantool.org> [19/11/28 17:08]:
> 
> > Why should we be aiming at using FFI more? The root cause is that
> > current fiber machinery (as well as some parts of triggers mechanism)
> > doesn't respect the Lua coroutine switch semantics, thereby breaking
> > trace recording. Lua-C API implicitly (or non-intentionally) prevents
> > breakage by JIT trace aborts when recording FUNCC.
> 
> It's not correct. The current FFI functions were carefully crafted
> to never lead to sandwich code: only those functions which can not
> trigger a return to Lua were implemented as FFI. 
> There was one regression between 1.10 and in 2.3 because we
> started firing rollback triggers when rolling back to a savepoint, 
> which was spotted by a failing tests.
> 
> One more time: When FFI bindings were written we were aware of NYI
> and took it into account.
> 

OK, maybe I said it the wrong way using the word "non-intentionally". I
mean that Tarantool doesn't use any special handler to asynchroniously
abort recording, since there is no such outside the LuaJIT internals
(jit.off blacklists the function regardless the host and guest stacks
layout).

> > Therefore, I guess we should be aiming either at changing fiber
> > switching to the one respecting the LuaJIT runtime or at tuning JIT
> > compiler way more regarding the Lua-C usage.
> 
> This is actually quite simple - we could easily call a LuaJIT hook
> whenever switching a fiber, to make sure that it carefully
> switches the internals as well. Mike Pall refused to cooperate on
> the matter, but now we (you) control our own destiny.

Unfortunately, I haven't seen the thread where the subj is discussed
with Mike Pall, but the approach you proposed doesn't seem to be a
convenient one, however it still solves a problem (as does the move to
use Lua-C API for the code with possible Lua VM re-entrance underneath).

The major flaw I see in this solution, is introducing the dependency on
the JIT interface into Tarantool internals. There is already one
dependency on LuaJIT-2.1.0 presented with internal headers usage for
several hacks in utils.c. As a result we are not able to simply replace
the Lua implementation to try another one (e.g. uJIT conforming
LuaJIT-2.0.5) for comparing each other.

The best proposal we had with Kirill and Sergos is to finalize a trace
exactly at CALLXS IR, however after some research I found that the
snapshot to be replayed at the corresponding trace exit will restore the
guest stack it doesn't relate to. I hope to make further research this
direction, but it requires a way more time to adjust this behaviour and
its benefits are doubtful for me now.

For now, there is a partial fix I mentioned before, however it still
violates the flow I described here[1]. I'm going to proceed with the
research a bit later and provide another patch.

> 
> > Besides, we can't fully prevent platform failures if there is an FFI
> > misusage in users code.
> 
> Tarantool has never been claiming that it prevents people from

Sorry, I simply misread the following:
|> Why not dig it up to protect from future erosion of the code base?
|>
|> This would be more valuable contribution than just falling back to
|> Lua/C for everything.

> shooting themselves in the foot. Performance is the ultimate
> design goal, at the cost of safety at times.
> 

Great, we discussed with Leonid and Sasha offline and agreed to make
several benchmarks to be provided in this thread. With no benchmarks all
our estimates can be simply wrong.

> 
> > > What should be the rule of thumb in your opinion, ffi or
> > > lua/c? 
> > 
> > If you want to know my rule of thumb: FFI is for external existing
> > libraries to be used in Lua code (and all compiler related benefits are
> > nothing more than a godsend consequence, since all guest stack
> > manipulations are implemented in LuaJIT runtime, not in an external
> > code) and Lua-C is a well-designed and well-documented API for embedding
> > Lua into a host application / extending Lua with external low-lewel
> > libs. I totally do not insist on my point of view, since everyone has
> > it's own vision on LuaJIT features.
> 
> OK, but there must be a single policy though. So far it was:
> everything that doesn't yield and doesn't call back to Lua
> uses FFI. Everything else *has* to use Lua/C API, UNTIL  there
> is a way to safely sandwich FFI calls.
> 

I agree with you for the policy existence, but we all see the one you
mentioned above can introduce bugs leading to a platform failures. So I
guess we should reconsider it or simply dump somewhere. I think we have
to make some benchmarks and provide not only stats, but also a
reproducer with the input data, otherwise JIT tests are IMHO irrelevant.

> -- 
> Konstantin Osipov, Moscow, Russia

[1]: https://github.com/tarantool/tarantool/issues/4427#issuecomment-546056302

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 18:36                   ` Igor Munkin
@ 2019-11-29  5:30                     ` Konstantin Osipov
  2019-11-29 17:43                       ` Igor Munkin
  2019-11-29  5:41                     ` Konstantin Osipov
  2019-12-04 13:05                     ` Leonid Vasiliev
  2 siblings, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-29  5:30 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

* Igor Munkin <imun@tarantool.org> [19/11/28 21:39]:
> > This is actually quite simple - we could easily call a LuaJIT hook
> > whenever switching a fiber, to make sure that it carefully
> > switches the internals as well. Mike Pall refused to cooperate on
> > the matter, but now we (you) control our own destiny.
> 
> Unfortunately, I haven't seen the thread where the subj is discussed
> with Mike Pall, but the approach you proposed doesn't seem to be a
> convenient one, however it still solves a problem (as does the move to
> use Lua-C API for the code with possible Lua VM re-entrance underneath).
> 
> The major flaw I see in this solution, is introducing the dependency on
> the JIT interface into Tarantool internals. There is already one
> dependency on LuaJIT-2.1.0 presented with internal headers usage for
> several hacks in utils.c. As a result we are not able to simply replace
> the Lua implementation to try another one (e.g. uJIT conforming
> LuaJIT-2.0.5) for comparing each other.

Well, this is not exactly a flaw of the solution, then: it would
have been a flaw if we had a choice - whether to introduce a
dependency or not. The dependency is already there, with its
costs.
When we introduced the dependency it was not an arbitrary decision
either: the strategy has always been that Tarantool will become
bigger & stronger and will have its own JIT team, so we will be
able to sustain the costs of tight coupling. This strategy has
partly materialized with you and Sergos on board of MRG team. 

Now it's a matter of you guys being bold enough to take one step
further and integrate LuaJIT with tarantool fibers. 

I know of no good reason why yields break traces, and if they stop
doing that, we can switch everything to FFI. 

This is of course outside the scope of this patch, but is related
to the policy for FFI vs Lua/C.


-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 18:36                   ` Igor Munkin
  2019-11-29  5:30                     ` Konstantin Osipov
@ 2019-11-29  5:41                     ` Konstantin Osipov
  2019-11-29 17:37                       ` Igor Munkin
  2019-12-04 13:05                     ` Leonid Vasiliev
  2 siblings, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-11-29  5:41 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

* Igor Munkin <imun@tarantool.org> [19/11/28 21:39]:

> Great, we discussed with Leonid and Sasha offline and agreed to make
> several benchmarks to be provided in this thread. With no benchmarks all
> our estimates can be simply wrong.

For benchmarks, it is always good to refresh them but since you've
been working with LuaJIT for a while you must know some of 
the conclusions already:

* JITed code can be 5-8x faster than Lua + Lua/C
* FFI is 1.5-2x slower than Lua/C in non-JIT mode. So using FFI
  is an extra cost if JIT is OFF.
* The way traces are collected, cached and stored has its own
  overhead, so JIT ON can be harmful to performance if Lua code
  base gets big.

In practice, for Tarantool apps, I found all of the above to not
have much impact. The #1 cause of performance slowdown in
Tarantool apps is boxing/unboxing Tarantool tuples and garbage
collection.

So to measure the real impact of the change, micro-benchmarking
is insufficient. One has to use run e.g. vshard or data
grid or other complex app stress test with wal_mode=off to 
see the effect of such change.

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-29  5:41                     ` Konstantin Osipov
@ 2019-11-29 17:37                       ` Igor Munkin
  0 siblings, 0 replies; 24+ messages in thread
From: Igor Munkin @ 2019-11-29 17:37 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

Kostja,

On 29.11.19, Konstantin Osipov wrote:
> * Igor Munkin <imun@tarantool.org> [19/11/28 21:39]:
> 
> > Great, we discussed with Leonid and Sasha offline and agreed to make
> > several benchmarks to be provided in this thread. With no benchmarks all
> > our estimates can be simply wrong.
> 
> For benchmarks, it is always good to refresh them but since you've
> been working with LuaJIT for a while you must know some of 
> the conclusions already:
> 
> * JITed code can be 5-8x faster than Lua + Lua/C
> * FFI is 1.5-2x slower than Lua/C in non-JIT mode. So using FFI
>   is an extra cost if JIT is OFF.
> * The way traces are collected, cached and stored has its own
>   overhead, so JIT ON can be harmful to performance if Lua code
>   base gets big.

Additionally, enabling JIT can nerf performance on a heterogeneous data
considering the guarded *LOADs.

I can confirm nothing for the numbers you've written above. There are
several benchmarks for LuaJIT itself regarding a different workload. My
point was about benchmarks for LuaJIT within Tarantool, since we are
going to tune it for Tarantool environment primarily.

> 
> In practice, for Tarantool apps, I found all of the above to not
> have much impact. The #1 cause of performance slowdown in
> Tarantool apps is boxing/unboxing Tarantool tuples and garbage
> collection.
> 
> So to measure the real impact of the change, micro-benchmarking
> is insufficient. One has to use run e.g. vshard or data
> grid or other complex app stress test with wal_mode=off to 
> see the effect of such change.
> 
> -- 
> Konstantin Osipov, Moscow, Russia

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-29  5:30                     ` Konstantin Osipov
@ 2019-11-29 17:43                       ` Igor Munkin
  0 siblings, 0 replies; 24+ messages in thread
From: Igor Munkin @ 2019-11-29 17:43 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

Kostja,

On 29.11.19, Konstantin Osipov wrote:
> * Igor Munkin <imun@tarantool.org> [19/11/28 21:39]:
> > > This is actually quite simple - we could easily call a LuaJIT hook
> > > whenever switching a fiber, to make sure that it carefully
> > > switches the internals as well. Mike Pall refused to cooperate on
> > > the matter, but now we (you) control our own destiny.
> > 
> > Unfortunately, I haven't seen the thread where the subj is discussed
> > with Mike Pall, but the approach you proposed doesn't seem to be a
> > convenient one, however it still solves a problem (as does the move to
> > use Lua-C API for the code with possible Lua VM re-entrance underneath).
> > 
> > The major flaw I see in this solution, is introducing the dependency on
> > the JIT interface into Tarantool internals. There is already one
> > dependency on LuaJIT-2.1.0 presented with internal headers usage for
> > several hacks in utils.c. As a result we are not able to simply replace
> > the Lua implementation to try another one (e.g. uJIT conforming
> > LuaJIT-2.0.5) for comparing each other.
> 
> Well, this is not exactly a flaw of the solution, then: it would
> have been a flaw if we had a choice - whether to introduce a
> dependency or not. The dependency is already there, with its
> costs.

I hope to reduce it in the nearest future, since there're lots of
valuable Lua implementations to be tried for Tarantool.

> When we introduced the dependency it was not an arbitrary decision
> either: the strategy has always been that Tarantool will become
> bigger & stronger and will have its own JIT team, so we will be
> able to sustain the costs of tight coupling. This strategy has
> partly materialized with you and Sergos on board of MRG team. 
> 
> Now it's a matter of you guys being bold enough to take one step
> further and integrate LuaJIT with tarantool fibers. 

I really appreciate your words and we will try our best for the product
enhancements. As I mentioned before, we already have some research
aiming to detect FFI sandwiches and prevent the platform failures and
we are definitely going to proceed with it.

> 
> I know of no good reason why yields break traces, and if they stop
> doing that, we can switch everything to FFI. 

I can dump some results to tarantool-discussions on demand.

> 
> This is of course outside the scope of this patch, but is related
> to the policy for FFI vs Lua/C.

Yes, and we should come to a sole one indeed. I guess it will be more
convenient to move our conversation thread to tarantool-discussions and
stop spoiling the review list.

> 
> 
> -- 
> Konstantin Osipov, Moscow, Russia

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 18:36                   ` Igor Munkin
  2019-11-29  5:30                     ` Konstantin Osipov
  2019-11-29  5:41                     ` Konstantin Osipov
@ 2019-12-04 13:05                     ` Leonid Vasiliev
  2019-12-04 13:15                       ` Konstantin Osipov
  2 siblings, 1 reply; 24+ messages in thread
From: Leonid Vasiliev @ 2019-12-04 13:05 UTC (permalink / raw)
  To: Igor Munkin, Konstantin Osipov; +Cc: tarantool-patches


So, several benchmarks (sergos/rollback_to_savepoint - 
lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api):

BM1:

local os = require('os')
local clock = require('clock')

box.cfg()

local s = box.schema.space.create('test')
s:create_index('primary')


local start_clock = clock.monotonic()

for i = 1,1000000 do
     box.begin()
     s:replace{1}
     box.is_in_txn()
     local save1 = box.savepoint()
     s:replace{2}
     box.is_in_txn()
     local save2 = box.savepoint()
     s:replace{3}
     box.is_in_txn()
     local save3 = box.savepoint()
     s:replace{4}
     box.is_in_txn()
     box.rollback_to_savepoint(save3)
     box.is_in_txn()
     box.rollback_to_savepoint(save2)
     box.is_in_txn()
     box.rollback_to_savepoint(save1)
     box.is_in_txn()
     box.commit()
end

local stop_clock = clock.monotonic()
local work_time = stop_clock - start_clock
print("Result: " .. tostring(work_time))

s:drop()
os.exit()


BM2:

local os = require('os')
local clock = require('clock')

box.cfg()

local s = box.schema.space.create('test')
s:create_index('primary')


local start_clock = clock.monotonic()

for i = 1,1000000 do
     box.is_in_txn()
end

local stop_clock = clock.monotonic()
local work_time = stop_clock - start_clock
print("Result: " .. tostring(work_time))

s:drop()
os.exit()


BM3:

local os = require('os')
local clock = require('clock')

box.cfg()

local s = box.schema.space.create('test')
s:create_index('primary')


local start_clock = clock.monotonic()

for i = 1,1000000 do
     box.begin()
     box.commit()
end

local stop_clock = clock.monotonic()
local work_time = stop_clock - start_clock
print("Result: " .. tostring(work_time))

s:drop()
os.exit()


BM4:

local os = require('os')
local clock = require('clock')

box.cfg()

local s = box.schema.space.create('test')
s:create_index('primary')


local start_clock = clock.monotonic()

for i = 1,1000000 do
     box.begin()
     s:replace{1}
     local save1 = box.savepoint()
     s:replace{2}
     box.rollback_to_savepoint(save1)
     box.commit()
end

local stop_clock = clock.monotonic()
local work_time = stop_clock - start_clock
print("Result: " .. tostring(work_time))

s:drop()
os.exit()


Results:

BM	Sergo commit(s)	Leonid commit(s)	Result
bm1	26.7092		26.7897			Sergo commit win 0.3%
bm2	0.0023		0.0195			Sergo commit win x 8.5
bm3	0.1281		0.1502			Sergo commit win 17.2%
bm4	19.6567		19.8466			Sergo commit win 0.96%


On 11/28/19 9:36 PM, Igor Munkin wrote:
> Kostja,
> 
> On 28.11.19, Konstantin Osipov wrote:
>> * Igor Munkin <imun@tarantool.org> [19/11/28 17:08]:
>>
>>> Why should we be aiming at using FFI more? The root cause is that
>>> current fiber machinery (as well as some parts of triggers mechanism)
>>> doesn't respect the Lua coroutine switch semantics, thereby breaking
>>> trace recording. Lua-C API implicitly (or non-intentionally) prevents
>>> breakage by JIT trace aborts when recording FUNCC.
>>
>> It's not correct. The current FFI functions were carefully crafted
>> to never lead to sandwich code: only those functions which can not
>> trigger a return to Lua were implemented as FFI.
>> There was one regression between 1.10 and in 2.3 because we
>> started firing rollback triggers when rolling back to a savepoint,
>> which was spotted by a failing tests.
>>
>> One more time: When FFI bindings were written we were aware of NYI
>> and took it into account.
>>
> 
> OK, maybe I said it the wrong way using the word "non-intentionally". I
> mean that Tarantool doesn't use any special handler to asynchroniously
> abort recording, since there is no such outside the LuaJIT internals
> (jit.off blacklists the function regardless the host and guest stacks
> layout).
> 
>>> Therefore, I guess we should be aiming either at changing fiber
>>> switching to the one respecting the LuaJIT runtime or at tuning JIT
>>> compiler way more regarding the Lua-C usage.
>>
>> This is actually quite simple - we could easily call a LuaJIT hook
>> whenever switching a fiber, to make sure that it carefully
>> switches the internals as well. Mike Pall refused to cooperate on
>> the matter, but now we (you) control our own destiny.
> 
> Unfortunately, I haven't seen the thread where the subj is discussed
> with Mike Pall, but the approach you proposed doesn't seem to be a
> convenient one, however it still solves a problem (as does the move to
> use Lua-C API for the code with possible Lua VM re-entrance underneath).
> 
> The major flaw I see in this solution, is introducing the dependency on
> the JIT interface into Tarantool internals. There is already one
> dependency on LuaJIT-2.1.0 presented with internal headers usage for
> several hacks in utils.c. As a result we are not able to simply replace
> the Lua implementation to try another one (e.g. uJIT conforming
> LuaJIT-2.0.5) for comparing each other.
> 
> The best proposal we had with Kirill and Sergos is to finalize a trace
> exactly at CALLXS IR, however after some research I found that the
> snapshot to be replayed at the corresponding trace exit will restore the
> guest stack it doesn't relate to. I hope to make further research this
> direction, but it requires a way more time to adjust this behaviour and
> its benefits are doubtful for me now.
> 
> For now, there is a partial fix I mentioned before, however it still
> violates the flow I described here[1]. I'm going to proceed with the
> research a bit later and provide another patch.
> 
>>
>>> Besides, we can't fully prevent platform failures if there is an FFI
>>> misusage in users code.
>>
>> Tarantool has never been claiming that it prevents people from
> 
> Sorry, I simply misread the following:
> |> Why not dig it up to protect from future erosion of the code base?
> |>
> |> This would be more valuable contribution than just falling back to
> |> Lua/C for everything.
> 
>> shooting themselves in the foot. Performance is the ultimate
>> design goal, at the cost of safety at times.
>>
> 
> Great, we discussed with Leonid and Sasha offline and agreed to make
> several benchmarks to be provided in this thread. With no benchmarks all
> our estimates can be simply wrong.
> 
>>
>>>> What should be the rule of thumb in your opinion, ffi or
>>>> lua/c?
>>>
>>> If you want to know my rule of thumb: FFI is for external existing
>>> libraries to be used in Lua code (and all compiler related benefits are
>>> nothing more than a godsend consequence, since all guest stack
>>> manipulations are implemented in LuaJIT runtime, not in an external
>>> code) and Lua-C is a well-designed and well-documented API for embedding
>>> Lua into a host application / extending Lua with external low-lewel
>>> libs. I totally do not insist on my point of view, since everyone has
>>> it's own vision on LuaJIT features.
>>
>> OK, but there must be a single policy though. So far it was:
>> everything that doesn't yield and doesn't call back to Lua
>> uses FFI. Everything else *has* to use Lua/C API, UNTIL  there
>> is a way to safely sandwich FFI calls.
>>
> 
> I agree with you for the policy existence, but we all see the one you
> mentioned above can introduce bugs leading to a platform failures. So I
> guess we should reconsider it or simply dump somewhere. I think we have
> to make some benchmarks and provide not only stats, but also a
> reproducer with the input data, otherwise JIT tests are IMHO irrelevant.
> 
>> -- 
>> Konstantin Osipov, Moscow, Russia
> 
> [1]: https://github.com/tarantool/tarantool/issues/4427#issuecomment-546056302
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-12-04 13:05                     ` Leonid Vasiliev
@ 2019-12-04 13:15                       ` Konstantin Osipov
  2019-12-05  8:27                         ` Leonid Vasiliev
  0 siblings, 1 reply; 24+ messages in thread
From: Konstantin Osipov @ 2019-12-04 13:15 UTC (permalink / raw)
  To: Leonid Vasiliev; +Cc: tarantool-patches

* Leonid Vasiliev <lvasiliev@tarantool.org> [19/12/04 16:08]:

Leonid, thank you for the benchmark.

Could you please also provide 
- explanation for the results (not everyone may understand why the
  results are the way they are)
- your analysis of the results and suggestions for further
  actions?

> So, several benchmarks (sergos/rollback_to_savepoint -
> lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api):
> 
> BM1:
> 
> local os = require('os')
> local clock = require('clock')
> 
> box.cfg()
> 
> local s = box.schema.space.create('test')
> s:create_index('primary')
> 
> 
> local start_clock = clock.monotonic()
> 
> for i = 1,1000000 do
>     box.begin()
>     s:replace{1}
>     box.is_in_txn()
>     local save1 = box.savepoint()
>     s:replace{2}
>     box.is_in_txn()
>     local save2 = box.savepoint()
>     s:replace{3}
>     box.is_in_txn()
>     local save3 = box.savepoint()
>     s:replace{4}
>     box.is_in_txn()
>     box.rollback_to_savepoint(save3)
>     box.is_in_txn()
>     box.rollback_to_savepoint(save2)
>     box.is_in_txn()
>     box.rollback_to_savepoint(save1)
>     box.is_in_txn()
>     box.commit()
> end
> 
> local stop_clock = clock.monotonic()
> local work_time = stop_clock - start_clock
> print("Result: " .. tostring(work_time))
> 
> s:drop()
> os.exit()
> 
> 
> BM2:
> 
> local os = require('os')
> local clock = require('clock')
> 
> box.cfg()
> 
> local s = box.schema.space.create('test')
> s:create_index('primary')
> 
> 
> local start_clock = clock.monotonic()
> 
> for i = 1,1000000 do
>     box.is_in_txn()
> end
> 
> local stop_clock = clock.monotonic()
> local work_time = stop_clock - start_clock
> print("Result: " .. tostring(work_time))
> 
> s:drop()
> os.exit()
> 
> 
> BM3:
> 
> local os = require('os')
> local clock = require('clock')
> 
> box.cfg()
> 
> local s = box.schema.space.create('test')
> s:create_index('primary')
> 
> 
> local start_clock = clock.monotonic()
> 
> for i = 1,1000000 do
>     box.begin()
>     box.commit()
> end
> 
> local stop_clock = clock.monotonic()
> local work_time = stop_clock - start_clock
> print("Result: " .. tostring(work_time))
> 
> s:drop()
> os.exit()
> 
> 
> BM4:
> 
> local os = require('os')
> local clock = require('clock')
> 
> box.cfg()
> 
> local s = box.schema.space.create('test')
> s:create_index('primary')
> 
> 
> local start_clock = clock.monotonic()
> 
> for i = 1,1000000 do
>     box.begin()
>     s:replace{1}
>     local save1 = box.savepoint()
>     s:replace{2}
>     box.rollback_to_savepoint(save1)
>     box.commit()
> end
> 
> local stop_clock = clock.monotonic()
> local work_time = stop_clock - start_clock
> print("Result: " .. tostring(work_time))
> 
> s:drop()
> os.exit()
> 
> 
> Results:
> 
> BM	Sergo commit(s)	Leonid commit(s)	Result
> bm1	26.7092		26.7897			Sergo commit win 0.3%
> bm2	0.0023		0.0195			Sergo commit win x 8.5
> bm3	0.1281		0.1502			Sergo commit win 17.2%
> bm4	19.6567		19.8466			Sergo commit win 0.96%
> 
> 
> On 11/28/19 9:36 PM, Igor Munkin wrote:
> > Kostja,
> > 
> > On 28.11.19, Konstantin Osipov wrote:
> > > * Igor Munkin <imun@tarantool.org> [19/11/28 17:08]:
> > > 
> > > > Why should we be aiming at using FFI more? The root cause is that
> > > > current fiber machinery (as well as some parts of triggers mechanism)
> > > > doesn't respect the Lua coroutine switch semantics, thereby breaking
> > > > trace recording. Lua-C API implicitly (or non-intentionally) prevents
> > > > breakage by JIT trace aborts when recording FUNCC.
> > > 
> > > It's not correct. The current FFI functions were carefully crafted
> > > to never lead to sandwich code: only those functions which can not
> > > trigger a return to Lua were implemented as FFI.
> > > There was one regression between 1.10 and in 2.3 because we
> > > started firing rollback triggers when rolling back to a savepoint,
> > > which was spotted by a failing tests.
> > > 
> > > One more time: When FFI bindings were written we were aware of NYI
> > > and took it into account.
> > > 
> > 
> > OK, maybe I said it the wrong way using the word "non-intentionally". I
> > mean that Tarantool doesn't use any special handler to asynchroniously
> > abort recording, since there is no such outside the LuaJIT internals
> > (jit.off blacklists the function regardless the host and guest stacks
> > layout).
> > 
> > > > Therefore, I guess we should be aiming either at changing fiber
> > > > switching to the one respecting the LuaJIT runtime or at tuning JIT
> > > > compiler way more regarding the Lua-C usage.
> > > 
> > > This is actually quite simple - we could easily call a LuaJIT hook
> > > whenever switching a fiber, to make sure that it carefully
> > > switches the internals as well. Mike Pall refused to cooperate on
> > > the matter, but now we (you) control our own destiny.
> > 
> > Unfortunately, I haven't seen the thread where the subj is discussed
> > with Mike Pall, but the approach you proposed doesn't seem to be a
> > convenient one, however it still solves a problem (as does the move to
> > use Lua-C API for the code with possible Lua VM re-entrance underneath).
> > 
> > The major flaw I see in this solution, is introducing the dependency on
> > the JIT interface into Tarantool internals. There is already one
> > dependency on LuaJIT-2.1.0 presented with internal headers usage for
> > several hacks in utils.c. As a result we are not able to simply replace
> > the Lua implementation to try another one (e.g. uJIT conforming
> > LuaJIT-2.0.5) for comparing each other.
> > 
> > The best proposal we had with Kirill and Sergos is to finalize a trace
> > exactly at CALLXS IR, however after some research I found that the
> > snapshot to be replayed at the corresponding trace exit will restore the
> > guest stack it doesn't relate to. I hope to make further research this
> > direction, but it requires a way more time to adjust this behaviour and
> > its benefits are doubtful for me now.
> > 
> > For now, there is a partial fix I mentioned before, however it still
> > violates the flow I described here[1]. I'm going to proceed with the
> > research a bit later and provide another patch.
> > 
> > > 
> > > > Besides, we can't fully prevent platform failures if there is an FFI
> > > > misusage in users code.
> > > 
> > > Tarantool has never been claiming that it prevents people from
> > 
> > Sorry, I simply misread the following:
> > |> Why not dig it up to protect from future erosion of the code base?
> > |>
> > |> This would be more valuable contribution than just falling back to
> > |> Lua/C for everything.
> > 
> > > shooting themselves in the foot. Performance is the ultimate
> > > design goal, at the cost of safety at times.
> > > 
> > 
> > Great, we discussed with Leonid and Sasha offline and agreed to make
> > several benchmarks to be provided in this thread. With no benchmarks all
> > our estimates can be simply wrong.
> > 
> > > 
> > > > > What should be the rule of thumb in your opinion, ffi or
> > > > > lua/c?
> > > > 
> > > > If you want to know my rule of thumb: FFI is for external existing
> > > > libraries to be used in Lua code (and all compiler related benefits are
> > > > nothing more than a godsend consequence, since all guest stack
> > > > manipulations are implemented in LuaJIT runtime, not in an external
> > > > code) and Lua-C is a well-designed and well-documented API for embedding
> > > > Lua into a host application / extending Lua with external low-lewel
> > > > libs. I totally do not insist on my point of view, since everyone has
> > > > it's own vision on LuaJIT features.
> > > 
> > > OK, but there must be a single policy though. So far it was:
> > > everything that doesn't yield and doesn't call back to Lua
> > > uses FFI. Everything else *has* to use Lua/C API, UNTIL  there
> > > is a way to safely sandwich FFI calls.
> > > 
> > 
> > I agree with you for the policy existence, but we all see the one you
> > mentioned above can introduce bugs leading to a platform failures. So I
> > guess we should reconsider it or simply dump somewhere. I think we have
> > to make some benchmarks and provide not only stats, but also a
> > reproducer with the input data, otherwise JIT tests are IMHO irrelevant.
> > 
> > > -- 
> > > Konstantin Osipov, Moscow, Russia
> > 
> > [1]: https://github.com/tarantool/tarantool/issues/4427#issuecomment-546056302
> > 

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-12-04 13:15                       ` Konstantin Osipov
@ 2019-12-05  8:27                         ` Leonid Vasiliev
  0 siblings, 0 replies; 24+ messages in thread
From: Leonid Vasiliev @ 2019-12-05  8:27 UTC (permalink / raw)
  To: Konstantin Osipov, Igor Munkin, Alexander Turenko, tarantool-patches


On 12/4/19 4:15 PM, Konstantin Osipov wrote:
> * Leonid Vasiliev <lvasiliev@tarantool.org> [19/12/04 16:08]:
> 
> Leonid, thank you for the benchmark.
> 
> Could you please also provide
> - explanation for the results (not everyone may understand why the
>    results are the way they are)

Results explanation:
In case lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api: 
box.begin(), box.is_in_txn(), box.savepoint() use a C-API
In case sergos/rollback_to_savepoint:
box.begin(), box.is_in_txn(), box.savepoint() use a LuaJIT's FFI

> - your analysis of the results and suggestions for further
>    actions?

Look, I'll be honest with you.
I don't have enough competence in Lua performance stuff. As I understand 
- each of these methods has its pros and cons. My (and Igor if I have 
understood correctly) point of view: "We need a some policy of using FFI 
vs. C-API". And I try to force it.

> 
>> So, several benchmarks (sergos/rollback_to_savepoint -
>> lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api):
>>
>> BM1:
>>
>> local os = require('os')
>> local clock = require('clock')
>>
>> box.cfg()
>>
>> local s = box.schema.space.create('test')
>> s:create_index('primary')
>>
>>
>> local start_clock = clock.monotonic()
>>
>> for i = 1,1000000 do
>>      box.begin()
>>      s:replace{1}
>>      box.is_in_txn()
>>      local save1 = box.savepoint()
>>      s:replace{2}
>>      box.is_in_txn()
>>      local save2 = box.savepoint()
>>      s:replace{3}
>>      box.is_in_txn()
>>      local save3 = box.savepoint()
>>      s:replace{4}
>>      box.is_in_txn()
>>      box.rollback_to_savepoint(save3)
>>      box.is_in_txn()
>>      box.rollback_to_savepoint(save2)
>>      box.is_in_txn()
>>      box.rollback_to_savepoint(save1)
>>      box.is_in_txn()
>>      box.commit()
>> end
>>
>> local stop_clock = clock.monotonic()
>> local work_time = stop_clock - start_clock
>> print("Result: " .. tostring(work_time))
>>
>> s:drop()
>> os.exit()
>>
>>
>> BM2:
>>
>> local os = require('os')
>> local clock = require('clock')
>>
>> box.cfg()
>>
>> local s = box.schema.space.create('test')
>> s:create_index('primary')
>>
>>
>> local start_clock = clock.monotonic()
>>
>> for i = 1,1000000 do
>>      box.is_in_txn()
>> end
>>
>> local stop_clock = clock.monotonic()
>> local work_time = stop_clock - start_clock
>> print("Result: " .. tostring(work_time))
>>
>> s:drop()
>> os.exit()
>>
>>
>> BM3:
>>
>> local os = require('os')
>> local clock = require('clock')
>>
>> box.cfg()
>>
>> local s = box.schema.space.create('test')
>> s:create_index('primary')
>>
>>
>> local start_clock = clock.monotonic()
>>
>> for i = 1,1000000 do
>>      box.begin()
>>      box.commit()
>> end
>>
>> local stop_clock = clock.monotonic()
>> local work_time = stop_clock - start_clock
>> print("Result: " .. tostring(work_time))
>>
>> s:drop()
>> os.exit()
>>
>>
>> BM4:
>>
>> local os = require('os')
>> local clock = require('clock')
>>
>> box.cfg()
>>
>> local s = box.schema.space.create('test')
>> s:create_index('primary')
>>
>>
>> local start_clock = clock.monotonic()
>>
>> for i = 1,1000000 do
>>      box.begin()
>>      s:replace{1}
>>      local save1 = box.savepoint()
>>      s:replace{2}
>>      box.rollback_to_savepoint(save1)
>>      box.commit()
>> end
>>
>> local stop_clock = clock.monotonic()
>> local work_time = stop_clock - start_clock
>> print("Result: " .. tostring(work_time))
>>
>> s:drop()
>> os.exit()
>>
>>
>> Results:
>>
>> BM	Sergo commit(s)	Leonid commit(s)	Result
>> bm1	26.7092		26.7897			Sergo commit win 0.3%
>> bm2	0.0023		0.0195			Sergo commit win x 8.5
>> bm3	0.1281		0.1502			Sergo commit win 17.2%
>> bm4	19.6567		19.8466			Sergo commit win 0.96%
>>
>>
>> On 11/28/19 9:36 PM, Igor Munkin wrote:
>>> Kostja,
>>>
>>> On 28.11.19, Konstantin Osipov wrote:
>>>> * Igor Munkin <imun@tarantool.org> [19/11/28 17:08]:
>>>>
>>>>> Why should we be aiming at using FFI more? The root cause is that
>>>>> current fiber machinery (as well as some parts of triggers mechanism)
>>>>> doesn't respect the Lua coroutine switch semantics, thereby breaking
>>>>> trace recording. Lua-C API implicitly (or non-intentionally) prevents
>>>>> breakage by JIT trace aborts when recording FUNCC.
>>>>
>>>> It's not correct. The current FFI functions were carefully crafted
>>>> to never lead to sandwich code: only those functions which can not
>>>> trigger a return to Lua were implemented as FFI.
>>>> There was one regression between 1.10 and in 2.3 because we
>>>> started firing rollback triggers when rolling back to a savepoint,
>>>> which was spotted by a failing tests.
>>>>
>>>> One more time: When FFI bindings were written we were aware of NYI
>>>> and took it into account.
>>>>
>>>
>>> OK, maybe I said it the wrong way using the word "non-intentionally". I
>>> mean that Tarantool doesn't use any special handler to asynchroniously
>>> abort recording, since there is no such outside the LuaJIT internals
>>> (jit.off blacklists the function regardless the host and guest stacks
>>> layout).
>>>
>>>>> Therefore, I guess we should be aiming either at changing fiber
>>>>> switching to the one respecting the LuaJIT runtime or at tuning JIT
>>>>> compiler way more regarding the Lua-C usage.
>>>>
>>>> This is actually quite simple - we could easily call a LuaJIT hook
>>>> whenever switching a fiber, to make sure that it carefully
>>>> switches the internals as well. Mike Pall refused to cooperate on
>>>> the matter, but now we (you) control our own destiny.
>>>
>>> Unfortunately, I haven't seen the thread where the subj is discussed
>>> with Mike Pall, but the approach you proposed doesn't seem to be a
>>> convenient one, however it still solves a problem (as does the move to
>>> use Lua-C API for the code with possible Lua VM re-entrance underneath).
>>>
>>> The major flaw I see in this solution, is introducing the dependency on
>>> the JIT interface into Tarantool internals. There is already one
>>> dependency on LuaJIT-2.1.0 presented with internal headers usage for
>>> several hacks in utils.c. As a result we are not able to simply replace
>>> the Lua implementation to try another one (e.g. uJIT conforming
>>> LuaJIT-2.0.5) for comparing each other.
>>>
>>> The best proposal we had with Kirill and Sergos is to finalize a trace
>>> exactly at CALLXS IR, however after some research I found that the
>>> snapshot to be replayed at the corresponding trace exit will restore the
>>> guest stack it doesn't relate to. I hope to make further research this
>>> direction, but it requires a way more time to adjust this behaviour and
>>> its benefits are doubtful for me now.
>>>
>>> For now, there is a partial fix I mentioned before, however it still
>>> violates the flow I described here[1]. I'm going to proceed with the
>>> research a bit later and provide another patch.
>>>
>>>>
>>>>> Besides, we can't fully prevent platform failures if there is an FFI
>>>>> misusage in users code.
>>>>
>>>> Tarantool has never been claiming that it prevents people from
>>>
>>> Sorry, I simply misread the following:
>>> |> Why not dig it up to protect from future erosion of the code base?
>>> |>
>>> |> This would be more valuable contribution than just falling back to
>>> |> Lua/C for everything.
>>>
>>>> shooting themselves in the foot. Performance is the ultimate
>>>> design goal, at the cost of safety at times.
>>>>
>>>
>>> Great, we discussed with Leonid and Sasha offline and agreed to make
>>> several benchmarks to be provided in this thread. With no benchmarks all
>>> our estimates can be simply wrong.
>>>
>>>>
>>>>>> What should be the rule of thumb in your opinion, ffi or
>>>>>> lua/c?
>>>>>
>>>>> If you want to know my rule of thumb: FFI is for external existing
>>>>> libraries to be used in Lua code (and all compiler related benefits are
>>>>> nothing more than a godsend consequence, since all guest stack
>>>>> manipulations are implemented in LuaJIT runtime, not in an external
>>>>> code) and Lua-C is a well-designed and well-documented API for embedding
>>>>> Lua into a host application / extending Lua with external low-lewel
>>>>> libs. I totally do not insist on my point of view, since everyone has
>>>>> it's own vision on LuaJIT features.
>>>>
>>>> OK, but there must be a single policy though. So far it was:
>>>> everything that doesn't yield and doesn't call back to Lua
>>>> uses FFI. Everything else *has* to use Lua/C API, UNTIL  there
>>>> is a way to safely sandwich FFI calls.
>>>>
>>>
>>> I agree with you for the policy existence, but we all see the one you
>>> mentioned above can introduce bugs leading to a platform failures. So I
>>> guess we should reconsider it or simply dump somewhere. I think we have
>>> to make some benchmarks and provide not only stats, but also a
>>> reproducer with the input data, otherwise JIT tests are IMHO irrelevant.
>>>
>>>> -- 
>>>> Konstantin Osipov, Moscow, Russia
>>>
>>> [1]: https://github.com/tarantool/tarantool/issues/4427#issuecomment-546056302
>>>
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-26 13:13 [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI) Leonid
  2019-11-26 21:05 ` Konstantin Osipov
@ 2019-12-11 22:21 ` Alexander Turenko
  2019-12-12  8:23   ` Leonid Vasiliev
  2019-12-12  8:49   ` Konstantin Osipov
  1 sibling, 2 replies; 24+ messages in thread
From: Alexander Turenko @ 2019-12-11 22:21 UTC (permalink / raw)
  To: Leonid; +Cc: tarantool-patches

Kostya, thanks for bringing the FFI / Lua-C question into our view.

----

We decided to don't change existing convention: let's use FFI when
possible and Lua-C API for functions that access a Lua state somehow
(including touching tarantool_L, yielding and running triggers /
callbacks).

The core reason is that FFI allows to produce longer JIT traces that
should positively impact performance of an application at whole.

We have no good understanding how performant Lua-C API with trace
stitching, however simple benchmarks (shared by Leonid) shows positive
impact of using FFI calls.

We have no interpretation for those benchmarks for now. Whether Lua-C
runs were JITed at all? Whether a result will be different if there will
be more JITable code around? So for now things looks just as 'FFI is
faster the Lua-C'. Hope we'll investigate this later.

We need to find a way to reduce probability that a function that is
called via ffi (or one that is called from it) will be changed in a
future and starts to yield or touch a Lua state.

We should look whether we can mark functions that are called from ffi
(see also [1]) and all its callee to at least verify ourself when
changing code.

Also we can work on detection of so called ffi sandwitches and doing
more stress testing.

It seems that both directions are worth to dig into.

[1]: https://github.com/tarantool/tarantool/issues/4202

----

Back to the patch.

Since we'll just move one function, I would not perform refactoring I
proposed initially: don't move all related functions together. Let's
rewrite box.rollback_to_savepoint() using Lua-C API and place it together
with box.commit() and box.rollback() to src/box/lua/init.c.

I think we don't need any pure Lua wrapper around this function: we can
access a Lua table field and raise an error just from C, so I don't see
a reason to split a function logic around two files
(src/box/lua/schema.lua and src/box/lua/init.c). They are both about API
we expose to Lua.

WBR, Alexander Turenko.

On Tue, Nov 26, 2019 at 04:13:43PM +0300, Leonid wrote:
> https://github.com/tarantool/tarantool/issues/4427
> https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api
> 
> ---
>  src/box/CMakeLists.txt       |   2 +
>  src/box/lua/init.c           |   4 +
>  src/box/lua/schema.lua       |  71 -----------------
>  src/box/lua/txn.c            | 145 +++++++++++++++++++++++++++++++++++
>  src/box/lua/txn.h            |  49 ++++++++++++
>  src/box/lua/txn.lua          |  53 +++++++++++++
>  src/box/txn.c                |   2 +
>  test/box/misc.result         |   1 +
>  test/engine/savepoint.result |  12 +--
>  9 files changed, 262 insertions(+), 77 deletions(-)
>  create mode 100644 src/box/lua/txn.c
>  create mode 100644 src/box/lua/txn.h
>  create mode 100644 src/box/lua/txn.lua

> --- box.commit yields, so it's defined as Lua/C binding
> --- box.rollback yields as well

Let's move box.rollback_to_savepoint() to them.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-12-11 22:21 ` Alexander Turenko
@ 2019-12-12  8:23   ` Leonid Vasiliev
  2020-01-15 17:05     ` Alexander Turenko
  2019-12-12  8:49   ` Konstantin Osipov
  1 sibling, 1 reply; 24+ messages in thread
From: Leonid Vasiliev @ 2019-12-12  8:23 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches



On 12/12/19 1:21 AM, Alexander Turenko wrote:
> Kostya, thanks for bringing the FFI / Lua-C question into our view.
> 
> ----
> 
> We decided to don't change existing convention: let's use FFI when
> possible and Lua-C API for functions that access a Lua state somehow
> (including touching tarantool_L, yielding and running triggers /
> callbacks).
> 
> The core reason is that FFI allows to produce longer JIT traces that
> should positively impact performance of an application at whole.
> 
> We have no good understanding how performant Lua-C API with trace
> stitching, however simple benchmarks (shared by Leonid) shows positive
> impact of using FFI calls.
> 
> We have no interpretation for those benchmarks for now. Whether Lua-C
> runs were JITed at all? Whether a result will be different if there will
> be more JITable code around? So for now things looks just as 'FFI is
> faster the Lua-C'. Hope we'll investigate this later.
> 
> We need to find a way to reduce probability that a function that is
> called via ffi (or one that is called from it) will be changed in a
> future and starts to yield or touch a Lua state.
> 
> We should look whether we can mark functions that are called from ffi
> (see also [1]) and all its callee to at least verify ourself when
> changing code.
> 
> Also we can work on detection of so called ffi sandwitches and doing
> more stress testing.
> 
> It seems that both directions are worth to dig into.
> 
> [1]: https://github.com/tarantool/tarantool/issues/4202
> 
> ----
> 
> Back to the patch.
> 
> Since we'll just move one function, I would not perform refactoring I
> proposed initially: don't move all related functions together. Let's
> rewrite box.rollback_to_savepoint() using Lua-C API and place it together
> with box.commit() and box.rollback() to src/box/lua/init.c >
> I think we don't need any pure Lua wrapper around this function: we can
> access a Lua table field and raise an error just from C, so I don't see
> a reason to split a function logic around two files
> (src/box/lua/schema.lua and src/box/lua/init.c). They are both about API
> we expose to Lua.
> 
> WBR, Alexander Turenko.
>  > On Tue, Nov 26, 2019 at 04:13:43PM +0300, Leonid wrote:
>> https://github.com/tarantool/tarantool/issues/4427
>> https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api
>>
>> ---
>>   src/box/CMakeLists.txt       |   2 +
>>   src/box/lua/init.c           |   4 +
>>   src/box/lua/schema.lua       |  71 -----------------
>>   src/box/lua/txn.c            | 145 +++++++++++++++++++++++++++++++++++
>>   src/box/lua/txn.h            |  49 ++++++++++++
>>   src/box/lua/txn.lua          |  53 +++++++++++++
>>   src/box/txn.c                |   2 +
>>   test/box/misc.result         |   1 +
>>   test/engine/savepoint.result |  12 +--
>>   9 files changed, 262 insertions(+), 77 deletions(-)
>>   create mode 100644 src/box/lua/txn.c
>>   create mode 100644 src/box/lua/txn.h
>>   create mode 100644 src/box/lua/txn.lua
> 
>> --- box.commit yields, so it's defined as Lua/C binding
>> --- box.rollback yields as well
> 
> Let's move box.rollback_to_savepoint() to them.
> 

I think to have a separate module for txn is a good idea. IMHO a true 
way use FFI for begin and is_in_txn and use C API for create a savepoint 
and rollback. Really, it's more consistent in my mind. What do you think 
about it?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-12-11 22:21 ` Alexander Turenko
  2019-12-12  8:23   ` Leonid Vasiliev
@ 2019-12-12  8:49   ` Konstantin Osipov
  1 sibling, 0 replies; 24+ messages in thread
From: Konstantin Osipov @ 2019-12-12  8:49 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches

* Alexander Turenko <alexander.turenko@tarantool.org> [19/12/12 01:23]:

Thanks.

As to refactoring issue, my personal preference would be to not move the
code if only a handful of functions is changed, but I don't care as much.


-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-12-12  8:23   ` Leonid Vasiliev
@ 2020-01-15 17:05     ` Alexander Turenko
  0 siblings, 0 replies; 24+ messages in thread
From: Alexander Turenko @ 2020-01-15 17:05 UTC (permalink / raw)
  To: Leonid Vasiliev; +Cc: tarantool-patches

On Thu, Dec 12, 2019 at 11:23:14AM +0300, Leonid Vasiliev wrote:
> 
> 
> On 12/12/19 1:21 AM, Alexander Turenko wrote:
> > Kostya, thanks for bringing the FFI / Lua-C question into our view.
> > 
> > ----
> > 
> > We decided to don't change existing convention: let's use FFI when
> > possible and Lua-C API for functions that access a Lua state somehow
> > (including touching tarantool_L, yielding and running triggers /
> > callbacks).
> > 
> > The core reason is that FFI allows to produce longer JIT traces that
> > should positively impact performance of an application at whole.
> > 
> > We have no good understanding how performant Lua-C API with trace
> > stitching, however simple benchmarks (shared by Leonid) shows positive
> > impact of using FFI calls.
> > 
> > We have no interpretation for those benchmarks for now. Whether Lua-C
> > runs were JITed at all? Whether a result will be different if there will
> > be more JITable code around? So for now things looks just as 'FFI is
> > faster the Lua-C'. Hope we'll investigate this later.
> > 
> > We need to find a way to reduce probability that a function that is
> > called via ffi (or one that is called from it) will be changed in a
> > future and starts to yield or touch a Lua state.
> > 
> > We should look whether we can mark functions that are called from ffi
> > (see also [1]) and all its callee to at least verify ourself when
> > changing code.
> > 
> > Also we can work on detection of so called ffi sandwitches and doing
> > more stress testing.
> > 
> > It seems that both directions are worth to dig into.
> > 
> > [1]: https://github.com/tarantool/tarantool/issues/4202
> > 
> > ----
> > 
> > Back to the patch.
> > 
> > Since we'll just move one function, I would not perform refactoring I
> > proposed initially: don't move all related functions together. Let's
> > rewrite box.rollback_to_savepoint() using Lua-C API and place it together
> > with box.commit() and box.rollback() to src/box/lua/init.c >
> > I think we don't need any pure Lua wrapper around this function: we can
> > access a Lua table field and raise an error just from C, so I don't see
> > a reason to split a function logic around two files
> > (src/box/lua/schema.lua and src/box/lua/init.c). They are both about API
> > we expose to Lua.
> > 
> > WBR, Alexander Turenko.
> >  > On Tue, Nov 26, 2019 at 04:13:43PM +0300, Leonid wrote:
> > > https://github.com/tarantool/tarantool/issues/4427
> > > https://github.com/tarantool/tarantool/tree/lvasiliev/gh-4427-move-some-stuff-from-ffi-to-c-api
> > > 
> > > ---
> > >   src/box/CMakeLists.txt       |   2 +
> > >   src/box/lua/init.c           |   4 +
> > >   src/box/lua/schema.lua       |  71 -----------------
> > >   src/box/lua/txn.c            | 145 +++++++++++++++++++++++++++++++++++
> > >   src/box/lua/txn.h            |  49 ++++++++++++
> > >   src/box/lua/txn.lua          |  53 +++++++++++++
> > >   src/box/txn.c                |   2 +
> > >   test/box/misc.result         |   1 +
> > >   test/engine/savepoint.result |  12 +--
> > >   9 files changed, 262 insertions(+), 77 deletions(-)
> > >   create mode 100644 src/box/lua/txn.c
> > >   create mode 100644 src/box/lua/txn.h
> > >   create mode 100644 src/box/lua/txn.lua
> > 
> > > --- box.commit yields, so it's defined as Lua/C binding
> > > --- box.rollback yields as well
> > 
> > Let's move box.rollback_to_savepoint() to them.
> > 
> 
> I think to have a separate module for txn is a good idea. IMHO a true way
> use FFI for begin and is_in_txn and use C API for create a savepoint and
> rollback. Really, it's more consistent in my mind. What do you think about
> it?

Personally I'm not sure that this refactoring is valuable enough to do
it. I don't mind, however: let's propose it after we'll fix the bug, if
you want.

But I think that creation of a savepoint using Lua-C violaves the rule
re FFI / Lua-C API usage (cited from above):

> Use FFI when possible and Lua-C API for functions that access a Lua
> state somehow (including touching tarantool_L, yielding and running
> triggers / callbacks).

I would maybe add an exception for some pure Lua-C API modules (I wrote
about it to the related internal mail thread, see '[dev] RFC: FFI usage
guidance'), but this is not our case. Moving a function from FFI to
Lua-C should be strictly motivated. Since we're working on a strict rule
and your proposal violates the one cited above, can you propose a
wording that will handle your case gracefully?

I'll look into the patch, which moves just one function ('txn: move
rollback to savepoint from ffi to C-API' from 2019-12-13) soon.

WBR, Alexander Turenko.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2019-11-28 13:18             ` Konstantin Osipov
  2019-11-28 14:03               ` Igor Munkin
@ 2020-03-20 18:48               ` Igor Munkin
  2020-03-20 19:27                 ` Konstantin Osipov
  1 sibling, 1 reply; 24+ messages in thread
From: Igor Munkin @ 2020-03-20 18:48 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

Kostja,

On 28.11.19, Konstantin Osipov wrote:
> * Igor Munkin <imun@tarantool.org> [19/11/28 16:03]:

<snipped>

> > > 
> > > Nick Zavaritsky had a patch that would detect sandwich stacks in
> > > runtime and assert. Nobody had time to look at it back then -

Many thanks to Sasha for finding the corresponding issue[1] created by
Nick. Did you mention *this* patch above?

> > 
> > Could you please provide the issue/link for this changeset, I'll take a
> > look on it with pleasure.
> 
> Nick Zavaritsky himself is the only link here, unfortunately.
> Perhaps he has this patch in one of his personal branches. I
> suggest someone contacts him :)
> 
> -- 
> Konstantin Osipov, Moscow, Russia


[1]: https://github.com/tarantool/tarantool/issues/1700

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI)
  2020-03-20 18:48               ` Igor Munkin
@ 2020-03-20 19:27                 ` Konstantin Osipov
  0 siblings, 0 replies; 24+ messages in thread
From: Konstantin Osipov @ 2020-03-20 19:27 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches

* Igor Munkin <imun@tarantool.org> [20/03/20 21:54]:

Yes.

> Kostja,
> 
> On 28.11.19, Konstantin Osipov wrote:
> > * Igor Munkin <imun@tarantool.org> [19/11/28 16:03]:
> 
> <snipped>
> 
> > > > 
> > > > Nick Zavaritsky had a patch that would detect sandwich stacks in
> > > > runtime and assert. Nobody had time to look at it back then -
> 
> Many thanks to Sasha for finding the corresponding issue[1] created by
> Nick. Did you mention *this* patch above?
> 
> > > 
> > > Could you please provide the issue/link for this changeset, I'll take a
> > > look on it with pleasure.
> > 
> > Nick Zavaritsky himself is the only link here, unfortunately.
> > Perhaps he has this patch in one of his personal branches. I
> > suggest someone contacts him :)
> > 
> > -- 
> > Konstantin Osipov, Moscow, Russia
> 
> 
> [1]: https://github.com/tarantool/tarantool/issues/1700
> 
> -- 
> Best regards,
> IM

-- 
Konstantin Osipov, Moscow, Russia
https://scylladb.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2020-03-20 19:27 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-26 13:13 [Tarantool-patches] [PATCH] Move txn from shema to a separate module (use C API instead of FFI) Leonid
2019-11-26 21:05 ` Konstantin Osipov
2019-11-26 21:17   ` Alexander Turenko
2019-11-27  8:31     ` Konstantin Osipov
2019-11-28  8:10       ` Leonid Vasiliev
2019-11-28 12:34         ` Konstantin Osipov
2019-11-28 13:00           ` Igor Munkin
2019-11-28 13:18             ` Konstantin Osipov
2019-11-28 14:03               ` Igor Munkin
2019-11-28 15:58                 ` Konstantin Osipov
2019-11-28 18:36                   ` Igor Munkin
2019-11-29  5:30                     ` Konstantin Osipov
2019-11-29 17:43                       ` Igor Munkin
2019-11-29  5:41                     ` Konstantin Osipov
2019-11-29 17:37                       ` Igor Munkin
2019-12-04 13:05                     ` Leonid Vasiliev
2019-12-04 13:15                       ` Konstantin Osipov
2019-12-05  8:27                         ` Leonid Vasiliev
2020-03-20 18:48               ` Igor Munkin
2020-03-20 19:27                 ` Konstantin Osipov
2019-12-11 22:21 ` Alexander Turenko
2019-12-12  8:23   ` Leonid Vasiliev
2020-01-15 17:05     ` Alexander Turenko
2019-12-12  8:49   ` Konstantin Osipov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox