Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
@ 2020-05-18  9:37 Sergey Kaplun
  2020-05-18 21:52 ` Vladislav Shpilevoy
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Sergey Kaplun @ 2020-05-18  9:37 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladislav Shpilevoy

Currently on encoding table we push cfunction (lua_field_try_serialize)
to lua stack with additional lightuserdata and table value and after
pcall that function to avoid a raise of error.

In this case LuaJIT creates new object which will not live long time,
so it increase amount of dead object and also increase time and
frequency of garbage collection inside LuaJIT.
Also this pcall is necessary only in case when metafield __serialize
of serilizable object has LUA_TFUNCTION type.

So instead pushcfunction with pcall we can directly call the function
trying to serialize an object.
---

branch: https://github.com/tarantool/tarantool/tree/skaplun/no-ticket-lua-inspect-table-refactoring

 src/lua/utils.c | 132 ++++++++++++++++--------------------------------
 1 file changed, 44 insertions(+), 88 deletions(-)

diff --git a/src/lua/utils.c b/src/lua/utils.c
index d410a3d03..58715a55e 100644
--- a/src/lua/utils.c
+++ b/src/lua/utils.c
@@ -461,91 +461,69 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
 }
 
 /**
- * A helper structure to simplify safe call of __serialize method.
- * It passes some arguments into the serializer called via pcall,
- * and carries out some results.
+ * Call __serialize method of a table object by index if the former exists.
+ *
+ * If __serialize not exists function does nothing and the function returns 1;
+ *
+ * If __serialize exists, is correct and is a function then
+ * a result of serialization replaces old value by the index and
+ * the function returns 0;
+ *
+ * If the serialization hints like 'array' or 'map', then field->type,
+ * field->syze and field->compact sets if necessary
+ * and the function returns 0;
+ *
+ * Otherwise it is an error, set diag and the funciton returns -1;
+ *
+ * Return values:
+ * -1 - error, set diag
+ *  0 - has serialize, success replace and have to finish
+ *  1 - hasn't serialize, need to process
  */
-struct lua_serialize_status {
-	/**
-	 * True if an attempt to call __serialize has failed. A
-	 * diag message is set.
-	 */
-	bool is_error;
-	/**
-	 * True, if __serialize exists. Otherwise an ordinary
-	 * default serialization is used.
-	 */
-	bool is_serialize_used;
-	/**
-	 * True, if __serialize not only exists, but also returned
-	 * a new value which should replace the original one. On
-	 * the contrary __serialize could be a string like 'array'
-	 * or 'map' and do not push anything but rather say how to
-	 * interpret the target table. In such a case there is
-	 * nothing to replace with.
-	 */
-	bool is_value_returned;
-	/** Parameters, passed originally to luaL_tofield. */
-	struct luaL_serializer *cfg;
-	struct luaL_field *field;
-};
 
-/**
- * Call __serialize method of a table object if the former exists.
- * The function expects 2 values pushed onto the Lua stack: a
- * value to serialize, and a pointer at a struct
- * lua_serialize_status object. If __serialize exists, is correct,
- * and is a function then one value is pushed as a result of
- * serialization. If it is correct, but just a serialization hint
- * like 'array' or 'map', then nothing is pushed. Otherwise it is
- * an error. All the described outcomes can be distinguished via
- * lua_serialize_status attributes.
- */
 static int
-lua_field_try_serialize(struct lua_State *L)
+lua_field_try_serialize(struct lua_State *L, int idx,
+			struct luaL_serializer *cfg, struct luaL_field *field)
 {
-	struct lua_serialize_status *s =
-		(struct lua_serialize_status *) lua_touserdata(L, 2);
-	s->is_serialize_used = (luaL_getmetafield(L, 1, LUAL_SERIALIZE) != 0);
-	s->is_error = false;
-	s->is_value_returned = false;
-	if (! s->is_serialize_used)
-		return 0;
-	struct luaL_serializer *cfg = s->cfg;
-	struct luaL_field *field = s->field;
+	bool is_serialize_used = (luaL_getmetafield(L, idx,
+						    LUAL_SERIALIZE) != 0);
+	if (!is_serialize_used)
+		return 1;
 	if (lua_isfunction(L, -1)) {
 		/* copy object itself */
-		lua_pushvalue(L, 1);
-		lua_call(L, 1, 1);
-		s->is_error = (luaL_tofield(L, cfg, NULL, -1, field) != 0);
-		s->is_value_returned = true;
-		return 1;
+		lua_pushvalue(L, idx);
+		if (lua_pcall(L, 1, 1, 0) != 0) {
+			diag_set(LuajitError, lua_tostring(L, -1));
+			return -1;
+		}
+		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
+			return -1;
+		lua_replace(L, idx);
+		return 0;
 	}
 	if (!lua_isstring(L, -1)) {
 		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
-		s->is_error = true;
-		lua_pop(L, 1);
-		return 0;
+		return -1;
 	}
 	const char *type = lua_tostring(L, -1);
 	if (strcmp(type, "array") == 0 || strcmp(type, "seq") == 0 ||
 	    strcmp(type, "sequence") == 0) {
 		field->type = MP_ARRAY; /* Override type */
-		field->size = luaL_arrlen(L, 1);
+		field->size = luaL_arrlen(L, idx);
 		/* YAML: use flow mode if __serialize == 'seq' */
 		if (cfg->has_compact && type[3] == '\0')
 			field->compact = true;
 	} else if (strcmp(type, "map") == 0 || strcmp(type, "mapping") == 0) {
 		field->type = MP_MAP;   /* Override type */
-		field->size = luaL_maplen(L, 1);
+		field->size = luaL_maplen(L, idx);
 		/* YAML: use flow mode if __serialize == 'map' */
 		if (cfg->has_compact && type[3] == '\0')
 			field->compact = true;
 	} else {
 		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
-		s->is_error = true;
+		return -1;
 	}
-	lua_pop(L, 1);
+	lua_pop(L, 1); /* remove value was setted by luaL_getmetafield */
 	return 0;
 }
 
@@ -559,36 +537,14 @@ lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
 
 	if (cfg->encode_load_metatables) {
 		int top = lua_gettop(L);
-		struct lua_serialize_status s;
-		s.cfg = cfg;
-		s.field = field;
-		lua_pushcfunction(L, lua_field_try_serialize);
-		lua_pushvalue(L, idx);
-		lua_pushlightuserdata(L, &s);
-		if (lua_pcall(L, 2, 1, 0) != 0) {
-			diag_set(LuajitError, lua_tostring(L, -1));
-			return -1;
-		}
-		if (s.is_error)
+		int res = lua_field_try_serialize(L, idx, cfg, field);
+		if (res == -1)
 			return -1;
-		/*
-		 * lua_call/pcall always returns the specified
-		 * number of values. Even if the function returned
-		 * less - others are filled with nils. So when a
-		 * nil is not needed, it should be popped
-		 * manually.
-		 */
-		assert(lua_gettop(L) == top + 1);
-		(void) top;
-		if (s.is_serialize_used) {
-			if (s.is_value_returned)
-				lua_replace(L, idx);
-			else
-				lua_pop(L, 1);
+		assert(lua_gettop(L) == top);
+		(void)top;
+		if (res == 0)
 			return 0;
-		}
-		assert(! s.is_value_returned);
-		lua_pop(L, 1);
+		/* Fall throuth with res == 1 */
 	}
 
 	field->type = MP_ARRAY;
-- 
2.24.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-18  9:37 [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction Sergey Kaplun
@ 2020-05-18 21:52 ` Vladislav Shpilevoy
  2020-05-19 10:29   ` Sergey Kaplun
  2020-05-22 11:48 ` Aleksandr Lyapunov
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Vladislav Shpilevoy @ 2020-05-18 21:52 UTC (permalink / raw)
  To: Sergey Kaplun, tarantool-patches

Hi! Thanks for the patch!

Igor, please, join the review.

Sergey, see 4 comments below.

On 18/05/2020 11:37, Sergey Kaplun wrote:
> Currently on encoding table we push cfunction (lua_field_try_serialize)
> to lua stack with additional lightuserdata and table value and after
> pcall that function to avoid a raise of error.
> 
> In this case LuaJIT creates new object which will not live long time,
> so it increase amount of dead object and also increase time and
> frequency of garbage collection inside LuaJIT.
> Also this pcall is necessary only in case when metafield __serialize
> of serilizable object has LUA_TFUNCTION type.
> 
> So instead pushcfunction with pcall we can directly call the function
> trying to serialize an object.
> ---

1. Is there a measurement, how much does this patch help and when,
exactly?

> branch: https://github.com/tarantool/tarantool/tree/skaplun/no-ticket-lua-inspect-table-refactoring
> 
>  src/lua/utils.c | 132 ++++++++++++++++--------------------------------
>  1 file changed, 44 insertions(+), 88 deletions(-)
> 
> diff --git a/src/lua/utils.c b/src/lua/utils.c
> index d410a3d03..58715a55e 100644
> --- a/src/lua/utils.c
> +++ b/src/lua/utils.c
> @@ -461,91 +461,69 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
>  }
>  
>  /**
> - * A helper structure to simplify safe call of __serialize method.
> - * It passes some arguments into the serializer called via pcall,
> - * and carries out some results.
> + * Call __serialize method of a table object by index if the former exists.
> + *
> + * If __serialize not exists function does nothing and the function returns 1;

2. 'not exists' -> 'does not exist'.

> + *
> + * If __serialize exists, is correct and is a function then
> + * a result of serialization replaces old value by the index and
> + * the function returns 0;
> + *
> + * If the serialization hints like 'array' or 'map', then field->type,
> + * field->syze and field->compact sets if necessary
> + * and the function returns 0;
> + *
> + * Otherwise it is an error, set diag and the funciton returns -1;
> + *
> + * Return values:
> + * -1 - error, set diag
> + *  0 - has serialize, success replace and have to finish
> + *  1 - hasn't serialize, need to process
>   */
> -struct lua_serialize_status {
> -	/**
> -	 * True if an attempt to call __serialize has failed. A
> -	 * diag message is set.
> -	 */
> -	bool is_error;
> -	/**
> -	 * True, if __serialize exists. Otherwise an ordinary
> -	 * default serialization is used.
> -	 */
> -	bool is_serialize_used;
> -	/**
> -	 * True, if __serialize not only exists, but also returned
> -	 * a new value which should replace the original one. On
> -	 * the contrary __serialize could be a string like 'array'
> -	 * or 'map' and do not push anything but rather say how to
> -	 * interpret the target table. In such a case there is
> -	 * nothing to replace with.
> -	 */
> -	bool is_value_returned;
> -	/** Parameters, passed originally to luaL_tofield. */
> -	struct luaL_serializer *cfg;
> -	struct luaL_field *field;
> -};
>  
> -/**
> - * Call __serialize method of a table object if the former exists.
> - * The function expects 2 values pushed onto the Lua stack: a
> - * value to serialize, and a pointer at a struct
> - * lua_serialize_status object. If __serialize exists, is correct,
> - * and is a function then one value is pushed as a result of
> - * serialization. If it is correct, but just a serialization hint
> - * like 'array' or 'map', then nothing is pushed. Otherwise it is
> - * an error. All the described outcomes can be distinguished via
> - * lua_serialize_status attributes.
> - */
>  static int
> -lua_field_try_serialize(struct lua_State *L)
> +lua_field_try_serialize(struct lua_State *L, int idx,
> +			struct luaL_serializer *cfg, struct luaL_field *field)
>  {
> -	struct lua_serialize_status *s =
> -		(struct lua_serialize_status *) lua_touserdata(L, 2);
> -	s->is_serialize_used = (luaL_getmetafield(L, 1, LUAL_SERIALIZE) != 0);
> -	s->is_error = false;
> -	s->is_value_returned = false;
> -	if (! s->is_serialize_used)
> -		return 0;
> -	struct luaL_serializer *cfg = s->cfg;
> -	struct luaL_field *field = s->field;
> +	bool is_serialize_used = (luaL_getmetafield(L, idx,
> +						    LUAL_SERIALIZE) != 0);
> +	if (!is_serialize_used)
> +		return 1;

3. Seems like you don't need to save the check result into a variable.
You can just inline is_serialize_used assignment into the condition.

>  	if (lua_isfunction(L, -1)) {
>  		/* copy object itself */
> -		lua_pushvalue(L, 1);
> -		lua_call(L, 1, 1);
> -		s->is_error = (luaL_tofield(L, cfg, NULL, -1, field) != 0);
> -		s->is_value_returned = true;
> -		return 1;
> +		lua_pushvalue(L, idx);
> +		if (lua_pcall(L, 1, 1, 0) != 0) {
> +			diag_set(LuajitError, lua_tostring(L, -1));
> +			return -1;
> +		}
> +		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
> +			return -1;
> +		lua_replace(L, idx);
> +		return 0;
>  	}
>  	if (!lua_isstring(L, -1)) {
>  		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
> -		s->is_error = true;
> -		lua_pop(L, 1);
> -		return 0;
> +		return -1;
>  	}
>  	const char *type = lua_tostring(L, -1);
>  	if (strcmp(type, "array") == 0 || strcmp(type, "seq") == 0 ||
>  	    strcmp(type, "sequence") == 0) {
>  		field->type = MP_ARRAY; /* Override type */
> -		field->size = luaL_arrlen(L, 1);
> +		field->size = luaL_arrlen(L, idx);
>  		/* YAML: use flow mode if __serialize == 'seq' */
>  		if (cfg->has_compact && type[3] == '\0')
>  			field->compact = true;
>  	} else if (strcmp(type, "map") == 0 || strcmp(type, "mapping") == 0) {
>  		field->type = MP_MAP;   /* Override type */
> -		field->size = luaL_maplen(L, 1);
> +		field->size = luaL_maplen(L, idx);
>  		/* YAML: use flow mode if __serialize == 'map' */
>  		if (cfg->has_compact && type[3] == '\0')
>  			field->compact = true;
>  	} else {
>  		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
> -		s->is_error = true;
> +		return -1;
>  	}
> -	lua_pop(L, 1);
> +	lua_pop(L, 1); /* remove value was setted by luaL_getmetafield */

4. Please, don't write comments on the same line as code. Also start
sentences with a capital letter and end with a dot.

'was setted' -> 'set'

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-18 21:52 ` Vladislav Shpilevoy
@ 2020-05-19 10:29   ` Sergey Kaplun
  2020-05-19 20:08     ` Vladislav Shpilevoy
  0 siblings, 1 reply; 12+ messages in thread
From: Sergey Kaplun @ 2020-05-19 10:29 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

Hi! Thanks for the review!

On 18.05.20, Vladislav Shpilevoy wrote:
> On 18/05/2020 11:37, Sergey Kaplun wrote:
> > Currently on encoding table we push cfunction (lua_field_try_serialize)
> > to lua stack with additional lightuserdata and table value and after
> > pcall that function to avoid a raise of error.
> > 
> > In this case LuaJIT creates new object which will not live long time,
> > so it increase amount of dead object and also increase time and
> > frequency of garbage collection inside LuaJIT.
> > Also this pcall is necessary only in case when metafield __serialize
> > of serilizable object has LUA_TFUNCTION type.
> > 
> > So instead pushcfunction with pcall we can directly call the function
> > trying to serialize an object.
> > ---
> 
> 1. Is there a measurement, how much does this patch help and when,
> exactly?

I was faced this when I was looking at the flamegraphs by Aleksandr
Lyapunov related to https://github.com/tarantool/vshard/issues/224
At vshard_router.svg you can see that mp_encode_lua call-chain comes to
luamp_inspect_table, with pushcfunction and gc_step respectively.

So I suppose that simple test with mp_lua_encode would be enough (see
example below) to see a difference.
Note: for your local machine 1e5-1e6 itterations would be ok.

| 20:15:42 jobs:0 s.kaplun@dev4:~/tarantool_builds/lua_mp_pcall$
| >>> src/tarantool
| Tarantool 2.5.0-32-g7f20272
| type 'help' for interactive help
| tarantool> c1_source = {
|                setmetatable({[180] = 10, [1] = 0}, {__serialize = "map"}),
|                setmetatable({
|                    [34] = "vshard.router.call_rw",
|                    [33] = {148, 0, "bench_call_select_r"}
|                }, {__serialize = "map"})
|            }
| ---
| ...
| 
| tarantool> local msgpack = require"msgpack"
|            local clock = require"clock"
|            local S = clock.proc()
|            for i=1,1e7 do msgpack.encode(c1_source) end
|            return clock.proc() - S
| ---
| - 9.968856686
| ...
| 
| tarantool> 21:00:32 jobs:0 s.kaplun@dev4:~/tarantool_builds/lua_mp_pcall$
| >>> ../master/src/tarantool
| Tarantool 2.5.0-32-g7f20272
| type 'help' for interactive help
| tarantool> c1_source = {
|                setmetatable({[180] = 10, [1] = 0}, {__serialize = "map"}),
|                setmetatable({
|                    [34] = "vshard.router.call_rw",
|                    [33] = {148, 0, "bench_call_select_r"}
|                }, {__serialize = "map"})
|            }
| ---
| ...
| 
| tarantool> local msgpack = require"msgpack"
|            local clock = require"clock"
|            local S = clock.proc()
|            for i=1,1e7 do msgpack.encode(c1_source) end
|            return clock.proc() - S
| ---
| - 12.622932064

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-19 10:29   ` Sergey Kaplun
@ 2020-05-19 20:08     ` Vladislav Shpilevoy
  0 siblings, 0 replies; 12+ messages in thread
From: Vladislav Shpilevoy @ 2020-05-19 20:08 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Hi! Thanks for the explanation!

Looks nice. Please, fix the other comments and the patch
will be good to go.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-18  9:37 [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction Sergey Kaplun
  2020-05-18 21:52 ` Vladislav Shpilevoy
@ 2020-05-22 11:48 ` Aleksandr Lyapunov
  2020-05-22 14:05 ` Alexander Turenko
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Aleksandr Lyapunov @ 2020-05-22 11:48 UTC (permalink / raw)
  To: Sergey Kaplun, tarantool-patches; +Cc: Vladislav Shpilevoy

Thanks for the patch! I agree with Vlad's comments.
I have only one insignificant comment, see below.
The rest LGTM, but I'm not a Lua expert an could miss something.

On 5/18/20 12:37 PM, Sergey Kaplun wrote:
> + * Return values:
> + * -1 - error, set diag
> + *  0 - has serialize, success replace and have to finish
> + *  1 - hasn't serialize, need to process
I would say "doesn't have" or "hasn't got".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-18  9:37 [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction Sergey Kaplun
  2020-05-18 21:52 ` Vladislav Shpilevoy
  2020-05-22 11:48 ` Aleksandr Lyapunov
@ 2020-05-22 14:05 ` Alexander Turenko
  2020-05-25 12:06   ` Sergey Kaplun
  2020-06-02  0:19 ` Igor Munkin
  2020-06-10 12:15 ` Kirill Yukhin
  4 siblings, 1 reply; 12+ messages in thread
From: Alexander Turenko @ 2020-05-22 14:05 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches, Vladislav Shpilevoy

On Mon, May 18, 2020 at 12:37:48PM +0300, Sergey Kaplun wrote:
> Currently on encoding table we push cfunction (lua_field_try_serialize)
> to lua stack with additional lightuserdata and table value and after
> pcall that function to avoid a raise of error.

It is often used to catch 'not enough memory' error. Say, to avoid a
leak if some resource should be freed before reporting an error. Or when
you want to return an error instead of raising it.

Please, look whether it was added by intention and verify that
everything still good after the patch.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-22 14:05 ` Alexander Turenko
@ 2020-05-25 12:06   ` Sergey Kaplun
  0 siblings, 0 replies; 12+ messages in thread
From: Sergey Kaplun @ 2020-05-25 12:06 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches, Vladislav Shpilevoy

On 22.05.20, Alexander Turenko wrote:
> On Mon, May 18, 2020 at 12:37:48PM +0300, Sergey Kaplun wrote:
> > Currently on encoding table we push cfunction (lua_field_try_serialize)
> > to lua stack with additional lightuserdata and table value and after
> > pcall that function to avoid a raise of error.
> 
> It is often used to catch 'not enough memory' error. Say, to avoid a
> leak if some resource should be freed before reporting an error. Or when
> you want to return an error instead of raising it.
> 
> Please, look whether it was added by intention and verify that
> everything still good after the patch.

Hi!

As far as I can tell from the code lua_field_try_serialize hasn't got any calls
of lua API that can raise error about OOM or so on, exept lua_tostring and
lua_pushvalue. But we have already used lua_pushvalue and lua_pushcfunction so
with this patch probability of raising error is smaller (because we need less
arguments to push).

And lua_tolstring calls only after checking type of lua
object so lj_gc_check branch is unreacheble.

504         if (!lua_isstring(L, -1)) {
505 +---  2 lines: diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");--
507         }
508         const char *type = lua_tostring(L, -1);

491 LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
492 {
493   TValue *o = index2adr(L, idx);
494   GCstr *s;
495   if (LJ_LIKELY(tvisstr(o))) {
496     s = strV(o);
497   } else if (tvisnumber(o)) {
498     lj_gc_check(L);


If I understand correctly, we must avoid panic when called luaL_tofield. I was
able to come up with a similar example, how to show that the behavior has not
changed. But I am not 100% sure that this test covers all corner cases.
Do you have any idea of a more complete test?


| 13:00:04 jobs:0 burii@root:~/builds_workspace/tarantool/master$
| >>> ../no-ticket-lua-inspect-table-refactoring/src/tarantool 
| Tarantool 2.5.0-32-g7f20272ea
| type 'help' for interactive help
| tarantool> collectgarbage("stop")
|            local source = setmetatable({}, {__serialize = "map"})
|            print("start")
|            for i = 1, 340000 do
|                table.insert(source, (tostring(i)):rep(1024))
|            end
|            print("end")
|            local msgpack = require "msgpack"
|            return  pcall(msgpack.encode, source)
| start
| end
| ---
| - false
| - not enough memory
| ...
| 
| tarantool> 13:00:41 jobs:0 burii@root:~/builds_workspace/tarantool/master$
| >>> src/tarantool 
| Tarantool 2.5.0-1-gad13b6d57
| type 'help' for interactive help
| tarantool> collectgarbage("stop")
|            local source = {}
|            print("start")
|            for i = 1, 340000 do
|                table.insert(source, (tostring(i)):rep(1024))
|            end
|            print("end")
|            local msgpack = require "msgpack"
|            return  pcall(msgpack.encode, source)
| start
| end
| ---
| - false
| - not enough memory
| ...

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-18  9:37 [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction Sergey Kaplun
                   ` (2 preceding siblings ...)
  2020-05-22 14:05 ` Alexander Turenko
@ 2020-06-02  0:19 ` Igor Munkin
  2020-06-02  9:36   ` Igor Munkin
  2020-06-10 12:15 ` Kirill Yukhin
  4 siblings, 1 reply; 12+ messages in thread
From: Igor Munkin @ 2020-06-02  0:19 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches, Vladislav Shpilevoy

Sergey,

Thanks for the patch! Please consider my comments below.

On 18.05.20, Sergey Kaplun wrote:
> Currently on encoding table we push cfunction (lua_field_try_serialize)
> to lua stack with additional lightuserdata and table value and after
> pcall that function to avoid a raise of error.
> 
> In this case LuaJIT creates new object which will not live long time,

<lua_pushlightuserdata> just assigns a pointer to the top guest stack
slot. Yes, it might trigger stack reallocation, but no GC object is
created.

Yes, the reallocation can fail with either LUA_ERRMEM or LUA_ERRRUN, but
the error is raised out of the protected scope, and is not handled.
There are more corner cases we have already discussed with Vlad here[1].

> so it increase amount of dead object and also increase time and
> frequency of garbage collection inside LuaJIT.
> Also this pcall is necessary only in case when metafield __serialize
> of serilizable object has LUA_TFUNCTION type.

Typo: s/serilizable/serializable/.

> 
> So instead pushcfunction with pcall we can directly call the function
> trying to serialize an object.

Well, let's polish the commit message to make it a bit clearer. Commit
subject also looks non-informative and doesn't respect our contribution
guide[2], so I propose the following rewording:
| lua: remove excess Lua call from table encoding
|
| For safe table encoding <lua_field_try_serialize> function is pushed
| to Lua stack along with auxiliary lightuserdata and table object to be
| encoded. Its further protected call catches Lua error if one is raised
| while encoding. It is only necessary when the object to be serialized
| has __serialize field in metatable and this field is a Lua function.
|
| As a result of this change the function serializing the given object
| is called without excess protected frame and auxiliary status struct.
Feel free to change it on your own.

> ---
> 
> branch: https://github.com/tarantool/tarantool/tree/skaplun/no-ticket-lua-inspect-table-refactoring
> 
>  src/lua/utils.c | 132 ++++++++++++++++--------------------------------
>  1 file changed, 44 insertions(+), 88 deletions(-)
> 
> diff --git a/src/lua/utils.c b/src/lua/utils.c
> index d410a3d03..58715a55e 100644
> --- a/src/lua/utils.c
> +++ b/src/lua/utils.c
> @@ -461,91 +461,69 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
>  }
>  
>  /**
> - * A helper structure to simplify safe call of __serialize method.
> - * It passes some arguments into the serializer called via pcall,
> - * and carries out some results.
> + * Call __serialize method of a table object by index if the former exists.

The comment exceeds 66 symbols. Please adjust it considering our
contribution guide[3].

> + *
> + * If __serialize not exists function does nothing and the function returns 1;
> + *
> + * If __serialize exists, is correct and is a function then

What is 'correct' in this context?

> + * a result of serialization replaces old value by the index and
> + * the function returns 0;
> + *
> + * If the serialization hints like 'array' or 'map', then field->type,
> + * field->syze and field->compact sets if necessary

Typo: s/syze/size/.

> + * and the function returns 0;

I failed to get the sentence above and here is what I see in the code:
| If the serialization is a hint string (like 'array' or 'map'), then
| field->type, field->size and field->compact are set if necessary and
| the function returns 0;

> + *
> + * Otherwise it is an error, set diag and the funciton returns -1;
> + *
> + * Return values:
> + * -1 - error, set diag
> + *  0 - has serialize, success replace and have to finish
> + *  1 - hasn't serialize, need to process

Minor: I propose the following rewording to make this part a bit clear:
| Return values:
| -1 - error occurs, diag is set, the top of guest stack is undefined.
|  0 - __serialize is set, the result value is in the origin slot,
|      encoding is finished.
|  1 - __serialize is not set, proceed with default table encoding.

>   */
> -struct lua_serialize_status {
> -	/**
> -	 * True if an attempt to call __serialize has failed. A
> -	 * diag message is set.
> -	 */
> -	bool is_error;
> -	/**
> -	 * True, if __serialize exists. Otherwise an ordinary
> -	 * default serialization is used.
> -	 */
> -	bool is_serialize_used;
> -	/**
> -	 * True, if __serialize not only exists, but also returned
> -	 * a new value which should replace the original one. On
> -	 * the contrary __serialize could be a string like 'array'
> -	 * or 'map' and do not push anything but rather say how to
> -	 * interpret the target table. In such a case there is
> -	 * nothing to replace with.
> -	 */
> -	bool is_value_returned;
> -	/** Parameters, passed originally to luaL_tofield. */
> -	struct luaL_serializer *cfg;
> -	struct luaL_field *field;
> -};
>  
> -/**
> - * Call __serialize method of a table object if the former exists.
> - * The function expects 2 values pushed onto the Lua stack: a
> - * value to serialize, and a pointer at a struct
> - * lua_serialize_status object. If __serialize exists, is correct,
> - * and is a function then one value is pushed as a result of
> - * serialization. If it is correct, but just a serialization hint
> - * like 'array' or 'map', then nothing is pushed. Otherwise it is
> - * an error. All the described outcomes can be distinguished via
> - * lua_serialize_status attributes.
> - */
>  static int
> -lua_field_try_serialize(struct lua_State *L)
> +lua_field_try_serialize(struct lua_State *L, int idx,
> +			struct luaL_serializer *cfg, struct luaL_field *field)

I looked on the code around and other signatures are different in the
arguments order:
| lua_field_do_something(struct lua_State *, struct luaL_serializer *,
|                        int, struct luaL_field *);
I see no reason to violate this practice.

>  {
> -	struct lua_serialize_status *s =
> -		(struct lua_serialize_status *) lua_touserdata(L, 2);
> -	s->is_serialize_used = (luaL_getmetafield(L, 1, LUAL_SERIALIZE) != 0);
> -	s->is_error = false;
> -	s->is_value_returned = false;
> -	if (! s->is_serialize_used)
> -		return 0;
> -	struct luaL_serializer *cfg = s->cfg;
> -	struct luaL_field *field = s->field;
> +	bool is_serialize_used = (luaL_getmetafield(L, idx,
> +						    LUAL_SERIALIZE) != 0);

I agree with Vlad here. If there are some reasons to leave this variable
has_serialize is also OK, but at the same time it fits 80 chars.

> +	if (!is_serialize_used)
> +		return 1;
>  	if (lua_isfunction(L, -1)) {
>  		/* copy object itself */
> -		lua_pushvalue(L, 1);
> -		lua_call(L, 1, 1);
> -		s->is_error = (luaL_tofield(L, cfg, NULL, -1, field) != 0);
> -		s->is_value_returned = true;
> -		return 1;
> +		lua_pushvalue(L, idx);
> +		if (lua_pcall(L, 1, 1, 0) != 0) {
> +			diag_set(LuajitError, lua_tostring(L, -1));
> +			return -1;
> +		}
> +		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
> +			return -1;
> +		lua_replace(L, idx);
> +		return 0;
>  	}
>  	if (!lua_isstring(L, -1)) {
>  		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
> -		s->is_error = true;
> -		lua_pop(L, 1);
> -		return 0;
> +		return -1;
>  	}
>  	const char *type = lua_tostring(L, -1);
>  	if (strcmp(type, "array") == 0 || strcmp(type, "seq") == 0 ||
>  	    strcmp(type, "sequence") == 0) {
>  		field->type = MP_ARRAY; /* Override type */
> -		field->size = luaL_arrlen(L, 1);
> +		field->size = luaL_arrlen(L, idx);
>  		/* YAML: use flow mode if __serialize == 'seq' */
>  		if (cfg->has_compact && type[3] == '\0')
>  			field->compact = true;
>  	} else if (strcmp(type, "map") == 0 || strcmp(type, "mapping") == 0) {
>  		field->type = MP_MAP;   /* Override type */
> -		field->size = luaL_maplen(L, 1);
> +		field->size = luaL_maplen(L, idx);
>  		/* YAML: use flow mode if __serialize == 'map' */
>  		if (cfg->has_compact && type[3] == '\0')
>  			field->compact = true;
>  	} else {
>  		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
> -		s->is_error = true;
> +		return -1;
>  	}
> -	lua_pop(L, 1);
> +	lua_pop(L, 1); /* remove value was setted by luaL_getmetafield */
>  	return 0;
>  }
>  
> @@ -559,36 +537,14 @@ lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
>  
>  	if (cfg->encode_load_metatables) {
>  		int top = lua_gettop(L);
> -		struct lua_serialize_status s;
> -		s.cfg = cfg;
> -		s.field = field;
> -		lua_pushcfunction(L, lua_field_try_serialize);
> -		lua_pushvalue(L, idx);
> -		lua_pushlightuserdata(L, &s);
> -		if (lua_pcall(L, 2, 1, 0) != 0) {
> -			diag_set(LuajitError, lua_tostring(L, -1));
> -			return -1;
> -		}
> -		if (s.is_error)
> +		int res = lua_field_try_serialize(L, idx, cfg, field);
> +		if (res == -1)
>  			return -1;
> -		/*
> -		 * lua_call/pcall always returns the specified
> -		 * number of values. Even if the function returned
> -		 * less - others are filled with nils. So when a
> -		 * nil is not needed, it should be popped
> -		 * manually.
> -		 */
> -		assert(lua_gettop(L) == top + 1);
> -		(void) top;
> -		if (s.is_serialize_used) {
> -			if (s.is_value_returned)
> -				lua_replace(L, idx);
> -			else
> -				lua_pop(L, 1);
> +		assert(lua_gettop(L) == top);
> +		(void)top;
> +		if (res == 0)
>  			return 0;
> -		}
> -		assert(! s.is_value_returned);
> -		lua_pop(L, 1);
> +		/* Fall throuth with res == 1 */

Typo: s/Fall throuth/Fallthrough/.

Minor: This part can be simply rewritten the following way:
| int top = lua_gettop(L);
| (void)top;
|
| switch(lua_field_try_serialize(L, idx, cfg, field)) {
| case -1:
| 	return -1;
| case 0:
| 	assert(lua_gettop(L) == top);
| 	return 0;
| case 1:
| 	assert(lua_gettop(L) == top);
| 	/* Continue table encoding */
| 	break;
| default:
| 	/* Unreachable */
| 	assert(0);
| }
IMHO, it's more readable and also checks that nothing except the values
mentioned in the function contract is returned. Feel free to ignore.

>  	}
>  
>  	field->type = MP_ARRAY;
> -- 
> 2.24.1
> 

Side note: I can't come up with tests except those you showed to Sasha
here[4], but it looks like they doesn't directly relate to the changes
you introduce with the patch. No idea for now. I guess we can return to
the question when you fix the review comments.


[1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-April/015701.html
[2]: https://www.tarantool.io/en/doc/2.2/dev_guide/developer_guidelines/#how-to-write-a-commit-message
[3]: https://www.tarantool.io/en/doc/2.2/dev_guide/c_style_guide/#chapter-2-breaking-long-lines-and-strings
[4]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-May/016994.html

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-06-02  0:19 ` Igor Munkin
@ 2020-06-02  9:36   ` Igor Munkin
  0 siblings, 0 replies; 12+ messages in thread
From: Igor Munkin @ 2020-06-02  9:36 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches, Vladislav Shpilevoy

OK, it was too late at night (or too early at morning), sorry.

On 02.06.20, Igor Munkin wrote:
> Sergey,
> 
> Thanks for the patch! Please consider my comments below.
> 
> On 18.05.20, Sergey Kaplun wrote:
> > Currently on encoding table we push cfunction (lua_field_try_serialize)
> > to lua stack with additional lightuserdata and table value and after
> > pcall that function to avoid a raise of error.
> > 
> > In this case LuaJIT creates new object which will not live long time,
> 
> <lua_pushlightuserdata> just assigns a pointer to the top guest stack
> slot. Yes, it might trigger stack reallocation, but no GC object is
> created.

Yes creating a Lua function object with <lua_field_try_serialize>
payload definitely clobbers GC.

> 

<snipped>

> 
> Well, let's polish the commit message to make it a bit clearer. Commit
> subject also looks non-informative and doesn't respect our contribution
> guide[2], so I propose the following rewording:
> | lua: remove excess Lua call from table encoding
> |
> | For safe table encoding <lua_field_try_serialize> function is pushed
> | to Lua stack along with auxiliary lightuserdata and table object to be
> | encoded. Its further protected call catches Lua error if one is raised
> | while encoding. It is only necessary when the object to be serialized
> | has __serialize field in metatable and this field is a Lua function.
> |
> | As a result of this change the function serializing the given object
> | is called without excess protected frame and auxiliary status struct.

Added this point to the last part:
| This change reduces GC usage since a Lua function object is not
| created. Moreover the function serializing the given object is called
| without excess protected frame and auxiliary status struct.

> Feel free to change it on your own.
> 

<snipped>

> 
> -- 
> Best regards,
> IM

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-05-18  9:37 [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction Sergey Kaplun
                   ` (3 preceding siblings ...)
  2020-06-02  0:19 ` Igor Munkin
@ 2020-06-10 12:15 ` Kirill Yukhin
  2020-06-10 13:01   ` Igor Munkin
  4 siblings, 1 reply; 12+ messages in thread
From: Kirill Yukhin @ 2020-06-10 12:15 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches, Vladislav Shpilevoy

Hello,

On 18 май 12:37, Sergey Kaplun wrote:
> Currently on encoding table we push cfunction (lua_field_try_serialize)
> to lua stack with additional lightuserdata and table value and after
> pcall that function to avoid a raise of error.
> 
> In this case LuaJIT creates new object which will not live long time,
> so it increase amount of dead object and also increase time and
> frequency of garbage collection inside LuaJIT.
> Also this pcall is necessary only in case when metafield __serialize
> of serilizable object has LUA_TFUNCTION type.
> 
> So instead pushcfunction with pcall we can directly call the function
> trying to serialize an object.
> ---
> 
> branch: https://github.com/tarantool/tarantool/tree/skaplun/no-ticket-lua-inspect-table-refactoring

I've checked your patch into master.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-06-10 12:15 ` Kirill Yukhin
@ 2020-06-10 13:01   ` Igor Munkin
  2020-06-10 13:39     ` Kirill Yukhin
  0 siblings, 1 reply; 12+ messages in thread
From: Igor Munkin @ 2020-06-10 13:01 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: tarantool-patches, Vladislav Shpilevoy

Kirill,

On 10.06.20, Kirill Yukhin wrote:
> Hello,
> 
> On 18 май 12:37, Sergey Kaplun wrote:
> > Currently on encoding table we push cfunction (lua_field_try_serialize)
> > to lua stack with additional lightuserdata and table value and after
> > pcall that function to avoid a raise of error.
> > 
> > In this case LuaJIT creates new object which will not live long time,
> > so it increase amount of dead object and also increase time and
> > frequency of garbage collection inside LuaJIT.
> > Also this pcall is necessary only in case when metafield __serialize
> > of serilizable object has LUA_TFUNCTION type.
> > 
> > So instead pushcfunction with pcall we can directly call the function
> > trying to serialize an object.
> > ---
> > 
> > branch: https://github.com/tarantool/tarantool/tree/skaplun/no-ticket-lua-inspect-table-refactoring
> 
> I've checked your patch into master.

Just in case: you replied to v1 thread but the applied patch relates to v2.
I would like to mention that ChangeLog entry changes between versions.

> 
> --
> Regards, Kirill Yukhin

-- 
Best regards,
IM

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction
  2020-06-10 13:01   ` Igor Munkin
@ 2020-06-10 13:39     ` Kirill Yukhin
  0 siblings, 0 replies; 12+ messages in thread
From: Kirill Yukhin @ 2020-06-10 13:39 UTC (permalink / raw)
  To: Igor Munkin; +Cc: tarantool-patches, Vladislav Shpilevoy

Hello,

On 10 июн 16:01, Igor Munkin wrote:
> Kirill,
> 
> On 10.06.20, Kirill Yukhin wrote:
> > Hello,
> > 
> > On 18 май 12:37, Sergey Kaplun wrote:
> > > Currently on encoding table we push cfunction (lua_field_try_serialize)
> > > to lua stack with additional lightuserdata and table value and after
> > > pcall that function to avoid a raise of error.
> > > 
> > > In this case LuaJIT creates new object which will not live long time,
> > > so it increase amount of dead object and also increase time and
> > > frequency of garbage collection inside LuaJIT.
> > > Also this pcall is necessary only in case when metafield __serialize
> > > of serilizable object has LUA_TFUNCTION type.
> > > 
> > > So instead pushcfunction with pcall we can directly call the function
> > > trying to serialize an object.
> > > ---
> > > 
> > > branch: https://github.com/tarantool/tarantool/tree/skaplun/no-ticket-lua-inspect-table-refactoring
> > 
> > I've checked your patch into master.
> 
> Just in case: you replied to v1 thread but the applied patch relates to v2.
> I would like to mention that ChangeLog entry changes between versions.

Thanks! ChangeLog entry is correct.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-06-10 13:39 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-18  9:37 [Tarantool-patches] [PATCH] lua: lua_field_inspect_table without pushcfunction Sergey Kaplun
2020-05-18 21:52 ` Vladislav Shpilevoy
2020-05-19 10:29   ` Sergey Kaplun
2020-05-19 20:08     ` Vladislav Shpilevoy
2020-05-22 11:48 ` Aleksandr Lyapunov
2020-05-22 14:05 ` Alexander Turenko
2020-05-25 12:06   ` Sergey Kaplun
2020-06-02  0:19 ` Igor Munkin
2020-06-02  9:36   ` Igor Munkin
2020-06-10 12:15 ` Kirill Yukhin
2020-06-10 13:01   ` Igor Munkin
2020-06-10 13:39     ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox