Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH] serializer: serialize recursive structures
@ 2021-03-11  5:49 Roman Khabibov via Tarantool-patches
  2021-03-14 12:42 ` Sergey Kaplun via Tarantool-patches
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Roman Khabibov via Tarantool-patches @ 2021-03-11  5:49 UTC (permalink / raw)
  To: tarantool-patches

Fix bug with bus error during serializing of recursive
structures.

Closes #3228
---

Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3228-visited-set
Issue: https://github.com/tarantool/tarantool/issues/3228

 src/box/lua/call.c                            |   6 +-
 src/box/lua/execute.c                         |   2 +-
 src/box/lua/serialize_lua.c                   |  96 ++------
 src/box/lua/tuple.c                           |   2 +-
 src/box/sql/func.c                            |   2 +-
 src/lua/msgpack.c                             |  10 +-
 src/lua/pickle.c                              |   2 +-
 src/lua/utils.c                               | 226 ++++++++++++++++--
 src/lua/utils.h                               |  43 +++-
 ...-3228-serializer-look-for-recursion.result |  67 ++++++
 ...228-serializer-look-for-recursion.test.lua |  17 ++
 test/swim/swim.result                         |  18 +-
 third_party/lua-cjson/lua_cjson.c             |   4 +-
 third_party/lua-yaml/lyaml.cc                 |  88 +++----
 14 files changed, 390 insertions(+), 193 deletions(-)
 create mode 100644 test/app/gh-3228-serializer-look-for-recursion.result
 create mode 100644 test/app/gh-3228-serializer-look-for-recursion.test.lua

diff --git a/src/box/lua/call.c b/src/box/lua/call.c
index 0315e720c..e4907a3f7 100644
--- a/src/box/lua/call.c
+++ b/src/box/lua/call.c
@@ -193,7 +193,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 		 */
 		for (int i = 1; i <= nrets; ++i) {
 			struct luaL_field field;
-			if (luaL_tofield(L, cfg, NULL, i, &field) < 0)
+			if (luaL_tofield(L, cfg, 0, NULL, i, &field) < 0)
 				return luaT_error(L);
 			struct tuple *tuple;
 			if (field.type == MP_EXT &&
@@ -222,7 +222,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 	 * Inspect the first result
 	 */
 	struct luaL_field root;
-	if (luaL_tofield(L, cfg, NULL, 1, &root) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, 1, &root) < 0)
 		return luaT_error(L);
 	struct tuple *tuple;
 	if (root.type == MP_EXT && (tuple = luaT_istuple(L, 1)) != NULL) {
@@ -252,7 +252,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 	for (uint32_t t = 1; t <= root.size; t++) {
 		lua_rawgeti(L, 1, t);
 		struct luaL_field field;
-		if (luaL_tofield(L, cfg, NULL, -1, &field) < 0)
+		if (luaL_tofield(L, cfg, 0, NULL, -1, &field) < 0)
 			return luaT_error(L);
 		if (field.type == MP_EXT && (tuple = luaT_istuple(L, -1))) {
 			tuple_to_mpstream(tuple, stream);
diff --git a/src/box/lua/execute.c b/src/box/lua/execute.c
index 926a0a61c..76d271f5c 100644
--- a/src/box/lua/execute.c
+++ b/src/box/lua/execute.c
@@ -328,7 +328,7 @@ lua_sql_bind_decode(struct lua_State *L, struct sql_bind *bind, int idx, int i)
 		bind->name = NULL;
 		bind->name_len = 0;
 	}
-	if (luaL_tofield(L, luaL_msgpack_default, NULL, -1, &field) < 0)
+	if (luaL_tofield(L, luaL_msgpack_default, 0, NULL, -1, &field) < 0)
 		return -1;
 	switch (field.type) {
 	case MP_UINT:
diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
index caa08a60f..ab612dfc2 100644
--- a/src/box/lua/serialize_lua.c
+++ b/src/box/lua/serialize_lua.c
@@ -215,7 +215,7 @@ trace_node(struct lua_dumper *d)
 	struct luaL_field field;
 
 	memset(&field, 0, sizeof(field));
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &field);
 
 	if (field.type < lengthof(mp_type_names)) {
 		if (field.type == MP_EXT) {
@@ -234,7 +234,7 @@ trace_node(struct lua_dumper *d)
 
 	memset(&field, 0, sizeof(field));
 
-	luaL_checkfield(d->L, d->cfg, top, &field);
+	luaL_checkfield(d->L, d->cfg, 0, top, &field);
 	say_info("serializer-trace: node    :\tfield type %s (%d)",
 		 type_str, field.type);
 }
@@ -339,41 +339,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
 static const char *
 get_lua_anchor(struct lua_dumper *d)
 {
-	const char *s = "";
-
-	lua_pushvalue(d->L, -1);
-	lua_rawget(d->L, d->anchortable_index);
-	if (!lua_toboolean(d->L, -1)) {
-		lua_pop(d->L, 1);
-		return NULL;
-	}
-
-	if (lua_isboolean(d->L, -1)) {
-		/*
-		 * This element is referenced more
-		 * than once but has not been named.
-		 */
-		char buf[32];
-		snprintf(buf, sizeof(buf), "%u", d->anchor_number++);
-		lua_pop(d->L, 1);
-		lua_pushvalue(d->L, -1);
-		lua_pushstring(d->L, buf);
-		s = lua_tostring(d->L, -1);
-		lua_rawset(d->L, d->anchortable_index);
-		trace_anchor(s, false);
-	} else {
-		/*
-		 * An aliased element.
-		 *
-		 * FIXME: Need an example to use.
-		 *
-		 * const char *str = lua_tostring(d->L, -1);
-		 */
-		const char *str = lua_tostring(d->L, -1);
-		trace_anchor(str, true);
-		lua_pop(d->L, 1);
+	const char *anchor = NULL;
+	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
+			      &anchor);
+	if (code == 1) {
+		trace_anchor(anchor, false);
+	} else if (code == 2) {
+		trace_anchor(anchor, true);
+		return "";
 	}
-	return s;
+	return anchor;
 }
 
 static void
@@ -783,7 +758,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
 		return -1;
 
 	memset(field, 0, sizeof(*field));
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), field);
 
 	switch (field->type) {
 	case MP_NIL:
@@ -881,49 +856,6 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
 	return emit_node(d, nd, indent, str, len);
 }
 
-/**
- * Find references to tables, we use it
- * to find self references in tables.
- */
-static void
-find_references(struct lua_dumper *d)
-{
-	int newval;
-
-	if (lua_type(d->L, -1) != LUA_TTABLE)
-		return;
-
-	/* Copy of a table for self refs */
-	lua_pushvalue(d->L, -1);
-	lua_rawget(d->L, d->anchortable_index);
-	if (lua_isnil(d->L, -1))
-		newval = 0;
-	else if (!lua_toboolean(d->L, -1))
-		newval = 1;
-	else
-		newval = -1;
-	lua_pop(d->L, 1);
-
-	if (newval != -1) {
-		lua_pushvalue(d->L, -1);
-		lua_pushboolean(d->L, newval);
-		lua_rawset(d->L, d->anchortable_index);
-	}
-
-	if (newval != 0)
-		return;
-
-	/*
-	 * Other values and keys in the table
-	 */
-	lua_pushnil(d->L);
-	while (lua_next(d->L, -2) != 0) {
-		find_references(d);
-		lua_pop(d->L, 1);
-		find_references(d);
-	}
-}
-
 /**
  * Dump recursively from the root node.
  */
@@ -935,7 +867,7 @@ dump_root(struct lua_dumper *d)
 	};
 	int ret;
 
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &nd.field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &nd.field);
 
 	if (nd.field.type != MP_ARRAY || nd.field.size != 1) {
 		d->err = EINVAL;
@@ -984,7 +916,7 @@ lua_encode(lua_State *L, struct luaL_serializer *serializer,
 
 	/* Push copy of arg we're processing */
 	lua_pushvalue(L, 1);
-	find_references(&dumper);
+	find_references(dumper.L, dumper.anchortable_index, "before");
 
 	if (dump_root(&dumper) != 0)
 		goto out;
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 3e6f043b4..4d417ec34 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -411,7 +411,7 @@ luamp_convert_key(struct lua_State *L, struct luaL_serializer *cfg,
 		return tuple_to_mpstream(tuple, stream);
 
 	struct luaL_field field;
-	if (luaL_tofield(L, cfg, NULL, index, &field) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, index, &field) < 0)
 		luaT_error(L);
 	if (field.type == MP_ARRAY) {
 		lua_pushvalue(L, index);
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index f15d27051..d4a7064f0 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -262,7 +262,7 @@ port_lua_get_vdbemem(struct port *base, uint32_t *size)
 		return NULL;
 	for (int i = 0; i < argc; i++) {
 		struct luaL_field field;
-		if (luaL_tofield(L, luaL_msgpack_default,
+		if (luaL_tofield(L, luaL_msgpack_default, 0,
 				 NULL, -1 - i, &field) < 0) {
 			goto error;
 		}
diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
index 24b0d2ccd..412736572 100644
--- a/src/lua/msgpack.c
+++ b/src/lua/msgpack.c
@@ -156,11 +156,11 @@ restart: /* used by MP_EXT of unidentified subtype */
 		lua_pushnil(L);  /* first key */
 		while (lua_next(L, top) != 0) {
 			lua_pushvalue(L, -2); /* push a copy of key to top */
-			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1); /* pop a copy of key */
-			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1); /* pop value */
@@ -181,7 +181,7 @@ restart: /* used by MP_EXT of unidentified subtype */
 		mpstream_encode_array(stream, size);
 		for (uint32_t i = 0; i < size; i++) {
 			lua_rawgeti(L, top, i + 1);
-			if (luaL_tofield(L, cfg, opts, top + 1, field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, top + 1, field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1);
@@ -204,7 +204,7 @@ restart: /* used by MP_EXT of unidentified subtype */
 			if (type != MP_EXT)
 				return type; /* Value has been packed by the trigger */
 			/* Try to convert value to serializable type */
-			luaL_convertfield(L, cfg, top, field);
+			luaL_convertfield(L, cfg, 0, top, field);
 			/* handled by luaL_convertfield */
 			assert(field->type != MP_EXT);
 			assert(lua_gettop(L) == top);
@@ -229,7 +229,7 @@ luamp_encode(struct lua_State *L, struct luaL_serializer *cfg,
 	}
 
 	struct luaL_field field;
-	if (luaL_tofield(L, cfg, opts, lua_gettop(L), &field) < 0)
+	if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), &field) < 0)
 		return luaT_error(L);
 	enum mp_type top_type = luamp_encode_r(L, cfg, opts, stream, &field, 0);
 
diff --git a/src/lua/pickle.c b/src/lua/pickle.c
index 65208b5b3..86f0f77e8 100644
--- a/src/lua/pickle.c
+++ b/src/lua/pickle.c
@@ -80,7 +80,7 @@ lbox_pack(struct lua_State *L)
 		if (i > nargs)
 			luaL_error(L, "pickle.pack: argument count does not match "
 				   "the format");
-		luaL_checkfield(L, luaL_msgpack_default, i, &field);
+		luaL_checkfield(L, luaL_msgpack_default, 0, i, &field);
 		switch (*format) {
 		case 'B':
 		case 'b':
diff --git a/src/lua/utils.c b/src/lua/utils.c
index b5a6ca5b7..64404e3fc 100644
--- a/src/lua/utils.c
+++ b/src/lua/utils.c
@@ -439,9 +439,54 @@ lua_gettable_wrapper(lua_State *L)
 	return 1;
 }
 
+/**
+ * Check if node with @a idx serialized.
+ */
+static int
+is_serialized(struct lua_State *L, int anchortable_index, int idx)
+{
+	if (anchortable_index == 0)
+		return 0;
+	lua_pushvalue(L, idx);
+	lua_rawget(L, anchortable_index);
+	int is_serialized = 0;
+	if (lua_isnil(L, -1) == 0) {
+		lua_pushstring(L, "serialized");
+		lua_rawget(L, -2);
+		is_serialized = lua_toboolean(L, -1);
+		lua_pop(L, 1);
+	}
+	lua_pop(L, 1);
+	return is_serialized;
+}
+
+/**
+ * Mark node with @a idx as serialized.
+ */
+static void
+mark_as_serialized(struct lua_State *L, int anchortable_index, int idx)
+{
+	if (anchortable_index == 0)
+		return;
+	/* Push copy of table. */
+	lua_pushvalue(L, idx);
+	lua_pushvalue(L, idx);
+	lua_rawget(L, anchortable_index);
+	if (lua_isnil(L, -1) == 1) {
+		lua_pop(L, 1);
+		lua_newtable(L);
+	}
+	int flags_index = lua_gettop(L);
+	lua_pushstring(L, "serialized");
+	lua_pushboolean(L, true);
+	lua_rawset(L, flags_index);
+	lua_rawset(L, anchortable_index);
+}
+
 static void
 lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+			 int anchortable_index, int idx,
+			 struct luaL_field *field)
 {
 	if (!cfg->encode_load_metatables)
 		return;
@@ -463,12 +508,148 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
 		lua_pcall(L, 1, 1, 0);
 		/* replace obj with the unpacked value */
 		lua_replace(L, idx);
-		if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+		if (luaL_tofield(L, cfg, anchortable_index, NULL, idx,
+			         field) < 0)
 			luaT_error(L);
 	} /* else ignore lua_gettable exceptions */
 	lua_settop(L, top); /* remove temporary objects */
 }
 
+void
+find_references(struct lua_State *L, int anchortable_index, const char *mode)
+{
+	int type = lua_type(L, -1);
+	if (type != LUA_TTABLE || anchortable_index == 0)
+		return;
+
+	int node_index = lua_gettop(L);
+	lua_pushvalue(L, node_index);
+
+	/* Get value with flags from anchor table. */
+	lua_pushvalue(L, node_index);
+	lua_rawget(L, anchortable_index);
+	if (lua_isnil(L, -1) == 1) {
+		lua_pop(L, 1);
+		lua_newtable(L);
+	}
+	int flags_index = lua_gettop(L);
+
+	/* Get and increment counter. */
+	lua_pushstring(L, mode);
+	lua_rawget(L, flags_index);
+
+	int new_count = 0;
+	if (lua_isnil(L, -1) == 1) {
+		new_count = 1;
+		lua_pop(L, 1);
+	} else {
+		lua_Integer number = lua_tointeger(L, -1);
+		lua_pop(L, 1);
+		if (number < 2)
+			new_count = ++number;
+		else
+			new_count = -1;
+	}
+
+	/* Push new count to value with flags. */
+	if (new_count == -1) {
+		lua_pop(L, 2);
+		assert(node_index == lua_gettop(L));
+		return;
+	}
+	lua_pushstring(L, mode);
+	lua_pushinteger(L, new_count);
+	lua_rawset(L, flags_index);
+
+	/* Check if node is already serialized. */
+	bool already_serialized = false;
+	if (strcmp(mode, "after") == 0) {
+		lua_pushstring(L, "serialized");
+		lua_rawget(L, flags_index);
+		already_serialized = lua_toboolean(L, -1);
+		lua_pop(L, 1);
+	}
+	lua_rawset(L, anchortable_index);
+	assert(node_index == lua_gettop(L));
+	if (already_serialized == true)
+		return;
+
+	/* Recursively process other table values. */
+	lua_pushnil(L);
+	while (lua_next(L, -2) != 0) {
+		/* Find references on value. */
+		find_references(L, anchortable_index, mode);
+		lua_pop(L, 1);
+		/* Find references on key. */
+		find_references(L, anchortable_index, mode);
+	}
+}
+
+int
+get_anchor(struct lua_State *L, int anchortable_index,
+	   unsigned int *anchor_number, const char **anchor)
+{
+	lua_pushvalue(L, -1);
+	lua_rawget(L, anchortable_index);
+	if (lua_isnil(L, -1) == 1) {
+		lua_pop(L, 1);
+		*anchor = NULL;
+		return 0;
+	}
+
+	lua_pushstring(L, "before");
+	lua_rawget(L, -2);
+	const char *str = NULL;
+	if (lua_type(L, -1) == LUA_TSTRING)
+		str = lua_tostring(L, -1);
+	lua_Integer number_before = lua_tointeger(L, -1);
+	lua_pop(L, 1);
+	lua_Integer number_after = 0;
+	if (str == NULL) {
+		lua_pushstring(L, "after");
+		lua_rawget(L, -2);
+		if (lua_type(L, -1) == LUA_TSTRING)
+			str = lua_tostring(L, -1);
+		number_after = lua_tointeger(L, -1);
+		lua_pop(L, 1);
+	}
+
+	*anchor = "";
+	int ret = 0;
+	if ((number_before > 1 || number_after > 1) && str == NULL) {
+		/*
+		 * This element is referenced more than once but
+		 * has not been named.
+		 */
+		char buf[32];
+		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);
+		lua_pop(L, 1);
+		lua_pushvalue(L, -1);
+
+		lua_pushvalue(L, -1);
+		lua_rawget(L, anchortable_index);
+		if (number_before > 1)
+			lua_pushstring(L, "before");
+		else
+			lua_pushstring(L, "after");
+		lua_pushstring(L, buf);
+		*anchor = lua_tostring(L, -1);
+		lua_rawset(L, -3);
+		lua_rawset(L, anchortable_index);
+		ret = 1;
+	} else if (str != NULL) {
+		/* This is an aliased element. */
+		lua_pop(L, 1);
+		*anchor = str;
+		ret = 2;
+	} else {
+		lua_pop(L, 1);
+		*anchor = NULL;
+		ret = 0;
+	}
+	return ret;
+}
+
 /**
  * Call __serialize method of a table object by index
  * if the former exists.
@@ -496,11 +677,13 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
  *      proceed with default table encoding.
  */
 static int
-lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+lua_field_try_serialize(struct lua_State *L, int anchortable_index,
+			struct luaL_serializer *cfg, int idx,
+			struct luaL_field *field)
 {
 	if (luaL_getmetafield(L, idx, LUAL_SERIALIZE) == 0)
 		return 1;
+	mark_as_serialized(L, anchortable_index, idx);
 	if (lua_isfunction(L, -1)) {
 		/* copy object itself */
 		lua_pushvalue(L, idx);
@@ -508,7 +691,9 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
 			diag_set(LuajitError, lua_tostring(L, -1));
 			return -1;
 		}
-		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
+		find_references(L, anchortable_index, "after");
+		if (luaL_tofield(L, cfg, anchortable_index, NULL, -1,
+				 field) != 0)
 			return -1;
 		lua_replace(L, idx);
 		return 0;
@@ -541,16 +726,19 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
 }
 
 static int
-lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+lua_field_inspect_table(struct lua_State *L, int anchortable_index,
+			struct luaL_serializer *cfg, int idx,
+			struct luaL_field *field)
 {
 	assert(lua_type(L, idx) == LUA_TTABLE);
 	uint32_t size = 0;
 	uint32_t max = 0;
 
-	if (cfg->encode_load_metatables) {
+	if (cfg->encode_load_metatables &&
+	    is_serialized(L, anchortable_index, idx) != 1) {
 		int top = lua_gettop(L);
-		int res = lua_field_try_serialize(L, cfg, idx, field);
+		int res = lua_field_try_serialize(L, anchortable_index, cfg,
+						  idx, field);
 		if (res == -1)
 			return -1;
 		assert(lua_gettop(L) == top);
@@ -612,14 +800,14 @@ lua_field_tostring(struct lua_State *L, struct luaL_serializer *cfg, int idx,
 	lua_call(L, 1, 1);
 	lua_replace(L, idx);
 	lua_settop(L, top);
-	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, idx, field) < 0)
 		luaT_error(L);
 }
 
 int
 luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
-	     const struct serializer_opts *opts, int index,
-	     struct luaL_field *field)
+	     int anchortable_index, const struct serializer_opts *opts,
+	     int index, struct luaL_field *field)
 {
 	if (index < 0)
 		index = lua_gettop(L) + index + 1;
@@ -753,7 +941,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
 	case LUA_TTABLE:
 	{
 		field->compact = false;
-		return lua_field_inspect_table(L, cfg, index, field);
+		return lua_field_inspect_table(L, anchortable_index, cfg,
+					       index, field);
 	}
 	case LUA_TLIGHTUSERDATA:
 	case LUA_TUSERDATA:
@@ -773,8 +962,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
 }
 
 void
-luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		  struct luaL_field *field)
+luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
+		  int anchortable_index, int idx, struct luaL_field *field)
 {
 	if (idx < 0)
 		idx = lua_gettop(L) + idx + 1;
@@ -789,9 +978,12 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
 			 */
 			GCcdata *cd = cdataV(L->base + idx - 1);
 			if (cd->ctypeid > CTID_CTYPEID)
-				lua_field_inspect_ucdata(L, cfg, idx, field);
+				lua_field_inspect_ucdata(L, cfg,
+							 anchortable_index, idx,
+							 field);
 		} else if (type == LUA_TUSERDATA) {
-			lua_field_inspect_ucdata(L, cfg, idx, field);
+			lua_field_inspect_ucdata(L, cfg, anchortable_index, idx,
+						 field);
 		}
 	}
 
diff --git a/src/lua/utils.h b/src/lua/utils.h
index 37531676d..ef9c70dfb 100644
--- a/src/lua/utils.h
+++ b/src/lua/utils.h
@@ -354,6 +354,29 @@ struct luaL_field {
 	bool compact;                /* a flag used by YAML serializer */
 };
 
+/**
+ * Find references to tables to look up self references in tables.
+ *
+ * @param L Lua stack.
+ * @param anchortable_index Index of anchor table on stack.
+ * @param mode When the function is called before or after dump
+               function.
+ */
+void
+find_references(struct lua_State *L, int anchortable_index, const char *mode);
+
+/**
+ * Generate aliases and anchor numbers for self references.
+ *
+ * @param L Lua stack.
+ * @param anchortable_index Index of anchor table on stack.
+ * @param[out] anchor_number Ptr to the number anchors.
+ * @param[out] anchor Ptr to anchor string.
+ */
+int
+get_anchor(struct lua_State *L, int anchortable_index,
+	   unsigned int *anchor_number, const char **anchor);
+
 /**
  * @brief Convert a value from the Lua stack to a lua_field structure.
  * This function is designed for use with Lua bindings and data
@@ -388,6 +411,7 @@ struct luaL_field {
  *
  * @param L stack
  * @param cfg configuration
+ * @param anchortable_index Index of anchor table on stack.
  * @param opts the Lua serializer additional options.
  * @param index stack index
  * @param field conversion result
@@ -397,8 +421,8 @@ struct luaL_field {
  */
 int
 luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
-	     const struct serializer_opts *opts, int index,
-	     struct luaL_field *field);
+	     int anchortable_index, const struct serializer_opts *opts,
+	     int index, struct luaL_field *field);
 
 /**
  * @brief Try to convert userdata/cdata values using defined conversion logic.
@@ -406,18 +430,21 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
  *
  * @param L stack
  * @param cfg configuration
+ * @param anchortable_index Index of anchor table on stack.
  * @param idx stack index
  * @param field conversion result
  */
 void
-luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		  struct luaL_field *field);
+luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
+		  int anchortable_index, int idx, struct luaL_field *field);
 
 /**
  * @brief A wrapper for luaL_tofield() and luaL_convertfield() that
  * tries to convert value or raise an error.
  * @param L stack
  * @param cfg configuration
+ * @param anchortable_index Stack index of table with node visit
+                            and serialization info.
  * @param idx stack index
  * @param field conversion result
  * @sa lua_tofield()
@@ -433,14 +460,14 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
  * (tostring) -> (nil) -> exception
  */
 static inline void
-luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		struct luaL_field *field)
+luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg,
+		int anchortable_index, int idx, struct luaL_field *field)
 {
-	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+	if (luaL_tofield(L, cfg, anchortable_index, NULL, idx, field) < 0)
 		luaT_error(L);
 	if (field->type != MP_EXT || field->ext_type != MP_UNKNOWN_EXTENSION)
 		return;
-	luaL_convertfield(L, cfg, idx, field);
+	luaL_convertfield(L, cfg, anchortable_index, idx, field);
 }
 
 void
diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
new file mode 100644
index 000000000..773470c8e
--- /dev/null
+++ b/test/app/gh-3228-serializer-look-for-recursion.result
@@ -0,0 +1,67 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+--
+-- gh-3228: Check the error message in the case of a __serialize
+-- function generating infinite recursion.
+--
+setmetatable({}, {__serialize = function(a) return a end})
+ | ---
+ | - []
+ | ...
+setmetatable({}, {__serialize = function(a) return {a} end})
+ | ---
+ | - - []
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, a} end})
+ | ---
+ | - - &0 []
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, a, a} end})
+ | ---
+ | - - &0 []
+ |   - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
+ | ---
+ | - - - &0 []
+ |     - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, 1} end})
+ | ---
+ | - - []
+ |   - 1
+ | ...
+setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
+ | ---
+ | - - - - - []
+ | ...
+setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
+ | ---
+ | - - - - - 1
+ | ...
+b = {}
+ | ---
+ | ...
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
+ | ---
+ | - b_2: &0 []
+ |   a_2: &1
+ |   - *0
+ |   a_1: *1
+ |   b_1: *0
+ | ...
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
+ | ---
+ | - b: &0 []
+ |   a_2:
+ |   - &1
+ |     - *0
+ |   - *0
+ |   a_1: *1
+ | ...
diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
new file mode 100644
index 000000000..1c9b0375f
--- /dev/null
+++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
@@ -0,0 +1,17 @@
+test_run = require('test_run').new()
+
+--
+-- gh-3228: Check the error message in the case of a __serialize
+-- function generating infinite recursion.
+--
+setmetatable({}, {__serialize = function(a) return a end})
+setmetatable({}, {__serialize = function(a) return {a} end})
+setmetatable({}, {__serialize = function(a) return {a, a} end})
+setmetatable({}, {__serialize = function(a) return {a, a, a} end})
+setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
+setmetatable({}, {__serialize = function(a) return {a, 1} end})
+setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
+setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
+b = {}
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
diff --git a/test/swim/swim.result b/test/swim/swim.result
index bfc83c143..539131677 100644
--- a/test/swim/swim.result
+++ b/test/swim/swim.result
@@ -1322,16 +1322,13 @@ m_list
     incarnation: cdata {generation = 0ULL, version = 1ULL}
     uuid: 00000000-0000-1000-8000-000000000002
     payload_size: 0
-  - uri: 127.0.0.1:<port>
-    status: alive
-    incarnation: cdata {generation = 0ULL, version = 2ULL}
-    uuid: 00000000-0000-1000-8000-000000000001
-    payload_size: 8
-  - uri: 127.0.0.1:<port>
+  - &0
+    uri: 127.0.0.1:<port>
     status: alive
     incarnation: cdata {generation = 0ULL, version = 2ULL}
     uuid: 00000000-0000-1000-8000-000000000001
     payload_size: 8
+  - *0
 ...
 e_list
 ---
@@ -1374,16 +1371,13 @@ fiber.sleep(0)
 -- Two events - status update to 'left', and 'drop'.
 m_list
 ---
-- - uri: 127.0.0.1:<port>
-    status: left
-    incarnation: cdata {generation = 0ULL, version = 1ULL}
-    uuid: 00000000-0000-1000-8000-000000000002
-    payload_size: 0
-  - uri: 127.0.0.1:<port>
+- - &0
+    uri: 127.0.0.1:<port>
     status: left
     incarnation: cdata {generation = 0ULL, version = 1ULL}
     uuid: 00000000-0000-1000-8000-000000000002
     payload_size: 0
+  - *0
 ...
 e_list
 ---
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 38e999870..67a9762bc 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -354,7 +354,7 @@ static void json_append_object(lua_State *l, struct luaL_serializer *cfg,
             comma = 1;
 
     struct luaL_field field;
-    luaL_checkfield(l, cfg, -2, &field);
+    luaL_checkfield(l, cfg, 0, -2, &field);
     if (field.type == MP_UINT) {
         strbuf_append_char(json, '"');
         json_append_uint(cfg, json, field.ival);
@@ -384,7 +384,7 @@ static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
                              int current_depth, strbuf_t *json)
 {
     struct luaL_field field;
-    luaL_checkfield(l, cfg, -1, &field);
+    luaL_checkfield(l, cfg, 0, -1, &field);
     switch (field.type) {
     case MP_UINT:
         return json_append_uint(cfg, json, field.ival);
diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
index 9c3a4a646..72f269e6c 100644
--- a/third_party/lua-yaml/lyaml.cc
+++ b/third_party/lua-yaml/lyaml.cc
@@ -500,38 +500,23 @@ usage_error:
 static int dump_node(struct lua_yaml_dumper *dumper);
 
 static yaml_char_t *get_yaml_anchor(struct lua_yaml_dumper *dumper) {
-   const char *s = "";
-   lua_pushvalue(dumper->L, -1);
-   lua_rawget(dumper->L, dumper->anchortable_index);
-   if (!lua_toboolean(dumper->L, -1)) {
-      lua_pop(dumper->L, 1);
-      return NULL;
-   }
-
-   if (lua_isboolean(dumper->L, -1)) {
-      /* this element is referenced more than once but has not been named */
-      char buf[32];
-      snprintf(buf, sizeof(buf), "%u", dumper->anchor_number++);
-      lua_pop(dumper->L, 1);
-      lua_pushvalue(dumper->L, -1);
-      lua_pushstring(dumper->L, buf);
-      s = lua_tostring(dumper->L, -1);
-      lua_rawset(dumper->L, dumper->anchortable_index);
-   } else {
-      /* this is an aliased element */
+   const char *anchor = NULL;
+   if (get_anchor(dumper->L, dumper->anchortable_index, &dumper->anchor_number,
+                  &anchor) == 2) {
       yaml_event_t ev;
-      const char *str = lua_tostring(dumper->L, -1);
-      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) str) ||
+      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) anchor) ||
           !yaml_emitter_emit(&dumper->emitter, &ev))
          luaL_error(dumper->L, OOM_ERRMSG);
-      lua_pop(dumper->L, 1);
+      return (yaml_char_t *)"";
    }
-   return (yaml_char_t *)s;
+   return (yaml_char_t *) anchor;
 }
 
-static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
+static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field,
+                      yaml_char_t *anchor) {
    yaml_event_t ev;
-   yaml_char_t *anchor = get_yaml_anchor(dumper);
+   if (anchor == NULL)
+      anchor = get_yaml_anchor(dumper);
 
    if (anchor && !*anchor) return 1;
 
@@ -556,10 +541,12 @@ static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
           yaml_emitter_emit(&dumper->emitter, &ev) != 0 ? 1 : 0;
 }
 
-static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){
+static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field,
+                      yaml_char_t *anchor) {
    unsigned i;
    yaml_event_t ev;
-   yaml_char_t *anchor = get_yaml_anchor(dumper);
+   if (anchor == NULL)
+      anchor = get_yaml_anchor(dumper);
 
    if (anchor && !*anchor)
       return 1;
@@ -621,8 +608,18 @@ static int dump_node(struct lua_yaml_dumper *dumper)
    bool unused;
    (void) unused;
 
+   yaml_char_t *anchor = NULL;
+
+   /*
+    * Keep anchor to print it if luaL_checkfield() push new value
+    * on stack after serialization.
+    */
+   if (lua_istable(dumper->L, -1) == 1)
+      anchor = get_yaml_anchor(dumper);
+
    int top = lua_gettop(dumper->L);
-   luaL_checkfield(dumper->L, dumper->cfg, top, &field);
+   luaL_checkfield(dumper->L, dumper->cfg, dumper->anchortable_index, top,
+                   &field);
    switch(field.type) {
    case MP_UINT:
       snprintf(buf, sizeof(buf) - 1, "%" PRIu64, field.ival);
@@ -647,9 +644,9 @@ static int dump_node(struct lua_yaml_dumper *dumper)
       len = strlen(buf);
       break;
    case MP_ARRAY:
-      return dump_array(dumper, &field);
+      return dump_array(dumper, &field, anchor);
    case MP_MAP:
-      return dump_table(dumper, &field);
+      return dump_table(dumper, &field, anchor);
    case MP_STR:
       str = lua_tolstring(dumper->L, -1, &len);
       if (yaml_is_null(str, len) || yaml_is_bool(str, len, &unused) ||
@@ -745,35 +742,6 @@ static int append_output(void *arg, unsigned char *buf, size_t len) {
    return 1;
 }
 
-static void find_references(struct lua_yaml_dumper *dumper) {
-   int newval = -1, type = lua_type(dumper->L, -1);
-   if (type != LUA_TTABLE)
-      return;
-
-   lua_pushvalue(dumper->L, -1); /* push copy of table */
-   lua_rawget(dumper->L, dumper->anchortable_index);
-   if (lua_isnil(dumper->L, -1))
-      newval = 0;
-   else if (!lua_toboolean(dumper->L, -1))
-      newval = 1;
-   lua_pop(dumper->L, 1);
-   if (newval != -1) {
-      lua_pushvalue(dumper->L, -1);
-      lua_pushboolean(dumper->L, newval);
-      lua_rawset(dumper->L, dumper->anchortable_index);
-   }
-   if (newval)
-      return;
-
-   /* recursively process other table values */
-   lua_pushnil(dumper->L);
-   while (lua_next(dumper->L, -2) != 0) {
-      find_references(dumper); /* find references on value */
-      lua_pop(dumper->L, 1);
-      find_references(dumper); /* find references on key */
-   }
-}
-
 int
 lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
                 const char *tag_handle, const char *tag_prefix)
@@ -819,7 +787,7 @@ lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
    dumper.anchortable_index = lua_gettop(L);
    dumper.anchor_number = 0;
    lua_pushvalue(L, 1); /* push copy of arg we're processing */
-   find_references(&dumper);
+   find_references(L, dumper.anchortable_index, "before");
    dump_document(&dumper);
    if (dumper.error)
       goto error;
-- 
2.24.3 (Apple Git-128)


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-03-11  5:49 [Tarantool-patches] [PATCH] serializer: serialize recursive structures Roman Khabibov via Tarantool-patches
@ 2021-03-14 12:42 ` Sergey Kaplun via Tarantool-patches
  2021-04-05 22:05   ` Roman Khabibov via Tarantool-patches
  2021-04-13 15:54 ` Sergey Bronnikov via Tarantool-patches
  2021-04-21 12:34 ` Sergey Bronnikov via Tarantool-patches
  2 siblings, 1 reply; 9+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2021-03-14 12:42 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

Hi!

Thanks for the patch!

Side note: I've got into the swing of things by looking of previous
series of patches ([1][2]). AFAICS, there was idea to throw an error on
recursive serialization, is there discussion anywhere about this
implementation instead the old one which I've missed?

I have some questions about current version of serializer:

Nit: we have no need to add a marker for structures that are not used:
| Tarantool 2.8.0-108-g195d44ddc
| tarantool> local a = {} local b = {a = a} b[b] = b a[b] = a setmetatable(b, {__serialize = function(...) return {a} end}) return a
| ---
| - &0
|   ? &1
|   - *0
|   : *0
| ...

And when serialize is omitted, it works like expected:
| Tarantool 2.8.0-108-g195d44ddc
| tarantool> local a = {} local b = {a = a} b[b] = b a[b] = a  return a
| ---
| - &0
|   ? &1
|     a: *0
|     *1: *1
|   : *0
| ...

In the old version it looks minimalistic and good:
| Tarantool 2.8.0-0-gefc30ccf8
| tarantool> local a = {} local b = {a = a} b[b] = b a[b] = a setmetatable(b, {__serialize = function(...) return {a} end}) return a
| ---
| - &0
|   ? - *0
|   : *0
| ...

Also, I found some inconsistency in showing repeated objects.
For the new version:
| Tarantool 2.8.0-108-g195d44ddc
| local a = {} local b = {} a[b] = b  setmetatable(b, {__serialize = function(...) return print end}) return a
| ---
| - 'function: builtin#29': *0
| ...

But '&0' does not exist, so it looks confusing. Same story with threads.

And for old version:

| Tarantool 2.8.0-0-gefc30ccf8
| tarantool> local a = {} local b = {} a[b] = b  setmetatable(b, {__serialize = function(...) return print end}) return a
| ---
| - 'function: builtin#29': 'function: builtin#29'
| ...


Side note: I'm not sure that the next example is adequate, so I CC-ed Sergos
to judge me.
The main idea is to change a serialization function inside serialization.
It looks weird and useless, to be honest, but who knows. Here is the example:

| Tarantool 2.8.0-108-g195d44ddc
| tarantool> local function example()
|          >   local a = {}
|          >   local b = {}
|          >   b[a] = a
|          >   local reta
|          >   local retb
|          >   local function swap_ser(o)
|          >     local newf
|          >     local f = getmetatable(o).__serialize
|          >     if f == reta then
|          >       newf = retb
|          >     else
|          >       newf = reta
|          >     end
|          >     getmetatable(o).__serialize = newf
|          >   end
|          >   reta = function(o) swap_ser(o) return "a" end
|          >   retb = function(o) swap_ser(o) return "b" end
|          >   setmetatable(a, {__serialize = reta})
|          >   return b
|          > end return example()
| ---
| - a: *0
| ...

Note, that '*0' looks even more confusing now :).

For the old version:
| Tarantool 2.8.0-0-gefc30ccf8
| tarantool> local function example()
|          >   local a = {}
|          >   local b = {}
|          >   b[a] = a
|          >   local reta
|          >   local retb
|          >   local function swap_ser(o)
|          >     local newf
|          >     local f = getmetatable(o).__serialize
|          >     if f == reta then
|          >       newf = retb
|          >     else
|          >       newf = reta
|          >     end
|          >     getmetatable(o).__serialize = newf
|          >   end
|          >   reta = function(o) swap_ser(o) return "a" end
|          >   retb = function(o) swap_ser(o) return "b" end
|          >   setmetatable(a, {__serialize = reta})
|          >   return b
|          > end return example()
| ---
| - a: b
| ...

Looks like we should check not only object itself, but that the serialization
function is the same.

Moreover, I don't know, what can we do with situations, when a serialize
function depends on some context that can be changed inside it:

The new version:
| Tarantool 2.8.0-108-g195d44ddc
| tarantool> local a = {} local b = {} b[a] = a local show_a = true setmetatable(a, {__serialize = function() show_a = not show_a if show_a then return "a" else return "b" end end}) return b
| ---
| - b: *0
| ...

The old version works like expected:
| Tarantool 2.8.0-0-gefc30ccf8
| tarantool> local a = {} local b = {} b[a] = a local show_a = true setmetatable(a, {__serialize = function() show_a = not show_a if show_a then return "a" else return "b" end end}) return b
| ---
| - b: a
| ...

Side note: It doesn't relate to issue and patch directly, but
deep recursive calls still lead to core:
| Tarantool 2.8.0-108-g195d44ddc
| tarantool> local a = {} a[a] = a local recf recf = function(t) return setmetatable({}, {__serialize = recf}) end setmetatable(a, {__serialize = recf}) return a
| Segmentation fault (core dumped)

With the backtrace that looks like the following:
| #8  0x000055fd217034d3 in lua_field_try_serialize (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1375, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:694
| #9  0x000055fd2170387d in lua_field_inspect_table (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1375, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:740
| #10 0x000055fd21704542 in luaL_tofield (L=0x40ffb378, cfg=0x41c6e8c0, anchortable_index=3, opts=0x0, index=1375, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:944
| #11 0x000055fd217034f8 in lua_field_try_serialize (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1374, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:695
| #12 0x000055fd2170387d in lua_field_inspect_table (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1374, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:740
| #13 0x000055fd21704542 in luaL_tofield (L=0x40ffb378, cfg=0x41c6e8c0, anchortable_index=3, opts=0x0, index=1374, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:944
| #14 0x000055fd217034f8 in lua_field_try_serialize (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1373, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:695
| #15 0x000055fd2170387d in lua_field_inspect_table (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1373, field=0x7f29425fd760)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:740
| <4K lines below>

I suppose that it can be handled by checking of the serialize level of
structure (table). If its to high we can either raise an error or
return <snipped> for the rest of the structure. At least it will be
good to create an issue follow-ups this problem.

Since the answers to the questions above are critical to evaluate the
logic of the patch, I have not reviewed it yet. Most parts of the
comments below is about to add more comments to code or some suggestions
about codestile.

On 11.03.21, Roman Khabibov wrote:
> Fix bug with bus error during serializing of recursive
> structures.

Please provide a little bit more description about your changes.
It helps me as a reviewer to check the patch and for other developers
it will be easier to understand these changes in the future.

> 
> Closes #3228
> ---
> 
> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3228-visited-set
> Issue: https://github.com/tarantool/tarantool/issues/3228
> 
>  src/box/lua/call.c                            |   6 +-
>  src/box/lua/execute.c                         |   2 +-
>  src/box/lua/serialize_lua.c                   |  96 ++------
>  src/box/lua/tuple.c                           |   2 +-
>  src/box/sql/func.c                            |   2 +-
>  src/lua/msgpack.c                             |  10 +-
>  src/lua/pickle.c                              |   2 +-
>  src/lua/utils.c                               | 226 ++++++++++++++++--
>  src/lua/utils.h                               |  43 +++-
>  ...-3228-serializer-look-for-recursion.result |  67 ++++++
>  ...228-serializer-look-for-recursion.test.lua |  17 ++
>  test/swim/swim.result                         |  18 +-
>  third_party/lua-cjson/lua_cjson.c             |   4 +-
>  third_party/lua-yaml/lyaml.cc                 |  88 +++----
>  14 files changed, 390 insertions(+), 193 deletions(-)
>  create mode 100644 test/app/gh-3228-serializer-look-for-recursion.result
>  create mode 100644 test/app/gh-3228-serializer-look-for-recursion.test.lua

Please provide Changelog entry too.

> 
> diff --git a/src/box/lua/call.c b/src/box/lua/call.c
> index 0315e720c..e4907a3f7 100644
> --- a/src/box/lua/call.c
> +++ b/src/box/lua/call.c

<snipped>

> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
> index caa08a60f..ab612dfc2 100644
> --- a/src/box/lua/serialize_lua.c
> +++ b/src/box/lua/serialize_lua.c
> @@ -215,7 +215,7 @@ trace_node(struct lua_dumper *d)

<snipped>

> @@ -339,41 +339,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
>  static const char *
>  get_lua_anchor(struct lua_dumper *d)
>  {
> -	const char *s = "";
> -
> -	lua_pushvalue(d->L, -1);
> -	lua_rawget(d->L, d->anchortable_index);
> -	if (!lua_toboolean(d->L, -1)) {
> -		lua_pop(d->L, 1);
> -		return NULL;
> -	}
> -
> -	if (lua_isboolean(d->L, -1)) {
> -		/*
> -		 * This element is referenced more
> -		 * than once but has not been named.
> -		 */
> -		char buf[32];
> -		snprintf(buf, sizeof(buf), "%u", d->anchor_number++);
> -		lua_pop(d->L, 1);
> -		lua_pushvalue(d->L, -1);
> -		lua_pushstring(d->L, buf);
> -		s = lua_tostring(d->L, -1);
> -		lua_rawset(d->L, d->anchortable_index);
> -		trace_anchor(s, false);
> -	} else {
> -		/*
> -		 * An aliased element.
> -		 *
> -		 * FIXME: Need an example to use.
> -		 *
> -		 * const char *str = lua_tostring(d->L, -1);
> -		 */
> -		const char *str = lua_tostring(d->L, -1);
> -		trace_anchor(str, true);
> -		lua_pop(d->L, 1);
> +	const char *anchor = NULL;
> +	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
> +			      &anchor);
> +	if (code == 1) {

Please avoid using magic numbers. Here and below.

> +		trace_anchor(anchor, false);
> +	} else if (code == 2) {
> +		trace_anchor(anchor, true);
> +		return "";
>  	}
> -	return s;
> +	return anchor;
>  }
>  
>  static void
> @@ -783,7 +758,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)

<snipped>

> @@ -881,49 +856,6 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
>  	return emit_node(d, nd, indent, str, len);
>  }
>  
> -/**
> - * Find references to tables, we use it
> - * to find self references in tables.
> - */
> -static void
> -find_references(struct lua_dumper *d)
> -{

<snipped>

> -}
> -
>  /**
>   * Dump recursively from the root node.
>   */
> @@ -935,7 +867,7 @@ dump_root(struct lua_dumper *d)

<snipped>

> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 3e6f043b4..4d417ec34 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c

<snipped>

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index f15d27051..d4a7064f0 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c

<snipped>

> diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
> index 24b0d2ccd..412736572 100644
> --- a/src/lua/msgpack.c
> +++ b/src/lua/msgpack.c

<snipped>

> diff --git a/src/lua/pickle.c b/src/lua/pickle.c
> index 65208b5b3..86f0f77e8 100644
> --- a/src/lua/pickle.c
> +++ b/src/lua/pickle.c

<snipped>

> diff --git a/src/lua/utils.c b/src/lua/utils.c
> index b5a6ca5b7..64404e3fc 100644
> --- a/src/lua/utils.c
> +++ b/src/lua/utils.c
> @@ -439,9 +439,54 @@ lua_gettable_wrapper(lua_State *L)
>  	return 1;
>  }
>  
> +/**
> + * Check if node with @a idx serialized.
> + */
> +static int
> +is_serialized(struct lua_State *L, int anchortable_index, int idx)
> +{
> +	if (anchortable_index == 0)
> +		return 0;
> +	lua_pushvalue(L, idx);
> +	lua_rawget(L, anchortable_index);
> +	int is_serialized = 0;
> +	if (lua_isnil(L, -1) == 0) {

Nit: This comparison is good according to current Tarantool's code
style, but all old checks are using just `lua_is*()`.
Personally, I prefer to keep code style consistence inside one
translation unit.
Feel free to ignore.
Here and below.

> +		lua_pushstring(L, "serialized");
> +		lua_rawget(L, -2);
> +		is_serialized = lua_toboolean(L, -1);
> +		lua_pop(L, 1);
> +	}
> +	lua_pop(L, 1);
> +	return is_serialized;
> +}
> +
> +/**
> + * Mark node with @a idx as serialized.
> + */
> +static void
> +mark_as_serialized(struct lua_State *L, int anchortable_index, int idx)
> +{
> +	if (anchortable_index == 0)
> +		return;
> +	/* Push copy of table. */

For what? Feel free to be more descriptive.

> +	lua_pushvalue(L, idx);
> +	lua_pushvalue(L, idx);
> +	lua_rawget(L, anchortable_index);
> +	if (lua_isnil(L, -1) == 1) {
> +		lua_pop(L, 1);
> +		lua_newtable(L);
> +	}
> +	int flags_index = lua_gettop(L);
> +	lua_pushstring(L, "serialized");
> +	lua_pushboolean(L, true);
> +	lua_rawset(L, flags_index);
> +	lua_rawset(L, anchortable_index);
> +}
> +

<snipped>

> +void
> +find_references(struct lua_State *L, int anchortable_index, const char *mode)
> +{
> +	int type = lua_type(L, -1);
> +	if (type != LUA_TTABLE || anchortable_index == 0)
> +		return;
> +
> +	int node_index = lua_gettop(L);
> +	lua_pushvalue(L, node_index);
> +
> +	/* Get value with flags from anchor table. */
> +	lua_pushvalue(L, node_index);
> +	lua_rawget(L, anchortable_index);
> +	if (lua_isnil(L, -1) == 1) {
> +		lua_pop(L, 1);
> +		lua_newtable(L);
> +	}
> +	int flags_index = lua_gettop(L);
> +
> +	/* Get and increment counter. */
> +	lua_pushstring(L, mode);
> +	lua_rawget(L, flags_index);
> +
> +	int new_count = 0;
> +	if (lua_isnil(L, -1) == 1) {
> +		new_count = 1;
> +		lua_pop(L, 1);
> +	} else {
> +		lua_Integer number = lua_tointeger(L, -1);
> +		lua_pop(L, 1);
> +		if (number < 2)
> +			new_count = ++number;
> +		else
> +			new_count = -1;
> +	}
> +
> +	/* Push new count to value with flags. */
> +	if (new_count == -1) {
> +		lua_pop(L, 2);
> +		assert(node_index == lua_gettop(L));
> +		return;
> +	}
> +	lua_pushstring(L, mode);
> +	lua_pushinteger(L, new_count);
> +	lua_rawset(L, flags_index);
> +
> +	/* Check if node is already serialized. */
> +	bool already_serialized = false;
> +	if (strcmp(mode, "after") == 0) {
> +		lua_pushstring(L, "serialized");
> +		lua_rawget(L, flags_index);
> +		already_serialized = lua_toboolean(L, -1);
> +		lua_pop(L, 1);
> +	}
> +	lua_rawset(L, anchortable_index);
> +	assert(node_index == lua_gettop(L));
> +	if (already_serialized == true)
> +		return;
> +
> +	/* Recursively process other table values. */
> +	lua_pushnil(L);
> +	while (lua_next(L, -2) != 0) {
> +		/* Find references on value. */
> +		find_references(L, anchortable_index, mode);
> +		lua_pop(L, 1);
> +		/* Find references on key. */
> +		find_references(L, anchortable_index, mode);
> +	}
> +}
> +
> +int
> +get_anchor(struct lua_State *L, int anchortable_index,
> +	   unsigned int *anchor_number, const char **anchor)
> +{
> +	lua_pushvalue(L, -1);
> +	lua_rawget(L, anchortable_index);
> +	if (lua_isnil(L, -1) == 1) {
> +		lua_pop(L, 1);
> +		*anchor = NULL;
> +		return 0;
> +	}
> +
> +	lua_pushstring(L, "before");
> +	lua_rawget(L, -2);
> +	const char *str = NULL;
> +	if (lua_type(L, -1) == LUA_TSTRING)
> +		str = lua_tostring(L, -1);
> +	lua_Integer number_before = lua_tointeger(L, -1);
> +	lua_pop(L, 1);
> +	lua_Integer number_after = 0;
> +	if (str == NULL) {
> +		lua_pushstring(L, "after");
> +		lua_rawget(L, -2);
> +		if (lua_type(L, -1) == LUA_TSTRING)
> +			str = lua_tostring(L, -1);
> +		number_after = lua_tointeger(L, -1);
> +		lua_pop(L, 1);
> +	}
> +
> +	*anchor = "";
> +	int ret = 0;
> +	if ((number_before > 1 || number_after > 1) && str == NULL) {
> +		/*
> +		 * This element is referenced more than once but
> +		 * has not been named.
> +		 */
> +		char buf[32];
> +		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);

According to your comment:
| @param[out] anchor_number Ptr to the number anchors.
Does it determines only by this function (as far as it is output value)?
If yes, should you set *anchor_number to 0? And can it be more than 1?

> +		lua_pop(L, 1);
> +		lua_pushvalue(L, -1);
> +
> +		lua_pushvalue(L, -1);
> +		lua_rawget(L, anchortable_index);
> +		if (number_before > 1)
> +			lua_pushstring(L, "before");
> +		else
> +			lua_pushstring(L, "after");

Looks like `lua_pushstring(L, number_before > 1 ? "before" : "after");`
is enough, isn't it?

> +		lua_pushstring(L, buf);
> +		*anchor = lua_tostring(L, -1);
> +		lua_rawset(L, -3);
> +		lua_rawset(L, anchortable_index);
> +		ret = 1;
> +	} else if (str != NULL) {
> +		/* This is an aliased element. */
> +		lua_pop(L, 1);
> +		*anchor = str;
> +		ret = 2;
> +	} else {
> +		lua_pop(L, 1);
> +		*anchor = NULL;
> +		ret = 0;
> +	}
> +	return ret;
> +}
> +
>  /**
>   * Call __serialize method of a table object by index
>   * if the former exists.
> @@ -496,11 +677,13 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,

<snipped>

> diff --git a/src/lua/utils.h b/src/lua/utils.h
> index 37531676d..ef9c70dfb 100644
> --- a/src/lua/utils.h
> +++ b/src/lua/utils.h
> @@ -354,6 +354,29 @@ struct luaL_field {
>  	bool compact;                /* a flag used by YAML serializer */
>  };
>  
> +/**
> + * Find references to tables to look up self references in tables.
> + *
> + * @param L Lua stack.
> + * @param anchortable_index Index of anchor table on stack.
> + * @param mode When the function is called before or after dump
> +               function.
> + */

Please add some information about how this function modifies
table by `anchortable_index`. May be it makes sense to reference
this function by `luaL_tofield()` and `luaL_convertfield()`.

> +void
> +find_references(struct lua_State *L, int anchortable_index, const char *mode);
> +
> +/**
> + * Generate aliases and anchor numbers for self references.
> + *
> + * @param L Lua stack.
> + * @param anchortable_index Index of anchor table on stack.
> + * @param[out] anchor_number Ptr to the number anchors.
> + * @param[out] anchor Ptr to anchor string.
> + */
> +int
> +get_anchor(struct lua_State *L, int anchortable_index,
> +	   unsigned int *anchor_number, const char **anchor);
> +
>  /**
>   * @brief Convert a value from the Lua stack to a lua_field structure.
>   * This function is designed for use with Lua bindings and data
> @@ -388,6 +411,7 @@ struct luaL_field {

<snipped>

> diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
> new file mode 100644
> index 000000000..773470c8e
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.result

<snipped>

> diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> new file mode 100644
> index 000000000..1c9b0375f
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua

<snipped>

> diff --git a/test/swim/swim.result b/test/swim/swim.result
> index bfc83c143..539131677 100644
> --- a/test/swim/swim.result
> +++ b/test/swim/swim.result

<snipped>

> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
> index 38e999870..67a9762bc 100644
> --- a/third_party/lua-cjson/lua_cjson.c
> +++ b/third_party/lua-cjson/lua_cjson.c

<snipped>

> diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
> index 9c3a4a646..72f269e6c 100644
> --- a/third_party/lua-yaml/lyaml.cc
> +++ b/third_party/lua-yaml/lyaml.cc
> @@ -500,38 +500,23 @@ usage_error:
>  static int dump_node(struct lua_yaml_dumper *dumper);
>  
>  static yaml_char_t *get_yaml_anchor(struct lua_yaml_dumper *dumper) {
> -   const char *s = "";
> -   lua_pushvalue(dumper->L, -1);
> -   lua_rawget(dumper->L, dumper->anchortable_index);
> -   if (!lua_toboolean(dumper->L, -1)) {
> -      lua_pop(dumper->L, 1);
> -      return NULL;
> -   }
> -
> -   if (lua_isboolean(dumper->L, -1)) {
> -      /* this element is referenced more than once but has not been named */
> -      char buf[32];
> -      snprintf(buf, sizeof(buf), "%u", dumper->anchor_number++);
> -      lua_pop(dumper->L, 1);
> -      lua_pushvalue(dumper->L, -1);
> -      lua_pushstring(dumper->L, buf);
> -      s = lua_tostring(dumper->L, -1);
> -      lua_rawset(dumper->L, dumper->anchortable_index);
> -   } else {
> -      /* this is an aliased element */
> +   const char *anchor = NULL;
> +   if (get_anchor(dumper->L, dumper->anchortable_index, &dumper->anchor_number,
> +                  &anchor) == 2) {
>        yaml_event_t ev;
> -      const char *str = lua_tostring(dumper->L, -1);
> -      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) str) ||
> +      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) anchor) ||
>            !yaml_emitter_emit(&dumper->emitter, &ev))
>           luaL_error(dumper->L, OOM_ERRMSG);
> -      lua_pop(dumper->L, 1);
> +      return (yaml_char_t *)"";
>     }
> -   return (yaml_char_t *)s;
> +   return (yaml_char_t *) anchor;

Nit: s/(yaml_char_t *) anchor/(yaml_char_t *)anchor/, according to the
previous change.

>  }
>  
> -static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
> +static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field,
> +                      yaml_char_t *anchor) {
>     yaml_event_t ev;
> -   yaml_char_t *anchor = get_yaml_anchor(dumper);
> +   if (anchor == NULL)
> +      anchor = get_yaml_anchor(dumper);
>  
>     if (anchor && !*anchor) return 1;
>  
> @@ -556,10 +541,12 @@ static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
>            yaml_emitter_emit(&dumper->emitter, &ev) != 0 ? 1 : 0;
>  }
>  
> -static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){
> +static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field,
> +                      yaml_char_t *anchor) {
>     unsigned i;
>     yaml_event_t ev;
> -   yaml_char_t *anchor = get_yaml_anchor(dumper);
> +   if (anchor == NULL)
> +      anchor = get_yaml_anchor(dumper);
>  
>     if (anchor && !*anchor)
>        return 1;
> @@ -621,8 +608,18 @@ static int dump_node(struct lua_yaml_dumper *dumper)
>     bool unused;
>     (void) unused;
>  
> +   yaml_char_t *anchor = NULL;
> +
> +   /*
> +    * Keep anchor to print it if luaL_checkfield() push new value
> +    * on stack after serialization.
> +    */
> +   if (lua_istable(dumper->L, -1) == 1)
> +      anchor = get_yaml_anchor(dumper);
> +
>     int top = lua_gettop(dumper->L);
> -   luaL_checkfield(dumper->L, dumper->cfg, top, &field);
> +   luaL_checkfield(dumper->L, dumper->cfg, dumper->anchortable_index, top,
> +                   &field);
>     switch(field.type) {
>     case MP_UINT:
>        snprintf(buf, sizeof(buf) - 1, "%" PRIu64, field.ival);
> @@ -647,9 +644,9 @@ static int dump_node(struct lua_yaml_dumper *dumper)
>        len = strlen(buf);
>        break;
>     case MP_ARRAY:
> -      return dump_array(dumper, &field);
> +      return dump_array(dumper, &field, anchor);
>     case MP_MAP:
> -      return dump_table(dumper, &field);
> +      return dump_table(dumper, &field, anchor);
>     case MP_STR:
>        str = lua_tolstring(dumper->L, -1, &len);
>        if (yaml_is_null(str, len) || yaml_is_bool(str, len, &unused) ||
> @@ -745,35 +742,6 @@ static int append_output(void *arg, unsigned char *buf, size_t len) {
>     return 1;
>  }
>  
> -static void find_references(struct lua_yaml_dumper *dumper) {

<snipped>

> -}
> -
>  int
>  lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
>                  const char *tag_handle, const char *tag_prefix)
> @@ -819,7 +787,7 @@ lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
>     dumper.anchortable_index = lua_gettop(L);
>     dumper.anchor_number = 0;
>     lua_pushvalue(L, 1); /* push copy of arg we're processing */
> -   find_references(&dumper);
> +   find_references(L, dumper.anchortable_index, "before");

Side note: looks like <third_party/lua-yaml> depends on
<src/lua/utils.c> now. Doesn't look like a real third party :).
Feel free to ignore.

>     dump_document(&dumper);
>     if (dumper.error)
>        goto error;
> -- 
> 2.24.3 (Apple Git-128)
> 

[1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-November/020771.html
[2]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018419.html

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-03-14 12:42 ` Sergey Kaplun via Tarantool-patches
@ 2021-04-05 22:05   ` Roman Khabibov via Tarantool-patches
  2021-04-21  9:27     ` Sergey Kaplun via Tarantool-patches
  0 siblings, 1 reply; 9+ messages in thread
From: Roman Khabibov via Tarantool-patches @ 2021-04-05 22:05 UTC (permalink / raw)
  To: Sergey Kaplun; +Cc: tarantool-patches

Hi! Thanks for your review.

Perf

Script:

local serializer = require('json')
clock = require('clock')
serializer.cfg({encode_max_depth = 64})
local avg = 0
for j = 1, 10 do
    local time = clock.monotonic()
    for i = 1, 150000 do
        local rec1 = {{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}
        serializer.encode(rec1)
    end
    local result = clock.monotonic() - time
    print(result)
    avg = avg + result
end
print("avg", avg / 10)


JSON

master
r:tarantool r.khabibov$ ./src/tarantool a.lua
2.3421729998663
2.4643029998988
2.4067219998688
2.5030510001816
2.6378739997745
2.3903989996761
2.9206310003065
2.9560130001046
2.8415450002067
2.8488600002602
avg	2.6311571000144

my branch
2.3160850000568
2.4038570001721
2.4244269998744
2.4124370003119
2.4159540003166
2.4096140000038
2.4202209999785
2.3913590000011
2.4089310001582
2.4692329997197
avg	2.4072118000593


MSGPACK

master
4.1559460000135
3.415306000039
3.2962070000358
3.0802390002646
2.8740380001254
2.7747510001063
2.7418200001121
2.7159779998474
2.8011710001156
3.1459960001521
avg	3.1001452000812

my branch
2.6075639999472
2.7236989997327
3.0367560000159
3.071974999737
4.0645679999143
3.3987269997597
3.0836050002836
3.1121310000308
2.9265290000476
3.1954760001972
avg	3.1221029999666

> On Mar 14, 2021, at 15:42, Sergey Kaplun <skaplun@tarantool.org> wrote:
> 
> Hi!
> 
> Thanks for the patch!
> 
> Side note: I've got into the swing of things by looking of previous
> series of patches ([1][2]). AFAICS, there was idea to throw an error on
> recursive serialization, is there discussion anywhere about this
> implementation instead the old one which I've missed?
> 
> I have some questions about current version of serializer:
> 
> Nit: we have no need to add a marker for structures that are not used:
> | Tarantool 2.8.0-108-g195d44ddc
> | tarantool> local a = {} local b = {a = a} b[b] = b a[b] = a setmetatable(b, {__serialize = function(...) return {a} end}) return a
> | ---
> | - &0
> |   ? &1
> |   - *0
> |   : *0
> | ...
> 
> And when serialize is omitted, it works like expected:
> | Tarantool 2.8.0-108-g195d44ddc
> | tarantool> local a = {} local b = {a = a} b[b] = b a[b] = a  return a
> | ---
> | - &0
> |   ? &1
> |     a: *0
> |     *1: *1
> |   : *0
> | ...
> 
> In the old version it looks minimalistic and good:
> | Tarantool 2.8.0-0-gefc30ccf8
> | tarantool> local a = {} local b = {a = a} b[b] = b a[b] = a setmetatable(b, {__serialize = function(...) return {a} end}) return a
> | ---
> | - &0
> |   ? - *0
> |   : *0
> | ...
> 
> Also, I found some inconsistency in showing repeated objects.
> For the new version:
> | Tarantool 2.8.0-108-g195d44ddc
> | local a = {} local b = {} a[b] = b  setmetatable(b, {__serialize = function(...) return print end}) return a
> | ---
> | - 'function: builtin#29': *0
> | ...
> 
> But '&0' does not exist, so it looks confusing. Same story with threads.
> 
> And for old version:
> 
> | Tarantool 2.8.0-0-gefc30ccf8
> | tarantool> local a = {} local b = {} a[b] = b  setmetatable(b, {__serialize = function(...) return print end}) return a
> | ---
> | - 'function: builtin#29': 'function: builtin#29'
> | ...
> 
> 
> Side note: I'm not sure that the next example is adequate, so I CC-ed Sergos
> to judge me.
> The main idea is to change a serialization function inside serialization.
> It looks weird and useless, to be honest, but who knows. Here is the example:
> 
> | Tarantool 2.8.0-108-g195d44ddc
> | tarantool> local function example()
> |          >   local a = {}
> |          >   local b = {}
> |          >   b[a] = a
> |          >   local reta
> |          >   local retb
> |          >   local function swap_ser(o)
> |          >     local newf
> |          >     local f = getmetatable(o).__serialize
> |          >     if f == reta then
> |          >       newf = retb
> |          >     else
> |          >       newf = reta
> |          >     end
> |          >     getmetatable(o).__serialize = newf
> |          >   end
> |          >   reta = function(o) swap_ser(o) return "a" end
> |          >   retb = function(o) swap_ser(o) return "b" end
> |          >   setmetatable(a, {__serialize = reta})
> |          >   return b
> |          > end return example()
> | ---
> | - a: *0
> | ...
> 
> Note, that '*0' looks even more confusing now :).
> 
> For the old version:
> | Tarantool 2.8.0-0-gefc30ccf8
> | tarantool> local function example()
> |          >   local a = {}
> |          >   local b = {}
> |          >   b[a] = a
> |          >   local reta
> |          >   local retb
> |          >   local function swap_ser(o)
> |          >     local newf
> |          >     local f = getmetatable(o).__serialize
> |          >     if f == reta then
> |          >       newf = retb
> |          >     else
> |          >       newf = reta
> |          >     end
> |          >     getmetatable(o).__serialize = newf
> |          >   end
> |          >   reta = function(o) swap_ser(o) return "a" end
> |          >   retb = function(o) swap_ser(o) return "b" end
> |          >   setmetatable(a, {__serialize = reta})
> |          >   return b
> |          > end return example()
> | ---
> | - a: b
> | ...
> 
> Looks like we should check not only object itself, but that the serialization
> function is the same.
> 
> Moreover, I don't know, what can we do with situations, when a serialize
> function depends on some context that can be changed inside it:
> 
> The new version:
> | Tarantool 2.8.0-108-g195d44ddc
> | tarantool> local a = {} local b = {} b[a] = a local show_a = true setmetatable(a, {__serialize = function() show_a = not show_a if show_a then return "a" else return "b" end end}) return b
> | ---
> | - b: *0
> | ...
> 
> The old version works like expected:
> | Tarantool 2.8.0-0-gefc30ccf8
> | tarantool> local a = {} local b = {} b[a] = a local show_a = true setmetatable(a, {__serialize = function() show_a = not show_a if show_a then return "a" else return "b" end end}) return b
> | ---
> | - b: a
> | ...
> 
> Side note: It doesn't relate to issue and patch directly, but
> deep recursive calls still lead to core:
> | Tarantool 2.8.0-108-g195d44ddc
> | tarantool> local a = {} a[a] = a local recf recf = function(t) return setmetatable({}, {__serialize = recf}) end setmetatable(a, {__serialize = recf}) return a
> | Segmentation fault (core dumped)
> 
> With the backtrace that looks like the following:
> | #8  0x000055fd217034d3 in lua_field_try_serialize (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1375, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:694
> | #9  0x000055fd2170387d in lua_field_inspect_table (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1375, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:740
> | #10 0x000055fd21704542 in luaL_tofield (L=0x40ffb378, cfg=0x41c6e8c0, anchortable_index=3, opts=0x0, index=1375, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:944
> | #11 0x000055fd217034f8 in lua_field_try_serialize (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1374, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:695
> | #12 0x000055fd2170387d in lua_field_inspect_table (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1374, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:740
> | #13 0x000055fd21704542 in luaL_tofield (L=0x40ffb378, cfg=0x41c6e8c0, anchortable_index=3, opts=0x0, index=1374, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:944
> | #14 0x000055fd217034f8 in lua_field_try_serialize (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1373, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:695
> | #15 0x000055fd2170387d in lua_field_inspect_table (L=0x40ffb378, anchortable_index=3, cfg=0x41c6e8c0, idx=1373, field=0x7f29425fd760)
> |     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:740
> | <4K lines below>
> 
> I suppose that it can be handled by checking of the serialize level of
> structure (table). If its to high we can either raise an error or
> return <snipped> for the rest of the structure. At least it will be
> good to create an issue follow-ups this problem.
See updated tests.
 
> Since the answers to the questions above are critical to evaluate the
> logic of the patch, I have not reviewed it yet. Most parts of the
> comments below is about to add more comments to code or some suggestions
> about codestile.
> 
> On 11.03.21, Roman Khabibov wrote:
>> Fix bug with bus error during serializing of recursive
>> structures.
> 
> Please provide a little bit more description about your changes.
> It helps me as a reviewer to check the patch and for other developers
> it will be easier to understand these changes in the future.
Done.

>> 
>> Closes #3228
>> ---
>> 
>> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3228-visited-set
>> Issue: https://github.com/tarantool/tarantool/issues/3228
>> 
>> src/box/lua/call.c                            |   6 +-
>> src/box/lua/execute.c                         |   2 +-
>> src/box/lua/serialize_lua.c                   |  96 ++------
>> src/box/lua/tuple.c                           |   2 +-
>> src/box/sql/func.c                            |   2 +-
>> src/lua/msgpack.c                             |  10 +-
>> src/lua/pickle.c                              |   2 +-
>> src/lua/utils.c                               | 226 ++++++++++++++++--
>> src/lua/utils.h                               |  43 +++-
>> ...-3228-serializer-look-for-recursion.result |  67 ++++++
>> ...228-serializer-look-for-recursion.test.lua |  17 ++
>> test/swim/swim.result                         |  18 +-
>> third_party/lua-cjson/lua_cjson.c             |   4 +-
>> third_party/lua-yaml/lyaml.cc                 |  88 +++----
>> 14 files changed, 390 insertions(+), 193 deletions(-)
>> create mode 100644 test/app/gh-3228-serializer-look-for-recursion.result
>> create mode 100644 test/app/gh-3228-serializer-look-for-recursion.test.lua
> 
> Please provide Changelog entry too.
>> 
>> diff --git a/src/box/lua/call.c b/src/box/lua/call.c
>> index 0315e720c..e4907a3f7 100644
>> --- a/src/box/lua/call.c
>> +++ b/src/box/lua/call.c
> 
> <snipped>
> 
>> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
>> index caa08a60f..ab612dfc2 100644
>> --- a/src/box/lua/serialize_lua.c
>> +++ b/src/box/lua/serialize_lua.c
>> @@ -215,7 +215,7 @@ trace_node(struct lua_dumper *d)
> 
> <snipped>
> 
>> @@ -339,41 +339,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
>> static const char *
>> get_lua_anchor(struct lua_dumper *d)
>> {
>> -	const char *s = "";
>> -
>> -	lua_pushvalue(d->L, -1);
>> -	lua_rawget(d->L, d->anchortable_index);
>> -	if (!lua_toboolean(d->L, -1)) {
>> -		lua_pop(d->L, 1);
>> -		return NULL;
>> -	}
>> -
>> -	if (lua_isboolean(d->L, -1)) {
>> -		/*
>> -		 * This element is referenced more
>> -		 * than once but has not been named.
>> -		 */
>> -		char buf[32];
>> -		snprintf(buf, sizeof(buf), "%u", d->anchor_number++);
>> -		lua_pop(d->L, 1);
>> -		lua_pushvalue(d->L, -1);
>> -		lua_pushstring(d->L, buf);
>> -		s = lua_tostring(d->L, -1);
>> -		lua_rawset(d->L, d->anchortable_index);
>> -		trace_anchor(s, false);
>> -	} else {
>> -		/*
>> -		 * An aliased element.
>> -		 *
>> -		 * FIXME: Need an example to use.
>> -		 *
>> -		 * const char *str = lua_tostring(d->L, -1);
>> -		 */
>> -		const char *str = lua_tostring(d->L, -1);
>> -		trace_anchor(str, true);
>> -		lua_pop(d->L, 1);
>> +	const char *anchor = NULL;
>> +	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
>> +			      &anchor);
>> +	if (code == 1) {
> 
> Please avoid using magic numbers. Here and below.
Done. See enums.

>> +		trace_anchor(anchor, false);
>> +	} else if (code == 2) {
>> +		trace_anchor(anchor, true);
>> +		return "";
>> 	}
>> -	return s;
>> +	return anchor;
>> }
>> 
>> static void
>> @@ -783,7 +758,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
> 
> <snipped>
> 
>> @@ -881,49 +856,6 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
>> 	return emit_node(d, nd, indent, str, len);
>> }
>> 
>> -/**
>> - * Find references to tables, we use it
>> - * to find self references in tables.
>> - */
>> -static void
>> -find_references(struct lua_dumper *d)
>> -{
> 
> <snipped>
> 
>> -}
>> -
>> /**
>>  * Dump recursively from the root node.
>>  */
>> @@ -935,7 +867,7 @@ dump_root(struct lua_dumper *d)
> 
> <snipped>
> 
>> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
>> index 3e6f043b4..4d417ec34 100644
>> --- a/src/box/lua/tuple.c
>> +++ b/src/box/lua/tuple.c
> 
> <snipped>
> 
>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index f15d27051..d4a7064f0 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
> 
> <snipped>
> 
>> diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
>> index 24b0d2ccd..412736572 100644
>> --- a/src/lua/msgpack.c
>> +++ b/src/lua/msgpack.c
> 
> <snipped>
> 
>> diff --git a/src/lua/pickle.c b/src/lua/pickle.c
>> index 65208b5b3..86f0f77e8 100644
>> --- a/src/lua/pickle.c
>> +++ b/src/lua/pickle.c
> 
> <snipped>
> 
>> diff --git a/src/lua/utils.c b/src/lua/utils.c
>> index b5a6ca5b7..64404e3fc 100644
>> --- a/src/lua/utils.c
>> +++ b/src/lua/utils.c
>> @@ -439,9 +439,54 @@ lua_gettable_wrapper(lua_State *L)
>> 	return 1;
>> }
>> 
>> +/**
>> + * Check if node with @a idx serialized.
>> + */
>> +static int
>> +is_serialized(struct lua_State *L, int anchortable_index, int idx)
>> +{
>> +	if (anchortable_index == 0)
>> +		return 0;
>> +	lua_pushvalue(L, idx);
>> +	lua_rawget(L, anchortable_index);
>> +	int is_serialized = 0;
>> +	if (lua_isnil(L, -1) == 0) {
> 
> Nit: This comparison is good according to current Tarantool's code
> style, but all old checks are using just `lua_is*()`.
> Personally, I prefer to keep code style consistence inside one
> translation unit.
> Feel free to ignore.
> Here and below.
>> +		lua_pushstring(L, "serialized");
>> +		lua_rawget(L, -2);
>> +		is_serialized = lua_toboolean(L, -1);
>> +		lua_pop(L, 1);
>> +	}
>> +	lua_pop(L, 1);
>> +	return is_serialized;
>> +}
>> +
>> +/**
>> + * Mark node with @a idx as serialized.
>> + */
>> +static void
>> +mark_as_serialized(struct lua_State *L, int anchortable_index, int idx)
>> +{
>> +	if (anchortable_index == 0)
>> +		return;
>> +	/* Push copy of table. */
> 
> For what? Feel free to be more descriptive.
I added comments.

>> +	lua_pushvalue(L, idx);
>> +	lua_pushvalue(L, idx);
>> +	lua_rawget(L, anchortable_index);
>> +	if (lua_isnil(L, -1) == 1) {
>> +		lua_pop(L, 1);
>> +		lua_newtable(L);
>> +	}
>> +	int flags_index = lua_gettop(L);
>> +	lua_pushstring(L, "serialized");
>> +	lua_pushboolean(L, true);
>> +	lua_rawset(L, flags_index);
>> +	lua_rawset(L, anchortable_index);
>> +}
>> +
> 
> <snipped>
> 
>> +void
>> +find_references(struct lua_State *L, int anchortable_index, const char *mode)
>> +{
>> +	int type = lua_type(L, -1);
>> +	if (type != LUA_TTABLE || anchortable_index == 0)
>> +		return;
>> +
>> +	int node_index = lua_gettop(L);
>> +	lua_pushvalue(L, node_index);
>> +
>> +	/* Get value with flags from anchor table. */
>> +	lua_pushvalue(L, node_index);
>> +	lua_rawget(L, anchortable_index);
>> +	if (lua_isnil(L, -1) == 1) {
>> +		lua_pop(L, 1);
>> +		lua_newtable(L);
>> +	}
>> +	int flags_index = lua_gettop(L);
>> +
>> +	/* Get and increment counter. */
>> +	lua_pushstring(L, mode);
>> +	lua_rawget(L, flags_index);
>> +
>> +	int new_count = 0;
>> +	if (lua_isnil(L, -1) == 1) {
>> +		new_count = 1;
>> +		lua_pop(L, 1);
>> +	} else {
>> +		lua_Integer number = lua_tointeger(L, -1);
>> +		lua_pop(L, 1);
>> +		if (number < 2)
>> +			new_count = ++number;
>> +		else
>> +			new_count = -1;
>> +	}
>> +
>> +	/* Push new count to value with flags. */
>> +	if (new_count == -1) {
>> +		lua_pop(L, 2);
>> +		assert(node_index == lua_gettop(L));
>> +		return;
>> +	}
>> +	lua_pushstring(L, mode);
>> +	lua_pushinteger(L, new_count);
>> +	lua_rawset(L, flags_index);
>> +
>> +	/* Check if node is already serialized. */
>> +	bool already_serialized = false;
>> +	if (strcmp(mode, "after") == 0) {
>> +		lua_pushstring(L, "serialized");
>> +		lua_rawget(L, flags_index);
>> +		already_serialized = lua_toboolean(L, -1);
>> +		lua_pop(L, 1);
>> +	}
>> +	lua_rawset(L, anchortable_index);
>> +	assert(node_index == lua_gettop(L));
>> +	if (already_serialized == true)
>> +		return;
>> +
>> +	/* Recursively process other table values. */
>> +	lua_pushnil(L);
>> +	while (lua_next(L, -2) != 0) {
>> +		/* Find references on value. */
>> +		find_references(L, anchortable_index, mode);
>> +		lua_pop(L, 1);
>> +		/* Find references on key. */
>> +		find_references(L, anchortable_index, mode);
>> +	}
>> +}
>> +
>> +int
>> +get_anchor(struct lua_State *L, int anchortable_index,
>> +	   unsigned int *anchor_number, const char **anchor)
>> +{
>> +	lua_pushvalue(L, -1);
>> +	lua_rawget(L, anchortable_index);
>> +	if (lua_isnil(L, -1) == 1) {
>> +		lua_pop(L, 1);
>> +		*anchor = NULL;
>> +		return 0;
>> +	}
>> +
>> +	lua_pushstring(L, "before");
>> +	lua_rawget(L, -2);
>> +	const char *str = NULL;
>> +	if (lua_type(L, -1) == LUA_TSTRING)
>> +		str = lua_tostring(L, -1);
>> +	lua_Integer number_before = lua_tointeger(L, -1);
>> +	lua_pop(L, 1);
>> +	lua_Integer number_after = 0;
>> +	if (str == NULL) {
>> +		lua_pushstring(L, "after");
>> +		lua_rawget(L, -2);
>> +		if (lua_type(L, -1) == LUA_TSTRING)
>> +			str = lua_tostring(L, -1);
>> +		number_after = lua_tointeger(L, -1);
>> +		lua_pop(L, 1);
>> +	}
>> +
>> +	*anchor = "";
>> +	int ret = 0;
>> +	if ((number_before > 1 || number_after > 1) && str == NULL) {
>> +		/*
>> +		 * This element is referenced more than once but
>> +		 * has not been named.
>> +		 */
>> +		char buf[32];
>> +		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);
> 
> According to your comment:
> | @param[out] anchor_number Ptr to the number anchors.
> Does it determines only by this function (as far as it is output value)?
> If yes, should you set *anchor_number to 0? And can it be more than 1?
Done.

>> +		lua_pop(L, 1);
>> +		lua_pushvalue(L, -1);
>> +
>> +		lua_pushvalue(L, -1);
>> +		lua_rawget(L, anchortable_index);
>> +		if (number_before > 1)
>> +			lua_pushstring(L, "before");
>> +		else
>> +			lua_pushstring(L, "after");
> 
> Looks like `lua_pushstring(L, number_before > 1 ? "before" : "after");`
> is enough, isn't it?
It isn’t actually now.

>> +		lua_pushstring(L, buf);
>> +		*anchor = lua_tostring(L, -1);
>> +		lua_rawset(L, -3);
>> +		lua_rawset(L, anchortable_index);
>> +		ret = 1;
>> +	} else if (str != NULL) {
>> +		/* This is an aliased element. */
>> +		lua_pop(L, 1);
>> +		*anchor = str;
>> +		ret = 2;
>> +	} else {
>> +		lua_pop(L, 1);
>> +		*anchor = NULL;
>> +		ret = 0;
>> +	}
>> +	return ret;
>> +}
>> +
>> /**
>>  * Call __serialize method of a table object by index
>>  * if the former exists.
>> @@ -496,11 +677,13 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
> 
> <snipped>
> 
>> diff --git a/src/lua/utils.h b/src/lua/utils.h
>> index 37531676d..ef9c70dfb 100644
>> --- a/src/lua/utils.h
>> +++ b/src/lua/utils.h
>> @@ -354,6 +354,29 @@ struct luaL_field {
>> 	bool compact;                /* a flag used by YAML serializer */
>> };
>> 
>> +/**
>> + * Find references to tables to look up self references in tables.
>> + *
>> + * @param L Lua stack.
>> + * @param anchortable_index Index of anchor table on stack.
>> + * @param mode When the function is called before or after dump
>> +               function.
>> + */
> 
> Please add some information about how this function modifies
> table by `anchortable_index`. May be it makes sense to reference
> this function by `luaL_tofield()` and `luaL_convertfield()`.
Done.

>> +void
>> +find_references(struct lua_State *L, int anchortable_index, const char *mode);
>> +
>> +/**
>> + * Generate aliases and anchor numbers for self references.
>> + *
>> + * @param L Lua stack.
>> + * @param anchortable_index Index of anchor table on stack.
>> + * @param[out] anchor_number Ptr to the number anchors.
>> + * @param[out] anchor Ptr to anchor string.
>> + */
>> +int
>> +get_anchor(struct lua_State *L, int anchortable_index,
>> +	   unsigned int *anchor_number, const char **anchor);
>> +
>> /**
>>  * @brief Convert a value from the Lua stack to a lua_field structure.
>>  * This function is designed for use with Lua bindings and data
>> @@ -388,6 +411,7 @@ struct luaL_field {
> 
> <snipped>
> 
>> diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
>> new file mode 100644
>> index 000000000..773470c8e
>> --- /dev/null
>> +++ b/test/app/gh-3228-serializer-look-for-recursion.result
> 
> <snipped>
> 
>> diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
>> new file mode 100644
>> index 000000000..1c9b0375f
>> --- /dev/null
>> +++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> 
> <snipped>
> 
>> diff --git a/test/swim/swim.result b/test/swim/swim.result
>> index bfc83c143..539131677 100644
>> --- a/test/swim/swim.result
>> +++ b/test/swim/swim.result
> 
> <snipped>
> 
>> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
>> index 38e999870..67a9762bc 100644
>> --- a/third_party/lua-cjson/lua_cjson.c
>> +++ b/third_party/lua-cjson/lua_cjson.c
> 
> <snipped>
> 
>> diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
>> index 9c3a4a646..72f269e6c 100644
>> --- a/third_party/lua-yaml/lyaml.cc
>> +++ b/third_party/lua-yaml/lyaml.cc
>> @@ -500,38 +500,23 @@ usage_error:
>> static int dump_node(struct lua_yaml_dumper *dumper);
>> 
>> static yaml_char_t *get_yaml_anchor(struct lua_yaml_dumper *dumper) {
>> -   const char *s = "";
>> -   lua_pushvalue(dumper->L, -1);
>> -   lua_rawget(dumper->L, dumper->anchortable_index);
>> -   if (!lua_toboolean(dumper->L, -1)) {
>> -      lua_pop(dumper->L, 1);
>> -      return NULL;
>> -   }
>> -
>> -   if (lua_isboolean(dumper->L, -1)) {
>> -      /* this element is referenced more than once but has not been named */
>> -      char buf[32];
>> -      snprintf(buf, sizeof(buf), "%u", dumper->anchor_number++);
>> -      lua_pop(dumper->L, 1);
>> -      lua_pushvalue(dumper->L, -1);
>> -      lua_pushstring(dumper->L, buf);
>> -      s = lua_tostring(dumper->L, -1);
>> -      lua_rawset(dumper->L, dumper->anchortable_index);
>> -   } else {
>> -      /* this is an aliased element */
>> +   const char *anchor = NULL;
>> +   if (get_anchor(dumper->L, dumper->anchortable_index, &dumper->anchor_number,
>> +                  &anchor) == 2) {
>>       yaml_event_t ev;
>> -      const char *str = lua_tostring(dumper->L, -1);
>> -      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) str) ||
>> +      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) anchor) ||
>>           !yaml_emitter_emit(&dumper->emitter, &ev))
>>          luaL_error(dumper->L, OOM_ERRMSG);
>> -      lua_pop(dumper->L, 1);
>> +      return (yaml_char_t *)"";
>>    }
>> -   return (yaml_char_t *)s;
>> +   return (yaml_char_t *) anchor;
> 
> Nit: s/(yaml_char_t *) anchor/(yaml_char_t *)anchor/, according to the
> previous change.
Fixed.

>> }
>> 
>> -static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
>> +static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field,
>> +                      yaml_char_t *anchor) {
>>    yaml_event_t ev;
>> -   yaml_char_t *anchor = get_yaml_anchor(dumper);
>> +   if (anchor == NULL)
>> +      anchor = get_yaml_anchor(dumper);
>> 
>>    if (anchor && !*anchor) return 1;
>> 
>> @@ -556,10 +541,12 @@ static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
>>           yaml_emitter_emit(&dumper->emitter, &ev) != 0 ? 1 : 0;
>> }
>> 
>> -static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){
>> +static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field,
>> +                      yaml_char_t *anchor) {
>>    unsigned i;
>>    yaml_event_t ev;
>> -   yaml_char_t *anchor = get_yaml_anchor(dumper);
>> +   if (anchor == NULL)
>> +      anchor = get_yaml_anchor(dumper);
>> 
>>    if (anchor && !*anchor)
>>       return 1;
>> @@ -621,8 +608,18 @@ static int dump_node(struct lua_yaml_dumper *dumper)
>>    bool unused;
>>    (void) unused;
>> 
>> +   yaml_char_t *anchor = NULL;
>> +
>> +   /*
>> +    * Keep anchor to print it if luaL_checkfield() push new value
>> +    * on stack after serialization.
>> +    */
>> +   if (lua_istable(dumper->L, -1) == 1)
>> +      anchor = get_yaml_anchor(dumper);
>> +
>>    int top = lua_gettop(dumper->L);
>> -   luaL_checkfield(dumper->L, dumper->cfg, top, &field);
>> +   luaL_checkfield(dumper->L, dumper->cfg, dumper->anchortable_index, top,
>> +                   &field);
>>    switch(field.type) {
>>    case MP_UINT:
>>       snprintf(buf, sizeof(buf) - 1, "%" PRIu64, field.ival);
>> @@ -647,9 +644,9 @@ static int dump_node(struct lua_yaml_dumper *dumper)
>>       len = strlen(buf);
>>       break;
>>    case MP_ARRAY:
>> -      return dump_array(dumper, &field);
>> +      return dump_array(dumper, &field, anchor);
>>    case MP_MAP:
>> -      return dump_table(dumper, &field);
>> +      return dump_table(dumper, &field, anchor);
>>    case MP_STR:
>>       str = lua_tolstring(dumper->L, -1, &len);
>>       if (yaml_is_null(str, len) || yaml_is_bool(str, len, &unused) ||
>> @@ -745,35 +742,6 @@ static int append_output(void *arg, unsigned char *buf, size_t len) {
>>    return 1;
>> }
>> 
>> -static void find_references(struct lua_yaml_dumper *dumper) {
> 
> <snipped>
> 
>> -}
>> -
>> int
>> lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
>>                 const char *tag_handle, const char *tag_prefix)
>> @@ -819,7 +787,7 @@ lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
>>    dumper.anchortable_index = lua_gettop(L);
>>    dumper.anchor_number = 0;
>>    lua_pushvalue(L, 1); /* push copy of arg we're processing */
>> -   find_references(&dumper);
>> +   find_references(L, dumper.anchortable_index, "before");
> 
> Side note: looks like <third_party/lua-yaml> depends on
> <src/lua/utils.c> now. Doesn't look like a real third party :).
> Feel free to ignore.
> 
>>    dump_document(&dumper);
>>    if (dumper.error)
>>       goto error;
>> -- 
>> 2.24.3 (Apple Git-128)
>> 
> 
> [1]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-November/020771.html
> [2]: https://lists.tarantool.org/pipermail/tarantool-patches/2020-July/018419.html
> 
> -- 
> Best regards,
> Sergey Kaplun

commit 61c84258d32f121c0a5e75761ae3b347d0f41b5d
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Fri Feb 5 16:55:56 2021 +0300

    serializer: serialize recursive structures
    
    Fix bug with bus error during serializing of recursive structures
    and print aliases if node become referenced after serialization.
    
    This patch adds map with serialized nodes and results. The map is
    used to mark referenced nodes which appears during serializing.
    
    Closes #3228
    
    @TarantoolBot
    __serialize parameter
    If __serialize parameter is initialized as a function then this
    function should be pure. A pure function is a function which
    returns identical values for identical arguments (no variation
    with local static variables, non-local variables, mutable
    reference arguments or input streams). Otherwise, the behavior
    is undefined.

diff --git a/changelogs/unreleased/serializer-bug.md b/changelogs/unreleased/serializer-bug.md
new file mode 100755
index 000000000..96f359d64
--- /dev/null
+++ b/changelogs/unreleased/serializer-bug.md
@@ -0,0 +1,4 @@
+## bugfix/core
+
+* Recursive structures appeared after __serialize call don't lead
+* to segfault anymore in lua and yaml serializers (gh-5392).
diff --git a/src/box/lua/call.c b/src/box/lua/call.c
index 0315e720c..e4907a3f7 100644
--- a/src/box/lua/call.c
+++ b/src/box/lua/call.c
@@ -193,7 +193,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 		 */
 		for (int i = 1; i <= nrets; ++i) {
 			struct luaL_field field;
-			if (luaL_tofield(L, cfg, NULL, i, &field) < 0)
+			if (luaL_tofield(L, cfg, 0, NULL, i, &field) < 0)
 				return luaT_error(L);
 			struct tuple *tuple;
 			if (field.type == MP_EXT &&
@@ -222,7 +222,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 	 * Inspect the first result
 	 */
 	struct luaL_field root;
-	if (luaL_tofield(L, cfg, NULL, 1, &root) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, 1, &root) < 0)
 		return luaT_error(L);
 	struct tuple *tuple;
 	if (root.type == MP_EXT && (tuple = luaT_istuple(L, 1)) != NULL) {
@@ -252,7 +252,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 	for (uint32_t t = 1; t <= root.size; t++) {
 		lua_rawgeti(L, 1, t);
 		struct luaL_field field;
-		if (luaL_tofield(L, cfg, NULL, -1, &field) < 0)
+		if (luaL_tofield(L, cfg, 0, NULL, -1, &field) < 0)
 			return luaT_error(L);
 		if (field.type == MP_EXT && (tuple = luaT_istuple(L, -1))) {
 			tuple_to_mpstream(tuple, stream);
diff --git a/src/box/lua/execute.c b/src/box/lua/execute.c
index 926a0a61c..76d271f5c 100644
--- a/src/box/lua/execute.c
+++ b/src/box/lua/execute.c
@@ -328,7 +328,7 @@ lua_sql_bind_decode(struct lua_State *L, struct sql_bind *bind, int idx, int i)
 		bind->name = NULL;
 		bind->name_len = 0;
 	}
-	if (luaL_tofield(L, luaL_msgpack_default, NULL, -1, &field) < 0)
+	if (luaL_tofield(L, luaL_msgpack_default, 0, NULL, -1, &field) < 0)
 		return -1;
 	switch (field.type) {
 	case MP_UINT:
diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
index caa08a60f..17b434ba5 100644
--- a/src/box/lua/serialize_lua.c
+++ b/src/box/lua/serialize_lua.c
@@ -115,6 +115,7 @@ struct lua_dumper {
 	/** Anchors for self references */
 	int anchortable_index;
 	unsigned int anchor_number;
+	int serialized_objs_idx;
 
 	/** Error message buffer */
 	char err_msg[256];
@@ -215,7 +216,7 @@ trace_node(struct lua_dumper *d)
 	struct luaL_field field;
 
 	memset(&field, 0, sizeof(field));
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &field);
 
 	if (field.type < lengthof(mp_type_names)) {
 		if (field.type == MP_EXT) {
@@ -234,7 +235,7 @@ trace_node(struct lua_dumper *d)
 
 	memset(&field, 0, sizeof(field));
 
-	luaL_checkfield(d->L, d->cfg, top, &field);
+	luaL_checkfield(d->L, d->cfg, 0, top, &field);
 	say_info("serializer-trace: node    :\tfield type %s (%d)",
 		 type_str, field.type);
 }
@@ -339,41 +340,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
 static const char *
 get_lua_anchor(struct lua_dumper *d)
 {
-	const char *s = "";
-
-	lua_pushvalue(d->L, -1);
-	lua_rawget(d->L, d->anchortable_index);
-	if (!lua_toboolean(d->L, -1)) {
-		lua_pop(d->L, 1);
-		return NULL;
+	const char *anchor = NULL;
+	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
+			      &anchor);
+	if (code == GET_ANCHOR_NAMED_NOT) {
+		trace_anchor(anchor, false);
+	} else if (code == GET_ANCHOR_ALIASED) {
+		trace_anchor(anchor, true);
+		return "";
 	}
-
-	if (lua_isboolean(d->L, -1)) {
-		/*
-		 * This element is referenced more
-		 * than once but has not been named.
-		 */
-		char buf[32];
-		snprintf(buf, sizeof(buf), "%u", d->anchor_number++);
-		lua_pop(d->L, 1);
-		lua_pushvalue(d->L, -1);
-		lua_pushstring(d->L, buf);
-		s = lua_tostring(d->L, -1);
-		lua_rawset(d->L, d->anchortable_index);
-		trace_anchor(s, false);
-	} else {
-		/*
-		 * An aliased element.
-		 *
-		 * FIXME: Need an example to use.
-		 *
-		 * const char *str = lua_tostring(d->L, -1);
-		 */
-		const char *str = lua_tostring(d->L, -1);
-		trace_anchor(str, true);
-		lua_pop(d->L, 1);
-	}
-	return s;
+	return anchor;
 }
 
 static void
@@ -783,7 +759,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
 		return -1;
 
 	memset(field, 0, sizeof(*field));
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), field);
 
 	switch (field->type) {
 	case MP_NIL:
@@ -881,49 +857,6 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
 	return emit_node(d, nd, indent, str, len);
 }
 
-/**
- * Find references to tables, we use it
- * to find self references in tables.
- */
-static void
-find_references(struct lua_dumper *d)
-{
-	int newval;
-
-	if (lua_type(d->L, -1) != LUA_TTABLE)
-		return;
-
-	/* Copy of a table for self refs */
-	lua_pushvalue(d->L, -1);
-	lua_rawget(d->L, d->anchortable_index);
-	if (lua_isnil(d->L, -1))
-		newval = 0;
-	else if (!lua_toboolean(d->L, -1))
-		newval = 1;
-	else
-		newval = -1;
-	lua_pop(d->L, 1);
-
-	if (newval != -1) {
-		lua_pushvalue(d->L, -1);
-		lua_pushboolean(d->L, newval);
-		lua_rawset(d->L, d->anchortable_index);
-	}
-
-	if (newval != 0)
-		return;
-
-	/*
-	 * Other values and keys in the table
-	 */
-	lua_pushnil(d->L);
-	while (lua_next(d->L, -2) != 0) {
-		find_references(d);
-		lua_pop(d->L, 1);
-		find_references(d);
-	}
-}
-
 /**
  * Dump recursively from the root node.
  */
@@ -935,7 +868,8 @@ dump_root(struct lua_dumper *d)
 	};
 	int ret;
 
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &nd.field);
+	luaL_checkfield(d->L, d->cfg, d->serialized_objs_idx,
+			lua_gettop(d->L), &nd.field);
 
 	if (nd.field.type != MP_ARRAY || nd.field.size != 1) {
 		d->err = EINVAL;
@@ -982,9 +916,13 @@ lua_encode(lua_State *L, struct luaL_serializer *serializer,
 	dumper.anchortable_index = lua_gettop(L);
 	dumper.anchor_number = 0;
 
+	lua_newtable(L);
+	dumper.serialized_objs_idx = lua_gettop(L);
+
 	/* Push copy of arg we're processing */
 	lua_pushvalue(L, 1);
-	find_references(&dumper);
+	find_references_and_serialize(dumper.L, dumper.anchortable_index,
+				      dumper.serialized_objs_idx);
 
 	if (dump_root(&dumper) != 0)
 		goto out;
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index f7198a025..2779cf786 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -427,7 +427,7 @@ luamp_convert_key(struct lua_State *L, struct luaL_serializer *cfg,
 		return tuple_to_mpstream(tuple, stream);
 
 	struct luaL_field field;
-	if (luaL_tofield(L, cfg, NULL, index, &field) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, index, &field) < 0)
 		luaT_error(L);
 	if (field.type == MP_ARRAY) {
 		lua_pushvalue(L, index);
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 9b6179f3a..2c7181f84 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -264,7 +264,7 @@ port_lua_get_vdbemem(struct port *base, uint32_t *size)
 		return NULL;
 	for (int i = 0; i < argc; i++) {
 		struct luaL_field field;
-		if (luaL_tofield(L, luaL_msgpack_default,
+		if (luaL_tofield(L, luaL_msgpack_default, 0,
 				 NULL, -1 - i, &field) < 0) {
 			goto error;
 		}
diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
index 1e74a6a3c..f9503a275 100644
--- a/src/lua/msgpack.c
+++ b/src/lua/msgpack.c
@@ -157,11 +157,11 @@ restart: /* used by MP_EXT of unidentified subtype */
 		lua_pushnil(L);  /* first key */
 		while (lua_next(L, top) != 0) {
 			lua_pushvalue(L, -2); /* push a copy of key to top */
-			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1); /* pop a copy of key */
-			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1); /* pop value */
@@ -182,7 +182,7 @@ restart: /* used by MP_EXT of unidentified subtype */
 		mpstream_encode_array(stream, size);
 		for (uint32_t i = 0; i < size; i++) {
 			lua_rawgeti(L, top, i + 1);
-			if (luaL_tofield(L, cfg, opts, top + 1, field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, top + 1, field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1);
@@ -205,7 +205,7 @@ restart: /* used by MP_EXT of unidentified subtype */
 			if (type != MP_EXT)
 				return type; /* Value has been packed by the trigger */
 			/* Try to convert value to serializable type */
-			luaL_convertfield(L, cfg, top, field);
+			luaL_convertfield(L, cfg, 0, top, field);
 			/* handled by luaL_convertfield */
 			assert(field->type != MP_EXT);
 			assert(lua_gettop(L) == top);
@@ -230,7 +230,7 @@ luamp_encode(struct lua_State *L, struct luaL_serializer *cfg,
 	}
 
 	struct luaL_field field;
-	if (luaL_tofield(L, cfg, opts, lua_gettop(L), &field) < 0)
+	if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), &field) < 0)
 		return luaT_error(L);
 	enum mp_type top_type = luamp_encode_r(L, cfg, opts, stream, &field, 0);
 
diff --git a/src/lua/pickle.c b/src/lua/pickle.c
index 65208b5b3..86f0f77e8 100644
--- a/src/lua/pickle.c
+++ b/src/lua/pickle.c
@@ -80,7 +80,7 @@ lbox_pack(struct lua_State *L)
 		if (i > nargs)
 			luaL_error(L, "pickle.pack: argument count does not match "
 				   "the format");
-		luaL_checkfield(L, luaL_msgpack_default, i, &field);
+		luaL_checkfield(L, luaL_msgpack_default, 0, i, &field);
 		switch (*format) {
 		case 'B':
 		case 'b':
diff --git a/src/lua/utils.c b/src/lua/utils.c
index b5a6ca5b7..0580f4094 100644
--- a/src/lua/utils.c
+++ b/src/lua/utils.c
@@ -441,7 +441,8 @@ lua_gettable_wrapper(lua_State *L)
 
 static void
 lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+			 int serialized_objs_idx, int idx,
+			 struct luaL_field *field)
 {
 	if (!cfg->encode_load_metatables)
 		return;
@@ -463,59 +464,280 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
 		lua_pcall(L, 1, 1, 0);
 		/* replace obj with the unpacked value */
 		lua_replace(L, idx);
-		if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+		if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, idx,
+			         field) < 0)
 			luaT_error(L);
 	} /* else ignore lua_gettable exceptions */
 	lua_settop(L, top); /* remove temporary objects */
 }
 
+int
+get_anchor(struct lua_State *L, int anchortable_index,
+	   unsigned int *anchor_number, const char **anchor)
+{
+	lua_pushvalue(L, -1);
+	lua_rawget(L, anchortable_index);
+	if (lua_toboolean(L, -1) == 0) {
+		/* This element is not referenced. */
+		lua_pop(L, 1);
+		*anchor = NULL;
+		return 0;
+	}
+
+	if (lua_isboolean(L, -1) != 0) {
+		/*
+		 * This element is referenced more than once but
+		 * has not been named.
+		 */
+		char buf[32];
+		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);
+		lua_pop(L, 1);
+		/* Generate a string anchor and push to table. */
+		lua_pushvalue(L, -1);
+		lua_pushstring(L, buf);
+		*anchor = lua_tostring(L, -1);
+		lua_rawset(L, anchortable_index);
+		return GET_ANCHOR_NAMED_NOT;
+	} else {
+		/* This is an aliased element. */
+		*anchor = lua_tostring(L, -1);
+		lua_pop(L, 1);
+		return GET_ANCHOR_ALIASED;
+	}
+	return GET_ANCHOR_NO_REFS;
+}
+
+void
+find_references_and_serialize(struct lua_State *L, int anchortable_index,
+			      int serialized_objs_idx)
+{
+	int type = lua_type(L, -1);
+	if (type != LUA_TTABLE)
+		return;
+	int node_idx = lua_gettop(L);
+
+	/*
+	 * Check if the node is already serialized i.e. the record
+	 * is already in the map of serialized objects.
+	 */
+	lua_pushvalue(L, node_idx);
+	lua_rawget(L, serialized_objs_idx);
+	bool srlzd = lua_isnil(L, -1) == 0;
+	/*
+	 * This flag indicates that the object is being serialized
+	 * in the current function call.
+	 */
+	bool serialization = false;
+
+	if (!srlzd && luaL_getmetafield(L, node_idx, LUAL_SERIALIZE) != 0 &&
+	    lua_isfunction(L, -1) != 0) {
+		/* Delete nil after lua_rawget() above. */
+		lua_replace(L, -2);
+		srlzd = true;
+		serialization = true;
+		/*
+		 * Copy object itself and call serialize function
+		 * for it.
+		 */
+		lua_pushvalue(L, node_idx);
+		if (lua_pcall(L, 1, 1, 0) != 0) {
+			diag_set(LuajitError, lua_tostring(L, -1));
+			luaT_error(L);
+		}
+		/*
+		 * Add the result of serialization to the
+		 * serialized_objs map. Key is node (node_idx),
+		 * value is the result of serialization (now on
+		 * the top of stack).
+		 */
+		lua_pushvalue(L, node_idx);
+		lua_pushvalue(L, -2);
+		lua_rawset(L, serialized_objs_idx);
+	}
+
+	if (srlzd) {
+		if (lua_type(L, -1) == LUA_TTABLE) {
+			/*
+			 * Now we will process the new node - the
+			 * result of serialization. It is
+			 * necessary to take it into account in
+			 * the anchor table.
+			 */
+			node_idx = lua_gettop(L);
+		} else {
+			lua_pop(L, 1);
+			return;
+		}
+	} else {
+		if (lua_isnil(L, -1) == 0)
+			/*
+			 * Pop an extra field thrown out
+			 * by luaL_getmetafield(), it was
+			 * not a function.
+			 */
+			lua_pop(L, 1);
+		/* Delete nil after lua_rawget() above. */
+		lua_pop(L, 1);
+	}
+	/*
+	 * Take an entry from the anchor table about the current
+	 * node.
+	 */
+	lua_pushvalue(L, node_idx);
+	lua_rawget(L, anchortable_index);
+	int newval = -1;
+	if (lua_isnil(L, -1) == 1)
+		/*
+		 * The node has not yet been encountered, it is
+		 * bypassed for the first time.
+		 */
+		newval = 0;
+	else if (lua_toboolean(L, -1) == 0)
+		 /* The node has already met once. */
+		newval = 1;
+	lua_pop(L, 1);
+	if (newval != -1) {
+		lua_pushvalue(L, node_idx);
+		lua_pushboolean(L, newval);
+		lua_rawset(L, anchortable_index);
+	}
+	if (srlzd && !serialization) {
+		/*
+		 * The node has already been serialized not in the
+		 * current call, so there is no point in going
+		 * further in depth and checking the leaves. Pop
+		 * the processed sterilization result.
+		 */
+		lua_pop(L, 1);
+		return;
+	}
+	if (newval)
+		/* The node has already met twice or more, so
+		 * there is no point in going further in depth and
+		 * checking the leaves.
+		 */
+		return;
+
+	/* Recursively process other table values. */
+	lua_pushnil(L);
+	while (lua_next(L, node_idx) != 0) {
+		/* Find references on value. */
+		find_references_and_serialize(L, anchortable_index,
+					      serialized_objs_idx);
+		lua_pop(L, 1);
+		/* Find references on key. */
+		find_references_and_serialize(L, anchortable_index,
+					      serialized_objs_idx);
+	}
+	if (serialization)
+		/*
+		 * The loop above processed the nodes inside the
+		 * serialization result. Pop it, so as not to
+		 * break the integrity of the loop of the previous
+		 * call in recursion.
+		 */
+		lua_pop(L, 1);
+
+	return;
+}
+
+enum try_serialize_ret_code
+{
+
+	TRY_SERIALIZE_ERROR = -1,
+	TRY_SERIALIZE_RES_TABLE_NOT = 0,
+	TRY_SERIALIZE_RES_FROM_MAP,
+	TRY_SERIALIZE_DEFAULT_TABLE,
+};
+
 /**
  * Call __serialize method of a table object by index
- * if the former exists.
+ * if the former exists or pull result of serialization (if
+ * exists) from table with index @a serialized_objs_idx if it
+ * isn't 0.
  *
  * If __serialize does not exist then function does nothing
- * and the function returns 1;
+ * and the function returns TRY_SERIALIZE_DEFAULT_TABLE;
  *
  * If __serialize exists, is a function (which doesn't
  * raise any error) then a result of serialization
- * replaces old value by the index and the function returns 0;
+ * replaces old value by the index and the function returns
+ * TRY_SERIALIZE_RES_TABLE_NOT or TRY_SERIALIZE_DEFAULT_TABLE;
  *
  * If the serialization is a hint string (like 'array' or 'map'),
  * then field->type, field->size and field->compact
- * are set if necessary and the function returns 0;
+ * are set if necessary and the function returns
+ * TRY_SERIALIZE_RES_TABLE_NOT;
  *
- * Otherwise it is an error, set diag and the funciton returns -1;
+ * Otherwise it is an error, set diag and the funciton returns
+ * TRY_SERIALIZE_ERROR;
  *
  * Return values:
- * -1 - error occurs, diag is set, the top of guest stack
- *      is undefined.
- *  0 - __serialize field is available in the metatable,
- *      the result value is put in the origin slot,
- *      encoding is finished.
- *  1 - __serialize field is not available in the metatable,
- *      proceed with default table encoding.
+ * TRY_SERIALIZE_ERROR - error occurs, diag is set, the top of
+ * guest stack is undefined.
+ * TRY_SERIALIZE_RES_TABLE_NOT - __serialize field is available in
+ * the metatable, the result value is put in the origin slot,
+ * encoding is finished.
+ * TRY_SERIALIZE_RES_FROM_MAP - __serialize field is available in
+ * the metatable, the result value is table from serialized
+ * objects map.
+ * TRY_SERIALIZE_DEFAULT_TABLE - __serialize field is not
+ * available in the metatable, proceed with default table
+ * encoding.
  */
+
 static int
-lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+lua_field_try_serialize(struct lua_State *L, int serialized_objs_idx,
+			struct luaL_serializer *cfg, int idx,
+			struct luaL_field *field)
 {
 	if (luaL_getmetafield(L, idx, LUAL_SERIALIZE) == 0)
-		return 1;
+		return TRY_SERIALIZE_DEFAULT_TABLE;
 	if (lua_isfunction(L, -1)) {
-		/* copy object itself */
-		lua_pushvalue(L, idx);
-		if (lua_pcall(L, 1, 1, 0) != 0) {
-			diag_set(LuajitError, lua_tostring(L, -1));
-			return -1;
+		if (serialized_objs_idx != 0) {
+			/*
+			 * Pop the __serialize function for the
+			 * current node. It was already called in
+			 * find_references_and_serialize().
+			 */
+			lua_pop(L, 1);
+			/*
+			 * Get the result of serialization from
+			 * the map.
+			 */
+			lua_pushvalue(L, idx);
+			lua_rawget(L, serialized_objs_idx);
+			assert(lua_isnil(L, -1) == 0);
+
+			/*
+			 * Replace the serialized node with a new
+			 * result, if it is a table.
+			 */
+			if (lua_type(L, -1) == LUA_TTABLE) {
+				lua_replace(L, idx);
+				return TRY_SERIALIZE_RES_FROM_MAP;
+			}
+		} else {
+			/*
+			 * Serializer don't use map with
+			 * serialized objects. Copy object itself
+			 * and call __serialize for it.
+			 */
+			lua_pushvalue(L, idx);
+			if (lua_pcall(L, 1, 1, 0) != 0) {
+				diag_set(LuajitError, lua_tostring(L, -1));
+				return TRY_SERIALIZE_ERROR;
+			}
 		}
-		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
-			return -1;
+		if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, -1,
+				 field) != 0)
+			return TRY_SERIALIZE_ERROR;
 		lua_replace(L, idx);
-		return 0;
+		return TRY_SERIALIZE_RES_TABLE_NOT;
 	}
 	if (!lua_isstring(L, -1)) {
 		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
-		return -1;
+		return TRY_SERIALIZE_ERROR;
 	}
 	const char *type = lua_tostring(L, -1);
 	if (strcmp(type, "array") == 0 || strcmp(type, "seq") == 0 ||
@@ -533,16 +755,17 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
 			field->compact = true;
 	} else {
 		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
-		return -1;
+		return TRY_SERIALIZE_ERROR;
 	}
 	/* Remove value set by luaL_getmetafield. */
 	lua_pop(L, 1);
-	return 0;
+	return TRY_SERIALIZE_RES_TABLE_NOT;
 }
 
 static int
-lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+lua_field_inspect_table(struct lua_State *L, int serialized_objs_idx,
+			struct luaL_serializer *cfg, int idx,
+			struct luaL_field *field)
 {
 	assert(lua_type(L, idx) == LUA_TTABLE);
 	uint32_t size = 0;
@@ -550,14 +773,20 @@ lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
 
 	if (cfg->encode_load_metatables) {
 		int top = lua_gettop(L);
-		int res = lua_field_try_serialize(L, cfg, idx, field);
-		if (res == -1)
+		int res = lua_field_try_serialize(L, serialized_objs_idx, cfg,
+						  idx, field);
+		if (res == TRY_SERIALIZE_ERROR)
 			return -1;
 		assert(lua_gettop(L) == top);
 		(void)top;
-		if (res == 0)
+		if (res == TRY_SERIALIZE_RES_TABLE_NOT ||
+		    (res == TRY_SERIALIZE_RES_FROM_MAP &&
+		    lua_type(L, -1) != LUA_TTABLE))
 			return 0;
-		/* Fallthrough with res == 1 */
+		/*
+		 * Fallthrough with res TRY_SERIALIZE_RES_FROM_MAP
+		 * or TRY_SERIALIZE_DEFAULT_TABLE.
+		 */
 	}
 
 	field->type = MP_ARRAY;
@@ -612,14 +841,14 @@ lua_field_tostring(struct lua_State *L, struct luaL_serializer *cfg, int idx,
 	lua_call(L, 1, 1);
 	lua_replace(L, idx);
 	lua_settop(L, top);
-	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, idx, field) < 0)
 		luaT_error(L);
 }
 
 int
 luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
-	     const struct serializer_opts *opts, int index,
-	     struct luaL_field *field)
+	     int serialized_objs_idx, const struct serializer_opts *opts,
+	     int index, struct luaL_field *field)
 {
 	if (index < 0)
 		index = lua_gettop(L) + index + 1;
@@ -753,7 +982,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
 	case LUA_TTABLE:
 	{
 		field->compact = false;
-		return lua_field_inspect_table(L, cfg, index, field);
+		return lua_field_inspect_table(L, serialized_objs_idx, cfg,
+					       index, field);
 	}
 	case LUA_TLIGHTUSERDATA:
 	case LUA_TUSERDATA:
@@ -773,8 +1003,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
 }
 
 void
-luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		  struct luaL_field *field)
+luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
+		  int serialized_objs_idx, int idx, struct luaL_field *field)
 {
 	if (idx < 0)
 		idx = lua_gettop(L) + idx + 1;
@@ -789,9 +1019,12 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
 			 */
 			GCcdata *cd = cdataV(L->base + idx - 1);
 			if (cd->ctypeid > CTID_CTYPEID)
-				lua_field_inspect_ucdata(L, cfg, idx, field);
+				lua_field_inspect_ucdata(L, cfg,
+							 serialized_objs_idx, idx,
+							 field);
 		} else if (type == LUA_TUSERDATA) {
-			lua_field_inspect_ucdata(L, cfg, idx, field);
+			lua_field_inspect_ucdata(L, cfg, serialized_objs_idx, idx,
+						 field);
 		}
 	}
 
diff --git a/src/lua/utils.h b/src/lua/utils.h
index 4a164868b..cb1025495 100644
--- a/src/lua/utils.h
+++ b/src/lua/utils.h
@@ -353,6 +353,44 @@ struct luaL_field {
 	bool compact;                /* a flag used by YAML serializer */
 };
 
+/**
+ * Find references to tables to look up self references in tables
+ * and serialize nodes if __serialize is function. Collect
+ * referenced nodes in a table with stack index @a
+ * anchortable_index and serialized nodes with results in a table
+ * with stack index @a serialized_objs_idx.
+ *
+ * @param L Lua stack.
+ * @param anchortable_index Index of anchor table on stack with
+          pairs: "node address" <-> "false (met once) or true
+          (met more than once)".
+ * @param serialized_objs_idx Index of serialized objects map on
+ *        stack with pairs: "node address" <-> "result address".
+ */
+void
+find_references_and_serialize(struct lua_State *L, int anchortable_index,
+			      int serialized_objs_idx);
+
+enum get_anchor_ret_code
+{
+	GET_ANCHOR_NO_REFS = 0,
+	GET_ANCHOR_NAMED_NOT,
+	GET_ANCHOR_ALIASED,
+};
+
+/**
+ * Generate aliases and anchor numbers for self references.
+ *
+ * @param L Lua stack.
+ * @param anchortable_index Index of anchor table on stack.
+ * @param[out] anchor_number Ptr to the number anchors in a
+               structure being under dumping.
+ * @param[out] anchor Ptr to anchor string.
+ */
+int
+get_anchor(struct lua_State *L, int anchortable_index,
+	   unsigned int *anchor_number, const char **anchor);
+
 /**
  * @brief Convert a value from the Lua stack to a lua_field structure.
  * This function is designed for use with Lua bindings and data
@@ -387,6 +425,8 @@ struct luaL_field {
  *
  * @param L stack
  * @param cfg configuration
+ * @param serialized_objs_idx Index of table with serialized
+                              objects map.
  * @param opts the Lua serializer additional options.
  * @param index stack index
  * @param field conversion result
@@ -396,8 +436,8 @@ struct luaL_field {
  */
 int
 luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
-	     const struct serializer_opts *opts, int index,
-	     struct luaL_field *field);
+	     int serialized_objs_idx, const struct serializer_opts *opts,
+	     int index, struct luaL_field *field);
 
 /**
  * @brief Try to convert userdata/cdata values using defined conversion logic.
@@ -405,18 +445,22 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
  *
  * @param L stack
  * @param cfg configuration
+ * @param serialized_objs_idx Index of table with serialized
+                              objects map.
  * @param idx stack index
  * @param field conversion result
  */
 void
-luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		  struct luaL_field *field);
+luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
+		  int serialized_objs_idx, int idx, struct luaL_field *field);
 
 /**
  * @brief A wrapper for luaL_tofield() and luaL_convertfield() that
  * tries to convert value or raise an error.
  * @param L stack
  * @param cfg configuration
+ * @param serialized_objs_idx Stack index of table with serialized
+                              objects map.
  * @param idx stack index
  * @param field conversion result
  * @sa lua_tofield()
@@ -432,14 +476,14 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
  * (tostring) -> (nil) -> exception
  */
 static inline void
-luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		struct luaL_field *field)
+luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg,
+		int serialized_objs_idx, int idx, struct luaL_field *field)
 {
-	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+	if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, idx, field) < 0)
 		luaT_error(L);
 	if (field->type != MP_EXT || field->ext_type != MP_UNKNOWN_EXTENSION)
 		return;
-	luaL_convertfield(L, cfg, idx, field);
+	luaL_convertfield(L, cfg, serialized_objs_idx, idx, field);
 }
 
 void
diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
new file mode 100644
index 000000000..cb8c73de0
--- /dev/null
+++ b/test/app/gh-3228-serializer-look-for-recursion.result
@@ -0,0 +1,211 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+--
+-- gh-3228: Check that recursive structures are serialized
+-- properly.
+--
+setmetatable({}, {__serialize = function(a) return a end})
+ | ---
+ | - []
+ | ...
+setmetatable({}, {__serialize = function(a) return {a} end})
+ | ---
+ | - &0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, a} end})
+ | ---
+ | - &0
+ |   - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, a, a} end})
+ | ---
+ | - &0
+ |   - *0
+ |   - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
+ | ---
+ | - &0
+ |   - - *0
+ |     - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, 1} end})
+ | ---
+ | - &0
+ |   - *0
+ |   - 1
+ | ...
+setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
+ | ---
+ | - &0
+ |   - - - - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
+ | ---
+ | - - - - - 1
+ | ...
+
+b = {}
+ | ---
+ | ...
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
+ | ---
+ | - &0
+ |   b_2: &1 []
+ |   a_2: *0
+ |   a_1: *0
+ |   b_1: *1
+ | ...
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
+ | ---
+ | - &0
+ |   b: &1 []
+ |   a_2:
+ |   - *0
+ |   - *1
+ |   a_1: *0
+ | ...
+
+a = {}
+ | ---
+ | ...
+b = {a = a}
+ | ---
+ | ...
+b[b] = b
+ | ---
+ | ...
+a[b] = a
+ | ---
+ | ...
+setmetatable(b, {__serialize = function(...) return {a} end})
+ | ---
+ | - &0
+ |   - &1
+ |     *0: *1
+ | ...
+a
+ | ---
+ | - &0
+ |   ? - *0
+ |   : *0
+ | ...
+
+a = {}
+ | ---
+ | ...
+b = {}
+ | ---
+ | ...
+a[b] = b
+ | ---
+ | ...
+setmetatable(b, {__serialize = function(...) return print end})
+ | ---
+ | - 'function: builtin#29'
+ | ...
+a
+ | ---
+ | - 'function: builtin#29': 'function: builtin#29'
+ | ...
+
+a = {}
+ | ---
+ | ...
+a[a] = a
+ | ---
+ | ...
+recf = function(t) return setmetatable({}, {__serialize = recf}) end
+ | ---
+ | ...
+setmetatable(a, {__serialize = recf}) return a
+ | ---
+ | - []
+ | ...
+
+--
+-- __serialize function is pure, i.e. always returns identical
+-- value for identical argument. Otherwise, the behavior is
+-- undefined. So that, we ignore the side effects and just use the
+-- value after the first serialization.
+--
+a = {}
+ | ---
+ | ...
+b = {}
+ | ---
+ | ...
+b[a] = a
+ | ---
+ | ...
+show_a = true
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+serialize = function()
+    show_a = not show_a
+    if show_a then
+        return "a"
+    else
+        return "b" end
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+setmetatable(a, {__serialize = serialize})
+ | ---
+ | - b
+ | ...
+b
+ | ---
+ | - a: a
+ | ...
+
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+function example()
+    local a = {}
+    local b = {}
+    b[a] = a
+    local reta
+    local retb
+    local function swap_ser(o)
+        local newf
+        local f = getmetatable(o).__serialize
+        if f == reta then
+            newf = retb
+        else
+            newf = reta
+        end
+        getmetatable(o).__serialize = newf
+    end
+    reta = function(o) swap_ser(o) return "a" end
+    retb = function(o) swap_ser(o) return "b" end
+    setmetatable(a, {__serialize = reta})
+    return b
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+example()
+ | ---
+ | - a: a
+ | ...
diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
new file mode 100644
index 000000000..bfa046a16
--- /dev/null
+++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
@@ -0,0 +1,83 @@
+test_run = require('test_run').new()
+
+--
+-- gh-3228: Check that recursive structures are serialized
+-- properly.
+--
+setmetatable({}, {__serialize = function(a) return a end})
+setmetatable({}, {__serialize = function(a) return {a} end})
+setmetatable({}, {__serialize = function(a) return {a, a} end})
+setmetatable({}, {__serialize = function(a) return {a, a, a} end})
+setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
+setmetatable({}, {__serialize = function(a) return {a, 1} end})
+setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
+setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
+
+b = {}
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
+
+a = {}
+b = {a = a}
+b[b] = b
+a[b] = a
+setmetatable(b, {__serialize = function(...) return {a} end})
+a
+
+a = {}
+b = {}
+a[b] = b
+setmetatable(b, {__serialize = function(...) return print end})
+a
+
+a = {}
+a[a] = a
+recf = function(t) return setmetatable({}, {__serialize = recf}) end
+setmetatable(a, {__serialize = recf}) return a
+
+--
+-- __serialize function is pure, i.e. always returns identical
+-- value for identical argument. Otherwise, the behavior is
+-- undefined. So that, we ignore the side effects and just use the
+-- value after the first serialization.
+--
+a = {}
+b = {}
+b[a] = a
+show_a = true
+test_run:cmd('setopt delimiter ";"')
+serialize = function()
+    show_a = not show_a
+    if show_a then
+        return "a"
+    else
+        return "b" end
+end;
+test_run:cmd('setopt delimiter ""');
+setmetatable(a, {__serialize = serialize})
+b
+
+test_run:cmd('setopt delimiter ";"')
+function example()
+    local a = {}
+    local b = {}
+    b[a] = a
+    local reta
+    local retb
+    local function swap_ser(o)
+        local newf
+        local f = getmetatable(o).__serialize
+        if f == reta then
+            newf = retb
+        else
+            newf = reta
+        end
+        getmetatable(o).__serialize = newf
+    end
+    reta = function(o) swap_ser(o) return "a" end
+    retb = function(o) swap_ser(o) return "b" end
+    setmetatable(a, {__serialize = reta})
+    return b
+end;
+test_run:cmd('setopt delimiter ""');
+example()
diff --git a/test/swim/swim.result b/test/swim/swim.result
index bfc83c143..539131677 100644
--- a/test/swim/swim.result
+++ b/test/swim/swim.result
@@ -1322,16 +1322,13 @@ m_list
     incarnation: cdata {generation = 0ULL, version = 1ULL}
     uuid: 00000000-0000-1000-8000-000000000002
     payload_size: 0
-  - uri: 127.0.0.1:<port>
-    status: alive
-    incarnation: cdata {generation = 0ULL, version = 2ULL}
-    uuid: 00000000-0000-1000-8000-000000000001
-    payload_size: 8
-  - uri: 127.0.0.1:<port>
+  - &0
+    uri: 127.0.0.1:<port>
     status: alive
     incarnation: cdata {generation = 0ULL, version = 2ULL}
     uuid: 00000000-0000-1000-8000-000000000001
     payload_size: 8
+  - *0
 ...
 e_list
 ---
@@ -1374,16 +1371,13 @@ fiber.sleep(0)
 -- Two events - status update to 'left', and 'drop'.
 m_list
 ---
-- - uri: 127.0.0.1:<port>
-    status: left
-    incarnation: cdata {generation = 0ULL, version = 1ULL}
-    uuid: 00000000-0000-1000-8000-000000000002
-    payload_size: 0
-  - uri: 127.0.0.1:<port>
+- - &0
+    uri: 127.0.0.1:<port>
     status: left
     incarnation: cdata {generation = 0ULL, version = 1ULL}
     uuid: 00000000-0000-1000-8000-000000000002
     payload_size: 0
+  - *0
 ...
 e_list
 ---
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 38e999870..67a9762bc 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -354,7 +354,7 @@ static void json_append_object(lua_State *l, struct luaL_serializer *cfg,
             comma = 1;
 
     struct luaL_field field;
-    luaL_checkfield(l, cfg, -2, &field);
+    luaL_checkfield(l, cfg, 0, -2, &field);
     if (field.type == MP_UINT) {
         strbuf_append_char(json, '"');
         json_append_uint(cfg, json, field.ival);
@@ -384,7 +384,7 @@ static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
                              int current_depth, strbuf_t *json)
 {
     struct luaL_field field;
-    luaL_checkfield(l, cfg, -1, &field);
+    luaL_checkfield(l, cfg, 0, -1, &field);
     switch (field.type) {
     case MP_UINT:
         return json_append_uint(cfg, json, field.ival);
diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
index 9c3a4a646..249603ba3 100644
--- a/third_party/lua-yaml/lyaml.cc
+++ b/third_party/lua-yaml/lyaml.cc
@@ -79,6 +79,7 @@ struct lua_yaml_dumper {
    lua_State *L;
    struct luaL_serializer *cfg;
    int anchortable_index;
+   int serialized_objs_idx;
    unsigned int anchor_number;
    yaml_emitter_t emitter;
    char error;
@@ -500,33 +501,16 @@ usage_error:
 static int dump_node(struct lua_yaml_dumper *dumper);
 
 static yaml_char_t *get_yaml_anchor(struct lua_yaml_dumper *dumper) {
-   const char *s = "";
-   lua_pushvalue(dumper->L, -1);
-   lua_rawget(dumper->L, dumper->anchortable_index);
-   if (!lua_toboolean(dumper->L, -1)) {
-      lua_pop(dumper->L, 1);
-      return NULL;
-   }
-
-   if (lua_isboolean(dumper->L, -1)) {
-      /* this element is referenced more than once but has not been named */
-      char buf[32];
-      snprintf(buf, sizeof(buf), "%u", dumper->anchor_number++);
-      lua_pop(dumper->L, 1);
-      lua_pushvalue(dumper->L, -1);
-      lua_pushstring(dumper->L, buf);
-      s = lua_tostring(dumper->L, -1);
-      lua_rawset(dumper->L, dumper->anchortable_index);
-   } else {
-      /* this is an aliased element */
+   const char *anchor = NULL;
+   if (get_anchor(dumper->L, dumper->anchortable_index, &dumper->anchor_number,
+                  &anchor) == GET_ANCHOR_ALIASED) {
       yaml_event_t ev;
-      const char *str = lua_tostring(dumper->L, -1);
-      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) str) ||
+      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) anchor) ||
           !yaml_emitter_emit(&dumper->emitter, &ev))
          luaL_error(dumper->L, OOM_ERRMSG);
-      lua_pop(dumper->L, 1);
+      return (yaml_char_t *)"";
    }
-   return (yaml_char_t *)s;
+   return (yaml_char_t *)anchor;
 }
 
 static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
@@ -622,7 +606,8 @@ static int dump_node(struct lua_yaml_dumper *dumper)
    (void) unused;
 
    int top = lua_gettop(dumper->L);
-   luaL_checkfield(dumper->L, dumper->cfg, top, &field);
+   luaL_checkfield(dumper->L, dumper->cfg, dumper->serialized_objs_idx,
+                   top, &field);
    switch(field.type) {
    case MP_UINT:
       snprintf(buf, sizeof(buf) - 1, "%" PRIu64, field.ival);
@@ -745,35 +730,6 @@ static int append_output(void *arg, unsigned char *buf, size_t len) {
    return 1;
 }
 
-static void find_references(struct lua_yaml_dumper *dumper) {
-   int newval = -1, type = lua_type(dumper->L, -1);
-   if (type != LUA_TTABLE)
-      return;
-
-   lua_pushvalue(dumper->L, -1); /* push copy of table */
-   lua_rawget(dumper->L, dumper->anchortable_index);
-   if (lua_isnil(dumper->L, -1))
-      newval = 0;
-   else if (!lua_toboolean(dumper->L, -1))
-      newval = 1;
-   lua_pop(dumper->L, 1);
-   if (newval != -1) {
-      lua_pushvalue(dumper->L, -1);
-      lua_pushboolean(dumper->L, newval);
-      lua_rawset(dumper->L, dumper->anchortable_index);
-   }
-   if (newval)
-      return;
-
-   /* recursively process other table values */
-   lua_pushnil(dumper->L);
-   while (lua_next(dumper->L, -2) != 0) {
-      find_references(dumper); /* find references on value */
-      lua_pop(dumper->L, 1);
-      find_references(dumper); /* find references on key */
-   }
-}
-
 int
 lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
                 const char *tag_handle, const char *tag_prefix)
@@ -817,9 +773,12 @@ lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
 
    lua_newtable(L);
    dumper.anchortable_index = lua_gettop(L);
+   lua_newtable(L);
+   dumper.serialized_objs_idx = lua_gettop(L);
    dumper.anchor_number = 0;
    lua_pushvalue(L, 1); /* push copy of arg we're processing */
-   find_references(&dumper);
+   find_references_and_serialize(dumper.L, dumper.anchortable_index,
+                                 dumper.serialized_objs_idx);
    dump_document(&dumper);
    if (dumper.error)
       goto error;



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-03-11  5:49 [Tarantool-patches] [PATCH] serializer: serialize recursive structures Roman Khabibov via Tarantool-patches
  2021-03-14 12:42 ` Sergey Kaplun via Tarantool-patches
@ 2021-04-13 15:54 ` Sergey Bronnikov via Tarantool-patches
  2021-04-15 20:39   ` Roman Khabibov via Tarantool-patches
  2021-04-21 12:34 ` Sergey Bronnikov via Tarantool-patches
  2 siblings, 1 reply; 9+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2021-04-13 15:54 UTC (permalink / raw)
  To: Roman Khabibov, tml

Hello!

thanks for the patch!

1. I have reverted all changes and test passed.

2. In commit message you declares recursive structures support.
What is the max supported depth of recursive structure?
I believe it is worth to describe possible limitations in commit message.

See inline

On 11.03.2021 08:49, Roman Khabibov via Tarantool-patches wrote:
> Fix bug with bus error during serializing of recursive
> structures.
>
> Closes #3228
> ---
>
> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3228-visited-set
> Issue: https://github.com/tarantool/tarantool/issues/3228
>
>   src/box/lua/call.c                            |   6 +-
>   src/box/lua/execute.c                         |   2 +-
>   src/box/lua/serialize_lua.c                   |  96 ++------
>   src/box/lua/tuple.c                           |   2 +-
>   src/box/sql/func.c                            |   2 +-
>   src/lua/msgpack.c                             |  10 +-
>   src/lua/pickle.c                              |   2 +-
>   src/lua/utils.c                               | 226 ++++++++++++++++--
>   src/lua/utils.h                               |  43 +++-
>   ...-3228-serializer-look-for-recursion.result |  67 ++++++
>   ...228-serializer-look-for-recursion.test.lua |  17 ++
>   test/swim/swim.result                         |  18 +-
>   third_party/lua-cjson/lua_cjson.c             |   4 +-
>   third_party/lua-yaml/lyaml.cc                 |  88 +++----
>   14 files changed, 390 insertions(+), 193 deletions(-)
>   create mode 100644 test/app/gh-3228-serializer-look-for-recursion.result
>   create mode 100644 test/app/gh-3228-serializer-look-for-recursion.test.lua

<snipped>

> diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> new file mode 100644
> index 000000000..1c9b0375f
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> @@ -0,0 +1,17 @@
> +test_run = require('test_run').new()
> +
> +--
> +-- gh-3228: Check the error message in the case of a __serialize
> +-- function generating infinite recursion.
> +--
> +setmetatable({}, {__serialize = function(a) return a end})
> +setmetatable({}, {__serialize = function(a) return {a} end})
> +setmetatable({}, {__serialize = function(a) return {a, a} end})
> +setmetatable({}, {__serialize = function(a) return {a, a, a} end})
> +setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
> +setmetatable({}, {__serialize = function(a) return {a, 1} end})
> +setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
> +setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})

3. Are you sure 4 levels depth is enough to check recursive structure?

What is about testing max depth?


> +b = {}
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
> diff --git a/test/swim/swim.result b/test/swim/swim.result
> index bfc83c143..539131677 100644
> --- a/test/swim/swim.result
> +++ b/test/swim/swim.result
> @@ -1322,16 +1322,13 @@ m_list
>       incarnation: cdata {generation = 0ULL, version = 1ULL}
>       uuid: 00000000-0000-1000-8000-000000000002
>       payload_size: 0
> -  - uri: 127.0.0.1:<port>
> -    status: alive
> -    incarnation: cdata {generation = 0ULL, version = 2ULL}
> -    uuid: 00000000-0000-1000-8000-000000000001
> -    payload_size: 8
> -  - uri: 127.0.0.1:<port>
> +  - &0
> +    uri: 127.0.0.1:<port>
>       status: alive
>       incarnation: cdata {generation = 0ULL, version = 2ULL}
>       uuid: 00000000-0000-1000-8000-000000000001
>       payload_size: 8
> +  - *0
>   ...
>   e_list
>   ---
> @@ -1374,16 +1371,13 @@ fiber.sleep(0)
>   -- Two events - status update to 'left', and 'drop'.
>   m_list
>   ---
> -- - uri: 127.0.0.1:<port>
> -    status: left
> -    incarnation: cdata {generation = 0ULL, version = 1ULL}
> -    uuid: 00000000-0000-1000-8000-000000000002
> -    payload_size: 0
> -  - uri: 127.0.0.1:<port>
> +- - &0
> +    uri: 127.0.0.1:<port>
>       status: left
>       incarnation: cdata {generation = 0ULL, version = 1ULL}
>       uuid: 00000000-0000-1000-8000-000000000002
>       payload_size: 0
> +  - *0
>   ...
>   e_list
>   ---

4. How changes in swim test related to patch?

It is not clear from commit message.

<snipped>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-04-13 15:54 ` Sergey Bronnikov via Tarantool-patches
@ 2021-04-15 20:39   ` Roman Khabibov via Tarantool-patches
  0 siblings, 0 replies; 9+ messages in thread
From: Roman Khabibov via Tarantool-patches @ 2021-04-15 20:39 UTC (permalink / raw)
  To: Sergey Bronnikov; +Cc: tml

Hi! Thanks for the review.

> On Apr 13, 2021, at 20:54, Sergey Bronnikov <sergeyb@tarantool.org> wrote:
> 
> Hello!
> 
> thanks for the patch!
> 
> 1. I have reverted all changes and test passed.
I dropped this tests.

> 2. In commit message you declares recursive structures support.
> What is the max supported depth of recursive structure?
> I believe it is worth to describe possible limitations in commit message.
Hmm, I think this term ‘depth' (the number of nested tables) is not related
to this patch.
For example,
setmetatable({}, {__serialize = function(a) return {{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} end})
This case works on master. The number of nested tables is defined by size of
allocated memory.

> See inline
> 
> On 11.03.2021 08:49, Roman Khabibov via Tarantool-patches wrote:
>> Fix bug with bus error during serializing of recursive
>> structures.
>> 
>> Closes #3228
>> ---
>> 
>> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3228-visited-set
>> Issue: https://github.com/tarantool/tarantool/issues/3228
>> 
>>  src/box/lua/call.c                            |   6 +-
>>  src/box/lua/execute.c                         |   2 +-
>>  src/box/lua/serialize_lua.c                   |  96 ++------
>>  src/box/lua/tuple.c                           |   2 +-
>>  src/box/sql/func.c                            |   2 +-
>>  src/lua/msgpack.c                             |  10 +-
>>  src/lua/pickle.c                              |   2 +-
>>  src/lua/utils.c                               | 226 ++++++++++++++++--
>>  src/lua/utils.h                               |  43 +++-
>>  ...-3228-serializer-look-for-recursion.result |  67 ++++++
>>  ...228-serializer-look-for-recursion.test.lua |  17 ++
>>  test/swim/swim.result                         |  18 +-
>>  third_party/lua-cjson/lua_cjson.c             |   4 +-
>>  third_party/lua-yaml/lyaml.cc                 |  88 +++----
>>  14 files changed, 390 insertions(+), 193 deletions(-)
>>  create mode 100644 test/app/gh-3228-serializer-look-for-recursion.result
>>  create mode 100644 test/app/gh-3228-serializer-look-for-recursion.test.lua
> 
> <snipped>
> 
>> diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
>> new file mode 100644
>> index 000000000..1c9b0375f
>> --- /dev/null
>> +++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
>> @@ -0,0 +1,17 @@
>> +test_run = require('test_run').new()
>> +
>> +--
>> +-- gh-3228: Check the error message in the case of a __serialize
>> +-- function generating infinite recursion.
>> +--
>> +setmetatable({}, {__serialize = function(a) return a end})
>> +setmetatable({}, {__serialize = function(a) return {a} end})
>> +setmetatable({}, {__serialize = function(a) return {a, a} end})
>> +setmetatable({}, {__serialize = function(a) return {a, a, a} end})
>> +setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
>> +setmetatable({}, {__serialize = function(a) return {a, 1} end})
>> +setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
>> +setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
> 
> 3. Are you sure 4 levels depth is enough to check recursive structure?
> 
> What is about testing max depth?
^^^
> 
>> +b = {}
>> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
>> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
>> diff --git a/test/swim/swim.result b/test/swim/swim.result
>> index bfc83c143..539131677 100644
>> --- a/test/swim/swim.result
>> +++ b/test/swim/swim.result
>> @@ -1322,16 +1322,13 @@ m_list
>>      incarnation: cdata {generation = 0ULL, version = 1ULL}
>>      uuid: 00000000-0000-1000-8000-000000000002
>>      payload_size: 0
>> -  - uri: 127.0.0.1:<port>
>> -    status: alive
>> -    incarnation: cdata {generation = 0ULL, version = 2ULL}
>> -    uuid: 00000000-0000-1000-8000-000000000001
>> -    payload_size: 8
>> -  - uri: 127.0.0.1:<port>
>> +  - &0
>> +    uri: 127.0.0.1:<port>
>>      status: alive
>>      incarnation: cdata {generation = 0ULL, version = 2ULL}
>>      uuid: 00000000-0000-1000-8000-000000000001
>>      payload_size: 8
>> +  - *0
>>  ...
>>  e_list
>>  ---
>> @@ -1374,16 +1371,13 @@ fiber.sleep(0)
>>  -- Two events - status update to 'left', and 'drop'.
>>  m_list
>>  ---
>> -- - uri: 127.0.0.1:<port>
>> -    status: left
>> -    incarnation: cdata {generation = 0ULL, version = 1ULL}
>> -    uuid: 00000000-0000-1000-8000-000000000002
>> -    payload_size: 0
>> -  - uri: 127.0.0.1:<port>
>> +- - &0
>> +    uri: 127.0.0.1:<port>
>>      status: left
>>      incarnation: cdata {generation = 0ULL, version = 1ULL}
>>      uuid: 00000000-0000-1000-8000-000000000002
>>      payload_size: 0
>> +  - *0
>>  ...
>>  e_list
>>  ---
> 
> 4. How changes in swim test related to patch?
> 
> It is not clear from commit message.
See new commit message.

> <snipped>

commit 7206b41ff1456e71474c2ff84b214f770e5897af
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Fri Feb 5 16:55:56 2021 +0300

    serializer: serialize recursive structures
    
    Fix bug with bus error during serializing of recursive structures
    and always print aliases if node become referenced before or
    after serialization.
    
    This patch adds map with serialized nodes and results. The map is
    used to mark referenced nodes which appears during serializing.
    
    Closes #3228
    
    @TarantoolBot
    __serialize parameter
    If __serialize parameter is initialized as a function then this
    function should be pure. A pure function is a function which
    returns identical values for identical arguments (no variation
    with local static variables, non-local variables, mutable
    reference arguments or input streams). Otherwise, the behavior
    is undefined.

diff --git a/changelogs/unreleased/serializer-bug.md b/changelogs/unreleased/serializer-bug.md
new file mode 100755
index 000000000..96f359d64
--- /dev/null
+++ b/changelogs/unreleased/serializer-bug.md
@@ -0,0 +1,4 @@
+## bugfix/core
+
+* Recursive structures appeared after __serialize call don't lead
+* to segfault anymore in lua and yaml serializers (gh-5392).
diff --git a/src/box/lua/call.c b/src/box/lua/call.c
index 0315e720c..e4907a3f7 100644
--- a/src/box/lua/call.c
+++ b/src/box/lua/call.c
@@ -193,7 +193,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 		 */
 		for (int i = 1; i <= nrets; ++i) {
 			struct luaL_field field;
-			if (luaL_tofield(L, cfg, NULL, i, &field) < 0)
+			if (luaL_tofield(L, cfg, 0, NULL, i, &field) < 0)
 				return luaT_error(L);
 			struct tuple *tuple;
 			if (field.type == MP_EXT &&
@@ -222,7 +222,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 	 * Inspect the first result
 	 */
 	struct luaL_field root;
-	if (luaL_tofield(L, cfg, NULL, 1, &root) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, 1, &root) < 0)
 		return luaT_error(L);
 	struct tuple *tuple;
 	if (root.type == MP_EXT && (tuple = luaT_istuple(L, 1)) != NULL) {
@@ -252,7 +252,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
 	for (uint32_t t = 1; t <= root.size; t++) {
 		lua_rawgeti(L, 1, t);
 		struct luaL_field field;
-		if (luaL_tofield(L, cfg, NULL, -1, &field) < 0)
+		if (luaL_tofield(L, cfg, 0, NULL, -1, &field) < 0)
 			return luaT_error(L);
 		if (field.type == MP_EXT && (tuple = luaT_istuple(L, -1))) {
 			tuple_to_mpstream(tuple, stream);
diff --git a/src/box/lua/execute.c b/src/box/lua/execute.c
index 926a0a61c..76d271f5c 100644
--- a/src/box/lua/execute.c
+++ b/src/box/lua/execute.c
@@ -328,7 +328,7 @@ lua_sql_bind_decode(struct lua_State *L, struct sql_bind *bind, int idx, int i)
 		bind->name = NULL;
 		bind->name_len = 0;
 	}
-	if (luaL_tofield(L, luaL_msgpack_default, NULL, -1, &field) < 0)
+	if (luaL_tofield(L, luaL_msgpack_default, 0, NULL, -1, &field) < 0)
 		return -1;
 	switch (field.type) {
 	case MP_UINT:
diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
index caa08a60f..17b434ba5 100644
--- a/src/box/lua/serialize_lua.c
+++ b/src/box/lua/serialize_lua.c
@@ -115,6 +115,7 @@ struct lua_dumper {
 	/** Anchors for self references */
 	int anchortable_index;
 	unsigned int anchor_number;
+	int serialized_objs_idx;
 
 	/** Error message buffer */
 	char err_msg[256];
@@ -215,7 +216,7 @@ trace_node(struct lua_dumper *d)
 	struct luaL_field field;
 
 	memset(&field, 0, sizeof(field));
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &field);
 
 	if (field.type < lengthof(mp_type_names)) {
 		if (field.type == MP_EXT) {
@@ -234,7 +235,7 @@ trace_node(struct lua_dumper *d)
 
 	memset(&field, 0, sizeof(field));
 
-	luaL_checkfield(d->L, d->cfg, top, &field);
+	luaL_checkfield(d->L, d->cfg, 0, top, &field);
 	say_info("serializer-trace: node    :\tfield type %s (%d)",
 		 type_str, field.type);
 }
@@ -339,41 +340,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
 static const char *
 get_lua_anchor(struct lua_dumper *d)
 {
-	const char *s = "";
-
-	lua_pushvalue(d->L, -1);
-	lua_rawget(d->L, d->anchortable_index);
-	if (!lua_toboolean(d->L, -1)) {
-		lua_pop(d->L, 1);
-		return NULL;
+	const char *anchor = NULL;
+	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
+			      &anchor);
+	if (code == GET_ANCHOR_NAMED_NOT) {
+		trace_anchor(anchor, false);
+	} else if (code == GET_ANCHOR_ALIASED) {
+		trace_anchor(anchor, true);
+		return "";
 	}
-
-	if (lua_isboolean(d->L, -1)) {
-		/*
-		 * This element is referenced more
-		 * than once but has not been named.
-		 */
-		char buf[32];
-		snprintf(buf, sizeof(buf), "%u", d->anchor_number++);
-		lua_pop(d->L, 1);
-		lua_pushvalue(d->L, -1);
-		lua_pushstring(d->L, buf);
-		s = lua_tostring(d->L, -1);
-		lua_rawset(d->L, d->anchortable_index);
-		trace_anchor(s, false);
-	} else {
-		/*
-		 * An aliased element.
-		 *
-		 * FIXME: Need an example to use.
-		 *
-		 * const char *str = lua_tostring(d->L, -1);
-		 */
-		const char *str = lua_tostring(d->L, -1);
-		trace_anchor(str, true);
-		lua_pop(d->L, 1);
-	}
-	return s;
+	return anchor;
 }
 
 static void
@@ -783,7 +759,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
 		return -1;
 
 	memset(field, 0, sizeof(*field));
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), field);
+	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), field);
 
 	switch (field->type) {
 	case MP_NIL:
@@ -881,49 +857,6 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
 	return emit_node(d, nd, indent, str, len);
 }
 
-/**
- * Find references to tables, we use it
- * to find self references in tables.
- */
-static void
-find_references(struct lua_dumper *d)
-{
-	int newval;
-
-	if (lua_type(d->L, -1) != LUA_TTABLE)
-		return;
-
-	/* Copy of a table for self refs */
-	lua_pushvalue(d->L, -1);
-	lua_rawget(d->L, d->anchortable_index);
-	if (lua_isnil(d->L, -1))
-		newval = 0;
-	else if (!lua_toboolean(d->L, -1))
-		newval = 1;
-	else
-		newval = -1;
-	lua_pop(d->L, 1);
-
-	if (newval != -1) {
-		lua_pushvalue(d->L, -1);
-		lua_pushboolean(d->L, newval);
-		lua_rawset(d->L, d->anchortable_index);
-	}
-
-	if (newval != 0)
-		return;
-
-	/*
-	 * Other values and keys in the table
-	 */
-	lua_pushnil(d->L);
-	while (lua_next(d->L, -2) != 0) {
-		find_references(d);
-		lua_pop(d->L, 1);
-		find_references(d);
-	}
-}
-
 /**
  * Dump recursively from the root node.
  */
@@ -935,7 +868,8 @@ dump_root(struct lua_dumper *d)
 	};
 	int ret;
 
-	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &nd.field);
+	luaL_checkfield(d->L, d->cfg, d->serialized_objs_idx,
+			lua_gettop(d->L), &nd.field);
 
 	if (nd.field.type != MP_ARRAY || nd.field.size != 1) {
 		d->err = EINVAL;
@@ -982,9 +916,13 @@ lua_encode(lua_State *L, struct luaL_serializer *serializer,
 	dumper.anchortable_index = lua_gettop(L);
 	dumper.anchor_number = 0;
 
+	lua_newtable(L);
+	dumper.serialized_objs_idx = lua_gettop(L);
+
 	/* Push copy of arg we're processing */
 	lua_pushvalue(L, 1);
-	find_references(&dumper);
+	find_references_and_serialize(dumper.L, dumper.anchortable_index,
+				      dumper.serialized_objs_idx);
 
 	if (dump_root(&dumper) != 0)
 		goto out;
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index f7198a025..2779cf786 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -427,7 +427,7 @@ luamp_convert_key(struct lua_State *L, struct luaL_serializer *cfg,
 		return tuple_to_mpstream(tuple, stream);
 
 	struct luaL_field field;
-	if (luaL_tofield(L, cfg, NULL, index, &field) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, index, &field) < 0)
 		luaT_error(L);
 	if (field.type == MP_ARRAY) {
 		lua_pushvalue(L, index);
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 9b6179f3a..2c7181f84 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -264,7 +264,7 @@ port_lua_get_vdbemem(struct port *base, uint32_t *size)
 		return NULL;
 	for (int i = 0; i < argc; i++) {
 		struct luaL_field field;
-		if (luaL_tofield(L, luaL_msgpack_default,
+		if (luaL_tofield(L, luaL_msgpack_default, 0,
 				 NULL, -1 - i, &field) < 0) {
 			goto error;
 		}
diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
index 1e74a6a3c..f9503a275 100644
--- a/src/lua/msgpack.c
+++ b/src/lua/msgpack.c
@@ -157,11 +157,11 @@ restart: /* used by MP_EXT of unidentified subtype */
 		lua_pushnil(L);  /* first key */
 		while (lua_next(L, top) != 0) {
 			lua_pushvalue(L, -2); /* push a copy of key to top */
-			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1); /* pop a copy of key */
-			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1); /* pop value */
@@ -182,7 +182,7 @@ restart: /* used by MP_EXT of unidentified subtype */
 		mpstream_encode_array(stream, size);
 		for (uint32_t i = 0; i < size; i++) {
 			lua_rawgeti(L, top, i + 1);
-			if (luaL_tofield(L, cfg, opts, top + 1, field) < 0)
+			if (luaL_tofield(L, cfg, 0, opts, top + 1, field) < 0)
 				return luaT_error(L);
 			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
 			lua_pop(L, 1);
@@ -205,7 +205,7 @@ restart: /* used by MP_EXT of unidentified subtype */
 			if (type != MP_EXT)
 				return type; /* Value has been packed by the trigger */
 			/* Try to convert value to serializable type */
-			luaL_convertfield(L, cfg, top, field);
+			luaL_convertfield(L, cfg, 0, top, field);
 			/* handled by luaL_convertfield */
 			assert(field->type != MP_EXT);
 			assert(lua_gettop(L) == top);
@@ -230,7 +230,7 @@ luamp_encode(struct lua_State *L, struct luaL_serializer *cfg,
 	}
 
 	struct luaL_field field;
-	if (luaL_tofield(L, cfg, opts, lua_gettop(L), &field) < 0)
+	if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), &field) < 0)
 		return luaT_error(L);
 	enum mp_type top_type = luamp_encode_r(L, cfg, opts, stream, &field, 0);
 
diff --git a/src/lua/pickle.c b/src/lua/pickle.c
index 65208b5b3..86f0f77e8 100644
--- a/src/lua/pickle.c
+++ b/src/lua/pickle.c
@@ -80,7 +80,7 @@ lbox_pack(struct lua_State *L)
 		if (i > nargs)
 			luaL_error(L, "pickle.pack: argument count does not match "
 				   "the format");
-		luaL_checkfield(L, luaL_msgpack_default, i, &field);
+		luaL_checkfield(L, luaL_msgpack_default, 0, i, &field);
 		switch (*format) {
 		case 'B':
 		case 'b':
diff --git a/src/lua/utils.c b/src/lua/utils.c
index b5a6ca5b7..0580f4094 100644
--- a/src/lua/utils.c
+++ b/src/lua/utils.c
@@ -441,7 +441,8 @@ lua_gettable_wrapper(lua_State *L)
 
 static void
 lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+			 int serialized_objs_idx, int idx,
+			 struct luaL_field *field)
 {
 	if (!cfg->encode_load_metatables)
 		return;
@@ -463,59 +464,280 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
 		lua_pcall(L, 1, 1, 0);
 		/* replace obj with the unpacked value */
 		lua_replace(L, idx);
-		if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+		if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, idx,
+			         field) < 0)
 			luaT_error(L);
 	} /* else ignore lua_gettable exceptions */
 	lua_settop(L, top); /* remove temporary objects */
 }
 
+int
+get_anchor(struct lua_State *L, int anchortable_index,
+	   unsigned int *anchor_number, const char **anchor)
+{
+	lua_pushvalue(L, -1);
+	lua_rawget(L, anchortable_index);
+	if (lua_toboolean(L, -1) == 0) {
+		/* This element is not referenced. */
+		lua_pop(L, 1);
+		*anchor = NULL;
+		return 0;
+	}
+
+	if (lua_isboolean(L, -1) != 0) {
+		/*
+		 * This element is referenced more than once but
+		 * has not been named.
+		 */
+		char buf[32];
+		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);
+		lua_pop(L, 1);
+		/* Generate a string anchor and push to table. */
+		lua_pushvalue(L, -1);
+		lua_pushstring(L, buf);
+		*anchor = lua_tostring(L, -1);
+		lua_rawset(L, anchortable_index);
+		return GET_ANCHOR_NAMED_NOT;
+	} else {
+		/* This is an aliased element. */
+		*anchor = lua_tostring(L, -1);
+		lua_pop(L, 1);
+		return GET_ANCHOR_ALIASED;
+	}
+	return GET_ANCHOR_NO_REFS;
+}
+
+void
+find_references_and_serialize(struct lua_State *L, int anchortable_index,
+			      int serialized_objs_idx)
+{
+	int type = lua_type(L, -1);
+	if (type != LUA_TTABLE)
+		return;
+	int node_idx = lua_gettop(L);
+
+	/*
+	 * Check if the node is already serialized i.e. the record
+	 * is already in the map of serialized objects.
+	 */
+	lua_pushvalue(L, node_idx);
+	lua_rawget(L, serialized_objs_idx);
+	bool srlzd = lua_isnil(L, -1) == 0;
+	/*
+	 * This flag indicates that the object is being serialized
+	 * in the current function call.
+	 */
+	bool serialization = false;
+
+	if (!srlzd && luaL_getmetafield(L, node_idx, LUAL_SERIALIZE) != 0 &&
+	    lua_isfunction(L, -1) != 0) {
+		/* Delete nil after lua_rawget() above. */
+		lua_replace(L, -2);
+		srlzd = true;
+		serialization = true;
+		/*
+		 * Copy object itself and call serialize function
+		 * for it.
+		 */
+		lua_pushvalue(L, node_idx);
+		if (lua_pcall(L, 1, 1, 0) != 0) {
+			diag_set(LuajitError, lua_tostring(L, -1));
+			luaT_error(L);
+		}
+		/*
+		 * Add the result of serialization to the
+		 * serialized_objs map. Key is node (node_idx),
+		 * value is the result of serialization (now on
+		 * the top of stack).
+		 */
+		lua_pushvalue(L, node_idx);
+		lua_pushvalue(L, -2);
+		lua_rawset(L, serialized_objs_idx);
+	}
+
+	if (srlzd) {
+		if (lua_type(L, -1) == LUA_TTABLE) {
+			/*
+			 * Now we will process the new node - the
+			 * result of serialization. It is
+			 * necessary to take it into account in
+			 * the anchor table.
+			 */
+			node_idx = lua_gettop(L);
+		} else {
+			lua_pop(L, 1);
+			return;
+		}
+	} else {
+		if (lua_isnil(L, -1) == 0)
+			/*
+			 * Pop an extra field thrown out
+			 * by luaL_getmetafield(), it was
+			 * not a function.
+			 */
+			lua_pop(L, 1);
+		/* Delete nil after lua_rawget() above. */
+		lua_pop(L, 1);
+	}
+	/*
+	 * Take an entry from the anchor table about the current
+	 * node.
+	 */
+	lua_pushvalue(L, node_idx);
+	lua_rawget(L, anchortable_index);
+	int newval = -1;
+	if (lua_isnil(L, -1) == 1)
+		/*
+		 * The node has not yet been encountered, it is
+		 * bypassed for the first time.
+		 */
+		newval = 0;
+	else if (lua_toboolean(L, -1) == 0)
+		 /* The node has already met once. */
+		newval = 1;
+	lua_pop(L, 1);
+	if (newval != -1) {
+		lua_pushvalue(L, node_idx);
+		lua_pushboolean(L, newval);
+		lua_rawset(L, anchortable_index);
+	}
+	if (srlzd && !serialization) {
+		/*
+		 * The node has already been serialized not in the
+		 * current call, so there is no point in going
+		 * further in depth and checking the leaves. Pop
+		 * the processed sterilization result.
+		 */
+		lua_pop(L, 1);
+		return;
+	}
+	if (newval)
+		/* The node has already met twice or more, so
+		 * there is no point in going further in depth and
+		 * checking the leaves.
+		 */
+		return;
+
+	/* Recursively process other table values. */
+	lua_pushnil(L);
+	while (lua_next(L, node_idx) != 0) {
+		/* Find references on value. */
+		find_references_and_serialize(L, anchortable_index,
+					      serialized_objs_idx);
+		lua_pop(L, 1);
+		/* Find references on key. */
+		find_references_and_serialize(L, anchortable_index,
+					      serialized_objs_idx);
+	}
+	if (serialization)
+		/*
+		 * The loop above processed the nodes inside the
+		 * serialization result. Pop it, so as not to
+		 * break the integrity of the loop of the previous
+		 * call in recursion.
+		 */
+		lua_pop(L, 1);
+
+	return;
+}
+
+enum try_serialize_ret_code
+{
+
+	TRY_SERIALIZE_ERROR = -1,
+	TRY_SERIALIZE_RES_TABLE_NOT = 0,
+	TRY_SERIALIZE_RES_FROM_MAP,
+	TRY_SERIALIZE_DEFAULT_TABLE,
+};
+
 /**
  * Call __serialize method of a table object by index
- * if the former exists.
+ * if the former exists or pull result of serialization (if
+ * exists) from table with index @a serialized_objs_idx if it
+ * isn't 0.
  *
  * If __serialize does not exist then function does nothing
- * and the function returns 1;
+ * and the function returns TRY_SERIALIZE_DEFAULT_TABLE;
  *
  * If __serialize exists, is a function (which doesn't
  * raise any error) then a result of serialization
- * replaces old value by the index and the function returns 0;
+ * replaces old value by the index and the function returns
+ * TRY_SERIALIZE_RES_TABLE_NOT or TRY_SERIALIZE_DEFAULT_TABLE;
  *
  * If the serialization is a hint string (like 'array' or 'map'),
  * then field->type, field->size and field->compact
- * are set if necessary and the function returns 0;
+ * are set if necessary and the function returns
+ * TRY_SERIALIZE_RES_TABLE_NOT;
  *
- * Otherwise it is an error, set diag and the funciton returns -1;
+ * Otherwise it is an error, set diag and the funciton returns
+ * TRY_SERIALIZE_ERROR;
  *
  * Return values:
- * -1 - error occurs, diag is set, the top of guest stack
- *      is undefined.
- *  0 - __serialize field is available in the metatable,
- *      the result value is put in the origin slot,
- *      encoding is finished.
- *  1 - __serialize field is not available in the metatable,
- *      proceed with default table encoding.
+ * TRY_SERIALIZE_ERROR - error occurs, diag is set, the top of
+ * guest stack is undefined.
+ * TRY_SERIALIZE_RES_TABLE_NOT - __serialize field is available in
+ * the metatable, the result value is put in the origin slot,
+ * encoding is finished.
+ * TRY_SERIALIZE_RES_FROM_MAP - __serialize field is available in
+ * the metatable, the result value is table from serialized
+ * objects map.
+ * TRY_SERIALIZE_DEFAULT_TABLE - __serialize field is not
+ * available in the metatable, proceed with default table
+ * encoding.
  */
+
 static int
-lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+lua_field_try_serialize(struct lua_State *L, int serialized_objs_idx,
+			struct luaL_serializer *cfg, int idx,
+			struct luaL_field *field)
 {
 	if (luaL_getmetafield(L, idx, LUAL_SERIALIZE) == 0)
-		return 1;
+		return TRY_SERIALIZE_DEFAULT_TABLE;
 	if (lua_isfunction(L, -1)) {
-		/* copy object itself */
-		lua_pushvalue(L, idx);
-		if (lua_pcall(L, 1, 1, 0) != 0) {
-			diag_set(LuajitError, lua_tostring(L, -1));
-			return -1;
+		if (serialized_objs_idx != 0) {
+			/*
+			 * Pop the __serialize function for the
+			 * current node. It was already called in
+			 * find_references_and_serialize().
+			 */
+			lua_pop(L, 1);
+			/*
+			 * Get the result of serialization from
+			 * the map.
+			 */
+			lua_pushvalue(L, idx);
+			lua_rawget(L, serialized_objs_idx);
+			assert(lua_isnil(L, -1) == 0);
+
+			/*
+			 * Replace the serialized node with a new
+			 * result, if it is a table.
+			 */
+			if (lua_type(L, -1) == LUA_TTABLE) {
+				lua_replace(L, idx);
+				return TRY_SERIALIZE_RES_FROM_MAP;
+			}
+		} else {
+			/*
+			 * Serializer don't use map with
+			 * serialized objects. Copy object itself
+			 * and call __serialize for it.
+			 */
+			lua_pushvalue(L, idx);
+			if (lua_pcall(L, 1, 1, 0) != 0) {
+				diag_set(LuajitError, lua_tostring(L, -1));
+				return TRY_SERIALIZE_ERROR;
+			}
 		}
-		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
-			return -1;
+		if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, -1,
+				 field) != 0)
+			return TRY_SERIALIZE_ERROR;
 		lua_replace(L, idx);
-		return 0;
+		return TRY_SERIALIZE_RES_TABLE_NOT;
 	}
 	if (!lua_isstring(L, -1)) {
 		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
-		return -1;
+		return TRY_SERIALIZE_ERROR;
 	}
 	const char *type = lua_tostring(L, -1);
 	if (strcmp(type, "array") == 0 || strcmp(type, "seq") == 0 ||
@@ -533,16 +755,17 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
 			field->compact = true;
 	} else {
 		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
-		return -1;
+		return TRY_SERIALIZE_ERROR;
 	}
 	/* Remove value set by luaL_getmetafield. */
 	lua_pop(L, 1);
-	return 0;
+	return TRY_SERIALIZE_RES_TABLE_NOT;
 }
 
 static int
-lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
-			int idx, struct luaL_field *field)
+lua_field_inspect_table(struct lua_State *L, int serialized_objs_idx,
+			struct luaL_serializer *cfg, int idx,
+			struct luaL_field *field)
 {
 	assert(lua_type(L, idx) == LUA_TTABLE);
 	uint32_t size = 0;
@@ -550,14 +773,20 @@ lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
 
 	if (cfg->encode_load_metatables) {
 		int top = lua_gettop(L);
-		int res = lua_field_try_serialize(L, cfg, idx, field);
-		if (res == -1)
+		int res = lua_field_try_serialize(L, serialized_objs_idx, cfg,
+						  idx, field);
+		if (res == TRY_SERIALIZE_ERROR)
 			return -1;
 		assert(lua_gettop(L) == top);
 		(void)top;
-		if (res == 0)
+		if (res == TRY_SERIALIZE_RES_TABLE_NOT ||
+		    (res == TRY_SERIALIZE_RES_FROM_MAP &&
+		    lua_type(L, -1) != LUA_TTABLE))
 			return 0;
-		/* Fallthrough with res == 1 */
+		/*
+		 * Fallthrough with res TRY_SERIALIZE_RES_FROM_MAP
+		 * or TRY_SERIALIZE_DEFAULT_TABLE.
+		 */
 	}
 
 	field->type = MP_ARRAY;
@@ -612,14 +841,14 @@ lua_field_tostring(struct lua_State *L, struct luaL_serializer *cfg, int idx,
 	lua_call(L, 1, 1);
 	lua_replace(L, idx);
 	lua_settop(L, top);
-	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+	if (luaL_tofield(L, cfg, 0, NULL, idx, field) < 0)
 		luaT_error(L);
 }
 
 int
 luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
-	     const struct serializer_opts *opts, int index,
-	     struct luaL_field *field)
+	     int serialized_objs_idx, const struct serializer_opts *opts,
+	     int index, struct luaL_field *field)
 {
 	if (index < 0)
 		index = lua_gettop(L) + index + 1;
@@ -753,7 +982,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
 	case LUA_TTABLE:
 	{
 		field->compact = false;
-		return lua_field_inspect_table(L, cfg, index, field);
+		return lua_field_inspect_table(L, serialized_objs_idx, cfg,
+					       index, field);
 	}
 	case LUA_TLIGHTUSERDATA:
 	case LUA_TUSERDATA:
@@ -773,8 +1003,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
 }
 
 void
-luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		  struct luaL_field *field)
+luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
+		  int serialized_objs_idx, int idx, struct luaL_field *field)
 {
 	if (idx < 0)
 		idx = lua_gettop(L) + idx + 1;
@@ -789,9 +1019,12 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
 			 */
 			GCcdata *cd = cdataV(L->base + idx - 1);
 			if (cd->ctypeid > CTID_CTYPEID)
-				lua_field_inspect_ucdata(L, cfg, idx, field);
+				lua_field_inspect_ucdata(L, cfg,
+							 serialized_objs_idx, idx,
+							 field);
 		} else if (type == LUA_TUSERDATA) {
-			lua_field_inspect_ucdata(L, cfg, idx, field);
+			lua_field_inspect_ucdata(L, cfg, serialized_objs_idx, idx,
+						 field);
 		}
 	}
 
diff --git a/src/lua/utils.h b/src/lua/utils.h
index 4a164868b..cb1025495 100644
--- a/src/lua/utils.h
+++ b/src/lua/utils.h
@@ -353,6 +353,44 @@ struct luaL_field {
 	bool compact;                /* a flag used by YAML serializer */
 };
 
+/**
+ * Find references to tables to look up self references in tables
+ * and serialize nodes if __serialize is function. Collect
+ * referenced nodes in a table with stack index @a
+ * anchortable_index and serialized nodes with results in a table
+ * with stack index @a serialized_objs_idx.
+ *
+ * @param L Lua stack.
+ * @param anchortable_index Index of anchor table on stack with
+          pairs: "node address" <-> "false (met once) or true
+          (met more than once)".
+ * @param serialized_objs_idx Index of serialized objects map on
+ *        stack with pairs: "node address" <-> "result address".
+ */
+void
+find_references_and_serialize(struct lua_State *L, int anchortable_index,
+			      int serialized_objs_idx);
+
+enum get_anchor_ret_code
+{
+	GET_ANCHOR_NO_REFS = 0,
+	GET_ANCHOR_NAMED_NOT,
+	GET_ANCHOR_ALIASED,
+};
+
+/**
+ * Generate aliases and anchor numbers for self references.
+ *
+ * @param L Lua stack.
+ * @param anchortable_index Index of anchor table on stack.
+ * @param[out] anchor_number Ptr to the number anchors in a
+               structure being under dumping.
+ * @param[out] anchor Ptr to anchor string.
+ */
+int
+get_anchor(struct lua_State *L, int anchortable_index,
+	   unsigned int *anchor_number, const char **anchor);
+
 /**
  * @brief Convert a value from the Lua stack to a lua_field structure.
  * This function is designed for use with Lua bindings and data
@@ -387,6 +425,8 @@ struct luaL_field {
  *
  * @param L stack
  * @param cfg configuration
+ * @param serialized_objs_idx Index of table with serialized
+                              objects map.
  * @param opts the Lua serializer additional options.
  * @param index stack index
  * @param field conversion result
@@ -396,8 +436,8 @@ struct luaL_field {
  */
 int
 luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
-	     const struct serializer_opts *opts, int index,
-	     struct luaL_field *field);
+	     int serialized_objs_idx, const struct serializer_opts *opts,
+	     int index, struct luaL_field *field);
 
 /**
  * @brief Try to convert userdata/cdata values using defined conversion logic.
@@ -405,18 +445,22 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
  *
  * @param L stack
  * @param cfg configuration
+ * @param serialized_objs_idx Index of table with serialized
+                              objects map.
  * @param idx stack index
  * @param field conversion result
  */
 void
-luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		  struct luaL_field *field);
+luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
+		  int serialized_objs_idx, int idx, struct luaL_field *field);
 
 /**
  * @brief A wrapper for luaL_tofield() and luaL_convertfield() that
  * tries to convert value or raise an error.
  * @param L stack
  * @param cfg configuration
+ * @param serialized_objs_idx Stack index of table with serialized
+                              objects map.
  * @param idx stack index
  * @param field conversion result
  * @sa lua_tofield()
@@ -432,14 +476,14 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
  * (tostring) -> (nil) -> exception
  */
 static inline void
-luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
-		struct luaL_field *field)
+luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg,
+		int serialized_objs_idx, int idx, struct luaL_field *field)
 {
-	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
+	if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, idx, field) < 0)
 		luaT_error(L);
 	if (field->type != MP_EXT || field->ext_type != MP_UNKNOWN_EXTENSION)
 		return;
-	luaL_convertfield(L, cfg, idx, field);
+	luaL_convertfield(L, cfg, serialized_objs_idx, idx, field);
 }
 
 void
diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
new file mode 100644
index 000000000..c284fec6e
--- /dev/null
+++ b/test/app/gh-3228-serializer-look-for-recursion.result
@@ -0,0 +1,164 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+--
+-- gh-3228: Check that recursive structures are serialized
+-- properly.
+--
+setmetatable({}, {__serialize = function(a) return a end})
+ | ---
+ | - []
+ | ...
+setmetatable({}, {__serialize = function(a) return {a} end})
+ | ---
+ | - &0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, a} end})
+ | ---
+ | - &0
+ |   - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, a, a} end})
+ | ---
+ | - &0
+ |   - *0
+ |   - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
+ | ---
+ | - &0
+ |   - - *0
+ |     - *0
+ |   - *0
+ | ...
+setmetatable({}, {__serialize = function(a) return {a, 1} end})
+ | ---
+ | - &0
+ |   - *0
+ |   - 1
+ | ...
+setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
+ | ---
+ | - &0
+ |   - - - - *0
+ | ...
+
+b = {}
+ | ---
+ | ...
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
+ | ---
+ | - &0
+ |   b_2: &1 []
+ |   a_2: *0
+ |   a_1: *0
+ |   b_1: *1
+ | ...
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
+ | ---
+ | - &0
+ |   b: &1 []
+ |   a_2:
+ |   - *0
+ |   - *1
+ |   a_1: *0
+ | ...
+
+a = {}
+ | ---
+ | ...
+a[a] = a
+ | ---
+ | ...
+recf = function(t) return setmetatable({}, {__serialize = recf}) end
+ | ---
+ | ...
+setmetatable(a, {__serialize = recf}) return a
+ | ---
+ | - []
+ | ...
+
+--
+-- __serialize function is pure, i.e. always returns identical
+-- value for identical argument. Otherwise, the behavior is
+-- undefined. So that, we ignore the side effects and just use the
+-- value after the first serialization.
+--
+a = {}
+ | ---
+ | ...
+b = {}
+ | ---
+ | ...
+b[a] = a
+ | ---
+ | ...
+show_a = true
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+serialize = function()
+    show_a = not show_a
+    if show_a then
+        return "a"
+    else
+        return "b" end
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+setmetatable(a, {__serialize = serialize})
+ | ---
+ | - b
+ | ...
+b
+ | ---
+ | - a: a
+ | ...
+
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+function example()
+    local a = {}
+    local b = {}
+    b[a] = a
+    local reta
+    local retb
+    local function swap_ser(o)
+        local newf
+        local f = getmetatable(o).__serialize
+        if f == reta then
+            newf = retb
+        else
+            newf = reta
+        end
+        getmetatable(o).__serialize = newf
+    end
+    reta = function(o) swap_ser(o) return "a" end
+    retb = function(o) swap_ser(o) return "b" end
+    setmetatable(a, {__serialize = reta})
+    return b
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+example()
+ | ---
+ | - a: a
+ | ...
diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
new file mode 100644
index 000000000..e1dbd9f90
--- /dev/null
+++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
@@ -0,0 +1,69 @@
+test_run = require('test_run').new()
+
+--
+-- gh-3228: Check that recursive structures are serialized
+-- properly.
+--
+setmetatable({}, {__serialize = function(a) return a end})
+setmetatable({}, {__serialize = function(a) return {a} end})
+setmetatable({}, {__serialize = function(a) return {a, a} end})
+setmetatable({}, {__serialize = function(a) return {a, a, a} end})
+setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
+setmetatable({}, {__serialize = function(a) return {a, 1} end})
+setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
+
+b = {}
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
+setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
+
+a = {}
+a[a] = a
+recf = function(t) return setmetatable({}, {__serialize = recf}) end
+setmetatable(a, {__serialize = recf}) return a
+
+--
+-- __serialize function is pure, i.e. always returns identical
+-- value for identical argument. Otherwise, the behavior is
+-- undefined. So that, we ignore the side effects and just use the
+-- value after the first serialization.
+--
+a = {}
+b = {}
+b[a] = a
+show_a = true
+test_run:cmd('setopt delimiter ";"')
+serialize = function()
+    show_a = not show_a
+    if show_a then
+        return "a"
+    else
+        return "b" end
+end;
+test_run:cmd('setopt delimiter ""');
+setmetatable(a, {__serialize = serialize})
+b
+
+test_run:cmd('setopt delimiter ";"')
+function example()
+    local a = {}
+    local b = {}
+    b[a] = a
+    local reta
+    local retb
+    local function swap_ser(o)
+        local newf
+        local f = getmetatable(o).__serialize
+        if f == reta then
+            newf = retb
+        else
+            newf = reta
+        end
+        getmetatable(o).__serialize = newf
+    end
+    reta = function(o) swap_ser(o) return "a" end
+    retb = function(o) swap_ser(o) return "b" end
+    setmetatable(a, {__serialize = reta})
+    return b
+end;
+test_run:cmd('setopt delimiter ""');
+example()
diff --git a/test/swim/swim.result b/test/swim/swim.result
index bfc83c143..539131677 100644
--- a/test/swim/swim.result
+++ b/test/swim/swim.result
@@ -1322,16 +1322,13 @@ m_list
     incarnation: cdata {generation = 0ULL, version = 1ULL}
     uuid: 00000000-0000-1000-8000-000000000002
     payload_size: 0
-  - uri: 127.0.0.1:<port>
-    status: alive
-    incarnation: cdata {generation = 0ULL, version = 2ULL}
-    uuid: 00000000-0000-1000-8000-000000000001
-    payload_size: 8
-  - uri: 127.0.0.1:<port>
+  - &0
+    uri: 127.0.0.1:<port>
     status: alive
     incarnation: cdata {generation = 0ULL, version = 2ULL}
     uuid: 00000000-0000-1000-8000-000000000001
     payload_size: 8
+  - *0
 ...
 e_list
 ---
@@ -1374,16 +1371,13 @@ fiber.sleep(0)
 -- Two events - status update to 'left', and 'drop'.
 m_list
 ---
-- - uri: 127.0.0.1:<port>
-    status: left
-    incarnation: cdata {generation = 0ULL, version = 1ULL}
-    uuid: 00000000-0000-1000-8000-000000000002
-    payload_size: 0
-  - uri: 127.0.0.1:<port>
+- - &0
+    uri: 127.0.0.1:<port>
     status: left
     incarnation: cdata {generation = 0ULL, version = 1ULL}
     uuid: 00000000-0000-1000-8000-000000000002
     payload_size: 0
+  - *0
 ...
 e_list
 ---
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index 38e999870..67a9762bc 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -354,7 +354,7 @@ static void json_append_object(lua_State *l, struct luaL_serializer *cfg,
             comma = 1;
 
     struct luaL_field field;
-    luaL_checkfield(l, cfg, -2, &field);
+    luaL_checkfield(l, cfg, 0, -2, &field);
     if (field.type == MP_UINT) {
         strbuf_append_char(json, '"');
         json_append_uint(cfg, json, field.ival);
@@ -384,7 +384,7 @@ static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
                              int current_depth, strbuf_t *json)
 {
     struct luaL_field field;
-    luaL_checkfield(l, cfg, -1, &field);
+    luaL_checkfield(l, cfg, 0, -1, &field);
     switch (field.type) {
     case MP_UINT:
         return json_append_uint(cfg, json, field.ival);
diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
index 9c3a4a646..249603ba3 100644
--- a/third_party/lua-yaml/lyaml.cc
+++ b/third_party/lua-yaml/lyaml.cc
@@ -79,6 +79,7 @@ struct lua_yaml_dumper {
    lua_State *L;
    struct luaL_serializer *cfg;
    int anchortable_index;
+   int serialized_objs_idx;
    unsigned int anchor_number;
    yaml_emitter_t emitter;
    char error;
@@ -500,33 +501,16 @@ usage_error:
 static int dump_node(struct lua_yaml_dumper *dumper);
 
 static yaml_char_t *get_yaml_anchor(struct lua_yaml_dumper *dumper) {
-   const char *s = "";
-   lua_pushvalue(dumper->L, -1);
-   lua_rawget(dumper->L, dumper->anchortable_index);
-   if (!lua_toboolean(dumper->L, -1)) {
-      lua_pop(dumper->L, 1);
-      return NULL;
-   }
-
-   if (lua_isboolean(dumper->L, -1)) {
-      /* this element is referenced more than once but has not been named */
-      char buf[32];
-      snprintf(buf, sizeof(buf), "%u", dumper->anchor_number++);
-      lua_pop(dumper->L, 1);
-      lua_pushvalue(dumper->L, -1);
-      lua_pushstring(dumper->L, buf);
-      s = lua_tostring(dumper->L, -1);
-      lua_rawset(dumper->L, dumper->anchortable_index);
-   } else {
-      /* this is an aliased element */
+   const char *anchor = NULL;
+   if (get_anchor(dumper->L, dumper->anchortable_index, &dumper->anchor_number,
+                  &anchor) == GET_ANCHOR_ALIASED) {
       yaml_event_t ev;
-      const char *str = lua_tostring(dumper->L, -1);
-      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) str) ||
+      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) anchor) ||
           !yaml_emitter_emit(&dumper->emitter, &ev))
          luaL_error(dumper->L, OOM_ERRMSG);
-      lua_pop(dumper->L, 1);
+      return (yaml_char_t *)"";
    }
-   return (yaml_char_t *)s;
+   return (yaml_char_t *)anchor;
 }
 
 static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
@@ -622,7 +606,8 @@ static int dump_node(struct lua_yaml_dumper *dumper)
    (void) unused;
 
    int top = lua_gettop(dumper->L);
-   luaL_checkfield(dumper->L, dumper->cfg, top, &field);
+   luaL_checkfield(dumper->L, dumper->cfg, dumper->serialized_objs_idx,
+                   top, &field);
    switch(field.type) {
    case MP_UINT:
       snprintf(buf, sizeof(buf) - 1, "%" PRIu64, field.ival);
@@ -745,35 +730,6 @@ static int append_output(void *arg, unsigned char *buf, size_t len) {
    return 1;
 }
 
-static void find_references(struct lua_yaml_dumper *dumper) {
-   int newval = -1, type = lua_type(dumper->L, -1);
-   if (type != LUA_TTABLE)
-      return;
-
-   lua_pushvalue(dumper->L, -1); /* push copy of table */
-   lua_rawget(dumper->L, dumper->anchortable_index);
-   if (lua_isnil(dumper->L, -1))
-      newval = 0;
-   else if (!lua_toboolean(dumper->L, -1))
-      newval = 1;
-   lua_pop(dumper->L, 1);
-   if (newval != -1) {
-      lua_pushvalue(dumper->L, -1);
-      lua_pushboolean(dumper->L, newval);
-      lua_rawset(dumper->L, dumper->anchortable_index);
-   }
-   if (newval)
-      return;
-
-   /* recursively process other table values */
-   lua_pushnil(dumper->L);
-   while (lua_next(dumper->L, -2) != 0) {
-      find_references(dumper); /* find references on value */
-      lua_pop(dumper->L, 1);
-      find_references(dumper); /* find references on key */
-   }
-}
-
 int
 lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
                 const char *tag_handle, const char *tag_prefix)
@@ -817,9 +773,12 @@ lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
 
    lua_newtable(L);
    dumper.anchortable_index = lua_gettop(L);
+   lua_newtable(L);
+   dumper.serialized_objs_idx = lua_gettop(L);
    dumper.anchor_number = 0;
    lua_pushvalue(L, 1); /* push copy of arg we're processing */
-   find_references(&dumper);
+   find_references_and_serialize(dumper.L, dumper.anchortable_index,
+                                 dumper.serialized_objs_idx);
    dump_document(&dumper);
    if (dumper.error)
       goto error;


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-04-05 22:05   ` Roman Khabibov via Tarantool-patches
@ 2021-04-21  9:27     ` Sergey Kaplun via Tarantool-patches
  2021-04-21 13:12       ` Sergey Bronnikov via Tarantool-patches
  0 siblings, 1 reply; 9+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2021-04-21  9:27 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

Hi!

Thanks for the fixes and the benchmarks!

Please consider my comments below.

On 06.04.21, Roman Khabibov wrote:
> Hi! Thanks for your review.
> 
> Perf
> 
> Script:
> 
> local serializer = require('json')
> clock = require('clock')
> serializer.cfg({encode_max_depth = 64})

Why do we need this parameter for benchmark?

> local avg = 0
> for j = 1, 10 do
>     local time = clock.monotonic()
>     for i = 1, 150000 do
>         local rec1 = {{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}

LuaJIT will create these tables every iteration. It is better to move
this line out of cycles scope.

>         serializer.encode(rec1)
>     end
>     local result = clock.monotonic() - time
>     print(result)
>     avg = avg + result
> end
> print("avg", avg / 10)

Side note: I suppose that there is nothing bad to create an gist to
refer in the commit message. Thoughts?

> 
> 
> JSON
> 
> master
> r:tarantool r.khabibov$ ./src/tarantool a.lua
> 2.3421729998663
> 2.4643029998988
> 2.4067219998688
> 2.5030510001816
> 2.6378739997745
> 2.3903989996761
> 2.9206310003065
> 2.9560130001046
> 2.8415450002067
> 2.8488600002602
> avg	2.6311571000144
> 
> my branch
> 2.3160850000568
> 2.4038570001721
> 2.4244269998744
> 2.4124370003119
> 2.4159540003166
> 2.4096140000038
> 2.4202209999785
> 2.3913590000011
> 2.4089310001582
> 2.4692329997197
> avg	2.4072118000593
> 
> 
> MSGPACK
> 
> master
> 4.1559460000135

Side note: Looks like a statistical outlier, doesn't it?
The standart deviation has the same order than the difference between
tests, hasn't it?

> 3.415306000039
> 3.2962070000358
> 3.0802390002646
> 2.8740380001254
> 2.7747510001063
> 2.7418200001121
> 2.7159779998474
> 2.8011710001156
> 3.1459960001521
> avg	3.1001452000812
> 
> my branch
> 2.6075639999472
> 2.7236989997327
> 3.0367560000159
> 3.071974999737
> 4.0645679999143
> 3.3987269997597
> 3.0836050002836
> 3.1121310000308
> 2.9265290000476
> 3.1954760001972
> avg	3.1221029999666
> 

I've moved object creation out of cycles scope and increased amount of
iterations. Here is my results below for RelWithDebInfo build:

JSON:

Master branch:

| 13.585487608798
| 13.637473559938
| 13.619634022936
| 13.573492439464
| 13.595329248346
| 13.635302185081
| 13.599111944437
| 13.547587860376
| 13.577754129656
| 13.594658776186
| avg     13.596583177522

Your branch:

| 13.061761728488
| 13.070893581957
| 13.043347925879
| 13.016978750005
| 13.092284397222
| 13.056780406274
| 13.108249689452
| 13.080217910931
| 13.016354624182
| 13.077872656286
| avg     13.062474167068

MSGPACK:

Master branch:

| 14.461708432995
| 14.485332051292
| 14.49474313762
| 14.497900255956
| 14.501624618657
| 14.475869619288
| 14.452158411033
| 14.504987037741
| 14.460136301816
| 14.526830960065
| avg     14.486129082646

Your branch:

| 14.325977869332
| 14.226252351888
| 14.210794054903
| 14.332323177718
| 14.643362686969
| 14.701242586598
| 14.272431840189
| 14.164950084873
| 14.151984995231
| 14.226807836443
| avg     14.325612748414

MSGPACKFFI:

Master branch:

| 57.604440504685
| 57.71239098534
| 57.707063949667
| 57.636537715793
| 57.562112994492
| 57.627671867609
| 57.714451034553
| 57.665768134408
| 57.560196113773
| 57.603734403849
| avg     57.639436770417

Your branch:

| 58.02601410076
| 57.854091227986
| 57.902167879045
| 58.103026106022
| 58.019230334088
| 58.185967157595
| 57.99015656393
| 57.940024384297
| 58.309608150274
| 58.555808863603
| avg     58.08860947676

YAML:

Master branch:

| 78.119072271511
| 78.244466565549
| 78.199724264443
| 78.105738090351
| 78.051472786814
| 78.179750479758
| 78.219370298088
| 78.096038719639
| 78.09271354042
| 78.195037381724
| avg     78.15033843983

Your branch:

| 88.296284722164
| 88.319487094879
| 88.342842547223
| 88.313459647819
| 88.354313291609
| 88.400771623477
| 88.3271939978
| 88.307627035305
| 88.334653683007
| 88.330221014097
| avg     88.332685465738

To sum up:

There is a small improving of JSON encoder, there is no improving of
MSGPACK encoder (small degradation for MSGPACKFFI) and there is
significant degradation in yaml decoder performance.

Side note: Do you investigate the reason of
improvements/degradations?

Side note: MSGPACK FFI is x4 times slower than lua MSGPACK...

<snipped>

Known issues:

Deep recursive calls still a problem (run with disabled JIT for full
backtrace):

| tarantool> jit.off() local a = {} a[a] = a local recf recf = function(t) return {setmetatable({}, {__serialize = recf})} end setmetatable(a, {__serialize = recf}) return a
| Segmentation fault (core dumped)

Backtrace is the following:

| #0  0x0000558c599d01c4 in lj_mem_newgco (L=L@entry=0x404fb378, size=size@entry=56) at /home/burii/reviews/tarantool/serialize/third_party/luajit/src/lj_gc.c:875
| #1  0x0000558c599d735a in newtab (L=0x404fb378, asize=3, hbits=0) at /home/burii/reviews/tarantool/serialize/third_party/luajit/src/lj_tab.c:107
| #2  0x0000558c599d7579 in lj_tab_new (L=<optimized out>, asize=<optimized out>, hbits=<optimized out>) at /home/burii/reviews/tarantool/serialize/third_party/luajit/src/lj_tab.c:161
| #3  0x0000558c59a046fa in lj_BC_TNEW () at buildvm_x86.dasc:552
| #4  0x0000558c599cac74 in lua_pcall (L=L@entry=0x404fb378, nargs=nargs@entry=1, nresults=nresults@entry=1, errfunc=errfunc@entry=0)
|     at /home/burii/reviews/tarantool/serialize/third_party/luajit/src/lj_api.c:1158
| #5  0x0000558c5998317a in find_references_and_serialize (L=L@entry=0x404fb378, anchortable_index=anchortable_index@entry=3, serialized_objs_idx=serialized_objs_idx@entry=4)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:625
| #6  0x0000558c5998306b in find_references_and_serialize (L=L@entry=0x404fb378, anchortable_index=anchortable_index@entry=3, serialized_objs_idx=serialized_objs_idx@entry=4)
|     at /home/burii/reviews/tarantool/serialize/src/lua/utils.c:625
| <5K lines snipped>

Also, the following code leads to core:

| tarantool> local a = {} return setmetatable(a, {__serialize = function(t) return nil end})
| tarantool: /home/burii/reviews/tarantool/serialize/src/lua/utils.c:710: lua_field_try_serialize: Assertion `lua_isnil(L, -1) == 0' failed.
| Aborted (core dumped)

Side note: should `__serialize` field work for userdata?

| tarantool> local u = newproxy(true) getmetatable(u).__serialize = function() return "custom coroutine" end return u
| ---
| - 'userdata: 0x41613c70'

Side note:
I surprized with the following behaviour:

| tarantool> local t = {} t[math.huge] = 0/0 for k,v in pairs(t) do print(k,v) end return t
| inf     nan
| ---
| - []
| ...

But according to documentation [1] encoding and decoding of invalid
numbers is enabled by default. What am I doing wrong?

Also, is it valid representation?

| tarantool> local a = math.huge/math.huge print(a) return a
| nan
| ---
| - -nan
| ...

It should be just `nan` not `-nan`.

General notes:
1) Nit: use brackets for the one-line if statements with comments.
Or even better, move it above before if statement:

| /* They should be the same. */
| if (a != b)
| 	return;

Instead of:
| if (a != b)
| 	/* They should be the same. */
| 	return;

2) Nit: don't use ==/!= for Lua type checks:
| if (!lua_isboolean(L, -1)) {

Instead of:
| if (lua_isboolean(L, -1) == 0) {

The returned value is kinda boolean: 0 or 1. Also, this is preferable
codestyle:

| $ grep -P 'lua_is[^&|]*?\([^&|]*?\) (?![!=]=)' -R ../before_patch/src/ 2>/dev/null | wc -l
| 136
| $ grep -P 'lua_is[^&|]*?\([^&|]*?\) (?=[!=]=)' -R ../before_patch/src/ 2>/dev/null | wc -l
| 6

3) Nit: I suggest to use early return in cases like:
| if (!condition) {
| 	lua_pop(L, 1);
| 	return;
| }
| /* Payload. */

Instead of:

| if (condition) {
| 	/* Payload. */
| } else {
| 	lua_pop(L, 1);
| 	return;
| }

> 
> commit 61c84258d32f121c0a5e75761ae3b347d0f41b5d
> Author: Roman Khabibov <roman.habibov@tarantool.org>
> Date:   Fri Feb 5 16:55:56 2021 +0300
> 
>     serializer: serialize recursive structures
>     
>     Fix bug with bus error during serializing of recursive structures
>     and print aliases if node become referenced after serialization.

Typo: s/become/becomes/

>     
>     This patch adds map with serialized nodes and results. The map is
>     used to mark referenced nodes which appears during serializing.

Typo: s/nodes which/nodes, which/
Typo: s/which appears/which appear/

>     
>     Closes #3228
>     
>     @TarantoolBot
>     __serialize parameter
>     If __serialize parameter is initialized as a function then this

Typo: s/function then/function, then/

>     function should be pure. A pure function is a function which
>     returns identical values for identical arguments (no variation
>     with local static variables, non-local variables, mutable
>     reference arguments or input streams). Otherwise, the behavior
>     is undefined.
> 
> diff --git a/changelogs/unreleased/serializer-bug.md b/changelogs/unreleased/serializer-bug.md
> new file mode 100755
> index 000000000..96f359d64
> --- /dev/null
> +++ b/changelogs/unreleased/serializer-bug.md
> @@ -0,0 +1,4 @@
> +## bugfix/core
> +
> +* Recursive structures appeared after __serialize call don't lead
> +* to segfault anymore in lua and yaml serializers (gh-5392).

Typo: s/lua/Lua/
Typo: s/yaml/YAML/
Typo: s/5392/3228/

Nit: I propose the following rewording:

| Serialization of recursive structures considering the value of
| `__serialize` metafield doesn't lead to segfault anymore in Lua and
| YAML serializers (gh-3228).

Feel free to change it on your own or ignore.

> diff --git a/src/box/lua/call.c b/src/box/lua/call.c
> index 0315e720c..e4907a3f7 100644
> --- a/src/box/lua/call.c
> +++ b/src/box/lua/call.c
> @@ -193,7 +193,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,

<snipped>

> diff --git a/src/box/lua/execute.c b/src/box/lua/execute.c
> index 926a0a61c..76d271f5c 100644
> --- a/src/box/lua/execute.c
> +++ b/src/box/lua/execute.c
> @@ -328,7 +328,7 @@ lua_sql_bind_decode(struct lua_State *L, struct sql_bind *bind, int idx, int i)

<snipped>

> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
> index caa08a60f..17b434ba5 100644
> --- a/src/box/lua/serialize_lua.c
> +++ b/src/box/lua/serialize_lua.c
> @@ -115,6 +115,7 @@ struct lua_dumper {
>  	/** Anchors for self references */
>  	int anchortable_index;
>  	unsigned int anchor_number;
> +	int serialized_objs_idx;

Lets be consistent with previous naming: use "index" instead "idx".

Nit: I suggest s/serialized_objs_idx/cache_index/g renaming.
In my opinion the meaning is the same, but its shorter.
Thoughts?
Feel free to ignore.

>
>  	/** Error message buffer */
>  	char err_msg[256];
> @@ -215,7 +216,7 @@ trace_node(struct lua_dumper *d)
>  	struct luaL_field field;
>  
>  	memset(&field, 0, sizeof(field));
> -	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &field);
> +	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &field);
>  
>  	if (field.type < lengthof(mp_type_names)) {
>  		if (field.type == MP_EXT) {
> @@ -234,7 +235,7 @@ trace_node(struct lua_dumper *d)
>  
>  	memset(&field, 0, sizeof(field));
>  
> -	luaL_checkfield(d->L, d->cfg, top, &field);
> +	luaL_checkfield(d->L, d->cfg, 0, top, &field);
>  	say_info("serializer-trace: node    :\tfield type %s (%d)",
>  		 type_str, field.type);
>  }
> @@ -339,41 +340,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
>  static const char *
>  get_lua_anchor(struct lua_dumper *d)
>  {
> -	const char *s = "";
> -
> -	lua_pushvalue(d->L, -1);
> -	lua_rawget(d->L, d->anchortable_index);
> -	if (!lua_toboolean(d->L, -1)) {
> -		lua_pop(d->L, 1);
> -		return NULL;
> +	const char *anchor = NULL;
> +	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
> +			      &anchor);
> +	if (code == GET_ANCHOR_NAMED_NOT) {
> +		trace_anchor(anchor, false);
> +	} else if (code == GET_ANCHOR_ALIASED) {
> +		trace_anchor(anchor, true);
> +		return "";

Why do we return empty string instead an aliased anchor value?

Nit: Looks like trace_anchor should be unconditional, just:
|		trace_anchor(anchor, code == GET_ANCHOR_ALIASED);

>  	}
> -
> -	if (lua_isboolean(d->L, -1)) {

<snipped>

> -	return s;
> +	return anchor;
>  }
>  
>  static void
> @@ -783,7 +759,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)

<snipped>

> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index f7198a025..2779cf786 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c

<snipped>

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 9b6179f3a..2c7181f84 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c

<snipped>

> diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
> index 1e74a6a3c..f9503a275 100644
> --- a/src/lua/msgpack.c
> +++ b/src/lua/msgpack.c

<snipped>

> diff --git a/src/lua/pickle.c b/src/lua/pickle.c
> index 65208b5b3..86f0f77e8 100644
> --- a/src/lua/pickle.c
> +++ b/src/lua/pickle.c

<snipped>

> diff --git a/src/lua/utils.c b/src/lua/utils.c
> index b5a6ca5b7..0580f4094 100644
> --- a/src/lua/utils.c
> +++ b/src/lua/utils.c
> @@ -441,7 +441,8 @@ lua_gettable_wrapper(lua_State *L)

<snipped>

> +int
> +get_anchor(struct lua_State *L, int anchortable_index,

Nit: I suppose it will be good to add a prefix to the function name
to underline that it is from utils.

> +	   unsigned int *anchor_number, const char **anchor)

Nit: may be anchor_id is better?
Feel free to ignore.

> +{
> +	lua_pushvalue(L, -1);

What is expected to be on the top of the Lua stack? Am I right, that
it is an object to serialize? Please, add corresponding description
to <lua/utils.h>.

> +	lua_rawget(L, anchortable_index);
> +	if (lua_toboolean(L, -1) == 0) {
> +		/* This element is not referenced. */
> +		lua_pop(L, 1);
> +		*anchor = NULL;
> +		return 0;

Should it be `GET_ANCHOR_NO_REFS`?

> +	}
> +
> +	if (lua_isboolean(L, -1) != 0) {
> +		/*
> +		 * This element is referenced more than once but

Typo: s/once but/once, but/

> +		 * has not been named.
> +		 */
> +		char buf[32];
> +		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);
> +		lua_pop(L, 1);
> +		/* Generate a string anchor and push to table. */

Nit: As for me "cache table" is more verbose.
Feel free to ignore.

> +		lua_pushvalue(L, -1);
> +		lua_pushstring(L, buf);
> +		*anchor = lua_tostring(L, -1);
> +		lua_rawset(L, anchortable_index);
> +		return GET_ANCHOR_NAMED_NOT;
> +	} else {
> +		/* This is an aliased element. */
> +		*anchor = lua_tostring(L, -1);

Should we add assert for string type here?

> +		lua_pop(L, 1);
> +		return GET_ANCHOR_ALIASED;
> +	}
> +	return GET_ANCHOR_NO_REFS;
> +}
> +
> +void
> +find_references_and_serialize(struct lua_State *L, int anchortable_index,

Nit: I suppose it will be good to add a prefix to the function name
to underline that it is from utils.

> +			      int serialized_objs_idx)

Let's be consistent in namings. Please, choose `idx` or `index` notation.

> +{
> +	int type = lua_type(L, -1);
> +	if (type != LUA_TTABLE)
> +		return;

This `type` variable is used only once, so, it may be ommited like the
following:

| 	if (lua_type(L, -1) != LUA_TTABLE)
| 		return;

> +	int node_idx = lua_gettop(L);
> +
> +	/*
> +	 * Check if the node is already serialized i.e. the record
> +	 * is already in the map of serialized objects.
> +	 */
> +	lua_pushvalue(L, node_idx);

If you want to push the last one element you can just use -1 as
index, like you do in `get_anchor()`.

> +	lua_rawget(L, serialized_objs_idx);
> +	bool srlzd = lua_isnil(L, -1) == 0;

Nit: !lua_isnil(L, -1) looks prettier for me.
Feel free to ignore.

Nit: `srlzd` doesn't shows the meaning, and strange in naming :).
I suggest `is_cached` or something like that.

Also, it is hard to me read the code flow below:
There are 2 boolean variables and one variable with three different
values. It gives us 2*2*3 = 12 combinations of values, that define
code behaviour. In my opinion, it is hard to keep track all variants.
I suggest to separate code with the branches:
* already serialized
* not serialized and have __serialize field (by the way, why do you
  do nothing for __serialize "map" and so on?)
* not serialized and have no __serialize field

Also, you can use helper functions to make the code more readable --
like split back `find_references_and_serialize()` ->
`find_references()` + `serialize()`. Let function do one thing and
do it well.

Also, I don't understand what is purpose of this function and
differnce between it and `lua_field_try_serialize()`?

> +	/*
> +	 * This flag indicates that the object is being serialized
> +	 * in the current function call.
> +	 */
> +	bool serialization = false;

Why is this highlighted in a separate case?

> +
> +	if (!srlzd && luaL_getmetafield(L, node_idx, LUAL_SERIALIZE) != 0 &&
> +	    lua_isfunction(L, -1) != 0) {
> +		/* Delete nil after lua_rawget() above. */
> +		lua_replace(L, -2);
> +		srlzd = true;
> +		serialization = true;
> +		/*
> +		 * Copy object itself and call serialize function

Typo: s/serialize function/a serialize function/

> +		 * for it.
> +		 */
> +		lua_pushvalue(L, node_idx);
> +		if (lua_pcall(L, 1, 1, 0) != 0) {

Nit: It is better to use the name of the code: `LUA_OK`.

> +			diag_set(LuajitError, lua_tostring(L, -1));
> +			luaT_error(L);
> +		}
> +		/*
> +		 * Add the result of serialization to the
> +		 * serialized_objs map. Key is node (node_idx),

Typo: s/node/the node/

> +		 * value is the result of serialization (now on
> +		 * the top of stack).

Typo: s/top of stack/top of the stack/

> +		 */
> +		lua_pushvalue(L, node_idx);
> +		lua_pushvalue(L, -2);
> +		lua_rawset(L, serialized_objs_idx);
> +	}
> +
> +	if (srlzd) {
> +		if (lua_type(L, -1) == LUA_TTABLE) {
> +			/*
> +			 * Now we will process the new node - the
> +			 * result of serialization. It is
> +			 * necessary to take it into account in
> +			 * the anchor table.
> +			 */
> +			node_idx = lua_gettop(L);
> +		} else {
> +			lua_pop(L, 1);
> +			return;
> +		}
> +	} else {
> +		if (lua_isnil(L, -1) == 0)
> +			/*
> +			 * Pop an extra field thrown out
> +			 * by luaL_getmetafield(), it was
> +			 * not a function.
> +			 */
> +			lua_pop(L, 1);
> +		/* Delete nil after lua_rawget() above. */
> +		lua_pop(L, 1);
> +	}
> +	/*
> +	 * Take an entry from the anchor table about the current
> +	 * node.
> +	 */
> +	lua_pushvalue(L, node_idx);
> +	lua_rawget(L, anchortable_index);
> +	int newval = -1;
> +	if (lua_isnil(L, -1) == 1)
> +		/*
> +		 * The node has not yet been encountered, it is
> +		 * bypassed for the first time.
> +		 */
> +		newval = 0;
> +	else if (lua_toboolean(L, -1) == 0)
> +		 /* The node has already met once. */
> +		newval = 1;
> +	lua_pop(L, 1);
> +	if (newval != -1) {
> +		lua_pushvalue(L, node_idx);
> +		lua_pushboolean(L, newval);
> +		lua_rawset(L, anchortable_index);
> +	}
> +	if (srlzd && !serialization) {
> +		/*
> +		 * The node has already been serialized not in the
> +		 * current call, so there is no point in going
> +		 * further in depth and checking the leaves. Pop
> +		 * the processed sterilization result.

Typo: s/sterilization/serialization/

> +		 */
> +		lua_pop(L, 1);
> +		return;
> +	}
> +	if (newval)
> +		/* The node has already met twice or more, so

Nit: Impermissible comment codestyle.

> +		 * there is no point in going further in depth and
> +		 * checking the leaves.
> +		 */

Why do we need to process node if it has already met once?

> +		return;
> +
> +	/* Recursively process other table values. */
> +	lua_pushnil(L);
> +	while (lua_next(L, node_idx) != 0) {
> +		/* Find references on value. */

Nit: "Process value." looks enough.
Feel free to ignore.

> +		find_references_and_serialize(L, anchortable_index,
> +					      serialized_objs_idx);
> +		lua_pop(L, 1);
> +		/* Find references on key. */

Nit: "Process key." looks enough.
Feel free to ignore.

> +		find_references_and_serialize(L, anchortable_index,
> +					      serialized_objs_idx);
> +	}
> +	if (serialization)
> +		/*
> +		 * The loop above processed the nodes inside the

Typo: s/processed/processes/

> +		 * serialization result. Pop it, so as not to
> +		 * break the integrity of the loop of the previous
> +		 * call in recursion.
> +		 */
> +		lua_pop(L, 1);
> +
> +	return;
> +}
> +
> +enum try_serialize_ret_code
> +{
> +
> +	TRY_SERIALIZE_ERROR = -1,
> +	TRY_SERIALIZE_RES_TABLE_NOT = 0,
> +	TRY_SERIALIZE_RES_FROM_MAP,
> +	TRY_SERIALIZE_DEFAULT_TABLE,
> +};
> +
>  /**
>   * Call __serialize method of a table object by index
> - * if the former exists.
> + * if the former exists or pull result of serialization (if
> + * exists) from table with index @a serialized_objs_idx if it

Typo: s/from table/from the table/

> + * isn't 0.
>   *
>   * If __serialize does not exist then function does nothing
> - * and the function returns 1;
> + * and the function returns TRY_SERIALIZE_DEFAULT_TABLE;
>   *
>   * If __serialize exists, is a function (which doesn't
>   * raise any error) then a result of serialization
> - * replaces old value by the index and the function returns 0;
> + * replaces old value by the index and the function returns
> + * TRY_SERIALIZE_RES_TABLE_NOT or TRY_SERIALIZE_DEFAULT_TABLE;
>   *
>   * If the serialization is a hint string (like 'array' or 'map'),
>   * then field->type, field->size and field->compact
> - * are set if necessary and the function returns 0;
> + * are set if necessary and the function returns
> + * TRY_SERIALIZE_RES_TABLE_NOT;
>   *
> - * Otherwise it is an error, set diag and the funciton returns -1;
> + * Otherwise it is an error, set diag and the funciton returns
> + * TRY_SERIALIZE_ERROR;
>   *
>   * Return values:
> - * -1 - error occurs, diag is set, the top of guest stack
> - *      is undefined.
> - *  0 - __serialize field is available in the metatable,
> - *      the result value is put in the origin slot,
> - *      encoding is finished.
> - *  1 - __serialize field is not available in the metatable,
> - *      proceed with default table encoding.
> + * TRY_SERIALIZE_ERROR - error occurs, diag is set, the top of
> + * guest stack is undefined.
> + * TRY_SERIALIZE_RES_TABLE_NOT - __serialize field is available in
> + * the metatable, the result value is put in the origin slot,
> + * encoding is finished.

Sorry, I don't understand what is "NOT" part standing for?
TRY_SERIALIZE_OK looks better, IMHO.
Also, see comment about `lua_field_inspect_table()` function below.

> + * TRY_SERIALIZE_RES_FROM_MAP - __serialize field is available in

Nit: TRY_SERIALIZE_CACHED sounds better, in my opinion.
Feel free to ignore.

> + * the metatable, the result value is table from serialized
> + * objects map.
> + * TRY_SERIALIZE_DEFAULT_TABLE - __serialize field is not

Nit: TRY_SERIALIZE_DEFAULT sounds better, in my opinion.
Feel free to ignore.

> + * available in the metatable, proceed with default table
> + * encoding.
>   */
> +
>  static int
> -lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
> -			int idx, struct luaL_field *field)
> +lua_field_try_serialize(struct lua_State *L, int serialized_objs_idx,
> +			struct luaL_serializer *cfg, int idx,
> +			struct luaL_field *field)
>  {
>  	if (luaL_getmetafield(L, idx, LUAL_SERIALIZE) == 0)
> -		return 1;
> +		return TRY_SERIALIZE_DEFAULT_TABLE;

Can we use the default handler here?

>  	if (lua_isfunction(L, -1)) {
> -		/* copy object itself */
> -		lua_pushvalue(L, idx);
> -		if (lua_pcall(L, 1, 1, 0) != 0) {
> -			diag_set(LuajitError, lua_tostring(L, -1));
> -			return -1;
> +		if (serialized_objs_idx != 0) {
> +			/*
> +			 * Pop the __serialize function for the
> +			 * current node. It was already called in
> +			 * find_references_and_serialize().
> +			 */
> +			lua_pop(L, 1);
> +			/*
> +			 * Get the result of serialization from
> +			 * the map.
> +			 */
> +			lua_pushvalue(L, idx);
> +			lua_rawget(L, serialized_objs_idx);
> +			assert(lua_isnil(L, -1) == 0);

This assert fails, when a serialization function returns nil. See the
example above.

> +
> +			/*
> +			 * Replace the serialized node with a new
> +			 * result, if it is a table.
> +			 */
> +			if (lua_type(L, -1) == LUA_TTABLE) {
> +				lua_replace(L, idx);
> +				return TRY_SERIALIZE_RES_FROM_MAP;
> +			}
> +		} else {
> +			/*
> +			 * Serializer don't use map with
> +			 * serialized objects. Copy object itself
> +			 * and call __serialize for it.
> +			 */
> +			lua_pushvalue(L, idx);
> +			if (lua_pcall(L, 1, 1, 0) != 0) {

Nit: It is better to use the name of the code: `LUA_OK`.

> +				diag_set(LuajitError, lua_tostring(L, -1));
> +				return TRY_SERIALIZE_ERROR;
> +			}
>  		}
> -		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
> -			return -1;
> +		if (luaL_tofield(L, cfg, serialized_objs_idx, NULL, -1,
> +				 field) != 0)
> +			return TRY_SERIALIZE_ERROR;
>  		lua_replace(L, idx);
> -		return 0;
> +		return TRY_SERIALIZE_RES_TABLE_NOT;
>  	}
>  	if (!lua_isstring(L, -1)) {
>  		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
> -		return -1;
> +		return TRY_SERIALIZE_ERROR;
>  	}
>  	const char *type = lua_tostring(L, -1);
>  	if (strcmp(type, "array") == 0 || strcmp(type, "seq") == 0 ||
> @@ -533,16 +755,17 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
>  			field->compact = true;
>  	} else {
>  		diag_set(LuajitError, "invalid " LUAL_SERIALIZE " value");
> -		return -1;
> +		return TRY_SERIALIZE_ERROR;
>  	}
>  	/* Remove value set by luaL_getmetafield. */
>  	lua_pop(L, 1);
> -	return 0;
> +	return TRY_SERIALIZE_RES_TABLE_NOT;
>  }
>  

Nit: see nothing bad to add a comment for this function as for
previous one.

>  static int
> -lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
> -			int idx, struct luaL_field *field)
> +lua_field_inspect_table(struct lua_State *L, int serialized_objs_idx,

Nit: I suggest s/serialized_objs_idx/cache_idx/g renaming.
In my opinion the meaning is the same, but its shorter.
Thoughts?
Feel free to ignore.

> +			struct luaL_serializer *cfg, int idx,
> +			struct luaL_field *field)
>  {
>  	assert(lua_type(L, idx) == LUA_TTABLE);
>  	uint32_t size = 0;
> @@ -550,14 +773,20 @@ lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
>  
>  	if (cfg->encode_load_metatables) {
>  		int top = lua_gettop(L);
> -		int res = lua_field_try_serialize(L, cfg, idx, field);
> -		if (res == -1)
> +		int res = lua_field_try_serialize(L, serialized_objs_idx, cfg,
> +						  idx, field);
> +		if (res == TRY_SERIALIZE_ERROR)
>  			return -1;
>  		assert(lua_gettop(L) == top);
>  		(void)top;
> -		if (res == 0)
> +		if (res == TRY_SERIALIZE_RES_TABLE_NOT ||
> +		    (res == TRY_SERIALIZE_RES_FROM_MAP &&
> +		    lua_type(L, -1) != LUA_TTABLE))
>  			return 0;
> -		/* Fallthrough with res == 1 */
> +		/*
> +		 * Fallthrough with res TRY_SERIALIZE_RES_FROM_MAP
> +		 * or TRY_SERIALIZE_DEFAULT_TABLE.
> +		 */

Looks like the current work of the function is inconsistent.
In one case an object serializing inside `lua_field_try_serialize()`,
but in others in the caller. As for me, it makes code tricky to understand.

I suggest to introduce the default serializer function (the chunk to
fallthrough), that is called inside `lua_field_try_serialize()`. It
allows to reduce the amount of returning codes(may be even use only
two: 0 on success and -1 on failure, as usual), doesn't it ?

Also, with this refactoring I suggest the following renaming:
s/lua_field_try_serialize/field_serialize/g

Rationale: "try" prefix is redundant as far as this function really
serialize object, or return standard status on failure. "lua" prefix
is conflicted with standard Lua-C API, so we can just omit it for
static functions.

Thoughts?

>  	}
>  
>  	field->type = MP_ARRAY;
> @@ -612,14 +841,14 @@ lua_field_tostring(struct lua_State *L, struct luaL_serializer *cfg, int idx,

<skipped>

>  int
>  luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
> -	     const struct serializer_opts *opts, int index,
> -	     struct luaL_field *field)
> +	     int serialized_objs_idx, const struct serializer_opts *opts,
> +	     int index, struct luaL_field *field)

Let's be consistent in namings. Please, choose `idx` or `index` notation.

>  {
>  	if (index < 0)
>  		index = lua_gettop(L) + index + 1;
> @@ -753,7 +982,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,

<snipped>

> @@ -789,9 +1019,12 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
>  			 */
>  			GCcdata *cd = cdataV(L->base + idx - 1);
>  			if (cd->ctypeid > CTID_CTYPEID)
> -				lua_field_inspect_ucdata(L, cfg, idx, field);
> +				lua_field_inspect_ucdata(L, cfg,
> +							 serialized_objs_idx, idx,

Line length is more than 80 symbols.

> +							 field);
>  		} else if (type == LUA_TUSERDATA) {
> -			lua_field_inspect_ucdata(L, cfg, idx, field);
> +			lua_field_inspect_ucdata(L, cfg, serialized_objs_idx, idx,

Line length is more than 80 symbols.

> +						 field);
>  		}
>  	}
>  
> diff --git a/src/lua/utils.h b/src/lua/utils.h
> index 4a164868b..cb1025495 100644
> --- a/src/lua/utils.h
> +++ b/src/lua/utils.h
> @@ -353,6 +353,44 @@ struct luaL_field {
>  	bool compact;                /* a flag used by YAML serializer */
>  };
>  
> +/**
> + * Find references to tables to look up self references in tables
> + * and serialize nodes if __serialize is function. Collect

Typo: s/function/a function/

> + * referenced nodes in a table with stack index @a
> + * anchortable_index and serialized nodes with results in a table
> + * with stack index @a serialized_objs_idx.
> + *
> + * @param L Lua stack.
> + * @param anchortable_index Index of anchor table on stack with

Typo: s/of anchor table/of the anchor table/

> +          pairs: "node address" <-> "false (met once) or true
> +          (met more than once)".
> + * @param serialized_objs_idx Index of serialized objects map on
> + *        stack with pairs: "node address" <-> "result address".

It looks like the second table is superfluous: it is enough for us to
store "node address" -> "result object address". If we go through the
first time, then we will fill in the same table and indicate that
this element has already been serialized. If we serialize the object
a second/third/... time, then the serialization result will already
be in the table.

Thoughts?

> + */
> +void
> +find_references_and_serialize(struct lua_State *L, int anchortable_index,
> +			      int serialized_objs_idx);
> +
> +enum get_anchor_ret_code
> +{
> +	GET_ANCHOR_NO_REFS = 0,
> +	GET_ANCHOR_NAMED_NOT,
> +	GET_ANCHOR_ALIASED,
> +};
> +
> +/**
> + * Generate aliases and anchor numbers for self references.
> + *
> + * @param L Lua stack.
> + * @param anchortable_index Index of anchor table on stack.

Typo: s/of table/of the table/
Typo: s/on stack/on the stack/

> + * @param[out] anchor_number Ptr to the number anchors in a
> +               structure being under dumping.

Please, mention here, when this value is incremented.

> + * @param[out] anchor Ptr to anchor string.
> + */

Please, add comment about return codes like it is done for
`lua_field_try_serialize()` in <lua/utils.c>

> +int
> +get_anchor(struct lua_State *L, int anchortable_index,
> +	   unsigned int *anchor_number, const char **anchor);
> +
>  /**
>   * @brief Convert a value from the Lua stack to a lua_field structure.
>   * This function is designed for use with Lua bindings and data
> @@ -387,6 +425,8 @@ struct luaL_field {
>   *
>   * @param L stack
>   * @param cfg configuration
> + * @param serialized_objs_idx Index of table with serialized

Typo: s/of table/of the table/

> +                              objects map.
>   * @param opts the Lua serializer additional options.
>   * @param index stack index
>   * @param field conversion result
> @@ -396,8 +436,8 @@ struct luaL_field {
>   */
>  int
>  luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
> -	     const struct serializer_opts *opts, int index,
> -	     struct luaL_field *field);
> +	     int serialized_objs_idx, const struct serializer_opts *opts,

Let's be consistent in namings. Please, choose `idx` or `index` notation.

> +	     int index, struct luaL_field *field);
>  
>  /**
>   * @brief Try to convert userdata/cdata values using defined conversion logic.
> @@ -405,18 +445,22 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
>   *
>   * @param L stack
>   * @param cfg configuration
> + * @param serialized_objs_idx Index of table with serialized

Typo: s/of table/of the table/

> +                              objects map.
>   * @param idx stack index
>   * @param field conversion result
>   */
>  void
> -luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
> -		  struct luaL_field *field);
> +luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
> +		  int serialized_objs_idx, int idx, struct luaL_field *field);
>  
>  /**
>   * @brief A wrapper for luaL_tofield() and luaL_convertfield() that
>   * tries to convert value or raise an error.
>   * @param L stack
>   * @param cfg configuration
> + * @param serialized_objs_idx Stack index of table with serialized

Typo: s/of table/of the table/

> +                              objects map.
>   * @param idx stack index
>   * @param field conversion result
>   * @sa lua_tofield()
> @@ -432,14 +476,14 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,

<snipped>

> diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
> new file mode 100644
> index 000000000..cb8c73de0
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.result

Side note: AFAIK, we try to avoid to create new result files.
But for these tests it looks like the best option.

> @@ -0,0 +1,211 @@

<snipped>

> diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> new file mode 100644
> index 000000000..bfa046a16
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> @@ -0,0 +1,83 @@
> +test_run = require('test_run').new()
> +
> +--
> +-- gh-3228: Check that recursive structures are serialized
> +-- properly.
> +--
> +setmetatable({}, {__serialize = function(a) return a end})
> +setmetatable({}, {__serialize = function(a) return {a} end})
> +setmetatable({}, {__serialize = function(a) return {a, a} end})
> +setmetatable({}, {__serialize = function(a) return {a, a, a} end})

Looks like this case is the same as the previous one, we can drop
it.

> +setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
> +setmetatable({}, {__serialize = function(a) return {a, 1} end})
> +setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
> +setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
> +
> +b = {}
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
> +
> +a = {}
> +b = {a = a}
> +b[b] = b
> +a[b] = a
> +setmetatable(b, {__serialize = function(...) return {a} end})
> +a
> +
> +a = {}
> +b = {}
> +a[b] = b
> +setmetatable(b, {__serialize = function(...) return print end})
> +a
> +
> +a = {}
> +a[a] = a
> +recf = function(t) return setmetatable({}, {__serialize = recf}) end
> +setmetatable(a, {__serialize = recf}) return a
> +
> +--
> +-- __serialize function is pure, i.e. always returns identical
> +-- value for identical argument. Otherwise, the behavior is
> +-- undefined. So that, we ignore the side effects and just use the
> +-- value after the first serialization.

I see no reason to check undefined behaviour. I suppose we can just
drop these tests. Otherwise, we shouldn't check the value returned by
the function, just verify that the call doesn't lead to a crash.

Thoughts?

> +--
> +a = {}
> +b = {}
> +b[a] = a
> +show_a = true
> +test_run:cmd('setopt delimiter ";"')
> +serialize = function()
> +    show_a = not show_a
> +    if show_a then
> +        return "a"
> +    else
> +        return "b" end
> +end;
> +test_run:cmd('setopt delimiter ""');
> +setmetatable(a, {__serialize = serialize})
> +b
> +
> +test_run:cmd('setopt delimiter ";"')
> +function example()

Nit: swapped_serializer() as a name is more verbose to me.
Feel free to ignore.

> +    local a = {}
> +    local b = {}
> +    b[a] = a
> +    local reta
> +    local retb
> +    local function swap_ser(o)
> +        local newf
> +        local f = getmetatable(o).__serialize
> +        if f == reta then
> +            newf = retb
> +        else
> +            newf = reta
> +        end
> +        getmetatable(o).__serialize = newf
> +    end
> +    reta = function(o) swap_ser(o) return "a" end
> +    retb = function(o) swap_ser(o) return "b" end
> +    setmetatable(a, {__serialize = reta})
> +    return b
> +end;
> +test_run:cmd('setopt delimiter ""');
> +example()
> diff --git a/test/swim/swim.result b/test/swim/swim.result
> index bfc83c143..539131677 100644
> --- a/test/swim/swim.result
> +++ b/test/swim/swim.result
> @@ -1322,16 +1322,13 @@ m_list

<snipped>

> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
> index 38e999870..67a9762bc 100644
> --- a/third_party/lua-cjson/lua_cjson.c
> +++ b/third_party/lua-cjson/lua_cjson.c

<snipped>

> diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
> index 9c3a4a646..249603ba3 100644
> --- a/third_party/lua-yaml/lyaml.cc
> +++ b/third_party/lua-yaml/lyaml.cc
> @@ -79,6 +79,7 @@ struct lua_yaml_dumper {
>     lua_State *L;
>     struct luaL_serializer *cfg;
>     int anchortable_index;
> +   int serialized_objs_idx;

Let's be consistent in namings. Please, choose `idx` or `index` notation.

>     unsigned int anchor_number;

<snipped>

> 

[1]: https://www.tarantool.io/en/doc/latest/reference/reference_lua/yaml/

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-03-11  5:49 [Tarantool-patches] [PATCH] serializer: serialize recursive structures Roman Khabibov via Tarantool-patches
  2021-03-14 12:42 ` Sergey Kaplun via Tarantool-patches
  2021-04-13 15:54 ` Sergey Bronnikov via Tarantool-patches
@ 2021-04-21 12:34 ` Sergey Bronnikov via Tarantool-patches
  2 siblings, 0 replies; 9+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2021-04-21 12:34 UTC (permalink / raw)
  To: tarantool-patches, roman Khabibov

Hello,

seems patch series below is a bit outdated, so I'll review and quote 
actual version in a branch.

 >    @TarantoolBot
 >    __serialize parameter
 >    If __serialize parameter is initialized as a function then this
 >    function should be pure. A pure function is a function which
 >    returns identical values for identical arguments (no variation
 >    with local static variables, non-local variables, mutable
 >    reference arguments or input streams). Otherwise, the behavior
 >    is undefined.

1. __serialize is a method, not parameter

--- /dev/null
+++ b/changelogs/unreleased/serializer-bug.md
@@ -0,0 +1,4 @@
+## bugfix/core
+
+* Recursive structures appeared after __serialize call don't lead
+* to segfault anymore in lua and yaml serializers (gh-5392).

2. seems issue number is wrong, proper one is #3228

3. I have reviewed a code coverage and propose to cover uncovered 
branches in lua_field_try_serialize(), see [1]

I believe it would be enough to add more examples to proposed tests to 
enhance coverage.


1. https://coveralls.io/builds/38864750/source?filename=src%2Flua%2Futils.c


On 11.03.2021 08:49, Roman Khabibov via Tarantool-patches wrote:
> Fix bug with bus error during serializing of recursive
> structures.
>
> Closes #3228
> ---
>
> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3228-visited-set
> Issue: https://github.com/tarantool/tarantool/issues/3228
>
>   src/box/lua/call.c                            |   6 +-
>   src/box/lua/execute.c                         |   2 +-
>   src/box/lua/serialize_lua.c                   |  96 ++------
>   src/box/lua/tuple.c                           |   2 +-
>   src/box/sql/func.c                            |   2 +-
>   src/lua/msgpack.c                             |  10 +-
>   src/lua/pickle.c                              |   2 +-
>   src/lua/utils.c                               | 226 ++++++++++++++++--
>   src/lua/utils.h                               |  43 +++-
>   ...-3228-serializer-look-for-recursion.result |  67 ++++++
>   ...228-serializer-look-for-recursion.test.lua |  17 ++
>   test/swim/swim.result                         |  18 +-
>   third_party/lua-cjson/lua_cjson.c             |   4 +-
>   third_party/lua-yaml/lyaml.cc                 |  88 +++----
>   14 files changed, 390 insertions(+), 193 deletions(-)
>   create mode 100644 test/app/gh-3228-serializer-look-for-recursion.result
>   create mode 100644 test/app/gh-3228-serializer-look-for-recursion.test.lua
>
> diff --git a/src/box/lua/call.c b/src/box/lua/call.c
> index 0315e720c..e4907a3f7 100644
> --- a/src/box/lua/call.c
> +++ b/src/box/lua/call.c
> @@ -193,7 +193,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
>   		 */
>   		for (int i = 1; i <= nrets; ++i) {
>   			struct luaL_field field;
> -			if (luaL_tofield(L, cfg, NULL, i, &field) < 0)
> +			if (luaL_tofield(L, cfg, 0, NULL, i, &field) < 0)
>   				return luaT_error(L);
>   			struct tuple *tuple;
>   			if (field.type == MP_EXT &&
> @@ -222,7 +222,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
>   	 * Inspect the first result
>   	 */
>   	struct luaL_field root;
> -	if (luaL_tofield(L, cfg, NULL, 1, &root) < 0)
> +	if (luaL_tofield(L, cfg, 0, NULL, 1, &root) < 0)
>   		return luaT_error(L);
>   	struct tuple *tuple;
>   	if (root.type == MP_EXT && (tuple = luaT_istuple(L, 1)) != NULL) {
> @@ -252,7 +252,7 @@ luamp_encode_call_16(lua_State *L, struct luaL_serializer *cfg,
>   	for (uint32_t t = 1; t <= root.size; t++) {
>   		lua_rawgeti(L, 1, t);
>   		struct luaL_field field;
> -		if (luaL_tofield(L, cfg, NULL, -1, &field) < 0)
> +		if (luaL_tofield(L, cfg, 0, NULL, -1, &field) < 0)
>   			return luaT_error(L);
>   		if (field.type == MP_EXT && (tuple = luaT_istuple(L, -1))) {
>   			tuple_to_mpstream(tuple, stream);
> diff --git a/src/box/lua/execute.c b/src/box/lua/execute.c
> index 926a0a61c..76d271f5c 100644
> --- a/src/box/lua/execute.c
> +++ b/src/box/lua/execute.c
> @@ -328,7 +328,7 @@ lua_sql_bind_decode(struct lua_State *L, struct sql_bind *bind, int idx, int i)
>   		bind->name = NULL;
>   		bind->name_len = 0;
>   	}
> -	if (luaL_tofield(L, luaL_msgpack_default, NULL, -1, &field) < 0)
> +	if (luaL_tofield(L, luaL_msgpack_default, 0, NULL, -1, &field) < 0)
>   		return -1;
>   	switch (field.type) {
>   	case MP_UINT:
> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
> index caa08a60f..ab612dfc2 100644
> --- a/src/box/lua/serialize_lua.c
> +++ b/src/box/lua/serialize_lua.c
> @@ -215,7 +215,7 @@ trace_node(struct lua_dumper *d)
>   	struct luaL_field field;
>   
>   	memset(&field, 0, sizeof(field));
> -	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &field);
> +	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &field);
>   
>   	if (field.type < lengthof(mp_type_names)) {
>   		if (field.type == MP_EXT) {
> @@ -234,7 +234,7 @@ trace_node(struct lua_dumper *d)
>   
>   	memset(&field, 0, sizeof(field));
>   
> -	luaL_checkfield(d->L, d->cfg, top, &field);
> +	luaL_checkfield(d->L, d->cfg, 0, top, &field);
>   	say_info("serializer-trace: node    :\tfield type %s (%d)",
>   		 type_str, field.type);
>   }
> @@ -339,41 +339,16 @@ emit_node(struct lua_dumper *d, struct node *nd, int indent,
>   static const char *
>   get_lua_anchor(struct lua_dumper *d)
>   {
> -	const char *s = "";
> -
> -	lua_pushvalue(d->L, -1);
> -	lua_rawget(d->L, d->anchortable_index);
> -	if (!lua_toboolean(d->L, -1)) {
> -		lua_pop(d->L, 1);
> -		return NULL;
> -	}
> -
> -	if (lua_isboolean(d->L, -1)) {
> -		/*
> -		 * This element is referenced more
> -		 * than once but has not been named.
> -		 */
> -		char buf[32];
> -		snprintf(buf, sizeof(buf), "%u", d->anchor_number++);
> -		lua_pop(d->L, 1);
> -		lua_pushvalue(d->L, -1);
> -		lua_pushstring(d->L, buf);
> -		s = lua_tostring(d->L, -1);
> -		lua_rawset(d->L, d->anchortable_index);
> -		trace_anchor(s, false);
> -	} else {
> -		/*
> -		 * An aliased element.
> -		 *
> -		 * FIXME: Need an example to use.
> -		 *
> -		 * const char *str = lua_tostring(d->L, -1);
> -		 */
> -		const char *str = lua_tostring(d->L, -1);
> -		trace_anchor(str, true);
> -		lua_pop(d->L, 1);
> +	const char *anchor = NULL;
> +	int code = get_anchor(d->L, d->anchortable_index, &d->anchor_number,
> +			      &anchor);
> +	if (code == 1) {
> +		trace_anchor(anchor, false);
> +	} else if (code == 2) {
> +		trace_anchor(anchor, true);
> +		return "";
>   	}
> -	return s;
> +	return anchor;
>   }
>   
>   static void
> @@ -783,7 +758,7 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
>   		return -1;
>   
>   	memset(field, 0, sizeof(*field));
> -	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), field);
> +	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), field);
>   
>   	switch (field->type) {
>   	case MP_NIL:
> @@ -881,49 +856,6 @@ dump_node(struct lua_dumper *d, struct node *nd, int indent)
>   	return emit_node(d, nd, indent, str, len);
>   }
>   
> -/**
> - * Find references to tables, we use it
> - * to find self references in tables.
> - */
> -static void
> -find_references(struct lua_dumper *d)
> -{
> -	int newval;
> -
> -	if (lua_type(d->L, -1) != LUA_TTABLE)
> -		return;
> -
> -	/* Copy of a table for self refs */
> -	lua_pushvalue(d->L, -1);
> -	lua_rawget(d->L, d->anchortable_index);
> -	if (lua_isnil(d->L, -1))
> -		newval = 0;
> -	else if (!lua_toboolean(d->L, -1))
> -		newval = 1;
> -	else
> -		newval = -1;
> -	lua_pop(d->L, 1);
> -
> -	if (newval != -1) {
> -		lua_pushvalue(d->L, -1);
> -		lua_pushboolean(d->L, newval);
> -		lua_rawset(d->L, d->anchortable_index);
> -	}
> -
> -	if (newval != 0)
> -		return;
> -
> -	/*
> -	 * Other values and keys in the table
> -	 */
> -	lua_pushnil(d->L);
> -	while (lua_next(d->L, -2) != 0) {
> -		find_references(d);
> -		lua_pop(d->L, 1);
> -		find_references(d);
> -	}
> -}
> -
>   /**
>    * Dump recursively from the root node.
>    */
> @@ -935,7 +867,7 @@ dump_root(struct lua_dumper *d)
>   	};
>   	int ret;
>   
> -	luaL_checkfield(d->L, d->cfg, lua_gettop(d->L), &nd.field);
> +	luaL_checkfield(d->L, d->cfg, 0, lua_gettop(d->L), &nd.field);
>   
>   	if (nd.field.type != MP_ARRAY || nd.field.size != 1) {
>   		d->err = EINVAL;
> @@ -984,7 +916,7 @@ lua_encode(lua_State *L, struct luaL_serializer *serializer,
>   
>   	/* Push copy of arg we're processing */
>   	lua_pushvalue(L, 1);
> -	find_references(&dumper);
> +	find_references(dumper.L, dumper.anchortable_index, "before");
>   
>   	if (dump_root(&dumper) != 0)
>   		goto out;
> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 3e6f043b4..4d417ec34 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c
> @@ -411,7 +411,7 @@ luamp_convert_key(struct lua_State *L, struct luaL_serializer *cfg,
>   		return tuple_to_mpstream(tuple, stream);
>   
>   	struct luaL_field field;
> -	if (luaL_tofield(L, cfg, NULL, index, &field) < 0)
> +	if (luaL_tofield(L, cfg, 0, NULL, index, &field) < 0)
>   		luaT_error(L);
>   	if (field.type == MP_ARRAY) {
>   		lua_pushvalue(L, index);
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index f15d27051..d4a7064f0 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -262,7 +262,7 @@ port_lua_get_vdbemem(struct port *base, uint32_t *size)
>   		return NULL;
>   	for (int i = 0; i < argc; i++) {
>   		struct luaL_field field;
> -		if (luaL_tofield(L, luaL_msgpack_default,
> +		if (luaL_tofield(L, luaL_msgpack_default, 0,
>   				 NULL, -1 - i, &field) < 0) {
>   			goto error;
>   		}
> diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
> index 24b0d2ccd..412736572 100644
> --- a/src/lua/msgpack.c
> +++ b/src/lua/msgpack.c
> @@ -156,11 +156,11 @@ restart: /* used by MP_EXT of unidentified subtype */
>   		lua_pushnil(L);  /* first key */
>   		while (lua_next(L, top) != 0) {
>   			lua_pushvalue(L, -2); /* push a copy of key to top */
> -			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
> +			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
>   				return luaT_error(L);
>   			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
>   			lua_pop(L, 1); /* pop a copy of key */
> -			if (luaL_tofield(L, cfg, opts, lua_gettop(L), field) < 0)
> +			if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), field) < 0)
>   				return luaT_error(L);
>   			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
>   			lua_pop(L, 1); /* pop value */
> @@ -181,7 +181,7 @@ restart: /* used by MP_EXT of unidentified subtype */
>   		mpstream_encode_array(stream, size);
>   		for (uint32_t i = 0; i < size; i++) {
>   			lua_rawgeti(L, top, i + 1);
> -			if (luaL_tofield(L, cfg, opts, top + 1, field) < 0)
> +			if (luaL_tofield(L, cfg, 0, opts, top + 1, field) < 0)
>   				return luaT_error(L);
>   			luamp_encode_r(L, cfg, opts, stream, field, level + 1);
>   			lua_pop(L, 1);
> @@ -204,7 +204,7 @@ restart: /* used by MP_EXT of unidentified subtype */
>   			if (type != MP_EXT)
>   				return type; /* Value has been packed by the trigger */
>   			/* Try to convert value to serializable type */
> -			luaL_convertfield(L, cfg, top, field);
> +			luaL_convertfield(L, cfg, 0, top, field);
>   			/* handled by luaL_convertfield */
>   			assert(field->type != MP_EXT);
>   			assert(lua_gettop(L) == top);
> @@ -229,7 +229,7 @@ luamp_encode(struct lua_State *L, struct luaL_serializer *cfg,
>   	}
>   
>   	struct luaL_field field;
> -	if (luaL_tofield(L, cfg, opts, lua_gettop(L), &field) < 0)
> +	if (luaL_tofield(L, cfg, 0, opts, lua_gettop(L), &field) < 0)
>   		return luaT_error(L);
>   	enum mp_type top_type = luamp_encode_r(L, cfg, opts, stream, &field, 0);
>   
> diff --git a/src/lua/pickle.c b/src/lua/pickle.c
> index 65208b5b3..86f0f77e8 100644
> --- a/src/lua/pickle.c
> +++ b/src/lua/pickle.c
> @@ -80,7 +80,7 @@ lbox_pack(struct lua_State *L)
>   		if (i > nargs)
>   			luaL_error(L, "pickle.pack: argument count does not match "
>   				   "the format");
> -		luaL_checkfield(L, luaL_msgpack_default, i, &field);
> +		luaL_checkfield(L, luaL_msgpack_default, 0, i, &field);
>   		switch (*format) {
>   		case 'B':
>   		case 'b':
> diff --git a/src/lua/utils.c b/src/lua/utils.c
> index b5a6ca5b7..64404e3fc 100644
> --- a/src/lua/utils.c
> +++ b/src/lua/utils.c
> @@ -439,9 +439,54 @@ lua_gettable_wrapper(lua_State *L)
>   	return 1;
>   }
>   
> +/**
> + * Check if node with @a idx serialized.
> + */
> +static int
> +is_serialized(struct lua_State *L, int anchortable_index, int idx)
> +{
> +	if (anchortable_index == 0)
> +		return 0;
> +	lua_pushvalue(L, idx);
> +	lua_rawget(L, anchortable_index);
> +	int is_serialized = 0;
> +	if (lua_isnil(L, -1) == 0) {
> +		lua_pushstring(L, "serialized");
> +		lua_rawget(L, -2);
> +		is_serialized = lua_toboolean(L, -1);
> +		lua_pop(L, 1);
> +	}
> +	lua_pop(L, 1);
> +	return is_serialized;
> +}
> +
> +/**
> + * Mark node with @a idx as serialized.
> + */
> +static void
> +mark_as_serialized(struct lua_State *L, int anchortable_index, int idx)
> +{
> +	if (anchortable_index == 0)
> +		return;
> +	/* Push copy of table. */
> +	lua_pushvalue(L, idx);
> +	lua_pushvalue(L, idx);
> +	lua_rawget(L, anchortable_index);
> +	if (lua_isnil(L, -1) == 1) {
> +		lua_pop(L, 1);
> +		lua_newtable(L);
> +	}
> +	int flags_index = lua_gettop(L);
> +	lua_pushstring(L, "serialized");
> +	lua_pushboolean(L, true);
> +	lua_rawset(L, flags_index);
> +	lua_rawset(L, anchortable_index);
> +}
> +
>   static void
>   lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
> -			int idx, struct luaL_field *field)
> +			 int anchortable_index, int idx,
> +			 struct luaL_field *field)
>   {
>   	if (!cfg->encode_load_metatables)
>   		return;
> @@ -463,12 +508,148 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
>   		lua_pcall(L, 1, 1, 0);
>   		/* replace obj with the unpacked value */
>   		lua_replace(L, idx);
> -		if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
> +		if (luaL_tofield(L, cfg, anchortable_index, NULL, idx,
> +			         field) < 0)
>   			luaT_error(L);
>   	} /* else ignore lua_gettable exceptions */
>   	lua_settop(L, top); /* remove temporary objects */
>   }
>   
> +void
> +find_references(struct lua_State *L, int anchortable_index, const char *mode)
> +{
> +	int type = lua_type(L, -1);
> +	if (type != LUA_TTABLE || anchortable_index == 0)
> +		return;
> +
> +	int node_index = lua_gettop(L);
> +	lua_pushvalue(L, node_index);
> +
> +	/* Get value with flags from anchor table. */
> +	lua_pushvalue(L, node_index);
> +	lua_rawget(L, anchortable_index);
> +	if (lua_isnil(L, -1) == 1) {
> +		lua_pop(L, 1);
> +		lua_newtable(L);
> +	}
> +	int flags_index = lua_gettop(L);
> +
> +	/* Get and increment counter. */
> +	lua_pushstring(L, mode);
> +	lua_rawget(L, flags_index);
> +
> +	int new_count = 0;
> +	if (lua_isnil(L, -1) == 1) {
> +		new_count = 1;
> +		lua_pop(L, 1);
> +	} else {
> +		lua_Integer number = lua_tointeger(L, -1);
> +		lua_pop(L, 1);
> +		if (number < 2)
> +			new_count = ++number;
> +		else
> +			new_count = -1;
> +	}
> +
> +	/* Push new count to value with flags. */
> +	if (new_count == -1) {
> +		lua_pop(L, 2);
> +		assert(node_index == lua_gettop(L));
> +		return;
> +	}
> +	lua_pushstring(L, mode);
> +	lua_pushinteger(L, new_count);
> +	lua_rawset(L, flags_index);
> +
> +	/* Check if node is already serialized. */
> +	bool already_serialized = false;
> +	if (strcmp(mode, "after") == 0) {
> +		lua_pushstring(L, "serialized");
> +		lua_rawget(L, flags_index);
> +		already_serialized = lua_toboolean(L, -1);
> +		lua_pop(L, 1);
> +	}
> +	lua_rawset(L, anchortable_index);
> +	assert(node_index == lua_gettop(L));
> +	if (already_serialized == true)
> +		return;
> +
> +	/* Recursively process other table values. */
> +	lua_pushnil(L);
> +	while (lua_next(L, -2) != 0) {
> +		/* Find references on value. */
> +		find_references(L, anchortable_index, mode);
> +		lua_pop(L, 1);
> +		/* Find references on key. */
> +		find_references(L, anchortable_index, mode);
> +	}
> +}
> +
> +int
> +get_anchor(struct lua_State *L, int anchortable_index,
> +	   unsigned int *anchor_number, const char **anchor)
> +{
> +	lua_pushvalue(L, -1);
> +	lua_rawget(L, anchortable_index);
> +	if (lua_isnil(L, -1) == 1) {
> +		lua_pop(L, 1);
> +		*anchor = NULL;
> +		return 0;
> +	}
> +
> +	lua_pushstring(L, "before");
> +	lua_rawget(L, -2);
> +	const char *str = NULL;
> +	if (lua_type(L, -1) == LUA_TSTRING)
> +		str = lua_tostring(L, -1);
> +	lua_Integer number_before = lua_tointeger(L, -1);
> +	lua_pop(L, 1);
> +	lua_Integer number_after = 0;
> +	if (str == NULL) {
> +		lua_pushstring(L, "after");
> +		lua_rawget(L, -2);
> +		if (lua_type(L, -1) == LUA_TSTRING)
> +			str = lua_tostring(L, -1);
> +		number_after = lua_tointeger(L, -1);
> +		lua_pop(L, 1);
> +	}
> +
> +	*anchor = "";
> +	int ret = 0;
> +	if ((number_before > 1 || number_after > 1) && str == NULL) {
> +		/*
> +		 * This element is referenced more than once but
> +		 * has not been named.
> +		 */
> +		char buf[32];
> +		snprintf(buf, sizeof(buf), "%u", (*anchor_number)++);
> +		lua_pop(L, 1);
> +		lua_pushvalue(L, -1);
> +
> +		lua_pushvalue(L, -1);
> +		lua_rawget(L, anchortable_index);
> +		if (number_before > 1)
> +			lua_pushstring(L, "before");
> +		else
> +			lua_pushstring(L, "after");
> +		lua_pushstring(L, buf);
> +		*anchor = lua_tostring(L, -1);
> +		lua_rawset(L, -3);
> +		lua_rawset(L, anchortable_index);
> +		ret = 1;
> +	} else if (str != NULL) {
> +		/* This is an aliased element. */
> +		lua_pop(L, 1);
> +		*anchor = str;
> +		ret = 2;
> +	} else {
> +		lua_pop(L, 1);
> +		*anchor = NULL;
> +		ret = 0;
> +	}
> +	return ret;
> +}
> +
>   /**
>    * Call __serialize method of a table object by index
>    * if the former exists.
> @@ -496,11 +677,13 @@ lua_field_inspect_ucdata(struct lua_State *L, struct luaL_serializer *cfg,
>    *      proceed with default table encoding.
>    */
>   static int
> -lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
> -			int idx, struct luaL_field *field)
> +lua_field_try_serialize(struct lua_State *L, int anchortable_index,
> +			struct luaL_serializer *cfg, int idx,
> +			struct luaL_field *field)
>   {
>   	if (luaL_getmetafield(L, idx, LUAL_SERIALIZE) == 0)
>   		return 1;
> +	mark_as_serialized(L, anchortable_index, idx);
>   	if (lua_isfunction(L, -1)) {
>   		/* copy object itself */
>   		lua_pushvalue(L, idx);
> @@ -508,7 +691,9 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
>   			diag_set(LuajitError, lua_tostring(L, -1));
>   			return -1;
>   		}
> -		if (luaL_tofield(L, cfg, NULL, -1, field) != 0)
> +		find_references(L, anchortable_index, "after");
> +		if (luaL_tofield(L, cfg, anchortable_index, NULL, -1,
> +				 field) != 0)
>   			return -1;
>   		lua_replace(L, idx);
>   		return 0;
> @@ -541,16 +726,19 @@ lua_field_try_serialize(struct lua_State *L, struct luaL_serializer *cfg,
>   }
>   
>   static int
> -lua_field_inspect_table(struct lua_State *L, struct luaL_serializer *cfg,
> -			int idx, struct luaL_field *field)
> +lua_field_inspect_table(struct lua_State *L, int anchortable_index,
> +			struct luaL_serializer *cfg, int idx,
> +			struct luaL_field *field)
>   {
>   	assert(lua_type(L, idx) == LUA_TTABLE);
>   	uint32_t size = 0;
>   	uint32_t max = 0;
>   
> -	if (cfg->encode_load_metatables) {
> +	if (cfg->encode_load_metatables &&
> +	    is_serialized(L, anchortable_index, idx) != 1) {
>   		int top = lua_gettop(L);
> -		int res = lua_field_try_serialize(L, cfg, idx, field);
> +		int res = lua_field_try_serialize(L, anchortable_index, cfg,
> +						  idx, field);
>   		if (res == -1)
>   			return -1;
>   		assert(lua_gettop(L) == top);
> @@ -612,14 +800,14 @@ lua_field_tostring(struct lua_State *L, struct luaL_serializer *cfg, int idx,
>   	lua_call(L, 1, 1);
>   	lua_replace(L, idx);
>   	lua_settop(L, top);
> -	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
> +	if (luaL_tofield(L, cfg, 0, NULL, idx, field) < 0)
>   		luaT_error(L);
>   }
>   
>   int
>   luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
> -	     const struct serializer_opts *opts, int index,
> -	     struct luaL_field *field)
> +	     int anchortable_index, const struct serializer_opts *opts,
> +	     int index, struct luaL_field *field)
>   {
>   	if (index < 0)
>   		index = lua_gettop(L) + index + 1;
> @@ -753,7 +941,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
>   	case LUA_TTABLE:
>   	{
>   		field->compact = false;
> -		return lua_field_inspect_table(L, cfg, index, field);
> +		return lua_field_inspect_table(L, anchortable_index, cfg,
> +					       index, field);
>   	}
>   	case LUA_TLIGHTUSERDATA:
>   	case LUA_TUSERDATA:
> @@ -773,8 +962,8 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
>   }
>   
>   void
> -luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
> -		  struct luaL_field *field)
> +luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
> +		  int anchortable_index, int idx, struct luaL_field *field)
>   {
>   	if (idx < 0)
>   		idx = lua_gettop(L) + idx + 1;
> @@ -789,9 +978,12 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
>   			 */
>   			GCcdata *cd = cdataV(L->base + idx - 1);
>   			if (cd->ctypeid > CTID_CTYPEID)
> -				lua_field_inspect_ucdata(L, cfg, idx, field);
> +				lua_field_inspect_ucdata(L, cfg,
> +							 anchortable_index, idx,
> +							 field);
>   		} else if (type == LUA_TUSERDATA) {
> -			lua_field_inspect_ucdata(L, cfg, idx, field);
> +			lua_field_inspect_ucdata(L, cfg, anchortable_index, idx,
> +						 field);
>   		}
>   	}
>   
> diff --git a/src/lua/utils.h b/src/lua/utils.h
> index 37531676d..ef9c70dfb 100644
> --- a/src/lua/utils.h
> +++ b/src/lua/utils.h
> @@ -354,6 +354,29 @@ struct luaL_field {
>   	bool compact;                /* a flag used by YAML serializer */
>   };
>   
> +/**
> + * Find references to tables to look up self references in tables.
> + *
> + * @param L Lua stack.
> + * @param anchortable_index Index of anchor table on stack.
> + * @param mode When the function is called before or after dump
> +               function.
> + */
> +void
> +find_references(struct lua_State *L, int anchortable_index, const char *mode);
> +
> +/**
> + * Generate aliases and anchor numbers for self references.
> + *
> + * @param L Lua stack.
> + * @param anchortable_index Index of anchor table on stack.
> + * @param[out] anchor_number Ptr to the number anchors.
> + * @param[out] anchor Ptr to anchor string.
> + */
> +int
> +get_anchor(struct lua_State *L, int anchortable_index,
> +	   unsigned int *anchor_number, const char **anchor);
> +
>   /**
>    * @brief Convert a value from the Lua stack to a lua_field structure.
>    * This function is designed for use with Lua bindings and data
> @@ -388,6 +411,7 @@ struct luaL_field {
>    *
>    * @param L stack
>    * @param cfg configuration
> + * @param anchortable_index Index of anchor table on stack.
>    * @param opts the Lua serializer additional options.
>    * @param index stack index
>    * @param field conversion result
> @@ -397,8 +421,8 @@ struct luaL_field {
>    */
>   int
>   luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
> -	     const struct serializer_opts *opts, int index,
> -	     struct luaL_field *field);
> +	     int anchortable_index, const struct serializer_opts *opts,
> +	     int index, struct luaL_field *field);
>   
>   /**
>    * @brief Try to convert userdata/cdata values using defined conversion logic.
> @@ -406,18 +430,21 @@ luaL_tofield(struct lua_State *L, struct luaL_serializer *cfg,
>    *
>    * @param L stack
>    * @param cfg configuration
> + * @param anchortable_index Index of anchor table on stack.
>    * @param idx stack index
>    * @param field conversion result
>    */
>   void
> -luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
> -		  struct luaL_field *field);
> +luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg,
> +		  int anchortable_index, int idx, struct luaL_field *field);
>   
>   /**
>    * @brief A wrapper for luaL_tofield() and luaL_convertfield() that
>    * tries to convert value or raise an error.
>    * @param L stack
>    * @param cfg configuration
> + * @param anchortable_index Stack index of table with node visit
> +                            and serialization info.
>    * @param idx stack index
>    * @param field conversion result
>    * @sa lua_tofield()
> @@ -433,14 +460,14 @@ luaL_convertfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
>    * (tostring) -> (nil) -> exception
>    */
>   static inline void
> -luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg, int idx,
> -		struct luaL_field *field)
> +luaL_checkfield(struct lua_State *L, struct luaL_serializer *cfg,
> +		int anchortable_index, int idx, struct luaL_field *field)
>   {
> -	if (luaL_tofield(L, cfg, NULL, idx, field) < 0)
> +	if (luaL_tofield(L, cfg, anchortable_index, NULL, idx, field) < 0)
>   		luaT_error(L);
>   	if (field->type != MP_EXT || field->ext_type != MP_UNKNOWN_EXTENSION)
>   		return;
> -	luaL_convertfield(L, cfg, idx, field);
> +	luaL_convertfield(L, cfg, anchortable_index, idx, field);
>   }
>   
>   void
> diff --git a/test/app/gh-3228-serializer-look-for-recursion.result b/test/app/gh-3228-serializer-look-for-recursion.result
> new file mode 100644
> index 000000000..773470c8e
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.result
> @@ -0,0 +1,67 @@
> +-- test-run result file version 2
> +test_run = require('test_run').new()
> + | ---
> + | ...
> +
> +--
> +-- gh-3228: Check the error message in the case of a __serialize
> +-- function generating infinite recursion.
> +--
> +setmetatable({}, {__serialize = function(a) return a end})
> + | ---
> + | - []
> + | ...
> +setmetatable({}, {__serialize = function(a) return {a} end})
> + | ---
> + | - - []
> + | ...
> +setmetatable({}, {__serialize = function(a) return {a, a} end})
> + | ---
> + | - - &0 []
> + |   - *0
> + | ...
> +setmetatable({}, {__serialize = function(a) return {a, a, a} end})
> + | ---
> + | - - &0 []
> + |   - *0
> + |   - *0
> + | ...
> +setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
> + | ---
> + | - - - &0 []
> + |     - *0
> + |   - *0
> + | ...
> +setmetatable({}, {__serialize = function(a) return {a, 1} end})
> + | ---
> + | - - []
> + |   - 1
> + | ...
> +setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
> + | ---
> + | - - - - - []
> + | ...
> +setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
> + | ---
> + | - - - - - 1
> + | ...
> +b = {}
> + | ---
> + | ...
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
> + | ---
> + | - b_2: &0 []
> + |   a_2: &1
> + |   - *0
> + |   a_1: *1
> + |   b_1: *0
> + | ...
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
> + | ---
> + | - b: &0 []
> + |   a_2:
> + |   - &1
> + |     - *0
> + |   - *0
> + |   a_1: *1
> + | ...
> diff --git a/test/app/gh-3228-serializer-look-for-recursion.test.lua b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> new file mode 100644
> index 000000000..1c9b0375f
> --- /dev/null
> +++ b/test/app/gh-3228-serializer-look-for-recursion.test.lua
> @@ -0,0 +1,17 @@
> +test_run = require('test_run').new()
> +
> +--
> +-- gh-3228: Check the error message in the case of a __serialize
> +-- function generating infinite recursion.
> +--
> +setmetatable({}, {__serialize = function(a) return a end})
> +setmetatable({}, {__serialize = function(a) return {a} end})
> +setmetatable({}, {__serialize = function(a) return {a, a} end})
> +setmetatable({}, {__serialize = function(a) return {a, a, a} end})
> +setmetatable({}, {__serialize = function(a) return {{a, a}, a} end})
> +setmetatable({}, {__serialize = function(a) return {a, 1} end})
> +setmetatable({}, {__serialize = function(a) return {{{{a}}}} end})
> +setmetatable({}, {__serialize = function(a) return {{{{1}}}} end})
> +b = {}
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = a, b_1 = b, b_2 = b} end})
> +setmetatable({b}, {__serialize = function(a) return {a_1 = a, a_2 = {a, b}, b = b} end})
> diff --git a/test/swim/swim.result b/test/swim/swim.result
> index bfc83c143..539131677 100644
> --- a/test/swim/swim.result
> +++ b/test/swim/swim.result
> @@ -1322,16 +1322,13 @@ m_list
>       incarnation: cdata {generation = 0ULL, version = 1ULL}
>       uuid: 00000000-0000-1000-8000-000000000002
>       payload_size: 0
> -  - uri: 127.0.0.1:<port>
> -    status: alive
> -    incarnation: cdata {generation = 0ULL, version = 2ULL}
> -    uuid: 00000000-0000-1000-8000-000000000001
> -    payload_size: 8
> -  - uri: 127.0.0.1:<port>
> +  - &0
> +    uri: 127.0.0.1:<port>
>       status: alive
>       incarnation: cdata {generation = 0ULL, version = 2ULL}
>       uuid: 00000000-0000-1000-8000-000000000001
>       payload_size: 8
> +  - *0
>   ...
>   e_list
>   ---
> @@ -1374,16 +1371,13 @@ fiber.sleep(0)
>   -- Two events - status update to 'left', and 'drop'.
>   m_list
>   ---
> -- - uri: 127.0.0.1:<port>
> -    status: left
> -    incarnation: cdata {generation = 0ULL, version = 1ULL}
> -    uuid: 00000000-0000-1000-8000-000000000002
> -    payload_size: 0
> -  - uri: 127.0.0.1:<port>
> +- - &0
> +    uri: 127.0.0.1:<port>
>       status: left
>       incarnation: cdata {generation = 0ULL, version = 1ULL}
>       uuid: 00000000-0000-1000-8000-000000000002
>       payload_size: 0
> +  - *0
>   ...
>   e_list
>   ---
> diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
> index 38e999870..67a9762bc 100644
> --- a/third_party/lua-cjson/lua_cjson.c
> +++ b/third_party/lua-cjson/lua_cjson.c
> @@ -354,7 +354,7 @@ static void json_append_object(lua_State *l, struct luaL_serializer *cfg,
>               comma = 1;
>   
>       struct luaL_field field;
> -    luaL_checkfield(l, cfg, -2, &field);
> +    luaL_checkfield(l, cfg, 0, -2, &field);
>       if (field.type == MP_UINT) {
>           strbuf_append_char(json, '"');
>           json_append_uint(cfg, json, field.ival);
> @@ -384,7 +384,7 @@ static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
>                                int current_depth, strbuf_t *json)
>   {
>       struct luaL_field field;
> -    luaL_checkfield(l, cfg, -1, &field);
> +    luaL_checkfield(l, cfg, 0, -1, &field);
>       switch (field.type) {
>       case MP_UINT:
>           return json_append_uint(cfg, json, field.ival);
> diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
> index 9c3a4a646..72f269e6c 100644
> --- a/third_party/lua-yaml/lyaml.cc
> +++ b/third_party/lua-yaml/lyaml.cc
> @@ -500,38 +500,23 @@ usage_error:
>   static int dump_node(struct lua_yaml_dumper *dumper);
>   
>   static yaml_char_t *get_yaml_anchor(struct lua_yaml_dumper *dumper) {
> -   const char *s = "";
> -   lua_pushvalue(dumper->L, -1);
> -   lua_rawget(dumper->L, dumper->anchortable_index);
> -   if (!lua_toboolean(dumper->L, -1)) {
> -      lua_pop(dumper->L, 1);
> -      return NULL;
> -   }
> -
> -   if (lua_isboolean(dumper->L, -1)) {
> -      /* this element is referenced more than once but has not been named */
> -      char buf[32];
> -      snprintf(buf, sizeof(buf), "%u", dumper->anchor_number++);
> -      lua_pop(dumper->L, 1);
> -      lua_pushvalue(dumper->L, -1);
> -      lua_pushstring(dumper->L, buf);
> -      s = lua_tostring(dumper->L, -1);
> -      lua_rawset(dumper->L, dumper->anchortable_index);
> -   } else {
> -      /* this is an aliased element */
> +   const char *anchor = NULL;
> +   if (get_anchor(dumper->L, dumper->anchortable_index, &dumper->anchor_number,
> +                  &anchor) == 2) {
>         yaml_event_t ev;
> -      const char *str = lua_tostring(dumper->L, -1);
> -      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) str) ||
> +      if (!yaml_alias_event_initialize(&ev, (yaml_char_t *) anchor) ||
>             !yaml_emitter_emit(&dumper->emitter, &ev))
>            luaL_error(dumper->L, OOM_ERRMSG);
> -      lua_pop(dumper->L, 1);
> +      return (yaml_char_t *)"";
>      }
> -   return (yaml_char_t *)s;
> +   return (yaml_char_t *) anchor;
>   }
>   
> -static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
> +static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field,
> +                      yaml_char_t *anchor) {
>      yaml_event_t ev;
> -   yaml_char_t *anchor = get_yaml_anchor(dumper);
> +   if (anchor == NULL)
> +      anchor = get_yaml_anchor(dumper);
>   
>      if (anchor && !*anchor) return 1;
>   
> @@ -556,10 +541,12 @@ static int dump_table(struct lua_yaml_dumper *dumper, struct luaL_field *field){
>             yaml_emitter_emit(&dumper->emitter, &ev) != 0 ? 1 : 0;
>   }
>   
> -static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){
> +static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field,
> +                      yaml_char_t *anchor) {
>      unsigned i;
>      yaml_event_t ev;
> -   yaml_char_t *anchor = get_yaml_anchor(dumper);
> +   if (anchor == NULL)
> +      anchor = get_yaml_anchor(dumper);
>   
>      if (anchor && !*anchor)
>         return 1;
> @@ -621,8 +608,18 @@ static int dump_node(struct lua_yaml_dumper *dumper)
>      bool unused;
>      (void) unused;
>   
> +   yaml_char_t *anchor = NULL;
> +
> +   /*
> +    * Keep anchor to print it if luaL_checkfield() push new value
> +    * on stack after serialization.
> +    */
> +   if (lua_istable(dumper->L, -1) == 1)
> +      anchor = get_yaml_anchor(dumper);
> +
>      int top = lua_gettop(dumper->L);
> -   luaL_checkfield(dumper->L, dumper->cfg, top, &field);
> +   luaL_checkfield(dumper->L, dumper->cfg, dumper->anchortable_index, top,
> +                   &field);
>      switch(field.type) {
>      case MP_UINT:
>         snprintf(buf, sizeof(buf) - 1, "%" PRIu64, field.ival);
> @@ -647,9 +644,9 @@ static int dump_node(struct lua_yaml_dumper *dumper)
>         len = strlen(buf);
>         break;
>      case MP_ARRAY:
> -      return dump_array(dumper, &field);
> +      return dump_array(dumper, &field, anchor);
>      case MP_MAP:
> -      return dump_table(dumper, &field);
> +      return dump_table(dumper, &field, anchor);
>      case MP_STR:
>         str = lua_tolstring(dumper->L, -1, &len);
>         if (yaml_is_null(str, len) || yaml_is_bool(str, len, &unused) ||
> @@ -745,35 +742,6 @@ static int append_output(void *arg, unsigned char *buf, size_t len) {
>      return 1;
>   }
>   
> -static void find_references(struct lua_yaml_dumper *dumper) {
> -   int newval = -1, type = lua_type(dumper->L, -1);
> -   if (type != LUA_TTABLE)
> -      return;
> -
> -   lua_pushvalue(dumper->L, -1); /* push copy of table */
> -   lua_rawget(dumper->L, dumper->anchortable_index);
> -   if (lua_isnil(dumper->L, -1))
> -      newval = 0;
> -   else if (!lua_toboolean(dumper->L, -1))
> -      newval = 1;
> -   lua_pop(dumper->L, 1);
> -   if (newval != -1) {
> -      lua_pushvalue(dumper->L, -1);
> -      lua_pushboolean(dumper->L, newval);
> -      lua_rawset(dumper->L, dumper->anchortable_index);
> -   }
> -   if (newval)
> -      return;
> -
> -   /* recursively process other table values */
> -   lua_pushnil(dumper->L);
> -   while (lua_next(dumper->L, -2) != 0) {
> -      find_references(dumper); /* find references on value */
> -      lua_pop(dumper->L, 1);
> -      find_references(dumper); /* find references on key */
> -   }
> -}
> -
>   int
>   lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
>                   const char *tag_handle, const char *tag_prefix)
> @@ -819,7 +787,7 @@ lua_yaml_encode(lua_State *L, struct luaL_serializer *serializer,
>      dumper.anchortable_index = lua_gettop(L);
>      dumper.anchor_number = 0;
>      lua_pushvalue(L, 1); /* push copy of arg we're processing */
> -   find_references(&dumper);
> +   find_references(L, dumper.anchortable_index, "before");
>      dump_document(&dumper);
>      if (dumper.error)
>         goto error;

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-04-21  9:27     ` Sergey Kaplun via Tarantool-patches
@ 2021-04-21 13:12       ` Sergey Bronnikov via Tarantool-patches
  2021-04-21 18:20         ` Sergey Kaplun via Tarantool-patches
  0 siblings, 1 reply; 9+ messages in thread
From: Sergey Bronnikov via Tarantool-patches @ 2021-04-21 13:12 UTC (permalink / raw)
  To: Sergey Kaplun, Roman Khabibov; +Cc: tarantool-patches


On 21.04.2021 12:27, Sergey Kaplun via Tarantool-patches wrote:
> Hi!
>
> Thanks for the fixes and the benchmarks!

I propose an updated version of script and propose to add benchmark as a 
part of patch series.

--[[
-- measure serialization time
-- run: taskset -c 1 tarantool perf.lua
]]

local clock = require('clock')

local function elapsed(f, n)
     local t0 = clock.monotonic()
     for i = 1, n do
         f()
     end
     local t1 = clock.monotonic()
     return t1 - t0
end

-- Get the mean of a table
function calculate_mean(t)
   local sum = 0
   local count= 0
   for k, v in pairs(t) do
     if type(v) == 'number' then
       sum = sum + v
     end
   end

   return (sum/#t)
end

-- Get the median of a table.
function calculate_median(t)
   local temp = {}
   -- deep copy table so that when we sort it, the original is unchanged
   -- also weed out any non numbers
   for k, v in pairs(t) do
     if type(v) == 'number' then
       table.insert(temp, v)
     end
   end
   table.sort(temp)
   -- If we have an even number of table elements or odd.
   if math.fmod(#temp,2) == 0 then
     -- return mean value of middle two elements
     return (temp[#temp/2] + temp[(#temp/2)+1]) / 2
   else
     -- return middle element
     return temp[math.ceil(#temp/2)]
   end
end

-- Get the standard deviation of a table
function calculate_stddev(t)
   local vm
   local sum = 0
   local mean = calculate_mean(t)
   for k, v in pairs(t) do
     if type(v) == 'number' then
       vm = v - mean
       sum = sum + (vm * vm)
     end
   end

   return math.sqrt(sum/(#t - 1))
end

local function timeit(f, name)
     print('======================================')
     print(name)
     print('======================================')
     local res = {}
     local iterations = 10
     local elapsed_time = 0
     local repetitions = 150000
     for j = 1, iterations do
         -- warming
         for i = 1, 100 do f() end
         -- measurement
         elapsed_time = elapsed(f, repetitions)
         table.insert(res, elapsed_time)
         print(string.format("%-2d - %f sec / %d repetitions", j, 
elapsed_time, repetitions))
     end
     print(string.format("time mean   %f", calculate_mean(res)))
     print(string.format("time median %f", calculate_median(res)))
     print(string.format("time stddev %f", calculate_stddev(res)))
end

local t = {{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
timeit(function()
            local console = require('console')
            console.set_default_output('yaml')
            return console.eval(tostring(t))
        end, 'serializer console yaml')
timeit(function()
            local console = require('console')
            console.set_default_output('lua')
            return console.eval(tostring(t))
        end, 'serializer console lua')
timeit(function()
            local serializer = require('json')
            serializer.cfg({encode_max_depth = 64})
            return serializer.encode(t)
        end, 'serializer json')
timeit(function()
            local serializer = require('yaml')
            serializer.cfg({encode_max_depth = 64})
            return serializer.encode(t)
        end, 'serializer yaml')
timeit(function()
            local serializer = require('msgpack')
            serializer.cfg({encode_max_depth = 64})
            return serializer.encode(t)
        end, 'serializer msgpack')
timeit(function()
            local serializer = require('msgpackffi')
            return serializer.encode(t)
        end, 'serializer msgpackffi')


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH] serializer: serialize recursive structures
  2021-04-21 13:12       ` Sergey Bronnikov via Tarantool-patches
@ 2021-04-21 18:20         ` Sergey Kaplun via Tarantool-patches
  0 siblings, 0 replies; 9+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2021-04-21 18:20 UTC (permalink / raw)
  To: Sergey Bronnikov; +Cc: tarantool-patches

Hi!

Thanks for benchmarks!
I like them!

But I am tormented by doubts whether it is worth keeping it in a
semi-working state in our repository. Shouldn't we first introduce the
architecture of performance tests, and then add some new benchmarks?

I'm totally for the presence of such benchmarks in our repo, but I'm
afraid to make a real mess of things.

Also, I suppose that it should be a collegeal descicion (not only 2
reviewers).

On 21.04.21, Sergey Bronnikov wrote:
> 
> On 21.04.2021 12:27, Sergey Kaplun via Tarantool-patches wrote:
> > Hi!
> >
> > Thanks for the fixes and the benchmarks!
> 
> I propose an updated version of script and propose to add benchmark as a 
> part of patch series.
> 
> --[[
> -- measure serialization time
> -- run: taskset -c 1 tarantool perf.lua
> ]]
> 
> local clock = require('clock')
> 
> local function elapsed(f, n)
>      local t0 = clock.monotonic()

I've read discussions that we should use CLOCK_PROCESS_CPUTIME_ID
instead of CLOCK_MONOTONIC. Is it one preferable than another?

>      for i = 1, n do
>          f()
>      end
>      local t1 = clock.monotonic()
>      return t1 - t0
> end

Shouldn't we still take into account the time to call the function
`clock.monotonic()`?

> 
> -- Get the mean of a table
> function calculate_mean(t)
>    local sum = 0
>    local count= 0
>    for k, v in pairs(t) do
>      if type(v) == 'number' then

2 general points:
1) use `for i = 1, #t do` instead pairs (as far as we use #t later)
2) use `assert(type(v) == 'number')` instead of if statement.

Here and below.

>        sum = sum + v
>      end
>    end
> 
>    return (sum/#t)
> end
> 
> -- Get the median of a table.
> function calculate_median(t)
>    local temp = {}
>    -- deep copy table so that when we sort it, the original is unchanged

Are we interested in unchanged original table?
May be it will be better to sort values inside before and work with
them?

>    -- also weed out any non numbers
>    for k, v in pairs(t) do
>      if type(v) == 'number' then
>        table.insert(temp, v)
>      end
>    end
>    table.sort(temp)
>    -- If we have an even number of table elements or odd.
>    if math.fmod(#temp,2) == 0 then
>      -- return mean value of middle two elements
>      return (temp[#temp/2] + temp[(#temp/2)+1]) / 2
>    else
>      -- return middle element
>      return temp[math.ceil(#temp/2)]
>    end
> end
> 
> -- Get the standard deviation of a table
> function calculate_stddev(t)
>    local vm

Sorry, don't get it: why do you choose this scope for the variable?

>    local sum = 0
>    local mean = calculate_mean(t)
>    for k, v in pairs(t) do
>      if type(v) == 'number' then
>        vm = v - mean
>        sum = sum + (vm * vm)
>      end
>    end
> 
>    return math.sqrt(sum/(#t - 1))
> end
> 
> local function timeit(f, name)
>      print('======================================')
>      print(name)
>      print('======================================')
>      local res = {}
>      local iterations = 10
>      local elapsed_time = 0
>      local repetitions = 150000

It will be nice to have an ability to configure this value (via config
or environment variables).

>      for j = 1, iterations do
>          -- warming
>          for i = 1, 100 do f() end
>          -- measurement
>          elapsed_time = elapsed(f, repetitions)
>          table.insert(res, elapsed_time)
>          print(string.format("%-2d - %f sec / %d repetitions", j, 
> elapsed_time, repetitions))
>      end
>      print(string.format("time mean   %f", calculate_mean(res)))
>      print(string.format("time median %f", calculate_median(res)))
>      print(string.format("time stddev %f", calculate_stddev(res)))
> end
> 
> local t = {{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
> timeit(function()
>             local console = require('console')
>             console.set_default_output('yaml')
>             return console.eval(tostring(t))
>         end, 'serializer console yaml')

It is better to move out all this actions and create a local copy of
called functions.

For example:
===================================================================
local tstr = tostring(t)
local console = require('console')
console.set_default_output('yaml')
local eval = console.eval
timeit(function()
  return eval(tstr)
end, 'serializer console yaml')
===================================================================

It reduces time by 5% (stderr is 0.5%).

> timeit(function()
>             local console = require('console')
>             console.set_default_output('lua')
>             return console.eval(tostring(t))
>         end, 'serializer console lua')
> timeit(function()
>             local serializer = require('json')
>             serializer.cfg({encode_max_depth = 64})
>             return serializer.encode(t)
>         end, 'serializer json')
> timeit(function()
>             local serializer = require('yaml')
>             serializer.cfg({encode_max_depth = 64})
>             return serializer.encode(t)
>         end, 'serializer yaml')
> timeit(function()
>             local serializer = require('msgpack')
>             serializer.cfg({encode_max_depth = 64})
>             return serializer.encode(t)
>         end, 'serializer msgpack')
> timeit(function()
>             local serializer = require('msgpackffi')
>             return serializer.encode(t)
>         end, 'serializer msgpackffi')
> 

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-04-21 18:21 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-11  5:49 [Tarantool-patches] [PATCH] serializer: serialize recursive structures Roman Khabibov via Tarantool-patches
2021-03-14 12:42 ` Sergey Kaplun via Tarantool-patches
2021-04-05 22:05   ` Roman Khabibov via Tarantool-patches
2021-04-21  9:27     ` Sergey Kaplun via Tarantool-patches
2021-04-21 13:12       ` Sergey Bronnikov via Tarantool-patches
2021-04-21 18:20         ` Sergey Kaplun via Tarantool-patches
2021-04-13 15:54 ` Sergey Bronnikov via Tarantool-patches
2021-04-15 20:39   ` Roman Khabibov via Tarantool-patches
2021-04-21 12:34 ` Sergey Bronnikov via Tarantool-patches

Tarantool development patches archive

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://lists.tarantool.org/tarantool-patches/0 tarantool-patches/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 tarantool-patches tarantool-patches/ https://lists.tarantool.org/tarantool-patches \
		tarantool-patches@dev.tarantool.org.
	public-inbox-index tarantool-patches

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git