[tarantool-patches] Re: [PATCH] sql: terminate with \0 encoded msgpack

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Dec 12 14:40:10 MSK 2018


Hi! Thanks for the patch! See 1 comment below.

On 11/12/2018 19:21, Nikita Pettik wrote:
> During table, index, constraint or trigger creation it is required to
> encode entry's parts/opts to msgpack for the next insertion into system
> space. Encoded msgpack is known to lack null termination at the end of
> binary string. Hence, while such string is passed to printf, strlen or
> other function which excpects null termination, buffer overflow may
> occur.
> 
> This bug was hard to detect due to several reasons.
> Firstly, SQLite memory allocators align memory to 8-byte border. Hence,
> as a rule even ASAN doesn't detect overflow, since there is additional
> alignment which can be addressable.
> Secondly, msgpack can contain nulls through the encoded string. As a
> result, strlen returns not the real size of binary string, but something
> less.
> 
> Note that null termination is required only encoded string is to be
> displayed (i.e. EXPLAIN statement is executed or vdbe_debug mode is
> enabled).
> 
> Bug was discovered with help of AddressSanitizer.
> 
> Closes #3868
> ---
> Branch: https://github.com/tarantool/tarantool/tree/np/gh-3868-buffer-overflow
> Issue: https://github.com/tarantool/tarantool/issues/3868
> 
> * Notes to the one who will review *
> 
> To enable ASAN on macOS:
> 
> cmake -DENABLE_ASAN=ON .
> 
> Also it may be required to apply this diff:
> 
> diff --git a/src/fiber.c b/src/fiber.c
> index 7658a2294..743ceff7a 100644
> --- a/src/fiber.c
> +++ b/src/fiber.c
> @@ -54,7 +54,7 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
>          __sanitizer_start_switch_fiber((will_switch_back) ? &var_name : NULL, \
>                                          (bottom), (size))
>   #define ASAN_FINISH_SWITCH_FIBER(var_name) \
> -       __sanitizer_finish_switch_fiber(var_name);
> +       __sanitizer_finish_switch_fiber(var_name, 0, 0);
> 
> To turn off SQLite 8-byte allocation alignment apply this diff:
> 
> diff --git a/src/box/sql/malloc.c b/src/box/sql/malloc.c
> index 9526cce5e..fd206632b 100644
> --- a/src/box/sql/malloc.c
> +++ b/src/box/sql/malloc.c
> @@ -49,7 +49,7 @@ sql_sized_malloc(int nByte)
>   {
>          sqlite3_int64 *p;
>          assert(nByte > 0);
> -       nByte = ROUND8(nByte);
> +       //nByte = ROUND8(nByte);
>          p = malloc(nByte + 8);
>          if (p) {
>                  p[0] = nByte;
> @@ -312,9 +312,9 @@ sqlite3MallocAlarm(int nByte)
>   static int
>   mallocWithAlarm(int n, void **pp)
>   {
> -       int nFull;
> +       int nFull = n;
>          void *p;
> -       nFull = ROUND8(n);
> +       //nFull = ROUND8(n);
>          sqlite3StatusHighwater(SQLITE_STATUS_MALLOC_SIZE, n);
>          if (mem0.alarmThreshold > 0) {
>                  sqlite3_int64 nUsed =
> 
> To observe buffer-overflow run this query
> (it is taken from sql-tap/table.test.lua):
> 
> EXPLAIN CREATE TABLE test1(f1 int primary key);

Please, show where exactly strlen() is called on msgpack? I
can not find this place. Also, terminating msgpack with 0 is
a crutch, IMO. You will be able to print msgpack and use strlen(),
but as you said, it will not be valid msgpack. I will not be
able to decode it. This is because of msgpack having zeroes in a
middle. So those columns in EXPLAIN with invalid msgpack makes no
sense to show in such a case - invalid msgpack is useless.

I remember, that msgpack 0 termination existed in earlier
SQLite + Tarantool versions, but I removed it deliberately so
as to return it without string methods usage.

All places, which you crutched up with 0 termination, have
SQL_SUBTYPE_MSGPACK subtype, using which you can determine
if it is msgpack.

I understand, that from EXPLAIN we can not return Lua maps/arrays
since 6th column of EXPLAIN output is declared as a string. But
we can decode msgpack into JSON or YAML. This output would be
both parsable and readable.

> 
>   src/box/sql/build.c   | 19 ++++++++++++++++---
>   src/box/sql/trigger.c |  4 ++--
>   2 files changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/src/box/sql/build.c b/src/box/sql/build.c
> index 52f0bde15..be51028e3 100644
> --- a/src/box/sql/build.c
> +++ b/src/box/sql/build.c
> @@ -1140,7 +1140,7 @@ vdbe_emit_create_index(struct Parse *parse, struct space_def *def,
>   	if (index_parts == NULL)
>   		goto error;
>   	char *raw = sqlite3DbMallocRaw(parse->db,
> -				       index_opts_sz +index_parts_sz);
> +				       index_opts_sz + index_parts_sz + 1);
>   	if (raw == NULL)
>   		return;
>   	memcpy(raw, index_opts, index_opts_sz);
> @@ -1148,6 +1148,7 @@ vdbe_emit_create_index(struct Parse *parse, struct space_def *def,
>   	raw += index_opts_sz;
>   	memcpy(raw, index_parts, index_parts_sz);
>   	index_parts = raw;
> +	index_parts[index_parts_sz] = '\0';
>   
>   	if (parse->pNewTable != NULL) {
>   		sqlite3VdbeAddOp2(v, OP_SCopy, space_id_reg, entry_reg);
> @@ -1209,8 +1210,18 @@ createSpace(Parse * pParse, int iSpaceId, char *zStmt)
>   	char *table_stmt = sql_encode_table(region, table, &table_stmt_sz);
>   	if (table_stmt == NULL)
>   		goto error;
> +	/*
> +	 * If we are executing EXPLAIN query or vdbe_debug mode
> +	 * is enabled, we should terminate string with \0:
> +	 * in internals it is copied to be displayed.
> +	 * In turn, to copy string strlen() is called:
> +	 * see handling of P4 in sqlite3VdbeList().
> +	 * Without null termination call of strlen() may lead to
> +	 * buffer-overflow. This bug was detected by ASAN.
> +	 */
> +
>   	char *raw = sqlite3DbMallocRaw(pParse->db,
> -				       table_stmt_sz + table_opts_stmt_sz);
> +				       table_stmt_sz + table_opts_stmt_sz + 1);
>   	if (raw == NULL)
>   		return;
>   
> @@ -1219,6 +1230,7 @@ createSpace(Parse * pParse, int iSpaceId, char *zStmt)
>   	raw += table_opts_stmt_sz;
>   	memcpy(raw, table_stmt, table_stmt_sz);
>   	table_stmt = raw;
> +	table_stmt[table_stmt_sz] = '\0';
>   
>   	sqlite3VdbeAddOp2(v, OP_SCopy, iSpaceId, iFirstCol /* spaceId */ );
>   	sqlite3VdbeAddOp2(v, OP_Integer, effective_user()->uid,
> @@ -1397,7 +1409,7 @@ vdbe_emit_fkey_create(struct Parse *parse_context, const struct fkey_def *fk)
>   	 * as dynamic and releases memory.
>   	 */
>   	char *raw = sqlite3DbMallocRaw(parse_context->db,
> -				       parent_links_size + child_links_size);
> +				       parent_links_size + child_links_size + 1);
>   	if (raw == NULL)
>   		return;
>   	memcpy(raw, parent_links, parent_links_size);
> @@ -1405,6 +1417,7 @@ vdbe_emit_fkey_create(struct Parse *parse_context, const struct fkey_def *fk)
>   	raw += parent_links_size;
>   	memcpy(raw, child_links, child_links_size);
>   	child_links = raw;
> +	child_links[child_links_size] = '\0';
>   
>   	sqlite3VdbeAddOp4(vdbe, OP_Blob, child_links_size, constr_tuple_reg + 7,
>   			  SQL_SUBTYPE_MSGPACK, child_links, P4_STATIC);
> diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c
> index 482f40926..cd4d3fb47 100644
> --- a/src/box/sql/trigger.c
> +++ b/src/box/sql/trigger.c
> @@ -213,11 +213,11 @@ sql_trigger_finish(struct Parse *parse, struct TriggerStep *step_list,
>   			parse->rc = SQL_TARANTOOL_ERROR;
>   			goto cleanup;
>   		}
> -		char *opts_buff = sqlite3DbMallocRaw(db, opts_buff_sz);
> +		char *opts_buff = sqlite3DbMallocRaw(db, opts_buff_sz + 1);
>   		if (opts_buff == NULL)
>   			goto cleanup;
>   		memcpy(opts_buff, data, opts_buff_sz);
> -
> +		opts_buff[opts_buff_sz] = '\0';
>   		trigger_name = sqlite3DbStrDup(db, trigger_name);
>   		if (trigger_name == NULL) {
>   			sqlite3DbFree(db, opts_buff);
> 




More information about the Tarantool-patches mailing list