[Tarantool-patches] [PATCH 3/3] tuple: use calloc for service truncation tuples

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Dec 19 03:31:57 MSK 2019


Thanks for the patch!

See 3 comments below.

On 13/12/2019 08:05, Ilya Kosarev wrote:
> Trying to perform space:truncate() while reaching memtx_memory limit we
> could experience slab allocator failure on allocation for service tuple
> being upserted into service space to keep record of truncations. This
> behavior seems to be quite surprising for users. Now we are using
> calloc() to allocate tuples in BOX_TRUNCATE_ID space.
> 
> Closes: #3807
> ---
>  src/box/memtx_engine.c | 50 ++++++++++++++++++++++++++++--------------
>  src/box/memtx_engine.h |  2 +-
>  src/box/memtx_space.c  | 20 ++++++++++++-----
>  src/box/tuple.c        | 16 ++++++++++----
>  src/box/tuple.h        |  2 +-
>  src/box/tuple_format.c |  1 +
>  src/box/tuple_format.h | 10 ++++++++-
>  src/box/vy_stmt.c      |  3 ++-
>  8 files changed, 74 insertions(+), 30 deletions(-)
> 
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index 23ccc4703fe..59148059f2b 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -1113,7 +1113,7 @@ memtx_leave_delayed_free_mode(struct memtx_engine *memtx)
>  }
>  
>  struct tuple *
> -memtx_tuple_new(struct tuple_format *format, const char *data, const char *end)
> +memtx_tuple_new(struct tuple_format *format, const char *data, const char *end, bool use_calloc)

1. Oh. No. Please. The whole point of format vtable is to avoid that.

>  {
>  	struct memtx_engine *memtx = (struct memtx_engine *)format->engine;
>  	assert(mp_typeof(*data) == MP_ARRAY);
> @@ -1139,15 +1139,27 @@ memtx_tuple_new(struct tuple_format *format, const char *data, const char *end)
>  	}
>  
>  	struct memtx_tuple *memtx_tuple;
> -	while ((memtx_tuple = smalloc(&memtx->alloc, total)) == NULL) {
> -		bool stop;
> -		memtx_engine_run_gc(memtx, &stop);
> -		if (stop)
> -			break;
> -	}
> -	if (memtx_tuple == NULL) {
> -		diag_set(OutOfMemory, total, "slab allocator", "memtx_tuple");
> -		goto end;
> +	if (use_calloc) {
> +		memtx_tuple = calloc(1, total);
> +		if (memtx_tuple == NULL) {
> +			diag_set(OutOfMemory, total, "malloc",
> +						     "memtx_tuple");
> +			goto end;
> +		}
> +		format->is_malloced = true;

2. Sorry, we can't change format here. The only reason why format
is not const in that function is because we need increment its
reference counter. A tuple can't change its own format. Especially
in such a hot path.

> +	} else {
> +		while ((memtx_tuple = smalloc(&memtx->alloc,
> +					      total)) == NULL) {
> +			bool stop;
> +			memtx_engine_run_gc(memtx, &stop);
> +			if (stop)
> +				break;
> +		}
> +		if (memtx_tuple == NULL) {
> +			diag_set(OutOfMemory, total, "slab allocator",
> +						     "memtx_tuple");
> +			goto end;
> +		}
>  	}
>  	tuple = &memtx_tuple->base;
>  	tuple->refs = 0;
> @@ -1177,16 +1189,20 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
>  	struct memtx_engine *memtx = (struct memtx_engine *)format->engine;
>  	say_debug("%s(%p)", __func__, tuple);
>  	assert(tuple->refs == 0);
> -	tuple_format_unref(format);
>  	struct memtx_tuple *memtx_tuple =
>  		container_of(tuple, struct memtx_tuple, base);
>  	size_t total = tuple_size(tuple) + offsetof(struct memtx_tuple, base);
> -	if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
> -	    memtx_tuple->version == memtx->snapshot_version ||
> -	    format->is_temporary)
> -		smfree(&memtx->alloc, memtx_tuple, total);
> -	else
> -		smfree_delayed(&memtx->alloc, memtx_tuple, total);
> +	if (format->is_malloced) {
> +		free(memtx_tuple);

3. There is a certain purpose for these free_mode, version, blah
blah. These attributes create a read view of the whole memtx engine
to make a snapshot, but do not stop new requests. Tuples are not
deleted during snapshot.

Having ignored the free mode you get this:

====================================================================

    box.cfg{}
    errinj = box.error.injection
    fiber = require('fiber')

    s = box.schema.create_space('test')
    pk = s:create_index('pk')
    s:truncate()
    s:replace{1}
    box.error.injection.set('ERRINJ_SNAP_WRITE_DELAY', true)
    f = fiber.create(box.snapshot)
    box.space._truncate:delete{s.id}

-- The code below improves reproducibility. It creates and
-- deletes several tuples to unref the blessed tuple
-- (tuple_bless), rewrite some recently freed heap chunks.

    s:get{1}
    collectgarbage('collect')
    s:replace{1}
    s:get{1}
    collectgarbage('collect')
    s:replace{1}
    s:get{1}
    collectgarbage('collect')
    s:replace{1}
    s:get{1}
    collectgarbage('collect')

--

    box.error.injection.set('ERRINJ_SNAP_WRITE_DELAY', false)

    tarantool> Process 94390 stopped
    * thread #2, name = 'snapshot', stop reason = EXC_BAD_ACCESS (code=2, address=0x105481000)
        frame #0: 0x00007fff5a964ce9 libsystem_platform.dylib`_platform_memmove$VARIANT$Haswell + 41
    libsystem_platform.dylib`_platform_memmove$VARIANT$Haswell:
    ->  0x7fff5a964ce9 <+41>: rep    movsb  (%rsi), %es:(%rdi)
        0x7fff5a964ceb <+43>: popq   %rbp
        0x7fff5a964cec <+44>: retq   
        0x7fff5a964ced <+45>: cmpq   %rdi, %rsi
    Target 0: (tarantool) stopped.
    (lldb) bt
    * thread #2, name = 'snapshot', stop reason = EXC_BAD_ACCESS (code=2, address=0x105481000)
      * frame #0: 0x00007fff5a964ce9 libsystem_platform.dylib`_platform_memmove$VARIANT$Haswell + 41
        frame #1: 0x00000001003f56ba tarantool`obuf_dup(buf=0x000000010f0ff8b0, data=0x0000000105157aa6, size=85071264) at obuf.c:161:2
        frame #2: 0x00000001003634e8 tarantool`xlog_write_row(log=0x000000010f0ff208, packet=0x000000010f0ff0d8) at xlog.c:1320:7
        frame #3: 0x0000000100038aef tarantool`checkpoint_write_row(l=0x000000010f0ff208, row=0x000000010f0ff0d8) at memtx_engine.c:431:20
        frame #4: 0x0000000100038a2e tarantool`checkpoint_write_tuple(l=0x000000010f0ff208, space_id=330, group_id=0, data="", size=85071264) at memtx_engine.c:459:9
        frame #5: 0x0000000100038811 tarantool`checkpoint_f(ap=0x000000010f000278) at memtx_engine.c:603:8
        frame #6: 0x0000000100005e5a tarantool`fiber_cxx_invoke(f=(tarantool`checkpoint_f at memtx_engine.c:577), ap=0x000000010f000278)(__va_list_tag*), __va_list_tag*) at fiber.h:762:10
        frame #7: 0x000000010018168b tarantool`fiber_loop(data=0x0000000000000000) at fiber.c:830:18
        frame #8: 0x00000001004165e7 tarantool`coro_init at coro.c:110:3

====================================================================

Snapshoting means that you can't solve the issue on the format
level, or tuple level. You need to patch the engine level (IMO).

Honestly, I thought about this issue for quite a long time already,
and I don't have a plan how to fix it. All ways look bad. All of them
assume special patching of _truncate space insertion and crutches for
deletion. For me the most promising way was to introduce an analogue
of vy_quota_force_use() somehow, for _truncate, and use it in a
before_replace trigger of _truncate. Forced use of quota would allow
to temporary overflow the memory limit. But it won't fix deletion.
Maybe the key is in introduction of quota overuse for deletions. I
don't know.

Personally, I don't think the issue is worth fixing at all. If a
user wants to delete/truncate, he can increase memtx memory, do
cleanup, and decrease it back.


More information about the Tarantool-patches mailing list