[PATCH v2] core: Non-blocking io.popen

Fri May 31 14:49:43 MSK 2019

A few more comments. Please take a look.

On Wed, May 29, 2019 at 10:08:09AM +0300, Stanislav Zudin wrote:
> diff --git a/src/lib/core/coio_popen.c b/src/lib/core/coio_popen.c
> +#include <stddef.h>
> +#include <signal.h>
> +#include "coio_popen.h"
> +#include "coio_task.h"
> +#include "fiber.h"
> +#include "say.h"
> +#include "fio.h"
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <dirent.h>
> +#include <sys/wait.h>
> +#include <paths.h>

Why do you need this header? Looks like you never use anything from it.
Same for dirent.h. Please include only those headers you really need.

> +static struct popen_data *
> +popen_lookup_data_by_pid(pid_t pid)
> +{
> +	struct popen_data *cur = popen_data_list;
> +	for(; cur; cur = cur->next) {
> +		if (cur->pid == pid)
> +			return cur;
> +	}

Please write a comment explaining why you think linear search is fine
here and why we don't use some kind of hash or tree.

> +
> +	return NULL;
> +}
> +
> +static void
> +popen_exclude_from_list(struct popen_data *data)
> +{
> +	if (popen_data_list == data) {
> +		popen_data_list = data->next;
> +		return;
> +	}
> +
> +	/* Find the previous entry */
> +	struct popen_data *prev = popen_data_list;
> +	for( ; prev && prev->next != data; prev = prev->next) {
> +		/* empty */;
> +	}
> +
> +	if (!prev)
> +		return ;
> +	assert(prev->next == data);
> +	prev->next = data->next;

Looks like doubly-linked list (rlist) would suit better here.

> +}
> +
> +/*
> + * Returns next socket to read.
> + * Use this function when both STDOUT and STDERR outputs
> + * are ready for reading.

But what does it do? How does it prioritize? Though short, this function
looks kinda arcane to me due to this bit manipulation magic. Please
clarify. However, if you switch to ev io, you'll probably won't be
needing it.

> + * */
> +static inline int
> +get_handle_in_order(struct popen_data *data)
> +{
> +	/*
> +	 * Invert the order of handles to be read
> +	 */
> +	const int mask = STDERR_FILENO | STDOUT_FILENO;
> +	data->prev_source ^= mask;
> +
> +	/*
> +	 * If handle is not available, invert it back
> +	 */
> +	if (data->fh[data->prev_source] < 0)
> +		data->prev_source ^= mask;
> +	/*
> +	 * if both reading handles are invalid return -1
> +	 */
> +	return data->fh[data->prev_source];
> +}

> +void *
> +coio_popen_impl(char** argv, int argc,
> +	char** environment, int environment_size,
> +	int stdin_fd, int stdout_fd, int stderr_fd)
> +{
> +	pid_t pid;
> +	int socket_rd[2] = {-1,-1};
> +	int socket_wr[2] = {-1,-1};
> +	int socket_er[2] = {-1,-1};
> +	errno = 0;
> +
> +	struct popen_data *data = popen_data_new();
> +	if (data == NULL)
> +		return NULL;
> +
> +	/* Setup a /dev/null if necessary */
> +	bool read_devnull = (stdin_fd == FIO_DEVNULL);
> +	bool write_devnull = (stdout_fd == FIO_DEVNULL) ||
> +			     (stderr_fd == FIO_DEVNULL);
> +	int devnull_flags = O_RDWR | O_CREAT;
> +	if (!read_devnull)
> +		devnull_flags = O_WRONLY | O_CREAT;
> +	else if (!write_devnull)
> +		devnull_flags = O_RDONLY | O_CREAT;
> +
> +	if (read_devnull || write_devnull) {
> +		data->devnull_fd = open("/dev/null", devnull_flags, 0666);

No need to pass mode unless you're creating a file.

> +		if (data->devnull_fd < 0)
> +			goto on_error;
> +		else {
> +			if (stdin_fd == FIO_DEVNULL)
> +				stdin_fd = data->devnull_fd;
> +			if (stdout_fd == FIO_DEVNULL)
> +				stdout_fd = data->devnull_fd;
> +			if (stderr_fd == FIO_DEVNULL)
> +				stderr_fd = data->devnull_fd;
> +		}
> +	}
> +
> +	if (stdin_fd == FIO_PIPE) {
> +		/*
> +		 * Enable non-blocking for the parent side
> +		 * and close-on-exec on the child's side.
> +		 * The socketpair on OSX doesn't support
> +		 * SOCK_NONBLOCK & SOCK_CLOEXEC flags.
> +		 */
> +		if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_rd) < 0 ||

I overlooked it when I reviewed the patch last time. Why do you use
sockets? Why is a simple pipe not enough? Sockets are full-duplex
while in your case a simplex connection would be enough, no?

> +		    fcntl(socket_rd[0], F_SETFL, O_NONBLOCK) < 0 ||
> +		    fcntl(socket_rd[1], F_SETFD, FD_CLOEXEC) < 0) {

You don't really need FD_CLOEXEC for this one as you can close it in the
child right after fork(). Moreover, looks like you close() them anyway.

We do need to use CLOEXEC for other fds (e.g.  xlog or vinyl files), but
I guess this can be done in a separate patch.

> +			goto on_error;
> +		}
> +	}
> +
> +	if (stdout_fd == FIO_PIPE) {
> +		if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_wr) < 0 ||
> +		    fcntl(socket_wr[0], F_SETFL, O_NONBLOCK) < 0 ||
> +		    fcntl(socket_wr[1], F_SETFD, FD_CLOEXEC) < 0) {
> +			goto on_error;
> +		}
> +	}
> +
> +	if (stderr_fd == FIO_PIPE) {
> +		if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_er) < 0 ||
> +		    fcntl(socket_er[0], F_SETFL, O_NONBLOCK) < 0 ||
> +		    fcntl(socket_er[1], F_SETFD, FD_CLOEXEC) < 0) {

Looks like quite a bit of repetition. Worth moving this to a separate
helper function or doing it in a loop somehow instead of copying and
pasting?

> +			goto on_error;
> +		}
> +	}
> +
> +	pid = fork();
> +
> +	if (pid < 0)
> +		goto on_error;
> +	else if (pid == 0) /* child */ {
> +		/* Setup stdin/stdout */
> +		if (stdin_fd == FIO_PIPE) {
> +			close(socket_rd[0]); /* close parent's side */
> +			stdin_fd = socket_rd[1];
> +		}
> +		if (stdin_fd != STDIN_FILENO) {
> +			dup2(stdin_fd, STDIN_FILENO);
> +			close(stdin_fd);
> +		}
> +
> +		if (stdout_fd == FIO_PIPE) {
> +			close(socket_wr[0]); /* close parent's side */
> +			stdout_fd = socket_wr[1];
> +		}
> +		if (stdout_fd != STDOUT_FILENO) {
> +			dup2(stdout_fd, STDOUT_FILENO);
> +			if (stdout_fd != STDERR_FILENO)
> +				close(stdout_fd);
> +		}
> +
> +		if (stderr_fd == FIO_PIPE) {
> +			close(socket_er[0]); /* close parent's side */
> +			stderr_fd = socket_er[1];
> +		}
> +		if (stderr_fd != STDERR_FILENO) {
> +			dup2(stderr_fd, STDERR_FILENO);
> +			if (stderr_fd != STDOUT_FILENO)
> +				close(stderr_fd);
> +		}
> +
> +		execve( argv[0], argv,
> +			environment ? environment : environ);
> +		_exit(127);

Why _exit(), why not exit()? If there's a reason, please explain in a
comment. Also, why 127? We prefer to avoid explicit numeric constants.
May be, you could use EXIT_FAILURE instead?

Also, may be we need to write something to stderr in this case, just to
let the caller now that it's not the child failure, it's execve failure.

> +int
> +coio_popen_destroy(void *fh)
> +{
> +	struct popen_data *data = (struct popen_data *)fh;
> +
> +	if (data == NULL){
> +		errno = EBADF;
> +		return -1;
> +	}

What's the point in returning an error from this function?
You never check it AFAICS.

Anyway, like I mentioned above, I think that one shouldn't pass NULL for
a handle to any of popen functions.

> +void
> +coio_popen_child_is_dead(int sig, siginfo_t *si, void *context)

Judging by the name it looks like this function should merely return
true/false, not do anything else. Please rename. For instance, we could
name it coio_popen_on_child_termination or handle_child_death.

> +{
> +	(void)context;	/* UNUSED */

Why pass it at all if it's unused?

> +
> +	if (sig != SIGCHLD)
> +		return;

Why call it if it isn't SIGCHLD?

> +
> +	/*
> +	 * The sigaction is called with SA_NOCLDWAIT,
> +	 * so no need to call waitpid()
> +	 */
> +
> +	popen_lock_data_list();
> +
> +	struct popen_data *data = popen_lookup_data_by_pid(si->si_pid);
> +	if (data) {
> +		switch (si->si_code) {
> +		case CLD_EXITED:
> +			data->exit_code = si->si_status;
> +			data->status = POPEN_EXITED;
> +			break;
> +		case CLD_KILLED:
> +			data->signal_id = si->si_status;
> +			data->status = POPEN_KILLED;
> +			break;
> +		case CLD_DUMPED:
> +			/* exit_status makes no sense */
> +			data->status = POPEN_DUMPED;
> +			break;
> +		}
> +
> +		/*
> +		 * We shouldn't close file descriptors here.
> +		 * The child process may exit earlier than
> +		 * the parent process finishes reading data.
> +		 * In this case the reading fails.
> +		 */
> +	}
> +
> +	popen_unlock_data_list();
> +}

> diff --git a/src/lib/core/coio_popen.h b/src/lib/core/coio_popen.h
> +/**
> + * Special values of the file descriptors passed to fio.popen
> + * */
> +enum {
> +	FIO_PIPE = -2,
> +	/*
> +	 * Tells fio.popen to open a handle for
> +	 * direct reading/writing
> +	 */
> +
> +	FIO_DEVNULL = -3
> +	/*
> +	 * Tells fio.popen to redirect the given standard
> +	 * stream into /dev/null
> +	 */
> +};
> +
> +/**
> + * Possible status of the process started via fio.popen
> + **/
> +enum popen_status {
> +	POPEN_UNKNOWN = -1,

Why do you need this mysterious status code? Looking at the code, I see
that you coio_popen_get_status() returns it in case of error. But what
kind of error? Turns out that this is the case when the handle is NULL.
This looks pointless to me - one shouldn't pass NULL to that function.
Am I missing something?

> +
> +	POPEN_RUNNING = 1,
> +	/*the process is alive and well*/

Comments should be above constants, not below. Also, please use
formatting typically used in the code (starts with /**, capital
letter, a dot at the end).

> +
> +	POPEN_EXITED = 2,
> +	/*the process exited*/
> +
> +	POPEN_KILLED = 3,
> +	/*the process terminated by a signal*/
> +
> +	POPEN_DUMPED = 4
> +	/* the process terminated abnormally */
> +};

> +/**
> + * Returns status of the associated process.
> + *
> + * @param fd - handle returned by fio.popen.
> + * @param signal_id - if not NULL accepts the signal
> + * sent to terminate the process
> + * @param exit_code - if not NULL accepts the exit code
> + * if the process terminated normally.
> + *
> + * @return one of the following values:
> + * POPEN_RUNNING if the process is alive
> + * POPEN_EXITED if the process was terminated normally
> + * POPEN_KILLED if the process was terminated by a signal
> + * POPEN_UNKNOWN an error's occurred
> + */
> +int
> +coio_popen_get_status(void *fh, int *signal_id, int *exit_code);
> +
> +/**
> + * Handle SIGCHLD signal
> + */
> +void
> +coio_popen_child_is_dead(int sig, siginfo_t *si, void *);

Better not use siginfo_t here - instead pass what you really need.
This makes the API clearer and cuts the dependency list.

> diff --git a/src/lua/fio.c b/src/lua/fio.c
> +/**
> + * A wrapper applicable for using with lua's GC.
> + * */
> +struct fio_popen_wrap {

Why do you need a wrapper at all?

> +	void* handle;
> +};
> +static const char* fio_popen_typename = "fio.popen.data";
> +
> +static struct fio_popen_wrap *
> +fio_popen_get_handle_wrap(lua_State *L, int index)
> +{
> +	luaL_checktype(L, index, LUA_TUSERDATA);
> +	struct fio_popen_wrap *wrap = (struct fio_popen_wrap *)
> +		luaL_checkudata(L, index, fio_popen_typename);

Hmm, why do you use udata? Why not plain cdata? I keep forgetting
the difference, to tell the truth.

> diff --git a/src/lua/fio.lua b/src/lua/fio.lua
> +popen_methods.do_read = function(self, buf, size, seconds)
> +    if size == nil or type(size) ~= 'number' then
> +        error('fio.popen.read: invalid size argument')
> +    end
> +    if seconds == nil or type(seconds) ~= 'number' then
> +        error('fio.popen.read: invalid seconds argument')
> +    end
> +
> +    local tmpbuf
> +    if not ffi.istype(const_char_ptr_t, buf) then
> +        tmpbuf = buffer.ibuf()
> +        buf = tmpbuf:reserve(size)
> +    end
> +
> +    local res, output_no, err = internal.popen_read(self.fh, buf, size, seconds)
> +    if res == nil then
> +        if tmpbuf ~= nil then
> +            tmpbuf:recycle()
> +        end
> +        return nil, nil, err
> +    end
> +
> +    if tmpbuf ~= nil then
> +        tmpbuf:alloc(res)
> +        res = ffi.string(tmpbuf.rpos, tmpbuf:size())
> +        tmpbuf:recycle()
> +    end
> +    return res, output_no
> +end
> +
> +-- read stdout & stderr of the process started by fio.popen
> +-- read() -> str, source
> +-- read(buf) -> len, source
> +-- read(size) -> str, source
> +-- read(buf, size) -> len, source
> +-- source contains id of the stream, fio.STDOUT or fio.STDERR
> +popen_methods.read = function(self, buf, size)
> +    if self.fh == nil then
> +        return nil, nil, 'Invalid object'
> +    end
> +
> +    if buf == nil and size == nil then
> +        -- read()
> +        size = 512

Why 512? Aren't we supposed to read everything till EOF in this case?

> +    elseif ffi.istype(const_char_ptr_t, buf) and size == nil then
> +        -- read(buf)
> +        size = 512
> +    elseif not ffi.istype(const_char_ptr_t, buf) and
> +            buf ~= nil and size == nil then
> +        -- read(size)
> +        size = tonumber(buf)
> +        buf = nil
> +    elseif ffi.istype(const_char_ptr_t, buf) and size ~= nil then
> +        -- read(buf, size)
> +        size = tonumber(size)
> +    else
> +        error("fio.popen.read: invalid arguments")
> +    end
> +
> +    return self:do_read(buf, size, -1)
> +end

> +popen_methods.wait = function(self, timeout)
> +    if self.fh == nil then
> +        return nil, 'Invalid object'
> +    end
> +
> +    if timeout == nil then
> +        timeout = -1
> +    else
> +        timeout = tonumber(timeout)
> +    end
> +
> +    local rc, err = internal.popen_wait(self.fh, timeout)
> +    if rc ~= nil then
> +        self.exit_code = tonumber(rc)
> +        self.fh = nil

This is kinda unexpected. What if we want to read the process output
after waiting for it to complete.

Come to think of it, we might need an explicit knob to destroy an object
even before it's collected. May be, consider re-adding close() for this?
Please consult with Alexander and/or Georgy re this.

> +        return rc
> +    else
> +        return nil,err
> +    end
> +end

> diff --git a/test/box-tap/fio_popen.test.lua b/test/box-tap/fio_popen.test.lua
> new file mode 100755
> index 000000000..0c2bd7e70
> --- /dev/null
> +++ b/test/box-tap/fio_popen.test.lua
> @@ -0,0 +1,189 @@
> +#!/usr/bin/env tarantool
> +
> +local tap = require('tap')
> +local fio = require('fio')
> +local errno = require('errno')
> +local fiber = require('fiber')
> +local test = tap.test()
> +
> +test:plan(5+9+11+9+8)
> +
> +-- Preliminaries
> +local function read_stdout(app)
> +    local ss = ""
> +
> +    local s,src,err = app:read(128)
> +
> +    while s ~= nil and s ~= "" do
> +        ss = ss .. s
> +
> +        s,src,err = app:read(128)
> +    end
> +
> +    return ss
> +end
> +
> +-- Test 1. Run application, check its status, kill and wait
> +local app1 = fio.popen({argv = {'/bin/sh', '-c', 'cat'},
> +                        stdout=fio.STDOUT,
> +                        stderr=fio.STDOUT})
> +test:isnt(app1, nil, "#1. Starting a existing application")
> +
> +local rc = app1:get_status()
> +test:is(rc, nil, "#1. Process is running")
> +
> +rc,err = app1:kill()
> +test:is(rc, true, "#1. Sending kill(15)")
> +
> +rc,err = app1:wait(5)
> +test:is(rc, -15, "#1. Process was killed")
> +
> +rc = app1:get_status()
> +test:is(rc, -15, "#1. Process was killed 2")
> +
> +app1 = nil
> +
> +-- Test 2. Run application, write to stdin, read from stdout
> +app1 = fio.popen({argv = {'/bin/sh', '-c', 'cat'},
> +                  stdout=fio.PIPE,
> +                  stdin=fio.PIPE,
> +                  stderr=fio.STDOUT})
> +test:isnt(app1, nil, "#2. Starting a existing application")
> +
> +rc = app1:get_status()
> +test:is(rc, nil, "#2. Process is running")
> +
> +local test2str = '123\n456\n789'
> +
> +app1:write(test2str)
> +rc,src,err = app1:read(256)

Please also test the case when the child writes both to
stdin and stdout (looks like you missed it).

> +
> +test:is(src, fio.STDOUT, "#2. Read from STDOUT")
> +test:is(rc, test2str, "#2. Received exact string")
> +
> +test2str = 'abc\ndef\ngh'
> +
> +app1:write(test2str, 4)
> +local rc2,src2,err = app1:read(6)
> +
> +test:is(err, nil, "#2. Reading ok")
> +test:is(src2, fio.STDOUT, "#2. Read from STDOUT 2")
> +test:is(rc2, 'abc\n', "#2. Received exact string 2")
> +
> +rc,err = app1:kill()
> +test:is(rc, true, "#2. Sending kill(15)")
> +
> +rc,err = app1:wait()
> +test:is(rc, -15, "#2. Process was killed")
> +
> +app1 = nil
> +
> +-- Test 3. read from stdout with timeout
> +app1 = fio.popen({argv = {'/bin/sh', '-c', 'cat'},
> +                  stdout=fio.PIPE,
> +                  stdin=fio.PIPE,
> +                  stderr=fio.STDOUT})
> +test:isnt(app1, nil, "#3. Starting a existing application")
> +test:is(app1:get_stderr(), nil, "#3. STDERR is redirected")
> +test:isnt(app1:get_stdout(), nil, "#3. STDOUT is available")
> +test:isnt(app1:get_stdin(), nil, "#3. STDIN is available")
> +
> +
> +rc = app1:get_status()
> +test:is(rc, nil, "#3. Process is running")
> +
> +rc,src,err = app1:read2(256, 2)
> +
> +local e = errno()
> +test:is(e, errno.ETIMEDOUT, "#3. Timeout")
> +
> +
> +local test2str = '123\n456\n789'
> +
> +app1:write(test2str)
> +rc,src,err = app1:read2(256, 2)
> +
> +test:is(err, nil, "#3. Reading ok")
> +test:is(src, fio.STDOUT, "#3. Read from STDOUT")
> +test:is(rc, test2str, "#3. Received exact string")
> +
> +rc,err = app1:kill('SIGHUP')
> +test:is(rc, true, "#3. Sending kill(1)")
> +
> +rc,err = app1:wait()
> +test:is(rc, -1, "#3. Process was killed")
> +
> +app1 = nil
> +
> +-- Test 4. Redirect from file
> +local build_path = os.getenv("BUILDDIR")
> +local txt_filename = fio.pathjoin(build_path, 'test/box-tap/fio_popen.sample.txt')
> +
> +local txt_file = fio.open(txt_filename, {'O_RDONLY'})
> +test:isnt(txt_file, nil, "#4. Open existing file for reading")
> +
> +app1 = fio.popen({argv = {'/bin/sh', '-c', 'cat'},
> +                  stdout=fio.PIPE,
> +                  stdin=txt_file,
> +                  stderr=fio.STDOUT})
> +test:isnt(app1, nil, "#4. Starting a existing application")
> +test:is(app1:get_stderr(), nil, "#4. STDERR is redirected")
> +test:isnt(app1:get_stdout(), nil, "#4. STDOUT is available")
> +test:is(app1:get_stdin(), nil, "#4. STDIN is redirected")
> +
> +rc = app1:get_status()
> +test:is(rc, nil, "#4. Process is running")
> +
> +rc,src,err = app1:read2(256, 2)
> +
> +test:is(src, fio.STDOUT, "#4. Read from STDOUT")
> +
> +local test2str = 'AAA\nBBB\nCCC\nDDD\n\n'
> +
> +test:is(rc, test2str, "#4. Received exact string")
> +
> +rc,err = app1:wait()
> +test:is(rc, 0, "#4. Process's exited")
> +
> +app1 = nil
> +txt_file:close()
> +
> +-- Test 5. Redirect output from one process to another
> +local app_path = fio.pathjoin(build_path, 'test/box-tap/fio_popen_test1.sh')
> +
> +app1 = fio.popen({argv = {'/bin/sh', '-c', app_path},
> +                  stdout=fio.PIPE,
> +                  stderr=fio.STDOUT})
> +
> +test:isnt(app1, nil, "#5. Starting application 1")
> +test:is(app1:get_stderr(), nil, "#5. STDERR is redirected")
> +test:isnt(app1:get_stdout(), nil, "#5. STDOUT is available")
> +test:isnt(app1:get_stdin(), nil, "#5. STDIN is available")
> +
> +fiber.sleep(1)
> +
> +local app2 = fio.popen({argv = {'/bin/sh', '-c', 'cat'},
> +                  stdout=fio.PIPE,
> +                  stdin=app1:get_stdout()})
> +
> +test:isnt(app2, nil, "#5. Starting application 2")
> +
> +local test2str = '1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n'
> +
> +rc = read_stdout(app2)

Please also test reading/writing/killing in case when the child is dead.

> +
> +test:is(rc, test2str, "#5. Received exact string")
> +
> +rc,err = app1:wait()
> +test:is(rc, 0, "#5. Process's exited 1")
> +
> +rc,err = app2:wait()
> +test:is(rc, 0, "#5. Process's exited 2")
> +
> +app1 = nil
> +app2 = nil
> +
> +-- --------------------------------------------------------------
> +test:check()
> +os.exit(0)
> +
> diff --git a/test/box-tap/fio_popen_test1.sh b/test/box-tap/fio_popen_test1.sh
> new file mode 100755
> index 000000000..da54ee19a
> --- /dev/null
> +++ b/test/box-tap/fio_popen_test1.sh
> @@ -0,0 +1,7 @@
> +#!/bin/bash
> +for i in {1..10}
> +do
> +	echo $i
> +#	sleep 1
> +done
> +