From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Alexander Turenko Subject: [PATCH] httpc: fix redirection on libcurl-7.30 and older Date: Mon, 8 Jul 2019 19:08:48 +0300 Message-Id: <1c4c99b6d79ca5b16bfe025f5b613ed2f4162e66.1562601541.git.alexander.turenko@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: Vladimir Davydov , Daniel Stenberg Cc: Alexander Turenko , tarantool-patches@freelists.org List-ID: libcurl-7.30 and older have a problem with handling redirection. libcurl postpones removing from an internal hash table and invoking CURLMOPT_SOCKETFUNCTION callback to remove a closed socket. If timings are so that a new socket is opened before the old one is fully removed and they have the same file descriptors, CURLMOPT_SOCKETFUNCTION callback will not be invoked for the new socket. See curl_easy_sock_opt_cb() comment for more information. The described situation will likely occur during redirection (say, at '302 Found' HTTP response) when a request is machine-local. The request will hang in the case. The issue was fixed in libcurl-7.31: see the [discussion][1], the [bug 1248][2] and the [fix][3] (after that libcurl react on socket close immediately). This commit provides a workaround for old libcurl versions. [1]: https://curl.haxx.se/mail/lib-2013-03/0045.html [2]: https://sourceforge.net/p/curl/bugs/1248/ [3]: https://github.com/curl/curl/commit/88c5c63ffc3312a8c8471b48a44ec5f50420f2e3 Fixes #4180. --- The patch provides workaround for old libcurl problem that was fixed in libcurl-7.31. This is needed, because, say, CentOS 7 has libcurl-7.29 in its repositories. Vladimir, can you please review my patch? I also added Daniel Stenberg to this discussion as author of the commit, which fixes this problem in libcurl. Daniel, I would appreciate if you can take glance to comments in this commit and give me pointers if I did some wrong assumptions about libcurl. Thanks in advance! issue: https://github.com/tarantool/tarantool/issues/4180 branch: https://github.com/tarantool/tarantool/tree/Totktonada/gh-4180-httpc-fix-redirection-full-ci src/curl.c | 168 ++++++++++++++++++++++++++++++ src/curl.h | 40 +++++++ test/app-tap/http_client.test.lua | 8 +- 3 files changed, 211 insertions(+), 5 deletions(-) diff --git a/src/curl.c b/src/curl.c index a41c65e4c..dc3de2948 100644 --- a/src/curl.c +++ b/src/curl.c @@ -37,6 +37,32 @@ #include "fiber.h" #include "errinj.h" +#if LIBCURL_PROBLEMATIC_REDIRECTION +/* + * We need a hash table to track ev_io watchers on our end, + * because libcurl-7.30 and older doing it incorrectly in the case + * when a newly open socket has the same file descriptor as the + * old one. + * + * See curl_easy_sock_opt_cb() for more information. + */ +#define mh_name _curl_watchers +#define mh_key_t int +#define mh_node_t struct ev_io * +#define mh_arg_t void * +#define mh_hash(node, arg) ((*node)->fd) +#define mh_hash_key(key, arg) (key) +#define mh_cmp(node_a, node_b, arg) ((*node_a)->fd - (*node_b)->fd) +#define mh_cmp_key(key, node, arg) ((key) - (*node)->fd) +#define MH_SOURCE 1 +#include "salad/mhash.h" + +/* CURL_SOCKOPT_OK was introduced in libcurl-7.21.5. */ +#ifndef CURL_SOCKOPT_OK +#define CURL_SOCKOPT_OK 0 +#endif +#endif /* LIBCURL_PROBLEMATIC_REDIRECTION */ + /** * Process events */ @@ -181,6 +207,12 @@ curl_multi_sock_cb(CURL *easy, curl_socket_t fd, int what, void *envp, if (what == CURL_POLL_REMOVE) { say_debug("curl %p: remove fd=%d", env, fd); assert(watcher != NULL); +#if LIBCURL_PROBLEMATIC_REDIRECTION + /* Remove a watcher from the hash table. */ + mh_int_t node = mh_curl_watchers_find(env->watchers, fd, NULL); + assert(node != mh_end(env->watchers)); + mh_curl_watchers_del(env->watchers, node, NULL); +#endif ev_io_stop(loop(), watcher); ++env->stat.sockets_deleted; mempool_free(&env->sock_pool, watcher); @@ -198,6 +230,20 @@ curl_multi_sock_cb(CURL *easy, curl_socket_t fd, int what, void *envp, watcher->data = env; ++env->stat.sockets_added; curl_multi_assign(env->multi, fd, watcher); +#if LIBCURL_PROBLEMATIC_REDIRECTION + /* Add a watcher to the hash table. */ + struct ev_io **old_watcher_ptr = NULL; + mh_int_t node = mh_curl_watchers_put( + env->watchers, (const struct ev_io **) &watcher, + &old_watcher_ptr, NULL); + if (node == mh_end(env->watchers)) { + diag_set(OutOfMemory, 0, "mh_curl_watchers_put", + "watcher"); + return -1; + } + assert(old_watcher_ptr == NULL); + (void) old_watcher_ptr; +#endif say_debug("curl %p: add fd=%d", env, fd); } @@ -218,6 +264,95 @@ curl_multi_sock_cb(CURL *easy, curl_socket_t fd, int what, void *envp, return 0; } +#if LIBCURL_PROBLEMATIC_REDIRECTION +/** + * libcurl callback for CURLOPT_SOCKOPTFUNCTION + * @see https://curl.haxx.se/libcurl/c/CURLOPT_SOCKOPTFUNCTION.html + * + * When libcurl handles redirection (say, '302 Found' HTTP + * response) it closes an old socket and open a new one (at least + * in some cases). libcurl-7.30 and older does not proceed with + * closed socket immediately: it postpones removing the closed + * socket from an internal hash and also postpones invoking an + * CURLMOPT_SOCKETFUNCTION callback (curl_multi_sock_cb()) with + * CURL_POLL_REMOVE. When a new socket is opened it is possible + * that those actions are not done yet. + * + * The new socket likely will get the same file descriptor as the + * old one. libcurl checks its internal hash by a file descriptor + * and judge that a socket is already opened. If needed events ( + * CURL_POLL_IN / CURL_POLL_OUT) are the same as ones that was set + * for the old socket, then libcurl will just wait for events. If + * they are not the same, then libcurl will call + * curl_multi_sock_cb() with a ev_io watcher that has empty epoll + * set (a socket is removed from an epoll set when closed). Both + * cases lead to hanging of the HTTP request. + * + * This callback is necessary to work this behaviour around. When + * a new socket is opened it checks whether a watcher for such + * file descriptor exists. If so the fuction removes it and + * registers a new watcher for the new socket. + * + * See gh-4180 for links and more information. + */ +static int +curl_easy_sock_opt_cb(void *envp, curl_socket_t fd, curlsocktype purpose) +{ + struct curl_env *env = (struct curl_env *) envp; + + /* + * libcurl calls the callback with CURLSOCKTYPE_IPCXN to + * set options for actively created connections. Other + * events are not possible here (at least at the moment of + * libcurl-7.65.1), but if future versions of libcurl will + * add more 'purposes', they are likely will unrelevant + * for us. Ignore them. + */ + if (purpose != CURLSOCKTYPE_IPCXN) + return CURL_SOCKOPT_OK; + + /* + * If there are no old watcher with the same file + * descriptor as the new one within the same curl multi + * handle, nothing to do here. + */ + mh_int_t node = mh_curl_watchers_find(env->watchers, fd, NULL); + if (node == mh_end(env->watchers)) + return CURL_SOCKOPT_OK; + + /* + * At this point we have an old watcher that was not + * properly removed and a new watcher that will not be + * added by curl itself due to the bug (because they have + * the same file descriptor). + */ + + struct ev_io *watcher = *mh_curl_watchers_node(env->watchers, node); + assert(watcher != NULL); + + /* + * libcurl will not ask us to poll for certain events if + * it judges that a watcher already polls for them. So we + * need to subscribe to the same events as an old watcher + * did. See singlesocket() in curl's lib/multi.c. + */ + const int events = watcher->events; + const int action = ((events & EV_READ ? CURL_POLL_IN : 0) | + (events & EV_WRITE ? CURL_POLL_OUT : 0)); + + /* + * Remove an old watcher and register a new one. + * + * Note: An easy handle is not used in + * curl_multi_sock_cb(), so we pass NULL here as the first + * argument. + */ + curl_multi_sock_cb(NULL, fd, CURL_POLL_REMOVE, env, watcher); + curl_multi_sock_cb(NULL, fd, action, env, NULL); + + return CURL_SOCKOPT_OK; +} +#endif /* LIBCURL_PROBLEMATIC_REDIRECTION */ int curl_env_create(struct curl_env *env, long max_conns, long max_total_conns) @@ -232,6 +367,15 @@ curl_env_create(struct curl_env *env, long max_conns, long max_total_conns) goto error_exit; } +#if LIBCURL_PROBLEMATIC_REDIRECTION + env->watchers = mh_curl_watchers_new(); + if (env->watchers == NULL) { + diag_set(OutOfMemory, 0, "mh_curl_socket_table_new", + "watchers"); + goto error_exit; + } +#endif + ev_init(&env->timer_event, curl_timer_cb); env->timer_event.data = (void *) env; curl_multi_setopt(env->multi, CURLMOPT_TIMERFUNCTION, @@ -260,6 +404,23 @@ void curl_env_destroy(struct curl_env *env) { assert(env); + +#if LIBCURL_PROBLEMATIC_REDIRECTION + /* + * If there are registered ev_io watchers, they will be + * freed with the mempool destroying, but we should stop + * them and account in statistics. + */ + mh_int_t node; + struct ev_io *watcher; + mh_foreach(env->watchers, node) { + watcher = *mh_curl_watchers_node(env->watchers, node); + curl_multi_sock_cb(NULL, watcher->fd, CURL_POLL_REMOVE, env, + watcher); + } + mh_curl_watchers_delete(env->watchers); +#endif + if (env->multi != NULL) curl_multi_cleanup(env->multi); @@ -298,6 +459,13 @@ curl_execute(struct curl_request *curl_request, struct curl_env *env, if (mcode != CURLM_OK) goto curl_merror; ERROR_INJECT_YIELD(ERRINJ_HTTP_RESPONSE_ADD_WAIT); + +#if LIBCURL_PROBLEMATIC_REDIRECTION + curl_easy_setopt(curl_request->easy, CURLOPT_SOCKOPTFUNCTION, + curl_easy_sock_opt_cb); + curl_easy_setopt(curl_request->easy, CURLOPT_SOCKOPTDATA, env); +#endif + /* Don't wait on a cond if request has already failed or finished. */ if (curl_request->code == CURLE_OK && curl_request->in_progress) { ++env->stat.active_requests; diff --git a/src/curl.h b/src/curl.h index ea50afe3e..6c8d52cfb 100644 --- a/src/curl.h +++ b/src/curl.h @@ -39,6 +39,39 @@ #include "diag.h" #include "fiber_cond.h" +/* + * The macro determines whether we need to workaround a libcurl + * bug with tracking open sockets. libcurl-7.30 and older are + * affected. + * + * We check for a libcurl version at compile time and enable the + * workaround when it is less then 7.30. Operating systems where a + * default libcurl version is less then that will receive a + * tarantool package with the workaround enabled, but everything + * will work even if a user will update libcurl to more recent + * binary compatible version (at the moment it is any above + * 7.16.0). The memory and performance overhead should be + * negligible for most cases. + * + * For operating systems where a default libcurl version is 7.31 + * or newer we build packages w/o this workaround. + * + * The bug can lead to 'hanging' of a request when a server + * responds with HTTP 3xx code (depends on timings, but often + * occurs with machine-local requests). See + * curl_easy_sock_opt_cb() in src/curl.c for more information. + * + * The workaround uses CURLOPT_SOCKOPTFUNCTION, so we also check + * for the minimum version where it is supported: libcurl-7.16.0. + */ +#define LIBCURL_PROBLEMATIC_REDIRECTION (LIBCURL_VERSION_NUM >= 0x071000 && \ + LIBCURL_VERSION_NUM < 0x071f00) + +/* Forward declarations. */ +#if LIBCURL_PROBLEMATIC_REDIRECTION +struct mh_curl_watchers_t; +#endif + /** * CURL Statistics */ @@ -60,6 +93,13 @@ struct curl_env { struct ev_timer timer_event; /** Statistics. */ struct curl_stat stat; +#if LIBCURL_PROBLEMATIC_REDIRECTION + /** + * Hash table of ev_io watchers with file descriptors as + * keys. + */ + struct mh_curl_watchers_t *watchers; +#endif }; /** diff --git a/test/app-tap/http_client.test.lua b/test/app-tap/http_client.test.lua index 680c78b35..2271b9e9c 100755 --- a/test/app-tap/http_client.test.lua +++ b/test/app-tap/http_client.test.lua @@ -62,7 +62,7 @@ local function stop_server(test, server) end local function test_http_client(test, url, opts) - test:plan(11) + test:plan(12) -- gh-4136: confusing httpc usage error message local ok, err = pcall(client.request, client) @@ -85,10 +85,9 @@ local function test_http_client(test, url, opts) local r = client.request('GET', url, nil, opts) test:is(r.status, 200, 'request') - -- XXX: enable after resolving of gh-4180: httpc: redirects - -- are broken with libcurl-7.30 and older - --[[ -- gh-4119: specify whether to follow 'Location' header + -- gh-4180: httpc: redirects are broken with libcurl-7.30 and + -- older test:test('gh-4119: follow location', function(test) test:plan(7) local endpoint = 'redirect' @@ -111,7 +110,6 @@ local function test_http_client(test, url, opts) test:is(r.body, 'redirecting', 'do not follow location: body') test:is(r.headers['location'], '/', 'do not follow location: header') end) - ]]-- end -- -- 2.22.0