[tarantool-patches] Re: [PATCH v1 1/1] http: fix httpc auto-managed headers

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Mon Feb 25 17:17:13 MSK 2019


Hi! Thanks for the patch! See 14 comments below.

On 25/02/2019 10:52, Kirill Shcherbatov wrote:
> I've reworked code a bit to set all user-defined headers before other httpc
> methods call to they have a priority.
> ================================================
> 
> The https library intelligently sets the headers "Accept",
> "Content-Length", "Connection", "Keep-Alive: timeout".
> However, when the user explicitly specified them in the header
> options section of the call argument, they could be written to
> the HTTP request twice.
> Was introduced service http c_managed_header_mask mask in which
> the bits corresponding to the http auto-manager headers, set 1
> when referenced. When the bit is set, the new value is not
> applied. In this way, we prefer user-defined value for all
> httpc-managed headers.
> 
> Closes #3955
> ---
> src/httpc.c | 123 +++++++++++++++++++++---------
> src/httpc.h | 24 +++++-
> src/lua/httpc.c | 45 ++++++-----
> test/app-tap/http_client.test.lua | 13 +++-
> 4 files changed, 145 insertions(+), 60 deletions(-)
> 
> diff --git a/src/httpc.c b/src/httpc.c
> index 950f8b32f..4685a712a 100644
> --- a/src/httpc.c
> +++ b/src/httpc.c
> @@ -95,8 +95,7 @@ httpc_env_destroy(struct httpc_env *ctx)
> }
> 
> struct httpc_request *
> -httpc_request_new(struct httpc_env *env, const char *method,
> - const char *url)
> +httpc_request_new(struct httpc_env *env)
> {
> struct httpc_request *req = mempool_alloc(&env->req_pool);
> if (req == NULL) {
> @@ -110,42 +109,7 @@ httpc_request_new(struct httpc_env *env, const char *method,
> region_create(&req->resp_body, &cord()->slabc);
> 
> if (curl_request_create(&req->curl_request) != 0)
> - return NULL;
> -
> - if (strcmp(method, "GET") == 0) {
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_HTTPGET, 1L);
> - } else if (strcmp(method, "HEAD") == 0) {
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_NOBODY, 1L);
> - } else if (strcmp(method, "POST") == 0 ||
> - strcmp(method, "PUT") == 0 ||
> - strcmp(method, "PATCH")) {
> - /*
> - * Set CURLOPT_POSTFIELDS to "" and CURLOPT_POSTFIELDSSIZE 0
> - * to avoid the read callback in any cases even if user
> - * forgot to call httpc_set_body() for POST request.
> - * @see https://curl.haxx.se/libcurl/c/CURLOPT_POSTFIELDS.html
> - */
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_POST, 1L);
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_POSTFIELDS, "");
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_POSTFIELDSIZE, 0);
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_CUSTOMREQUEST, method);
> - if (httpc_set_header(req, "Accept: */*") < 0)
> - goto error;
> - } else {
> - curl_easy_setopt(req->curl_request.easy, CURLOPT_CUSTOMREQUEST, method);
> - }

1. The email is utterly corrupted - all the indentation is lost. Please,
do not copypaste patches into an email without proper preparations.

For example, if you want to resend a patch with some comments, then either

     - Use 'create_commits.sh', or raw 'git format-patch' to create a patch
       email. Then edit it manually right in the file. Then send that file
       using 'git send-email' with '--in-reply-to=<MessageID>' option.

     Or

     - Use 'git --no-pager show/diff/... > patch.txt' to dump diff or any
       other git output into a file as is, without any changes of
       indentation, and 'less' editor view. Depending on your text editor,
       you may need to change indentation width and type in that file.
       Then you can copypaste that file into a new email.

Below I put a normal version.

> diff --git a/src/httpc.c b/src/httpc.c
> index 950f8b32f..4685a712a 100644
> --- a/src/httpc.c
> +++ b/src/httpc.c
> @@ -110,42 +109,7 @@ httpc_request_new(struct httpc_env *env, const char *method,
>  	region_create(&req->resp_body, &cord()->slabc);
>  
>  	if (curl_request_create(&req->curl_request) != 0)
> -		return NULL;
> -
> -	if (strcmp(method, "GET") == 0) {
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_HTTPGET, 1L);
> -	} else if (strcmp(method, "HEAD") == 0) {
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_NOBODY, 1L);
> -	} else if (strcmp(method, "POST") == 0 ||
> -		   strcmp(method, "PUT") == 0 ||
> -		   strcmp(method, "PATCH")) {
> -		/*
> -		 * Set CURLOPT_POSTFIELDS to "" and CURLOPT_POSTFIELDSSIZE 0
> -		 * to avoid the read callback in any cases even if user
> -		 * forgot to call httpc_set_body() for POST request.
> -		 * @see https://curl.haxx.se/libcurl/c/CURLOPT_POSTFIELDS.html
> -		 */
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_POST, 1L);
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_POSTFIELDS, "");
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_POSTFIELDSIZE, 0);
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_CUSTOMREQUEST, method);
> -		if (httpc_set_header(req, "Accept: */*") < 0)
> -			goto error;
> -	} else {
> -		curl_easy_setopt(req->curl_request.easy, CURLOPT_CUSTOMREQUEST, method);
> -	}
> -
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_URL, url);
> -
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_FOLLOWLOCATION, 1);
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_SSL_VERIFYPEER, 1);
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_WRITEFUNCTION,
> -			 curl_easy_write_cb);
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_HEADERFUNCTION,
> -			 curl_easy_header_cb);
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_NOPROGRESS, 1L);
> -	curl_easy_setopt(req->curl_request.easy, CURLOPT_HTTP_VERSION,
> -			 CURL_HTTP_VERSION_1_1);
> +		goto error;

2. In such a small function it is already clear, that you
do not need this single 'goto error'. Just process that error
here.

>  
>  	ibuf_create(&req->body, &cord()->slabc, 1);
>  
> @@ -170,6 +134,43 @@ httpc_request_delete(struct httpc_request *req)
>  	mempool_free(&req->env->req_pool, req);
>  }
>  
> +/**
> + * Update bitmask of the http request headers that httpc may set
> + * automatically. In case of reserved pattern is found in header,
> + * routine sets corresponding bit in auto_headers_mask.
> + * Returns -1 when header is reserved and it's bit is already set
> + * in auto_headers_mask; 0 otherwise,
> +*/

3. Use doxygen style.

> +static int
> +httpc_managed_headers_mask_update(uint8_t *auto_headers_mask,
> +				  const char *header)

4. I do not understand the name. Is it supposed to be a check?
Then, firstly, for checks we usually use '<object>_is/has_<property>'
naming template. But anyway it is not a check, because it changes
the mask. Come up with a better name, or use, for example, this:

     httpc_set_header_bit

> +{
> +	static struct {
> +		const char *header_name;
> +		uint32_t header_name_len;

5. The code below looks like Java. Please, rename to just 'name'
and 'len'.

> +	} httpc_managed_headers[] = {
> +		{"Connection",		10},
> +		{"Keep-Alive",		10},
> +		{"Content-Length",	14},
> +		{"Accept",		6},

6. I do not see any point in storing lengths here, because
anyway you can not use memcmp. Strncasecmp below will check
for terminating zero in both parameters anyway.

> +	};
> +	uint32_t httpc_managed_headers_cnt =
> +		sizeof(httpc_managed_headers)/sizeof(httpc_managed_headers[0]);

7. We have lengthof in trivia/utils.h.

> +	assert(httpc_managed_headers_cnt <
> +	       sizeof(*auto_headers_mask) * CHAR_BIT);
> +	for (uint32_t i = 0; i < httpc_managed_headers_cnt; i++) {

8. Please, do not use non standard types without necessity.
https://github.com/tarantool/tarantool/issues/2161

> +		if (strncasecmp(header, httpc_managed_headers[i].header_name,
> +				httpc_managed_headers[i].header_name_len) != 0)

9. As I said at least 2 times for one of previous issues, strncmp does
not work properly without additional checks. Your code will consider
'Acceptttttt' and 'Accept' to be equal. Please, fix that and add a test.

> +			continue;
> +		int8_t bit_index = 1 << i;
> +		if ((*auto_headers_mask & bit_index) != 0)
> +			return -1;
> +		*auto_headers_mask |= bit_index;
> +		return 0;
> +	}

10. I propose you to store const headers of httpc_managed_headers in a sorted
order. Sorted by the header names. Then, if during an iteration in the cycle
above you see, that strncasecmp() returned < 0, you can return without
further checks. I hope, that you understand why.

Another option is a binary search, but I think, that it is overkill here.

> +	return 0;
> +}
> +
>  int
>  httpc_set_header(struct httpc_request *req, const char *fmt, ...)
>  {
> @@ -187,6 +193,47 @@ httpc_set_header(struct httpc_request *req, const char *fmt, ...)
>  	return 0;
>  }
>  
> +int
> +httpc_set_resource(struct httpc_request *req, const char *method,
> +		   const char *url)

11. Firstly, it is a method of http_request, and should have
prefix 'http_request_'. But as I see, it is a common style
violation for this library, so lets turn the blind eye to it.

Secondly, personally I would like 'httpc_set_url', because it
looks shorter. And because 'resource' confused me at a first
glance.

Thirdly, please, move that function implementation below
httpc_request_new to reduce diff.

> +{
> +	if (strcmp(method, "GET") == 0) {
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_HTTPGET, 1L);
> +	} else if (strcmp(method, "HEAD") == 0) {
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_NOBODY, 1L);
> +	} else if (strcmp(method, "POST") == 0 ||
> +		   strcmp(method, "PUT") == 0 ||
> +		   strcmp(method, "PATCH")) {
> +		/*
> +		 * Set CURLOPT_POSTFIELDS to "" and CURLOPT_POSTFIELDSSIZE 0
> +		 * to avoid the read callback in any cases even if user
> +		 * forgot to call httpc_set_body() for POST request.
> +		 * @see https://curl.haxx.se/libcurl/c/CURLOPT_POSTFIELDS.html
> +		 */
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_POST, 1L);
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_POSTFIELDS, "");
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_POSTFIELDSIZE, 0);
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_CUSTOMREQUEST, method);
> +		if (httpc_set_header(req, "Accept: */*") < 0)
> +			return -1;
> +	} else {
> +		curl_easy_setopt(req->curl_request.easy, CURLOPT_CUSTOMREQUEST, method);
> +	}
> +
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_URL, url);
> +
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_FOLLOWLOCATION, 1);
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_SSL_VERIFYPEER, 1);
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_WRITEFUNCTION,
> +			 curl_easy_write_cb);
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_HEADERFUNCTION,
> +			 curl_easy_header_cb);
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_NOPROGRESS, 1L);
> +	curl_easy_setopt(req->curl_request.easy, CURLOPT_HTTP_VERSION,
> +			 CURL_HTTP_VERSION_1_1);
> +	return 0;
> +}
> +
>  int
>  httpc_set_body(struct httpc_request *req, const char *body, size_t size)
>  {
> diff --git a/src/httpc.h b/src/httpc.h
> index dc709e6fa..8b400ae43 100644
> --- a/src/httpc.h
> +++ b/src/httpc.h
> @@ -109,16 +109,23 @@ struct httpc_request {
>  	struct region resp_headers;
>  	/** buffer of body */
>  	struct region resp_body;
> +	/**
> +	 * A bitmask of the http request headers from among
> +	 * the headers that httpc can set automatically.
> +	 * This allows to prevent settings such headers twice.
> +	 * Read httpc_managed_headers_mask_update definition for
> +	 * more details.

12. Do not put references 'for details' to some internal
functions. They likely to be changed, or renamed, or deleted,
and your reference will be useless. Explain here everything.

> +	 */
> +	uint8_t auto_headers_mask;
>  };
> @@ -137,6 +144,17 @@ httpc_request_delete(struct httpc_request *req);
>  int
>  httpc_set_header(struct httpc_request *req, const char *fmt, ...);
>  
> +/**
> + * Set  HTTP method and destination url, set service headers.
> + * @param req request
> + * @param method HTTP request method (GET, HEAD, POST, PUT...)
> + * @param url The network resource URL address string.
> + * @retval 0 for success, -1 otherwise

13.
- Please, start each @<option> description with a capital letter,
   and finish with a dot;

- Please, use separate @retval for different return values
   description.

- Do not put double whitespaces in your comments. Here and in other
   places too.

> + */
> +int
> +httpc_set_resource(struct httpc_request *req, const char *method,
> +		   const char *url);
> +
>  /**
>   * Sets body of request
>   * @param req request
> diff --git a/test/app-tap/http_client.test.lua b/test/app-tap/http_client.test.lua
> index 10a3728f8..26119fa73 100755
> --- a/test/app-tap/http_client.test.lua
> +++ b/test/app-tap/http_client.test.lua
> @@ -79,6 +79,15 @@ local function test_http_client(test, url, opts)
>      test:is(r.status, 200, 'request')
>  end
>  
> +local function test_http_client_headers_redefine(test, url, opts)

14. Setting a new test out, please, write a comment about what this
test tests, and put a github issue number in it. Conforming our
test comments style.

> +    test:plan(3)
> +    opts.headers={['Connection'] = 'Keep-Alive', ['Accept'] = 'text/html'}
> +    local r = client.get(url, opts)
> +    test:is(r.status, 200, 'simple 200')
> +    test:is(r.headers["accept"], 'text/html', 'redefined Accept header')
> +    test:is(r.headers["connection"], 'Keep-Alive', 'redefined Connection header')
> +end
> +




More information about the Tarantool-patches mailing list