[patches] [http 1/1] http: adapt nginx http headers parser

v.shpilevoy at tarantool.org v.shpilevoy at tarantool.org
Tue Feb 13 16:43:04 MSK 2018


Ack.

> 13 февр. 2018 г., в 15:07, imarkov <imarkov at tarantool.org> написал(а):
> 
> From: Ilya <markovilya197 at gmail.com>
> 
> * replace the old small parser with the nginx-tested one
> * functionality is not changed
> 
> Signed-off-by: imarkov <imarkov at tarantool.org>
> ---
> src/CMakeLists.txt |   1 +
> src/http_parser.c  | 399 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> src/http_parser.h  |  66 +++++++++
> src/lua/httpc.c    |  68 ++++++++-
> src/lua/httpc.lua  |  47 ++-----
> 5 files changed, 540 insertions(+), 41 deletions(-)
> create mode 100644 src/http_parser.c
> create mode 100644 src/http_parser.h
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index e5acef7..fe99b44 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -93,6 +93,7 @@ set (core_sources
>      util.c
>      random.c
>      trigger.cc
> +     http_parser.c
>  )
> 
> if (TARGET_OS_NETBSD)
> diff --git a/src/http_parser.c b/src/http_parser.c
> new file mode 100644
> index 0000000..7166903
> --- /dev/null
> +++ b/src/http_parser.c
> @@ -0,0 +1,399 @@
> +/*
> + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + *    copyright notice, this list of conditions and the
> + *    following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + *    copyright notice, this list of conditions and the following
> + *    disclaimer in the documentation and/or other materials
> + *    provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <string.h>
> +#include "httpc.h"
> +#include "http_parser.h"
> +
> +#define LF     (unsigned char) '\n'
> +#define CR     (unsigned char) '\r'
> +#define CRLF   "\r\n"
> +
> +/**
> + * The following HTTP parser functions were taken, with slight
> + * adaptation, from the nginx HTTP parser module.
> + */
> +
> +/**
> + * Parse the "HTTP/x.y NNN text" status line; stores the HTTP version
> + */
> +static int
> +http_parse_status_line(struct http_parser *parser, char **bufp,
> +		       const char *end_buf)
> +{
> +	char ch;
> +	char *p = *bufp;
> +	enum {
> +		sw_start = 0,
> +		sw_H,
> +		sw_HT,
> +		sw_HTT,
> +		sw_HTTP,
> +		sw_first_major_digit,
> +		sw_major_digit,
> +		sw_first_minor_digit,
> +		sw_minor_digit,
> +		sw_status,
> +		sw_space_after_status,
> +		sw_status_text,
> +		sw_almost_done
> +	} state;
> +
> +	state = sw_start;
> +	int status_count = 0;
> +	for (;p < end_buf; p++) {
> +		ch = *p;
> +		switch (state) {
> +		/* "HTTP/" */
> +		case sw_start:
> +			if (ch == 'H')
> +				state = sw_H;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_H:
> +			if (ch == 'T')
> +				state = sw_HT;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_HT:
> +			if (ch == 'T')
> +				state = sw_HTT;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_HTT:
> +			if (ch == 'P')
> +				state = sw_HTTP;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_HTTP:
> +			if (ch == '/')
> +				state = sw_first_major_digit;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		/* The first digit of major HTTP version */
> +		case sw_first_major_digit:
> +			if (ch < '1' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_major = ch - '0';
> +			state = sw_major_digit;
> +			break;
> +		/* The major HTTP version or dot */
> +		case sw_major_digit:
> +			if (ch == '.') {
> +				state = sw_first_minor_digit;
> +				break;
> +			}
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			if (parser->http_major > 99) {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_major = parser->http_major * 10
> +					     + (ch - '0');
> +			break;
> +		/* The first digit of minor HTTP version */
> +		case sw_first_minor_digit:
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_minor = ch - '0';
> +			state = sw_minor_digit;
> +			break;
> +		/*
> +		 * The minor HTTP version or
> +		 * the space before the status code
> +		 */
> +		case sw_minor_digit:
> +			if (ch == ' ') {
> +				state = sw_status;
> +				break;
> +			}
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			if (parser->http_minor > 99) {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_minor = parser->http_minor * 10
> +					     + (ch - '0');
> +			break;
> +		/* HTTP status code */
> +		case sw_status:
> +			if (ch == ' ') {
> +				break;
> +			}
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			if (++status_count == 3) {
> +				state = sw_space_after_status;
> +			}
> +			break;
> +		/* Space or end of line */
> +		case sw_space_after_status:
> +			switch (ch) {
> +			case ' ':
> +				state = sw_status_text;
> +				break;
> +			case '.':
> +				/* IIS may send 403.1, 403.2, etc */
> +				state = sw_status_text;
> +				break;
> +			case CR:
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				goto done;
> +			default:
> +				return HTTP_PARSE_INVALID;
> +			}
> +			break;
> +		/* Any text until end of line */
> +		case sw_status_text:
> +			switch (ch) {
> +			case CR:
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				goto done;
> +			}
> +			break;
> +
> +		/* End of status line */
> +		case sw_almost_done:
> +			switch (ch) {
> +			case LF:
> +				goto done;
> +			default:
> +				return HTTP_PARSE_INVALID;
> +			}
> +		}
> +	}
> +done:
> +	*bufp = p + 1;
> +	return HTTP_PARSE_OK;
> +}
> +
> +int
> +http_parse_header_line(struct http_parser *parser, char **bufp,
> +		       const char *end_buf)
> +{
> +	char c, ch;
> +	char *p = *bufp;
> +	char *header_name_start = p;
> +	parser->header_name_idx = 0;
> +
> +	enum {
> +		sw_start = 0,
> +		sw_name,
> +		sw_space_before_value,
> +		sw_value,
> +		sw_space_after_value,
> +		sw_almost_done,
> +		sw_header_almost_done
> +	} state = sw_start;
> +
> +	/*
> +	 * The last '\0' is not needed
> +	 * because string is zero terminated
> +	 */
> +	static char lowcase[] =
> +			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789"
> +			"\0\0\0\0\0\0\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_\0"
> +			"abcdefghijklmnopqrstuvwxyz\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +			"\0\0\0\0\0\0\0\0\0\0";
> +
> +	for (; p < end_buf; p++) {
> +		ch = *p;
> +		switch (state) {
> +		/* first char */
> +		case sw_start:
> +			switch (ch) {
> +			case CR:
> +				parser->header_value_end = p;
> +				state = sw_header_almost_done;
> +				break;
> +			case LF:
> +				parser->header_value_end = p;
> +				goto header_done;
> +			default:
> +				state = sw_name;
> +
> +				c = lowcase[ch];
> +				if (c != 0) {
> +					parser->header_name[0] = c;
> +					parser->header_name_idx = 1;
> +					break;
> +				}
> +				if (ch == '\0') {
> +					return HTTP_PARSE_INVALID;
> +				}
> +				break;
> +			}
> +			break;
> +		/* http_header name */
> +		case sw_name:
> +			c = lowcase[ch];
> +			if (c != 0) {
> +				parser->header_name[parser->header_name_idx] = c;
> +				parser->header_name_idx++;
> +				parser->header_name_idx &= (HEADER_LEN - 1);
> +				break;
> +			}
> +			if (ch == ':') {
> +				state = sw_space_before_value;
> +				break;
> +			}
> +			if (ch == CR) {
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				state = sw_almost_done;
> +				break;
> +			}
> +			if (ch == LF) {
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				goto done;
> +			}
> +			/* handle "HTTP/1.1 ..." lines */
> +			if (ch == '/' && p - header_name_start == 4 &&
> +				strncmp(header_name_start, "HTTP", 4) == 0) {
> +				int rc = http_parse_status_line(parser,
> +							&header_name_start,
> +							end_buf);
> +				if (rc == HTTP_PARSE_INVALID) {
> +					parser->http_minor = -1;
> +					parser->http_major = -1;
> +				}
> +				state = sw_start;
> +				break;
> +			}
> +			if (ch == '\0')
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		/* space* before http_header value */
> +		case sw_space_before_value:
> +			switch (ch) {
> +			case ' ':
> +				break;
> +			case CR:
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				goto done;
> +			case '\0':
> +				return HTTP_PARSE_INVALID;
> +			default:
> +				parser->header_value_start = p;
> +				state = sw_value;
> +				break;
> +			}
> +			break;
> +
> +		/* http_header value */
> +		case sw_value:
> +			switch (ch) {
> +			case ' ':
> +				parser->header_value_end = p;
> +				state = sw_space_after_value;
> +				break;
> +			case CR:
> +				parser->header_value_end = p;
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				parser->header_value_end = p;
> +				goto done;
> +			case '\0':
> +				return HTTP_PARSE_INVALID;
> +			}
> +			break;
> +		/* space* before end of http_header line */
> +		case sw_space_after_value:
> +			switch (ch) {
> +			case ' ':
> +				break;
> +			case CR:
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				goto done;
> +			case '\0':
> +				return HTTP_PARSE_INVALID;
> +			default:
> +				state = sw_value;
> +				break;
> +			}
> +			break;
> +		/* end of http_header line */
> +		case sw_almost_done:
> +			switch (ch) {
> +			case LF:
> +				goto done;
> +			case CR:
> +				break;
> +			default:
> +				return HTTP_PARSE_INVALID;
> +			}
> +			break;
> +		/* end of http_header */
> +		case sw_header_almost_done:
> +			if (ch == LF)
> +				goto header_done;
> +			else
> +				return HTTP_PARSE_INVALID;
> +		}
> +	}
> +
> +done:
> +	*bufp = p + 1;
> +	return HTTP_PARSE_OK;
> +
> +header_done:
> +	*bufp = p + 1;
> +	return HTTP_PARSE_DONE;
> +}
> diff --git a/src/http_parser.h b/src/http_parser.h
> new file mode 100644
> index 0000000..5e20f53
> --- /dev/null
> +++ b/src/http_parser.h
> @@ -0,0 +1,66 @@
> +/*
> + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + *    copyright notice, this list of conditions and the
> + *    following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + *    copyright notice, this list of conditions and the following
> + *    disclaimer in the documentation and/or other materials
> + *    provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#ifndef TARANTOOL_HTTP_PARSER_H
> +#define TARANTOOL_HTTP_PARSER_H
> +
> +#define HEADER_LEN 32
> +
> +enum {
> +	HTTP_PARSE_OK,
> +	HTTP_PARSE_DONE,
> +	HTTP_PARSE_INVALID
> +};
> +
> +struct http_parser {
> +	char *header_value_start;
> +	char *header_value_end;
> +
> +	int http_major;
> +	int http_minor;
> +
> +	char header_name[HEADER_LEN];
> +	int header_name_idx;
> +};
> +
> +/*
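> + * @brief Parse one line of an HTTP header block
> + * @param parser parser object; receives the header name and value
> + * @param bufp pointer to buffer with data; advanced past the parsed line
> + * @param end_buf pointer to the end of the buffer
> + * @return	HTTP_PARSE_DONE - empty line reached, headers are over
> + * 		HTTP_PARSE_OK - one header line was parsed
> + * 		HTTP_PARSE_INVALID - error during parsing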
> + */
> +int
> +http_parse_header_line(struct http_parser *parser, char **bufp, const char *end_buf);
> +
> +#endif //TARANTOOL_HTTP_PARSER_H
> diff --git a/src/lua/httpc.c b/src/lua/httpc.c
> index 76b3d00..45abb98 100644
> --- a/src/lua/httpc.c
> +++ b/src/lua/httpc.c
> @@ -34,6 +34,7 @@
>  */
> #define DRIVER_LUA_UDATA_NAME	"httpc"
> 
> +#include <http_parser.h>
> #include "src/httpc.h"
> #include "say.h"
> #include "lua/utils.h"
> @@ -58,6 +59,69 @@ lua_add_key_u64(lua_State *L, const char *key, uint64_t value)
> 	lua_pushinteger(L, value);
> 	lua_settable(L, -3);
> }
> +
> +static void
> +parse_headers(lua_State *L, char *buffer, size_t len)
> +{
> +	struct http_parser parser;
> +	char *end_buf = buffer + len;
> +	lua_pushstring(L, "headers");
> +	lua_newtable(L);
> +	while (true) {
> +		int rc = http_parse_header_line(&parser, &buffer, end_buf);
> +		if (rc == HTTP_PARSE_INVALID) {
> +			continue;
> +		}
> +		if (rc == HTTP_PARSE_DONE) {
> +			break;
> +		}
> +
> +		if (rc == HTTP_PARSE_OK) {
> +			lua_pushlstring(L, parser.header_name,
> +					parser.header_name_idx);
> +
> +			/* check value of header, if exists */
> +			lua_pushlstring(L, parser.header_name,
> +					parser.header_name_idx);
> +			lua_gettable(L, -3);
> +			int value_len = parser.header_value_end -
> +						parser.header_value_start;
> +			/* table of values to handle duplicate headers */
> +			if (lua_isnil(L, -1)) {
> +				lua_pop(L, 1);
> +				lua_newtable(L);
> +				lua_pushinteger(L, 1);
> +				lua_pushlstring(L, parser.header_value_start,
> +						value_len);
> +				lua_settable(L, -3);
> +			} else if (lua_istable(L, -1)) {
> +				lua_pushinteger(L, lua_objlen(L, -1) + 1);
> +				lua_pushlstring(L, parser.header_value_start,
> +						value_len);
> +				lua_settable(L, -3);
> +			}
> +			/* headers[parser.header_name] = {value, ...} */
> +			lua_settable(L, -3);
> +		}
> +	}
> +
> +	/* headers */
> +	lua_settable(L, -3);
> +
> +	lua_pushstring(L, "proto");
> +
> +	lua_newtable(L);
> +	lua_pushinteger(L, 1);
> +	lua_pushinteger(L, (parser.http_major > 0) ? parser.http_major: 0);
> +	lua_settable(L, -3);
> +
> +	lua_pushinteger(L, 2);
> +	lua_pushinteger(L, (parser.http_minor > 0) ? parser.http_minor: 0);
> +	lua_settable(L, -3);
> +
> +	/* proto */
> +	lua_settable(L, -3);
> +}
> /* }}}
>  */
> 
> @@ -215,9 +279,7 @@ luaT_httpc_request(lua_State *L)
> 			httpc_request_delete(req);
> 			return luaT_error(L);
> 		}
> -		lua_pushstring(L, "headers");
> -		lua_pushlstring(L, headers, headers_len);
> -		lua_settable(L, -3);
> +		parse_headers(L, headers, headers_len);
> 	}
> 
> 	size_t body_len = region_used(&req->resp_body);
> diff --git a/src/lua/httpc.lua b/src/lua/httpc.lua
> index 07ef395..3ddd3e7 100644
> --- a/src/lua/httpc.lua
> +++ b/src/lua/httpc.lua
> @@ -103,46 +103,17 @@ local special_headers = {
>     ["user-agent"] = true,
> }
> 
> -local function parse_list(list)
> -    local result = {}
> -    for _,str in pairs(list) do
> -        local h = str:split(':', 1)
> -        if #h > 1 then
> -            local key = h[1]:lower()
> -            local val = string.gsub(h[2], "^%s*(.-)%s*$", "%1")
> -            local prev_val = result[key]
> -            -- pack headers
> -            if not special_headers[key] then
> -                if prev_val == nil then
> -                    result[key] = {}
> -                    table.insert(result[key], val)
> -                else
> -                    table.insert(prev_val, val)
> -                end
> -            else if not prev_val then
> -                result[key] = val
> -               end
> +local function process_headers(headers)
> +    for header, value in pairs(headers) do
> +        if type(value) == 'table' then
> +            if special_headers[header] then
> +                headers[header] = value[1]
> +            else
> +                headers[header] = table.concat(value, ',')
>             end
> -        elseif string.match(str, "HTTP/%d%.%d %d%d%d") then
> -            result = {}
>         end
>     end
> -
> -    for key, value in pairs(result) do
> -        if not special_headers[key] then
> -            result[key] = table.concat(result[key], ",")
> -        end
> -    end
> -    return result
> -end
> -
> -local function parse_headers(resp)
> -    local list = resp.headers:split('\r\n')
> -    local h1 = table.remove(list, 1):split(' ')
> -    local proto = h1[1]:split('/')[2]:split('.')
> -    resp.proto = { tonumber(proto[1]), tonumber(proto[2]) }
> -    resp.headers = parse_list(list)
> -    return resp
> +    return headers
> end
> 
> --
> @@ -214,7 +185,7 @@ curl_mt = {
>             end
>             local resp = self.curl:request(method, url, body, opts or {})
>             if resp and resp.headers then
> -                resp = parse_headers(resp)
> +                resp.headers = process_headers(resp.headers)
>             end
>             return resp
>         end,
> -- 
> 2.7.4
> 




More information about the Tarantool-patches mailing list