[patches] [http 1/1] http: adapt nginx http headers parser
v.shpilevoy at tarantool.org
v.shpilevoy at tarantool.org
Tue Feb 13 16:43:04 MSK 2018
Ack.
> 13 февр. 2018 г., в 15:07, imarkov <imarkov at tarantool.org> написал(а):
>
> From: Ilya <markovilya197 at gmail.com>
>
> * replace the old small Lua parser with the well-tested one adapted from nginx
> * functionality is not changed
>
> Signed-off-by: imarkov <imarkov at tarantool.org>
> ---
> src/CMakeLists.txt | 1 +
> src/http_parser.c | 399 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> src/http_parser.h | 66 +++++++++
> src/lua/httpc.c | 68 ++++++++-
> src/lua/httpc.lua | 47 ++-----
> 5 files changed, 540 insertions(+), 41 deletions(-)
> create mode 100644 src/http_parser.c
> create mode 100644 src/http_parser.h
>
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index e5acef7..fe99b44 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -93,6 +93,7 @@ set (core_sources
> util.c
> random.c
> trigger.cc
> + http_parser.c
> )
>
> if (TARGET_OS_NETBSD)
> diff --git a/src/http_parser.c b/src/http_parser.c
> new file mode 100644
> index 0000000..7166903
> --- /dev/null
> +++ b/src/http_parser.c
> @@ -0,0 +1,399 @@
> +/*
> + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the
> + * following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <string.h>
> +#include "httpc.h"
> +#include "http_parser.h"
> +
> +#define LF (unsigned char) '\n'
> +#define CR (unsigned char) '\r'
> +#define CRLF "\r\n"
> +
> +/**
> + * Following http parser functions were taken with slight
> + * adaptation from nginx http parser module
> + */
> +
> +/**
> + * Parse an HTTP status line ("HTTP/<major>.<minor> <code> <text>")
> + * that starts at *bufp and store the protocol version in @a parser.
> + * The status code is only validated (three digits), it is not
> + * stored anywhere.
> + *
> + * @param parser  receives http_major and http_minor; they may be
> + *                partially updated even when the line is rejected
> + * @param bufp    in: first byte of the line; out: the byte that
> + *                follows the terminating LF (written on success
> + *                only)
> + * @param end_buf one past the last readable byte
> + *
> + * @retval HTTP_PARSE_OK      the whole line including LF was read
> + * @retval HTTP_PARSE_INVALID the line is malformed, or the buffer
> + *                            ended before the terminating LF
> + */
> +static int
> +http_parse_status_line(struct http_parser *parser, char **bufp,
> +		       const char *end_buf)
> +{
> +	char ch;
> +	char *p = *bufp;
> +	enum {
> +		sw_start = 0,
> +		sw_H,
> +		sw_HT,
> +		sw_HTT,
> +		sw_HTTP,
> +		sw_first_major_digit,
> +		sw_major_digit,
> +		sw_first_minor_digit,
> +		sw_minor_digit,
> +		sw_status,
> +		sw_space_after_status,
> +		sw_status_text,
> +		sw_almost_done
> +	} state;
> +
> +	state = sw_start;
> +	/* Number of status code digits seen so far. */
> +	int status_count = 0;
> +	for (; p < end_buf; p++) {
> +		ch = *p;
> +		switch (state) {
> +		/* "HTTP/" */
> +		case sw_start:
> +			if (ch == 'H')
> +				state = sw_H;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_H:
> +			if (ch == 'T')
> +				state = sw_HT;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_HT:
> +			if (ch == 'T')
> +				state = sw_HTT;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_HTT:
> +			if (ch == 'P')
> +				state = sw_HTTP;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		case sw_HTTP:
> +			if (ch == '/')
> +				state = sw_first_major_digit;
> +			else
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		/* The first digit of major HTTP version */
> +		case sw_first_major_digit:
> +			if (ch < '1' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_major = ch - '0';
> +			state = sw_major_digit;
> +			break;
> +		/* The major HTTP version or dot */
> +		case sw_major_digit:
> +			if (ch == '.') {
> +				state = sw_first_minor_digit;
> +				break;
> +			}
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			/* Reject absurdly long version numbers. */
> +			if (parser->http_major > 99) {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_major = parser->http_major * 10
> +					     + (ch - '0');
> +			break;
> +		/* The first digit of minor HTTP version */
> +		case sw_first_minor_digit:
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_minor = ch - '0';
> +			state = sw_minor_digit;
> +			break;
> +		/*
> +		 * The minor HTTP version or
> +		 * the space before the status code
> +		 */
> +		case sw_minor_digit:
> +			if (ch == ' ') {
> +				state = sw_status;
> +				break;
> +			}
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			if (parser->http_minor > 99) {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			parser->http_minor = parser->http_minor * 10
> +					     + (ch - '0');
> +			break;
> +		/* HTTP status code */
> +		case sw_status:
> +			if (ch == ' ') {
> +				break;
> +			}
> +			if (ch < '0' || ch > '9') {
> +				return HTTP_PARSE_INVALID;
> +			}
> +			if (++status_count == 3) {
> +				state = sw_space_after_status;
> +			}
> +			break;
> +		/* Space or end of line */
> +		case sw_space_after_status:
> +			switch (ch) {
> +			case ' ':
> +				state = sw_status_text;
> +				break;
> +			case '.':
> +				/* IIS may send 403.1, 403.2, etc */
> +				state = sw_status_text;
> +				break;
> +			case CR:
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				goto done;
> +			default:
> +				return HTTP_PARSE_INVALID;
> +			}
> +			break;
> +		/* Any text until end of line */
> +		case sw_status_text:
> +			switch (ch) {
> +			case CR:
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				goto done;
> +			}
> +			break;
> +
> +		/* End of status line */
> +		case sw_almost_done:
> +			switch (ch) {
> +			case LF:
> +				goto done;
> +			default:
> +				return HTTP_PARSE_INVALID;
> +			}
> +		}
> +	}
> +	/*
> +	 * The buffer ended before the terminating LF. Reporting
> +	 * success here would hand the caller a pointer past
> +	 * end_buf, so treat the truncated line as invalid.
> +	 */
> +	return HTTP_PARSE_INVALID;
> +done:
> +	*bufp = p + 1;
> +	return HTTP_PARSE_OK;
> +}
> +
> +/**
> + * Parse one line of an HTTP response header block.
> + *
> + * For a "name: value" line the lowercased name is collected in
> + * parser->header_name (parser->header_name_idx bytes, the index
> + * wraps at HEADER_LEN) and the value is delimited by
> + * parser->header_value_start / parser->header_value_end, which
> + * point into the input buffer. A status line ("HTTP/x.y ...") is
> + * consumed transparently: only parser->http_major/http_minor are
> + * updated (both are set to -1 if the line is malformed) and
> + * parsing continues with the following line.
> + *
> + * @param parser  parser state, see above
> + * @param bufp    in: first byte to parse; out: first byte after
> + *                the consumed line (HTTP_PARSE_OK/HTTP_PARSE_DONE)
> + *                or the end of the buffer if the input is
> + *                truncated
> + * @param end_buf one past the last readable byte
> + *
> + * @retval HTTP_PARSE_OK      a header line was parsed
> + * @retval HTTP_PARSE_DONE    the empty line that ends the headers
> + *                            was read, or there is no input left
> + * @retval HTTP_PARSE_INVALID malformed or truncated input
> + */
> +int
> +http_parse_header_line(struct http_parser *parser, char **bufp,
> +		       const char *end_buf)
> +{
> +	char c, ch;
> +	char *p = *bufp;
> +	char *header_name_start = p;
> +	parser->header_name_idx = 0;
> +
> +	enum {
> +		sw_start = 0,
> +		sw_name,
> +		sw_space_before_value,
> +		sw_value,
> +		sw_space_after_value,
> +		sw_almost_done,
> +		sw_header_almost_done
> +	} state = sw_start;
> +
> +	/*
> +	 * Maps a byte to its lowercase form if it is allowed in a
> +	 * header name and to '\0' otherwise. The last '\0' is not
> +	 * spelled out because the string literal is zero
> +	 * terminated, which makes the table exactly 256 bytes.
> +	 */
> +	static const char lowcase[] =
> +		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789"
> +		"\0\0\0\0\0\0\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_\0"
> +		"abcdefghijklmnopqrstuvwxyz\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
> +		"\0\0\0\0\0\0\0\0\0\0";
> +
> +	/*
> +	 * Nothing left to parse: report the end of the headers so
> +	 * that a caller looping until HTTP_PARSE_DONE terminates.
> +	 */
> +	if (p >= end_buf)
> +		return HTTP_PARSE_DONE;
> +
> +	for (; p < end_buf; p++) {
> +		ch = *p;
> +		switch (state) {
> +		/* first char */
> +		case sw_start:
> +			switch (ch) {
> +			case CR:
> +				parser->header_value_end = p;
> +				state = sw_header_almost_done;
> +				break;
> +			case LF:
> +				parser->header_value_end = p;
> +				goto header_done;
> +			default:
> +				state = sw_name;
> +
> +				/*
> +				 * char may be signed: index the table
> +				 * with the unsigned byte value so that
> +				 * bytes >= 0x80 stay inside it.
> +				 */
> +				c = lowcase[(unsigned char) ch];
> +				if (c != 0) {
> +					parser->header_name[0] = c;
> +					parser->header_name_idx = 1;
> +					break;
> +				}
> +				if (ch == '\0') {
> +					return HTTP_PARSE_INVALID;
> +				}
> +				break;
> +			}
> +			break;
> +		/* http_header name */
> +		case sw_name:
> +			c = lowcase[(unsigned char) ch];
> +			if (c != 0) {
> +				parser->header_name[parser->header_name_idx] = c;
> +				parser->header_name_idx++;
> +				/* Wrap instead of overflowing the name. */
> +				parser->header_name_idx &= (HEADER_LEN - 1);
> +				break;
> +			}
> +			if (ch == ':') {
> +				state = sw_space_before_value;
> +				break;
> +			}
> +			if (ch == CR) {
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				state = sw_almost_done;
> +				break;
> +			}
> +			if (ch == LF) {
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				goto done;
> +			}
> +			/* handle "HTTP/1.1 ..." lines */
> +			if (ch == '/' && p - header_name_start == 4 &&
> +			    strncmp(header_name_start, "HTTP", 4) == 0) {
> +				char *line = header_name_start;
> +				char *eol;
> +				int rc = http_parse_status_line(parser, &line,
> +								end_buf);
> +				if (rc == HTTP_PARSE_INVALID) {
> +					parser->http_minor = -1;
> +					parser->http_major = -1;
> +				}
> +				/*
> +				 * The status line is not a header. Skip
> +				 * to its LF, valid or not, so that the
> +				 * rest of it is not collected as a
> +				 * header name, and start over with the
> +				 * next line.
> +				 */
> +				eol = memchr(p, LF, end_buf - p);
> +				if (eol == NULL) {
> +					/* Truncated status line. */
> +					*bufp = p + (end_buf - p);
> +					return HTTP_PARSE_INVALID;
> +				}
> +				p = eol;
> +				header_name_start = p + 1;
> +				parser->header_name_idx = 0;
> +				state = sw_start;
> +				break;
> +			}
> +			if (ch == '\0')
> +				return HTTP_PARSE_INVALID;
> +			break;
> +		/* space* before http_header value */
> +		case sw_space_before_value:
> +			switch (ch) {
> +			case ' ':
> +				break;
> +			case CR:
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				parser->header_value_start = p;
> +				parser->header_value_end = p;
> +				goto done;
> +			case '\0':
> +				return HTTP_PARSE_INVALID;
> +			default:
> +				parser->header_value_start = p;
> +				state = sw_value;
> +				break;
> +			}
> +			break;
> +
> +		/* http_header value */
> +		case sw_value:
> +			switch (ch) {
> +			case ' ':
> +				/* Possibly trailing space: mark the end. */
> +				parser->header_value_end = p;
> +				state = sw_space_after_value;
> +				break;
> +			case CR:
> +				parser->header_value_end = p;
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				parser->header_value_end = p;
> +				goto done;
> +			case '\0':
> +				return HTTP_PARSE_INVALID;
> +			}
> +			break;
> +		/* space* before end of http_header line */
> +		case sw_space_after_value:
> +			switch (ch) {
> +			case ' ':
> +				break;
> +			case CR:
> +				state = sw_almost_done;
> +				break;
> +			case LF:
> +				goto done;
> +			case '\0':
> +				return HTTP_PARSE_INVALID;
> +			default:
> +				/* The space was inside the value. */
> +				state = sw_value;
> +				break;
> +			}
> +			break;
> +		/* end of http_header line */
> +		case sw_almost_done:
> +			switch (ch) {
> +			case LF:
> +				goto done;
> +			case CR:
> +				break;
> +			default:
> +				return HTTP_PARSE_INVALID;
> +			}
> +			break;
> +		/* end of http_header */
> +		case sw_header_almost_done:
> +			if (ch == LF)
> +				goto header_done;
> +			else
> +				return HTTP_PARSE_INVALID;
> +		}
> +	}
> +
> +	/*
> +	 * The buffer ended in the middle of a line: there is no
> +	 * complete header to report. Consume the remainder so that
> +	 * the next call sees an empty buffer.
> +	 */
> +	*bufp = p;
> +	return HTTP_PARSE_INVALID;
> +
> +done:
> +	*bufp = p + 1;
> +	return HTTP_PARSE_OK;
> +
> +header_done:
> +	*bufp = p + 1;
> +	return HTTP_PARSE_DONE;
> +}
> diff --git a/src/http_parser.h b/src/http_parser.h
> new file mode 100644
> index 0000000..5e20f53
> --- /dev/null
> +++ b/src/http_parser.h
> @@ -0,0 +1,66 @@
> +/*
> + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the
> + * following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#ifndef TARANTOOL_HTTP_PARSER_H
> +#define TARANTOOL_HTTP_PARSER_H
> +
> +#define HEADER_LEN 32
> +
> +enum { /* result codes of http_parse_header_line() */
> + HTTP_PARSE_OK, /* a header line was parsed */
> + HTTP_PARSE_DONE, /* the empty line ending the headers was read */
> + HTTP_PARSE_INVALID /* malformed input */
> +};
> +
> +struct http_parser { /* state filled in by http_parse_header_line() */
> + char *header_value_start; /* first byte of the value, in the input */
> + char *header_value_end; /* one past the last byte of the value */
> +
> + int http_major; /* from the status line; -1 if it is invalid */
> + int http_minor; /* from the status line; -1 if it is invalid */
> +
> + char header_name[HEADER_LEN]; /* lowercased, not NUL-terminated */
> + int header_name_idx; /* write position; wraps at HEADER_LEN */
> +};
> +
> +/*
> + * @brief Parse one line of an HTTP header block
> + * @param parser parser object; receives the lowercased header name,
> + *        the value boundaries and, for a status line, the HTTP version
> + * @param bufp in: first byte to parse; out (on HTTP_PARSE_OK and
> + *        HTTP_PARSE_DONE): first byte after the parsed line
> + * @param end_buf end of the buffer (one past the last readable byte)
> + * @return HTTP_PARSE_OK - a header line was parsed
> + *         HTTP_PARSE_DONE - the empty line ending the headers was read
> + *         HTTP_PARSE_INVALID - error during parsing
> + */
> +int
> +http_parse_header_line(struct http_parser *parser, char **bufp, const char *end_buf);
> +
> +#endif //TARANTOOL_HTTP_PARSER_H
> diff --git a/src/lua/httpc.c b/src/lua/httpc.c
> index 76b3d00..45abb98 100644
> --- a/src/lua/httpc.c
> +++ b/src/lua/httpc.c
> @@ -34,6 +34,7 @@
> */
> #define DRIVER_LUA_UDATA_NAME "httpc"
>
> +#include <http_parser.h>
> #include "src/httpc.h"
> #include "say.h"
> #include "lua/utils.h"
> @@ -58,6 +59,69 @@ lua_add_key_u64(lua_State *L, const char *key, uint64_t value)
> lua_pushinteger(L, value);
> lua_settable(L, -3);
> }
> +
> +/**
> + * Parse raw HTTP response headers and store the result in the Lua
> + * table on top of the stack:
> + *   t.headers = { [lowercased name] = { value1, value2, ... } }
> + *   t.proto   = { major, minor }   (zeros if no valid status line)
> + *
> + * @param L      Lua state with the response table on top
> + * @param buffer raw headers; does not have to be NUL-terminated
> + * @param len    number of bytes in @a buffer
> + */
> +static void
> +parse_headers(lua_State *L, char *buffer, size_t len)
> +{
> +	/*
> +	 * The version fields are only written when a status line
> +	 * is met, so give them a defined "unknown" value up front.
> +	 */
> +	struct http_parser parser = {
> +		.http_major = -1,
> +		.http_minor = -1,
> +	};
> +	char *end_buf = buffer + len;
> +	lua_pushstring(L, "headers");
> +	lua_newtable(L);
> +	while (buffer < end_buf) {
> +		parser.header_value_start = NULL;
> +		parser.header_value_end = NULL;
> +		int rc = http_parse_header_line(&parser, &buffer, end_buf);
> +		/*
> +		 * HTTP_PARSE_DONE: end of the headers.
> +		 * HTTP_PARSE_INVALID: the parser does not step over
> +		 * the offending bytes, so retrying would spin forever;
> +		 * keep what has been parsed so far and stop.
> +		 */
> +		if (rc != HTTP_PARSE_OK)
> +			break;
> +		/* No value was delimited: the line is incomplete. */
> +		if (parser.header_value_start == NULL)
> +			break;
> +
> +		lua_pushlstring(L, parser.header_name,
> +				parser.header_name_idx);
> +
> +		/* check value of header, if exists */
> +		lua_pushlstring(L, parser.header_name,
> +				parser.header_name_idx);
> +		lua_gettable(L, -3);
> +		size_t value_len = parser.header_value_end -
> +				   parser.header_value_start;
> +		/* table of values to handle duplicates */
> +		if (lua_isnil(L, -1)) {
> +			lua_pop(L, 1);
> +			lua_newtable(L);
> +			lua_pushinteger(L, 1);
> +			lua_pushlstring(L, parser.header_value_start,
> +					value_len);
> +			lua_settable(L, -3);
> +		} else if (lua_istable(L, -1)) {
> +			lua_pushinteger(L, lua_objlen(L, -1) + 1);
> +			lua_pushlstring(L, parser.header_value_start,
> +					value_len);
> +			lua_settable(L, -3);
> +		}
> +		/* headers[parser.header_name] = {value, ...} */
> +		lua_settable(L, -3);
> +	}
> +
> +	/* headers */
> +	lua_settable(L, -3);
> +
> +	lua_pushstring(L, "proto");
> +
> +	lua_newtable(L);
> +	lua_pushinteger(L, 1);
> +	lua_pushinteger(L, (parser.http_major > 0) ? parser.http_major : 0);
> +	lua_settable(L, -3);
> +
> +	lua_pushinteger(L, 2);
> +	lua_pushinteger(L, (parser.http_minor > 0) ? parser.http_minor : 0);
> +	lua_settable(L, -3);
> +
> +	/* proto */
> +	lua_settable(L, -3);
> +}
> /* }}}
> */
>
> @@ -215,9 +279,7 @@ luaT_httpc_request(lua_State *L)
> httpc_request_delete(req);
> return luaT_error(L);
> }
> - lua_pushstring(L, "headers");
> - lua_pushlstring(L, headers, headers_len);
> - lua_settable(L, -3);
> + parse_headers(L, headers, headers_len);
> }
>
> size_t body_len = region_used(&req->resp_body);
> diff --git a/src/lua/httpc.lua b/src/lua/httpc.lua
> index 07ef395..3ddd3e7 100644
> --- a/src/lua/httpc.lua
> +++ b/src/lua/httpc.lua
> @@ -103,46 +103,17 @@ local special_headers = {
> ["user-agent"] = true,
> }
>
> -local function parse_list(list)
> - local result = {}
> - for _,str in pairs(list) do
> - local h = str:split(':', 1)
> - if #h > 1 then
> - local key = h[1]:lower()
> - local val = string.gsub(h[2], "^%s*(.-)%s*$", "%1")
> - local prev_val = result[key]
> - -- pack headers
> - if not special_headers[key] then
> - if prev_val == nil then
> - result[key] = {}
> - table.insert(result[key], val)
> - else
> - table.insert(prev_val, val)
> - end
> - else if not prev_val then
> - result[key] = val
> - end
> +local function process_headers(headers)
> + for header, value in pairs(headers) do
> + if type(value) == 'table' then
> + if special_headers[header] then
> + headers[header] = value[1]
> + else
> + headers[header] = table.concat(value, ',')
> end
> - elseif string.match(str, "HTTP/%d%.%d %d%d%d") then
> - result = {}
> end
> end
> -
> - for key, value in pairs(result) do
> - if not special_headers[key] then
> - result[key] = table.concat(result[key], ",")
> - end
> - end
> - return result
> -end
> -
> -local function parse_headers(resp)
> - local list = resp.headers:split('\r\n')
> - local h1 = table.remove(list, 1):split(' ')
> - local proto = h1[1]:split('/')[2]:split('.')
> - resp.proto = { tonumber(proto[1]), tonumber(proto[2]) }
> - resp.headers = parse_list(list)
> - return resp
> + return headers
> end
>
> --
> @@ -214,7 +185,7 @@ curl_mt = {
> end
> local resp = self.curl:request(method, url, body, opts or {})
> if resp and resp.headers then
> - resp = parse_headers(resp)
> + resp.headers = process_headers(resp.headers)
> end
> return resp
> end,
> --
> 2.7.4
>
More information about the Tarantool-patches
mailing list