[patches] [http 1/1] http: adapt nginx http headers parser
imarkov
imarkov at tarantool.org
Tue Feb 13 15:07:15 MSK 2018
From: Ilya <markovilya197 at gmail.com>
* replace the old small Lua headers parser with the well-tested one adapted from nginx
* functionality is not changed
Signed-off-by: imarkov <imarkov at tarantool.org>
---
src/CMakeLists.txt | 1 +
src/http_parser.c | 399 +++++++++++++++++++++++++++++++++++++++++++++++++++++
src/http_parser.h | 66 +++++++++
src/lua/httpc.c | 68 ++++++++-
src/lua/httpc.lua | 47 ++-----
5 files changed, 540 insertions(+), 41 deletions(-)
create mode 100644 src/http_parser.c
create mode 100644 src/http_parser.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e5acef7..fe99b44 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -93,6 +93,7 @@ set (core_sources
util.c
random.c
trigger.cc
+ http_parser.c
)
if (TARGET_OS_NETBSD)
diff --git a/src/http_parser.c b/src/http_parser.c
new file mode 100644
index 0000000..7166903
--- /dev/null
+++ b/src/http_parser.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include "httpc.h"
+#include "http_parser.h"
+
+#define LF (unsigned char) '\n'
+#define CR (unsigned char) '\r'
+#define CRLF "\r\n"
+
+/**
+ * Following http parser functions were taken with slight
+ * adaptation from nginx http parser module
+ */
+
+/**
+ * Parse an HTTP response status line of the form
+ * "HTTP/<major>.<minor> <3-digit code>[ <text>]" terminated by LF
+ * (optionally preceded by CR).
+ *
+ * The protocol version is stored in parser->http_major and
+ * parser->http_minor. The status code digits are validated but not
+ * stored anywhere.
+ *
+ * @param parser  receives the parsed protocol version
+ * @param bufp    in: first byte of the line; out (success only):
+ *                first byte after the terminating LF
+ * @param end_buf one past the last readable byte
+ *
+ * @retval HTTP_PARSE_OK      a complete well-formed line was consumed
+ * @retval HTTP_PARSE_INVALID the line is malformed, or the buffer ended
+ *                            before the terminating LF was seen;
+ *                            *bufp is left untouched
+ */
+static int
+http_parse_status_line(struct http_parser *parser, char **bufp,
+		       const char *end_buf)
+{
+	char ch;
+	char *p = *bufp;
+	enum {
+		sw_start = 0,
+		sw_H,
+		sw_HT,
+		sw_HTT,
+		sw_HTTP,
+		sw_first_major_digit,
+		sw_major_digit,
+		sw_first_minor_digit,
+		sw_minor_digit,
+		sw_status,
+		sw_space_after_status,
+		sw_status_text,
+		sw_almost_done
+	} state;
+
+	state = sw_start;
+	/* Number of status code digits seen so far (exactly 3 expected). */
+	int status_count = 0;
+	for (; p < end_buf; p++) {
+		ch = *p;
+		switch (state) {
+		/* "HTTP/" */
+		case sw_start:
+			if (ch != 'H')
+				return HTTP_PARSE_INVALID;
+			state = sw_H;
+			break;
+		case sw_H:
+			if (ch != 'T')
+				return HTTP_PARSE_INVALID;
+			state = sw_HT;
+			break;
+		case sw_HT:
+			if (ch != 'T')
+				return HTTP_PARSE_INVALID;
+			state = sw_HTT;
+			break;
+		case sw_HTT:
+			if (ch != 'P')
+				return HTTP_PARSE_INVALID;
+			state = sw_HTTP;
+			break;
+		case sw_HTTP:
+			if (ch != '/')
+				return HTTP_PARSE_INVALID;
+			state = sw_first_major_digit;
+			break;
+		/* The first digit of major HTTP version */
+		case sw_first_major_digit:
+			if (ch < '1' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			parser->http_major = ch - '0';
+			state = sw_major_digit;
+			break;
+		/* The major HTTP version or dot */
+		case sw_major_digit:
+			if (ch == '.') {
+				state = sw_first_minor_digit;
+				break;
+			}
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			/* Bound the number of digits to avoid overflow. */
+			if (parser->http_major > 99)
+				return HTTP_PARSE_INVALID;
+			parser->http_major = parser->http_major * 10
+					     + (ch - '0');
+			break;
+		/* The first digit of minor HTTP version */
+		case sw_first_minor_digit:
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			parser->http_minor = ch - '0';
+			state = sw_minor_digit;
+			break;
+		/*
+		 * The minor HTTP version or
+		 * the space separating it from the status code
+		 */
+		case sw_minor_digit:
+			if (ch == ' ') {
+				state = sw_status;
+				break;
+			}
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			/* Bound the number of digits to avoid overflow. */
+			if (parser->http_minor > 99)
+				return HTTP_PARSE_INVALID;
+			parser->http_minor = parser->http_minor * 10
+					     + (ch - '0');
+			break;
+		/* HTTP status code */
+		case sw_status:
+			if (ch == ' ')
+				break;
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			if (++status_count == 3)
+				state = sw_space_after_status;
+			break;
+		/* Space or end of line */
+		case sw_space_after_status:
+			switch (ch) {
+			case ' ':
+				state = sw_status_text;
+				break;
+			case '.':
+				/* IIS may send 403.1, 403.2, etc */
+				state = sw_status_text;
+				break;
+			case CR:
+				state = sw_almost_done;
+				break;
+			case LF:
+				goto done;
+			default:
+				return HTTP_PARSE_INVALID;
+			}
+			break;
+		/* Any text until end of line */
+		case sw_status_text:
+			switch (ch) {
+			case CR:
+				state = sw_almost_done;
+				break;
+			case LF:
+				goto done;
+			}
+			break;
+		/* End of status line: only LF may follow CR */
+		case sw_almost_done:
+			if (ch == LF)
+				goto done;
+			return HTTP_PARSE_INVALID;
+		}
+	}
+	/*
+	 * The buffer ended before the terminating LF: the line is
+	 * incomplete. Do not report success for it and do not move
+	 * *bufp beyond end_buf.
+	 */
+	return HTTP_PARSE_INVALID;
+done:
+	*bufp = p + 1;
+	return HTTP_PARSE_OK;
+}
+
+/**
+ * Parse one line of an HTTP response header block.
+ *
+ * For a "Name: value" line the lower-cased name is stored in
+ * parser->header_name (not NUL-terminated, length in
+ * parser->header_name_idx, at most HEADER_LEN characters are kept) and
+ * the value is delimited by parser->header_value_start (inclusive) and
+ * parser->header_value_end (exclusive, trailing spaces excluded); both
+ * point into the parsed buffer.
+ *
+ * A status line ("HTTP/x.y ...") met at the beginning of the line is
+ * consumed entirely: the version goes to parser->http_major and
+ * parser->http_minor (both set to -1 when the status line is
+ * malformed) and parsing continues with the next line within the same
+ * call.
+ *
+ * @param parser  parser state, filled as described above
+ * @param bufp    in: first byte of the line; out: first byte the next
+ *                call should start from
+ * @param end_buf one past the last readable byte
+ *
+ * @retval HTTP_PARSE_OK      a header line was parsed
+ * @retval HTTP_PARSE_DONE    the empty line ending the header block was
+ *                            consumed, or there is no more input
+ * @retval HTTP_PARSE_INVALID the line is malformed or truncated; *bufp
+ *                            is moved past the offending line (or to
+ *                            end_buf) so that the caller can go on
+ */
+int
+http_parse_header_line(struct http_parser *parser, char **bufp,
+		       const char *end_buf)
+{
+	char c, ch;
+	char *p = *bufp;
+	char *eol;
+	char *header_name_start = p;
+	parser->header_name_idx = 0;
+
+	enum {
+		sw_start = 0,
+		sw_name,
+		sw_space_before_value,
+		sw_value,
+		sw_space_after_value,
+		sw_almost_done,
+		sw_header_almost_done
+	} state = sw_start;
+
+	/*
+	 * Maps a byte to its lower-case form if it is allowed in a
+	 * header name and to 0 otherwise. The last '\0' is not needed
+	 * because string is zero terminated.
+	 */
+	static const char lowcase[] =
+		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789"
+		"\0\0\0\0\0\0\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_\0"
+		"abcdefghijklmnopqrstuvwxyz\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+		"\0\0\0\0\0\0\0\0\0\0";
+
+	/* Nothing left to parse: report the end of the header block. */
+	if (p >= end_buf)
+		return HTTP_PARSE_DONE;
+
+	for (; p < end_buf; p++) {
+		ch = *p;
+		switch (state) {
+		/* first char */
+		case sw_start:
+			switch (ch) {
+			case CR:
+				parser->header_value_end = p;
+				state = sw_header_almost_done;
+				break;
+			case LF:
+				parser->header_value_end = p;
+				goto header_done;
+			default:
+				state = sw_name;
+				/*
+				 * Index as unsigned: plain char may be
+				 * signed and bytes >= 0x80 would read
+				 * before the table.
+				 */
+				c = lowcase[(unsigned char) ch];
+				if (c != 0) {
+					parser->header_name[0] = c;
+					parser->header_name_idx = 1;
+					break;
+				}
+				if (ch == '\0')
+					goto invalid;
+				break;
+			}
+			break;
+		/* http_header name */
+		case sw_name:
+			c = lowcase[(unsigned char) ch];
+			if (c != 0) {
+				/*
+				 * Keep the first HEADER_LEN characters.
+				 * Wrapping the index around would turn
+				 * a long name into garbage (a name of
+				 * exactly HEADER_LEN characters would
+				 * get zero length).
+				 */
+				if (parser->header_name_idx < HEADER_LEN) {
+					parser->header_name[
+						parser->header_name_idx] = c;
+					parser->header_name_idx++;
+				}
+				break;
+			}
+			if (ch == ':') {
+				state = sw_space_before_value;
+				break;
+			}
+			if (ch == CR) {
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				state = sw_almost_done;
+				break;
+			}
+			if (ch == LF) {
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				goto done;
+			}
+			/* handle "HTTP/1.1 ..." lines */
+			if (ch == '/' && p - header_name_start == 4 &&
+			    strncmp(header_name_start, "HTTP", 4) == 0) {
+				char *line_end = header_name_start;
+				int rc = http_parse_status_line(parser,
+								&line_end,
+								end_buf);
+				if (rc == HTTP_PARSE_INVALID) {
+					parser->http_minor = -1;
+					parser->http_major = -1;
+					/*
+					 * Skip the rest of the broken
+					 * status line.
+					 */
+					eol = memchr(p, LF, end_buf - p);
+					if (eol == NULL)
+						goto invalid;
+					line_end = eol + 1;
+				}
+				/* Never resume beyond the buffer. */
+				if (line_end > end_buf)
+					line_end = (char *) end_buf;
+				/*
+				 * Resume with a fresh header right
+				 * after the status line instead of
+				 * parsing its remainder as a header.
+				 * The loop increment moves p to
+				 * line_end.
+				 */
+				p = line_end - 1;
+				header_name_start = line_end;
+				parser->header_name_idx = 0;
+				state = sw_start;
+				break;
+			}
+			if (ch == '\0')
+				goto invalid;
+			/* Other characters in a name are skipped. */
+			break;
+		/* space* before http_header value */
+		case sw_space_before_value:
+			switch (ch) {
+			case ' ':
+				break;
+			case CR:
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				state = sw_almost_done;
+				break;
+			case LF:
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				goto done;
+			case '\0':
+				goto invalid;
+			default:
+				parser->header_value_start = p;
+				state = sw_value;
+				break;
+			}
+			break;
+		/* http_header value */
+		case sw_value:
+			switch (ch) {
+			case ' ':
+				/* Possibly the end, unless text follows. */
+				parser->header_value_end = p;
+				state = sw_space_after_value;
+				break;
+			case CR:
+				parser->header_value_end = p;
+				state = sw_almost_done;
+				break;
+			case LF:
+				parser->header_value_end = p;
+				goto done;
+			case '\0':
+				goto invalid;
+			}
+			break;
+		/* space* before end of http_header line */
+		case sw_space_after_value:
+			switch (ch) {
+			case ' ':
+				break;
+			case CR:
+				state = sw_almost_done;
+				break;
+			case LF:
+				goto done;
+			case '\0':
+				goto invalid;
+			default:
+				/* The space was inside the value. */
+				state = sw_value;
+				break;
+			}
+			break;
+		/* end of http_header line */
+		case sw_almost_done:
+			switch (ch) {
+			case LF:
+				goto done;
+			case CR:
+				break;
+			default:
+				goto invalid;
+			}
+			break;
+		/* end of http_header */
+		case sw_header_almost_done:
+			if (ch == LF)
+				goto header_done;
+			goto invalid;
+		}
+	}
+
+	/*
+	 * Input is exhausted before a line terminator was seen. If only
+	 * a status line was consumed there is simply nothing more to
+	 * parse, otherwise the last line is truncated.
+	 */
+	*bufp = (char *) end_buf;
+	return state == sw_start ? HTTP_PARSE_DONE : HTTP_PARSE_INVALID;
+
+done:
+	*bufp = p + 1;
+	return HTTP_PARSE_OK;
+
+header_done:
+	*bufp = p + 1;
+	return HTTP_PARSE_DONE;
+
+invalid:
+	/*
+	 * Move past the offending line so that a caller which ignores
+	 * invalid lines makes progress instead of re-parsing it forever.
+	 */
+	eol = (p < end_buf) ? memchr(p, LF, end_buf - p) : NULL;
+	*bufp = (eol != NULL) ? eol + 1 : (char *) end_buf;
+	return HTTP_PARSE_INVALID;
+}
diff --git a/src/http_parser.h b/src/http_parser.h
new file mode 100644
index 0000000..5e20f53
--- /dev/null
+++ b/src/http_parser.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef TARANTOOL_HTTP_PARSER_H
+#define TARANTOOL_HTTP_PARSER_H
+
+#define HEADER_LEN 32
+
+/* Return codes of http_parse_header_line(). */
+enum {
+ /* A header line was parsed, its name and value are in the parser. */
+ HTTP_PARSE_OK,
+ /* End of the header block is reached, nothing more to parse. */
+ HTTP_PARSE_DONE,
+ /* The line is malformed. */
+ HTTP_PARSE_INVALID
+};
+
+/*
+ * State filled by http_parse_header_line() for the line it has just
+ * parsed.
+ */
+struct http_parser {
+ /* First byte of the header value, points into the parsed buffer. */
+ char *header_value_start;
+ /* One past the last byte of the header value. */
+ char *header_value_end;
+
+ /*
+  * Protocol version taken from the status line; both fields are set
+  * to -1 by the parser when the status line is malformed.
+  */
+ int http_major;
+ int http_minor;
+
+ /* Lower-cased header name, NOT NUL-terminated. */
+ char header_name[HEADER_LEN];
+ /* Write position in header_name, i.e. length of the stored name. */
+ int header_name_idx;
+};
+
+/*
+ * @brief Parse a line containing http header info
+ * @param parser object which receives the header name, the bounds of
+ *        the header value and the protocol version
+ * @param bufp pointer to the current position in the buffer with data;
+ *        advanced past the consumed line
+ * @param end_buf pointer to the byte following the last byte of data
+ * @return HTTP_PARSE_OK - a header line was parsed
+ *         HTTP_PARSE_DONE - end of the headers was reached
+ *         HTTP_PARSE_INVALID - error during parsing
+ */
+int
+http_parse_header_line(struct http_parser *parser, char **bufp, const char *end_buf);
+
+#endif //TARANTOOL_HTTP_PARSER_H
diff --git a/src/lua/httpc.c b/src/lua/httpc.c
index 76b3d00..45abb98 100644
--- a/src/lua/httpc.c
+++ b/src/lua/httpc.c
@@ -34,6 +34,7 @@
*/
#define DRIVER_LUA_UDATA_NAME "httpc"
+#include <http_parser.h>
#include "src/httpc.h"
#include "say.h"
#include "lua/utils.h"
@@ -58,6 +59,69 @@ lua_add_key_u64(lua_State *L, const char *key, uint64_t value)
lua_pushinteger(L, value);
lua_settable(L, -3);
}
+
+/**
+ * Parse raw response headers and store the result in the table which
+ * is on top of the Lua stack:
+ *  - "headers": a table mapping a lower-cased header name to an array
+ *    of its values (an array is used to keep duplicated headers);
+ *  - "proto": {major, minor} of the HTTP version, {0, 0} if unknown.
+ *
+ * If the buffer holds several response header blocks (e.g. redirects
+ * or "100 Continue"), only headers of the last block are kept, like
+ * the Lua implementation this function replaces did.
+ *
+ * @param L      Lua state with the response table on top of the stack
+ * @param buffer raw headers as received from the server
+ * @param len    size of the buffer in bytes
+ */
+static void
+parse_headers(lua_State *L, char *buffer, size_t len)
+{
+	struct http_parser parser;
+	char *end_buf = buffer + len;
+	/*
+	 * The buffer may contain no status line at all: never read
+	 * uninitialized version fields below.
+	 */
+	parser.http_major = -1;
+	parser.http_minor = -1;
+	parser.header_name_idx = 0;
+
+	lua_pushstring(L, "headers");
+	lua_newtable(L);
+	while (buffer < end_buf) {
+		char *line = buffer;
+		/* Lets us detect a line which did not produce a value. */
+		parser.header_value_start = NULL;
+		parser.header_value_end = NULL;
+
+		int rc = http_parse_header_line(&parser, &buffer, end_buf);
+		if (rc == HTTP_PARSE_INVALID) {
+			/*
+			 * Skip the broken line, but only if the parser
+			 * has moved past it, otherwise we would parse
+			 * the same line forever.
+			 */
+			if (buffer == line)
+				break;
+			continue;
+		}
+		if (rc == HTTP_PARSE_DONE) {
+			/*
+			 * End of a header block. If one more response
+			 * follows (its status line is next), start
+			 * collecting its headers from scratch.
+			 */
+			if (buffer < end_buf && end_buf - buffer >= 5 &&
+			    buffer[0] == 'H' && buffer[1] == 'T' &&
+			    buffer[2] == 'T' && buffer[3] == 'P' &&
+			    buffer[4] == '/') {
+				lua_pop(L, 1);
+				lua_newtable(L);
+				continue;
+			}
+			break;
+		}
+		if (rc != HTTP_PARSE_OK)
+			continue;
+		/* A truncated last line must not be trusted. */
+		if (buffer > end_buf)
+			break;
+		if (parser.header_value_start == NULL ||
+		    parser.header_value_end == NULL ||
+		    parser.header_value_end < parser.header_value_start)
+			continue;
+		size_t value_len = parser.header_value_end -
+				   parser.header_value_start;
+
+		/* Key for the final headers[name] = values below. */
+		lua_pushlstring(L, parser.header_name,
+				parser.header_name_idx);
+		/* Fetch values collected for this header so far. */
+		lua_pushvalue(L, -1);
+		lua_gettable(L, -3);
+		if (!lua_istable(L, -1)) {
+			/* The first occurrence of the header. */
+			lua_pop(L, 1);
+			lua_newtable(L);
+		}
+		/* values[#values + 1] = value */
+		lua_pushinteger(L, lua_objlen(L, -1) + 1);
+		lua_pushlstring(L, parser.header_value_start, value_len);
+		lua_settable(L, -3);
+		/* headers[name] = values */
+		lua_settable(L, -3);
+	}
+
+	/* headers */
+	lua_settable(L, -3);
+
+	lua_pushstring(L, "proto");
+
+	lua_newtable(L);
+	lua_pushinteger(L, 1);
+	lua_pushinteger(L, (parser.http_major > 0) ? parser.http_major : 0);
+	lua_settable(L, -3);
+
+	lua_pushinteger(L, 2);
+	lua_pushinteger(L, (parser.http_minor > 0) ? parser.http_minor : 0);
+	lua_settable(L, -3);
+
+	/* proto */
+	lua_settable(L, -3);
+}
/* }}}
*/
@@ -215,9 +279,7 @@ luaT_httpc_request(lua_State *L)
httpc_request_delete(req);
return luaT_error(L);
}
- lua_pushstring(L, "headers");
- lua_pushlstring(L, headers, headers_len);
- lua_settable(L, -3);
+ parse_headers(L, headers, headers_len);
}
size_t body_len = region_used(&req->resp_body);
diff --git a/src/lua/httpc.lua b/src/lua/httpc.lua
index 07ef395..3ddd3e7 100644
--- a/src/lua/httpc.lua
+++ b/src/lua/httpc.lua
@@ -103,46 +103,17 @@ local special_headers = {
["user-agent"] = true,
}
-local function parse_list(list)
- local result = {}
- for _,str in pairs(list) do
- local h = str:split(':', 1)
- if #h > 1 then
- local key = h[1]:lower()
- local val = string.gsub(h[2], "^%s*(.-)%s*$", "%1")
- local prev_val = result[key]
- -- pack headers
- if not special_headers[key] then
- if prev_val == nil then
- result[key] = {}
- table.insert(result[key], val)
- else
- table.insert(prev_val, val)
- end
- else if not prev_val then
- result[key] = val
- end
+-- Collapse per-header value lists into plain values, in place.
+-- For every header whose value is a table: headers listed in
+-- special_headers keep only their first value, all other headers get
+-- their values joined with ','. Non-table values are left untouched.
+-- Returns the same (modified) headers table.
+local function process_headers(headers)
+ for header, value in pairs(headers) do
+ if type(value) == 'table' then
+ if special_headers[header] then
+ headers[header] = value[1]
+ else
+ headers[header] = table.concat(value, ',')
 end
- elseif string.match(str, "HTTP/%d%.%d %d%d%d") then
- result = {}
 end
 end
-
- for key, value in pairs(result) do
- if not special_headers[key] then
- result[key] = table.concat(result[key], ",")
- end
- end
- return result
-end
-
-local function parse_headers(resp)
- local list = resp.headers:split('\r\n')
- local h1 = table.remove(list, 1):split(' ')
- local proto = h1[1]:split('/')[2]:split('.')
- resp.proto = { tonumber(proto[1]), tonumber(proto[2]) }
- resp.headers = parse_list(list)
- return resp
+ return headers
 end
--
@@ -214,7 +185,7 @@ curl_mt = {
end
local resp = self.curl:request(method, url, body, opts or {})
if resp and resp.headers then
- resp = parse_headers(resp)
+ resp.headers = process_headers(resp.headers)
end
return resp
end,
--
2.7.4
More information about the Tarantool-patches
mailing list