[patches] [http 1/1] http: adapt nginx http headers parser

imarkov imarkov at tarantool.org
Tue Feb 13 15:07:15 MSK 2018


From: Ilya <markovilya197 at gmail.com>

* replace the old small header parser with the well-tested one from nginx
* functionality is not changed

Signed-off-by: imarkov <imarkov at tarantool.org>
---
 src/CMakeLists.txt |   1 +
 src/http_parser.c  | 399 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/http_parser.h  |  66 +++++++++
 src/lua/httpc.c    |  68 ++++++++-
 src/lua/httpc.lua  |  47 ++-----
 5 files changed, 540 insertions(+), 41 deletions(-)
 create mode 100644 src/http_parser.c
 create mode 100644 src/http_parser.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e5acef7..fe99b44 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -93,6 +93,7 @@ set (core_sources
      util.c
      random.c
      trigger.cc
+     http_parser.c
  )
 
 if (TARGET_OS_NETBSD)
diff --git a/src/http_parser.c b/src/http_parser.c
new file mode 100644
index 0000000..7166903
--- /dev/null
+++ b/src/http_parser.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include "httpc.h"
+#include "http_parser.h"
+
+#define LF     (unsigned char) '\n'
+#define CR     (unsigned char) '\r'
+#define CRLF   "\r\n"
+
+/**
+ * The following HTTP parser functions were taken, with slight
+ * adaptations, from the nginx HTTP parser module.
+ */
+
+/**
+ * Parse an HTTP status line: "HTTP/<major>.<minor> <code> <text>".
+ * Utility function used in headers parsing.
+ *
+ * @param parser  receives http_major and http_minor.
+ * @param bufp    in: start of the line; out: first byte after the
+ *                terminating LF. Left untouched on failure.
+ * @param end_buf one past the last readable byte.
+ * @retval HTTP_PARSE_OK      a complete status line was consumed.
+ * @retval HTTP_PARSE_INVALID the line is malformed, or the buffer
+ *                            ended before the line's LF.
+ */
+static int
+http_parse_status_line(struct http_parser *parser, char **bufp,
+		       const char *end_buf)
+{
+	char ch;
+	char *p = *bufp;
+	enum {
+		sw_start = 0,
+		sw_H,
+		sw_HT,
+		sw_HTT,
+		sw_HTTP,
+		sw_first_major_digit,
+		sw_major_digit,
+		sw_first_minor_digit,
+		sw_minor_digit,
+		sw_status,
+		sw_space_after_status,
+		sw_status_text,
+		sw_almost_done
+	} state;
+
+	state = sw_start;
+	int status_count = 0;
+	for (;p < end_buf; p++) {
+		ch = *p;
+		switch (state) {
+		/* "HTTP/" */
+		case sw_start:
+			if (ch != 'H')
+				return HTTP_PARSE_INVALID;
+			state = sw_H;
+			break;
+		case sw_H:
+			if (ch != 'T')
+				return HTTP_PARSE_INVALID;
+			state = sw_HT;
+			break;
+		case sw_HT:
+			if (ch != 'T')
+				return HTTP_PARSE_INVALID;
+			state = sw_HTT;
+			break;
+		case sw_HTT:
+			if (ch != 'P')
+				return HTTP_PARSE_INVALID;
+			state = sw_HTTP;
+			break;
+		case sw_HTTP:
+			if (ch != '/')
+				return HTTP_PARSE_INVALID;
+			state = sw_first_major_digit;
+			break;
+		/* The first digit of major HTTP version */
+		case sw_first_major_digit:
+			if (ch < '1' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			parser->http_major = ch - '0';
+			state = sw_major_digit;
+			break;
+		/* The major HTTP version or dot */
+		case sw_major_digit:
+			if (ch == '.') {
+				state = sw_first_minor_digit;
+				break;
+			}
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			/* Reject before the number can grow any further. */
+			if (parser->http_major > 99)
+				return HTTP_PARSE_INVALID;
+			parser->http_major = parser->http_major * 10
+					     + (ch - '0');
+			break;
+		/* The first digit of minor HTTP version */
+		case sw_first_minor_digit:
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			parser->http_minor = ch - '0';
+			state = sw_minor_digit;
+			break;
+		/* The minor HTTP version or the space before the status */
+		case sw_minor_digit:
+			if (ch == ' ') {
+				state = sw_status;
+				break;
+			}
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			if (parser->http_minor > 99)
+				return HTTP_PARSE_INVALID;
+			parser->http_minor = parser->http_minor * 10
+					     + (ch - '0');
+			break;
+		/* HTTP status code: three digits, the value is not stored */
+		case sw_status:
+			if (ch == ' ')
+				break;
+			if (ch < '0' || ch > '9')
+				return HTTP_PARSE_INVALID;
+			if (++status_count == 3)
+				state = sw_space_after_status;
+			break;
+		/* Space or end of line */
+		case sw_space_after_status:
+			switch (ch) {
+			case ' ':
+				state = sw_status_text;
+				break;
+			case '.':
+				/* IIS may send 403.1, 403.2, etc */
+				state = sw_status_text;
+				break;
+			case CR:
+				state = sw_almost_done;
+				break;
+			case LF:
+				goto done;
+			default:
+				return HTTP_PARSE_INVALID;
+			}
+			break;
+		/* Any text until end of line */
+		case sw_status_text:
+			switch (ch) {
+			case CR:
+				state = sw_almost_done;
+				break;
+			case LF:
+				goto done;
+			}
+			break;
+
+		/* End of status line: CR must be followed by LF */
+		case sw_almost_done:
+			if (ch != LF)
+				return HTTP_PARSE_INVALID;
+			goto done;
+		}
+	}
+	/*
+	 * The buffer ended before the terminating LF. Report the
+	 * line as invalid instead of claiming success and moving
+	 * *bufp beyond end_buf.
+	 */
+	return HTTP_PARSE_INVALID;
+done:
+	*bufp = p + 1;
+	return HTTP_PARSE_OK;
+}
+
+/**
+ * Parse one line of an HTTP response header block. Adapted from
+ * the nginx header line parser.
+ *
+ * On HTTP_PARSE_OK the lower-cased name is in parser->header_name
+ * (header_name_idx bytes, not NUL-terminated, truncated to
+ * HEADER_LEN bytes) and the value is the byte range
+ * [header_value_start, header_value_end) inside the buffer.
+ * A status line ("HTTP/x.y ...") is not reported as a header: it
+ * only updates parser->http_major and parser->http_minor, and
+ * parsing goes on with the line that follows it.
+ *
+ * @param parser  parser state, filled as described above.
+ * @param bufp    in: start of the line; out: start of the next
+ *                line. Left untouched on HTTP_PARSE_INVALID, so
+ *                the caller has to skip the bad line itself.
+ * @param end_buf one past the last readable byte.
+ * @retval HTTP_PARSE_OK      a header line was parsed.
+ * @retval HTTP_PARSE_DONE    the blank line ending the headers was
+ *                            consumed, or the buffer is exhausted.
+ * @retval HTTP_PARSE_INVALID the line is malformed.
+ */
+int
+http_parse_header_line(struct http_parser *parser, char **bufp,
+		       const char *end_buf)
+{
+	char c, ch;
+	char *p = *bufp;
+	char *header_name_start = p;
+	parser->header_name_idx = 0;
+
+	enum {
+		sw_start = 0,
+		sw_name,
+		sw_space_before_value,
+		sw_value,
+		sw_space_after_value,
+		sw_almost_done,
+		sw_header_almost_done
+	} state = sw_start;
+
+	/*
+	 * Maps a byte to its lower-case form if the byte is accepted
+	 * in a header name and to 0 otherwise. The last '\0' is not
+	 * spelled out because the string is zero terminated.
+	 */
+	static const char lowcase[] =
+			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789"
+			"\0\0\0\0\0\0\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_\0"
+			"abcdefghijklmnopqrstuvwxyz\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+			"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+			"\0\0\0\0\0\0\0\0\0\0";
+
+	for (; p < end_buf; p++) {
+		ch = *p;
+		/*
+		 * Plain char may be signed: index through unsigned char
+		 * so that bytes >= 0x80 do not read before the table.
+		 */
+		c = lowcase[(unsigned char) ch];
+		switch (state) {
+		/* first char */
+		case sw_start:
+			switch (ch) {
+			case CR:
+				parser->header_value_end = p;
+				state = sw_header_almost_done;
+				break;
+			case LF:
+				parser->header_value_end = p;
+				goto header_done;
+			case '\0':
+				return HTTP_PARSE_INVALID;
+			default:
+				state = sw_name;
+				if (c != 0) {
+					parser->header_name[0] = c;
+					parser->header_name_idx = 1;
+				}
+				break;
+			}
+			break;
+		/* http_header name */
+		case sw_name:
+			if (c != 0) {
+				/*
+				 * Over-long names are truncated. Wrapping
+				 * the index would overwrite the beginning
+				 * of the name and report a wrong length.
+				 */
+				if (parser->header_name_idx < HEADER_LEN) {
+					int i = parser->header_name_idx++;
+					parser->header_name[i] = c;
+				}
+				break;
+			}
+			if (ch == ':') {
+				state = sw_space_before_value;
+				break;
+			}
+			if (ch == CR || ch == LF) {
+				/* A line without ':' has an empty value. */
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				if (ch == LF)
+					goto done;
+				state = sw_almost_done;
+				break;
+			}
+			/* handle "HTTP/1.1 ..." lines */
+			if (ch == '/' && p - header_name_start == 4 &&
+				strncmp(header_name_start, "HTTP", 4) == 0) {
+				int rc = http_parse_status_line(parser,
+							&header_name_start,
+							end_buf);
+				state = sw_start;
+				if (rc == HTTP_PARSE_INVALID) {
+					parser->http_minor = -1;
+					parser->http_major = -1;
+					break;
+				}
+				/*
+				 * The status line is not a header: drop
+				 * the "http" collected so far and resume
+				 * at the line that follows it. The loop
+				 * increment steps onto its first byte.
+				 */
+				parser->header_name_idx = 0;
+				p = header_name_start - 1;
+				break;
+			}
+			if (ch == '\0')
+				return HTTP_PARSE_INVALID;
+			break;
+		/* space* before http_header value */
+		case sw_space_before_value:
+			switch (ch) {
+			case ' ':
+				break;
+			case CR:
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				state = sw_almost_done;
+				break;
+			case LF:
+				parser->header_value_start = p;
+				parser->header_value_end = p;
+				goto done;
+			case '\0':
+				return HTTP_PARSE_INVALID;
+			default:
+				parser->header_value_start = p;
+				state = sw_value;
+				break;
+			}
+			break;
+
+		/* http_header value */
+		case sw_value:
+			switch (ch) {
+			case ' ':
+				/* Candidate end; moved on if text follows. */
+				parser->header_value_end = p;
+				state = sw_space_after_value;
+				break;
+			case CR:
+				parser->header_value_end = p;
+				state = sw_almost_done;
+				break;
+			case LF:
+				parser->header_value_end = p;
+				goto done;
+			case '\0':
+				return HTTP_PARSE_INVALID;
+			}
+			break;
+		/* space* before end of http_header line */
+		case sw_space_after_value:
+			switch (ch) {
+			case ' ':
+				break;
+			case CR:
+				state = sw_almost_done;
+				break;
+			case LF:
+				goto done;
+			case '\0':
+				return HTTP_PARSE_INVALID;
+			default:
+				state = sw_value;
+				break;
+			}
+			break;
+		/* end of http_header line */
+		case sw_almost_done:
+			if (ch == LF)
+				goto done;
+			if (ch != CR)
+				return HTTP_PARSE_INVALID;
+			break;
+		/* end of http_header */
+		case sw_header_almost_done:
+			if (ch != LF)
+				return HTTP_PARSE_INVALID;
+			goto header_done;
+		}
+	}
+	/*
+	 * The buffer ended without a terminating LF. Nothing more
+	 * can be parsed from it, so report the end of the headers
+	 * instead of a header built from stale value pointers, and
+	 * keep *bufp inside the buffer. An unterminated trailing
+	 * line is dropped.
+	 */
+	*bufp = (char *) end_buf;
+	return HTTP_PARSE_DONE;
+
+done:
+	*bufp = p + 1;
+	return HTTP_PARSE_OK;
+
+header_done:
+	*bufp = p + 1;
+	return HTTP_PARSE_DONE;
+}
diff --git a/src/http_parser.h b/src/http_parser.h
new file mode 100644
index 0000000..5e20f53
--- /dev/null
+++ b/src/http_parser.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef TARANTOOL_HTTP_PARSER_H
+#define TARANTOOL_HTTP_PARSER_H
+
+#define HEADER_LEN 32
+
+/** Result codes returned by http_parse_header_line(). */
+enum {
+	/** A header line was parsed; name and value are in the parser. */
+	HTTP_PARSE_OK,
+	/** The end of the header section was reached. */
+	HTTP_PARSE_DONE,
+	/** The line is malformed. */
+	HTTP_PARSE_INVALID
+};
+
+/** State and output of the HTTP response header parser. */
+struct http_parser {
+	/** First byte of the current header value inside the parsed buffer. */
+	char *header_value_start;
+	/** One past the last byte of the current header value. */
+	char *header_value_end;
+
+	/**
+	 * HTTP version taken from the status line. Both fields are
+	 * set to -1 when the status line is invalid.
+	 */
+	int http_major;
+	int http_minor;
+
+	/** Lower-cased name of the current header; not NUL-terminated. */
+	char header_name[HEADER_LEN];
+	/**
+	 * Offset of the next byte to store in header_name; callers
+	 * use it as the length of the name.
+	 */
+	int header_name_idx;
+};
+
+/*
+ * @brief Parse one line of an HTTP header block
+ * @param parser parser object; receives the header name, the value
+ *               range and, for a status line, the HTTP version
+ * @param bufp in: start of the line to parse; out: start of the
+ *             next line (left unchanged on HTTP_PARSE_INVALID)
+ * @param end_buf end of the buffer (one past the last byte)
+ * @return	HTTP_PARSE_OK - a header line was parsed
+ * 		HTTP_PARSE_DONE - the end of the header section was reached
+ * 		HTTP_PARSE_INVALID - error during parsing
+ */
+int
+http_parse_header_line(struct http_parser *parser, char **bufp, const char *end_buf);
+
+#endif //TARANTOOL_HTTP_PARSER_H
diff --git a/src/lua/httpc.c b/src/lua/httpc.c
index 76b3d00..45abb98 100644
--- a/src/lua/httpc.c
+++ b/src/lua/httpc.c
@@ -34,6 +34,7 @@
  */
 #define DRIVER_LUA_UDATA_NAME	"httpc"
 
+#include <http_parser.h>
 #include "src/httpc.h"
 #include "say.h"
 #include "lua/utils.h"
@@ -58,6 +59,69 @@ lua_add_key_u64(lua_State *L, const char *key, uint64_t value)
 	lua_pushinteger(L, value);
 	lua_settable(L, -3);
 }
+
+/**
+ * Parse a raw HTTP response header block and store the result in
+ * the table that is on top of the Lua stack:
+ *   headers = { [lower-cased name] = { value1, value2, ... } }
+ *   proto   = { major, minor }
+ * Malformed lines are skipped. A missing or invalid protocol
+ * version is reported as { 0, 0 }.
+ *
+ * @param L      Lua state; the result table must be on top.
+ * @param buffer start of the header block.
+ * @param len    size of the header block in bytes.
+ */
+static void
+parse_headers(lua_State *L, char *buffer, size_t len)
+{
+	struct http_parser parser;
+	char *end_buf = buffer + len;
+	/* No status line seen yet: the version is unknown. */
+	parser.http_major = -1;
+	parser.http_minor = -1;
+	parser.header_name_idx = 0;
+
+	lua_pushstring(L, "headers");
+	lua_newtable(L);
+	/* Bounded by the buffer: every iteration has to consume input. */
+	while (buffer < end_buf) {
+		/*
+		 * Reset the value range so that a result which did
+		 * not set both ends can be told apart from leftovers
+		 * of the previous line.
+		 */
+		parser.header_value_start = NULL;
+		parser.header_value_end = NULL;
+		int rc = http_parse_header_line(&parser, &buffer, end_buf);
+		if (rc == HTTP_PARSE_DONE)
+			break;
+		if (rc == HTTP_PARSE_INVALID) {
+			/*
+			 * The parser does not advance the buffer on
+			 * error. Skip to the next line here, otherwise
+			 * the same line would be parsed forever.
+			 */
+			while (buffer < end_buf && *buffer != '\n')
+				buffer++;
+			if (buffer < end_buf)
+				buffer++;
+			continue;
+		}
+		if (parser.header_value_start == NULL ||
+		    parser.header_value_end == NULL ||
+		    parser.header_value_end < parser.header_value_start)
+			continue;
+		size_t value_len = parser.header_value_end -
+				   parser.header_value_start;
+
+		/* key for the final lua_settable() below */
+		lua_pushlstring(L, parser.header_name,
+				parser.header_name_idx);
+		/* check value of header, if exists */
+		lua_pushlstring(L, parser.header_name,
+				parser.header_name_idx);
+		lua_gettable(L, -3);
+		/* table of values to handle duplicates */
+		if (!lua_istable(L, -1)) {
+			/* first occurrence: start a new list */
+			lua_pop(L, 1);
+			lua_newtable(L);
+		}
+		/* list[#list + 1] = value */
+		lua_pushinteger(L, lua_objlen(L, -1) + 1);
+		lua_pushlstring(L, parser.header_value_start, value_len);
+		lua_settable(L, -3);
+		/* headers[parser.header] = list */
+		lua_settable(L, -3);
+	}
+
+	/* headers */
+	lua_settable(L, -3);
+
+	lua_pushstring(L, "proto");
+
+	lua_newtable(L);
+	lua_pushinteger(L, 1);
+	lua_pushinteger(L, (parser.http_major > 0) ? parser.http_major: 0);
+	lua_settable(L, -3);
+
+	lua_pushinteger(L, 2);
+	lua_pushinteger(L, (parser.http_minor > 0) ? parser.http_minor: 0);
+	lua_settable(L, -3);
+
+	/* proto */
+	lua_settable(L, -3);
+}
 /* }}}
  */
 
@@ -215,9 +279,7 @@ luaT_httpc_request(lua_State *L)
 			httpc_request_delete(req);
 			return luaT_error(L);
 		}
-		lua_pushstring(L, "headers");
-		lua_pushlstring(L, headers, headers_len);
-		lua_settable(L, -3);
+		parse_headers(L, headers, headers_len);
 	}
 
 	size_t body_len = region_used(&req->resp_body);
diff --git a/src/lua/httpc.lua b/src/lua/httpc.lua
index 07ef395..3ddd3e7 100644
--- a/src/lua/httpc.lua
+++ b/src/lua/httpc.lua
@@ -103,46 +103,17 @@ local special_headers = {
     ["user-agent"] = true,
 }
 
-local function parse_list(list)
-    local result = {}
-    for _,str in pairs(list) do
-        local h = str:split(':', 1)
-        if #h > 1 then
-            local key = h[1]:lower()
-            local val = string.gsub(h[2], "^%s*(.-)%s*$", "%1")
-            local prev_val = result[key]
-            -- pack headers
-            if not special_headers[key] then
-                if prev_val == nil then
-                    result[key] = {}
-                    table.insert(result[key], val)
-                else
-                    table.insert(prev_val, val)
-                end
-            else if not prev_val then
-                result[key] = val
-               end
+--- Flatten the per-header value lists built by the C header parser.
+-- Every table value is replaced in place: headers listed in
+-- special_headers keep only their first value, all other headers
+-- get their values joined with ','. Non-table values are left as is.
+-- @param headers table mapping a header name to a list of values
+-- @return the same table, with each list replaced by a single value
+local function process_headers(headers)
+    for header, value in pairs(headers) do
+        if type(value) == 'table' then
+            -- only existing keys are reassigned, which pairs() allows
+            if special_headers[header] then
+                headers[header] = value[1]
+            else
+                headers[header] = table.concat(value, ',')
             end
-        elseif string.match(str, "HTTP/%d%.%d %d%d%d") then
-            result = {}
         end
     end
-
-    for key, value in pairs(result) do
-        if not special_headers[key] then
-            result[key] = table.concat(result[key], ",")
-        end
-    end
-    return result
-end
-
-local function parse_headers(resp)
-    local list = resp.headers:split('\r\n')
-    local h1 = table.remove(list, 1):split(' ')
-    local proto = h1[1]:split('/')[2]:split('.')
-    resp.proto = { tonumber(proto[1]), tonumber(proto[2]) }
-    resp.headers = parse_list(list)
-    return resp
+    return headers
 end
 
 --
@@ -214,7 +185,7 @@ curl_mt = {
             end
             local resp = self.curl:request(method, url, body, opts or {})
             if resp and resp.headers then
-                resp = parse_headers(resp)
+                resp.headers = process_headers(resp.headers)
             end
             return resp
         end,
-- 
2.7.4




More information about the Tarantool-patches mailing list