From a9e942d75541f4a7cf99c97055f0081f2bff72f4 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Mon, 2 Dec 2019 15:45:26 -0800 Subject: [PATCH 1/2] Properly trim whitespace from headers HTTP Whitespace and regex whitespace are not the same, so we can't use \s in regexes when parsing HTTP headers. Instead, explicitly specify what is considered whitespace in the regex. --- httplib.h | 43 ++++++++++++++++++++++++++----------------- test/test.cc | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/httplib.h b/httplib.h index 914433f..0b440e7 100644 --- a/httplib.h +++ b/httplib.h @@ -1357,6 +1357,26 @@ inline bool is_connection_error() { #endif } +inline socket_t create_client_socket( + const char *host, int port, time_t timeout_sec) { + return create_socket( + host, port, [=](socket_t sock, struct addrinfo &ai) -> bool { + set_nonblocking(sock, true); + + auto ret = ::connect(sock, ai.ai_addr, static_cast(ai.ai_addrlen)); + if (ret < 0) { + if (is_connection_error() || + !wait_until_socket_is_ready(sock, timeout_sec, 0)) { + close_socket(sock); + return false; + } + } + + set_nonblocking(sock, false); + return true; + }); +} + inline std::string get_remote_addr(socket_t sock) { struct sockaddr_storage addr; socklen_t len = sizeof(addr); @@ -1542,7 +1562,11 @@ inline uint64_t get_header_value_uint64(const Headers &headers, const char *key, } inline bool read_headers(Stream &strm, Headers &headers) { - static std::regex re(R"((.+?):\s*(.+?)\s*\r\n)"); + // Horizontal tab and ' ' are considered whitespace and are ignored when on + // the left or right side of the header value: + // - https://stackoverflow.com/questions/50179659/ + // - https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html + static std::regex re(R"((.+?):[\t ]*(.+?)[\t ]*\r\n)"); const auto bufsiz = 2048; char buf[bufsiz]; @@ -3166,22 +3190,7 @@ inline Client::~Client() {} inline bool Client::is_valid() const { return true; } inline socket_t Client::create_client_socket() const { - return detail::create_socket( - host_.c_str(), port_, [=](socket_t sock, struct addrinfo &ai) -> bool { - detail::set_nonblocking(sock, true); - - auto ret = connect(sock, ai.ai_addr, static_cast(ai.ai_addrlen)); - if (ret < 0) { - if (detail::is_connection_error() || - !detail::wait_until_socket_is_ready(sock, timeout_sec_, 0)) { - detail::close_socket(sock); - return false; - } - } - - detail::set_nonblocking(sock, false); - return true; - }); + return detail::create_client_socket(host_.c_str(), port_, timeout_sec_); } inline bool Client::read_response_line(Stream &strm, Response &res) { diff --git a/test/test.cc b/test/test.cc index cc92796..e8ab069 100644 --- a/test/test.cc +++ b/test/test.cc @@ -1766,6 +1766,52 @@ TEST_F(ServerTest, MultipartFormDataGzip) { } #endif +TEST(ServerRequestParsingTest, TrimWhitespaceFromHeaderValues) { + Server svr; + std::string header_value; + svr.Get("/validate-ws-in-headers", + [&](const Request &req, Response &res) { + header_value = req.get_header_value("foo"); + res.set_content("ok", "text/plain"); + }); + + thread t = thread([&] { svr.listen(HOST, PORT); }); + while (!svr.is_running()) { + msleep(1); + } + + // Only space and horizontal tab are whitespace. Make sure other whitespace- + // like characters are not treated the same - use vertical tab and escape. + auto client_sock = + detail::create_client_socket(HOST, PORT, /*timeout_sec=*/5); + ASSERT_TRUE(client_sock != INVALID_SOCKET); + const std::string req = + "GET /validate-ws-in-headers HTTP/1.1\r\n" + "foo: \t \v bar \e\t \r\n" + "Connection: close\r\n" + "\r\n"; + + bool process_ok = detail::process_and_close_socket( + true, client_sock, 1, 5, 0, + [&](Stream& strm, bool /*last_connection*/, + bool &/*connection_close*/) -> bool { + if (req.size() != + static_cast(strm.write(req.data(), req.size()))) { + return false; + } + + char buf[512]; + + detail::stream_line_reader line_reader(strm, buf, sizeof(buf)); + while (line_reader.getline()) {} + return true; + }); + ASSERT_TRUE(process_ok); + svr.stop(); + t.join(); + EXPECT_EQ(header_value, "\v bar \e"); +} + class ServerTestWithAI_PASSIVE : public ::testing::Test { protected: ServerTestWithAI_PASSIVE() From bc9251ea4906455e24f2b049eb8ba4343315c6f4 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Mon, 2 Dec 2019 13:38:56 -0800 Subject: [PATCH 2/2] Work around incompatibility in in libc++ libc++ (the implementation of the C++ standard library usually used by Clang) throws an exception for the regex used by parse_headers before this patch for certain strings. Work around this by simplifying the regex and parsing the header lines "by hand" partially. I have repro'd this problem with Xcode 11.1 which I believe uses libc++ version 8. This may be a bug in libc++ as I can't see why the regex would result in asymptotic run-time complexity for any strings. However, it may take a while for libc++ to be fixed and for everyone to migrate to it, so it makes sense to work around it in this codebase for now. --- httplib.h | 20 +++++++++++--- test/test.cc | 75 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 73 insertions(+), 22 deletions(-) diff --git a/httplib.h b/httplib.h index 0b440e7..c05b866 100644 --- a/httplib.h +++ b/httplib.h @@ -1566,7 +1566,7 @@ inline bool read_headers(Stream &strm, Headers &headers) { // the left or right side of the header value: // - https://stackoverflow.com/questions/50179659/ // - https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html - static std::regex re(R"((.+?):[\t ]*(.+?)[\t ]*\r\n)"); + static std::regex re(R"((.+?):[\t ]*(.+))"); const auto bufsiz = 2048; char buf[bufsiz]; @@ -1575,9 +1575,23 @@ inline bool read_headers(Stream &strm, Headers &headers) { for (;;) { if (!line_reader.getline()) { return false; } - if (!strcmp(line_reader.ptr(), "\r\n")) { break; } + const char *end = line_reader.ptr() + line_reader.size(); + auto erase_last_char = [&](char c) { + if (line_reader.ptr() == end || end[-1] != c) { + return false; + } + end--; + return true; + }; + if (!erase_last_char('\n')) { continue; } + if (!erase_last_char('\r')) { continue; } + + // Blank line indicates end of headers. + if (line_reader.ptr() == end) { break; } + + while (erase_last_char(' ') || erase_last_char('\t')) {} std::cmatch m; - if (std::regex_match(line_reader.ptr(), m, re)) { + if (std::regex_match(line_reader.ptr(), end, m, re)) { auto key = std::string(m[1]); auto val = std::string(m[2]); headers.emplace(key, val); diff --git a/test/test.cc b/test/test.cc index e8ab069..bdc02d4 100644 --- a/test/test.cc +++ b/test/test.cc @@ -1766,6 +1766,30 @@ TEST_F(ServerTest, MultipartFormDataGzip) { } #endif +// Sends a raw request to a server listening at HOST:PORT. +static bool send_request(time_t read_timeout_sec, const std::string& req) { + auto client_sock = + detail::create_client_socket(HOST, PORT, /*timeout_sec=*/5); + + if (client_sock == INVALID_SOCKET) { return false; } + + return detail::process_and_close_socket( + true, client_sock, 1, read_timeout_sec, 0, + [&](Stream& strm, bool /*last_connection*/, + bool &/*connection_close*/) -> bool { + if (req.size() != + static_cast(strm.write(req.data(), req.size()))) { + return false; + } + + char buf[512]; + + detail::stream_line_reader line_reader(strm, buf, sizeof(buf)); + while (line_reader.getline()) {} + return true; + }); +} + TEST(ServerRequestParsingTest, TrimWhitespaceFromHeaderValues) { Server svr; std::string header_value; @@ -1782,36 +1806,49 @@ TEST(ServerRequestParsingTest, TrimWhitespaceFromHeaderValues) { // Only space and horizontal tab are whitespace. Make sure other whitespace- // like characters are not treated the same - use vertical tab and escape. - auto client_sock = - detail::create_client_socket(HOST, PORT, /*timeout_sec=*/5); - ASSERT_TRUE(client_sock != INVALID_SOCKET); const std::string req = "GET /validate-ws-in-headers HTTP/1.1\r\n" "foo: \t \v bar \e\t \r\n" "Connection: close\r\n" "\r\n"; - bool process_ok = detail::process_and_close_socket( - true, client_sock, 1, 5, 0, - [&](Stream& strm, bool /*last_connection*/, - bool &/*connection_close*/) -> bool { - if (req.size() != - static_cast(strm.write(req.data(), req.size()))) { - return false; - } - - char buf[512]; - - detail::stream_line_reader line_reader(strm, buf, sizeof(buf)); - while (line_reader.getline()) {} - return true; - }); - ASSERT_TRUE(process_ok); + ASSERT_TRUE(send_request(5, req)); svr.stop(); t.join(); EXPECT_EQ(header_value, "\v bar \e"); } +TEST(ServerRequestParsingTest, ReadHeadersRegexComplexity) { + Server svr; + svr.Get("/hi", + [&](const Request & /*req*/, Response &res) { + res.set_content("ok", "text/plain"); + }); + + // Server read timeout must be longer than the client read timeout for the + // bug to reproduce, probably to force the server to process a request + // without a trailing blank line. + const time_t client_read_timeout_sec = 1; + svr.set_read_timeout(client_read_timeout_sec + 1, 0); + bool listen_thread_ok = false; + thread t = thread([&] { listen_thread_ok = svr.listen(HOST, PORT); }); + while (!svr.is_running()) { + msleep(1); + } + + // A certain header line causes an exception if the header property is parsed + // naively with a single regex. This occurs with libc++ but not libstdc++. + const std::string req = + "GET /hi HTTP/1.1\r\n" + " : " + " "; + + ASSERT_TRUE(send_request(client_read_timeout_sec, req)); + svr.stop(); + t.join(); + EXPECT_TRUE(listen_thread_ok); +} + class ServerTestWithAI_PASSIVE : public ::testing::Test { protected: ServerTestWithAI_PASSIVE()