From 2c701de0302b1d255750c638d16087794fa4585d Mon Sep 17 00:00:00 2001 From: Junqi Wang Date: Fri, 26 Apr 2019 09:55:05 -0700 Subject: [PATCH] Continue on network unreachable with timeout Summary: Previously we tried continue on network down and it showed good improvement. But there was a problem: it hurts UX for airplane mode users, it didn't return error back to user immediately but after 30/60 seconds timeout. This adds a timer for this feature and it only allows the transport to ignore network unreachable error for 200ms. After 200ms, it throws and reports to user if the error persists. Reviewed By: siyengar Differential Revision: D15089442 fbshipit-source-id: dd87f4f579187c4b45244a7ee0477d2a0cf1b5d7 --- quic/api/IoBufQuicBatch.cpp | 21 +++++++-- quic/api/IoBufQuicBatch.h | 2 + quic/api/QuicTransportFunctions.cpp | 1 + quic/api/test/IoBufQuicBatchTest.cpp | 4 +- quic/client/test/QuicClientTransportTest.cpp | 47 +++++++++++++++++++- quic/state/StateData.h | 3 ++ quic/state/TransportSettings.h | 14 ++++-- 7 files changed, 84 insertions(+), 8 deletions(-) diff --git a/quic/api/IoBufQuicBatch.cpp b/quic/api/IoBufQuicBatch.cpp index 5091f653b..66b564cac 100644 --- a/quic/api/IoBufQuicBatch.cpp +++ b/quic/api/IoBufQuicBatch.cpp @@ -15,10 +15,12 @@ IOBufQuicBatch::IOBufQuicBatch( std::unique_ptr&& batchWriter, folly::AsyncUDPSocket& sock, folly::SocketAddress& peerAddress, + QuicConnectionStateBase& conn, QuicConnectionStateBase::HappyEyeballsState& happyEyeballsState) : batchWriter_(std::move(batchWriter)), sock_(sock), peerAddress_(peerAddress), + conn_(conn), happyEyeballsState_(happyEyeballsState) {} bool IOBufQuicBatch::write( @@ -62,9 +64,19 @@ bool IOBufQuicBatch::isNetworkUnreachable(int err) { } bool IOBufQuicBatch::isRetriableError(int err) { - return err == EAGAIN || err == EWOULDBLOCK || err == ENOBUFS || - err == EMSGSIZE || - (continueOnNetworkUnreachable_ && isNetworkUnreachable(err)); + if (err == EAGAIN || err == EWOULDBLOCK || err == ENOBUFS || + err == EMSGSIZE) { + return true; + } + auto now = Clock::now(); + if (continueOnNetworkUnreachable_ && isNetworkUnreachable(err)) { + if (!conn_.continueOnNetworkUnreachableDeadline) { + conn_.continueOnNetworkUnreachableDeadline = + now + conn_.transportSettings.continueOnNetworkUnreachableDuration; + } + return now <= *conn_.continueOnNetworkUnreachableDeadline; + } + return false; } bool IOBufQuicBatch::flushInternal() { @@ -123,6 +135,9 @@ bool IOBufQuicBatch::flushInternal() { return false; // done } + // Reset the deadline after successful write + conn_.continueOnNetworkUnreachableDeadline = folly::none; + return true; // success, not done yet } } // namespace quic diff --git a/quic/api/IoBufQuicBatch.h b/quic/api/IoBufQuicBatch.h index 5143a30bd..6bd799983 100644 --- a/quic/api/IoBufQuicBatch.h +++ b/quic/api/IoBufQuicBatch.h @@ -18,6 +18,7 @@ class IOBufQuicBatch { std::unique_ptr&& batchWriter, folly::AsyncUDPSocket& sock, folly::SocketAddress& peerAddress, + QuicConnectionStateBase& conn, QuicConnectionStateBase::HappyEyeballsState& happyEyeballsState); ~IOBufQuicBatch() = default; @@ -49,6 +50,7 @@ class IOBufQuicBatch { std::unique_ptr batchWriter_; folly::AsyncUDPSocket& sock_; folly::SocketAddress& peerAddress_; + QuicConnectionStateBase& conn_; QuicConnectionStateBase::HappyEyeballsState& happyEyeballsState_; uint64_t pktSent_{0}; bool continueOnNetworkUnreachable_{false}; diff --git a/quic/api/QuicTransportFunctions.cpp b/quic/api/QuicTransportFunctions.cpp index 7f48690d5..c5dcb6341 100644 --- a/quic/api/QuicTransportFunctions.cpp +++ b/quic/api/QuicTransportFunctions.cpp @@ -855,6 +855,7 @@ uint64_t writeConnectionDataToSocket( std::move(batchWriter), sock, connection.peerAddress, + connection, connection.happyEyeballsState); ioBufBatch.setContinueOnNetworkUnreachable( connection.transportSettings.continueOnNetworkUnreachable); diff --git a/quic/api/test/IoBufQuicBatchTest.cpp b/quic/api/test/IoBufQuicBatchTest.cpp index 6998adf06..9ced916e9 100644 --- a/quic/api/test/IoBufQuicBatchTest.cpp +++ b/quic/api/test/IoBufQuicBatchTest.cpp @@ -8,6 +8,7 @@ #include #include +#include #include constexpr const auto kNumLoops = 64; @@ -52,10 +53,11 @@ void RunTest(int numBatch) { auto batchWriter = std::make_unique(numBatch); folly::SocketAddress peerAddress{"127.0.0.1", 1234}; + QuicClientConnectionState conn; QuicConnectionStateBase::HappyEyeballsState happyEyeballsState; IOBufQuicBatch ioBufBatch( - std::move(batchWriter), sock, peerAddress, happyEyeballsState); + std::move(batchWriter), sock, peerAddress, conn, happyEyeballsState); std::string strTest("Test"); diff --git a/quic/client/test/QuicClientTransportTest.cpp b/quic/client/test/QuicClientTransportTest.cpp index c44b68262..8de7546b4 100644 --- a/quic/client/test/QuicClientTransportTest.cpp +++ b/quic/client/test/QuicClientTransportTest.cpp @@ -1343,12 +1343,57 @@ TEST_F(QuicClientTransportTest, NetworkUnreachableIsNotFatalIfContinue) { EXPECT_CALL(clientConnCallback, onConnectionError(_)).Times(0); setupCryptoLayer(); EXPECT_CALL(*sock, write(_, _)).WillOnce(SetErrnoAndReturn(ENETUNREACH, -1)); + EXPECT_FALSE(client->getConn().continueOnNetworkUnreachableDeadline); client->start(&clientConnCallback); - loopForWrites(); + EXPECT_TRUE(client->getConn().continueOnNetworkUnreachableDeadline); ASSERT_FALSE(client->getConn().receivedNewPacketBeforeWrite); ASSERT_TRUE(client->idleTimeout().isScheduled()); } +TEST_F( + QuicClientTransportTest, + NetworkUnreachableIsFatalIfContinueAfterDeadline) { + TransportSettings settings; + settings.continueOnNetworkUnreachable = true; + client->setTransportSettings(settings); + client->addNewPeerAddress(serverAddr); + setupCryptoLayer(); + EXPECT_CALL(*sock, write(_, _)) + .WillRepeatedly(SetErrnoAndReturn(ENETUNREACH, -1)); + EXPECT_FALSE(client->getConn().continueOnNetworkUnreachableDeadline); + client->start(&clientConnCallback); + ASSERT_FALSE(client->getConn().receivedNewPacketBeforeWrite); + ASSERT_TRUE(client->idleTimeout().isScheduled()); + usleep(std::chrono::duration_cast( + settings.continueOnNetworkUnreachableDuration) + .count()); + EXPECT_CALL(clientConnCallback, onConnectionError(_)); + loopForWrites(); +} + +TEST_F( + QuicClientTransportTest, + NetworkUnreachableDeadlineIsResetAfterSuccessfulWrite) { + TransportSettings settings; + settings.continueOnNetworkUnreachable = true; + client->setTransportSettings(settings); + client->addNewPeerAddress(serverAddr); + EXPECT_CALL(clientConnCallback, onConnectionError(_)).Times(0); + setupCryptoLayer(); + EXPECT_CALL(*sock, write(_, _)) + .WillOnce(SetErrnoAndReturn(ENETUNREACH, -1)) + .WillOnce(Return(1)); + EXPECT_FALSE(client->getConn().continueOnNetworkUnreachableDeadline); + client->start(&clientConnCallback); + EXPECT_TRUE(client->getConn().continueOnNetworkUnreachableDeadline); + ASSERT_FALSE(client->getConn().receivedNewPacketBeforeWrite); + ASSERT_TRUE(client->idleTimeout().isScheduled()); + + client->lossTimeout().cancelTimeout(); + client->lossTimeout().timeoutExpired(); + EXPECT_FALSE(client->getConn().continueOnNetworkUnreachableDeadline); +} + TEST_F(QuicClientTransportTest, HappyEyeballsWithSingleV4Address) { auto& conn = client->getConn(); diff --git a/quic/state/StateData.h b/quic/state/StateData.h index 8fd1929d6..7fa902fd5 100644 --- a/quic/state/StateData.h +++ b/quic/state/StateData.h @@ -462,6 +462,9 @@ struct QuicConnectionStateBase { // Error sent on the connection by the peer. folly::Optional> peerConnectionError; + // Before deadline, transport may treat ENETUNREACH as non-fatal error + folly::Optional continueOnNetworkUnreachableDeadline; + // Supported versions in order of preference. Only meaningful to clients. // TODO: move to client only conn state. std::vector supportedVersions; diff --git a/quic/state/TransportSettings.h b/quic/state/TransportSettings.h index cf8acef47..27bc6b83c 100644 --- a/quic/state/TransportSettings.h +++ b/quic/state/TransportSettings.h @@ -74,10 +74,18 @@ struct TransportSettings { // maximum number of packets we can batch // does not apply to BATCHING_MODE_NONE uint32_t batchingNum{kDefaultQuicBatchingNum}; - // Sets network down to be a non fatal error. In some environments, ENETDOWN - // and ENETUNREACH could just be because the routing table is being setup. - // This option makes those non fatal connection errors. + // Sets network unreachable to be a non fatal error. In some environments, + // EHOSTUNREACH or ENETUNREACH could just be because the routing table is + // being setup. This option makes those non fatal connection errors. bool continueOnNetworkUnreachable{false}; + // Amount of time for which the transport treats ENETUNREACH/EHOSTUNREACH as + // non-fatal error since the first error is seen. If transport still sees the + // error after this amount of time, it'll throw and report the error. This is + // to minimize the negative impact on user experience for real no network + // case, so that errors are only delayed to be reported for 200ms, which + // should be invisible to end users. + // Choosing 150ms because loss timer fires at the 100ms for the first time. + std::chrono::milliseconds continueOnNetworkUnreachableDuration{150}; // Initial congestion window in MSS uint64_t initCwndInMss{kInitCwndInMss}; // Minimum congestion window in MSS