From 2a97d4533cbe0bd17233cb3a9a7e40f9696c237b Mon Sep 17 00:00:00 2001 From: Yang Chi Date: Wed, 17 Jul 2019 15:15:09 -0700 Subject: [PATCH] Quic busy write loop detector Summary: This diff adds a DebugState in the QuicConnectionState to track the reason we schedule transport WriteLooper to run, the reason we end up not writing and the number of times such write happens consecutively. And when it reaches a predefined limit, we trigger a callback on a loop detector interface. Reviewed By: kvtsoy Differential Revision: D15433881 fbshipit-source-id: d903342c00bc4dccf8d7320726368d23295bec66 --- quic/QuicConstants.cpp | 46 +++++++++++++ quic/QuicConstants.h | 26 +++++++ quic/api/LoopDetectorCallback.h | 25 +++++++ quic/api/QuicTransportBase.cpp | 27 +++++++- quic/api/QuicTransportBase.h | 4 ++ quic/api/QuicTransportFunctions.cpp | 61 +++++++++++------ quic/api/QuicTransportFunctions.h | 4 +- quic/api/test/Mocks.h | 9 +++ quic/api/test/QuicTransportFunctionsTest.cpp | 36 +++++----- quic/api/test/QuicTransportTest.cpp | 72 +++++++++++++++++--- quic/loss/QuicLossFunctions.h | 5 +- quic/state/StateData.h | 15 ++++ 12 files changed, 277 insertions(+), 53 deletions(-) create mode 100644 quic/api/LoopDetectorCallback.h diff --git a/quic/QuicConstants.cpp b/quic/QuicConstants.cpp index beec73651..eafcd5bff 100644 --- a/quic/QuicConstants.cpp +++ b/quic/QuicConstants.cpp @@ -37,4 +37,50 @@ std::vector filterSupportedVersions( }); return filteredVersions; } + +std::string writeDataReasonString(WriteDataReason reason) { + switch (reason) { + case WriteDataReason::PROBES: + return "Probes"; + case WriteDataReason::ACK: + return "Ack"; + case WriteDataReason::CRYPTO_STREAM: + return "Crypto"; + case WriteDataReason::STREAM: + return "Stream"; + case WriteDataReason::LOSS: + return "Loss"; + case WriteDataReason::BLOCKED: + return "Blocked"; + case WriteDataReason::STREAM_WINDOW_UPDATE: + return "StreamWindowUpdate"; + case WriteDataReason::CONN_WINDOW_UPDATE: + return "ConnWindowUpdate"; + case WriteDataReason::SIMPLE: + return "Simple"; + case WriteDataReason::RESET: + return "Reset"; + case WriteDataReason::PATHCHALLENGE: + return "PathChallenge"; + case WriteDataReason::NO_WRITE: + return "NoWrite"; + } + folly::assume_unreachable(); +} + +std::string writeNoWriteReasonString(NoWriteReason reason) { + switch (reason) { + case NoWriteReason::WRITE_OK: + return "WriteOk"; + case NoWriteReason::EMPTY_SCHEDULER: + return "EmptyScheduler"; + case NoWriteReason::NO_FRAME: + return "NoFrame"; + case NoWriteReason::NO_BODY: + return "NoBody"; + case NoWriteReason::SOCKET_FAILURE: + return "SocketFailure"; + } + folly::assume_unreachable(); +} } // namespace quic diff --git a/quic/QuicConstants.h b/quic/QuicConstants.h index 3f319dd73..182d37a03 100644 --- a/quic/QuicConstants.h +++ b/quic/QuicConstants.h @@ -391,6 +391,32 @@ inline std::ostream& operator<<(std::ostream& os, const QuicVersion& v) { return os; } +enum class WriteDataReason { + NO_WRITE, + PROBES, + ACK, + CRYPTO_STREAM, + STREAM, + LOSS, + BLOCKED, + STREAM_WINDOW_UPDATE, + CONN_WINDOW_UPDATE, + SIMPLE, + RESET, + PATHCHALLENGE, +}; + +enum class NoWriteReason { + WRITE_OK, + EMPTY_SCHEDULER, + NO_FRAME, + NO_BODY, + SOCKET_FAILURE, +}; + +std::string writeDataReasonString(WriteDataReason reason); +std::string writeNoWriteReasonString(NoWriteReason reason); + /** * Filter the versions that are currently supported. */ diff --git a/quic/api/LoopDetectorCallback.h b/quic/api/LoopDetectorCallback.h new file mode 100644 index 000000000..3a3a110f2 --- /dev/null +++ b/quic/api/LoopDetectorCallback.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + */ + +#pragma once + +#include + +namespace quic { + +class LoopDetectorCallback { + public: + virtual ~LoopDetectorCallback() = default; + virtual void onSuspiciousLoops( + uint64_t emptyLoopCount, + WriteDataReason writeReason, + NoWriteReason noWriteReason, + const std::string& scheduler) = 0; +}; + +} // namespace quic diff --git a/quic/api/QuicTransportBase.cpp b/quic/api/QuicTransportBase.cpp index 6a48002ae..b6163d95c 100644 --- a/quic/api/QuicTransportBase.cpp +++ b/quic/api/QuicTransportBase.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -1117,16 +1118,22 @@ void QuicTransportBase::updateWriteLooper(bool thisIteration) { } // TODO: Also listens to write event from libevent. Only schedule write when // the socket itself is writable. - if (shouldWriteData(*conn_)) { + auto writeDataReason = shouldWriteData(*conn_); + if (writeDataReason != WriteDataReason::NO_WRITE) { VLOG(10) << nodeToString(conn_->nodeType) << " running write looper thisIteration=" << thisIteration << " " << *this; writeLooper_->run(thisIteration); + conn_->debugState.needsWriteLoopDetect = + (conn_->loopDetectorCallback != nullptr); } else { VLOG(10) << nodeToString(conn_->nodeType) << " stopping write looper " << *this; writeLooper_->stop(); + conn_->debugState.needsWriteLoopDetect = false; + conn_->debugState.currentEmptyLoopCount = 0; } + conn_->debugState.writeDataReason = writeDataReason; } void QuicTransportBase::cancelDeliveryCallbacksForStream(StreamId streamId) { @@ -2181,7 +2188,23 @@ void QuicTransportBase::writeSocketData() { if (closeState_ != CloseState::CLOSED) { setLossDetectionAlarm(*conn_, *this); auto packetsAfter = conn_->outstandingPackets.size(); - // If we sent a new packet and the new packet was either the first packet + bool packetWritten = (packetsAfter > packetsBefore); + if (packetWritten) { + conn_->debugState.currentEmptyLoopCount = 0; + } else if ( + conn_->debugState.needsWriteLoopDetect && + conn_->loopDetectorCallback) { + // TODO: Currently we will to get some stats first. Then we may filter + // out some errors here. For example, socket fail to write might be a + // legit case to filter out. + conn_->loopDetectorCallback->onSuspiciousLoops( + ++conn_->debugState.currentEmptyLoopCount, + conn_->debugState.writeDataReason, + conn_->debugState.noWriteReason, + conn_->debugState.schedulerName); + } + // If we sent a new packet and the new packet was either the first + // packet // after quiescence or after receiving a new packet. if (packetsAfter > packetsBefore && (packetsBefore == 0 || conn_->receivedNewPacketBeforeWrite)) { diff --git a/quic/api/QuicTransportBase.h b/quic/api/QuicTransportBase.h index c73382be2..a767227bd 100644 --- a/quic/api/QuicTransportBase.h +++ b/quic/api/QuicTransportBase.h @@ -441,6 +441,10 @@ class QuicTransportBase : public QuicSocket { conn_->qLogger = std::move(qLogger); } + void setLoopDetectorCallback(std::shared_ptr callback) { + conn_->loopDetectorCallback = std::move(callback); + } + virtual void cancelAllAppCallbacks( std::pair error) noexcept; diff --git a/quic/api/QuicTransportFunctions.cpp b/quic/api/QuicTransportFunctions.cpp index d839755a2..f06deb08d 100644 --- a/quic/api/QuicTransportFunctions.cpp +++ b/quic/api/QuicTransportFunctions.cpp @@ -909,6 +909,11 @@ uint64_t writeConnectionDataToSocket( ioBufBatch.setContinueOnNetworkUnreachable( connection.transportSettings.continueOnNetworkUnreachable); + connection.debugState.schedulerName = scheduler.name(); + connection.debugState.noWriteReason = NoWriteReason::WRITE_OK; + if (!scheduler.hasData()) { + connection.debugState.noWriteReason = NoWriteReason::EMPTY_SCHEDULER; + } while (scheduler.hasData() && ioBufBatch.getPktSent() < packetLimit) { auto packetNum = getNextPacketNum(connection, pnSpace); auto header = builder( @@ -935,11 +940,13 @@ uint64_t writeConnectionDataToSocket( auto& packet = result.second; if (!packet || packet->packet.frames.empty()) { ioBufBatch.flush(); + connection.debugState.noWriteReason = NoWriteReason::NO_FRAME; return ioBufBatch.getPktSent(); } if (!packet->body) { // No more space remaining. ioBufBatch.flush(); + connection.debugState.noWriteReason = NoWriteReason::NO_BODY; return ioBufBatch.getPktSent(); } auto body = @@ -973,6 +980,7 @@ uint64_t writeConnectionDataToSocket( // if ioBufBatch.write returns false // it is because a flush() call failed if (!ret) { + connection.debugState.noWriteReason = NoWriteReason::SOCKET_FAILURE; return ioBufBatch.getPktSent(); } } @@ -1014,16 +1022,16 @@ uint64_t writeProbingDataToSocket( return written; } -bool shouldWriteData(const QuicConnectionStateBase& conn) { +WriteDataReason shouldWriteData(const QuicConnectionStateBase& conn) { if (conn.pendingEvents.numProbePackets) { VLOG(10) << nodeToString(conn.nodeType) << " needs write because of PTO" << conn; - return true; + return WriteDataReason::PROBES; } if (hasAckDataToWrite(conn)) { VLOG(10) << nodeToString(conn.nodeType) << " needs write because of ACKs " << conn; - return true; + return WriteDataReason::ACK; } const size_t minimumDataSize = std::max( kLongHeaderHeaderSize + kCipherOverheadHeuristic, sizeof(Sample)); @@ -1032,12 +1040,12 @@ bool shouldWriteData(const QuicConnectionStateBase& conn) { *conn.writableBytesLimit - conn.lossState.totalBytesSent <= minimumDataSize)) { QUIC_STATS(conn.infoCallback, onCwndBlocked); - return false; + return WriteDataReason::NO_WRITE; } if (conn.congestionController && conn.congestionController->getWritableBytes() <= minimumDataSize) { QUIC_STATS(conn.infoCallback, onCwndBlocked); - return false; + return WriteDataReason::NO_WRITE; } return hasNonAckDataToWrite(conn); } @@ -1061,28 +1069,43 @@ bool hasAckDataToWrite(const QuicConnectionStateBase& conn) { return writeAcks; } -bool hasNonAckDataToWrite(const QuicConnectionStateBase& conn) { +WriteDataReason hasNonAckDataToWrite(const QuicConnectionStateBase& conn) { if (cryptoHasWritableData(conn)) { VLOG(10) << nodeToString(conn.nodeType) << " needs write because of crypto stream" << " " << conn; - return true; + return WriteDataReason::CRYPTO_STREAM; } if (!conn.oneRttWriteCipher && !conn.zeroRttWriteCipher) { // All the rest of the types of data need either a 1-rtt or 0-rtt cipher to // be written. - return false; + return WriteDataReason::NO_WRITE; } - bool hasStreamData = getSendConnFlowControlBytesWire(conn) != 0 && - conn.streamManager->hasWritable(); - bool hasLoss = conn.streamManager->hasLoss(); - bool hasBlocked = conn.streamManager->hasBlocked(); - bool hasStreamWindowUpdates = conn.streamManager->hasWindowUpdates(); - bool hasConnWindowUpdate = conn.pendingEvents.connWindowUpdate; - bool hasSimple = !conn.pendingEvents.frames.empty(); - bool hasResets = !conn.pendingEvents.resets.empty(); - bool hasPathChallenge = (conn.pendingEvents.pathChallenge != folly::none); - return hasStreamData || hasLoss || hasBlocked || hasStreamWindowUpdates || - hasConnWindowUpdate || hasResets || hasSimple || hasPathChallenge; + if (!conn.pendingEvents.resets.empty()) { + return WriteDataReason::RESET; + } + if (conn.streamManager->hasWindowUpdates()) { + return WriteDataReason::STREAM_WINDOW_UPDATE; + } + if (conn.pendingEvents.connWindowUpdate) { + return WriteDataReason::CONN_WINDOW_UPDATE; + } + if (conn.streamManager->hasBlocked()) { + return WriteDataReason::BLOCKED; + } + if (conn.streamManager->hasLoss()) { + return WriteDataReason::LOSS; + } + if (getSendConnFlowControlBytesWire(conn) != 0 && + conn.streamManager->hasWritable()) { + return WriteDataReason::STREAM; + } + if (!conn.pendingEvents.frames.empty()) { + return WriteDataReason::SIMPLE; + } + if ((conn.pendingEvents.pathChallenge != folly::none)) { + return WriteDataReason::PATHCHALLENGE; + } + return WriteDataReason::NO_WRITE; } } // namespace quic diff --git a/quic/api/QuicTransportFunctions.h b/quic/api/QuicTransportFunctions.h index 0125eb38b..6edcbdd68 100644 --- a/quic/api/QuicTransportFunctions.h +++ b/quic/api/QuicTransportFunctions.h @@ -95,9 +95,9 @@ uint64_t writeZeroRttDataToSocket( * * TODO: We should probably split "should" and "can" into two APIs. */ -bool shouldWriteData(const QuicConnectionStateBase& conn); +WriteDataReason shouldWriteData(const QuicConnectionStateBase& conn); bool hasAckDataToWrite(const QuicConnectionStateBase& conn); -bool hasNonAckDataToWrite(const QuicConnectionStateBase& conn); +WriteDataReason hasNonAckDataToWrite(const QuicConnectionStateBase& conn); /** * Invoked when the written stream data was new stream data. diff --git a/quic/api/test/Mocks.h b/quic/api/test/Mocks.h index 844a87400..dea1cd1e8 100644 --- a/quic/api/test/Mocks.h +++ b/quic/api/test/Mocks.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -266,6 +267,14 @@ class MockQuicTransport : public QuicServerTransport { GMOCK_METHOD1_(, noexcept, , setConnectionIdAlgo, void(ConnectionIdAlgo*)); }; +class MockLoopDetectorCallback : public LoopDetectorCallback { + public: + ~MockLoopDetectorCallback() override = default; + MOCK_METHOD4( + onSuspiciousLoops, + void(uint64_t, WriteDataReason, NoWriteReason, const std::string&)); +}; + inline std::ostream& operator<<(std::ostream& os, const MockQuicTransport&) { return os; } diff --git a/quic/api/test/QuicTransportFunctionsTest.cpp b/quic/api/test/QuicTransportFunctionsTest.cpp index 8e1a049af..4b9d3662d 100644 --- a/quic/api/test/QuicTransportFunctionsTest.cpp +++ b/quic/api/test/QuicTransportFunctionsTest.cpp @@ -1107,7 +1107,7 @@ TEST_F( writableBytes -= iobuf->computeChainDataLength(); return iobuf->computeChainDataLength(); })); - EXPECT_TRUE(shouldWriteData(*conn)); + EXPECT_NE(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); writeQuicDataToSocket( *rawSocket, *conn, @@ -1117,7 +1117,7 @@ TEST_F( *headerCipher, getVersion(*conn), conn->transportSettings.writeConnectionDataPacketsLimit); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); } TEST_F(QuicTransportFunctionsTest, WriteQuicDataToSocketWithNoBytesForHeader) { @@ -1542,16 +1542,16 @@ TEST_F(QuicTransportFunctionsTest, ShouldWriteDataTest) { CHECK(!conn->oneRttWriteCipher); conn->ackStates.appDataAckState.needsToSendAckImmediately = true; addAckStatesWithCurrentTimestamps(conn->ackStates.appDataAckState, 1, 20); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); conn->oneRttWriteCipher = test::createNoOpAead(); EXPECT_CALL(*transportInfoCb_, onCwndBlocked()).Times(0); - EXPECT_TRUE(shouldWriteData(*conn)); + EXPECT_NE(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); auto stream1 = conn->streamManager->createNextBidirectionalStream().value(); auto buf = IOBuf::copyBuffer("0123456789"); writeDataToQuicStream(*stream1, buf->clone(), false); - EXPECT_TRUE(shouldWriteData(*conn)); + EXPECT_NE(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); writeQuicDataToSocket( *rawSocket, @@ -1562,14 +1562,14 @@ TEST_F(QuicTransportFunctionsTest, ShouldWriteDataTest) { *headerCipher, getVersion(*conn), conn->transportSettings.writeConnectionDataPacketsLimit); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); // Congestion control EXPECT_CALL(*rawCongestionController, getWritableBytes()) .WillRepeatedly(Return(0)); EXPECT_CALL(*transportInfoCb_, onCwndBlocked()); writeDataToQuicStream(*stream1, buf->clone(), true); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); EXPECT_CALL(*transportInfoCb_, onCwndBlocked()); writeQuicDataToSocket( @@ -1581,7 +1581,7 @@ TEST_F(QuicTransportFunctionsTest, ShouldWriteDataTest) { *headerCipher, getVersion(*conn), conn->transportSettings.writeConnectionDataPacketsLimit); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); } TEST_F(QuicTransportFunctionsTest, ShouldWriteStreamsNoCipher) { @@ -1595,7 +1595,7 @@ TEST_F(QuicTransportFunctionsTest, ShouldWriteStreamsNoCipher) { auto stream1 = conn->streamManager->createNextBidirectionalStream().value(); auto buf = IOBuf::copyBuffer("0123456789"); writeDataToQuicStream(*stream1, buf->clone(), false); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); } TEST_F(QuicTransportFunctionsTest, ShouldWritePureAcksNoCipher) { @@ -1608,7 +1608,7 @@ TEST_F(QuicTransportFunctionsTest, ShouldWritePureAcksNoCipher) { conn->ackStates.appDataAckState.needsToSendAckImmediately = true; addAckStatesWithCurrentTimestamps(conn->ackStates.appDataAckState, 1, 20); - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); } TEST_F(QuicTransportFunctionsTest, ShouldWriteDataNoConnFlowControl) { @@ -1621,10 +1621,10 @@ TEST_F(QuicTransportFunctionsTest, ShouldWriteDataNoConnFlowControl) { auto stream1 = conn->streamManager->createNextBidirectionalStream().value(); auto buf = IOBuf::copyBuffer("0123456789"); writeDataToQuicStream(*stream1, buf->clone(), false); - EXPECT_TRUE(shouldWriteData(*conn)); + EXPECT_NE(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); // Artificially limit the connection flow control. conn->flowControlState.peerAdvertisedMaxOffset = 0; - EXPECT_FALSE(shouldWriteData(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(*conn)); } TEST_F(QuicTransportFunctionsTest, HasAckDataToWriteCipherAndAckStateMatch) { @@ -1681,23 +1681,23 @@ TEST_F(QuicTransportFunctionsTest, HasCryptoDataToWrite) { auto conn = createConn(); conn->cryptoState->initialStream.lossBuffer.emplace_back( folly::IOBuf::copyBuffer("Grab your coat and get your hat"), 0, false); - EXPECT_TRUE(hasNonAckDataToWrite(*conn)); + EXPECT_EQ(WriteDataReason::CRYPTO_STREAM, hasNonAckDataToWrite(*conn)); } TEST_F(QuicTransportFunctionsTest, HasControlFramesToWrite) { auto conn = createConn(); conn->streamManager->queueBlocked(1, 100); - EXPECT_FALSE(hasNonAckDataToWrite(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, hasNonAckDataToWrite(*conn)); conn->oneRttWriteCipher = test::createNoOpAead(); - EXPECT_TRUE(hasNonAckDataToWrite(*conn)); + EXPECT_EQ(WriteDataReason::BLOCKED, hasNonAckDataToWrite(*conn)); } TEST_F(QuicTransportFunctionsTest, FlowControlBlocked) { auto conn = createConn(); conn->flowControlState.peerAdvertisedMaxOffset = 1000; conn->flowControlState.sumCurWriteOffset = 1000; - EXPECT_FALSE(hasNonAckDataToWrite(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, hasNonAckDataToWrite(*conn)); } TEST_F(QuicTransportFunctionsTest, HasAppDataToWrite) { @@ -1705,10 +1705,10 @@ TEST_F(QuicTransportFunctionsTest, HasAppDataToWrite) { conn->flowControlState.peerAdvertisedMaxOffset = 1000; conn->flowControlState.sumCurWriteOffset = 800; conn->streamManager->addWritable(0); - EXPECT_FALSE(hasNonAckDataToWrite(*conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, hasNonAckDataToWrite(*conn)); conn->oneRttWriteCipher = test::createNoOpAead(); - EXPECT_TRUE(hasNonAckDataToWrite(*conn)); + EXPECT_EQ(WriteDataReason::STREAM, hasNonAckDataToWrite(*conn)); } TEST_F(QuicTransportFunctionsTest, UpdateConnectionCloneCounter) { diff --git a/quic/api/test/QuicTransportTest.cpp b/quic/api/test/QuicTransportTest.cpp index aa96a1a45..fe39362da 100644 --- a/quic/api/test/QuicTransportTest.cpp +++ b/quic/api/test/QuicTransportTest.cpp @@ -476,7 +476,7 @@ TEST_F(QuicTransportTest, WriteSmall) { conn.transportSettings.writeConnectionDataPacketsLimit); verifyCorrectness(conn, 0, stream, *buf); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, WriteLarge) { @@ -511,7 +511,7 @@ TEST_F(QuicTransportTest, WriteLarge) { conn.transportSettings.writeConnectionDataPacketsLimit); EXPECT_EQ(NumFullPackets + 1, conn.outstandingPackets.size()); verifyCorrectness(conn, 0, stream, *buf); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, WriteMultipleTimes) { @@ -532,7 +532,7 @@ TEST_F(QuicTransportTest, WriteMultipleTimes) { transport_->writeChain(stream, buf->clone(), false, false); loopForWrites(); verifyCorrectness(conn, originalWriteOffset, stream, *buf); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, WriteMultipleStreams) { @@ -695,7 +695,7 @@ TEST_F(QuicTransportTest, WriteFin) { transport_->getVersion(), conn.transportSettings.writeConnectionDataPacketsLimit); verifyCorrectness(conn, 0, stream, *buf, true); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, WriteOnlyFin) { @@ -723,7 +723,7 @@ TEST_F(QuicTransportTest, WriteOnlyFin) { transport_->getVersion(), conn.transportSettings.writeConnectionDataPacketsLimit); verifyCorrectness(conn, 0, stream, *buf, true); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, WriteDataWithRetransmission) { @@ -744,7 +744,7 @@ TEST_F(QuicTransportTest, WriteDataWithRetransmission) { // lost data and new data buf->appendChain(std::move(buf2)); verifyCorrectness(conn, 0, stream, *buf); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, WriteImmediateAcks) { @@ -780,7 +780,7 @@ TEST_F(QuicTransportTest, WriteImmediateAcks) { EXPECT_EQ(conn.ackStates.appDataAckState.largestAckScheduled, end); EXPECT_FALSE(conn.ackStates.appDataAckState.needsToSendAckImmediately); EXPECT_EQ(0, conn.ackStates.appDataAckState.numNonRxPacketsRecvd); - EXPECT_FALSE(shouldWriteData(conn)); + EXPECT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); } TEST_F(QuicTransportTest, NotWriteAcksIfNoData) { @@ -1397,6 +1397,56 @@ TEST_F(QuicTransportTest, CloneNewConnectionIdFrame) { EXPECT_EQ(numNewConnIdPackets, 3); } +TEST_F(QuicTransportTest, BusyWriteLoopDetection) { + auto& conn = transport_->getConnectionState(); + conn.transportSettings.writeConnectionDataPacketsLimit = 1; + auto mockLoopDetectorCallback = std::make_unique(); + auto rawLoopDetectorCallback = mockLoopDetectorCallback.get(); + conn.loopDetectorCallback = std::move(mockLoopDetectorCallback); + ASSERT_FALSE(conn.debugState.needsWriteLoopDetect); + ASSERT_EQ(0, conn.debugState.currentEmptyLoopCount); + auto mockCongestionController = std::make_unique(); + auto rawCongestionController = mockCongestionController.get(); + conn.congestionController = std::move(mockCongestionController); + EXPECT_CALL(*rawCongestionController, getWritableBytes()) + .WillRepeatedly(Return(1000)); + + // There should be no data to send at this point + transport_->updateWriteLooper(true); + EXPECT_FALSE(conn.debugState.needsWriteLoopDetect); + EXPECT_EQ(WriteDataReason::NO_WRITE, conn.debugState.writeDataReason); + EXPECT_EQ(0, conn.debugState.currentEmptyLoopCount); + loopForWrites(); + + auto stream = transport_->createBidirectionalStream().value(); + auto buf = buildRandomInputData(100); + transport_->writeChain(stream, buf->clone(), true, false); + transport_->updateWriteLooper(true); + EXPECT_TRUE(conn.debugState.needsWriteLoopDetect); + EXPECT_EQ(0, conn.debugState.currentEmptyLoopCount); + EXPECT_EQ(WriteDataReason::STREAM, conn.debugState.writeDataReason); + EXPECT_CALL(*socket_, write(_, _)).WillOnce(Return(1000)); + loopForWrites(); + EXPECT_EQ(1, conn.outstandingPackets.size()); + EXPECT_EQ(0, conn.debugState.currentEmptyLoopCount); + + // Queue a window update for a stream doesn't exist + conn.streamManager->queueWindowUpdate(stream + 1); + transport_->updateWriteLooper(true); + EXPECT_TRUE( + WriteDataReason::STREAM_WINDOW_UPDATE == conn.debugState.writeDataReason); + EXPECT_CALL(*socket_, write(_, _)).Times(0); + EXPECT_CALL( + *rawLoopDetectorCallback, + onSuspiciousLoops(1, WriteDataReason::STREAM_WINDOW_UPDATE, _, _)) + .Times(1); + loopForWrites(); + EXPECT_EQ(1, conn.outstandingPackets.size()); + EXPECT_EQ(1, conn.debugState.currentEmptyLoopCount); + + transport_->close(folly::none); +} + TEST_F(QuicTransportTest, ResendNewConnectionIdOnLoss) { auto& conn = transport_->getConnectionState(); @@ -2389,7 +2439,9 @@ TEST_F(QuicTransportTest, CloseWithDrainWillKeepSocketAround) { } TEST_F(QuicTransportTest, PacedWriteNoDataToWrite) { - ASSERT_FALSE(shouldWriteData(transport_->getConnectionState())); + ASSERT_EQ( + WriteDataReason::NO_WRITE, + shouldWriteData(transport_->getConnectionState())); EXPECT_CALL(*socket_, write(_, _)).Times(0); transport_->pacedWrite(true); } @@ -2441,7 +2493,7 @@ TEST_F(QuicTransportTest, AlreadyScheduledPacingNoWrite) { // schedule a pacing timeout. loopForWrites(); - ASSERT_TRUE(shouldWriteData(conn)); + ASSERT_NE(WriteDataReason::NO_WRITE, shouldWriteData(conn)); EXPECT_TRUE(transport_->isPacingScheduled()); EXPECT_CALL(*socket_, write(_, _)).Times(0); transport_->pacedWrite(true); @@ -2469,7 +2521,7 @@ TEST_F(QuicTransportTest, NoScheduleIfNoNewData) { // FunctionLooper won't schedule a pacing timeout. transport_->pacedWrite(true); - ASSERT_FALSE(shouldWriteData(conn)); + ASSERT_EQ(WriteDataReason::NO_WRITE, shouldWriteData(conn)); EXPECT_FALSE(transport_->isPacingScheduled()); } diff --git a/quic/loss/QuicLossFunctions.h b/quic/loss/QuicLossFunctions.h index 254a55caa..1fda0b52e 100644 --- a/quic/loss/QuicLossFunctions.h +++ b/quic/loss/QuicLossFunctions.h @@ -24,7 +24,7 @@ namespace quic { // Forward-declaration bool hasAckDataToWrite(const QuicConnectionStateBase& conn); -bool hasNonAckDataToWrite(const QuicConnectionStateBase& conn); +WriteDataReason hasNonAckDataToWrite(const QuicConnectionStateBase& conn); std::chrono::microseconds calculatePTO(const QuicConnectionStateBase& conn); @@ -138,7 +138,8 @@ void setLossDetectionAlarm(QuicConnectionStateBase& conn, Timeout& timeout) { * in the buffers which is unsent or known to be lost. We should set a timer * in this case to be able to send this data on the next PTO. */ - bool hasDataToWrite = hasAckDataToWrite(conn) || hasNonAckDataToWrite(conn); + bool hasDataToWrite = hasAckDataToWrite(conn) || + (hasNonAckDataToWrite(conn) != WriteDataReason::NO_WRITE); auto totalPacketsOutstanding = conn.outstandingPackets.size(); if (totalPacketsOutstanding == conn.outstandingPureAckPacketsCount) { VLOG(10) << __func__ << " unset alarm pure ack only" diff --git a/quic/state/StateData.h b/quic/state/StateData.h index 0f57406ff..0f5a65918 100644 --- a/quic/state/StateData.h +++ b/quic/state/StateData.h @@ -358,6 +358,7 @@ struct LossState { class Logger; class CongestionControllerFactory; +class LoopDetectorCallback; struct QuicConnectionStateBase { virtual ~QuicConnectionStateBase() = default; @@ -616,6 +617,20 @@ struct QuicConnectionStateBase { // Whether or not both ends agree to use partial reliability bool partialReliabilityEnabled{false}; + + // Debug information. Currently only used to debug busy loop of Transport + // WriteLooper. + struct DebugState { + bool needsWriteLoopDetect{false}; + uint64_t currentEmptyLoopCount{0}; + WriteDataReason writeDataReason{WriteDataReason::NO_WRITE}; + NoWriteReason noWriteReason{NoWriteReason::WRITE_OK}; + std::string schedulerName; + }; + + DebugState debugState; + + std::shared_ptr loopDetectorCallback; }; std::ostream& operator<<(std::ostream& os, const QuicConnectionStateBase& st);