mirror of
https://github.com/facebookincubator/mvfst.git
synced 2025-11-27 03:41:14 +03:00
Summary: This diff adds `recvmmsg` to the `QuicAsyncUDPSocketWrapper` interface and adds a concrete implementation to `QuicAsyncUDPSocketWrapperImpl` - The concrete implementation is largely a copy/paste of what we have in `QuicClientTransport::recvmmsg` today. - A new transport setting is added — `shouldUseWrapperRecvmmsgForBatchRecv` — to determine if `QuicAsyncUDPSocketWrapperImpl::recvmmsg` should be used. It is currently disabled for all use cases. -- This diff is part of a larger stack focused on the following: - **Cleaning up client and server UDP packet receive paths while improving testability.** We currently have multiple receive paths for client and server. Capabilities vary significantly and there are few tests. For instance: - The server receive path supports socket RX timestamps, albeit incorrectly in that it does not store a timestamp per packet. In comparison, the client receive path does not currently support socket RX timestamps, although the code in `QuicClientTransport::recvmsg` and `QuicClientTransport::recvmmsg` makes reference to socket RX timestamps, making it confusing to understand the capabilities available when tracing through the code. This complicates the tests in `QuicTypedTransportTests`, as we have to disable test logic that depends on socket RX timestamps for client tests. - The client currently has three receive paths, and none of them are well tested. - **Modularize and abstract components in the receive path.** This will make it easier to mock/fake the UDP socket and network layers. - `QuicClientTransport` and `QuicServerTransport` currently contain UDP socket handling logic that operates over lower layer primitives such as `cmsg` and `io_vec` (see `QuicClientTransport::recvmmsg` and `...::recvmsg` as examples). - Because this UDP socket handling logic is inside of the mvfst transport implementations, it is difficult to test this logic in isolation and mock/fake the underlying socket and network layers. 
For instance, injecting a user space network emulator that operates at the socket layer would require faking `folly::AsyncUDPSocket`, which is non-trivial given that `AsyncUDPSocket` does not abstract away intricacies arising from the aforementioned lower layer primitives. - By shifting this logic into an intermediate layer between the transport and the underlying UDP socket, it will be easier to mock out the UDP socket layer when testing functionality at higher layers, and inject fake components when we want to emulate the network between a mvfst client and server. It will also be easier for us to have unit tests focused on testing interactions between the UDP socket implementation and this intermediate layer. - **Improving receive path timestamping.** We only record a single timestamp per `NetworkData` at the moment, but (1) it is possible for a `NetworkData` to have multiple packets, each with their own timestamps, and (2) we should be able to record both userspace and socket timestamps. Reviewed By: silver23arrow Differential Revision: D48719751 fbshipit-source-id: d08f95823b917fa01dff04757b0ceabbf691a0ca
186 lines
5.6 KiB
C++
186 lines
5.6 KiB
C++
/*
|
|
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
*
|
|
* This source code is licensed under the MIT license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*/
|
|
|
|
#include <folly/portability/Sockets.h>
|
|
#include <quic/common/QuicAsyncUDPSocketWrapper.h>
|
|
|
|
namespace {
// Size, in bytes, of a sockaddr_storage. Used as the address length passed
// along with sockaddr_storage-backed address buffers in this file.
constexpr socklen_t kAddrLen =
    static_cast<socklen_t>(sizeof(struct sockaddr_storage));
} // namespace
|
|
|
|
namespace quic {
|
|
|
|
#ifdef MVFST_USE_LIBEV
|
|
/**
 * MVFST_USE_LIBEV variant: the socket argument is ignored and -1 is always
 * returned.
 */
int getSocketFd(const QuicAsyncUDPSocketWrapper& /* s */) {
  constexpr int kNoFd = -1;
  return kNoFd;
}
|
|
/**
 * MVFST_USE_LIBEV variant: hands the descriptor back as a NetworkFdType
 * without wrapping it in another object.
 */
NetworkFdType toNetworkFdType(int fd) {
  NetworkFdType networkFd = fd;
  return networkFd;
}
|
|
#else
|
|
int getSocketFd(const QuicAsyncUDPSocketWrapper& s) {
|
|
return s.getNetworkSocket().toFd();
|
|
}
|
|
/**
 * Non-libev variant: wraps the raw descriptor in a folly::NetworkSocket.
 */
NetworkFdType toNetworkFdType(int fd) {
  auto networkSocket = folly::NetworkSocket(fd);
  return networkSocket;
}
|
|
#endif
|
|
|
|
/**
 * Reads up to `numPackets` datagrams from the socket with a single recvmmsg()
 * call and appends the received payloads to `networkData.packets`.
 *
 * Per-message state (address storage, read buffer, iovec, mmsghdr) lives in
 * `recvmmsgStorage_`. A read buffer is only moved out of its slot when a
 * packet is produced from it, so buffers belonging to slots that received
 * nothing (or an empty datagram) stay in place and are reused by later calls.
 *
 * @param readBufferSize  Number of bytes offered to the kernel per message.
 * @param numPackets      Maximum number of messages to read in this call.
 * @param networkData     Packets produced from the received datagrams are
 *                        appended to `networkData.packets`.
 * @param peerAddress     If empty, it is set from the source address of the
 *                        first non-empty datagram; otherwise left untouched.
 * @param totalData       Incremented by the number of payload bytes accepted.
 * @return RecvResult(NoReadReason::RETRIABLE_ERROR) when recvmmsg() fails
 *         with EAGAIN / EWOULDBLOCK, RecvResult(NoReadReason::
 *         NONRETRIABLE_ERROR) for any other failure, and a default-constructed
 *         RecvResult otherwise.
 */
QuicAsyncUDPSocketWrapperImpl::RecvResult
QuicAsyncUDPSocketWrapperImpl::recvMmsg(
    uint64_t readBufferSize,
    uint16_t numPackets,
    NetworkData& networkData,
    folly::Optional<folly::SocketAddress>& peerAddress,
    size_t& totalData) {
  /**
   * This is largely a copy / paste of QuicClientTransport::recvmmsg.
   *
   * TODO(bschlinker): Refactor and add dedicated testing.
   */
  recvmmsgStorage_.resize(numPackets);
  auto& msgs = recvmmsgStorage_.msgs;
  int flags = 0;
#ifdef FOLLY_HAVE_MSG_ERRQUEUE
  bool useGRO = getGRO() > 0;
  bool useTS = getTimestamping() > 0;
  // Control-message space is only needed when GRO or timestamping is enabled.
  std::vector<std::array<
      char,
      QuicAsyncUDPSocketWrapper::ReadCallback::OnDataAvailableParams::
          kCmsgSpace>>
      controlVec((useGRO || useTS) ? numPackets : 0);

  // we need to consider MSG_TRUNC too
  if (useGRO) {
    flags |= MSG_TRUNC;
  }
#endif
  for (uint16_t i = 0; i < numPackets; ++i) {
    auto& addr = recvmmsgStorage_.impl_[i].addr;
    auto& readBuffer = recvmmsgStorage_.impl_[i].readBuffer;
    auto& iovec = recvmmsgStorage_.impl_[i].iovec;
    struct msghdr* msg = &msgs[i].msg_hdr;

    // Allocate a buffer if the slot has none, or if the buffer kept from an
    // earlier call cannot hold readBufferSize bytes (the size may differ
    // between calls).
    if (!readBuffer || readBuffer->tailroom() < readBufferSize) {
      readBuffer = folly::IOBuf::createCombined(readBufferSize);
    }
    CHECK(readBuffer != nullptr);

    // Always re-point the message at this slot's iovec and buffer: the
    // storage vectors may have been reallocated by resize() above, which
    // would leave a previously stored &iovec dangling for reused buffers.
    iovec.iov_base = readBuffer->writableData();
    iovec.iov_len = readBufferSize;
    msg->msg_iov = &iovec;
    msg->msg_iovlen = 1;

    auto* rawAddr = reinterpret_cast<sockaddr*>(&addr);
    rawAddr->sa_family = address().getFamily();
    msg->msg_name = rawAddr;
    msg->msg_namelen = kAddrLen;
#ifdef FOLLY_HAVE_MSG_ERRQUEUE
    if (useGRO || useTS) {
      ::memset(controlVec[i].data(), 0, controlVec[i].size());
      msg->msg_control = controlVec[i].data();
      msg->msg_controllen = controlVec[i].size();
    } else {
      // controlVec is local to a single call; make sure no pointer into a
      // previous call's (already destroyed) control buffers is handed to the
      // kernel.
      msg->msg_control = nullptr;
      msg->msg_controllen = 0;
    }
#endif
  }

  int numMsgsRecvd = recvmmsg(msgs.data(), numPackets, flags, nullptr);
  if (numMsgsRecvd < 0) {
    if (errno == EAGAIN || errno == EWOULDBLOCK) {
      // Exit, socket will notify us again when socket is readable.
      return RecvResult(NoReadReason::RETRIABLE_ERROR);
    }
    // If we got a non-retriable error, we might have received
    // a packet that we could process, however let's just quit early.
    // pauseRead();
    return RecvResult(NoReadReason::NONRETRIABLE_ERROR);
  }

  CHECK_LE(numMsgsRecvd, numPackets);
  for (uint16_t i = 0; i < static_cast<uint16_t>(numMsgsRecvd); ++i) {
    auto& addr = recvmmsgStorage_.impl_[i].addr;
    auto& readBuffer = recvmmsgStorage_.impl_[i].readBuffer;
    auto& msg = msgs[i];

    size_t bytesRead = msg.msg_len;
    if (bytesRead == 0) {
      // Empty datagram, this is probably garbage matching our tuple, we
      // should ignore such datagrams. The slot keeps its buffer for reuse.
      continue;
    }
    QuicAsyncUDPSocketWrapper::ReadCallback::OnDataAvailableParams params;
#ifdef FOLLY_HAVE_MSG_ERRQUEUE
    if (useGRO || useTS) {
      QuicAsyncUDPSocketWrapper::fromMsg(params, msg.msg_hdr);

      // truncated: with MSG_TRUNC the reported length can exceed the buffer,
      // so clamp it and keep only whole GRO segments.
      if (bytesRead > readBufferSize) {
        bytesRead = readBufferSize;
        if (params.gro > 0) {
          bytesRead = bytesRead - bytesRead % params.gro;
        }
      }
    }
#endif
    totalData += bytesRead;

    if (!peerAddress) {
      peerAddress.emplace(folly::SocketAddress());
      auto* rawAddr = reinterpret_cast<sockaddr*>(&addr);
      peerAddress->setFromSockaddr(rawAddr, kAddrLen);
    }

    VLOG(10) << "Got data from socket peer=" << *peerAddress
             << " len=" << bytesRead;
    readBuffer->append(bytesRead);
    if (params.gro > 0) {
      // Split the coalesced datagram into segments of params.gro bytes; the
      // final segment may be shorter.
      size_t len = bytesRead;
      size_t remaining = len;
      size_t offset = 0;
      size_t totalNumPackets =
          networkData.packets.size() + ((len + params.gro - 1) / params.gro);
      networkData.packets.reserve(totalNumPackets);
      while (remaining) {
        if (static_cast<int>(remaining) > params.gro) {
          auto tmp = readBuffer->cloneOne();
          // start at offset
          tmp->trimStart(offset);
          // the actual len is len - offset now
          // leave gro bytes
          tmp->trimEnd(len - offset - params.gro);
          DCHECK_EQ(tmp->length(), params.gro);

          offset += params.gro;
          remaining -= params.gro;
          networkData.packets.emplace_back(std::move(tmp));
        } else {
          // do not clone the last packet
          // start at offset, use all the remaining data
          readBuffer->trimStart(offset);
          DCHECK_EQ(readBuffer->length(), remaining);
          remaining = 0;
          networkData.packets.emplace_back(std::move(readBuffer));
        }
      }
    } else {
      networkData.packets.emplace_back(std::move(readBuffer));
    }
  }

  return RecvResult();
}
|
|
|
|
void QuicAsyncUDPSocketWrapperImpl::RecvmmsgStorage::resize(size_t numPackets) {
|
|
if (msgs.size() != numPackets) {
|
|
msgs.resize(numPackets);
|
|
impl_.resize(numPackets);
|
|
}
|
|
}
|
|
|
|
} // namespace quic
|