Implement S/G IO for batched sends and eliminate another frame copy (#2874)
This commit is contained in:
+9
-6
@@ -185,7 +185,7 @@ namespace crypto {
|
|||||||
* The resulting ciphertext and the GCM tag are written into the tagged_cipher buffer.
|
* The resulting ciphertext and the GCM tag are written into the tagged_cipher buffer.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
gcm_t::encrypt(const std::string_view &plaintext, std::uint8_t *tagged_cipher, aes_t *iv) {
|
gcm_t::encrypt(const std::string_view &plaintext, std::uint8_t *tag, std::uint8_t *ciphertext, aes_t *iv) {
|
||||||
if (!encrypt_ctx && init_encrypt_gcm(encrypt_ctx, &key, iv, padding)) {
|
if (!encrypt_ctx && init_encrypt_gcm(encrypt_ctx, &key, iv, padding)) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -196,18 +196,15 @@ namespace crypto {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto tag = tagged_cipher;
|
|
||||||
auto cipher = tag + tag_size;
|
|
||||||
|
|
||||||
int update_outlen, final_outlen;
|
int update_outlen, final_outlen;
|
||||||
|
|
||||||
// Encrypt into the caller's buffer
|
// Encrypt into the caller's buffer
|
||||||
if (EVP_EncryptUpdate(encrypt_ctx.get(), cipher, &update_outlen, (const std::uint8_t *) plaintext.data(), plaintext.size()) != 1) {
|
if (EVP_EncryptUpdate(encrypt_ctx.get(), ciphertext, &update_outlen, (const std::uint8_t *) plaintext.data(), plaintext.size()) != 1) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// GCM encryption won't ever fill ciphertext here but we have to call it anyway
|
// GCM encryption won't ever fill ciphertext here but we have to call it anyway
|
||||||
if (EVP_EncryptFinal_ex(encrypt_ctx.get(), cipher + update_outlen, &final_outlen) != 1) {
|
if (EVP_EncryptFinal_ex(encrypt_ctx.get(), ciphertext + update_outlen, &final_outlen) != 1) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,6 +215,12 @@ namespace crypto {
|
|||||||
return update_outlen + final_outlen;
|
return update_outlen + final_outlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
gcm_t::encrypt(const std::string_view &plaintext, std::uint8_t *tagged_cipher, aes_t *iv) {
|
||||||
|
// This overload handles the common case of [GCM tag][cipher text] buffer layout
|
||||||
|
return encrypt(plaintext, tagged_cipher, tagged_cipher + tag_size, iv);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
ecb_t::decrypt(const std::string_view &cipher, std::vector<std::uint8_t> &plaintext) {
|
ecb_t::decrypt(const std::string_view &cipher, std::vector<std::uint8_t> &plaintext) {
|
||||||
auto fg = util::fail_guard([this]() {
|
auto fg = util::fail_guard([this]() {
|
||||||
|
|||||||
@@ -125,6 +125,17 @@ namespace crypto {
|
|||||||
|
|
||||||
gcm_t(const crypto::aes_t &key, bool padding = true);
|
gcm_t(const crypto::aes_t &key, bool padding = true);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Encrypts the plaintext using AES GCM mode.
|
||||||
|
* @param plaintext The plaintext data to be encrypted.
|
||||||
|
* @param tag The buffer where the GCM tag will be written.
|
||||||
|
* @param ciphertext The buffer where the resulting ciphertext will be written.
|
||||||
|
* @param iv The initialization vector to be used for the encryption.
|
||||||
|
* @return The total length of the ciphertext and GCM tag. Returns -1 in case of an error.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
encrypt(const std::string_view &plaintext, std::uint8_t *tag, std::uint8_t *ciphertext, aes_t *iv);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Encrypts the plaintext using AES GCM mode.
|
* @brief Encrypts the plaintext using AES GCM mode.
|
||||||
* length of cipher must be at least: round_to_pkcs7_padded(plaintext.size()) + crypto::cipher::tag_size
|
* length of cipher must be at least: round_to_pkcs7_padded(plaintext.size()) + crypto::cipher::tag_size
|
||||||
|
|||||||
+47
-2
@@ -606,15 +606,60 @@ namespace platf {
|
|||||||
void
|
void
|
||||||
restart();
|
restart();
|
||||||
|
|
||||||
struct batched_send_info_t {
|
struct buffer_descriptor_t {
|
||||||
const char *buffer;
|
const char *buffer;
|
||||||
size_t block_size;
|
size_t size;
|
||||||
|
|
||||||
|
// Constructors required for emplace_back() prior to C++20
|
||||||
|
buffer_descriptor_t(const char *buffer, size_t size):
|
||||||
|
buffer(buffer), size(size) {}
|
||||||
|
buffer_descriptor_t():
|
||||||
|
buffer(nullptr), size(0) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct batched_send_info_t {
|
||||||
|
// Optional headers to be prepended to each packet
|
||||||
|
const char *headers;
|
||||||
|
size_t header_size;
|
||||||
|
|
||||||
|
// One or more data buffers to use for the payloads
|
||||||
|
//
|
||||||
|
// NB: Data buffers must be aligned to payload size!
|
||||||
|
std::vector<buffer_descriptor_t> &payload_buffers;
|
||||||
|
size_t payload_size;
|
||||||
|
|
||||||
|
// The offset (in header+payload message blocks) in the header and payload
|
||||||
|
// buffers to begin sending messages from
|
||||||
|
size_t block_offset;
|
||||||
|
|
||||||
|
// The number of header+payload message blocks to send
|
||||||
size_t block_count;
|
size_t block_count;
|
||||||
|
|
||||||
std::uintptr_t native_socket;
|
std::uintptr_t native_socket;
|
||||||
boost::asio::ip::address &target_address;
|
boost::asio::ip::address &target_address;
|
||||||
uint16_t target_port;
|
uint16_t target_port;
|
||||||
boost::asio::ip::address &source_address;
|
boost::asio::ip::address &source_address;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Returns a payload buffer descriptor for the given payload offset.
|
||||||
|
* @param offset The offset in the total payload data (bytes).
|
||||||
|
* @return Buffer descriptor describing the region at the given offset.
|
||||||
|
*/
|
||||||
|
buffer_descriptor_t
|
||||||
|
buffer_for_payload_offset(ptrdiff_t offset) {
|
||||||
|
for (const auto &desc : payload_buffers) {
|
||||||
|
if (offset < desc.size) {
|
||||||
|
return {
|
||||||
|
desc.buffer + offset,
|
||||||
|
desc.size - offset,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
offset -= desc.size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
bool
|
bool
|
||||||
send_batch(batched_send_info_t &send_info);
|
send_batch(batched_send_info_t &send_info);
|
||||||
|
|||||||
+52
-18
@@ -433,22 +433,48 @@ namespace platf {
|
|||||||
memcpy(CMSG_DATA(pktinfo_cm), &pktInfo, sizeof(pktInfo));
|
memcpy(CMSG_DATA(pktinfo_cm), &pktInfo, sizeof(pktInfo));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto const max_iovs_per_msg = send_info.payload_buffers.size() + (send_info.headers ? 1 : 0);
|
||||||
|
|
||||||
#ifdef UDP_SEGMENT
|
#ifdef UDP_SEGMENT
|
||||||
{
|
{
|
||||||
struct iovec iov = {};
|
|
||||||
|
|
||||||
msg.msg_iov = &iov;
|
|
||||||
msg.msg_iovlen = 1;
|
|
||||||
|
|
||||||
// UDP GSO on Linux currently only supports sending 64K or 64 segments at a time
|
// UDP GSO on Linux currently only supports sending 64K or 64 segments at a time
|
||||||
size_t seg_index = 0;
|
size_t seg_index = 0;
|
||||||
const size_t seg_max = 65536 / 1500;
|
const size_t seg_max = 65536 / 1500;
|
||||||
|
struct iovec iovs[(send_info.headers ? std::min(seg_max, send_info.block_count) : 1) * max_iovs_per_msg] = {};
|
||||||
|
auto msg_size = send_info.header_size + send_info.payload_size;
|
||||||
while (seg_index < send_info.block_count) {
|
while (seg_index < send_info.block_count) {
|
||||||
iov.iov_base = (void *) &send_info.buffer[seg_index * send_info.block_size];
|
int iovlen = 0;
|
||||||
iov.iov_len = send_info.block_size * std::min(send_info.block_count - seg_index, seg_max);
|
auto segs_in_batch = std::min(send_info.block_count - seg_index, seg_max);
|
||||||
|
if (send_info.headers) {
|
||||||
|
// Interleave iovs for headers and payloads
|
||||||
|
for (auto i = 0; i < segs_in_batch; i++) {
|
||||||
|
iovs[iovlen].iov_base = (void *) &send_info.headers[(send_info.block_offset + seg_index + i) * send_info.header_size];
|
||||||
|
iovs[iovlen].iov_len = send_info.header_size;
|
||||||
|
iovlen++;
|
||||||
|
auto payload_desc = send_info.buffer_for_payload_offset((send_info.block_offset + seg_index + i) * send_info.payload_size);
|
||||||
|
iovs[iovlen].iov_base = (void *) payload_desc.buffer;
|
||||||
|
iovs[iovlen].iov_len = send_info.payload_size;
|
||||||
|
iovlen++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Translate buffer descriptors into iovs
|
||||||
|
auto payload_offset = (send_info.block_offset + seg_index) * send_info.payload_size;
|
||||||
|
auto payload_length = payload_offset + (segs_in_batch * send_info.payload_size);
|
||||||
|
while (payload_offset < payload_length) {
|
||||||
|
auto payload_desc = send_info.buffer_for_payload_offset(payload_offset);
|
||||||
|
iovs[iovlen].iov_base = (void *) payload_desc.buffer;
|
||||||
|
iovs[iovlen].iov_len = std::min(payload_desc.size, payload_length - payload_offset);
|
||||||
|
payload_offset += iovs[iovlen].iov_len;
|
||||||
|
iovlen++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
msg.msg_iov = iovs;
|
||||||
|
msg.msg_iovlen = iovlen;
|
||||||
|
|
||||||
// We should not use GSO if the data is <= one full block size
|
// We should not use GSO if the data is <= one full block size
|
||||||
if (iov.iov_len > send_info.block_size) {
|
if (segs_in_batch > 1) {
|
||||||
msg.msg_controllen = cmbuflen + CMSG_SPACE(sizeof(uint16_t));
|
msg.msg_controllen = cmbuflen + CMSG_SPACE(sizeof(uint16_t));
|
||||||
|
|
||||||
// Enable GSO to perform segmentation of our buffer for us
|
// Enable GSO to perform segmentation of our buffer for us
|
||||||
@@ -456,7 +482,7 @@ namespace platf {
|
|||||||
cm->cmsg_level = SOL_UDP;
|
cm->cmsg_level = SOL_UDP;
|
||||||
cm->cmsg_type = UDP_SEGMENT;
|
cm->cmsg_type = UDP_SEGMENT;
|
||||||
cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
|
cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
|
||||||
*((uint16_t *) CMSG_DATA(cm)) = send_info.block_size;
|
*((uint16_t *) CMSG_DATA(cm)) = msg_size;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
msg.msg_controllen = cmbuflen;
|
msg.msg_controllen = cmbuflen;
|
||||||
@@ -483,10 +509,11 @@ namespace platf {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BOOST_LOG(verbose) << "sendmsg() failed: "sv << errno;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
seg_index += bytes_sent / send_info.block_size;
|
seg_index += bytes_sent / msg_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we sent something, return the status and don't fall back to the non-GSO path.
|
// If we sent something, return the status and don't fall back to the non-GSO path.
|
||||||
@@ -498,18 +525,25 @@ namespace platf {
|
|||||||
|
|
||||||
{
|
{
|
||||||
// If GSO is not supported, use sendmmsg() instead.
|
// If GSO is not supported, use sendmmsg() instead.
|
||||||
struct mmsghdr msgs[send_info.block_count];
|
struct mmsghdr msgs[send_info.block_count] = {};
|
||||||
struct iovec iovs[send_info.block_count];
|
struct iovec iovs[send_info.block_count * (send_info.headers ? 2 : 1)] = {};
|
||||||
|
int iov_idx = 0;
|
||||||
for (size_t i = 0; i < send_info.block_count; i++) {
|
for (size_t i = 0; i < send_info.block_count; i++) {
|
||||||
iovs[i] = {};
|
msgs[i].msg_hdr.msg_iov = &iovs[iov_idx];
|
||||||
iovs[i].iov_base = (void *) &send_info.buffer[i * send_info.block_size];
|
msgs[i].msg_hdr.msg_iovlen = send_info.headers ? 2 : 1;
|
||||||
iovs[i].iov_len = send_info.block_size;
|
|
||||||
|
if (send_info.headers) {
|
||||||
|
iovs[iov_idx].iov_base = (void *) &send_info.headers[(send_info.block_offset + i) * send_info.header_size];
|
||||||
|
iovs[iov_idx].iov_len = send_info.header_size;
|
||||||
|
iov_idx++;
|
||||||
|
}
|
||||||
|
auto payload_desc = send_info.buffer_for_payload_offset((send_info.block_offset + i) * send_info.payload_size);
|
||||||
|
iovs[iov_idx].iov_base = (void *) payload_desc.buffer;
|
||||||
|
iovs[iov_idx].iov_len = send_info.payload_size;
|
||||||
|
iov_idx++;
|
||||||
|
|
||||||
msgs[i] = {};
|
|
||||||
msgs[i].msg_hdr.msg_name = msg.msg_name;
|
msgs[i].msg_hdr.msg_name = msg.msg_name;
|
||||||
msgs[i].msg_hdr.msg_namelen = msg.msg_namelen;
|
msgs[i].msg_hdr.msg_namelen = msg.msg_namelen;
|
||||||
msgs[i].msg_hdr.msg_iov = &iovs[i];
|
|
||||||
msgs[i].msg_hdr.msg_iovlen = 1;
|
|
||||||
msgs[i].msg_hdr.msg_control = cmbuf.buf;
|
msgs[i].msg_hdr.msg_control = cmbuf.buf;
|
||||||
msgs[i].msg_hdr.msg_controllen = cmbuflen;
|
msgs[i].msg_hdr.msg_controllen = cmbuflen;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1452,12 +1452,37 @@ namespace platf {
|
|||||||
msg.namelen = sizeof(taddr_v4);
|
msg.namelen = sizeof(taddr_v4);
|
||||||
}
|
}
|
||||||
|
|
||||||
WSABUF buf;
|
auto const max_bufs_per_msg = send_info.payload_buffers.size() + (send_info.headers ? 1 : 0);
|
||||||
buf.buf = (char *) send_info.buffer;
|
|
||||||
buf.len = send_info.block_size * send_info.block_count;
|
|
||||||
|
|
||||||
msg.lpBuffers = &buf;
|
WSABUF bufs[(send_info.headers ? send_info.block_count : 1) * max_bufs_per_msg];
|
||||||
msg.dwBufferCount = 1;
|
DWORD bufcount = 0;
|
||||||
|
if (send_info.headers) {
|
||||||
|
// Interleave buffers for headers and payloads
|
||||||
|
for (auto i = 0; i < send_info.block_count; i++) {
|
||||||
|
bufs[bufcount].buf = (char *) &send_info.headers[(send_info.block_offset + i) * send_info.header_size];
|
||||||
|
bufs[bufcount].len = send_info.header_size;
|
||||||
|
bufcount++;
|
||||||
|
auto payload_desc = send_info.buffer_for_payload_offset((send_info.block_offset + i) * send_info.payload_size);
|
||||||
|
bufs[bufcount].buf = (char *) payload_desc.buffer;
|
||||||
|
bufs[bufcount].len = send_info.payload_size;
|
||||||
|
bufcount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Translate buffer descriptors into WSABUFs
|
||||||
|
auto payload_offset = send_info.block_offset * send_info.payload_size;
|
||||||
|
auto payload_length = payload_offset + (send_info.block_count * send_info.payload_size);
|
||||||
|
while (payload_offset < payload_length) {
|
||||||
|
auto payload_desc = send_info.buffer_for_payload_offset(payload_offset);
|
||||||
|
bufs[bufcount].buf = (char *) payload_desc.buffer;
|
||||||
|
bufs[bufcount].len = std::min(payload_desc.size, payload_length - payload_offset);
|
||||||
|
payload_offset += bufs[bufcount].len;
|
||||||
|
bufcount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
msg.lpBuffers = bufs;
|
||||||
|
msg.dwBufferCount = bufcount;
|
||||||
msg.dwFlags = 0;
|
msg.dwFlags = 0;
|
||||||
|
|
||||||
// At most, one DWORD option and one PKTINFO option
|
// At most, one DWORD option and one PKTINFO option
|
||||||
@@ -1505,7 +1530,7 @@ namespace platf {
|
|||||||
cm->cmsg_level = IPPROTO_UDP;
|
cm->cmsg_level = IPPROTO_UDP;
|
||||||
cm->cmsg_type = UDP_SEND_MSG_SIZE;
|
cm->cmsg_type = UDP_SEND_MSG_SIZE;
|
||||||
cm->cmsg_len = WSA_CMSG_LEN(sizeof(DWORD));
|
cm->cmsg_len = WSA_CMSG_LEN(sizeof(DWORD));
|
||||||
*((DWORD *) WSA_CMSG_DATA(cm)) = send_info.block_size;
|
*((DWORD *) WSA_CMSG_DATA(cm)) = send_info.header_size + send_info.payload_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
msg.Control.len = cmbuflen;
|
msg.Control.len = cmbuflen;
|
||||||
|
|||||||
+49
-24
@@ -126,11 +126,6 @@ namespace stream {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct video_packet_enc_prefix_t {
|
struct video_packet_enc_prefix_t {
|
||||||
video_packet_raw_t *
|
|
||||||
payload() {
|
|
||||||
return (video_packet_raw_t *) (this + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::uint8_t iv[12]; // 12-byte IV is ideal for AES-GCM
|
std::uint8_t iv[12]; // 12-byte IV is ideal for AES-GCM
|
||||||
std::uint32_t frameNumber;
|
std::uint32_t frameNumber;
|
||||||
std::uint8_t tag[16];
|
std::uint8_t tag[16];
|
||||||
@@ -227,7 +222,6 @@ namespace stream {
|
|||||||
}
|
}
|
||||||
constexpr std::size_t MAX_AUDIO_PACKET_SIZE = 1400;
|
constexpr std::size_t MAX_AUDIO_PACKET_SIZE = 1400;
|
||||||
|
|
||||||
using video_packet_t = util::c_ptr<video_packet_raw_t>;
|
|
||||||
using audio_aes_t = std::array<char, round_to_pkcs7_padded(MAX_AUDIO_PACKET_SIZE)>;
|
using audio_aes_t = std::array<char, round_to_pkcs7_padded(MAX_AUDIO_PACKET_SIZE)>;
|
||||||
|
|
||||||
using av_session_id_t = std::variant<asio::ip::address, std::string>; // IP address or SS-Ping-Payload from RTSP handshake
|
using av_session_id_t = std::variant<asio::ip::address, std::string>; // IP address or SS-Ping-Payload from RTSP handshake
|
||||||
@@ -619,15 +613,19 @@ namespace stream {
|
|||||||
size_t blocksize;
|
size_t blocksize;
|
||||||
size_t prefixsize;
|
size_t prefixsize;
|
||||||
util::buffer_t<char> shards;
|
util::buffer_t<char> shards;
|
||||||
|
util::buffer_t<char> headers;
|
||||||
|
util::buffer_t<uint8_t *> shards_p;
|
||||||
|
|
||||||
|
std::vector<platf::buffer_descriptor_t> payload_buffers;
|
||||||
|
|
||||||
char *
|
char *
|
||||||
data(size_t el) {
|
data(size_t el) {
|
||||||
return &shards[(el + 1) * prefixsize + el * blocksize];
|
return (char *) shards_p[el];
|
||||||
}
|
}
|
||||||
|
|
||||||
char *
|
char *
|
||||||
prefix(size_t el) {
|
prefix(size_t el) {
|
||||||
return &shards[el * (prefixsize + blocksize)];
|
return prefixsize ? &headers[el * prefixsize] : nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
@@ -642,7 +640,8 @@ namespace stream {
|
|||||||
|
|
||||||
auto pad = payload_size % blocksize != 0;
|
auto pad = payload_size % blocksize != 0;
|
||||||
|
|
||||||
auto data_shards = payload_size / blocksize + (pad ? 1 : 0);
|
auto aligned_data_shards = payload_size / blocksize;
|
||||||
|
auto data_shards = aligned_data_shards + (pad ? 1 : 0);
|
||||||
auto parity_shards = (data_shards * fecpercentage + 99) / 100;
|
auto parity_shards = (data_shards * fecpercentage + 99) / 100;
|
||||||
|
|
||||||
// increase the FEC percentage for this frame if the parity shard minimum is not met
|
// increase the FEC percentage for this frame if the parity shard minimum is not met
|
||||||
@@ -655,27 +654,46 @@ namespace stream {
|
|||||||
|
|
||||||
auto nr_shards = data_shards + parity_shards;
|
auto nr_shards = data_shards + parity_shards;
|
||||||
|
|
||||||
util::buffer_t<char> shards { nr_shards * (blocksize + prefixsize) };
|
// If we need to store a zero-padded data shard, allocate that first to
|
||||||
|
// to keep the shards in order and reduce buffer fragmentation
|
||||||
|
auto parity_shard_offset = pad ? 1 : 0;
|
||||||
|
util::buffer_t<char> shards { (parity_shard_offset + parity_shards) * blocksize };
|
||||||
util::buffer_t<uint8_t *> shards_p { nr_shards };
|
util::buffer_t<uint8_t *> shards_p { nr_shards };
|
||||||
|
std::vector<platf::buffer_descriptor_t> payload_buffers;
|
||||||
|
payload_buffers.reserve(2);
|
||||||
|
|
||||||
|
// Point into the payload buffer for all except the final padded data shard
|
||||||
auto next = std::begin(payload);
|
auto next = std::begin(payload);
|
||||||
for (auto x = 0; x < nr_shards; ++x) {
|
for (auto x = 0; x < aligned_data_shards; ++x) {
|
||||||
shards_p[x] = (uint8_t *) &shards[(x + 1) * prefixsize + x * blocksize];
|
shards_p[x] = (uint8_t *) next;
|
||||||
|
next += blocksize;
|
||||||
|
}
|
||||||
|
payload_buffers.emplace_back(std::begin(payload), aligned_data_shards * blocksize);
|
||||||
|
|
||||||
|
// If the last data shard needs to be zero-padded, we must use the shards buffer
|
||||||
|
if (pad) {
|
||||||
|
shards_p[aligned_data_shards] = (uint8_t *) &shards[0];
|
||||||
|
|
||||||
// GCC doesn't figure out that std::copy_n() can be replaced with memcpy() here
|
// GCC doesn't figure out that std::copy_n() can be replaced with memcpy() here
|
||||||
// and ends up compiling a horribly slow element-by-element copy loop, so we
|
// and ends up compiling a horribly slow element-by-element copy loop, so we
|
||||||
// help it by using memcpy()/memset() directly.
|
// help it by using memcpy()/memset() directly.
|
||||||
auto copy_len = std::min<size_t>(blocksize, std::end(payload) - next);
|
auto copy_len = std::min<size_t>(blocksize, std::end(payload) - next);
|
||||||
std::memcpy(shards_p[x], next, copy_len);
|
std::memcpy(shards_p[aligned_data_shards], next, copy_len);
|
||||||
if (copy_len < blocksize) {
|
if (copy_len < blocksize) {
|
||||||
// Zero any additional space after the end of the payload
|
// Zero any additional space after the end of the payload
|
||||||
std::memset(shards_p[x] + copy_len, 0, blocksize - copy_len);
|
std::memset(shards_p[aligned_data_shards] + copy_len, 0, blocksize - copy_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
next += copy_len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add a payload buffer describing the shard buffer
|
||||||
|
payload_buffers.emplace_back(std::begin(shards), shards.size());
|
||||||
|
|
||||||
if (fecpercentage != 0) {
|
if (fecpercentage != 0) {
|
||||||
|
// Point into our allocated buffer for the parity shards
|
||||||
|
for (auto x = 0; x < parity_shards; ++x) {
|
||||||
|
shards_p[data_shards + x] = (uint8_t *) &shards[(parity_shard_offset + x) * blocksize];
|
||||||
|
}
|
||||||
|
|
||||||
// packets = parity_shards + data_shards
|
// packets = parity_shards + data_shards
|
||||||
rs_t rs { reed_solomon_new(data_shards, parity_shards) };
|
rs_t rs { reed_solomon_new(data_shards, parity_shards) };
|
||||||
|
|
||||||
@@ -688,7 +706,10 @@ namespace stream {
|
|||||||
fecpercentage,
|
fecpercentage,
|
||||||
blocksize,
|
blocksize,
|
||||||
prefixsize,
|
prefixsize,
|
||||||
std::move(shards)
|
std::move(shards),
|
||||||
|
util::buffer_t<char> { nr_shards * prefixsize },
|
||||||
|
std::move(shards_p),
|
||||||
|
std::move(payload_buffers),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} // namespace fec
|
} // namespace fec
|
||||||
@@ -1438,8 +1459,11 @@ namespace stream {
|
|||||||
|
|
||||||
auto peer_address = session->video.peer.address();
|
auto peer_address = session->video.peer.address();
|
||||||
auto batch_info = platf::batched_send_info_t {
|
auto batch_info = platf::batched_send_info_t {
|
||||||
nullptr,
|
shards.headers.begin(),
|
||||||
shards.prefixsize + shards.blocksize,
|
shards.prefixsize,
|
||||||
|
shards.payload_buffers,
|
||||||
|
shards.blocksize,
|
||||||
|
0,
|
||||||
0,
|
0,
|
||||||
(uintptr_t) sock.native_handle(),
|
(uintptr_t) sock.native_handle(),
|
||||||
peer_address,
|
peer_address,
|
||||||
@@ -1487,7 +1511,8 @@ namespace stream {
|
|||||||
auto *prefix = (video_packet_enc_prefix_t *) shards.prefix(x);
|
auto *prefix = (video_packet_enc_prefix_t *) shards.prefix(x);
|
||||||
prefix->frameNumber = packet->frame_index();
|
prefix->frameNumber = packet->frame_index();
|
||||||
std::copy(std::begin(iv), std::end(iv), prefix->iv);
|
std::copy(std::begin(iv), std::end(iv), prefix->iv);
|
||||||
session->video.cipher->encrypt(std::string_view { (char *) inspect, (size_t) blocksize }, prefix->tag, &iv);
|
session->video.cipher->encrypt(std::string_view { (char *) inspect, (size_t) blocksize },
|
||||||
|
prefix->tag, (uint8_t *) inspect, &iv);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (x - next_shard_to_send + 1 >= send_batch_size ||
|
if (x - next_shard_to_send + 1 >= send_batch_size ||
|
||||||
@@ -1510,7 +1535,7 @@ namespace stream {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t current_batch_size = x - next_shard_to_send + 1;
|
size_t current_batch_size = x - next_shard_to_send + 1;
|
||||||
batch_info.buffer = shards.prefix(next_shard_to_send);
|
batch_info.block_offset = next_shard_to_send;
|
||||||
batch_info.block_count = current_batch_size;
|
batch_info.block_count = current_batch_size;
|
||||||
|
|
||||||
frame_send_batch_latency_logger.first_point_now();
|
frame_send_batch_latency_logger.first_point_now();
|
||||||
@@ -1520,10 +1545,10 @@ namespace stream {
|
|||||||
BOOST_LOG(verbose) << "Falling back to unbatched send"sv;
|
BOOST_LOG(verbose) << "Falling back to unbatched send"sv;
|
||||||
for (auto y = 0; y < current_batch_size; y++) {
|
for (auto y = 0; y < current_batch_size; y++) {
|
||||||
auto send_info = platf::send_info_t {
|
auto send_info = platf::send_info_t {
|
||||||
nullptr,
|
|
||||||
0,
|
|
||||||
shards.prefix(next_shard_to_send + y),
|
shards.prefix(next_shard_to_send + y),
|
||||||
shards.prefixsize + shards.blocksize,
|
shards.prefixsize,
|
||||||
|
shards.data(next_shard_to_send + y),
|
||||||
|
shards.blocksize,
|
||||||
(uintptr_t) sock.native_handle(),
|
(uintptr_t) sock.native_handle(),
|
||||||
peer_address,
|
peer_address,
|
||||||
session->video.peer.port(),
|
session->video.peer.port(),
|
||||||
|
|||||||
Reference in New Issue
Block a user