style: adjust clang-format rules (#2186)

Co-authored-by: Vithorio Polten <reach@vithor.io>
This commit is contained in:
ReenigneArcher
2025-01-19 22:34:47 -05:00
committed by GitHub
parent f57aee9025
commit c2420427b1
158 changed files with 8754 additions and 9994 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,161 +1,155 @@
/**
* @file src/nvenc/nvenc_base.h
* @brief Declarations for abstract platform-agnostic base of standalone NVENC encoder.
*/
#pragma once
#include "nvenc_colorspace.h"
#include "nvenc_config.h"
#include "nvenc_encoded_frame.h"
#include "src/logging.h"
#include "src/video.h"
#include <ffnvcodec/nvEncodeAPI.h>
/**
* @brief Standalone NVENC encoder
*/
namespace nvenc {
/**
* @brief Abstract platform-agnostic base of standalone NVENC encoder.
* Derived classes perform platform-specific operations.
*/
class nvenc_base {
public:
/**
* @brief Construct the encoder base.
* @param device_type Underlying device type used by derived class.
*/
explicit nvenc_base(NV_ENC_DEVICE_TYPE device_type);
virtual ~nvenc_base();
// Copy construction and copy assignment are disabled.
nvenc_base(const nvenc_base &) = delete;
nvenc_base &
operator=(const nvenc_base &) = delete;
/**
* @brief Create the encoder.
* @param config NVENC encoder configuration.
* @param client_config Stream configuration requested by the client.
* @param colorspace YUV colorspace.
* @param buffer_format Platform-agnostic input surface format.
* @return `true` on success, `false` on error.
*/
bool
create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format);
/**
* @brief Destroy the encoder.
* Derived classes call it in the destructor.
*/
void
destroy_encoder();
/**
* @brief Encode the next frame using platform-specific input surface.
* @param frame_index Frame index that uniquely identifies the frame.
* Afterwards serves as parameter for `invalidate_ref_frames()`.
* No restrictions on the first frame index, but later frame indexes must be subsequent.
* @param force_idr Whether to encode frame as forced IDR.
* @return Encoded frame.
*/
nvenc_encoded_frame
encode_frame(uint64_t frame_index, bool force_idr);
/**
* @brief Perform reference frame invalidation (RFI) procedure.
* @param first_frame First frame index of the invalidation range.
* @param last_frame Last frame index of the invalidation range.
* @return `true` on success, `false` on error.
* After an error the next frame must be encoded with `force_idr = true`.
*/
bool
invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
protected:
/**
* @brief Required. Used for loading NvEnc library and setting `nvenc` variable with `NvEncodeAPICreateInstance()`.
* Called during `create_encoder()` if `nvenc` variable is not initialized.
* @return `true` on success, `false` on error.
*/
virtual bool
init_library() = 0;
/**
* @brief Required. Used for creating outside-facing input surface,
* registering this surface with `nvenc->nvEncRegisterResource()` and setting `registered_input_buffer` variable.
* Called during `create_encoder()`.
* @return `true` on success, `false` on error.
*/
virtual bool
create_and_register_input_buffer() = 0;
/**
* @brief Optional. Override if you must perform additional operations on the registered input surface in the beginning of `encode_frame()`.
* Typically used for interop copy.
* The default implementation does nothing and returns `true`.
* @return `true` on success, `false` on error.
*/
virtual bool
synchronize_input_buffer() { return true; }
/**
* @brief Optional. Override if you want to create encoder in async mode.
* In this case must also set `async_event_handle` variable.
* The default implementation ignores `timeout_ms` and returns `false`.
* @param timeout_ms Wait timeout in milliseconds.
* @return `true` on success, `false` on timeout or error.
*/
virtual bool
wait_for_async_event(uint32_t timeout_ms) { return false; }
/**
* @brief Check the status returned by an NVENC API call.
* @param status Status code returned by the NVENC API call.
* @return `true` if the call failed; derived classes log an error and bail out on `true`.
* NOTE(review): callers print `last_nvenc_error_string` right after a `true` result,
* so it is presumably updated here - confirm in the definition.
*/
bool
nvenc_failed(NVENCSTATUS status);
/**
* @brief This function returns the corresponding struct version for the minimum API required by the codec.
* @details Reducing the struct versions maximizes driver compatibility by avoiding needless API breaks.
* @param version The raw structure version from `NVENCAPI_STRUCT_VERSION()`.
* @param v11_struct_version Optionally specifies the struct version to use with v11 SDK major versions.
* @param v12_struct_version Optionally specifies the struct version to use with v12 SDK major versions.
* @return A suitable struct version for the active codec.
*/
uint32_t
min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0);
/// Device type given to the constructor (NOTE(review): presumably stored as-is - confirm in the definition).
const NV_ENC_DEVICE_TYPE device_type;
/// Encoder handle, null while no encoder exists; derived classes pass it to `nvenc->nvEncRegisterResource()`.
void *encoder = nullptr;
/// Parameters of the encoder; derived classes read them to size and register the input surface.
struct {
uint32_t width = 0;
uint32_t height = 0;
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
uint32_t ref_frames_in_dpb = 0;
bool rfi = false;
} encoder_params;
/// Error description that derived classes log after `nvenc_failed()` reports a failure.
std::string last_nvenc_error_string;
// Derived classes set these variables
void *device = nullptr; ///< Platform-specific handle of encoding device.
///< Should be set in constructor or `init_library()`.
std::shared_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc; ///< Function pointers list produced by `NvEncodeAPICreateInstance()`.
///< Should be set in `init_library()`.
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr; ///< Platform-specific input surface registered with `NvEncRegisterResource()`.
///< Should be set in `create_and_register_input_buffer()`.
void *async_event_handle = nullptr; ///< (optional) Platform-specific handle of the event object.
///< Can be set in constructor or `init_library()`, must override `wait_for_async_event()`.
private:
// NOTE(review): presumably the NVENC output bitstream buffer; its use is not visible in this header.
NV_ENC_OUTPUT_PTR output_bitstream = nullptr;
// NOTE(review): presumably the minimum API version that `min_struct_version()` refers to - confirm in the definition.
uint32_t minimum_api_version = 0;
// Private bookkeeping about encoded frames and the last RFI request.
struct {
uint64_t last_encoded_frame_index = 0;
bool rfi_needs_confirmation = false;
std::pair<uint64_t, uint64_t> last_rfi_range;
logging::min_max_avg_periodic_logger<double> frame_size_logger = { debug, "NvEnc: encoded frame sizes in kB", "" };
} encoder_state;
};
} // namespace nvenc
/**
* @file src/nvenc/nvenc_base.h
* @brief Declarations for abstract platform-agnostic base of standalone NVENC encoder.
*/
#pragma once
// lib includes
#include <ffnvcodec/nvEncodeAPI.h>
// local includes
#include "nvenc_colorspace.h"
#include "nvenc_config.h"
#include "nvenc_encoded_frame.h"
#include "src/logging.h"
#include "src/video.h"
/**
* @brief Standalone NVENC encoder
*/
namespace nvenc {
/**
* @brief Abstract platform-agnostic base of standalone NVENC encoder.
* Derived classes perform platform-specific operations.
*/
class nvenc_base {
public:
/**
* @brief Construct the encoder base.
* @param device_type Underlying device type used by derived class.
*/
explicit nvenc_base(NV_ENC_DEVICE_TYPE device_type);
virtual ~nvenc_base();
// Copy construction and copy assignment are disabled.
nvenc_base(const nvenc_base &) = delete;
nvenc_base &operator=(const nvenc_base &) = delete;
/**
* @brief Create the encoder.
* @param config NVENC encoder configuration.
* @param client_config Stream configuration requested by the client.
* @param colorspace YUV colorspace.
* @param buffer_format Platform-agnostic input surface format.
* @return `true` on success, `false` on error.
*/
bool create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format);
/**
* @brief Destroy the encoder.
* Derived classes call it in the destructor.
*/
void destroy_encoder();
/**
* @brief Encode the next frame using platform-specific input surface.
* @param frame_index Frame index that uniquely identifies the frame.
* Afterwards serves as parameter for `invalidate_ref_frames()`.
* No restrictions on the first frame index, but later frame indexes must be subsequent.
* @param force_idr Whether to encode frame as forced IDR.
* @return Encoded frame.
*/
nvenc_encoded_frame encode_frame(uint64_t frame_index, bool force_idr);
/**
* @brief Perform reference frame invalidation (RFI) procedure.
* @param first_frame First frame index of the invalidation range.
* @param last_frame Last frame index of the invalidation range.
* @return `true` on success, `false` on error.
* After an error the next frame must be encoded with `force_idr = true`.
*/
bool invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
protected:
/**
* @brief Required. Used for loading NvEnc library and setting `nvenc` variable with `NvEncodeAPICreateInstance()`.
* Called during `create_encoder()` if `nvenc` variable is not initialized.
* @return `true` on success, `false` on error.
*/
virtual bool init_library() = 0;
/**
* @brief Required. Used for creating outside-facing input surface,
* registering this surface with `nvenc->nvEncRegisterResource()` and setting `registered_input_buffer` variable.
* Called during `create_encoder()`.
* @return `true` on success, `false` on error.
*/
virtual bool create_and_register_input_buffer() = 0;
/**
* @brief Optional. Override if you must perform additional operations on the registered input surface in the beginning of `encode_frame()`.
* Typically used for interop copy.
* The default implementation does nothing and returns `true`.
* @return `true` on success, `false` on error.
*/
virtual bool synchronize_input_buffer() {
return true;
}
/**
* @brief Optional. Override if you want to create encoder in async mode.
* In this case must also set `async_event_handle` variable.
* The default implementation ignores `timeout_ms` and returns `false`.
* @param timeout_ms Wait timeout in milliseconds.
* @return `true` on success, `false` on timeout or error.
*/
virtual bool wait_for_async_event(uint32_t timeout_ms) {
return false;
}
/**
* @brief Check the status returned by an NVENC API call.
* @param status Status code returned by the NVENC API call.
* @return `true` if the call failed; derived classes log an error and bail out on `true`.
* NOTE(review): callers print `last_nvenc_error_string` right after a `true` result,
* so it is presumably updated here - confirm in the definition.
*/
bool nvenc_failed(NVENCSTATUS status);
/**
* @brief This function returns the corresponding struct version for the minimum API required by the codec.
* @details Reducing the struct versions maximizes driver compatibility by avoiding needless API breaks.
* @param version The raw structure version from `NVENCAPI_STRUCT_VERSION()`.
* @param v11_struct_version Optionally specifies the struct version to use with v11 SDK major versions.
* @param v12_struct_version Optionally specifies the struct version to use with v12 SDK major versions.
* @return A suitable struct version for the active codec.
*/
uint32_t min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0);
/// Device type given to the constructor (NOTE(review): presumably stored as-is - confirm in the definition).
const NV_ENC_DEVICE_TYPE device_type;
/// Encoder handle, null while no encoder exists; derived classes pass it to `nvenc->nvEncRegisterResource()`.
void *encoder = nullptr;
/// Parameters of the encoder; derived classes read them to size and register the input surface.
struct {
uint32_t width = 0;
uint32_t height = 0;
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
uint32_t ref_frames_in_dpb = 0;
bool rfi = false;
} encoder_params;
/// Error description that derived classes log after `nvenc_failed()` reports a failure.
std::string last_nvenc_error_string;
// Derived classes set these variables
void *device = nullptr; ///< Platform-specific handle of encoding device.
///< Should be set in constructor or `init_library()`.
std::shared_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc; ///< Function pointers list produced by `NvEncodeAPICreateInstance()`.
///< Should be set in `init_library()`.
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr; ///< Platform-specific input surface registered with `NvEncRegisterResource()`.
///< Should be set in `create_and_register_input_buffer()`.
void *async_event_handle = nullptr; ///< (optional) Platform-specific handle of the event object.
///< Can be set in constructor or `init_library()`, must override `wait_for_async_event()`.
private:
// NOTE(review): presumably the NVENC output bitstream buffer; its use is not visible in this header.
NV_ENC_OUTPUT_PTR output_bitstream = nullptr;
// NOTE(review): presumably the minimum API version that `min_struct_version()` refers to - confirm in the definition.
uint32_t minimum_api_version = 0;
// Private bookkeeping about encoded frames and the last RFI request.
struct {
uint64_t last_encoded_frame_index = 0;
bool rfi_needs_confirmation = false;
std::pair<uint64_t, uint64_t> last_rfi_range;
logging::min_max_avg_periodic_logger<double> frame_size_logger = {debug, "NvEnc: encoded frame sizes in kB", ""};
} encoder_state;
};
} // namespace nvenc

View File

@@ -1,21 +1,22 @@
/**
* @file src/nvenc/nvenc_colorspace.h
* @brief Declarations for NVENC YUV colorspace.
*/
#pragma once
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
/**
* @brief YUV colorspace and color range.
*/
struct nvenc_colorspace_t {
/// Color primaries, expressed as an NVENC VUI enumeration value.
NV_ENC_VUI_COLOR_PRIMARIES primaries;
/// Transfer characteristic, expressed as an NVENC VUI enumeration value.
/// NOTE(review): the member name is spelled `tranfer_function` (sic); renaming it would affect every user of this struct.
NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function;
/// Matrix coefficients, expressed as an NVENC VUI enumeration value.
NV_ENC_VUI_MATRIX_COEFFS matrix;
/// Color range flag. NOTE(review): presumably `true` selects full range and `false` limited range - confirm against users.
bool full_range;
};
} // namespace nvenc
/**
* @file src/nvenc/nvenc_colorspace.h
* @brief Declarations for NVENC YUV colorspace.
*/
#pragma once
// lib includes
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
/**
* @brief YUV colorspace and color range.
*/
struct nvenc_colorspace_t {
/// Color primaries, expressed as an NVENC VUI enumeration value.
NV_ENC_VUI_COLOR_PRIMARIES primaries;
/// Transfer characteristic, expressed as an NVENC VUI enumeration value.
/// NOTE(review): the member name is spelled `tranfer_function` (sic); renaming it would affect every user of this struct.
NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function;
/// Matrix coefficients, expressed as an NVENC VUI enumeration value.
NV_ENC_VUI_MATRIX_COEFFS matrix;
/// Color range flag. NOTE(review): presumably `true` selects full range and `false` limited range - confirm against users.
bool full_range;
};
} // namespace nvenc

View File

@@ -1,53 +1,53 @@
/**
* @file src/nvenc/nvenc_config.h
* @brief Declarations for NVENC encoder configuration.
*/
#pragma once
namespace nvenc {
/**
* @brief Mode of the optional preliminary encoding pass selected by `nvenc_config::two_pass`.
*/
enum class nvenc_two_pass {
disabled, ///< Single pass, the fastest and no extra vram
quarter_resolution, ///< Larger motion vectors being caught, faster and uses less extra vram
full_resolution, ///< Better overall statistics, slower and uses more extra vram
};
/**
* @brief NVENC encoder configuration.
*/
struct nvenc_config {
/// Quality preset from 1 to 7, higher is slower. Defaults to 1, the fastest preset.
int quality_preset = 1;
/// Use optional preliminary pass for better motion vectors, bitrate distribution and stricter VBV(HRD), uses CUDA cores.
nvenc_two_pass two_pass = nvenc_two_pass::quarter_resolution;
/// Percentage increase of VBV/HRD from the default single frame, allows low-latency variable bitrate.
int vbv_percentage_increase = 0;
/// Improves fades compression, uses CUDA cores.
bool weighted_prediction = false;
/// Allocate more bitrate to flat regions since they're visually more perceptible, uses CUDA cores.
bool adaptive_quantization = false;
/// Don't use QP below certain value, limits peak image quality to save bitrate.
/// The per-codec limits are the `min_qp_*` members below.
bool enable_min_qp = false;
/// Min QP value for H.264 when enable_min_qp is selected.
unsigned min_qp_h264 = 19;
/// Min QP value for HEVC when enable_min_qp is selected.
unsigned min_qp_hevc = 23;
/// Min QP value for AV1 when enable_min_qp is selected.
unsigned min_qp_av1 = 23;
/// Use CAVLC entropy coding in H.264 instead of CABAC, not relevant and here for historical reasons.
bool h264_cavlc = false;
/// Add filler data to encoded frames to stay at target bitrate, mainly for testing.
bool insert_filler_data = false;
};
} // namespace nvenc
/**
* @file src/nvenc/nvenc_config.h
* @brief Declarations for NVENC encoder configuration.
*/
#pragma once
namespace nvenc {
/**
* @brief Mode of the optional preliminary encoding pass selected by `nvenc_config::two_pass`.
*/
enum class nvenc_two_pass {
disabled, ///< Single pass, the fastest and no extra vram
quarter_resolution, ///< Larger motion vectors being caught, faster and uses less extra vram
full_resolution, ///< Better overall statistics, slower and uses more extra vram
};
/**
* @brief NVENC encoder configuration.
*/
struct nvenc_config {
/// Quality preset from 1 to 7, higher is slower. Defaults to 1, the fastest preset.
int quality_preset = 1;
/// Use optional preliminary pass for better motion vectors, bitrate distribution and stricter VBV(HRD), uses CUDA cores.
nvenc_two_pass two_pass = nvenc_two_pass::quarter_resolution;
/// Percentage increase of VBV/HRD from the default single frame, allows low-latency variable bitrate.
int vbv_percentage_increase = 0;
/// Improves fades compression, uses CUDA cores.
bool weighted_prediction = false;
/// Allocate more bitrate to flat regions since they're visually more perceptible, uses CUDA cores.
bool adaptive_quantization = false;
/// Don't use QP below certain value, limits peak image quality to save bitrate.
/// The per-codec limits are the `min_qp_*` members below.
bool enable_min_qp = false;
/// Min QP value for H.264 when enable_min_qp is selected.
unsigned min_qp_h264 = 19;
/// Min QP value for HEVC when enable_min_qp is selected.
unsigned min_qp_hevc = 23;
/// Min QP value for AV1 when enable_min_qp is selected.
unsigned min_qp_av1 = 23;
/// Use CAVLC entropy coding in H.264 instead of CABAC, not relevant and here for historical reasons.
bool h264_cavlc = false;
/// Add filler data to encoded frames to stay at target bitrate, mainly for testing.
bool insert_filler_data = false;
};
} // namespace nvenc

View File

@@ -1,58 +1,57 @@
/**
* @file src/nvenc/nvenc_d3d11.cpp
* @brief Definitions for abstract Direct3D11 NVENC encoder.
*/
#include "src/logging.h"
#ifdef _WIN32
#include "nvenc_d3d11.h"
namespace nvenc {
/**
 * @brief Release the NvEnc library handle if one is still held.
 */
nvenc_d3d11::~nvenc_d3d11() {
  // Nothing to release while `dll` is null.
  if (!dll) {
    return;
  }

  FreeLibrary(dll);
  dll = nullptr;
}
bool
nvenc_d3d11::init_library() {
if (dll) return true;
#ifdef _WIN64
constexpr auto dll_name = "nvEncodeAPI64.dll";
#else
constexpr auto dll_name = "nvEncodeAPI.dll";
#endif
if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
if (auto create_instance = (decltype(NvEncodeAPICreateInstance) *) GetProcAddress(dll, "NvEncodeAPICreateInstance")) {
auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER);
if (nvenc_failed(create_instance(new_nvenc.get()))) {
BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string;
}
else {
nvenc = std::move(new_nvenc);
return true;
}
}
else {
BOOST_LOG(error) << "NvEnc: No NvEncodeAPICreateInstance() in " << dll_name;
}
}
else {
BOOST_LOG(debug) << "NvEnc: Couldn't load NvEnc library " << dll_name;
}
if (dll) {
FreeLibrary(dll);
dll = NULL;
}
return false;
}
} // namespace nvenc
#endif
/**
* @file src/nvenc/nvenc_d3d11.cpp
* @brief Definitions for abstract Direct3D11 NVENC encoder.
*/
// local includes
#include "src/logging.h"
#ifdef _WIN32
#include "nvenc_d3d11.h"
namespace nvenc {
/**
 * @brief Release the NvEnc library handle if one is still held.
 */
nvenc_d3d11::~nvenc_d3d11() {
  // Nothing to release while `dll` is null.
  if (!dll) {
    return;
  }

  FreeLibrary(dll);
  dll = nullptr;
}
bool nvenc_d3d11::init_library() {
if (dll) {
return true;
}
#ifdef _WIN64
constexpr auto dll_name = "nvEncodeAPI64.dll";
#else
constexpr auto dll_name = "nvEncodeAPI.dll";
#endif
if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
if (auto create_instance = (decltype(NvEncodeAPICreateInstance) *) GetProcAddress(dll, "NvEncodeAPICreateInstance")) {
auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER);
if (nvenc_failed(create_instance(new_nvenc.get()))) {
BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string;
} else {
nvenc = std::move(new_nvenc);
return true;
}
} else {
BOOST_LOG(error) << "NvEnc: No NvEncodeAPICreateInstance() in " << dll_name;
}
} else {
BOOST_LOG(debug) << "NvEnc: Couldn't load NvEnc library " << dll_name;
}
if (dll) {
FreeLibrary(dll);
dll = NULL;
}
return false;
}
} // namespace nvenc
#endif

View File

@@ -1,47 +1,48 @@
/**
* @file src/nvenc/nvenc_d3d11.h
* @brief Declarations for abstract Direct3D11 NVENC encoder.
*/
#pragma once
#ifdef _WIN32
#include <comdef.h>
#include <d3d11.h>
#include "nvenc_base.h"
namespace nvenc {
// COM smart pointer aliases named `<Interface>Ptr` (e.g. `ID3D11DevicePtr`, `IDXGIAdapterPtr`)
// for the D3D11/DXGI interfaces used by the Direct3D11 encoders.
_COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
_COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
_COM_SMARTPTR_TYPEDEF(IDXGIDevice, IID_IDXGIDevice);
_COM_SMARTPTR_TYPEDEF(IDXGIAdapter, IID_IDXGIAdapter);
/**
* @brief Abstract Direct3D11 NVENC encoder.
* Encapsulates common code used by native and interop implementations.
*/
class nvenc_d3d11: public nvenc_base {
public:
explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type):
nvenc_base(device_type) {}
~nvenc_d3d11();
/**
* @brief Get input surface texture.
* @return Input surface texture.
*/
virtual ID3D11Texture2D *
get_input_texture() = 0;
protected:
bool
init_library() override;
private:
HMODULE dll = NULL;
};
} // namespace nvenc
#endif
/**
* @file src/nvenc/nvenc_d3d11.h
* @brief Declarations for abstract Direct3D11 NVENC encoder.
*/
#pragma once
#ifdef _WIN32
// standard includes
#include <comdef.h>
#include <d3d11.h>
// local includes
#include "nvenc_base.h"
namespace nvenc {
// COM smart pointer aliases named `<Interface>Ptr` (e.g. `ID3D11DevicePtr`, `IDXGIAdapterPtr`)
// for the D3D11/DXGI interfaces used by the Direct3D11 encoders.
_COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
_COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
_COM_SMARTPTR_TYPEDEF(IDXGIDevice, IID_IDXGIDevice);
_COM_SMARTPTR_TYPEDEF(IDXGIAdapter, IID_IDXGIAdapter);
/**
* @brief Abstract Direct3D11 NVENC encoder.
* Encapsulates common code used by native and interop implementations.
*/
class nvenc_d3d11: public nvenc_base {
public:
explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type):
nvenc_base(device_type) {
}
~nvenc_d3d11();
/**
* @brief Get input surface texture.
* @return Input surface texture.
*/
virtual ID3D11Texture2D *get_input_texture() = 0;
protected:
bool init_library() override;
private:
HMODULE dll = NULL;
};
} // namespace nvenc
#endif

View File

@@ -1,71 +1,74 @@
/**
* @file src/nvenc/nvenc_d3d11_native.cpp
* @brief Definitions for native Direct3D11 NVENC encoder.
*/
#ifdef _WIN32
#include "nvenc_d3d11_native.h"
#include "nvenc_utils.h"
namespace nvenc {
/**
* @brief Construct the native Direct3D11 encoder.
* Selects `NV_ENC_DEVICE_TYPE_DIRECTX` and keeps `d3d_device` in the `d3d_device` member.
* @param d3d_device Direct3D11 device used for encoding.
*/
nvenc_d3d11_native::nvenc_d3d11_native(ID3D11Device *d3d_device):
nvenc_d3d11(NV_ENC_DEVICE_TYPE_DIRECTX),
d3d_device(d3d_device) {
// The D3D11 device itself is the encoding device handle exposed through the base class.
device = d3d_device;
}
/**
 * @brief Destroy the encoder if one exists.
 */
nvenc_d3d11_native::~nvenc_d3d11_native() {
  // Nothing to tear down while `encoder` is null.
  if (!encoder) {
    return;
  }

  destroy_encoder();
}
/**
 * @brief Get input surface texture.
 * @return Raw interface pointer held by `d3d_input_texture`.
 */
ID3D11Texture2D *nvenc_d3d11_native::get_input_texture() {
  ID3D11Texture2D *const texture = d3d_input_texture.GetInterfacePtr();
  return texture;
}
bool
nvenc_d3d11_native::create_and_register_input_buffer() {
if (encoder_params.buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
BOOST_LOG(error) << "NvEnc: 10-bit 4:4:4 encoding is incompatible with D3D11 surface formats, use CUDA interop";
return false;
}
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
} // namespace nvenc
#endif
/**
* @file src/nvenc/nvenc_d3d11_native.cpp
* @brief Definitions for native Direct3D11 NVENC encoder.
*/
#ifdef _WIN32
// this include
#include "nvenc_d3d11_native.h"
// local includes
#include "nvenc_utils.h"
namespace nvenc {
/**
* @brief Construct the native Direct3D11 encoder.
* Selects `NV_ENC_DEVICE_TYPE_DIRECTX` and keeps `d3d_device` in the `d3d_device` member.
* @param d3d_device Direct3D11 device used for encoding.
*/
nvenc_d3d11_native::nvenc_d3d11_native(ID3D11Device *d3d_device):
nvenc_d3d11(NV_ENC_DEVICE_TYPE_DIRECTX),
d3d_device(d3d_device) {
// The D3D11 device itself is the encoding device handle exposed through the base class.
device = d3d_device;
}
/**
 * @brief Destroy the encoder if one exists.
 */
nvenc_d3d11_native::~nvenc_d3d11_native() {
  // Nothing to tear down while `encoder` is null.
  if (!encoder) {
    return;
  }

  destroy_encoder();
}
/**
 * @brief Get input surface texture.
 * @return Raw interface pointer held by `d3d_input_texture`.
 */
ID3D11Texture2D *nvenc_d3d11_native::get_input_texture() {
  ID3D11Texture2D *const texture = d3d_input_texture.GetInterfacePtr();
  return texture;
}
bool nvenc_d3d11_native::create_and_register_input_buffer() {
if (encoder_params.buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
BOOST_LOG(error) << "NvEnc: 10-bit 4:4:4 encoding is incompatible with D3D11 surface formats, use CUDA interop";
return false;
}
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)};
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
} // namespace nvenc
#endif

View File

@@ -1,38 +1,37 @@
/**
* @file src/nvenc/nvenc_d3d11_native.h
* @brief Declarations for native Direct3D11 NVENC encoder.
*/
#pragma once
#ifdef _WIN32
#include <comdef.h>
#include <d3d11.h>
#include "nvenc_d3d11.h"
namespace nvenc {
/**
* @brief Native Direct3D11 NVENC encoder.
*/
class nvenc_d3d11_native final: public nvenc_d3d11 {
public:
/**
* @param d3d_device Direct3D11 device used for encoding.
*/
explicit nvenc_d3d11_native(ID3D11Device *d3d_device);
~nvenc_d3d11_native();
ID3D11Texture2D *
get_input_texture() override;
private:
bool
create_and_register_input_buffer() override;
const ID3D11DevicePtr d3d_device;
ID3D11Texture2DPtr d3d_input_texture;
};
} // namespace nvenc
#endif
/**
* @file src/nvenc/nvenc_d3d11_native.h
* @brief Declarations for native Direct3D11 NVENC encoder.
*/
#pragma once
#ifdef _WIN32
// standard includes
#include <comdef.h>
#include <d3d11.h>
// local includes
#include "nvenc_d3d11.h"
namespace nvenc {
/**
* @brief Native Direct3D11 NVENC encoder.
*/
class nvenc_d3d11_native final: public nvenc_d3d11 {
public:
/**
* @param d3d_device Direct3D11 device used for encoding.
*/
explicit nvenc_d3d11_native(ID3D11Device *d3d_device);
~nvenc_d3d11_native();
ID3D11Texture2D *get_input_texture() override;
private:
bool create_and_register_input_buffer() override;
const ID3D11DevicePtr d3d_device;
ID3D11Texture2DPtr d3d_input_texture;
};
} // namespace nvenc
#endif

View File

@@ -1,267 +1,269 @@
/**
* @file src/nvenc/nvenc_d3d11_on_cuda.cpp
* @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces.
*/
#ifdef _WIN32
#include "nvenc_d3d11_on_cuda.h"
#include "nvenc_utils.h"
namespace nvenc {
/**
* @brief Construct the CUDA encoder that takes Direct3D11 input surfaces.
* Selects `NV_ENC_DEVICE_TYPE_CUDA`; the base class `device` handle is not set here,
* `init_library()` sets it to the CUDA context once that context has been created.
* @param d3d_device Direct3D11 device, kept in the `d3d_device` member.
*/
nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device):
nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA),
d3d_device(d3d_device) {
}
/**
 * @brief Destroy the encoder, then release the CUDA resources, the CUDA context and the CUDA library.
 * Failures of the individual CUDA calls are logged and otherwise ignored.
 */
nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() {
  if (encoder) {
    destroy_encoder();
  }

  if (cuda_context) {
    {
      // The guard returned by push_context() is destroyed at the end of this scope, before cuCtxDestroy() runs.
      auto context_scope = push_context();

      if (cuda_d3d_input_texture) {
        const bool unregister_failed = cuda_failed(cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture));
        if (unregister_failed) {
          BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error;
        }
        cuda_d3d_input_texture = nullptr;
      }

      if (cuda_surface) {
        const bool free_failed = cuda_failed(cuda_functions.cuMemFree(cuda_surface));
        if (free_failed) {
          BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error;
        }
        cuda_surface = 0;
      }
    }

    const bool destroy_failed = cuda_failed(cuda_functions.cuCtxDestroy(cuda_context));
    if (destroy_failed) {
      BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error;
    }
    cuda_context = nullptr;
  }

  // Unload the CUDA library last and clear the function table along with its handle.
  if (cuda_functions.dll) {
    FreeLibrary(cuda_functions.dll);
    cuda_functions = {};
  }
}
/**
 * @brief Get input surface texture.
 * @return Raw interface pointer held by `d3d_input_texture`.
 */
ID3D11Texture2D *nvenc_d3d11_on_cuda::get_input_texture() {
  ID3D11Texture2D *const texture = d3d_input_texture.GetInterfacePtr();
  return texture;
}
/**
 * @brief Load NvEnc and the CUDA driver library, then create the CUDA interop context for the D3D11 adapter.
 * On success `device` is set to the created CUDA context.
 * @return `false` if the NvEnc library could not be initialized, otherwise whether `device` is non-null.
 */
bool nvenc_d3d11_on_cuda::init_library() {
  if (!nvenc_d3d11::init_library()) {
    return false;
  }

  constexpr auto dll_name = "nvcuda.dll";

  cuda_functions.dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
  if (!cuda_functions.dll) {
    BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name;
    return device != nullptr;
  }

  // Looks up one exported symbol and stores it into the matching function pointer slot.
  auto resolve = [&]<typename T>(T &slot, auto symbol_name) -> bool {
    slot = (T) GetProcAddress(cuda_functions.dll, symbol_name);
    return slot != nullptr;
  };

  // Resolution stops at the first missing symbol.
  const bool all_symbols_found =
    resolve(cuda_functions.cuInit, "cuInit") &&
    resolve(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") &&
    resolve(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") &&
    resolve(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") &&
    resolve(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") &&
    resolve(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") &&
    resolve(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") &&
    resolve(cuda_functions.cuMemFree, "cuMemFree_v2") &&
    resolve(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") &&
    resolve(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") &&
    resolve(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") &&
    resolve(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") &&
    resolve(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") &&
    resolve(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2");
  if (!all_symbols_found) {
    BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name;
    FreeLibrary(cuda_functions.dll);
    cuda_functions = {};
    return device != nullptr;
  }

  // The CUDA device is looked up from the DXGI adapter behind the D3D11 device.
  IDXGIDevicePtr dxgi_device;
  IDXGIAdapterPtr dxgi_adapter;
  const bool adapter_found = d3d_device &&
                             SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) &&
                             SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter));
  if (!adapter_found) {
    BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop";
    return device != nullptr;
  }

  CUdevice cuda_device;
  const bool context_created = cuda_succeeded(cuda_functions.cuInit(0)) &&
                               cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) &&
                               cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) &&
                               cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context));
  if (context_created) {
    device = cuda_context;
  } else {
    BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error;
  }

  return device != nullptr;
}
bool
nvenc_d3d11_on_cuda::create_and_register_input_buffer() {
if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding";
return false;
}
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height * 3; // Planar YUV
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
{
auto autopop_context = push_context();
if (!autopop_context) return false;
if (!cuda_d3d_input_texture) {
if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource(
&cuda_d3d_input_texture,
d3d_input_texture,
CU_GRAPHICS_REGISTER_FLAGS_NONE))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error;
return false;
}
}
if (!cuda_surface) {
if (cuda_failed(cuda_functions.cuMemAllocPitch(
&cuda_surface,
&cuda_surface_pitch,
// Planar 16-bit YUV
encoder_params.width * 2,
encoder_params.height * 3, 16))) {
BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: error " << last_cuda_error;
return false;
}
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.pitch = cuda_surface_pitch;
register_resource.resourceToRegister = (void *) cuda_surface;
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
bool
nvenc_d3d11_on_cuda::synchronize_input_buffer() {
auto autopop_context = push_context();
if (!autopop_context) return false;
if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error;
return false;
}
auto unmap = [&]() -> bool {
if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error;
return false;
}
return true;
};
auto unmap_guard = util::fail_guard(unmap);
CUarray input_texture_array;
if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error;
return false;
}
{
CUDA_MEMCPY2D copy_params = {};
copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params.srcArray = input_texture_array;
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.dstDevice = cuda_surface;
copy_params.dstPitch = cuda_surface_pitch;
// Planar 16-bit YUV
copy_params.WidthInBytes = encoder_params.width * 2;
copy_params.Height = encoder_params.height * 3;
if (cuda_failed(cuda_functions.cuMemcpy2D(&copy_params))) {
BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error;
return false;
}
}
unmap_guard.disable();
return unmap();
}
/**
 * @brief Record a CUDA result code and report whether it indicates success.
 * @param result Result code returned by a CUDA driver call.
 * @return `true` if `result` equals `CUDA_SUCCESS`.
 */
bool
nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) {
  // The code is stored in last_cuda_error so callers can print it in their log messages.
  return (last_cuda_error = result) == CUDA_SUCCESS;
}
/**
 * @brief Record a CUDA result code and report whether it indicates failure.
 * @param result Result code returned by a CUDA driver call.
 * @return `true` if `result` differs from `CUDA_SUCCESS`.
 */
bool
nvenc_d3d11_on_cuda::cuda_failed(CUresult result) {
  // The code is stored in last_cuda_error so callers can print it in their log messages.
  return (last_cuda_error = result) != CUDA_SUCCESS;
}
/**
 * @brief Pop the CUDA context again if this guard represents a successful push.
 */
nvenc_d3d11_on_cuda::autopop_context::~autopop_context() {
  // A null pushed_context marks a guard for which nothing was pushed.
  if (!pushed_context) {
    return;
  }

  CUcontext discarded_context;
  const CUresult pop_result = parent.cuda_functions.cuCtxPopCurrent(&discarded_context);
  if (parent.cuda_failed(pop_result)) {
    BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error;
  }
}
/**
 * @brief Push the encoder's CUDA context and return a guard that pops it on destruction.
 *
 * The two failure causes are reported separately: a missing context never reaches
 * `cuCtxPushCurrent()`, so `last_cuda_error` would be unrelated to it and is not printed.
 *
 * @return Guard that converts to `true` when the context was pushed,
 *         and to `false` (and pops nothing) otherwise.
 */
nvenc_d3d11_on_cuda::autopop_context
nvenc_d3d11_on_cuda::push_context() {
  if (!cuda_context) {
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() skipped: CUDA context has not been created";
    return { *this, nullptr };
  }

  if (cuda_failed(cuda_functions.cuCtxPushCurrent(cuda_context))) {
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error;
    return { *this, nullptr };
  }

  return { *this, cuda_context };
}
} // namespace nvenc
#endif
/**
* @file src/nvenc/nvenc_d3d11_on_cuda.cpp
* @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces.
*/
#ifdef _WIN32
// this include
#include "nvenc_d3d11_on_cuda.h"
// local includes
#include "nvenc_utils.h"
namespace nvenc {
/**
 * @brief Construct the interop encoder.
 *
 * Passes `NV_ENC_DEVICE_TYPE_CUDA` to the `nvenc_d3d11` base and stores the
 * Direct3D11 device in the `d3d_device` member. No CUDA or NVENC calls are made here.
 *
 * @param d3d_device Direct3D11 device kept for later use by this object.
 */
nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device):
nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA),
d3d_device(d3d_device) {
}
/**
 * @brief Release the encoder, the CUDA resources, the CUDA context and the CUDA library.
 *
 * Failures of individual CUDA calls are logged and the teardown continues.
 */
nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() {
  // The encoder is torn down before any of the CUDA objects below.
  if (encoder) {
    destroy_encoder();
  }

  if (cuda_context) {
    {
      // Inner scope: the guard pops the context again before cuCtxDestroy() is called.
      auto context_guard = push_context();

      if (cuda_d3d_input_texture) {
        const CUresult unregister_result = cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture);
        if (!cuda_succeeded(unregister_result)) {
          BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error;
        }
        cuda_d3d_input_texture = nullptr;
      }

      if (cuda_surface) {
        const CUresult free_result = cuda_functions.cuMemFree(cuda_surface);
        if (!cuda_succeeded(free_result)) {
          BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error;
        }
        cuda_surface = 0;
      }
    }

    const CUresult destroy_result = cuda_functions.cuCtxDestroy(cuda_context);
    if (!cuda_succeeded(destroy_result)) {
      BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error;
    }
    cuda_context = nullptr;
  }

  // Unload nvcuda.dll last and clear the function table that pointed into it.
  if (cuda_functions.dll) {
    FreeLibrary(cuda_functions.dll);
    cuda_functions = {};
  }
}
/**
 * @brief Get the Direct3D11 texture used as encoder input.
 * @return Raw interface pointer held by `d3d_input_texture`.
 */
ID3D11Texture2D *nvenc_d3d11_on_cuda::get_input_texture() {
  ID3D11Texture2D *const raw_texture = d3d_input_texture.GetInterfacePtr();
  return raw_texture;
}
/**
 * @brief Load the CUDA driver library and create the CUDA context used for encoding.
 *
 * Runs the base class initialization first, then loads `nvcuda.dll`, resolves the
 * CUDA entry points into `cuda_functions`, looks up the CUDA device that matches the
 * DXGI adapter of `d3d_device` and creates a CUDA context on it. On success the
 * context is stored in both `cuda_context` and `device`.
 *
 * @return `true` if `device` is non-null when the function finishes, `false` otherwise.
 */
bool nvenc_d3d11_on_cuda::init_library() {
if (!nvenc_d3d11::init_library()) {
return false;
}
constexpr auto dll_name = "nvcuda.dll";
// LOAD_LIBRARY_SEARCH_SYSTEM32 is passed so the library is looked up in the system directory.
if ((cuda_functions.dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
// Resolves one exported symbol into `location`; returns false when the export is missing.
auto load_function = [&]<typename T>(T &location, auto symbol) -> bool {
location = (T) GetProcAddress(cuda_functions.dll, symbol);
return location != nullptr;
};
// The chain stops at the first missing symbol.
if (!load_function(cuda_functions.cuInit, "cuInit") ||
!load_function(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") ||
!load_function(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") ||
!load_function(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") ||
!load_function(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") ||
!load_function(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") ||
!load_function(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") ||
!load_function(cuda_functions.cuMemFree, "cuMemFree_v2") ||
!load_function(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") ||
!load_function(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") ||
!load_function(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") ||
!load_function(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") ||
!load_function(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") ||
!load_function(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2")) {
BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name;
// Unload and clear the table; this also nulls cuda_functions.dll, so the block below is skipped.
FreeLibrary(cuda_functions.dll);
cuda_functions = {};
}
} else {
// Logged at debug level only, unlike the other failures in this function.
BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name;
}
if (cuda_functions.dll) {
IDXGIDevicePtr dxgi_device;
IDXGIAdapterPtr dxgi_adapter;
// The DXGI adapter behind the Direct3D11 device is what cuD3D11GetDevice() takes below.
if (d3d_device &&
SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) &&
SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter))) {
CUdevice cuda_device;
// NOTE(review): the pop right after cuCtxCreate() presumably undoes the new context being
// made current on this thread; later code pushes it on demand via push_context() - confirm
// against the CUDA driver API documentation.
if (cuda_succeeded(cuda_functions.cuInit(0)) &&
cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) &&
cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) &&
cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context))) {
device = cuda_context;
} else {
BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error;
}
} else {
BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop";
}
}
// `device` is only assigned on the fully successful path above.
return device != nullptr;
}
/**
 * @brief Create the input resources and register the CUDA surface with NVENC.
 *
 * Creates, each only if it does not exist yet: the Direct3D11 input texture, its CUDA
 * graphics resource registration, the pitched CUDA surface, and the NVENC registration
 * of that surface. Only `NV_ENC_BUFFER_FORMAT_YUV444_10BIT` is accepted.
 *
 * @return `true` when all resources exist after the call, `false` on the first failure.
 */
bool nvenc_d3d11_on_cuda::create_and_register_input_buffer() {
if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding";
return false;
}
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
// Three times the frame height: presumably one full-size plane per Y, U and V, stacked vertically.
desc.Height = encoder_params.height * 3; // Planar YUV
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
{
// The CUDA calls in this scope run with the encoder's context pushed; the guard pops it at scope exit.
auto autopop_context = push_context();
if (!autopop_context) {
return false;
}
if (!cuda_d3d_input_texture) {
// Makes the Direct3D11 texture accessible to CUDA; the handle is later mapped in synchronize_input_buffer().
if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource(
&cuda_d3d_input_texture,
d3d_input_texture,
CU_GRAPHICS_REGISTER_FLAGS_NONE
))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error;
return false;
}
}
if (!cuda_surface) {
// Same byte width and row count as the copy performed in synchronize_input_buffer().
if (cuda_failed(cuda_functions.cuMemAllocPitch(
&cuda_surface,
&cuda_surface_pitch,
// Planar 16-bit YUV
encoder_params.width * 2,
encoder_params.height * 3,
16
))) {
BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: error " << last_cuda_error;
return false;
}
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)};
// NVENC is given the CUDA device pointer (not the Direct3D11 texture) with the frame's
// own width and height and the pitch returned by cuMemAllocPitch().
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.pitch = cuda_surface_pitch;
register_resource.resourceToRegister = (void *) cuda_surface;
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
bool nvenc_d3d11_on_cuda::synchronize_input_buffer() {
auto autopop_context = push_context();
if (!autopop_context) {
return false;
}
if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error;
return false;
}
auto unmap = [&]() -> bool {
if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error;
return false;
}
return true;
};
auto unmap_guard = util::fail_guard(unmap);
CUarray input_texture_array;
if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error;
return false;
}
{
CUDA_MEMCPY2D copy_params = {};
copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params.srcArray = input_texture_array;
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.dstDevice = cuda_surface;
copy_params.dstPitch = cuda_surface_pitch;
// Planar 16-bit YUV
copy_params.WidthInBytes = encoder_params.width * 2;
copy_params.Height = encoder_params.height * 3;
if (cuda_failed(cuda_functions.cuMemcpy2D(&copy_params))) {
BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error;
return false;
}
}
unmap_guard.disable();
return unmap();
}
/**
 * @brief Record a CUDA result code and report whether it indicates success.
 * @param result Result code returned by a CUDA driver call.
 * @return `true` if `result` equals `CUDA_SUCCESS`.
 */
bool nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) {
  // The code is stored in last_cuda_error so callers can print it in their log messages.
  return (last_cuda_error = result) == CUDA_SUCCESS;
}
/**
 * @brief Record a CUDA result code and report whether it indicates failure.
 * @param result Result code returned by a CUDA driver call.
 * @return `true` if `result` differs from `CUDA_SUCCESS`.
 */
bool nvenc_d3d11_on_cuda::cuda_failed(CUresult result) {
  // The code is stored in last_cuda_error so callers can print it in their log messages.
  return (last_cuda_error = result) != CUDA_SUCCESS;
}
/**
 * @brief Pop the CUDA context again if this guard represents a successful push.
 */
nvenc_d3d11_on_cuda::autopop_context::~autopop_context() {
  // A null pushed_context marks a guard for which nothing was pushed.
  if (!pushed_context) {
    return;
  }

  CUcontext discarded_context;
  const CUresult pop_result = parent.cuda_functions.cuCtxPopCurrent(&discarded_context);
  if (parent.cuda_failed(pop_result)) {
    BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error;
  }
}
/**
 * @brief Push the encoder's CUDA context and return a guard that pops it on destruction.
 *
 * The two failure causes are reported separately: a missing context never reaches
 * `cuCtxPushCurrent()`, so `last_cuda_error` would be unrelated to it and is not printed.
 *
 * @return Guard that converts to `true` when the context was pushed,
 *         and to `false` (and pops nothing) otherwise.
 */
nvenc_d3d11_on_cuda::autopop_context nvenc_d3d11_on_cuda::push_context() {
  if (!cuda_context) {
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() skipped: CUDA context has not been created";
    return {*this, nullptr};
  }

  if (cuda_failed(cuda_functions.cuCtxPushCurrent(cuda_context))) {
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error;
    return {*this, nullptr};
  }

  return {*this, cuda_context};
}
} // namespace nvenc
#endif

View File

@@ -1,96 +1,89 @@
/**
* @file src/nvenc/nvenc_d3d11_on_cuda.h
* @brief Declarations for CUDA NVENC encoder with Direct3D11 input surfaces.
*/
#pragma once
#ifdef _WIN32
#include "nvenc_d3d11.h"
#include <ffnvcodec/dynlink_cuda.h>
namespace nvenc {
/**
 * @brief Interop Direct3D11 on CUDA NVENC encoder.
 *        Input surface is Direct3D11, encoding is performed by CUDA.
 */
class nvenc_d3d11_on_cuda final: public nvenc_d3d11 {
public:
  /**
   * @param d3d_device Direct3D11 device that will create input surface texture.
   *                   CUDA encoding device will be derived from it.
   */
  explicit nvenc_d3d11_on_cuda(ID3D11Device *d3d_device);
  ~nvenc_d3d11_on_cuda();

  /**
   * @brief Get the Direct3D11 texture used as encoder input.
   * @return Raw pointer to the input texture.
   */
  ID3D11Texture2D *
  get_input_texture() override;

private:
  bool
  init_library() override;

  bool
  create_and_register_input_buffer() override;

  bool
  synchronize_input_buffer() override;

  /**
   * @brief Store `result` in `last_cuda_error` and test it for success.
   * @param result Result code of a CUDA driver call.
   * @return `true` if `result` is `CUDA_SUCCESS`.
   */
  bool
  cuda_succeeded(CUresult result);

  /**
   * @brief Store `result` in `last_cuda_error` and test it for failure.
   * @param result Result code of a CUDA driver call.
   * @return `true` if `result` is not `CUDA_SUCCESS`.
   */
  bool
  cuda_failed(CUresult result);

  /**
   * @brief Scope guard that pops the CUDA context in its destructor.
   *        A null `pushed_context` marks a guard that has nothing to pop.
   */
  struct autopop_context {
    autopop_context(nvenc_d3d11_on_cuda &parent, CUcontext pushed_context):
        parent(parent),
        pushed_context(pushed_context) {
    }

    ~autopop_context();

    // The destructor pops the context, so a copy of the guard would pop it a second time.
    autopop_context(const autopop_context &) = delete;
    autopop_context &
    operator=(const autopop_context &) = delete;

    explicit
    operator bool() const {
      return pushed_context != nullptr;
    }

    nvenc_d3d11_on_cuda &parent;
    CUcontext pushed_context = nullptr;
  };

  /**
   * @brief Push `cuda_context` and return the guard that pops it again.
   * @return Guard that converts to `false` if the context could not be pushed.
   */
  autopop_context
  push_context();

  // NOTE(review): the implementation only appears to use `cuda_functions.dll` - confirm whether this member is still needed.
  HMODULE dll = NULL;
  const ID3D11DevicePtr d3d_device;
  ID3D11Texture2DPtr d3d_input_texture;

  // CUDA driver entry points and the library handle they were resolved from.
  struct {
    tcuInit *cuInit;
    tcuD3D11GetDevice *cuD3D11GetDevice;
    tcuCtxCreate_v2 *cuCtxCreate;
    tcuCtxDestroy_v2 *cuCtxDestroy;
    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
    tcuMemAllocPitch_v2 *cuMemAllocPitch;
    tcuMemFree_v2 *cuMemFree;
    tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
    tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
    tcuGraphicsMapResources *cuGraphicsMapResources;
    tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
    tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
    tcuMemcpy2D_v2 *cuMemcpy2D;
    HMODULE dll;
  } cuda_functions = {};

  CUresult last_cuda_error = CUDA_SUCCESS;  ///< Result code recorded by the last cuda_succeeded()/cuda_failed() call.
  CUcontext cuda_context = nullptr;
  CUgraphicsResource cuda_d3d_input_texture = nullptr;
  CUdeviceptr cuda_surface = 0;
  size_t cuda_surface_pitch = 0;
};
} // namespace nvenc
#endif
/**
* @file src/nvenc/nvenc_d3d11_on_cuda.h
* @brief Declarations for CUDA NVENC encoder with Direct3D11 input surfaces.
*/
#pragma once
#ifdef _WIN32
// lib includes
#include <ffnvcodec/dynlink_cuda.h>
// local includes
#include "nvenc_d3d11.h"
namespace nvenc {
/**
 * @brief Interop Direct3D11 on CUDA NVENC encoder.
 *        Input surface is Direct3D11, encoding is performed by CUDA.
 */
class nvenc_d3d11_on_cuda final: public nvenc_d3d11 {
public:
  /**
   * @param d3d_device Direct3D11 device that will create input surface texture.
   *                   CUDA encoding device will be derived from it.
   */
  explicit nvenc_d3d11_on_cuda(ID3D11Device *d3d_device);
  ~nvenc_d3d11_on_cuda();

  /**
   * @brief Get the Direct3D11 texture used as encoder input.
   * @return Raw pointer to the input texture.
   */
  ID3D11Texture2D *get_input_texture() override;

private:
  bool init_library() override;
  bool create_and_register_input_buffer() override;
  bool synchronize_input_buffer() override;

  /**
   * @brief Store `result` in `last_cuda_error` and test it for success.
   * @param result Result code of a CUDA driver call.
   * @return `true` if `result` is `CUDA_SUCCESS`.
   */
  bool cuda_succeeded(CUresult result);

  /**
   * @brief Store `result` in `last_cuda_error` and test it for failure.
   * @param result Result code of a CUDA driver call.
   * @return `true` if `result` is not `CUDA_SUCCESS`.
   */
  bool cuda_failed(CUresult result);

  /**
   * @brief Scope guard that pops the CUDA context in its destructor.
   *        A null `pushed_context` marks a guard that has nothing to pop.
   */
  struct autopop_context {
    autopop_context(nvenc_d3d11_on_cuda &parent, CUcontext pushed_context):
        parent(parent),
        pushed_context(pushed_context) {
    }

    ~autopop_context();

    // The destructor pops the context, so a copy of the guard would pop it a second time.
    autopop_context(const autopop_context &) = delete;
    autopop_context &operator=(const autopop_context &) = delete;

    explicit operator bool() const {
      return pushed_context != nullptr;
    }

    nvenc_d3d11_on_cuda &parent;
    CUcontext pushed_context = nullptr;
  };

  /**
   * @brief Push `cuda_context` and return the guard that pops it again.
   * @return Guard that converts to `false` if the context could not be pushed.
   */
  autopop_context push_context();

  // NOTE(review): the implementation only appears to use `cuda_functions.dll` - confirm whether this member is still needed.
  HMODULE dll = NULL;
  const ID3D11DevicePtr d3d_device;
  ID3D11Texture2DPtr d3d_input_texture;

  // CUDA driver entry points and the library handle they were resolved from.
  struct {
    tcuInit *cuInit;
    tcuD3D11GetDevice *cuD3D11GetDevice;
    tcuCtxCreate_v2 *cuCtxCreate;
    tcuCtxDestroy_v2 *cuCtxDestroy;
    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
    tcuMemAllocPitch_v2 *cuMemAllocPitch;
    tcuMemFree_v2 *cuMemFree;
    tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
    tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
    tcuGraphicsMapResources *cuGraphicsMapResources;
    tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
    tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
    tcuMemcpy2D_v2 *cuMemcpy2D;
    HMODULE dll;
  } cuda_functions = {};

  CUresult last_cuda_error = CUDA_SUCCESS;  ///< Result code recorded by the last cuda_succeeded()/cuda_failed() call.
  CUcontext cuda_context = nullptr;
  CUgraphicsResource cuda_d3d_input_texture = nullptr;
  CUdeviceptr cuda_surface = 0;
  size_t cuda_surface_pitch = 0;
};
} // namespace nvenc
#endif

View File

@@ -1,22 +1,23 @@
/**
* @file src/nvenc/nvenc_encoded_frame.h
* @brief Declarations for NVENC encoded frame.
*/
#pragma once
#include <cstdint>
#include <vector>
namespace nvenc {
/**
 * @brief NVENC encoded frame.
 *        Plain aggregate; every member has a default value.
 */
struct nvenc_encoded_frame {
  std::vector<uint8_t> data {};  ///< Bytes of the encoded frame; empty by default.
  uint64_t frame_index {0};  ///< Index of the frame.
  bool idr {false};  ///< Presumably set when the frame is an IDR frame - confirm against the encoder.
  bool after_ref_frame_invalidation {false};  ///< Presumably set for a frame encoded after reference frame invalidation - confirm against the encoder.
};
} // namespace nvenc
/**
* @file src/nvenc/nvenc_encoded_frame.h
* @brief Declarations for NVENC encoded frame.
*/
#pragma once
// standard includes
#include <cstdint>
#include <vector>
namespace nvenc {
/**
 * @brief NVENC encoded frame.
 *        Plain aggregate; every member has a default value.
 */
struct nvenc_encoded_frame {
  std::vector<uint8_t> data {};  ///< Bytes of the encoded frame; empty by default.
  uint64_t frame_index {0};  ///< Index of the frame.
  bool idr {false};  ///< Presumably set when the frame is an IDR frame - confirm against the encoder.
  bool after_ref_frame_invalidation {false};  ///< Presumably set for a frame encoded after reference frame invalidation - confirm against the encoder.
};
} // namespace nvenc

View File

@@ -1,94 +1,93 @@
/**
* @file src/nvenc/nvenc_utils.cpp
* @brief Definitions for NVENC utilities.
*/
#include <cassert>
#include "nvenc_utils.h"
namespace nvenc {
#ifdef _WIN32
/**
 * @brief Translate an NVENC buffer format into the matching DXGI texture format.
 * @param format NVENC buffer format.
 * @return Matching DXGI format, or `DXGI_FORMAT_UNKNOWN` for formats without a mapping.
 */
DXGI_FORMAT
dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format) {
  if (format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) {
    return DXGI_FORMAT_P010;
  }
  if (format == NV_ENC_BUFFER_FORMAT_NV12) {
    return DXGI_FORMAT_NV12;
  }
  if (format == NV_ENC_BUFFER_FORMAT_AYUV) {
    return DXGI_FORMAT_AYUV;
  }
  if (format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
    // Mapped to a single-channel 16-bit texture format.
    return DXGI_FORMAT_R16_UINT;
  }
  return DXGI_FORMAT_UNKNOWN;
}
#endif
/**
 * @brief Translate a Sunshine pixel format into the matching NVENC buffer format.
 * @param format Sunshine pixel format.
 * @return Matching NVENC format, or `NV_ENC_BUFFER_FORMAT_UNDEFINED` for formats without a mapping.
 */
NV_ENC_BUFFER_FORMAT
nvenc_format_from_sunshine_format(platf::pix_fmt_e format) {
  // Stays UNDEFINED unless one of the cases below matches.
  NV_ENC_BUFFER_FORMAT nvenc_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;

  switch (format) {
    case platf::pix_fmt_e::nv12:
      nvenc_format = NV_ENC_BUFFER_FORMAT_NV12;
      break;
    case platf::pix_fmt_e::p010:
      nvenc_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
      break;
    case platf::pix_fmt_e::ayuv:
      nvenc_format = NV_ENC_BUFFER_FORMAT_AYUV;
      break;
    case platf::pix_fmt_e::yuv444p16:
      nvenc_format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
      break;
    default:
      break;
  }

  return nvenc_format;
}
/**
 * @brief Translate a Sunshine colorspace into the NVENC VUI colorspace description.
 *
 * Fills primaries, transfer function and matrix coefficients according to
 * `sunshine_colorspace.colorspace`, and copies the `full_range` flag.
 * The two Rec. 2020 variants assert a bit depth of 10.
 *
 * @param sunshine_colorspace Sunshine colorspace to translate.
 * @return NVENC colorspace description.
 */
nvenc_colorspace_t
nvenc_colorspace_from_sunshine_colorspace(const video::sunshine_colorspace_t &sunshine_colorspace) {
  // Value-initialized: the switch has no default, so an unexpected enum value
  // would otherwise return fields with indeterminate values.
  nvenc_colorspace_t colorspace {};

  switch (sunshine_colorspace.colorspace) {
    case video::colorspace_e::rec601:
      // Rec. 601
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M;
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M;
      break;

    case video::colorspace_e::rec709:
      // Rec. 709
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT709;
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT709;
      break;

    case video::colorspace_e::bt2020sdr:
      // Rec. 2020
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
      assert(sunshine_colorspace.bit_depth == 10);
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT2020_10;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
      break;

    case video::colorspace_e::bt2020:
      // Rec. 2020 with ST 2084 perceptual quantizer
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
      assert(sunshine_colorspace.bit_depth == 10);
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
      break;
  }

  colorspace.full_range = sunshine_colorspace.full_range;
  return colorspace;
}
} // namespace nvenc
/**
* @file src/nvenc/nvenc_utils.cpp
* @brief Definitions for NVENC utilities.
*/
// standard includes
#include <cassert>
// local includes
#include "nvenc_utils.h"
namespace nvenc {
#ifdef _WIN32
/**
 * @brief Translate an NVENC buffer format into the matching DXGI texture format.
 * @param format NVENC buffer format.
 * @return Matching DXGI format, or `DXGI_FORMAT_UNKNOWN` for formats without a mapping.
 */
DXGI_FORMAT dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format) {
  if (format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) {
    return DXGI_FORMAT_P010;
  }
  if (format == NV_ENC_BUFFER_FORMAT_NV12) {
    return DXGI_FORMAT_NV12;
  }
  if (format == NV_ENC_BUFFER_FORMAT_AYUV) {
    return DXGI_FORMAT_AYUV;
  }
  if (format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
    // Mapped to a single-channel 16-bit texture format.
    return DXGI_FORMAT_R16_UINT;
  }
  return DXGI_FORMAT_UNKNOWN;
}
#endif
/**
 * @brief Translate a Sunshine pixel format into the matching NVENC buffer format.
 * @param format Sunshine pixel format.
 * @return Matching NVENC format, or `NV_ENC_BUFFER_FORMAT_UNDEFINED` for formats without a mapping.
 */
NV_ENC_BUFFER_FORMAT nvenc_format_from_sunshine_format(platf::pix_fmt_e format) {
  // Stays UNDEFINED unless one of the cases below matches.
  NV_ENC_BUFFER_FORMAT nvenc_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;

  switch (format) {
    case platf::pix_fmt_e::nv12:
      nvenc_format = NV_ENC_BUFFER_FORMAT_NV12;
      break;
    case platf::pix_fmt_e::p010:
      nvenc_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
      break;
    case platf::pix_fmt_e::ayuv:
      nvenc_format = NV_ENC_BUFFER_FORMAT_AYUV;
      break;
    case platf::pix_fmt_e::yuv444p16:
      nvenc_format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
      break;
    default:
      break;
  }

  return nvenc_format;
}
/**
 * @brief Translate a Sunshine colorspace into the NVENC VUI colorspace description.
 *
 * Fills primaries, transfer function and matrix coefficients according to
 * `sunshine_colorspace.colorspace`, and copies the `full_range` flag.
 * The two Rec. 2020 variants assert a bit depth of 10.
 *
 * @param sunshine_colorspace Sunshine colorspace to translate.
 * @return NVENC colorspace description.
 */
nvenc_colorspace_t nvenc_colorspace_from_sunshine_colorspace(const video::sunshine_colorspace_t &sunshine_colorspace) {
  // Value-initialized: the switch has no default, so an unexpected enum value
  // would otherwise return fields with indeterminate values.
  nvenc_colorspace_t colorspace {};

  switch (sunshine_colorspace.colorspace) {
    case video::colorspace_e::rec601:
      // Rec. 601
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M;
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M;
      break;

    case video::colorspace_e::rec709:
      // Rec. 709
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT709;
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT709;
      break;

    case video::colorspace_e::bt2020sdr:
      // Rec. 2020
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
      assert(sunshine_colorspace.bit_depth == 10);
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT2020_10;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
      break;

    case video::colorspace_e::bt2020:
      // Rec. 2020 with ST 2084 perceptual quantizer
      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
      assert(sunshine_colorspace.bit_depth == 10);
      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084;
      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
      break;
  }

  colorspace.full_range = sunshine_colorspace.full_range;
  return colorspace;
}
} // namespace nvenc

View File

@@ -1,31 +1,30 @@
/**
* @file src/nvenc/nvenc_utils.h
* @brief Declarations for NVENC utilities.
*/
#pragma once
#ifdef _WIN32
#include <dxgiformat.h>
#endif
#include "nvenc_colorspace.h"
#include "src/platform/common.h"
#include "src/video_colorspace.h"
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
#ifdef _WIN32
/**
 * @brief Translate an NVENC buffer format into the matching DXGI texture format.
 * @param format NVENC buffer format.
 * @return Matching DXGI format, or `DXGI_FORMAT_UNKNOWN` for formats without a mapping.
 */
DXGI_FORMAT
dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format);
#endif
/**
 * @brief Translate a Sunshine pixel format into the matching NVENC buffer format.
 * @param format Sunshine pixel format.
 * @return Matching NVENC format, or `NV_ENC_BUFFER_FORMAT_UNDEFINED` for formats without a mapping.
 */
NV_ENC_BUFFER_FORMAT
nvenc_format_from_sunshine_format(platf::pix_fmt_e format);
/**
 * @brief Translate a Sunshine colorspace into the NVENC VUI colorspace description.
 * @param sunshine_colorspace Sunshine colorspace to translate.
 * @return NVENC colorspace with primaries, transfer function, matrix and range flag filled in.
 */
nvenc_colorspace_t
nvenc_colorspace_from_sunshine_colorspace(const video::sunshine_colorspace_t &sunshine_colorspace);
} // namespace nvenc
/**
* @file src/nvenc/nvenc_utils.h
* @brief Declarations for NVENC utilities.
*/
#pragma once
// platform includes
#ifdef _WIN32
#include <dxgiformat.h>
#endif
// lib includes
#include <ffnvcodec/nvEncodeAPI.h>
// local includes
#include "nvenc_colorspace.h"
#include "src/platform/common.h"
#include "src/video_colorspace.h"
namespace nvenc {
#ifdef _WIN32
/**
 * @brief Translate an NVENC buffer format into the matching DXGI texture format.
 * @param format NVENC buffer format.
 * @return Matching DXGI format, or `DXGI_FORMAT_UNKNOWN` for formats without a mapping.
 */
DXGI_FORMAT dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format);
#endif
/**
 * @brief Translate a Sunshine pixel format into the matching NVENC buffer format.
 * @param format Sunshine pixel format.
 * @return Matching NVENC format, or `NV_ENC_BUFFER_FORMAT_UNDEFINED` for formats without a mapping.
 */
NV_ENC_BUFFER_FORMAT nvenc_format_from_sunshine_format(platf::pix_fmt_e format);
/**
 * @brief Translate a Sunshine colorspace into the NVENC VUI colorspace description.
 * @param sunshine_colorspace Sunshine colorspace to translate.
 * @return NVENC colorspace with primaries, transfer function, matrix and range flag filled in.
 */
nvenc_colorspace_t nvenc_colorspace_from_sunshine_colorspace(const video::sunshine_colorspace_t &sunshine_colorspace);
} // namespace nvenc