feat(win/video): support native YUV 4:4:4 encoding (#2533)

This commit is contained in:
ns6089
2024-08-16 20:41:27 +03:00
committed by GitHub
parent e8c837f412
commit bfdfcebc80
35 changed files with 1454 additions and 330 deletions

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_base.cpp
* @brief Definitions for base NVENC encoder.
* @brief Definitions for abstract platform-agnostic base of standalone NVENC encoder.
*/
#include "nvenc_base.h"
@@ -85,9 +85,8 @@ namespace {
namespace nvenc {
nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device):
device_type(device_type),
device(device) {
nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type):
device_type(device_type) {
}
nvenc_base::~nvenc_base() {
@@ -115,19 +114,19 @@ namespace nvenc {
session_params.deviceType = device_type;
session_params.apiVersion = minimum_api_version;
if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) {
BOOST_LOG(error) << "NvEncOpenEncodeSessionEx failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncOpenEncodeSessionEx() failed: " << last_nvenc_error_string;
return false;
}
uint32_t encode_guid_count = 0;
if (nvenc_failed(nvenc->nvEncGetEncodeGUIDCount(encoder, &encode_guid_count))) {
BOOST_LOG(error) << "NvEncGetEncodeGUIDCount failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDCount() failed: " << last_nvenc_error_string;
return false;
};
std::vector<GUID> encode_guids(encode_guid_count);
if (nvenc_failed(nvenc->nvEncGetEncodeGUIDs(encoder, encode_guids.data(), encode_guids.size(), &encode_guid_count))) {
BOOST_LOG(error) << "NvEncGetEncodeGUIDs failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDs() failed: " << last_nvenc_error_string;
return false;
}
@@ -176,7 +175,7 @@ namespace nvenc {
};
auto buffer_is_yuv444 = [&]() {
return buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
};
{
@@ -220,7 +219,7 @@ namespace nvenc {
NV_ENC_PRESET_CONFIG preset_config = { min_struct_version(NV_ENC_PRESET_CONFIG_VER), { min_struct_version(NV_ENC_CONFIG_VER, 7, 8) } };
if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) {
BOOST_LOG(error) << "NvEncGetEncodePresetConfigEx failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncGetEncodePresetConfigEx() failed: " << last_nvenc_error_string;
return false;
}
@@ -228,7 +227,6 @@ namespace nvenc {
enc_config.profileGUID = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID;
enc_config.gopLength = NVENC_INFINITE_GOPLENGTH;
enc_config.frameIntervalP = 1;
enc_config.rcParams.enableAQ = config.adaptive_quantization;
enc_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
enc_config.rcParams.zeroReorderDelay = 1;
enc_config.rcParams.enableLookahead = 0;
@@ -282,7 +280,7 @@ namespace nvenc {
}
};
auto fill_h264_hevc_vui = [&colorspace](auto &vui_config) {
auto fill_h264_hevc_vui = [&](auto &vui_config) {
vui_config.videoSignalTypePresentFlag = 1;
vui_config.videoFormat = NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED;
vui_config.videoFullRangeFlag = colorspace.full_range;
@@ -290,7 +288,7 @@ namespace nvenc {
vui_config.colourPrimaries = colorspace.primaries;
vui_config.transferCharacteristics = colorspace.tranfer_function;
vui_config.colourMatrix = colorspace.matrix;
vui_config.chromaSampleLocationFlag = 1;
vui_config.chromaSampleLocationFlag = buffer_is_yuv444() ? 0 : 1;
vui_config.chromaSampleLocationTop = 0;
vui_config.chromaSampleLocationBot = 0;
};
@@ -331,7 +329,9 @@ namespace nvenc {
auto &format_config = enc_config.encodeCodecConfig.av1Config;
format_config.repeatSeqHdr = 1;
format_config.idrPeriod = NVENC_INFINITE_GOPLENGTH;
format_config.chromaFormatIDC = 1; // YUV444 not supported by NVENC yet
if (buffer_is_yuv444()) {
format_config.chromaFormatIDC = 3;
}
format_config.enableBitstreamPadding = config.insert_filler_data;
if (buffer_is_10bit()) {
format_config.inputPixelBitDepthMinus8 = 2;
@@ -341,7 +341,7 @@ namespace nvenc {
format_config.transferCharacteristics = colorspace.tranfer_function;
format_config.matrixCoefficients = colorspace.matrix;
format_config.colorRange = colorspace.full_range;
format_config.chromaSamplePosition = 1;
format_config.chromaSamplePosition = buffer_is_yuv444() ? 0 : 1;
set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numFwdRefs, 8);
set_minqp_if_enabled(config.min_qp_av1);
@@ -358,7 +358,7 @@ namespace nvenc {
init_params.encodeConfig = &enc_config;
if (nvenc_failed(nvenc->nvEncInitializeEncoder(encoder, &init_params))) {
BOOST_LOG(error) << "NvEncInitializeEncoder failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncInitializeEncoder() failed: " << last_nvenc_error_string;
return false;
}
@@ -366,14 +366,14 @@ namespace nvenc {
NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) };
event_params.completionEvent = async_event_handle;
if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) {
BOOST_LOG(error) << "NvEncRegisterAsyncEvent failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncRegisterAsyncEvent() failed: " << last_nvenc_error_string;
return false;
}
}
NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = { min_struct_version(NV_ENC_CREATE_BITSTREAM_BUFFER_VER) };
if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) {
BOOST_LOG(error) << "NvEncCreateBitstreamBuffer failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncCreateBitstreamBuffer() failed: " << last_nvenc_error_string;
return false;
}
output_bitstream = create_bitstream_buffer.bitstreamBuffer;
@@ -388,8 +388,13 @@ namespace nvenc {
}
{
auto video_format_string = client_config.videoFormat == 0 ? "H.264 " :
client_config.videoFormat == 1 ? "HEVC " :
client_config.videoFormat == 2 ? "AV1 " :
" ";
std::string extra;
if (init_params.enableEncodeAsync) extra += " async";
if (buffer_is_yuv444()) extra += " yuv444";
if (buffer_is_10bit()) extra += " 10-bit";
if (enc_config.rcParams.multiPass != NV_ENC_MULTI_PASS_DISABLED) extra += " two-pass";
if (config.vbv_percentage_increase > 0 && get_encoder_cap(NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE)) extra += " vbv+" + std::to_string(config.vbv_percentage_increase);
@@ -398,7 +403,8 @@ namespace nvenc {
if (enc_config.rcParams.enableAQ) extra += " spatial-aq";
if (enc_config.rcParams.enableMinQP) extra += " qpmin=" + std::to_string(enc_config.rcParams.minQP.qpInterP);
if (config.insert_filler_data) extra += " filler-data";
BOOST_LOG(info) << "NvEnc: created encoder " << quality_preset_string_from_guid(init_params.presetGUID) << extra;
BOOST_LOG(info) << "NvEnc: created encoder " << video_format_string << quality_preset_string_from_guid(init_params.presetGUID) << extra;
}
encoder_state = {};
@@ -409,20 +415,28 @@ namespace nvenc {
void
nvenc_base::destroy_encoder() {
if (output_bitstream) {
nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream);
if (nvenc_failed(nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream))) {
BOOST_LOG(error) << "NvEnc: NvEncDestroyBitstreamBuffer() failed: " << last_nvenc_error_string;
}
output_bitstream = nullptr;
}
if (encoder && async_event_handle) {
NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) };
event_params.completionEvent = async_event_handle;
nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params);
if (nvenc_failed(nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params))) {
BOOST_LOG(error) << "NvEnc: NvEncUnregisterAsyncEvent() failed: " << last_nvenc_error_string;
}
}
if (registered_input_buffer) {
nvenc->nvEncUnregisterResource(encoder, registered_input_buffer);
if (nvenc_failed(nvenc->nvEncUnregisterResource(encoder, registered_input_buffer))) {
BOOST_LOG(error) << "NvEnc: NvEncUnregisterResource() failed: " << last_nvenc_error_string;
}
registered_input_buffer = nullptr;
}
if (encoder) {
nvenc->nvEncDestroyEncoder(encoder);
if (nvenc_failed(nvenc->nvEncDestroyEncoder(encoder))) {
BOOST_LOG(error) << "NvEnc: NvEncDestroyEncoder() failed: " << last_nvenc_error_string;
}
encoder = nullptr;
}
@@ -439,14 +453,23 @@ namespace nvenc {
assert(registered_input_buffer);
assert(output_bitstream);
if (!synchronize_input_buffer()) {
BOOST_LOG(error) << "NvEnc: failed to synchronize input buffer";
return {};
}
NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = { min_struct_version(NV_ENC_MAP_INPUT_RESOURCE_VER) };
mapped_input_buffer.registeredResource = registered_input_buffer;
if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) {
BOOST_LOG(error) << "NvEncMapInputResource failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncMapInputResource() failed: " << last_nvenc_error_string;
return {};
}
auto unmap_guard = util::fail_guard([&] { nvenc->nvEncUnmapInputResource(encoder, &mapped_input_buffer); });
auto unmap_guard = util::fail_guard([&] {
if (nvenc_failed(nvenc->nvEncUnmapInputResource(encoder, mapped_input_buffer.mappedResource))) {
BOOST_LOG(error) << "NvEnc: NvEncUnmapInputResource() failed: " << last_nvenc_error_string;
}
});
NV_ENC_PIC_PARAMS pic_params = { min_struct_version(NV_ENC_PIC_PARAMS_VER, 4, 6) };
pic_params.inputWidth = encoder_params.width;
@@ -460,7 +483,7 @@ namespace nvenc {
pic_params.completionEvent = async_event_handle;
if (nvenc_failed(nvenc->nvEncEncodePicture(encoder, &pic_params))) {
BOOST_LOG(error) << "NvEncEncodePicture failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncEncodePicture() failed: " << last_nvenc_error_string;
return {};
}
@@ -474,7 +497,7 @@ namespace nvenc {
}
if (nvenc_failed(nvenc->nvEncLockBitstream(encoder, &lock_bitstream))) {
BOOST_LOG(error) << "NvEncLockBitstream failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncLockBitstream() failed: " << last_nvenc_error_string;
return {};
}
@@ -498,7 +521,7 @@ namespace nvenc {
}
if (nvenc_failed(nvenc->nvEncUnlockBitstream(encoder, lock_bitstream.outputBitstream))) {
BOOST_LOG(error) << "NvEncUnlockBitstream failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncUnlockBitstream() failed: " << last_nvenc_error_string;
}
encoder_state.frame_size_logger.collect_and_log(encoded_frame.data.size() / 1000.);
@@ -535,7 +558,7 @@ namespace nvenc {
for (auto i = first_frame; i <= last_frame; i++) {
if (nvenc_failed(nvenc->nvEncInvalidateRefFrames(encoder, i))) {
BOOST_LOG(error) << "NvEncInvalidateRefFrames " << i << " failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncInvalidateRefFrames() " << i << " failed: " << last_nvenc_error_string;
return false;
}
}
@@ -576,20 +599,22 @@ namespace nvenc {
nvenc_status_case(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
// Newer versions of sdk may add more constants, look for them the end of NVENCSTATUS enum
// Newer versions of sdk may add more constants, look for them at the end of NVENCSTATUS enum
#undef nvenc_status_case
default:
return std::to_string(status);
}
};
last_error_string.clear();
last_nvenc_error_string.clear();
if (status != NV_ENC_SUCCESS) {
/* This API function gives broken strings more often than not
if (nvenc && encoder) {
last_error_string = nvenc->nvEncGetLastErrorString(encoder);
if (!last_error_string.empty()) last_error_string += " ";
last_nvenc_error_string = nvenc->nvEncGetLastErrorString(encoder);
if (!last_nvenc_error_string.empty()) last_nvenc_error_string += " ";
}
last_error_string += status_string(status);
*/
last_nvenc_error_string += status_string(status);
return true;
}

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_base.h
* @brief Declarations for base NVENC encoder.
* @brief Declarations for abstract platform-agnostic base of standalone NVENC encoder.
*/
#pragma once
@@ -13,36 +13,98 @@
#include <ffnvcodec/nvEncodeAPI.h>
/**
* @brief Standalone NVENC encoder
*/
namespace nvenc {
/**
* @brief Abstract platform-agnostic base of standalone NVENC encoder.
* Derived classes perform platform-specific operations.
*/
class nvenc_base {
public:
nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device);
/**
* @param device_type Underlying device type used by derived class.
*/
explicit nvenc_base(NV_ENC_DEVICE_TYPE device_type);
virtual ~nvenc_base();
nvenc_base(const nvenc_base &) = delete;
nvenc_base &
operator=(const nvenc_base &) = delete;
/**
* @brief Create the encoder.
* @param config NVENC encoder configuration.
* @param client_config Stream configuration requested by the client.
* @param colorspace YUV colorspace.
* @param buffer_format Platform-agnostic input surface format.
* @return `true` on success, `false` on error
*/
bool
create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format);
/**
* @brief Destroy the encoder.
* Derived classes call it in the destructor.
*/
void
destroy_encoder();
/**
* @brief Encode the next frame using platform-specific input surface.
* @param frame_index Frame index that uniquely identifies the frame.
* Afterwards serves as parameter for `invalidate_ref_frames()`.
* No restrictions on the first frame index, but later frame indexes must be subsequent.
* @param force_idr Whether to encode frame as forced IDR.
* @return Encoded frame.
*/
nvenc_encoded_frame
encode_frame(uint64_t frame_index, bool force_idr);
/**
* @brief Perform reference frame invalidation (RFI) procedure.
* @param first_frame First frame index of the invalidation range.
* @param last_frame Last frame index of the invalidation range.
* @return `true` on success, `false` on error.
* After error next frame must be encoded with `force_idr = true`.
*/
bool
invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
protected:
/**
* @brief Required. Used for loading NvEnc library and setting `nvenc` variable with `NvEncodeAPICreateInstance()`.
* Called during `create_encoder()` if `nvenc` variable is not initialized.
* @return `true` on success, `false` on error
*/
virtual bool
init_library() = 0;
/**
* @brief Required. Used for creating outside-facing input surface,
* registering this surface with `nvenc->nvEncRegisterResource()` and setting `registered_input_buffer` variable.
* Called during `create_encoder()`.
* @return `true` on success, `false` on error
*/
virtual bool
create_and_register_input_buffer() = 0;
/**
* @brief Optional. Override if you must perform additional operations on the registered input surface in the beginning of `encode_frame()`.
* Typically used for interop copy.
* @return `true` on success, `false` on error
*/
virtual bool
synchronize_input_buffer() { return true; }
/**
* @brief Optional. Override if you want to create encoder in async mode.
* In this case must also set `async_event_handle` variable.
* @param timeout_ms Wait timeout in milliseconds
* @return `true` on success, `false` on timeout or error
*/
virtual bool
wait_for_async_event(uint32_t timeout_ms) { return false; }
@@ -61,9 +123,6 @@ namespace nvenc {
min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0);
const NV_ENC_DEVICE_TYPE device_type;
void *const device;
std::unique_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc;
void *encoder = nullptr;
@@ -75,11 +134,17 @@ namespace nvenc {
bool rfi = false;
} encoder_params;
// Derived classes set these variables
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr;
void *async_event_handle = nullptr;
std::string last_nvenc_error_string;
std::string last_error_string;
// Derived classes set these variables
void *device = nullptr; ///< Platform-specific handle of encoding device.
///< Should be set in constructor or `init_library()`.
std::shared_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc; ///< Function pointers list produced by `NvEncodeAPICreateInstance()`.
///< Should be set in `init_library()`.
NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr; ///< Platform-specific input surface registered with `NvEncRegisterResource()`.
///< Should be set in `create_and_register_input_buffer()`.
void *async_event_handle = nullptr; ///< (optional) Platform-specific handle of event object event.
///< Can be set in constructor or `init_library()`, must override `wait_for_async_event()`.
private:
NV_ENC_OUTPUT_PTR output_bitstream = nullptr;

View File

@@ -1,16 +1,21 @@
/**
* @file src/nvenc/nvenc_colorspace.h
* @brief Declarations for base NVENC colorspace.
* @brief Declarations for NVENC YUV colorspace.
*/
#pragma once
#include <ffnvcodec/nvEncodeAPI.h>
namespace nvenc {
/**
* @brief YUV colorspace and color range.
*/
struct nvenc_colorspace_t {
NV_ENC_VUI_COLOR_PRIMARIES primaries; ///< Color primaries; copied into the encoder's colour primaries setting.
NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function; ///< Transfer characteristics; copied into the encoder's transfer characteristics setting.
NV_ENC_VUI_MATRIX_COEFFS matrix; ///< Matrix coefficients; copied into the encoder's colour matrix setting.
bool full_range; ///< Full color range flag; copied into the encoder's full range / color range setting.
};
} // namespace nvenc

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_config.h
* @brief Declarations for base NVENC configuration.
* @brief Declarations for NVENC encoder configuration.
*/
#pragma once
@@ -12,6 +12,9 @@ namespace nvenc {
full_resolution, ///< Better overall statistics, slower and uses more extra vram
};
/**
* @brief NVENC encoder configuration.
*/
struct nvenc_config {
// Quality preset from 1 to 7, higher is slower
int quality_preset = 1;

View File

@@ -1,43 +1,29 @@
/**
* @file src/nvenc/nvenc_d3d11.cpp
* @brief Definitions for base NVENC d3d11.
* @brief Definitions for abstract Direct3D11 NVENC encoder.
*/
#include "src/logging.h"
#ifdef _WIN32
#include "nvenc_d3d11.h"
#include "nvenc_utils.h"
namespace nvenc {
nvenc_d3d11::nvenc_d3d11(ID3D11Device *d3d_device):
nvenc_base(NV_ENC_DEVICE_TYPE_DIRECTX, d3d_device),
d3d_device(d3d_device) {
}
nvenc_d3d11::~nvenc_d3d11() {
if (encoder) destroy_encoder();
if (dll) {
FreeLibrary(dll);
dll = NULL;
}
}
ID3D11Texture2D *
nvenc_d3d11::get_input_texture() {
return d3d_input_texture.GetInterfacePtr();
}
bool
nvenc_d3d11::init_library() {
if (dll) return true;
#ifdef _WIN64
auto dll_name = "nvEncodeAPI64.dll";
constexpr auto dll_name = "nvEncodeAPI64.dll";
#else
auto dll_name = "nvEncodeAPI.dll";
constexpr auto dll_name = "nvEncodeAPI.dll";
#endif
if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
@@ -45,7 +31,7 @@ namespace nvenc {
auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER);
if (nvenc_failed(create_instance(new_nvenc.get()))) {
BOOST_LOG(error) << "NvEncodeAPICreateInstance failed: " << last_error_string;
BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string;
}
else {
nvenc = std::move(new_nvenc);
@@ -53,11 +39,11 @@ namespace nvenc {
}
}
else {
BOOST_LOG(error) << "No NvEncodeAPICreateInstance in " << dll_name;
BOOST_LOG(error) << "NvEnc: No NvEncodeAPICreateInstance() in " << dll_name;
}
}
else {
BOOST_LOG(debug) << "Couldn't load NvEnc library " << dll_name;
BOOST_LOG(debug) << "NvEnc: Couldn't load NvEnc library " << dll_name;
}
if (dll) {
@@ -68,43 +54,5 @@ namespace nvenc {
return false;
}
bool
nvenc_d3d11::create_and_register_input_buffer() {
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEncRegisterResource failed: " << last_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
} // namespace nvenc
#endif

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_d3d11.h
* @brief Declarations for base NVENC d3d11.
* @brief Declarations for abstract Direct3D11 NVENC encoder.
*/
#pragma once
#ifdef _WIN32
@@ -14,25 +14,33 @@ namespace nvenc {
_COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
_COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
_COM_SMARTPTR_TYPEDEF(IDXGIDevice, IID_IDXGIDevice);
_COM_SMARTPTR_TYPEDEF(IDXGIAdapter, IID_IDXGIAdapter);
class nvenc_d3d11 final: public nvenc_base {
/**
* @brief Abstract Direct3D11 NVENC encoder.
* Encapsulates common code used by native and interop implementations.
*/
class nvenc_d3d11: public nvenc_base {
public:
nvenc_d3d11(ID3D11Device *d3d_device);
explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type):
nvenc_base(device_type) {}
~nvenc_d3d11();
ID3D11Texture2D *
get_input_texture();
/**
* @brief Get input surface texture.
* @return Input surface texture.
*/
virtual ID3D11Texture2D *
get_input_texture() = 0;
private:
protected:
bool
init_library() override;
bool
create_and_register_input_buffer() override;
private:
HMODULE dll = NULL;
const ID3D11DevicePtr d3d_device;
ID3D11Texture2DPtr d3d_input_texture;
};
} // namespace nvenc

View File

@@ -0,0 +1,71 @@
/**
* @file src/nvenc/nvenc_d3d11_native.cpp
* @brief Definitions for native Direct3D11 NVENC encoder.
*/
#ifdef _WIN32
#include "nvenc_d3d11_native.h"
#include "nvenc_utils.h"
namespace nvenc {
nvenc_d3d11_native::nvenc_d3d11_native(ID3D11Device *d3d_device):
nvenc_d3d11(NV_ENC_DEVICE_TYPE_DIRECTX),
d3d_device(d3d_device) {
device = d3d_device;
}
/**
 * @brief Destroy the encoder if one still exists.
 */
nvenc_d3d11_native::~nvenc_d3d11_native() {
  if (encoder != nullptr) {
    destroy_encoder();
  }
}
/**
 * @brief Get input surface texture.
 * @return Raw interface pointer held by `d3d_input_texture`.
 */
ID3D11Texture2D *
nvenc_d3d11_native::get_input_texture() {
  ID3D11Texture2D *const texture = d3d_input_texture.GetInterfacePtr();
  return texture;
}
bool
nvenc_d3d11_native::create_and_register_input_buffer() {
if (encoder_params.buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
BOOST_LOG(error) << "NvEnc: 10-bit 4:4:4 encoding is incompatible with D3D11 surface formats, use CUDA interop";
return false;
}
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
} // namespace nvenc
#endif

View File

@@ -0,0 +1,38 @@
/**
* @file src/nvenc/nvenc_d3d11_native.h
* @brief Declarations for native Direct3D11 NVENC encoder.
*/
#pragma once
#ifdef _WIN32
#include <comdef.h>
#include <d3d11.h>
#include "nvenc_d3d11.h"
namespace nvenc {
/**
* @brief Native Direct3D11 NVENC encoder.
* The Direct3D11 device itself is used as the encoding device, and the
* Direct3D11 input texture is registered with NVENC directly.
*/
class nvenc_d3d11_native final: public nvenc_d3d11 {
public:
/**
* @param d3d_device Direct3D11 device used for encoding.
*/
explicit nvenc_d3d11_native(ID3D11Device *d3d_device);
/**
* @brief Destroys the encoder if it still exists.
*/
~nvenc_d3d11_native();
/**
* @brief Get input surface texture.
* @return Texture created by `create_and_register_input_buffer()`.
*/
ID3D11Texture2D *
get_input_texture() override;
private:
/**
* @brief Create the input texture and register it with NVENC.
* Fails for `NV_ENC_BUFFER_FORMAT_YUV444_10BIT`.
* @return `true` on success, `false` on error
*/
bool
create_and_register_input_buffer() override;
const ID3D11DevicePtr d3d_device; ///< Direct3D11 device passed to the constructor, used to create the input texture.
ID3D11Texture2DPtr d3d_input_texture; ///< Input surface texture, created in `create_and_register_input_buffer()`.
};
} // namespace nvenc
#endif

View File

@@ -0,0 +1,267 @@
/**
* @file src/nvenc/nvenc_d3d11_on_cuda.cpp
* @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces.
*/
#ifdef _WIN32
#include "nvenc_d3d11_on_cuda.h"
#include "nvenc_utils.h"
namespace nvenc {
nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device):
nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA),
d3d_device(d3d_device) {
}
/**
 * @brief Release everything owned by the interop encoder: the encoder itself, the CUDA
 *        resources and context, and finally the CUDA library handle.
 *        Failures are logged and cleanup continues.
 */
nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() {
  if (encoder) {
    destroy_encoder();
  }

  if (cuda_context) {
    {
      // Keep the context pushed only for this inner scope, so it is popped before cuCtxDestroy().
      auto context_scope = push_context();

      if (cuda_d3d_input_texture) {
        const bool unregistered = cuda_succeeded(cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture));
        if (!unregistered) {
          BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error;
        }
        cuda_d3d_input_texture = nullptr;
      }

      if (cuda_surface) {
        const bool freed = cuda_succeeded(cuda_functions.cuMemFree(cuda_surface));
        if (!freed) {
          BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error;
        }
        cuda_surface = 0;
      }
    }

    const bool destroyed = cuda_succeeded(cuda_functions.cuCtxDestroy(cuda_context));
    if (!destroyed) {
      BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error;
    }
    cuda_context = nullptr;
  }

  // The function table is reset together with the library handle it was loaded from.
  if (cuda_functions.dll) {
    FreeLibrary(cuda_functions.dll);
    cuda_functions = {};
  }
}
/**
 * @brief Get input surface texture.
 * @return Raw interface pointer held by `d3d_input_texture`.
 */
ID3D11Texture2D *
nvenc_d3d11_on_cuda::get_input_texture() {
  ID3D11Texture2D *const texture = d3d_input_texture.GetInterfacePtr();
  return texture;
}
bool
nvenc_d3d11_on_cuda::init_library() {
if (!nvenc_d3d11::init_library()) return false;
constexpr auto dll_name = "nvcuda.dll";
if ((cuda_functions.dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
auto load_function = [&]<typename T>(T &location, auto symbol) -> bool {
location = (T) GetProcAddress(cuda_functions.dll, symbol);
return location != nullptr;
};
if (!load_function(cuda_functions.cuInit, "cuInit") ||
!load_function(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") ||
!load_function(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") ||
!load_function(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") ||
!load_function(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") ||
!load_function(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") ||
!load_function(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") ||
!load_function(cuda_functions.cuMemFree, "cuMemFree_v2") ||
!load_function(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") ||
!load_function(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") ||
!load_function(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") ||
!load_function(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") ||
!load_function(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") ||
!load_function(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2")) {
BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name;
FreeLibrary(cuda_functions.dll);
cuda_functions = {};
}
}
else {
BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name;
}
if (cuda_functions.dll) {
IDXGIDevicePtr dxgi_device;
IDXGIAdapterPtr dxgi_adapter;
if (d3d_device &&
SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) &&
SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter))) {
CUdevice cuda_device;
if (cuda_succeeded(cuda_functions.cuInit(0)) &&
cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) &&
cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) &&
cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context))) {
device = cuda_context;
}
else {
BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error;
}
}
else {
BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop";
}
}
return device != nullptr;
}
bool
nvenc_d3d11_on_cuda::create_and_register_input_buffer() {
if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding";
return false;
}
if (!d3d_input_texture) {
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = encoder_params.width;
desc.Height = encoder_params.height * 3; // Planar YUV
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
return false;
}
}
{
auto autopop_context = push_context();
if (!autopop_context) return false;
if (!cuda_d3d_input_texture) {
if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource(
&cuda_d3d_input_texture,
d3d_input_texture,
CU_GRAPHICS_REGISTER_FLAGS_NONE))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error;
return false;
}
}
if (!cuda_surface) {
if (cuda_failed(cuda_functions.cuMemAllocPitch(
&cuda_surface,
&cuda_surface_pitch,
// Planar 16-bit YUV
encoder_params.width * 2,
encoder_params.height * 3, 16))) {
BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: error " << last_cuda_error;
return false;
}
}
}
if (!registered_input_buffer) {
NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
register_resource.width = encoder_params.width;
register_resource.height = encoder_params.height;
register_resource.pitch = cuda_surface_pitch;
register_resource.resourceToRegister = (void *) cuda_surface;
register_resource.bufferFormat = encoder_params.buffer_format;
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
return false;
}
registered_input_buffer = register_resource.registeredResource;
}
return true;
}
bool
nvenc_d3d11_on_cuda::synchronize_input_buffer() {
auto autopop_context = push_context();
if (!autopop_context) return false;
if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error;
return false;
}
auto unmap = [&]() -> bool {
if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error;
return false;
}
return true;
};
auto unmap_guard = util::fail_guard(unmap);
CUarray input_texture_array;
if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) {
BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error;
return false;
}
{
CUDA_MEMCPY2D copy_params = {};
copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params.srcArray = input_texture_array;
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.dstDevice = cuda_surface;
copy_params.dstPitch = cuda_surface_pitch;
// Planar 16-bit YUV
copy_params.WidthInBytes = encoder_params.width * 2;
copy_params.Height = encoder_params.height * 3;
if (cuda_failed(cuda_functions.cuMemcpy2D(&copy_params))) {
BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error;
return false;
}
}
unmap_guard.disable();
return unmap();
}
/**
 * Records `result` in `last_cuda_error` and reports whether it is `CUDA_SUCCESS`.
 *
 * @param result Status code returned by a CUDA driver call.
 * @return `true` when `result` equals `CUDA_SUCCESS`.
 */
bool
nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) {
  const bool ok = (CUDA_SUCCESS == result);
  // Always remember the latest status so callers can log it afterwards.
  last_cuda_error = result;
  return ok;
}
/**
 * Records `result` in `last_cuda_error` and reports whether it differs from `CUDA_SUCCESS`.
 *
 * @param result Status code returned by a CUDA driver call.
 * @return `true` when `result` is anything other than `CUDA_SUCCESS`.
 */
bool
nvenc_d3d11_on_cuda::cuda_failed(CUresult result) {
  // cuda_succeeded() stores the status in last_cuda_error; failure is its negation.
  return !cuda_succeeded(result);
}
/**
 * Pops the current CUDA context if this guard holds a non-null pushed context.
 * A failed pop is logged and otherwise ignored.
 */
nvenc_d3d11_on_cuda::autopop_context::~autopop_context() {
  if (!pushed_context) {
    // Nothing was pushed, so there is nothing to undo.
    return;
  }

  CUcontext discarded_context;
  const auto pop_status = parent.cuda_functions.cuCtxPopCurrent(&discarded_context);
  if (parent.cuda_failed(pop_status)) {
    BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error;
  }
}
/**
 * Pushes `cuda_context` via cuCtxPushCurrent() and returns a guard whose
 * destructor pops it again.
 *
 * @return A guard that converts to `true` when the context was pushed. If
 *         `cuda_context` is null or the push fails, the error is logged and
 *         the returned guard converts to `false` (and pops nothing).
 */
nvenc_d3d11_on_cuda::autopop_context
nvenc_d3d11_on_cuda::push_context() {
  if (!cuda_context) {
    // No driver call is made here, so last_cuda_error would be stale and
    // misleading; report the real cause instead.
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() skipped: CUDA context is not created";
    return { *this, nullptr };
  }

  if (cuda_failed(cuda_functions.cuCtxPushCurrent(cuda_context))) {
    BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error;
    return { *this, nullptr };
  }

  return { *this, cuda_context };
}
} // namespace nvenc
#endif

View File

@@ -0,0 +1,96 @@
/**
* @file src/nvenc/nvenc_d3d11_on_cuda.h
* @brief Declarations for CUDA NVENC encoder with Direct3D11 input surfaces.
*/
#pragma once
#ifdef _WIN32
#include "nvenc_d3d11.h"
#include <ffnvcodec/dynlink_cuda.h>
namespace nvenc {
/**
* @brief Interop Direct3D11 on CUDA NVENC encoder.
* Input surface is Direct3D11, encoding is performed by CUDA.
*/
class nvenc_d3d11_on_cuda final: public nvenc_d3d11 {
public:
  /**
   * @param d3d_device Direct3D11 device that will create input surface texture.
   *                   CUDA encoding device will be derived from it.
   */
  explicit nvenc_d3d11_on_cuda(ID3D11Device *d3d_device);
  ~nvenc_d3d11_on_cuda();

  ID3D11Texture2D *
  get_input_texture() override;

private:
  bool
  init_library() override;

  bool
  create_and_register_input_buffer() override;

  bool
  synchronize_input_buffer() override;

  /**
   * @brief Store `result` in `last_cuda_error` and test it for success.
   * @param result Status code returned by a CUDA driver call.
   * @return `true` if `result` is `CUDA_SUCCESS`.
   */
  bool
  cuda_succeeded(CUresult result);

  /**
   * @brief Store `result` in `last_cuda_error` and test it for failure.
   * @param result Status code returned by a CUDA driver call.
   * @return `true` if `result` is not `CUDA_SUCCESS`.
   */
  bool
  cuda_failed(CUresult result);

  /**
   * @brief Scope guard that pops the CUDA context in its destructor.
   * Holds the context that was pushed, or `nullptr` when nothing was pushed.
   */
  struct autopop_context {
    autopop_context(nvenc_d3d11_on_cuda &parent, CUcontext pushed_context):
        parent(parent),
        pushed_context(pushed_context) {
    }

    ~autopop_context();

    // The destructor pops the context, so a copy would pop it a second time.
    // Returning by value and `auto guard = push_context();` still work because
    // C++17 guaranteed copy elision needs neither copy nor move.
    autopop_context(const autopop_context &) = delete;
    autopop_context &
    operator=(const autopop_context &) = delete;

    /**
     * @return `true` if a context was pushed and will be popped on destruction.
     */
    explicit
    operator bool() const {
      return pushed_context != nullptr;
    }

    nvenc_d3d11_on_cuda &parent;
    CUcontext pushed_context = nullptr;
  };

  /**
   * @brief Push `cuda_context` and return a guard that pops it.
   * @return Guard converting to `false` if the context could not be pushed.
   */
  autopop_context
  push_context();

  // NOTE(review): cuda_functions below also carries a `dll` member - confirm which
  // of the two holds the loaded library handle.
  HMODULE dll = nullptr;
  const ID3D11DevicePtr d3d_device;  ///< Direct3D11 device passed to the constructor.
  ID3D11Texture2DPtr d3d_input_texture;

  /**
   * CUDA driver API entry points, held as function pointers.
   * Presumably resolved at runtime by init_library() - confirm.
   */
  struct {
    tcuInit *cuInit;
    tcuD3D11GetDevice *cuD3D11GetDevice;
    tcuCtxCreate_v2 *cuCtxCreate;
    tcuCtxDestroy_v2 *cuCtxDestroy;
    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
    tcuMemAllocPitch_v2 *cuMemAllocPitch;
    tcuMemFree_v2 *cuMemFree;
    tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
    tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
    tcuGraphicsMapResources *cuGraphicsMapResources;
    tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
    tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
    tcuMemcpy2D_v2 *cuMemcpy2D;
    HMODULE dll;
  } cuda_functions = {};

  CUresult last_cuda_error = CUDA_SUCCESS;  ///< Status passed to the most recent cuda_succeeded()/cuda_failed() call.
  CUcontext cuda_context = nullptr;  ///< Context pushed by push_context().
  CUgraphicsResource cuda_d3d_input_texture = nullptr;  ///< Graphics resource mapped during input synchronization.
  CUdeviceptr cuda_surface = 0;  ///< Pitched device allocation that is registered with NVENC as the input resource.
  size_t cuda_surface_pitch = 0;  ///< Row pitch of `cuda_surface` in bytes.
};
} // namespace nvenc
#endif

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_encoded_frame.h
* @brief Declarations for base NVENC encoded frame.
* @brief Declarations for NVENC encoded frame.
*/
#pragma once
@@ -8,10 +8,15 @@
#include <vector>
namespace nvenc {
/**
 * @brief Encoded frame.
 * Plain aggregate; a default-constructed instance has empty data, index 0 and both flags cleared.
 */
struct nvenc_encoded_frame {
  std::vector<uint8_t> data;  ///< Bytes of the encoded frame.
  uint64_t frame_index { 0 };  ///< Index of the frame.
  bool idr { false };  ///< Presumably set when the frame is an IDR frame - confirm against the encoder.
  bool after_ref_frame_invalidation { false };  ///< Presumably set for the first frame produced after reference frame invalidation - confirm.
};
} // namespace nvenc

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_utils.cpp
* @brief Definitions for base NVENC utilities.
* @brief Definitions for NVENC utilities.
*/
#include <cassert>
@@ -18,6 +18,12 @@ namespace nvenc {
case NV_ENC_BUFFER_FORMAT_NV12:
return DXGI_FORMAT_NV12;
case NV_ENC_BUFFER_FORMAT_AYUV:
return DXGI_FORMAT_AYUV;
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return DXGI_FORMAT_R16_UINT;
default:
return DXGI_FORMAT_UNKNOWN;
}
@@ -33,6 +39,12 @@ namespace nvenc {
case platf::pix_fmt_e::p010:
return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
case platf::pix_fmt_e::ayuv:
return NV_ENC_BUFFER_FORMAT_AYUV;
case platf::pix_fmt_e::yuv444p16:
return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
default:
return NV_ENC_BUFFER_FORMAT_UNDEFINED;
}

View File

@@ -1,6 +1,6 @@
/**
* @file src/nvenc/nvenc_utils.h
* @brief Declarations for base NVENC utilities.
* @brief Declarations for NVENC utilities.
*/
#pragma once