Add standalone NVENC encoder

This commit is contained in:
ns6089
2023-04-25 16:38:37 +03:00
committed by Cameron Gutman
parent 7fe52bc5f8
commit 68fa43a61c
34 changed files with 2124 additions and 642 deletions

View File

@@ -13,6 +13,7 @@
#include "src/main.h"
#include "src/thread_safe.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
extern "C" {
#include <moonlight-common-c/src/Limelight.h>
@@ -45,6 +46,9 @@ namespace boost {
namespace video {
struct config_t;
} // namespace video
namespace nvenc {
class nvenc_base;
}
namespace platf {
// Limited by bits in activeGamepadMask
@@ -344,15 +348,28 @@ namespace platf {
std::optional<null_t> null;
};
struct hwdevice_t {
// Abstract base for all encode devices (replaces the old hwdevice_t).
// An encode device converts captured frames into encoder input.
struct encode_device_t {
virtual ~encode_device_t() = default;
// Convert the captured image into the device's output surface.
// Implementations below return -1 on failure (confirm exact contract at call sites).
virtual int
convert(platf::img_t &img) = 0;
// Target colorspace; derived classes read this when applying color conversion.
video::sunshine_colorspace_t colorspace;
};
struct avcodec_encode_device_t: encode_device_t {
void *data {};
AVFrame *frame {};
virtual int
convert(platf::img_t &img) {
int
convert(platf::img_t &img) override {
return -1;
}
virtual void
apply_colorspace() {
}
/**
* implementations must take ownership of 'frame'
*/
@@ -362,9 +379,6 @@ namespace platf {
return -1;
};
virtual void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {};
/**
* Implementations may set parameters during initialization of the hwframes context
*/
@@ -378,8 +392,13 @@ namespace platf {
prepare_to_derive_context(int hw_device_type) {
return 0;
};
};
virtual ~hwdevice_t() = default;
// Encode device variant backed by the standalone NVENC encoder
// (bypasses avcodec entirely).
struct nvenc_encode_device_t: encode_device_t {
// Create/configure the NVENC session for the given client stream config
// and colorspace. Returns true on success.
virtual bool
init_encoder(const video::config_t &client_config, const video::sunshine_colorspace_t &colorspace) = 0;
// Non-owning pointer to the NVENC backend; set by the platform
// implementation (see d3d_nvenc_encode_device_t::init_device).
nvenc::nvenc_base *nvenc = nullptr;
};
enum class capture_e : int {
@@ -440,9 +459,14 @@ namespace platf {
virtual int
dummy_img(img_t *img) = 0;
virtual std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) {
return std::make_shared<hwdevice_t>();
virtual std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) {
return nullptr;
}
virtual std::unique_ptr<nvenc_encode_device_t>
make_nvenc_encode_device(pix_fmt_e pix_fmt) {
return nullptr;
}
virtual bool

View File

@@ -88,7 +88,7 @@ namespace cuda {
return 0;
}
class cuda_t: public platf::hwdevice_t {
class cuda_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height) {
@@ -145,8 +145,8 @@ namespace cuda {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
auto tex = tex_t::make(height, width * 4);
if (!tex) {
@@ -223,19 +223,19 @@ namespace cuda {
}
};
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
if (init()) {
return nullptr;
}
std::shared_ptr<cuda_t> cuda;
std::unique_ptr<cuda_t> cuda;
if (vram) {
cuda = std::make_shared<cuda_vram_t>();
cuda = std::make_unique<cuda_vram_t>();
}
else {
cuda = std::make_shared<cuda_ram_t>();
cuda = std::make_unique<cuda_ram_t>();
}
if (cuda->init(width, height)) {
@@ -675,9 +675,9 @@ namespace cuda {
return platf::capture_e::ok;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
return ::cuda::make_hwdevice(width, height, true);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) {
return ::cuda::make_avcodec_encode_device(width, height, true);
}
std::shared_ptr<platf::img_t>

View File

@@ -56,12 +56,11 @@ public:
};
} // namespace platf
namespace video {
using __float4 = float[4];
using __float3 = float[3];
using __float2 = float[2];
// End special declarations
struct alignas(16) color_t {
namespace cuda {
struct alignas(16) cuda_color_t {
float4 color_vec_y;
float4 color_vec_u;
float4 color_vec_v;
@@ -69,22 +68,8 @@ struct alignas(16) color_t {
float2 range_uv;
};
struct alignas(16) color_extern_t {
__float4 color_vec_y;
__float4 color_vec_u;
__float4 color_vec_v;
__float2 range_y;
__float2 range_uv;
};
static_assert(sizeof(video::color_t) == sizeof(cuda::cuda_color_t), "color matrix struct mismatch");
static_assert(sizeof(video::color_t) == sizeof(video::color_extern_t), "color matrix struct mismatch");
extern color_t colors[6];
} // namespace video
// End special declarations
namespace cuda {
auto constexpr INVALID_TEXTURE = std::numeric_limits<cudaTextureObject_t>::max();
template<class T>
@@ -144,7 +129,7 @@ inline __device__ float3 bgra_to_rgb(float4 vec) {
return make_float3(vec.z, vec.y, vec.x);
}
inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float2 calcUV(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_u = color_matrix->color_vec_u;
float4 vec_v = color_matrix->color_vec_v;
@@ -157,7 +142,7 @@ inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_
return make_float2(u, v);
}
inline __device__ float calcY(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float calcY(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_y = color_matrix->color_vec_y;
return (dot(pixel, make_float3(vec_y)) + vec_y.w) * color_matrix->range_y.x + color_matrix->range_y.y;
@@ -166,7 +151,7 @@ inline __device__ float calcY(float3 pixel, const video::color_t *const color_ma
__global__ void RGBA_to_NV12(
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
float scale, const viewport_t viewport, const video::color_t *const color_matrix) {
float scale, const viewport_t viewport, const cuda_color_t *const color_matrix) {
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
int idY = (threadIdx.y + blockDim.y * blockIdx.y) * 2;
@@ -297,7 +282,7 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device");
CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
auto ptr = make_ptr<video::color_t>();
auto ptr = make_ptr<cuda_color_t>();
if(!ptr) {
return std::nullopt;
}
@@ -316,32 +301,13 @@ int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std:
dim3 block(threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (cuda_color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
}
void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch(colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
color_p = &video::colors[0];
};
if(color_range > 1) {
// Full range
++color_p;
}
// Upload the color conversion matrix for the requested colorspace to the GPU.
// Replaces the old set_colorspace(colorspace, color_range) switch: the
// vector lookup is now centralized in video::color_vectors_from_colorspace().
void sws_t::apply_colorspace(const video::sunshine_colorspace_t& colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
// Best-effort copy; CU_CHECK_IGNORE logs but does not propagate the error.
CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda");
}

View File

@@ -6,6 +6,8 @@
#if defined(SUNSHINE_BUILD_CUDA)
#include "src/video_colorspace.h"
#include <cstdint>
#include <memory>
#include <optional>
@@ -13,7 +15,7 @@
#include <vector>
namespace platf {
class hwdevice_t;
class avcodec_encode_device_t;
class img_t;
} // namespace platf
@@ -23,8 +25,8 @@ namespace cuda {
std::vector<std::string>
display_names();
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
int
init();
} // namespace cuda
@@ -109,7 +111,7 @@ namespace cuda {
convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
int
load_ram(platf::img_t &img, cudaArray_t array);

View File

@@ -607,27 +607,8 @@ namespace egl {
}
void
sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &video::colors[0];
};
if (color_range > 1) {
// Full range
++color_p;
}
sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
std::string_view members[] {
util::view(color_p->color_vec_y),
@@ -741,7 +722,7 @@ namespace egl {
gl::ctx.UseProgram(sws.program[1].handle());
gl::ctx.Uniform1fv(loc_width_i, 1, &width_i);
auto color_p = &video::colors[0];
auto color_p = video::color_vectors_from_colorspace(video::colorspace_e::rec601, false);
std::pair<const char *, std::string_view> members[] {
std::make_pair("color_vec_y", util::view(color_p->color_vec_y)),
std::make_pair("color_vec_u", util::view(color_p->color_vec_u)),

View File

@@ -14,6 +14,7 @@
#include "src/main.h"
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
#define SUNSHINE_STRINGIFY_HELPER(x) #x
#define SUNSHINE_STRINGIFY(x) SUNSHINE_STRINGIFY_HELPER(x)
@@ -327,7 +328,7 @@ namespace egl {
load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
// The first texture is the monitor image.
// The second texture is the cursor image

View File

@@ -768,13 +768,13 @@ namespace platf {
return capture_e::ok;
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
capture_e
@@ -843,10 +843,10 @@ namespace platf {
display_vram_t(mem_type_e mem_type):
display_t(mem_type) {}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
return va::make_avcodec_encode_device(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
}
BOOST_LOG(error) << "Unsupported pixel format for egl::display_vram_t: "sv << platf::from_pix_fmt(pix_fmt);

View File

@@ -290,9 +290,9 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf);
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf);
class va_t: public platf::hwdevice_t {
class va_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height, file_t &&render_device) {
@@ -304,7 +304,7 @@ namespace va {
return -1;
}
this->data = (void *) vaapi_make_hwdevice_ctx;
this->data = (void *) vaapi_init_avcodec_hardware_input_buffer;
gbm.reset(gbm::create_device(file.el));
if (!gbm) {
@@ -398,8 +398,8 @@ namespace va {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
}
va::display_t::pointer va_display;
@@ -526,7 +526,7 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf) {
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *base, AVBufferRef **hw_device_buf) {
if (!va::initialize) {
BOOST_LOG(warning) << "libva not loaded"sv;
return -1;
@@ -653,10 +653,10 @@ namespace va {
return true;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
if (vram) {
auto egl = std::make_shared<va::va_vram_t>();
auto egl = std::make_unique<va::va_vram_t>();
if (egl->init(width, height, std::move(card), offset_x, offset_y)) {
return nullptr;
}
@@ -665,7 +665,7 @@ namespace va {
}
else {
auto egl = std::make_shared<va::va_ram_t>();
auto egl = std::make_unique<va::va_ram_t>();
if (egl->init(width, height, std::move(card))) {
return nullptr;
}
@@ -674,8 +674,8 @@ namespace va {
}
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram) {
auto render_device = config::video.adapter_name.empty() ? "/dev/dri/renderD128" : config::video.adapter_name.c_str();
file_t file = open(render_device, O_RDWR);
@@ -686,11 +686,11 @@ namespace va {
return nullptr;
}
return make_hwdevice(width, height, std::move(file), offset_x, offset_y, vram);
return make_avcodec_encode_device(width, height, std::move(file), offset_x, offset_y, vram);
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
return make_hwdevice(width, height, 0, 0, vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
return make_avcodec_encode_device(width, height, 0, 0, vram);
}
} // namespace va

View File

@@ -18,12 +18,12 @@ namespace va {
* offset_y --> Vertical offset of the image in the texture
* file_t card --> The file descriptor of the render device used for encoding
*/
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
// Ensure the render device pointed to by fd is capable of encoding h264 with the hevc_mode configured
bool

View File

@@ -215,13 +215,13 @@ namespace wl {
return 0;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
std::shared_ptr<platf::img_t>
@@ -323,13 +323,13 @@ namespace wl {
return img;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, 0, 0, true);
return va::make_avcodec_encode_device(width, height, 0, 0, true);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
int

View File

@@ -553,19 +553,19 @@ namespace platf {
return std::make_shared<x11_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
#ifdef SUNSHINE_BUILD_CUDA
if (mem_type == mem_type_e::cuda) {
return cuda::make_hwdevice(width, height, false);
return cuda::make_avcodec_encode_device(width, height, false);
}
#endif
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
int

View File

@@ -94,15 +94,15 @@ namespace platf {
return std::make_shared<av_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (pix_fmt == pix_fmt_e::yuv420p) {
av_capture.pixelFormat = kCVPixelFormatType_32BGRA;
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
else if (pix_fmt == pix_fmt_e::nv12) {
auto device = std::make_shared<nv12_zero_device>();
auto device = std::make_unique<nv12_zero_device>();
device->init(static_cast<void *>(av_capture), setResolution, setPixelFormat);

View File

@@ -70,10 +70,6 @@ namespace platf {
return 0;
}
void
nv12_zero_device::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
}
int
nv12_zero_device::init(void *display, resolution_fn_t resolution_fn, pixel_format_fn_t pixel_format_fn) {
pixel_format_fn(display, '420v');

View File

@@ -8,7 +8,7 @@
namespace platf {
class nv12_zero_device: public hwdevice_t {
class nv12_zero_device: public avcodec_encode_device_t {
// display holds a pointer to an av_video object. Since the namespaces of AVFoundation
// and FFMPEG collide, we need this opaque pointer and cannot use the definition
void *display;
@@ -27,8 +27,6 @@ namespace platf {
convert(img_t &img);
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
};
} // namespace platf

View File

@@ -13,6 +13,7 @@
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video.h"
namespace platf::dxgi {
extern const char *format_str[];
@@ -215,8 +216,11 @@ namespace platf::dxgi {
int
init(const ::video::config_t &config, const std::string &display_name);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override;
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override;
std::unique_ptr<nvenc_encode_device_t>
make_nvenc_encode_device(pix_fmt_e pix_fmt) override;
sampler_state_t sampler_linear;

View File

@@ -16,7 +16,11 @@ extern "C" {
#include "display.h"
#include "misc.h"
#include "src/config.h"
#include "src/main.h"
#include "src/nvenc/nvenc_config.h"
#include "src/nvenc/nvenc_d3d11.h"
#include "src/nvenc/nvenc_utils.h"
#include "src/video.h"
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
@@ -361,10 +365,10 @@ namespace platf::dxgi {
return compile_shader(file, "main_vs", "vs_5_0");
}
class hwdevice_t: public platf::hwdevice_t {
class d3d_base_encode_device final {
public:
int
convert(platf::img_t &img_base) override {
convert(platf::img_t &img_base) {
// Garbage collect mapped capture images whose weak references have expired
for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) {
if (it->second.img_weak.expired()) {
@@ -413,28 +417,15 @@ namespace platf::dxgi {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &::video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &::video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &::video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &::video::colors[0];
};
apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
if (color_range > 1) {
// Full range
++color_p;
if (!color_vectors) {
BOOST_LOG(error) << "No vector data for colorspace"sv;
return;
}
auto color_matrix = make_buffer((device_t::pointer) data, *color_p);
auto color_matrix = make_buffer(device.get(), *color_vectors);
if (!color_matrix) {
BOOST_LOG(warning) << "Failed to create color matrix"sv;
return;
@@ -445,78 +436,14 @@ namespace platf::dxgi {
this->color_matrix = std::move(color_matrix);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
init_output(ID3D11Texture2D *frame_texture, int width, int height) {
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
output_texture.reset(frame_texture);
auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
auto out_width = frame->width;
auto out_height = frame->height;
auto out_width = width;
auto out_height = height;
float in_width = display->width;
float in_height = display->height;
@@ -533,10 +460,6 @@ namespace platf::dxgi {
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
hwframe_texture.reset(frame_texture);
float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; // aligned to 16-byte
info_scene = make_buffer(device.get(), info_in);
@@ -550,7 +473,7 @@ namespace platf::dxgi {
D3D11_RTV_DIMENSION_TEXTURE2D
};
auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -558,7 +481,7 @@ namespace platf::dxgi {
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -574,9 +497,7 @@ namespace platf::dxgi {
}
int
init(
std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
pix_fmt_e pix_fmt) {
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
@@ -615,8 +536,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}
data = device.get();
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
if (status) {
@@ -673,7 +592,13 @@ namespace platf::dxgi {
return -1;
}
color_matrix = make_buffer(device.get(), ::video::colors[0]);
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
return -1;
}
color_matrix = make_buffer(device.get(), *default_color_vectors);
if (!color_matrix) {
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
return -1;
@@ -721,7 +646,6 @@ namespace platf::dxgi {
return 0;
}
private:
struct encoder_img_ctx_t {
// Used to determine if the underlying texture changes.
// Not safe for actual use by the encoder!
@@ -789,9 +713,6 @@ namespace platf::dxgi {
return 0;
}
public:
frame_t hwframe;
::video::color_t *color_p;
buf_t info_scene;
@@ -805,9 +726,6 @@ namespace platf::dxgi {
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
// The image referenced by hwframe
texture2d_t hwframe_texture;
// d3d_img_t::id -> encoder_img_ctx_t
// These store the encoder textures for each img_t that passes through
// convert(). We can't store them in the img_t itself because it is shared
@@ -830,6 +748,149 @@ namespace platf::dxgi {
device_t device;
device_ctx_t device_ctx;
texture2d_t output_texture;
};
// avcodec-facing D3D11 encode device. Thin adapter that forwards the
// conversion/colorspace work to the shared d3d_base_encode_device and
// handles the avcodec-specific frame/hwframes plumbing.
class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
public:
// Initialize the underlying D3D11 device; exposes the raw device via
// the inherited 'data' pointer for avcodec hwdevice creation.
int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
int result = base.init(display, adapter_p, pix_fmt);
data = base.device.get();
return result;
}
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
// Applies the inherited 'colorspace' member to the base device.
void
apply_colorspace() override {
base.apply_colorspace(colorspace);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
// Takes ownership of 'frame' (per the encode_device contract) and wires
// its backing D3D11 texture into the base device's render targets.
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
return base.init_output(frame_texture, frame->width, frame->height);
}
private:
// Shared conversion pipeline (device, shaders, render targets).
d3d_base_encode_device base;
// Owning reference to the avcodec frame set via set_frame().
frame_t hwframe;
};
// NVENC-facing D3D11 encode device: feeds converted frames directly into
// the standalone NVENC encoder instead of going through avcodec.
class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
public:
// Create the D3D11 device and the NVENC wrapper; validates that the
// requested pixel format maps to an NVENC buffer format. Returns false on failure.
bool
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
return false;
}
if (base.init(display, adapter_p, pix_fmt)) return false;
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
// Expose the backend through the base-class non-owning pointer.
nvenc = nvenc_d3d.get();
return true;
}
// Configure and start the NVENC session, then point the conversion
// pipeline's output at NVENC's input texture.
bool
init_encoder(const ::video::config_t &client_config, const ::video::sunshine_colorspace_t &colorspace) override {
if (!nvenc_d3d) return false;
nvenc::nvenc_config nvenc_config;
// Presets are numbered 11..17 in config; map to NVENC's 0-based scale,
// defaulting to 1 when unset (confirm mapping against nvenc_config docs).
nvenc_config.quality_preset = config::video.nv.nv_preset ? (*config::video.nv.nv_preset - 11) : 1;
nvenc_config.h264_cavlc = (config::video.nv.nv_coder == NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC);
auto nvenc_colorspace = nvenc::nvenc_colorspace_from_sunshine_colorspace(colorspace);
if (!nvenc_d3d->create_encoder(nvenc_config, client_config, nvenc_colorspace, buffer_format)) return false;
base.apply_colorspace(colorspace);
return base.init_output(nvenc_d3d->get_input_texture(), client_config.width, client_config.height) == 0;
}
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
private:
// Shared conversion pipeline (device, shaders, render targets).
d3d_base_encode_device base;
// Owning pointer to the NVENC backend; 'nvenc' (base class) aliases it.
std::unique_ptr<nvenc::nvenc_d3d11> nvenc_d3d;
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
};
bool
@@ -1464,26 +1525,32 @@ namespace platf::dxgi {
};
}
std::shared_ptr<platf::hwdevice_t>
display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) {
std::unique_ptr<avcodec_encode_device_t>
display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
return nullptr;
}
auto hwdevice = std::make_shared<hwdevice_t>();
auto device = std::make_unique<d3d_avcodec_encode_device_t>();
auto ret = hwdevice->init(
shared_from_this(),
adapter.get(),
pix_fmt);
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
if (ret) {
return nullptr;
}
return hwdevice;
return device;
}
// Factory for the standalone-NVENC encode path on this display.
// Returns nullptr when device creation fails (e.g. unsupported pixel
// format or NVENC initialization failure inside init_device()).
std::unique_ptr<nvenc_encode_device_t>
display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) {
auto device = std::make_unique<d3d_nvenc_encode_device_t>();
if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
return nullptr;
}
return device;
}
int