Add standalone NVENC encoder

This commit is contained in:
ns6089
2023-04-25 16:38:37 +03:00
committed by Cameron Gutman
parent 7fe52bc5f8
commit 68fa43a61c
34 changed files with 2124 additions and 642 deletions

View File

@@ -88,7 +88,7 @@ namespace cuda {
return 0;
}
class cuda_t: public platf::hwdevice_t {
class cuda_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height) {
@@ -145,8 +145,8 @@ namespace cuda {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
auto tex = tex_t::make(height, width * 4);
if (!tex) {
@@ -223,19 +223,19 @@ namespace cuda {
}
};
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
if (init()) {
return nullptr;
}
std::shared_ptr<cuda_t> cuda;
std::unique_ptr<cuda_t> cuda;
if (vram) {
cuda = std::make_shared<cuda_vram_t>();
cuda = std::make_unique<cuda_vram_t>();
}
else {
cuda = std::make_shared<cuda_ram_t>();
cuda = std::make_unique<cuda_ram_t>();
}
if (cuda->init(width, height)) {
@@ -675,9 +675,9 @@ namespace cuda {
return platf::capture_e::ok;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
return ::cuda::make_hwdevice(width, height, true);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) {
return ::cuda::make_avcodec_encode_device(width, height, true);
}
std::shared_ptr<platf::img_t>

View File

@@ -56,12 +56,11 @@ public:
};
} // namespace platf
namespace video {
using __float4 = float[4];
using __float3 = float[3];
using __float2 = float[2];
// End special declarations
struct alignas(16) color_t {
namespace cuda {
struct alignas(16) cuda_color_t {
float4 color_vec_y;
float4 color_vec_u;
float4 color_vec_v;
@@ -69,22 +68,8 @@ struct alignas(16) color_t {
float2 range_uv;
};
struct alignas(16) color_extern_t {
__float4 color_vec_y;
__float4 color_vec_u;
__float4 color_vec_v;
__float2 range_y;
__float2 range_uv;
};
static_assert(sizeof(video::color_t) == sizeof(cuda::cuda_color_t), "color matrix struct mismatch");
static_assert(sizeof(video::color_t) == sizeof(video::color_extern_t), "color matrix struct mismatch");
extern color_t colors[6];
} // namespace video
// End special declarations
namespace cuda {
auto constexpr INVALID_TEXTURE = std::numeric_limits<cudaTextureObject_t>::max();
template<class T>
@@ -144,7 +129,7 @@ inline __device__ float3 bgra_to_rgb(float4 vec) {
return make_float3(vec.z, vec.y, vec.x);
}
inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float2 calcUV(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_u = color_matrix->color_vec_u;
float4 vec_v = color_matrix->color_vec_v;
@@ -157,7 +142,7 @@ inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_
return make_float2(u, v);
}
inline __device__ float calcY(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float calcY(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_y = color_matrix->color_vec_y;
return (dot(pixel, make_float3(vec_y)) + vec_y.w) * color_matrix->range_y.x + color_matrix->range_y.y;
@@ -166,7 +151,7 @@ inline __device__ float calcY(float3 pixel, const video::color_t *const color_ma
__global__ void RGBA_to_NV12(
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
float scale, const viewport_t viewport, const video::color_t *const color_matrix) {
float scale, const viewport_t viewport, const cuda_color_t *const color_matrix) {
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
int idY = (threadIdx.y + blockDim.y * blockIdx.y) * 2;
@@ -297,7 +282,7 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device");
CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
auto ptr = make_ptr<video::color_t>();
auto ptr = make_ptr<cuda_color_t>();
if(!ptr) {
return std::nullopt;
}
@@ -316,32 +301,13 @@ int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std:
dim3 block(threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (cuda_color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
}
void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch(colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
color_p = &video::colors[0];
};
if(color_range > 1) {
// Full range
++color_p;
}
void sws_t::apply_colorspace(const video::sunshine_colorspace_t& colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda");
}

View File

@@ -6,6 +6,8 @@
#if defined(SUNSHINE_BUILD_CUDA)
#include "src/video_colorspace.h"
#include <cstdint>
#include <memory>
#include <optional>
@@ -13,7 +15,7 @@
#include <vector>
namespace platf {
class hwdevice_t;
class avcodec_encode_device_t;
class img_t;
} // namespace platf
@@ -23,8 +25,8 @@ namespace cuda {
std::vector<std::string>
display_names();
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
int
init();
} // namespace cuda
@@ -109,7 +111,7 @@ namespace cuda {
convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
int
load_ram(platf::img_t &img, cudaArray_t array);

View File

@@ -607,27 +607,8 @@ namespace egl {
}
void
sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &video::colors[0];
};
if (color_range > 1) {
// Full range
++color_p;
}
sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
std::string_view members[] {
util::view(color_p->color_vec_y),
@@ -741,7 +722,7 @@ namespace egl {
gl::ctx.UseProgram(sws.program[1].handle());
gl::ctx.Uniform1fv(loc_width_i, 1, &width_i);
auto color_p = &video::colors[0];
auto color_p = video::color_vectors_from_colorspace(video::colorspace_e::rec601, false);
std::pair<const char *, std::string_view> members[] {
std::make_pair("color_vec_y", util::view(color_p->color_vec_y)),
std::make_pair("color_vec_u", util::view(color_p->color_vec_u)),

View File

@@ -14,6 +14,7 @@
#include "src/main.h"
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
#define SUNSHINE_STRINGIFY_HELPER(x) #x
#define SUNSHINE_STRINGIFY(x) SUNSHINE_STRINGIFY_HELPER(x)
@@ -327,7 +328,7 @@ namespace egl {
load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
// The first texture is the monitor image.
// The second texture is the cursor image

View File

@@ -768,13 +768,13 @@ namespace platf {
return capture_e::ok;
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
capture_e
@@ -843,10 +843,10 @@ namespace platf {
display_vram_t(mem_type_e mem_type):
display_t(mem_type) {}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
return va::make_avcodec_encode_device(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
}
BOOST_LOG(error) << "Unsupported pixel format for egl::display_vram_t: "sv << platf::from_pix_fmt(pix_fmt);

View File

@@ -290,9 +290,9 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf);
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf);
class va_t: public platf::hwdevice_t {
class va_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height, file_t &&render_device) {
@@ -304,7 +304,7 @@ namespace va {
return -1;
}
this->data = (void *) vaapi_make_hwdevice_ctx;
this->data = (void *) vaapi_init_avcodec_hardware_input_buffer;
gbm.reset(gbm::create_device(file.el));
if (!gbm) {
@@ -398,8 +398,8 @@ namespace va {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
}
va::display_t::pointer va_display;
@@ -526,7 +526,7 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf) {
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *base, AVBufferRef **hw_device_buf) {
if (!va::initialize) {
BOOST_LOG(warning) << "libva not loaded"sv;
return -1;
@@ -653,10 +653,10 @@ namespace va {
return true;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
if (vram) {
auto egl = std::make_shared<va::va_vram_t>();
auto egl = std::make_unique<va::va_vram_t>();
if (egl->init(width, height, std::move(card), offset_x, offset_y)) {
return nullptr;
}
@@ -665,7 +665,7 @@ namespace va {
}
else {
auto egl = std::make_shared<va::va_ram_t>();
auto egl = std::make_unique<va::va_ram_t>();
if (egl->init(width, height, std::move(card))) {
return nullptr;
}
@@ -674,8 +674,8 @@ namespace va {
}
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram) {
auto render_device = config::video.adapter_name.empty() ? "/dev/dri/renderD128" : config::video.adapter_name.c_str();
file_t file = open(render_device, O_RDWR);
@@ -686,11 +686,11 @@ namespace va {
return nullptr;
}
return make_hwdevice(width, height, std::move(file), offset_x, offset_y, vram);
return make_avcodec_encode_device(width, height, std::move(file), offset_x, offset_y, vram);
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
return make_hwdevice(width, height, 0, 0, vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
return make_avcodec_encode_device(width, height, 0, 0, vram);
}
} // namespace va

View File

@@ -18,12 +18,12 @@ namespace va {
* offset_y --> Vertical offset of the image in the texture
* file_t card --> The file descriptor of the render device used for encoding
*/
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
// Ensure the render device pointed to by fd is capable of encoding h264 with the hevc_mode configured
bool

View File

@@ -215,13 +215,13 @@ namespace wl {
return 0;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
std::shared_ptr<platf::img_t>
@@ -323,13 +323,13 @@ namespace wl {
return img;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, 0, 0, true);
return va::make_avcodec_encode_device(width, height, 0, 0, true);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
int

View File

@@ -553,19 +553,19 @@ namespace platf {
return std::make_shared<x11_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
#ifdef SUNSHINE_BUILD_CUDA
if (mem_type == mem_type_e::cuda) {
return cuda::make_hwdevice(width, height, false);
return cuda::make_avcodec_encode_device(width, height, false);
}
#endif
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
int