Add standalone NVENC encoder
This commit is contained in:
@@ -88,7 +88,7 @@ namespace cuda {
|
||||
return 0;
|
||||
}
|
||||
|
||||
class cuda_t: public platf::hwdevice_t {
|
||||
class cuda_t: public platf::avcodec_encode_device_t {
|
||||
public:
|
||||
int
|
||||
init(int in_width, int in_height) {
|
||||
@@ -145,8 +145,8 @@ namespace cuda {
|
||||
}
|
||||
|
||||
void
|
||||
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
|
||||
sws.set_colorspace(colorspace, color_range);
|
||||
apply_colorspace() override {
|
||||
sws.apply_colorspace(colorspace);
|
||||
|
||||
auto tex = tex_t::make(height, width * 4);
|
||||
if (!tex) {
|
||||
@@ -223,19 +223,19 @@ namespace cuda {
|
||||
}
|
||||
};
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, bool vram) {
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, bool vram) {
|
||||
if (init()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<cuda_t> cuda;
|
||||
std::unique_ptr<cuda_t> cuda;
|
||||
|
||||
if (vram) {
|
||||
cuda = std::make_shared<cuda_vram_t>();
|
||||
cuda = std::make_unique<cuda_vram_t>();
|
||||
}
|
||||
else {
|
||||
cuda = std::make_shared<cuda_ram_t>();
|
||||
cuda = std::make_unique<cuda_ram_t>();
|
||||
}
|
||||
|
||||
if (cuda->init(width, height)) {
|
||||
@@ -675,9 +675,9 @@ namespace cuda {
|
||||
return platf::capture_e::ok;
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
|
||||
return ::cuda::make_hwdevice(width, height, true);
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) {
|
||||
return ::cuda::make_avcodec_encode_device(width, height, true);
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::img_t>
|
||||
|
||||
@@ -56,12 +56,11 @@ public:
|
||||
};
|
||||
} // namespace platf
|
||||
|
||||
namespace video {
|
||||
using __float4 = float[4];
|
||||
using __float3 = float[3];
|
||||
using __float2 = float[2];
|
||||
// End special declarations
|
||||
|
||||
struct alignas(16) color_t {
|
||||
namespace cuda {
|
||||
|
||||
struct alignas(16) cuda_color_t {
|
||||
float4 color_vec_y;
|
||||
float4 color_vec_u;
|
||||
float4 color_vec_v;
|
||||
@@ -69,22 +68,8 @@ struct alignas(16) color_t {
|
||||
float2 range_uv;
|
||||
};
|
||||
|
||||
struct alignas(16) color_extern_t {
|
||||
__float4 color_vec_y;
|
||||
__float4 color_vec_u;
|
||||
__float4 color_vec_v;
|
||||
__float2 range_y;
|
||||
__float2 range_uv;
|
||||
};
|
||||
static_assert(sizeof(video::color_t) == sizeof(cuda::cuda_color_t), "color matrix struct mismatch");
|
||||
|
||||
static_assert(sizeof(video::color_t) == sizeof(video::color_extern_t), "color matrix struct mismatch");
|
||||
|
||||
extern color_t colors[6];
|
||||
} // namespace video
|
||||
|
||||
// End special declarations
|
||||
|
||||
namespace cuda {
|
||||
auto constexpr INVALID_TEXTURE = std::numeric_limits<cudaTextureObject_t>::max();
|
||||
|
||||
template<class T>
|
||||
@@ -144,7 +129,7 @@ inline __device__ float3 bgra_to_rgb(float4 vec) {
|
||||
return make_float3(vec.z, vec.y, vec.x);
|
||||
}
|
||||
|
||||
inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_matrix) {
|
||||
inline __device__ float2 calcUV(float3 pixel, const cuda_color_t *const color_matrix) {
|
||||
float4 vec_u = color_matrix->color_vec_u;
|
||||
float4 vec_v = color_matrix->color_vec_v;
|
||||
|
||||
@@ -157,7 +142,7 @@ inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_
|
||||
return make_float2(u, v);
|
||||
}
|
||||
|
||||
inline __device__ float calcY(float3 pixel, const video::color_t *const color_matrix) {
|
||||
inline __device__ float calcY(float3 pixel, const cuda_color_t *const color_matrix) {
|
||||
float4 vec_y = color_matrix->color_vec_y;
|
||||
|
||||
return (dot(pixel, make_float3(vec_y)) + vec_y.w) * color_matrix->range_y.x + color_matrix->range_y.y;
|
||||
@@ -166,7 +151,7 @@ inline __device__ float calcY(float3 pixel, const video::color_t *const color_ma
|
||||
__global__ void RGBA_to_NV12(
|
||||
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
|
||||
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
|
||||
float scale, const viewport_t viewport, const video::color_t *const color_matrix) {
|
||||
float scale, const viewport_t viewport, const cuda_color_t *const color_matrix) {
|
||||
|
||||
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
|
||||
int idY = (threadIdx.y + blockDim.y * blockIdx.y) * 2;
|
||||
@@ -297,7 +282,7 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
|
||||
CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device");
|
||||
CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
|
||||
|
||||
auto ptr = make_ptr<video::color_t>();
|
||||
auto ptr = make_ptr<cuda_color_t>();
|
||||
if(!ptr) {
|
||||
return std::nullopt;
|
||||
}
|
||||
@@ -316,32 +301,13 @@ int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std:
|
||||
dim3 block(threadsPerBlock);
|
||||
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
|
||||
|
||||
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
|
||||
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (cuda_color_t *)color_matrix.get());
|
||||
|
||||
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
|
||||
}
|
||||
|
||||
void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
|
||||
video::color_t *color_p;
|
||||
switch(colorspace) {
|
||||
case 5: // SWS_CS_SMPTE170M
|
||||
color_p = &video::colors[0];
|
||||
break;
|
||||
case 1: // SWS_CS_ITU709
|
||||
color_p = &video::colors[2];
|
||||
break;
|
||||
case 9: // SWS_CS_BT2020
|
||||
color_p = &video::colors[4];
|
||||
break;
|
||||
default:
|
||||
color_p = &video::colors[0];
|
||||
};
|
||||
|
||||
if(color_range > 1) {
|
||||
// Full range
|
||||
++color_p;
|
||||
}
|
||||
|
||||
void sws_t::apply_colorspace(const video::sunshine_colorspace_t& colorspace) {
|
||||
auto color_p = video::color_vectors_from_colorspace(colorspace);
|
||||
CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda");
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#if defined(SUNSHINE_BUILD_CUDA)
|
||||
|
||||
#include "src/video_colorspace.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
@@ -13,7 +15,7 @@
|
||||
#include <vector>
|
||||
|
||||
namespace platf {
|
||||
class hwdevice_t;
|
||||
class avcodec_encode_device_t;
|
||||
class img_t;
|
||||
} // namespace platf
|
||||
|
||||
@@ -23,8 +25,8 @@ namespace cuda {
|
||||
std::vector<std::string>
|
||||
display_names();
|
||||
}
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, bool vram);
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, bool vram);
|
||||
int
|
||||
init();
|
||||
} // namespace cuda
|
||||
@@ -109,7 +111,7 @@ namespace cuda {
|
||||
convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
|
||||
|
||||
void
|
||||
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
|
||||
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
|
||||
|
||||
int
|
||||
load_ram(platf::img_t &img, cudaArray_t array);
|
||||
|
||||
@@ -607,27 +607,8 @@ namespace egl {
|
||||
}
|
||||
|
||||
void
|
||||
sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
|
||||
video::color_t *color_p;
|
||||
switch (colorspace) {
|
||||
case 5: // SWS_CS_SMPTE170M
|
||||
color_p = &video::colors[0];
|
||||
break;
|
||||
case 1: // SWS_CS_ITU709
|
||||
color_p = &video::colors[2];
|
||||
break;
|
||||
case 9: // SWS_CS_BT2020
|
||||
color_p = &video::colors[4];
|
||||
break;
|
||||
default:
|
||||
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
|
||||
color_p = &video::colors[0];
|
||||
};
|
||||
|
||||
if (color_range > 1) {
|
||||
// Full range
|
||||
++color_p;
|
||||
}
|
||||
sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) {
|
||||
auto color_p = video::color_vectors_from_colorspace(colorspace);
|
||||
|
||||
std::string_view members[] {
|
||||
util::view(color_p->color_vec_y),
|
||||
@@ -741,7 +722,7 @@ namespace egl {
|
||||
gl::ctx.UseProgram(sws.program[1].handle());
|
||||
gl::ctx.Uniform1fv(loc_width_i, 1, &width_i);
|
||||
|
||||
auto color_p = &video::colors[0];
|
||||
auto color_p = video::color_vectors_from_colorspace(video::colorspace_e::rec601, false);
|
||||
std::pair<const char *, std::string_view> members[] {
|
||||
std::make_pair("color_vec_y", util::view(color_p->color_vec_y)),
|
||||
std::make_pair("color_vec_u", util::view(color_p->color_vec_u)),
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "src/main.h"
|
||||
#include "src/platform/common.h"
|
||||
#include "src/utility.h"
|
||||
#include "src/video_colorspace.h"
|
||||
|
||||
#define SUNSHINE_STRINGIFY_HELPER(x) #x
|
||||
#define SUNSHINE_STRINGIFY(x) SUNSHINE_STRINGIFY_HELPER(x)
|
||||
@@ -327,7 +328,7 @@ namespace egl {
|
||||
load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture);
|
||||
|
||||
void
|
||||
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
|
||||
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
|
||||
|
||||
// The first texture is the monitor image.
|
||||
// The second texture is the cursor image
|
||||
|
||||
@@ -768,13 +768,13 @@ namespace platf {
|
||||
return capture_e::ok;
|
||||
}
|
||||
|
||||
std::shared_ptr<hwdevice_t>
|
||||
make_hwdevice(pix_fmt_e pix_fmt) override {
|
||||
std::unique_ptr<avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
|
||||
if (mem_type == mem_type_e::vaapi) {
|
||||
return va::make_hwdevice(width, height, false);
|
||||
return va::make_avcodec_encode_device(width, height, false);
|
||||
}
|
||||
|
||||
return std::make_shared<hwdevice_t>();
|
||||
return std::make_unique<avcodec_encode_device_t>();
|
||||
}
|
||||
|
||||
capture_e
|
||||
@@ -843,10 +843,10 @@ namespace platf {
|
||||
display_vram_t(mem_type_e mem_type):
|
||||
display_t(mem_type) {}
|
||||
|
||||
std::shared_ptr<hwdevice_t>
|
||||
make_hwdevice(pix_fmt_e pix_fmt) override {
|
||||
std::unique_ptr<avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
|
||||
if (mem_type == mem_type_e::vaapi) {
|
||||
return va::make_hwdevice(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
|
||||
return va::make_avcodec_encode_device(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
|
||||
}
|
||||
|
||||
BOOST_LOG(error) << "Unsupported pixel format for egl::display_vram_t: "sv << platf::from_pix_fmt(pix_fmt);
|
||||
|
||||
@@ -290,9 +290,9 @@ namespace va {
|
||||
}
|
||||
|
||||
int
|
||||
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf);
|
||||
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf);
|
||||
|
||||
class va_t: public platf::hwdevice_t {
|
||||
class va_t: public platf::avcodec_encode_device_t {
|
||||
public:
|
||||
int
|
||||
init(int in_width, int in_height, file_t &&render_device) {
|
||||
@@ -304,7 +304,7 @@ namespace va {
|
||||
return -1;
|
||||
}
|
||||
|
||||
this->data = (void *) vaapi_make_hwdevice_ctx;
|
||||
this->data = (void *) vaapi_init_avcodec_hardware_input_buffer;
|
||||
|
||||
gbm.reset(gbm::create_device(file.el));
|
||||
if (!gbm) {
|
||||
@@ -398,8 +398,8 @@ namespace va {
|
||||
}
|
||||
|
||||
void
|
||||
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
|
||||
sws.set_colorspace(colorspace, color_range);
|
||||
apply_colorspace() override {
|
||||
sws.apply_colorspace(colorspace);
|
||||
}
|
||||
|
||||
va::display_t::pointer va_display;
|
||||
@@ -526,7 +526,7 @@ namespace va {
|
||||
}
|
||||
|
||||
int
|
||||
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf) {
|
||||
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *base, AVBufferRef **hw_device_buf) {
|
||||
if (!va::initialize) {
|
||||
BOOST_LOG(warning) << "libva not loaded"sv;
|
||||
return -1;
|
||||
@@ -653,10 +653,10 @@ namespace va {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
|
||||
if (vram) {
|
||||
auto egl = std::make_shared<va::va_vram_t>();
|
||||
auto egl = std::make_unique<va::va_vram_t>();
|
||||
if (egl->init(width, height, std::move(card), offset_x, offset_y)) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -665,7 +665,7 @@ namespace va {
|
||||
}
|
||||
|
||||
else {
|
||||
auto egl = std::make_shared<va::va_ram_t>();
|
||||
auto egl = std::make_unique<va::va_ram_t>();
|
||||
if (egl->init(width, height, std::move(card))) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -674,8 +674,8 @@ namespace va {
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram) {
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram) {
|
||||
auto render_device = config::video.adapter_name.empty() ? "/dev/dri/renderD128" : config::video.adapter_name.c_str();
|
||||
|
||||
file_t file = open(render_device, O_RDWR);
|
||||
@@ -686,11 +686,11 @@ namespace va {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return make_hwdevice(width, height, std::move(file), offset_x, offset_y, vram);
|
||||
return make_avcodec_encode_device(width, height, std::move(file), offset_x, offset_y, vram);
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, bool vram) {
|
||||
return make_hwdevice(width, height, 0, 0, vram);
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, bool vram) {
|
||||
return make_avcodec_encode_device(width, height, 0, 0, vram);
|
||||
}
|
||||
} // namespace va
|
||||
|
||||
@@ -18,12 +18,12 @@ namespace va {
|
||||
* offset_y --> Vertical offset of the image in the texture
|
||||
* file_t card --> The file descriptor of the render device used for encoding
|
||||
*/
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, bool vram);
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram);
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, bool vram);
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram);
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
|
||||
|
||||
// Ensure the render device pointed to by fd is capable of encoding h264 with the hevc_mode configured
|
||||
bool
|
||||
|
||||
@@ -215,13 +215,13 @@ namespace wl {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
|
||||
if (mem_type == platf::mem_type_e::vaapi) {
|
||||
return va::make_hwdevice(width, height, false);
|
||||
return va::make_avcodec_encode_device(width, height, false);
|
||||
}
|
||||
|
||||
return std::make_shared<platf::hwdevice_t>();
|
||||
return std::make_unique<platf::avcodec_encode_device_t>();
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::img_t>
|
||||
@@ -323,13 +323,13 @@ namespace wl {
|
||||
return img;
|
||||
}
|
||||
|
||||
std::shared_ptr<platf::hwdevice_t>
|
||||
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
|
||||
std::unique_ptr<platf::avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
|
||||
if (mem_type == platf::mem_type_e::vaapi) {
|
||||
return va::make_hwdevice(width, height, 0, 0, true);
|
||||
return va::make_avcodec_encode_device(width, height, 0, 0, true);
|
||||
}
|
||||
|
||||
return std::make_shared<platf::hwdevice_t>();
|
||||
return std::make_unique<platf::avcodec_encode_device_t>();
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -553,19 +553,19 @@ namespace platf {
|
||||
return std::make_shared<x11_img_t>();
|
||||
}
|
||||
|
||||
std::shared_ptr<hwdevice_t>
|
||||
make_hwdevice(pix_fmt_e pix_fmt) override {
|
||||
std::unique_ptr<avcodec_encode_device_t>
|
||||
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
|
||||
if (mem_type == mem_type_e::vaapi) {
|
||||
return va::make_hwdevice(width, height, false);
|
||||
return va::make_avcodec_encode_device(width, height, false);
|
||||
}
|
||||
|
||||
#ifdef SUNSHINE_BUILD_CUDA
|
||||
if (mem_type == mem_type_e::cuda) {
|
||||
return cuda::make_hwdevice(width, height, false);
|
||||
return cuda::make_avcodec_encode_device(width, height, false);
|
||||
}
|
||||
#endif
|
||||
|
||||
return std::make_shared<hwdevice_t>();
|
||||
return std::make_unique<avcodec_encode_device_t>();
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
Reference in New Issue
Block a user