Resolve merge conflicts

This commit is contained in:
loki-47-6F-64
2021-10-01 11:30:12 +02:00
10 changed files with 144 additions and 47 deletions
+2 -2
View File
@@ -20,11 +20,11 @@ Install the following:
#### Common #### Common
``` ```
sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev libevdev-dev
``` ```
#### X11 #### X11
``` ```
sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libevdev-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev
``` ```
#### KMS #### KMS
+1 -1
View File
@@ -38,7 +38,7 @@ Architecture: amd64
Maintainer: @loki Maintainer: @loki
Priority: optional Priority: optional
Version: 0.11.0 Version: 0.11.0
Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0, libboost-log1.67.0 | libboost-log1.71.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2 Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0 | libboost-thread1.74.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0 | libboost-filesystem1.74.0, libboost-log1.67.0 | libboost-log1.71.0 | libboost-log1.74.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2
Description: Gamestream host for Moonlight Description: Gamestream host for Moonlight
EOF EOF
+29 -6
View File
@@ -99,16 +99,22 @@ enum quality_e : int {
_default = 0, _default = 0,
speed, speed,
balanced, balanced,
//quality2,
}; };
enum rc_e : int { enum class rc_hevc_e : int {
constqp, /**< Constant QP mode */ constqp, /**< Constant QP mode */
vbr_latency, /**< Latency Constrained Variable Bitrate */ vbr_latency, /**< Latency Constrained Variable Bitrate */
vbr_peak, /**< Peak Contrained Variable Bitrate */ vbr_peak, /**< Peak Contrained Variable Bitrate */
cbr, /**< Constant bitrate mode */ cbr, /**< Constant bitrate mode */
}; };
enum class rc_h264_e : int {
constqp, /**< Constant QP mode */
cbr, /**< Constant bitrate mode */
vbr_peak, /**< Peak Contrained Variable Bitrate */
vbr_latency, /**< Latency Constrained Variable Bitrate */
};
enum coder_e : int { enum coder_e : int {
_auto = 0, _auto = 0,
cabac, cabac,
@@ -120,15 +126,25 @@ std::optional<quality_e> quality_from_view(const std::string_view &quality) {
if(quality == #x##sv) return x if(quality == #x##sv) return x
_CONVERT_(speed); _CONVERT_(speed);
_CONVERT_(balanced); _CONVERT_(balanced);
//_CONVERT_(quality2);
if(quality == "default"sv) return _default; if(quality == "default"sv) return _default;
#undef _CONVERT_ #undef _CONVERT_
return std::nullopt; return std::nullopt;
} }
std::optional<rc_e> rc_from_view(const std::string_view &rc) { std::optional<int> rc_h264_from_view(const std::string_view &rc) {
#define _CONVERT_(x) \ #define _CONVERT_(x) \
if(rc == #x##sv) return x if(rc == #x##sv) return (int)rc_h264_e::x
_CONVERT_(constqp);
_CONVERT_(vbr_latency);
_CONVERT_(vbr_peak);
_CONVERT_(cbr);
#undef _CONVERT_
return std::nullopt;
}
std::optional<int> rc_hevc_from_view(const std::string_view &rc) {
#define _CONVERT_(x) \
if(rc == #x##sv) return (int)rc_hevc_e::x
_CONVERT_(constqp); _CONVERT_(constqp);
_CONVERT_(vbr_latency); _CONVERT_(vbr_latency);
_CONVERT_(vbr_peak); _CONVERT_(vbr_peak);
@@ -165,6 +181,7 @@ video_t video {
{ {
amd::balanced, amd::balanced,
std::nullopt, std::nullopt,
std::nullopt,
-1 }, // amd -1 }, // amd
{}, // encoder {}, // encoder
@@ -659,8 +676,14 @@ void apply_config(std::unordered_map<std::string, std::string> &&vars) {
int_f(vars, "nv_coder", video.nv.coder, nv::coder_from_view); int_f(vars, "nv_coder", video.nv.coder, nv::coder_from_view);
int_f(vars, "amd_quality", video.amd.quality, amd::quality_from_view); int_f(vars, "amd_quality", video.amd.quality, amd::quality_from_view);
int_f(vars, "amd_rc", video.amd.rc, amd::rc_from_view);
std::string rc;
string_f(vars, "amd_rc", rc);
int_f(vars, "amd_coder", video.amd.coder, amd::coder_from_view); int_f(vars, "amd_coder", video.amd.coder, amd::coder_from_view);
if(!rc.empty()) {
video.amd.rc_h264 = amd::rc_h264_from_view(rc);
video.amd.rc_hevc = amd::rc_hevc_from_view(rc);
}
string_f(vars, "encoder", video.encoder); string_f(vars, "encoder", video.encoder);
string_f(vars, "adapter_name", video.adapter_name); string_f(vars, "adapter_name", video.adapter_name);
+2 -1
View File
@@ -29,7 +29,8 @@ struct video_t {
struct { struct {
std::optional<int> quality; std::optional<int> quality;
std::optional<int> rc; std::optional<int> rc_h264;
std::optional<int> rc_hevc;
int coder; int coder;
} amd; } amd;
+56 -14
View File
@@ -56,6 +56,10 @@ inline static int check(CUresult result, const std::string_view &sv) {
return 0; return 0;
} }
void freeStream(CUstream stream) {
CU_CHECK_IGNORE(cdf->cuStreamDestroy(stream), "Couldn't destroy cuda stream");
}
class img_t : public platf::img_t { class img_t : public platf::img_t {
public: public:
tex_t tex; tex_t tex;
@@ -94,7 +98,8 @@ public:
this->hwframe.reset(frame); this->hwframe.reset(frame);
this->frame = frame; this->frame = frame;
if(((AVHWFramesContext *)frame->hw_frames_ctx->data)->sw_format != AV_PIX_FMT_NV12) { auto hwframe_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
if(hwframe_ctx->sw_format != AV_PIX_FMT_NV12) {
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv; BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv;
return -1; return -1;
} }
@@ -105,6 +110,15 @@ public:
return -1; return -1;
} }
auto cuda_ctx = (AVCUDADeviceContext *)hwframe_ctx->device_ctx->hwctx;
stream = make_stream();
if(!stream) {
return -1;
}
cuda_ctx->stream = stream.get();
auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4); auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4);
if(!sws_opt) { if(!sws_opt) {
return -1; return -1;
@@ -142,13 +156,14 @@ public:
return; return;
} }
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, { frame->width, frame->height, 0, 0 }); sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), { frame->width, frame->height, 0, 0 });
} }
cudaTextureObject_t tex_obj(const tex_t &tex) const { cudaTextureObject_t tex_obj(const tex_t &tex) const {
return linear_interpolation ? tex.texture.linear : tex.texture.point; return linear_interpolation ? tex.texture.linear : tex.texture.point;
} }
stream_t stream;
frame_t hwframe; frame_t hwframe;
int width, height; int width, height;
@@ -162,7 +177,7 @@ public:
class cuda_ram_t : public cuda_t { class cuda_ram_t : public cuda_t {
public: public:
int convert(platf::img_t &img) override { int convert(platf::img_t &img) override {
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex)); return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get());
} }
int set_frame(AVFrame *frame) { int set_frame(AVFrame *frame) {
@@ -186,7 +201,7 @@ public:
class cuda_vram_t : public cuda_t { class cuda_vram_t : public cuda_t {
public: public:
int convert(platf::img_t &img) override { int convert(platf::img_t &img) override {
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex)); return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex), stream.get());
} }
}; };
@@ -256,6 +271,28 @@ int init() {
return 0; return 0;
} }
class ctx_t {
public:
ctx_t(NVFBC_SESSION_HANDLE handle) {
NVFBC_BIND_CONTEXT_PARAMS params { NVFBC_BIND_CONTEXT_PARAMS_VER };
if(func.nvFBCBindContext(handle, &params)) {
BOOST_LOG(error) << "Couldn't bind NvFBC context to current thread: " << func.nvFBCGetLastErrorStr(handle);
}
this->handle = handle;
}
~ctx_t() {
NVFBC_RELEASE_CONTEXT_PARAMS params { NVFBC_RELEASE_CONTEXT_PARAMS_VER };
if(func.nvFBCReleaseContext(handle, &params)) {
BOOST_LOG(error) << "Couldn't release NvFBC context from current thread: " << func.nvFBCGetLastErrorStr(handle);
}
}
NVFBC_SESSION_HANDLE handle;
};
class handle_t { class handle_t {
enum flag_e { enum flag_e {
SESSION_HANDLE, SESSION_HANDLE,
@@ -347,24 +384,26 @@ public:
return 0; return 0;
} }
~handle_t() { int reset() {
if(!handle_flags[SESSION_HANDLE]) { if(!handle_flags[SESSION_HANDLE]) {
return; return 0;
} }
if(handle_flags[SESSION_CAPTURE]) { stop();
NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER };
if(func.nvFBCDestroyCaptureSession(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy capture session: "sv << func.nvFBCGetLastErrorStr(handle);
}
}
NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER }; NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER };
if(func.nvFBCDestroyHandle(handle, &params)) { if(func.nvFBCDestroyHandle(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle); BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle);
} }
handle_flags[SESSION_HANDLE] = false;
return 0;
}
~handle_t() {
reset();
} }
std::bitset<MAX_FLAGS> handle_flags; std::bitset<MAX_FLAGS> handle_flags;
@@ -380,6 +419,8 @@ public:
return -1; return -1;
} }
ctx_t ctx { handle->handle };
auto status_params = handle->status(); auto status_params = handle->status();
if(!status_params) { if(!status_params) {
return -1; return -1;
@@ -442,8 +483,9 @@ public:
// Force display_t::capture to initialize handle_t::capture // Force display_t::capture to initialize handle_t::capture
cursor_visible = !*cursor; cursor_visible = !*cursor;
ctx_t ctx { handle.handle };
auto fg = util::fail_guard([&]() { auto fg = util::fail_guard([&]() {
handle.stop(); handle.reset();
}); });
while(img) { while(img) {
+21 -4
View File
@@ -110,6 +110,23 @@ void freeCudaPtr_t::operator()(void *ptr) {
CU_CHECK_IGNORE(cudaFree(ptr), "Couldn't free cuda device pointer"); CU_CHECK_IGNORE(cudaFree(ptr), "Couldn't free cuda device pointer");
} }
void freeCudaStream_t::operator()(cudaStream_t ptr) {
CU_CHECK_IGNORE(cudaStreamDestroy(ptr), "Couldn't free cuda stream");
}
stream_t make_stream(int flags) {
cudaStream_t stream;
if(!flags) {
CU_CHECK_PTR(cudaStreamCreate(&stream), "Couldn't create cuda stream");
}
else {
CU_CHECK_PTR(cudaStreamCreateWithFlags(&stream, flags), "Couldn't create cuda stream with flags");
}
return stream_t { stream };
}
inline __device__ float3 bgra_to_rgb(uchar4 vec) { inline __device__ float3 bgra_to_rgb(uchar4 vec) {
return make_float3((float)vec.z, (float)vec.y, (float)vec.x); return make_float3((float)vec.z, (float)vec.y, (float)vec.x);
} }
@@ -269,18 +286,18 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
return std::make_optional<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor, std::move(ptr)); return std::make_optional<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor, std::move(ptr));
} }
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture) { int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream) {
return convert(Y, UV, pitchY, pitchUV, texture, viewport); return convert(Y, UV, pitchY, pitchUV, texture, stream, viewport);
} }
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport) { int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport) {
int threadsX = viewport.width / 2; int threadsX = viewport.width / 2;
int threadsY = viewport.height; int threadsY = viewport.height;
dim3 block(threadsPerBlock); dim3 block(threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY); dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
RGBA_to_NV12<<<grid, block>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get()); RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed"); return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
} }
+18 -6
View File
@@ -1,16 +1,18 @@
#if !defined(SUNSHINE_PLATFORM_CUDA_H) && defined(SUNSHINE_BUILD_CUDA) #if !defined(SUNSHINE_PLATFORM_CUDA_H) && defined(SUNSHINE_BUILD_CUDA)
#define SUNSHINE_PLATFORM_CUDA_H #define SUNSHINE_PLATFORM_CUDA_H
#include <vector>
#include <memory> #include <memory>
#include <optional> #include <optional>
#include <string>
#include <vector>
namespace platf { namespace platf {
class hwdevice_t; class hwdevice_t;
class img_t; class img_t;
} } // namespace platf
namespace cuda { namespace cuda {
namespace nvfbc { namespace nvfbc {
std::vector<std::string> display_names(); std::vector<std::string> display_names();
} }
@@ -21,8 +23,10 @@ int init();
typedef struct cudaArray *cudaArray_t; typedef struct cudaArray *cudaArray_t;
#if !defined(__CUDACC__) #if !defined(__CUDACC__)
typedef struct CUstream_st *cudaStream_t;
typedef unsigned long long cudaTextureObject_t; typedef unsigned long long cudaTextureObject_t;
#else /* defined(__CUDACC__) */ #else /* defined(__CUDACC__) */
typedef __location__(device_builtin) struct CUstream_st *cudaStream_t;
typedef __location__(device_builtin) unsigned long long cudaTextureObject_t; typedef __location__(device_builtin) unsigned long long cudaTextureObject_t;
#endif /* !defined(__CUDACC__) */ #endif /* !defined(__CUDACC__) */
@@ -33,7 +37,15 @@ public:
void operator()(void *ptr); void operator()(void *ptr);
}; };
class freeCudaStream_t {
public:
void operator()(cudaStream_t ptr);
};
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>; using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
using stream_t = std::unique_ptr<CUstream_st, freeCudaStream_t>;
stream_t make_stream(int flags = 0);
struct viewport_t { struct viewport_t {
int width, height; int width, height;
@@ -75,8 +87,8 @@ public:
static std::optional<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch); static std::optional<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch);
// Converts loaded image into a CUDevicePtr // Converts loaded image into a CUDevicePtr
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture); int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport); int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range); void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
+8 -2
View File
@@ -696,7 +696,13 @@ public:
}; };
inline void rumbleIterate(std::vector<effect_t> &effects, std::vector<pollfd_t> &polls, std::chrono::milliseconds to) { inline void rumbleIterate(std::vector<effect_t> &effects, std::vector<pollfd_t> &polls, std::chrono::milliseconds to) {
auto res = poll(&polls.data()->el, polls.size(), to.count()); std::vector<pollfd> polls_tmp;
polls_tmp.reserve(polls.size());
for(auto &poll : polls) {
polls_tmp.emplace_back(poll.el);
}
auto res = poll(polls_tmp.data(), polls.size(), to.count());
// If timed out // If timed out
if(!res) { if(!res) {
@@ -871,7 +877,7 @@ void broadcastRumble(safe::queue_t<mail_evdev_t> &rumble_queue_queue) {
} }
if(polls.empty()) { if(polls.empty()) {
std::this_thread::sleep_for(50ms); std::this_thread::sleep_for(250ms);
} }
else { else {
rumbleIterate(effects, polls, 100ms); rumbleIterate(effects, polls, 100ms);
-4
View File
@@ -264,13 +264,11 @@ std::unique_ptr<deinit_t> init() {
#endif #endif
#ifdef SUNSHINE_BUILD_CUDA #ifdef SUNSHINE_BUILD_CUDA
if(verify_nvfbc()) { if(verify_nvfbc()) {
BOOST_LOG(info) << "Using NvFBC for screencasting"sv;
sources[source::NVFBC] = true; sources[source::NVFBC] = true;
} }
#endif #endif
#ifdef SUNSHINE_BUILD_WAYLAND #ifdef SUNSHINE_BUILD_WAYLAND
if(verify_wl()) { if(verify_wl()) {
BOOST_LOG(info) << "Using Wayland for screencasting"sv;
sources[source::WAYLAND] = true; sources[source::WAYLAND] = true;
} }
#endif #endif
@@ -282,13 +280,11 @@ std::unique_ptr<deinit_t> init() {
display_cursor = false; display_cursor = false;
} }
BOOST_LOG(info) << "Using KMS for screencasting"sv;
sources[source::KMS] = true; sources[source::KMS] = true;
} }
#endif #endif
#ifdef SUNSHINE_BUILD_X11 #ifdef SUNSHINE_BUILD_X11
if(verify_x11()) { if(verify_x11()) {
BOOST_LOG(info) << "Using X11 for screencasting"sv;
sources[source::X11] = true; sources[source::X11] = true;
} }
#endif #endif
+4 -4
View File
@@ -456,19 +456,19 @@ static encoder_t amdvce {
{ "gops_per_idr"s, 30 }, { "gops_per_idr"s, 30 },
{ "usage"s, "ultralowlatency"s }, { "usage"s, "ultralowlatency"s },
{ "quality"s, &config::video.amd.quality }, { "quality"s, &config::video.amd.quality },
{ "rc"s, &config::video.amd.rc }, { "rc"s, &config::video.amd.rc_hevc },
}, },
std::make_optional<encoder_t::option_t>({ "qp"s, &config::video.qp }), std::make_optional<encoder_t::option_t>({ "qp_p"s, &config::video.qp }),
"hevc_amf"s, "hevc_amf"s,
}, },
{ {
{ {
{ "usage"s, "ultralowlatency"s }, { "usage"s, "ultralowlatency"s },
{ "quality"s, &config::video.amd.quality }, { "quality"s, &config::video.amd.quality },
{ "rc"s, &config::video.amd.rc }, { "rc"s, &config::video.amd.rc_h264 },
{ "log_to_dbg"s, "1"s }, { "log_to_dbg"s, "1"s },
}, },
std::make_optional<encoder_t::option_t>({ "qp"s, &config::video.qp }), std::make_optional<encoder_t::option_t>({ "qp_p"s, &config::video.qp }),
"h264_amf"s, "h264_amf"s,
}, },
DEFAULT, DEFAULT,