diff --git a/sunshine/platform/linux/cuda.cpp b/sunshine/platform/linux/cuda.cpp index ac6680b8..b15104a5 100644 --- a/sunshine/platform/linux/cuda.cpp +++ b/sunshine/platform/linux/cuda.cpp @@ -112,6 +112,8 @@ public: sws = std::move(*sws_opt); + linear_interpolation = width != frame->width || height != frame->height; + return 0; } @@ -140,20 +142,27 @@ public: return; } - sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture, { frame->width, frame->height, 0, 0 }); + sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, { frame->width, frame->height, 0, 0 }); + } + + cudaTextureObject_t tex_obj(const tex_t &tex) const { + return linear_interpolation ? tex.texture.linear : tex.texture.point; } frame_t hwframe; int width, height; + // When heigth and width don't change, it's not necessary to use linear interpolation + bool linear_interpolation; + sws_t sws; }; class cuda_ram_t : public cuda_t { public: int convert(platf::img_t &img) override { - return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex.texture); + return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex)); } int set_frame(AVFrame *frame) { @@ -177,7 +186,7 @@ public: class cuda_vram_t : public cuda_t { public: int convert(platf::img_t &img) override { - return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], (cudaTextureObject_t)img.data); + return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex)); } }; @@ -497,7 +506,7 @@ public: NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab { NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER, - NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY, + NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT, &device_ptr, &info, 0, @@ -551,7 +560,7 @@ public: NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab { NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER, - NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY, + NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT, &device_ptr, &info, (std::uint32_t)timeout.count(), @@ -580,6 +589,7 @@ public: std::shared_ptr alloc_img() override { auto img = std::make_shared(); + img->data = nullptr; img->width = width; img->height = height; img->pixel_pitch = 4; @@ -590,8 +600,7 @@ public: return nullptr; } - img->tex = std::move(*tex_opt); - img->data = (std::uint8_t *)img->tex.texture; + img->tex = std::move(*tex_opt); return img; }; diff --git a/sunshine/platform/linux/cuda.cu b/sunshine/platform/linux/cuda.cu index 7bfb5eab..49f088f8 100644 --- a/sunshine/platform/linux/cuda.cu +++ b/sunshine/platform/linux/cuda.cu @@ -160,7 +160,7 @@ __global__ void RGBA_to_NV12( float3 rgb_l = bgra_to_rgb(tex2D(srcImage, x, y)); float3 rgb_r = bgra_to_rgb(tex2D(srcImage, x + scale, y)); - float2 uv = calcUV((rgb_l + rgb_r) * 0.5f, color_matrix) * 255.0f; + float2 uv = calcUV((rgb_l + rgb_r) * 0.5f, color_matrix) * 256.0f; dstUV[0] = uv.x; dstUV[1] = uv.y; @@ -187,12 +187,16 @@ std::optional tex_t::make(int height, int pitch) { cudaTextureDesc desc {}; desc.readMode = cudaReadModeNormalizedFloat; - desc.filterMode = cudaFilterModeLinear; + desc.filterMode = cudaFilterModePoint; desc.normalizedCoords = false; std::fill_n(std::begin(desc.addressMode), 2, cudaAddressModeClamp); - CU_CHECK_OPT(cudaCreateTextureObject(&tex.texture, &res, &desc, nullptr), "Couldn't create cuda texture"); + CU_CHECK_OPT(cudaCreateTextureObject(&tex.texture.point, &res, &desc, nullptr), "Couldn't create cuda texture that uses point interpolation"); + + desc.filterMode = cudaFilterModeLinear; + + CU_CHECK_OPT(cudaCreateTextureObject(&tex.texture.linear, &res, &desc, nullptr), "Couldn't create cuda texture that uses linear interpolation"); return std::move(tex); } @@ -200,7 +204,8 @@ std::optional tex_t::make(int height, int pitch) { tex_t::tex_t() : array {}, texture { INVALID_TEXTURE } {} tex_t::tex_t(tex_t &&other) : array { other.array }, texture { other.texture } { other.array = 0; - other.texture = INVALID_TEXTURE; + other.texture.point = INVALID_TEXTURE; + other.texture.linear = INVALID_TEXTURE; } tex_t &tex_t::operator=(tex_t &&other) { @@ -211,10 +216,16 @@ tex_t &tex_t::operator=(tex_t &&other) { } tex_t::~tex_t() { - if(texture != INVALID_TEXTURE) { - CU_CHECK_IGNORE(cudaDestroyTextureObject(texture), "Couldn't deallocate cuda texture"); + if(texture.point != INVALID_TEXTURE) { + CU_CHECK_IGNORE(cudaDestroyTextureObject(texture.point), "Couldn't deallocate cuda texture that uses point interpolation"); - texture = INVALID_TEXTURE; + texture.point = INVALID_TEXTURE; + } + + if(texture.linear != INVALID_TEXTURE) { + CU_CHECK_IGNORE(cudaDestroyTextureObject(texture.linear), "Couldn't deallocate cuda texture that uses linear interpolation"); + + texture.linear = INVALID_TEXTURE; } if(array) { diff --git a/sunshine/platform/linux/cuda.h b/sunshine/platform/linux/cuda.h index d55ab8d0..5811379f 100644 --- a/sunshine/platform/linux/cuda.h +++ b/sunshine/platform/linux/cuda.h @@ -54,7 +54,11 @@ public: int copy(std::uint8_t *src, int height, int pitch); cudaArray_t array; - cudaTextureObject_t texture; + + struct texture { + cudaTextureObject_t point; + cudaTextureObject_t linear; + } texture; }; class sws_t {