From f9c885a414f92d8277337e2fd1283110a0e376bb Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Fri, 9 Aug 2024 23:29:17 +0300 Subject: [PATCH 01/12] fix(linux/audio): don't set pulseaudio buffer size (#2999) --- src/platform/linux/audio.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp index 32aa36a6..ff231707 100644 --- a/src/platform/linux/audio.cpp +++ b/src/platform/linux/audio.cpp @@ -81,9 +81,13 @@ namespace platf { channel = position_mapping[*mapping++]; }); - pa_buffer_attr pa_attr = {}; - pa_attr.fragsize = frame_size * channels * sizeof(float); - pa_attr.maxlength = pa_attr.fragsize * 2; + pa_buffer_attr pa_attr = { + .maxlength = uint32_t(-1), + .tlength = uint32_t(-1), + .prebuf = uint32_t(-1), + .minreq = uint32_t(-1), + .fragsize = uint32_t(frame_size * channels * sizeof(float)) + }; int status; From 299672795c5ef923e5a000d6e11f9421e3a43fb5 Mon Sep 17 00:00:00 2001 From: LizardByte-bot <108553330+LizardByte-bot@users.noreply.github.com> Date: Mon, 12 Aug 2024 10:19:25 -0400 Subject: [PATCH 02/12] chore: update global workflows (#3003) --- .github/semantic.yml | 14 ++++++++++++++ .github/workflows/cpp-lint.yml | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .github/semantic.yml diff --git a/.github/semantic.yml b/.github/semantic.yml new file mode 100644 index 00000000..b5eb70d0 --- /dev/null +++ b/.github/semantic.yml @@ -0,0 +1,14 @@ +--- +# This file is centrally managed in https://github.com//.github/ +# Don't make changes to this file in this repo as they will be overwritten with changes made to the same file in +# the above-mentioned repo. 
+ +# This is the configuration file for https://github.com/Ezard/semantic-prs + +enabled: true +titleOnly: true # We only use the PR title as we squash and merge +commitsOnly: false +titleAndCommits: false +anyCommit: false +allowMergeCommits: false +allowRevertCommits: false diff --git a/.github/workflows/cpp-lint.yml b/.github/workflows/cpp-lint.yml index 5d0df5ad..96cb1d06 100644 --- a/.github/workflows/cpp-lint.yml +++ b/.github/workflows/cpp-lint.yml @@ -55,7 +55,7 @@ jobs: - name: Clang format lint if: ${{ steps.find_files.outputs.found_files }} - uses: DoozyX/clang-format-lint-action@v0.17 + uses: DoozyX/clang-format-lint-action@v0.18 with: source: ${{ steps.find_files.outputs.found_files }} extensions: 'cpp,h,m,mm' From a669b36c31fcb339572b5a4ff1741d42de0ab984 Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Thu, 15 Aug 2024 18:16:19 -0400 Subject: [PATCH 03/12] fix(build): update cuda compatibilities (#3018) --- cmake/compile_definitions/linux.cmake | 27 ++++++--------------------- docs/getting_started.md | 4 ++-- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/cmake/compile_definitions/linux.cmake b/cmake/compile_definitions/linux.cmake index e07c2a55..d90f5dc6 100644 --- a/cmake/compile_definitions/linux.cmake +++ b/cmake/compile_definitions/linux.cmake @@ -24,57 +24,42 @@ if(${SUNSHINE_ENABLE_CUDA}) # https://tech.amikelive.com/node-930/cuda-compatibility-of-nvidia-display-gpu-drivers/ if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 6.5) list(APPEND CMAKE_CUDA_ARCHITECTURES 10) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_10,code=sm_10") elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 6.5) list(APPEND CMAKE_CUDA_ARCHITECTURES 50 52) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_50,code=sm_50") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52") endif() if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 7.0) 
list(APPEND CMAKE_CUDA_ARCHITECTURES 11) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_11,code=sm_11") elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER 7.6) list(APPEND CMAKE_CUDA_ARCHITECTURES 60 61 62) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_62,code=sm_62") endif() + # https://docs.nvidia.com/cuda/archive/9.2/cuda-compiler-driver-nvcc/index.html if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0) list(APPEND CMAKE_CUDA_ARCHITECTURES 20) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_20,code=sm_20") elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0) list(APPEND CMAKE_CUDA_ARCHITECTURES 70) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_70,code=sm_70") endif() + # https://docs.nvidia.com/cuda/archive/10.0/cuda-compiler-driver-nvcc/index.html if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0) - list(APPEND CMAKE_CUDA_ARCHITECTURES 75) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_75,code=sm_75") + list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75) endif() + # https://docs.nvidia.com/cuda/archive/11.0/cuda-compiler-driver-nvcc/index.html if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0) list(APPEND CMAKE_CUDA_ARCHITECTURES 30) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_30,code=sm_30") elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0) list(APPEND CMAKE_CUDA_ARCHITECTURES 80) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_80,code=sm_80") - endif() - - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.1) - list(APPEND CMAKE_CUDA_ARCHITECTURES 86) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86") endif() + # https://docs.nvidia.com/cuda/archive/11.8.0/cuda-compiler-driver-nvcc/index.html 
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8) - list(APPEND CMAKE_CUDA_ARCHITECTURES 90) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_90,code=sm_90") + list(APPEND CMAKE_CUDA_ARCHITECTURES 86 87 89 90) endif() if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0) list(APPEND CMAKE_CUDA_ARCHITECTURES 35) - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_35,code=sm_35") endif() # sort the architectures diff --git a/docs/getting_started.md b/docs/getting_started.md index c738992b..a9bd5561 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -43,7 +43,7 @@ CUDA is used for NVFBC capture. 11.8.0 450.80.02 - 35;50;52;60;61;62;70;75;80;86;90 + 35;50;52;60;61;62;70;72;75;80;86;87;89;90 sunshine.AppImage @@ -55,7 +55,7 @@ CUDA is used for NVFBC capture. 12.0.0 525.60.13 - 50;52;60;61;62;70;75;80;86;90 + 50;52;60;61;62;70;72;75;80;86;87;89;90 sunshine_{arch}.flatpak From c246c78b13f472e88cd863e0da09cdf2a7c970c6 Mon Sep 17 00:00:00 2001 From: Insanemal Date: Fri, 16 Aug 2024 11:30:00 +1000 Subject: [PATCH 04/12] fix(linux): ensure NvFBC capture works after multiple sessions (#3020) --- src/platform/linux/cuda.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index b5374b18..5498d9a8 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -702,6 +702,7 @@ namespace cuda { NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER }; + ctx_t ctx { handle }; if (func.nvFBCDestroyHandle(handle, &params)) { BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle); } From e8c837f4126f11d4cf073c1b6d23a5e27dfc8775 Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:19:57 -0400 Subject: [PATCH 05/12] ci(crowdin): customize PR title (#3031) --- crowdin.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/crowdin.yml 
b/crowdin.yml index 3dd19366..3a5e4281 100644 --- a/crowdin.yml +++ b/crowdin.yml @@ -2,6 +2,7 @@ "base_path": "." "base_url": "https://api.crowdin.com" # optional (for Crowdin Enterprise only) "preserve_hierarchy": true # false will flatten tree on crowdin, but doesn't work with dest option +"pull_request_title": "chore(l10n): update translations" "pull_request_labels": [ "crowdin", "l10n" From bfdfcebc80b26f532c4ff9875275f21ea5bc3f0f Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Fri, 16 Aug 2024 20:41:27 +0300 Subject: [PATCH 06/12] feat(win/video): support native YUV 4:4:4 encoding (#2533) --- src/nvenc/nvenc_base.cpp | 91 ++-- src/nvenc/nvenc_base.h | 83 +++- src/nvenc/nvenc_colorspace.h | 7 +- src/nvenc/nvenc_config.h | 5 +- src/nvenc/nvenc_d3d11.cpp | 64 +-- src/nvenc/nvenc_d3d11.h | 30 +- src/nvenc/nvenc_d3d11_native.cpp | 71 ++++ src/nvenc/nvenc_d3d11_native.h | 38 ++ src/nvenc/nvenc_d3d11_on_cuda.cpp | 267 ++++++++++++ src/nvenc/nvenc_d3d11_on_cuda.h | 96 +++++ src/nvenc/nvenc_encoded_frame.h | 7 +- src/nvenc/nvenc_utils.cpp | 14 +- src/nvenc/nvenc_utils.h | 2 +- src/nvhttp.cpp | 15 + src/platform/common.h | 6 + src/platform/windows/display_vram.cpp | 394 +++++++++++++----- src/rtsp.cpp | 2 + src/utility.h | 6 + src/video.cpp | 329 ++++++++++----- src/video.h | 21 +- src/video_colorspace.cpp | 105 +++++ src/video_colorspace.h | 13 + .../convert_yuv444_packed_ayuv_ps.hlsl | 3 + .../convert_yuv444_packed_ayuv_ps_linear.hlsl | 3 + .../directx/convert_yuv444_packed_vs.hlsl | 10 + .../convert_yuv444_packed_y410_ps.hlsl | 4 + .../convert_yuv444_packed_y410_ps_linear.hlsl | 4 + ...4_packed_y410_ps_perceptual_quantizer.hlsl | 4 + .../directx/convert_yuv444_planar_ps.hlsl | 4 + .../convert_yuv444_planar_ps_linear.hlsl | 4 + ...yuv444_planar_ps_perceptual_quantizer.hlsl | 4 + .../directx/convert_yuv444_planar_vs.hlsl | 33 ++ .../shaders/directx/include/base_vs.hlsl | 2 +- .../directx/include/base_vs_types.hlsl | 4 + 
.../include/convert_yuv444_ps_base.hlsl | 39 ++ 35 files changed, 1454 insertions(+), 330 deletions(-) create mode 100644 src/nvenc/nvenc_d3d11_native.cpp create mode 100644 src/nvenc/nvenc_d3d11_native.h create mode 100644 src/nvenc/nvenc_d3d11_on_cuda.cpp create mode 100644 src/nvenc/nvenc_d3d11_on_cuda.h create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp index c632c254..b69d6f26 100644 --- a/src/nvenc/nvenc_base.cpp +++ b/src/nvenc/nvenc_base.cpp @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_base.cpp - * @brief Definitions for base NVENC encoder. + * @brief Definitions for abstract platform-agnostic base of standalone NVENC encoder. 
*/ #include "nvenc_base.h" @@ -85,9 +85,8 @@ namespace { namespace nvenc { - nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device): - device_type(device_type), - device(device) { + nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type): + device_type(device_type) { } nvenc_base::~nvenc_base() { @@ -115,19 +114,19 @@ namespace nvenc { session_params.deviceType = device_type; session_params.apiVersion = minimum_api_version; if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) { - BOOST_LOG(error) << "NvEncOpenEncodeSessionEx failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncOpenEncodeSessionEx() failed: " << last_nvenc_error_string; return false; } uint32_t encode_guid_count = 0; if (nvenc_failed(nvenc->nvEncGetEncodeGUIDCount(encoder, &encode_guid_count))) { - BOOST_LOG(error) << "NvEncGetEncodeGUIDCount failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDCount() failed: " << last_nvenc_error_string; return false; }; std::vector encode_guids(encode_guid_count); if (nvenc_failed(nvenc->nvEncGetEncodeGUIDs(encoder, encode_guids.data(), encode_guids.size(), &encode_guid_count))) { - BOOST_LOG(error) << "NvEncGetEncodeGUIDs failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDs() failed: " << last_nvenc_error_string; return false; } @@ -176,7 +175,7 @@ namespace nvenc { }; auto buffer_is_yuv444 = [&]() { - return buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT; + return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT; }; { @@ -220,7 +219,7 @@ namespace nvenc { NV_ENC_PRESET_CONFIG preset_config = { min_struct_version(NV_ENC_PRESET_CONFIG_VER), { min_struct_version(NV_ENC_CONFIG_VER, 7, 8) } }; if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) { - 
BOOST_LOG(error) << "NvEncGetEncodePresetConfigEx failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncGetEncodePresetConfigEx() failed: " << last_nvenc_error_string; return false; } @@ -228,7 +227,6 @@ namespace nvenc { enc_config.profileGUID = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID; enc_config.gopLength = NVENC_INFINITE_GOPLENGTH; enc_config.frameIntervalP = 1; - enc_config.rcParams.enableAQ = config.adaptive_quantization; enc_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; enc_config.rcParams.zeroReorderDelay = 1; enc_config.rcParams.enableLookahead = 0; @@ -282,7 +280,7 @@ namespace nvenc { } }; - auto fill_h264_hevc_vui = [&colorspace](auto &vui_config) { + auto fill_h264_hevc_vui = [&](auto &vui_config) { vui_config.videoSignalTypePresentFlag = 1; vui_config.videoFormat = NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED; vui_config.videoFullRangeFlag = colorspace.full_range; @@ -290,7 +288,7 @@ namespace nvenc { vui_config.colourPrimaries = colorspace.primaries; vui_config.transferCharacteristics = colorspace.tranfer_function; vui_config.colourMatrix = colorspace.matrix; - vui_config.chromaSampleLocationFlag = 1; + vui_config.chromaSampleLocationFlag = buffer_is_yuv444() ? 
0 : 1; vui_config.chromaSampleLocationTop = 0; vui_config.chromaSampleLocationBot = 0; }; @@ -331,7 +329,9 @@ namespace nvenc { auto &format_config = enc_config.encodeCodecConfig.av1Config; format_config.repeatSeqHdr = 1; format_config.idrPeriod = NVENC_INFINITE_GOPLENGTH; - format_config.chromaFormatIDC = 1; // YUV444 not supported by NVENC yet + if (buffer_is_yuv444()) { + format_config.chromaFormatIDC = 3; + } format_config.enableBitstreamPadding = config.insert_filler_data; if (buffer_is_10bit()) { format_config.inputPixelBitDepthMinus8 = 2; @@ -341,7 +341,7 @@ namespace nvenc { format_config.transferCharacteristics = colorspace.tranfer_function; format_config.matrixCoefficients = colorspace.matrix; format_config.colorRange = colorspace.full_range; - format_config.chromaSamplePosition = 1; + format_config.chromaSamplePosition = buffer_is_yuv444() ? 0 : 1; set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numFwdRefs, 8); set_minqp_if_enabled(config.min_qp_av1); @@ -358,7 +358,7 @@ namespace nvenc { init_params.encodeConfig = &enc_config; if (nvenc_failed(nvenc->nvEncInitializeEncoder(encoder, &init_params))) { - BOOST_LOG(error) << "NvEncInitializeEncoder failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncInitializeEncoder() failed: " << last_nvenc_error_string; return false; } @@ -366,14 +366,14 @@ namespace nvenc { NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) }; event_params.completionEvent = async_event_handle; if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) { - BOOST_LOG(error) << "NvEncRegisterAsyncEvent failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncRegisterAsyncEvent() failed: " << last_nvenc_error_string; return false; } } NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = { min_struct_version(NV_ENC_CREATE_BITSTREAM_BUFFER_VER) }; if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) { - 
BOOST_LOG(error) << "NvEncCreateBitstreamBuffer failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncCreateBitstreamBuffer() failed: " << last_nvenc_error_string; return false; } output_bitstream = create_bitstream_buffer.bitstreamBuffer; @@ -388,8 +388,13 @@ namespace nvenc { } { + auto video_format_string = client_config.videoFormat == 0 ? "H.264 " : + client_config.videoFormat == 1 ? "HEVC " : + client_config.videoFormat == 2 ? "AV1 " : + " "; std::string extra; if (init_params.enableEncodeAsync) extra += " async"; + if (buffer_is_yuv444()) extra += " yuv444"; if (buffer_is_10bit()) extra += " 10-bit"; if (enc_config.rcParams.multiPass != NV_ENC_MULTI_PASS_DISABLED) extra += " two-pass"; if (config.vbv_percentage_increase > 0 && get_encoder_cap(NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE)) extra += " vbv+" + std::to_string(config.vbv_percentage_increase); @@ -398,7 +403,8 @@ namespace nvenc { if (enc_config.rcParams.enableAQ) extra += " spatial-aq"; if (enc_config.rcParams.enableMinQP) extra += " qpmin=" + std::to_string(enc_config.rcParams.minQP.qpInterP); if (config.insert_filler_data) extra += " filler-data"; - BOOST_LOG(info) << "NvEnc: created encoder " << quality_preset_string_from_guid(init_params.presetGUID) << extra; + + BOOST_LOG(info) << "NvEnc: created encoder " << video_format_string << quality_preset_string_from_guid(init_params.presetGUID) << extra; } encoder_state = {}; @@ -409,20 +415,28 @@ namespace nvenc { void nvenc_base::destroy_encoder() { if (output_bitstream) { - nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream); + if (nvenc_failed(nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream))) { + BOOST_LOG(error) << "NvEnc: NvEncDestroyBitstreamBuffer() failed: " << last_nvenc_error_string; + } output_bitstream = nullptr; } if (encoder && async_event_handle) { NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) }; event_params.completionEvent = async_event_handle; - 
nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params); + if (nvenc_failed(nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params))) { + BOOST_LOG(error) << "NvEnc: NvEncUnregisterAsyncEvent() failed: " << last_nvenc_error_string; + } } if (registered_input_buffer) { - nvenc->nvEncUnregisterResource(encoder, registered_input_buffer); + if (nvenc_failed(nvenc->nvEncUnregisterResource(encoder, registered_input_buffer))) { + BOOST_LOG(error) << "NvEnc: NvEncUnregisterResource() failed: " << last_nvenc_error_string; + } registered_input_buffer = nullptr; } if (encoder) { - nvenc->nvEncDestroyEncoder(encoder); + if (nvenc_failed(nvenc->nvEncDestroyEncoder(encoder))) { + BOOST_LOG(error) << "NvEnc: NvEncDestroyEncoder() failed: " << last_nvenc_error_string; + } encoder = nullptr; } @@ -439,14 +453,23 @@ namespace nvenc { assert(registered_input_buffer); assert(output_bitstream); + if (!synchronize_input_buffer()) { + BOOST_LOG(error) << "NvEnc: failed to synchronize input buffer"; + return {}; + } + NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = { min_struct_version(NV_ENC_MAP_INPUT_RESOURCE_VER) }; mapped_input_buffer.registeredResource = registered_input_buffer; if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) { - BOOST_LOG(error) << "NvEncMapInputResource failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncMapInputResource() failed: " << last_nvenc_error_string; return {}; } - auto unmap_guard = util::fail_guard([&] { nvenc->nvEncUnmapInputResource(encoder, &mapped_input_buffer); }); + auto unmap_guard = util::fail_guard([&] { + if (nvenc_failed(nvenc->nvEncUnmapInputResource(encoder, mapped_input_buffer.mappedResource))) { + BOOST_LOG(error) << "NvEnc: NvEncUnmapInputResource() failed: " << last_nvenc_error_string; + } + }); NV_ENC_PIC_PARAMS pic_params = { min_struct_version(NV_ENC_PIC_PARAMS_VER, 4, 6) }; pic_params.inputWidth = encoder_params.width; @@ -460,7 +483,7 @@ namespace nvenc { pic_params.completionEvent = 
async_event_handle; if (nvenc_failed(nvenc->nvEncEncodePicture(encoder, &pic_params))) { - BOOST_LOG(error) << "NvEncEncodePicture failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncEncodePicture() failed: " << last_nvenc_error_string; return {}; } @@ -474,7 +497,7 @@ namespace nvenc { } if (nvenc_failed(nvenc->nvEncLockBitstream(encoder, &lock_bitstream))) { - BOOST_LOG(error) << "NvEncLockBitstream failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncLockBitstream() failed: " << last_nvenc_error_string; return {}; } @@ -498,7 +521,7 @@ namespace nvenc { } if (nvenc_failed(nvenc->nvEncUnlockBitstream(encoder, lock_bitstream.outputBitstream))) { - BOOST_LOG(error) << "NvEncUnlockBitstream failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncUnlockBitstream() failed: " << last_nvenc_error_string; } encoder_state.frame_size_logger.collect_and_log(encoded_frame.data.size() / 1000.); @@ -535,7 +558,7 @@ namespace nvenc { for (auto i = first_frame; i <= last_frame; i++) { if (nvenc_failed(nvenc->nvEncInvalidateRefFrames(encoder, i))) { - BOOST_LOG(error) << "NvEncInvalidateRefFrames " << i << " failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncInvalidateRefFrames() " << i << " failed: " << last_nvenc_error_string; return false; } } @@ -576,20 +599,22 @@ namespace nvenc { nvenc_status_case(NV_ENC_ERR_RESOURCE_REGISTER_FAILED); nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_REGISTERED); nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_MAPPED); - // Newer versions of sdk may add more constants, look for them the end of NVENCSTATUS enum + // Newer versions of sdk may add more constants, look for them at the end of NVENCSTATUS enum #undef nvenc_status_case default: return std::to_string(status); } }; - last_error_string.clear(); + last_nvenc_error_string.clear(); if (status != NV_ENC_SUCCESS) { + /* This API function gives broken strings more often than not if (nvenc && encoder) { - last_error_string = 
nvenc->nvEncGetLastErrorString(encoder); - if (!last_error_string.empty()) last_error_string += " "; + last_nvenc_error_string = nvenc->nvEncGetLastErrorString(encoder); + if (!last_nvenc_error_string.empty()) last_nvenc_error_string += " "; } - last_error_string += status_string(status); + */ + last_nvenc_error_string += status_string(status); return true; } diff --git a/src/nvenc/nvenc_base.h b/src/nvenc/nvenc_base.h index 23976c01..c49aa401 100644 --- a/src/nvenc/nvenc_base.h +++ b/src/nvenc/nvenc_base.h @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_base.h - * @brief Declarations for base NVENC encoder. + * @brief Declarations for abstract platform-agnostic base of standalone NVENC encoder. */ #pragma once @@ -13,36 +13,98 @@ #include +/** + * @brief Standalone NVENC encoder + */ namespace nvenc { + /** + * @brief Abstract platform-agnostic base of standalone NVENC encoder. + * Derived classes perform platform-specific operations. + */ class nvenc_base { public: - nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device); + /** + * @param device_type Underlying device type used by derived class. + */ + explicit nvenc_base(NV_ENC_DEVICE_TYPE device_type); virtual ~nvenc_base(); nvenc_base(const nvenc_base &) = delete; nvenc_base & operator=(const nvenc_base &) = delete; + /** + * @brief Create the encoder. + * @param config NVENC encoder configuration. + * @param client_config Stream configuration requested by the client. + * @param colorspace YUV colorspace. + * @param buffer_format Platform-agnostic input surface format. + * @return `true` on success, `false` on error + */ bool create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format); + /** + * @brief Destroy the encoder. + * Derived classes classes call it in the destructor. + */ void destroy_encoder(); + /** + * @brief Encode the next frame using platform-specific input surface. 
+ * @param frame_index Frame index that uniquely identifies the frame. + * Afterwards serves as parameter for `invalidate_ref_frames()`. + * No restrictions on the first frame index, but later frame indexes must be subsequent. + * @param force_idr Whether to encode frame as forced IDR. + * @return Encoded frame. + */ nvenc_encoded_frame encode_frame(uint64_t frame_index, bool force_idr); + /** + * @brief Perform reference frame invalidation (RFI) procedure. + * @param first_frame First frame index of the invalidation range. + * @param last_frame Last frame index of the invalidation range. + * @return `true` on success, `false` on error. + * After error next frame must be encoded with `force_idr = true`. + */ bool invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame); protected: + /** + * @brief Required. Used for loading NvEnc library and setting `nvenc` variable with `NvEncodeAPICreateInstance()`. + * Called during `create_encoder()` if `nvenc` variable is not initialized. + * @return `true` on success, `false` on error + */ virtual bool init_library() = 0; + /** + * @brief Required. Used for creating outside-facing input surface, + * registering this surface with `nvenc->nvEncRegisterResource()` and setting `registered_input_buffer` variable. + * Called during `create_encoder()`. + * @return `true` on success, `false` on error + */ virtual bool create_and_register_input_buffer() = 0; + /** + * @brief Optional. Override if you must perform additional operations on the registered input surface in the beginning of `encode_frame()`. + * Typically used for interop copy. + * @return `true` on success, `false` on error + */ + virtual bool + synchronize_input_buffer() { return true; } + + /** + * @brief Optional. Override if you want to create encoder in async mode. + * In this case must also set `async_event_handle` variable. 
+ * @param timeout_ms Wait timeout in milliseconds + * @return `true` on success, `false` on timeout or error + */ virtual bool wait_for_async_event(uint32_t timeout_ms) { return false; } @@ -61,9 +123,6 @@ namespace nvenc { min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0); const NV_ENC_DEVICE_TYPE device_type; - void *const device; - - std::unique_ptr nvenc; void *encoder = nullptr; @@ -75,11 +134,17 @@ namespace nvenc { bool rfi = false; } encoder_params; - // Derived classes set these variables - NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr; - void *async_event_handle = nullptr; + std::string last_nvenc_error_string; - std::string last_error_string; + // Derived classes set these variables + void *device = nullptr; ///< Platform-specific handle of encoding device. + ///< Should be set in constructor or `init_library()`. + std::shared_ptr nvenc; ///< Function pointers list produced by `NvEncodeAPICreateInstance()`. + ///< Should be set in `init_library()`. + NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr; ///< Platform-specific input surface registered with `NvEncRegisterResource()`. + ///< Should be set in `create_and_register_input_buffer()`. + void *async_event_handle = nullptr; ///< (optional) Platform-specific handle of event object event. + ///< Can be set in constructor or `init_library()`, must override `wait_for_async_event()`. private: NV_ENC_OUTPUT_PTR output_bitstream = nullptr; diff --git a/src/nvenc/nvenc_colorspace.h b/src/nvenc/nvenc_colorspace.h index 3a37ae34..c9ed5193 100644 --- a/src/nvenc/nvenc_colorspace.h +++ b/src/nvenc/nvenc_colorspace.h @@ -1,16 +1,21 @@ /** * @file src/nvenc/nvenc_colorspace.h - * @brief Declarations for base NVENC colorspace. + * @brief Declarations for NVENC YUV colorspace. */ #pragma once #include namespace nvenc { + + /** + * @brief YUV colorspace and color range. 
+ */ struct nvenc_colorspace_t { NV_ENC_VUI_COLOR_PRIMARIES primaries; NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function; NV_ENC_VUI_MATRIX_COEFFS matrix; bool full_range; }; + } // namespace nvenc diff --git a/src/nvenc/nvenc_config.h b/src/nvenc/nvenc_config.h index 8fcd84a6..213a0d28 100644 --- a/src/nvenc/nvenc_config.h +++ b/src/nvenc/nvenc_config.h @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_config.h - * @brief Declarations for base NVENC configuration. + * @brief Declarations for NVENC encoder configuration. */ #pragma once @@ -12,6 +12,9 @@ namespace nvenc { full_resolution, ///< Better overall statistics, slower and uses more extra vram }; + /** + * @brief NVENC encoder configuration. + */ struct nvenc_config { // Quality preset from 1 to 7, higher is slower int quality_preset = 1; diff --git a/src/nvenc/nvenc_d3d11.cpp b/src/nvenc/nvenc_d3d11.cpp index 8a726084..7dd545b4 100644 --- a/src/nvenc/nvenc_d3d11.cpp +++ b/src/nvenc/nvenc_d3d11.cpp @@ -1,43 +1,29 @@ /** * @file src/nvenc/nvenc_d3d11.cpp - * @brief Definitions for base NVENC d3d11. + * @brief Definitions for abstract Direct3D11 NVENC encoder. 
*/ #include "src/logging.h" #ifdef _WIN32 #include "nvenc_d3d11.h" - #include "nvenc_utils.h" - namespace nvenc { - nvenc_d3d11::nvenc_d3d11(ID3D11Device *d3d_device): - nvenc_base(NV_ENC_DEVICE_TYPE_DIRECTX, d3d_device), - d3d_device(d3d_device) { - } - nvenc_d3d11::~nvenc_d3d11() { - if (encoder) destroy_encoder(); - if (dll) { FreeLibrary(dll); dll = NULL; } } - ID3D11Texture2D * - nvenc_d3d11::get_input_texture() { - return d3d_input_texture.GetInterfacePtr(); - } - bool nvenc_d3d11::init_library() { if (dll) return true; #ifdef _WIN64 - auto dll_name = "nvEncodeAPI64.dll"; + constexpr auto dll_name = "nvEncodeAPI64.dll"; #else - auto dll_name = "nvEncodeAPI.dll"; + constexpr auto dll_name = "nvEncodeAPI.dll"; #endif if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) { @@ -45,7 +31,7 @@ namespace nvenc { auto new_nvenc = std::make_unique(); new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER); if (nvenc_failed(create_instance(new_nvenc.get()))) { - BOOST_LOG(error) << "NvEncodeAPICreateInstance failed: " << last_error_string; + BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string; } else { nvenc = std::move(new_nvenc); @@ -53,11 +39,11 @@ namespace nvenc { } } else { - BOOST_LOG(error) << "No NvEncodeAPICreateInstance in " << dll_name; + BOOST_LOG(error) << "NvEnc: No NvEncodeAPICreateInstance() in " << dll_name; } } else { - BOOST_LOG(debug) << "Couldn't load NvEnc library " << dll_name; + BOOST_LOG(debug) << "NvEnc: Couldn't load NvEnc library " << dll_name; } if (dll) { @@ -68,43 +54,5 @@ namespace nvenc { return false; } - bool - nvenc_d3d11::create_and_register_input_buffer() { - if (!d3d_input_texture) { - D3D11_TEXTURE2D_DESC desc = {}; - desc.Width = encoder_params.width; - desc.Height = encoder_params.height; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format); - desc.SampleDesc.Count = 1; - desc.Usage = 
D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_RENDER_TARGET; - if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) { - BOOST_LOG(error) << "NvEnc: couldn't create input texture"; - return false; - } - } - - if (!registered_input_buffer) { - NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) }; - register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; - register_resource.width = encoder_params.width; - register_resource.height = encoder_params.height; - register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr(); - register_resource.bufferFormat = encoder_params.buffer_format; - register_resource.bufferUsage = NV_ENC_INPUT_IMAGE; - - if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) { - BOOST_LOG(error) << "NvEncRegisterResource failed: " << last_error_string; - return false; - } - - registered_input_buffer = register_resource.registeredResource; - } - - return true; - } - } // namespace nvenc #endif diff --git a/src/nvenc/nvenc_d3d11.h b/src/nvenc/nvenc_d3d11.h index eac10165..2d4d4fe7 100644 --- a/src/nvenc/nvenc_d3d11.h +++ b/src/nvenc/nvenc_d3d11.h @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_d3d11.h - * @brief Declarations for base NVENC d3d11. + * @brief Declarations for abstract Direct3D11 NVENC encoder. */ #pragma once #ifdef _WIN32 @@ -14,25 +14,33 @@ namespace nvenc { _COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device); _COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D); + _COM_SMARTPTR_TYPEDEF(IDXGIDevice, IID_IDXGIDevice); + _COM_SMARTPTR_TYPEDEF(IDXGIAdapter, IID_IDXGIAdapter); - class nvenc_d3d11 final: public nvenc_base { + /** + * @brief Abstract Direct3D11 NVENC encoder. + * Encapsulates common code used by native and interop implementations. 
+ */ + class nvenc_d3d11: public nvenc_base { public: - nvenc_d3d11(ID3D11Device *d3d_device); + explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type): + nvenc_base(device_type) {} + ~nvenc_d3d11(); - ID3D11Texture2D * - get_input_texture(); + /** + * @brief Get input surface texture. + * @return Input surface texture. + */ + virtual ID3D11Texture2D * + get_input_texture() = 0; - private: + protected: bool init_library() override; - bool - create_and_register_input_buffer() override; - + private: HMODULE dll = NULL; - const ID3D11DevicePtr d3d_device; - ID3D11Texture2DPtr d3d_input_texture; }; } // namespace nvenc diff --git a/src/nvenc/nvenc_d3d11_native.cpp b/src/nvenc/nvenc_d3d11_native.cpp new file mode 100644 index 00000000..a563b33d --- /dev/null +++ b/src/nvenc/nvenc_d3d11_native.cpp @@ -0,0 +1,71 @@ +/** + * @file src/nvenc/nvenc_d3d11_native.cpp + * @brief Definitions for native Direct3D11 NVENC encoder. + */ +#ifdef _WIN32 + #include "nvenc_d3d11_native.h" + + #include "nvenc_utils.h" + +namespace nvenc { + + nvenc_d3d11_native::nvenc_d3d11_native(ID3D11Device *d3d_device): + nvenc_d3d11(NV_ENC_DEVICE_TYPE_DIRECTX), + d3d_device(d3d_device) { + device = d3d_device; + } + + nvenc_d3d11_native::~nvenc_d3d11_native() { + if (encoder) destroy_encoder(); + } + + ID3D11Texture2D * + nvenc_d3d11_native::get_input_texture() { + return d3d_input_texture.GetInterfacePtr(); + } + + bool + nvenc_d3d11_native::create_and_register_input_buffer() { + if (encoder_params.buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + BOOST_LOG(error) << "NvEnc: 10-bit 4:4:4 encoding is incompatible with D3D11 surface formats, use CUDA interop"; + return false; + } + + if (!d3d_input_texture) { + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = encoder_params.width; + desc.Height = encoder_params.height; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format); + desc.SampleDesc.Count = 1; + desc.Usage = 
D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_RENDER_TARGET; + if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) { + BOOST_LOG(error) << "NvEnc: couldn't create input texture"; + return false; + } + } + + if (!registered_input_buffer) { + NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) }; + register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; + register_resource.width = encoder_params.width; + register_resource.height = encoder_params.height; + register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr(); + register_resource.bufferFormat = encoder_params.buffer_format; + register_resource.bufferUsage = NV_ENC_INPUT_IMAGE; + + if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) { + BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string; + return false; + } + + registered_input_buffer = register_resource.registeredResource; + } + + return true; + } + +} // namespace nvenc +#endif diff --git a/src/nvenc/nvenc_d3d11_native.h b/src/nvenc/nvenc_d3d11_native.h new file mode 100644 index 00000000..f9d49b18 --- /dev/null +++ b/src/nvenc/nvenc_d3d11_native.h @@ -0,0 +1,38 @@ +/** + * @file src/nvenc/nvenc_d3d11_native.h + * @brief Declarations for native Direct3D11 NVENC encoder. + */ +#pragma once +#ifdef _WIN32 + + #include + #include + + #include "nvenc_d3d11.h" + +namespace nvenc { + + /** + * @brief Native Direct3D11 NVENC encoder. + */ + class nvenc_d3d11_native final: public nvenc_d3d11 { + public: + /** + * @param d3d_device Direct3D11 device used for encoding. 
+ */ + explicit nvenc_d3d11_native(ID3D11Device *d3d_device); + ~nvenc_d3d11_native(); + + ID3D11Texture2D * + get_input_texture() override; + + private: + bool + create_and_register_input_buffer() override; + + const ID3D11DevicePtr d3d_device; + ID3D11Texture2DPtr d3d_input_texture; + }; + +} // namespace nvenc +#endif diff --git a/src/nvenc/nvenc_d3d11_on_cuda.cpp b/src/nvenc/nvenc_d3d11_on_cuda.cpp new file mode 100644 index 00000000..37fe8963 --- /dev/null +++ b/src/nvenc/nvenc_d3d11_on_cuda.cpp @@ -0,0 +1,267 @@ +/** + * @file src/nvenc/nvenc_d3d11_on_cuda.cpp + * @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces. + */ +#ifdef _WIN32 + #include "nvenc_d3d11_on_cuda.h" + + #include "nvenc_utils.h" + +namespace nvenc { + + nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device): + nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA), + d3d_device(d3d_device) { + } + + nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() { + if (encoder) destroy_encoder(); + + if (cuda_context) { + { + auto autopop_context = push_context(); + + if (cuda_d3d_input_texture) { + if (cuda_failed(cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture))) { + BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error; + } + cuda_d3d_input_texture = nullptr; + } + + if (cuda_surface) { + if (cuda_failed(cuda_functions.cuMemFree(cuda_surface))) { + BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error; + } + cuda_surface = 0; + } + } + + if (cuda_failed(cuda_functions.cuCtxDestroy(cuda_context))) { + BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error; + } + cuda_context = nullptr; + } + + if (cuda_functions.dll) { + FreeLibrary(cuda_functions.dll); + cuda_functions = {}; + } + } + + ID3D11Texture2D * + nvenc_d3d11_on_cuda::get_input_texture() { + return d3d_input_texture.GetInterfacePtr(); + } + + bool + nvenc_d3d11_on_cuda::init_library() { + if (!nvenc_d3d11::init_library()) return 
false; + + constexpr auto dll_name = "nvcuda.dll"; + + if ((cuda_functions.dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) { + auto load_function = [&](T &location, auto symbol) -> bool { + location = (T) GetProcAddress(cuda_functions.dll, symbol); + return location != nullptr; + }; + if (!load_function(cuda_functions.cuInit, "cuInit") || + !load_function(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") || + !load_function(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") || + !load_function(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") || + !load_function(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") || + !load_function(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") || + !load_function(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") || + !load_function(cuda_functions.cuMemFree, "cuMemFree_v2") || + !load_function(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") || + !load_function(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") || + !load_function(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") || + !load_function(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") || + !load_function(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") || + !load_function(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2")) { + BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name; + FreeLibrary(cuda_functions.dll); + cuda_functions = {}; + } + } + else { + BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name; + } + + if (cuda_functions.dll) { + IDXGIDevicePtr dxgi_device; + IDXGIAdapterPtr dxgi_adapter; + if (d3d_device && + SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) && + SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter))) { + CUdevice cuda_device; + if (cuda_succeeded(cuda_functions.cuInit(0)) && + 
cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) && + cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) && + cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context))) { + device = cuda_context; + } + else { + BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error; + } + } + else { + BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop"; + } + } + + return device != nullptr; + } + + bool + nvenc_d3d11_on_cuda::create_and_register_input_buffer() { + if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding"; + return false; + } + + if (!d3d_input_texture) { + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = encoder_params.width; + desc.Height = encoder_params.height * 3; // Planar YUV + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format); + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_RENDER_TARGET; + + if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) { + BOOST_LOG(error) << "NvEnc: couldn't create input texture"; + return false; + } + } + + { + auto autopop_context = push_context(); + if (!autopop_context) return false; + + if (!cuda_d3d_input_texture) { + if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource( + &cuda_d3d_input_texture, + d3d_input_texture, + CU_GRAPHICS_REGISTER_FLAGS_NONE))) { + BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error; + return false; + } + } + + if (!cuda_surface) { + if (cuda_failed(cuda_functions.cuMemAllocPitch( + &cuda_surface, + &cuda_surface_pitch, + // Planar 16-bit YUV + encoder_params.width * 2, + encoder_params.height * 3, 16))) { + BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: 
error " << last_cuda_error; + return false; + } + } + } + + if (!registered_input_buffer) { + NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) }; + register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; + register_resource.width = encoder_params.width; + register_resource.height = encoder_params.height; + register_resource.pitch = cuda_surface_pitch; + register_resource.resourceToRegister = (void *) cuda_surface; + register_resource.bufferFormat = encoder_params.buffer_format; + register_resource.bufferUsage = NV_ENC_INPUT_IMAGE; + + if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) { + BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string; + return false; + } + + registered_input_buffer = register_resource.registeredResource; + } + + return true; + } + + bool + nvenc_d3d11_on_cuda::synchronize_input_buffer() { + auto autopop_context = push_context(); + if (!autopop_context) return false; + + if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) { + BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error; + return false; + } + + auto unmap = [&]() -> bool { + if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) { + BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error; + return false; + } + return true; + }; + auto unmap_guard = util::fail_guard(unmap); + + CUarray input_texture_array; + if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) { + BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error; + return false; + } + + { + CUDA_MEMCPY2D copy_params = {}; + copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY; + copy_params.srcArray = input_texture_array; + copy_params.dstMemoryType = 
CU_MEMORYTYPE_DEVICE; + copy_params.dstDevice = cuda_surface; + copy_params.dstPitch = cuda_surface_pitch; + // Planar 16-bit YUV + copy_params.WidthInBytes = encoder_params.width * 2; + copy_params.Height = encoder_params.height * 3; + + if (cuda_failed(cuda_functions.cuMemcpy2D(&copy_params))) { + BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error; + return false; + } + } + + unmap_guard.disable(); + return unmap(); + } + + bool + nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) { + last_cuda_error = result; + return result == CUDA_SUCCESS; + } + + bool + nvenc_d3d11_on_cuda::cuda_failed(CUresult result) { + last_cuda_error = result; + return result != CUDA_SUCCESS; + } + + nvenc_d3d11_on_cuda::autopop_context::~autopop_context() { + if (pushed_context) { + CUcontext popped_context; + if (parent.cuda_failed(parent.cuda_functions.cuCtxPopCurrent(&popped_context))) { + BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error; + } + } + } + + nvenc_d3d11_on_cuda::autopop_context + nvenc_d3d11_on_cuda::push_context() { + if (cuda_context && + cuda_succeeded(cuda_functions.cuCtxPushCurrent(cuda_context))) { + return { *this, cuda_context }; + } + else { + BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error; + return { *this, nullptr }; + } + } + +} // namespace nvenc +#endif diff --git a/src/nvenc/nvenc_d3d11_on_cuda.h b/src/nvenc/nvenc_d3d11_on_cuda.h new file mode 100644 index 00000000..81114321 --- /dev/null +++ b/src/nvenc/nvenc_d3d11_on_cuda.h @@ -0,0 +1,96 @@ +/** + * @file src/nvenc/nvenc_d3d11_on_cuda.h + * @brief Declarations for CUDA NVENC encoder with Direct3D11 input surfaces. + */ +#pragma once +#ifdef _WIN32 + + #include "nvenc_d3d11.h" + + #include + +namespace nvenc { + + /** + * @brief Interop Direct3D11 on CUDA NVENC encoder. + * Input surface is Direct3D11, encoding is performed by CUDA. 
+ */ + class nvenc_d3d11_on_cuda final: public nvenc_d3d11 { + public: + /** + * @param d3d_device Direct3D11 device that will create input surface texture. + * CUDA encoding device will be derived from it. + */ + explicit nvenc_d3d11_on_cuda(ID3D11Device *d3d_device); + ~nvenc_d3d11_on_cuda(); + + ID3D11Texture2D * + get_input_texture() override; + + private: + bool + init_library() override; + + bool + create_and_register_input_buffer() override; + + bool + synchronize_input_buffer() override; + + bool + cuda_succeeded(CUresult result); + + bool + cuda_failed(CUresult result); + + struct autopop_context { + autopop_context(nvenc_d3d11_on_cuda &parent, CUcontext pushed_context): + parent(parent), + pushed_context(pushed_context) { + } + + ~autopop_context(); + + explicit + operator bool() const { + return pushed_context != nullptr; + } + + nvenc_d3d11_on_cuda &parent; + CUcontext pushed_context = nullptr; + }; + + autopop_context + push_context(); + + HMODULE dll = NULL; + const ID3D11DevicePtr d3d_device; + ID3D11Texture2DPtr d3d_input_texture; + + struct { + tcuInit *cuInit; + tcuD3D11GetDevice *cuD3D11GetDevice; + tcuCtxCreate_v2 *cuCtxCreate; + tcuCtxDestroy_v2 *cuCtxDestroy; + tcuCtxPushCurrent_v2 *cuCtxPushCurrent; + tcuCtxPopCurrent_v2 *cuCtxPopCurrent; + tcuMemAllocPitch_v2 *cuMemAllocPitch; + tcuMemFree_v2 *cuMemFree; + tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource; + tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; + tcuGraphicsMapResources *cuGraphicsMapResources; + tcuGraphicsUnmapResources *cuGraphicsUnmapResources; + tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; + tcuMemcpy2D_v2 *cuMemcpy2D; + HMODULE dll; + } cuda_functions = {}; + + CUresult last_cuda_error = CUDA_SUCCESS; + CUcontext cuda_context = nullptr; + CUgraphicsResource cuda_d3d_input_texture = nullptr; + CUdeviceptr cuda_surface = 0; + size_t cuda_surface_pitch = 0; + }; + +} // namespace nvenc +#endif diff --git 
a/src/nvenc/nvenc_encoded_frame.h b/src/nvenc/nvenc_encoded_frame.h index 007345a0..46a8e46d 100644 --- a/src/nvenc/nvenc_encoded_frame.h +++ b/src/nvenc/nvenc_encoded_frame.h @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_encoded_frame.h - * @brief Declarations for base NVENC encoded frame. + * @brief Declarations for NVENC encoded frame. */ #pragma once @@ -8,10 +8,15 @@ #include namespace nvenc { + + /** + * @brief Encoded frame. + */ struct nvenc_encoded_frame { std::vector data; uint64_t frame_index = 0; bool idr = false; bool after_ref_frame_invalidation = false; }; + } // namespace nvenc diff --git a/src/nvenc/nvenc_utils.cpp b/src/nvenc/nvenc_utils.cpp index ff274d53..26e2dc30 100644 --- a/src/nvenc/nvenc_utils.cpp +++ b/src/nvenc/nvenc_utils.cpp @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_utils.cpp - * @brief Definitions for base NVENC utilities. + * @brief Definitions for NVENC utilities. */ #include @@ -18,6 +18,12 @@ namespace nvenc { case NV_ENC_BUFFER_FORMAT_NV12: return DXGI_FORMAT_NV12; + case NV_ENC_BUFFER_FORMAT_AYUV: + return DXGI_FORMAT_AYUV; + + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return DXGI_FORMAT_R16_UINT; + default: return DXGI_FORMAT_UNKNOWN; } @@ -33,6 +39,12 @@ namespace nvenc { case platf::pix_fmt_e::p010: return NV_ENC_BUFFER_FORMAT_YUV420_10BIT; + case platf::pix_fmt_e::ayuv: + return NV_ENC_BUFFER_FORMAT_AYUV; + + case platf::pix_fmt_e::yuv444p16: + return NV_ENC_BUFFER_FORMAT_YUV444_10BIT; + default: return NV_ENC_BUFFER_FORMAT_UNDEFINED; } diff --git a/src/nvenc/nvenc_utils.h b/src/nvenc/nvenc_utils.h index 09c88ff5..db428676 100644 --- a/src/nvenc/nvenc_utils.h +++ b/src/nvenc/nvenc_utils.h @@ -1,6 +1,6 @@ /** * @file src/nvenc/nvenc_utils.h - * @brief Declarations for base NVENC utilities. + * @brief Declarations for NVENC utilities. 
*/ #pragma once diff --git a/src/nvhttp.cpp b/src/nvhttp.cpp index 085402ff..8ac56797 100644 --- a/src/nvhttp.cpp +++ b/src/nvhttp.cpp @@ -720,17 +720,32 @@ namespace nvhttp { } uint32_t codec_mode_flags = SCM_H264; + if (video::last_encoder_probe_supported_yuv444_for_codec[0]) { + codec_mode_flags |= SCM_H264_HIGH8_444; + } if (video::active_hevc_mode >= 2) { codec_mode_flags |= SCM_HEVC; + if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { + codec_mode_flags |= SCM_HEVC_REXT8_444; + } } if (video::active_hevc_mode >= 3) { codec_mode_flags |= SCM_HEVC_MAIN10; + if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { + codec_mode_flags |= SCM_HEVC_REXT10_444; + } } if (video::active_av1_mode >= 2) { codec_mode_flags |= SCM_AV1_MAIN8; + if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { + codec_mode_flags |= SCM_AV1_HIGH8_444; + } } if (video::active_av1_mode >= 3) { codec_mode_flags |= SCM_AV1_MAIN10; + if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { + codec_mode_flags |= SCM_AV1_HIGH10_444; + } } tree.put("root.ServerCodecModeSupport", codec_mode_flags); diff --git a/src/platform/common.h b/src/platform/common.h index 24cd658b..5009c183 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -209,6 +209,9 @@ namespace platf { yuv420p10, ///< YUV 4:2:0 10-bit nv12, ///< NV12 p010, ///< P010 + ayuv, ///< AYUV + yuv444p16, ///< Planar 10-bit (shifted to 16-bit) YUV 4:4:4 + y410, ///< Y410 unknown ///< Unknown }; @@ -223,6 +226,9 @@ namespace platf { _CONVERT(yuv420p10); _CONVERT(nv12); _CONVERT(p010); + _CONVERT(ayuv); + _CONVERT(yuv444p16); + _CONVERT(y410); _CONVERT(unknown); } #undef _CONVERT diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 532b80d6..ed88e8d5 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -17,7 +17,8 @@ extern "C" { #include "src/config.h" #include "src/logging.h" #include 
"src/nvenc/nvenc_config.h" -#include "src/nvenc/nvenc_d3d11.h" +#include "src/nvenc/nvenc_d3d11_native.h" +#include "src/nvenc/nvenc_d3d11_on_cuda.h" #include "src/nvenc/nvenc_utils.h" #include "src/video.h" @@ -110,6 +111,16 @@ namespace platf::dxgi { blob_t convert_yuv420_planar_y_ps_linear_hlsl; blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl; blob_t convert_yuv420_planar_y_vs_hlsl; + blob_t convert_yuv444_packed_ayuv_ps_hlsl; + blob_t convert_yuv444_packed_ayuv_ps_linear_hlsl; + blob_t convert_yuv444_packed_vs_hlsl; + blob_t convert_yuv444_planar_ps_hlsl; + blob_t convert_yuv444_planar_ps_linear_hlsl; + blob_t convert_yuv444_planar_ps_perceptual_quantizer_hlsl; + blob_t convert_yuv444_packed_y410_ps_hlsl; + blob_t convert_yuv444_packed_y410_ps_linear_hlsl; + blob_t convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl; + blob_t convert_yuv444_planar_vs_hlsl; blob_t cursor_ps_hlsl; blob_t cursor_ps_normalize_white_hlsl; blob_t cursor_vs_hlsl; @@ -402,18 +413,38 @@ namespace platf::dxgi { return -1; } - device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr); - device_ctx->VSSetShader(scene_vs.get(), nullptr, 0); - device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0); - device_ctx->RSSetViewports(1, &outY_view); - device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res); - device_ctx->Draw(3, 0); + auto draw = [&](auto &input, auto &y_or_yuv_viewports, auto &uv_viewport) { + device_ctx->PSSetShaderResources(0, 1, &input); - device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr); - device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0); - device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? 
convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0); - device_ctx->RSSetViewports(1, &outUV_view); - device_ctx->Draw(3, 0); + // Draw Y/YUV + device_ctx->OMSetRenderTargets(1, &out_Y_or_YUV_rtv, nullptr); + device_ctx->VSSetShader(convert_Y_or_YUV_vs.get(), nullptr, 0); + device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_or_YUV_fp16_ps.get() : convert_Y_or_YUV_ps.get(), nullptr, 0); + auto viewport_count = (format == DXGI_FORMAT_R16_UINT) ? 3 : 1; + assert(viewport_count <= y_or_yuv_viewports.size()); + device_ctx->RSSetViewports(viewport_count, y_or_yuv_viewports.data()); + device_ctx->Draw(3 * viewport_count, 0); // vertex shader will spread vertices across viewports + + // Draw UV if needed + if (out_UV_rtv) { + assert(format == DXGI_FORMAT_NV12 || format == DXGI_FORMAT_P010); + device_ctx->OMSetRenderTargets(1, &out_UV_rtv, nullptr); + device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0); + device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? 
convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0); + device_ctx->RSSetViewports(1, &uv_viewport); + device_ctx->Draw(3, 0); + } + }; + + // Clear render target view(s) once so that the aspect ratio mismatch "bars" appear black + if (!rtvs_cleared) { + auto black = create_black_texture_for_rtv_clear(); + if (black) draw(black, out_Y_or_YUV_viewports_for_clear, out_UV_viewport_for_clear); + rtvs_cleared = true; + } + + // Draw captured frame + draw(img_ctx.encoder_input_res, out_Y_or_YUV_viewports, out_UV_viewport); // Release encoder mutex to allow capture code to reuse this image img_ctx.encoder_mutex->ReleaseSync(0); @@ -429,6 +460,12 @@ namespace platf::dxgi { apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) { auto color_vectors = ::video::color_vectors_from_colorspace(colorspace); + if (format == DXGI_FORMAT_AYUV || + format == DXGI_FORMAT_R16_UINT || + format == DXGI_FORMAT_Y410) { + color_vectors = ::video::new_color_vectors_from_colorspace(colorspace); + } + if (!color_vectors) { BOOST_LOG(error) << "No vector data for colorspace"sv; return; @@ -440,6 +477,7 @@ namespace platf::dxgi { return; } + device_ctx->VSSetConstantBuffers(3, 1, &color_matrix); device_ctx->PSSetConstantBuffers(0, 1, &color_matrix); this->color_matrix = std::move(color_matrix); } @@ -465,8 +503,20 @@ namespace platf::dxgi { auto offsetX = (out_width - out_width_f) / 2; auto offsetY = (out_height - out_height_f) / 2; - outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; - outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; + out_Y_or_YUV_viewports[0] = { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; // Y plane + out_Y_or_YUV_viewports[1] = out_Y_or_YUV_viewports[0]; // U plane + out_Y_or_YUV_viewports[1].TopLeftY += out_height; + out_Y_or_YUV_viewports[2] = out_Y_or_YUV_viewports[1]; // V plane + out_Y_or_YUV_viewports[2].TopLeftY += out_height; + + 
out_Y_or_YUV_viewports_for_clear[0] = { 0, 0, (float) out_width, (float) out_height, 0.0f, 1.0f }; // Y plane + out_Y_or_YUV_viewports_for_clear[1] = out_Y_or_YUV_viewports_for_clear[0]; // U plane + out_Y_or_YUV_viewports_for_clear[1].TopLeftY += out_height; + out_Y_or_YUV_viewports_for_clear[2] = out_Y_or_YUV_viewports_for_clear[1]; // V plane + out_Y_or_YUV_viewports_for_clear[2].TopLeftY += out_height; + + out_UV_viewport = { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; + out_UV_viewport_for_clear = { 0, 0, (float) out_width / 2, (float) out_height / 2, 0.0f, 1.0f }; float subsample_offset_in[16 / sizeof(float)] { 1.0f / (float) out_width_f, 1.0f / (float) out_height_f }; // aligned to 16-byte subsample_offset = make_buffer(device.get(), subsample_offset_in); @@ -488,36 +538,106 @@ namespace platf::dxgi { device_ctx->VSSetConstantBuffers(1, 1, &rotation); } - D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc { - format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM, - D3D11_RTV_DIMENSION_TEXTURE2D + DXGI_FORMAT rtv_Y_or_YUV_format = DXGI_FORMAT_UNKNOWN; + DXGI_FORMAT rtv_UV_format = DXGI_FORMAT_UNKNOWN; + bool rtv_simple_clear = false; + + switch (format) { + case DXGI_FORMAT_NV12: + rtv_Y_or_YUV_format = DXGI_FORMAT_R8_UNORM; + rtv_UV_format = DXGI_FORMAT_R8G8_UNORM; + rtv_simple_clear = true; + break; + + case DXGI_FORMAT_P010: + rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UNORM; + rtv_UV_format = DXGI_FORMAT_R16G16_UNORM; + rtv_simple_clear = true; + break; + + case DXGI_FORMAT_AYUV: + rtv_Y_or_YUV_format = DXGI_FORMAT_R8G8B8A8_UINT; + break; + + case DXGI_FORMAT_R16_UINT: + rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UINT; + break; + + case DXGI_FORMAT_Y410: + rtv_Y_or_YUV_format = DXGI_FORMAT_R10G10B10A2_UINT; + break; + + default: + BOOST_LOG(error) << "Unable to create render target views because of the unrecognized surface format"; + return -1; + } + + auto create_rtv = [&](auto &rt, DXGI_FORMAT rt_format) -> bool { + 
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {}; + rtv_desc.Format = rt_format; + rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + + auto status = device->CreateRenderTargetView(output_texture.get(), &rtv_desc, &rt); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to create render target view: " << util::log_hex(status); + return false; + } + + return true; }; - auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt); - if (FAILED(status)) { - BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; + // Create Y/YUV render target view + if (!create_rtv(out_Y_or_YUV_rtv, rtv_Y_or_YUV_format)) return -1; + + // Create UV render target view if needed + if (rtv_UV_format != DXGI_FORMAT_UNKNOWN && !create_rtv(out_UV_rtv, rtv_UV_format)) return -1; + + if (rtv_simple_clear) { + // Clear the RTVs to ensure the aspect ratio padding is black + const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + device_ctx->ClearRenderTargetView(out_Y_or_YUV_rtv.get(), y_black); + if (out_UV_rtv) { + const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f }; + device_ctx->ClearRenderTargetView(out_UV_rtv.get(), uv_black); + } + rtvs_cleared = true; } - - nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? 
DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM; - - status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt); - if (FAILED(status)) { - BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; + else { + // Can't use ClearRenderTargetView(), will clear on first convert() + rtvs_cleared = false; } - // Clear the RTVs to ensure the aspect ratio padding is black - const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - device_ctx->ClearRenderTargetView(nv12_Y_rt.get(), y_black); - const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f }; - device_ctx->ClearRenderTargetView(nv12_UV_rt.get(), uv_black); - return 0; } int init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { + switch (pix_fmt) { + case pix_fmt_e::nv12: + format = DXGI_FORMAT_NV12; + break; + + case pix_fmt_e::p010: + format = DXGI_FORMAT_P010; + break; + + case pix_fmt_e::ayuv: + format = DXGI_FORMAT_AYUV; + break; + + case pix_fmt_e::yuv444p16: + format = DXGI_FORMAT_R16_UINT; + break; + + case pix_fmt_e::y410: + format = DXGI_FORMAT_Y410; + break; + + default: + BOOST_LOG(error) << "D3D11 backend doesn't support pixel format: " << from_pix_fmt(pix_fmt); + return -1; + } + D3D_FEATURE_LEVEL featureLevels[] { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, @@ -556,61 +676,82 @@ namespace platf::dxgi { BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance."; } - format = (pix_fmt == pix_fmt_e::nv12 ? 
DXGI_FORMAT_NV12 : DXGI_FORMAT_P010); - status = device->CreateVertexShader(convert_yuv420_planar_y_vs_hlsl->GetBufferPointer(), convert_yuv420_planar_y_vs_hlsl->GetBufferSize(), nullptr, &scene_vs); - if (status) { - BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } +#define create_vertex_shader_helper(x, y) \ + if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ + BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \ + return -1; \ + } +#define create_pixel_shader_helper(x, y) \ + if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ + BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \ + return -1; \ + } - status = device->CreateVertexShader(convert_yuv420_packed_uv_type0_vs_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs); - if (status) { - BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } + switch (format) { + case DXGI_FORMAT_NV12: + // Semi-planar 8-bit YUV 4:2:0 + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + break; - // If the display is in HDR and we're streaming HDR, we'll be converting scRGB to SMPTE 2084 PQ. 
- if (format == DXGI_FORMAT_P010 && display->is_hdr()) { - status = device->CreatePixelShader(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps); - if (status) { - BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; + case DXGI_FORMAT_P010: + // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } + break; + + case DXGI_FORMAT_R16_UINT: + // Planar 16-bit YUV 4:4:4, 10 most significant bits store the value + create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + break; + + case DXGI_FORMAT_AYUV: + // Packed 8-bit YUV 4:4:4 + create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); + 
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + break; + + case DXGI_FORMAT_Y410: + // Packed 10-bit YUV 4:4:4 + create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + break; + + default: + BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format"; return -1; - } - - status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps); - if (status) { - BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - } - else { - // If the display is in Advanced Color mode, the desktop format will be scRGB FP16. - // scRGB uses linear gamma, so we must use our linear to sRGB conversion shaders. 
- status = device->CreatePixelShader(convert_yuv420_planar_y_ps_linear_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_linear_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps); - if (status) { - BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - - status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_linear_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_linear_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps); - if (status) { - BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } } - // These shaders consume standard 8-bit sRGB input - status = device->CreatePixelShader(convert_yuv420_planar_y_ps_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps); - if (status) { - BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - - status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps); - if (status) { - BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } +#undef create_vertex_shader_helper +#undef create_pixel_shader_helper auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false); if (!default_color_vectors) { @@ -623,6 +764,7 @@ namespace platf::dxgi { BOOST_LOG(error) << "Failed to create color matrix buffer"sv; return -1; } + device_ctx->VSSetConstantBuffers(3, 1, &color_matrix); device_ctx->PSSetConstantBuffers(0, 1, &color_matrix); this->display = std::dynamic_pointer_cast(display); @@ -653,7 +795,7 @@ namespace platf::dxgi { device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); 
device_ctx->PSSetSamplers(0, 1, &sampler_linear); - device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); return 0; } @@ -725,6 +867,41 @@ namespace platf::dxgi { return 0; } + shader_res_t + create_black_texture_for_rtv_clear() { + constexpr auto width = 32; + constexpr auto height = 32; + + D3D11_TEXTURE2D_DESC texture_desc = {}; + texture_desc.Width = width; + texture_desc.Height = height; + texture_desc.MipLevels = 1; + texture_desc.ArraySize = 1; + texture_desc.SampleDesc.Count = 1; + texture_desc.Usage = D3D11_USAGE_IMMUTABLE; + texture_desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + std::vector mem(4 * width * height, 0); + D3D11_SUBRESOURCE_DATA texture_data = { mem.data(), 4 * width, 0 }; + + texture2d_t texture; + auto status = device->CreateTexture2D(&texture_desc, &texture_data, &texture); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to create black texture: " << util::log_hex(status); + return {}; + } + + shader_res_t resource_view; + status = device->CreateShaderResourceView(texture.get(), nullptr, &resource_view); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to create black texture resource view: " << util::log_hex(status); + return {}; + } + + return resource_view; + } + ::video::color_t *color_p; buf_t subsample_offset; @@ -733,8 +910,9 @@ namespace platf::dxgi { blend_t blend_disable; sampler_state_t sampler_linear; - render_target_t nv12_Y_rt; - render_target_t nv12_UV_rt; + render_target_t out_Y_or_YUV_rtv; + render_target_t out_UV_rtv; + bool rtvs_cleared = false; // d3d_img_t::id -> encoder_img_ctx_t // These store the encoder textures for each img_t that passes through @@ -744,15 +922,16 @@ namespace platf::dxgi { std::shared_ptr display; + vs_t convert_Y_or_YUV_vs; + ps_t convert_Y_or_YUV_ps; + ps_t convert_Y_or_YUV_fp16_ps; + vs_t convert_UV_vs; ps_t convert_UV_ps; ps_t 
convert_UV_fp16_ps; - ps_t convert_Y_ps; - ps_t convert_Y_fp16_ps; - vs_t scene_vs; - D3D11_VIEWPORT outY_view; - D3D11_VIEWPORT outUV_view; + std::array out_Y_or_YUV_viewports, out_Y_or_YUV_viewports_for_clear; + D3D11_VIEWPORT out_UV_viewport, out_UV_viewport_for_clear; DXGI_FORMAT format; @@ -871,7 +1050,12 @@ namespace platf::dxgi { if (base.init(display, adapter_p, pix_fmt)) return false; - nvenc_d3d = std::make_unique(base.device.get()); + if (pix_fmt == pix_fmt_e::yuv444p16) { + nvenc_d3d = std::make_unique(base.device.get()); + } + else { + nvenc_d3d = std::make_unique(base.device.get()); + } nvenc = nvenc_d3d.get(); return true; @@ -1409,7 +1593,7 @@ namespace platf::dxgi { device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); device_ctx->PSSetSamplers(0, 1, &sampler_linear); - device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); return 0; } @@ -1703,20 +1887,10 @@ namespace platf::dxgi { std::unique_ptr display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) { - if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) { - BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']'; - - return nullptr; - } - auto device = std::make_unique(); - - auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt); - - if (ret) { + if (device->init(shared_from_this(), adapter.get(), pix_fmt) != 0) { return nullptr; } - return device; } @@ -1746,6 +1920,16 @@ namespace platf::dxgi { compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear); compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer); compile_vertex_shader_helper(convert_yuv420_planar_y_vs); + compile_pixel_shader_helper(convert_yuv444_packed_ayuv_ps); + compile_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear); + compile_vertex_shader_helper(convert_yuv444_packed_vs); + 
compile_pixel_shader_helper(convert_yuv444_planar_ps); + compile_pixel_shader_helper(convert_yuv444_planar_ps_linear); + compile_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer); + compile_pixel_shader_helper(convert_yuv444_packed_y410_ps); + compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear); + compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer); + compile_vertex_shader_helper(convert_yuv444_planar_vs); compile_pixel_shader_helper(cursor_ps); compile_pixel_shader_helper(cursor_ps_normalize_white); compile_vertex_shader_helper(cursor_vs); diff --git a/src/rtsp.cpp b/src/rtsp.cpp index 3528c258..3f146937 100644 --- a/src/rtsp.cpp +++ b/src/rtsp.cpp @@ -978,6 +978,7 @@ namespace rtsp_stream { args.try_emplace("x-nv-aqos.qosTrafficType"sv, "4"sv); args.try_emplace("x-ml-video.configuredBitrateKbps"sv, "0"sv); args.try_emplace("x-ss-general.encryptionEnabled"sv, "0"sv); + args.try_emplace("x-ss-video[0].chromaSamplingType"sv, "0"sv); stream::config_t config; @@ -1013,6 +1014,7 @@ namespace rtsp_stream { config.monitor.encoderCscMode = util::from_view(args.at("x-nv-video[0].encoderCscMode"sv)); config.monitor.videoFormat = util::from_view(args.at("x-nv-vqos[0].bitStreamFormat"sv)); config.monitor.dynamicRange = util::from_view(args.at("x-nv-video[0].dynamicRangeMode"sv)); + config.monitor.chromaSamplingType = util::from_view(args.at("x-ss-video[0].chromaSamplingType"sv)); configuredBitrateKbps = util::from_view(args.at("x-ml-video.configuredBitrateKbps"sv)); } diff --git a/src/utility.h b/src/utility.h index c3b4f3d8..e9adefd8 100644 --- a/src/utility.h +++ b/src/utility.h @@ -267,6 +267,12 @@ namespace util { return Hex(elem, rev); } + template + std::string + log_hex(const T &value) { + return "0x" + Hex(value, false).to_string(); + } + template std::string hex_vec(It begin, It end, bool rev = false) { diff --git a/src/video.cpp b/src/video.cpp index 908b7fa9..6827b6c7 100644 --- a/src/video.cpp +++ 
b/src/video.cpp @@ -53,31 +53,36 @@ namespace video { namespace nv { enum class profile_h264_e : int { - baseline, ///< Baseline profile - main, ///< Main profile - high, ///< High profile - high_444p, ///< High 4:4:4 Predictive profile + high = 2, ///< High profile + high_444p = 3, ///< High 4:4:4 Predictive profile }; enum class profile_hevc_e : int { - main, ///< Main profile - main_10, ///< Main 10 profile - rext, ///< Rext profile + main = 0, ///< Main profile + main_10 = 1, ///< Main 10 profile + rext = 2, ///< Rext profile }; + } // namespace nv namespace qsv { enum class profile_h264_e : int { - baseline = 66, ///< Baseline profile - main = 77, ///< Main profile high = 100, ///< High profile + high_444p = 244, ///< High 4:4:4 Predictive profile }; enum class profile_hevc_e : int { main = 1, ///< Main profile main_10 = 2, ///< Main 10 profile + rext = 4, ///< RExt profile }; + + enum class profile_av1_e : int { + main = 1, ///< Main profile + high = 2, ///< High profile + }; + } // namespace qsv util::Either @@ -274,6 +279,7 @@ namespace video { NO_RC_BUF_LIMIT = 1 << 7, ///< Don't set rc_buffer_size REF_FRAMES_INVALIDATION = 1 << 8, ///< Support reference frames invalidation ALWAYS_REPROBE = 1 << 9, ///< This is an encoder of last resort and we want to aggressively probe for a better one + YUV444_SUPPORT = 1 << 10, ///< Encoder may support 4:4:4 chroma sampling depending on hardware }; class avcodec_encode_session_t: public encode_session_t { @@ -447,44 +453,39 @@ namespace video { "nvenc"sv, std::make_unique( platf::mem_type_e::dxgi, - platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010), + platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010, + platf::pix_fmt_e::ayuv, platf::pix_fmt_e::yuv444p16), { - // Common options - {}, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options - {}, + {}, // Common options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific 
options + {}, // Fallback options std::nullopt, // QP rate control fallback "av1_nvenc"s, }, { - // Common options - {}, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options - {}, + {}, // Common options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options std::nullopt, // QP rate control fallback "hevc_nvenc"s, }, { - // Common options - {}, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options - {}, + {}, // Common options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options std::nullopt, // QP rate control fallback "h264_nvenc"s, }, - PARALLEL_ENCODING | REF_FRAMES_INVALIDATION // flags + PARALLEL_ENCODING | REF_FRAMES_INVALIDATION | YUV444_SUPPORT // flags }; #elif !defined(__APPLE__) encoder_t nvenc { @@ -498,6 +499,7 @@ namespace video { AV_PIX_FMT_CUDA, #endif AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, #ifdef _WIN32 dxgi_init_avcodec_hardware_input_buffer #else @@ -516,12 +518,11 @@ namespace video { { "multipass"s, &config::video.nv_legacy.multipass }, { "aq"s, &config::video.nv_legacy.aq }, }, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options - {}, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options std::nullopt, // QP rate control fallback "av1_nvenc"s, }, @@ -537,14 +538,16 @@ namespace video { { "multipass"s, &config::video.nv_legacy.multipass }, { "aq"s, &config::video.nv_legacy.aq }, }, - // SDR-specific options { + // SDR-specific options { "profile"s, (int) nv::profile_hevc_e::main }, }, - // HDR-specific options { + // HDR-specific options { "profile"s, (int) nv::profile_hevc_e::main_10 }, }, + {}, // 
YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, // QP rate control fallback "hevc_nvenc"s, @@ -561,11 +564,13 @@ namespace video { { "multipass"s, &config::video.nv_legacy.multipass }, { "aq"s, &config::video.nv_legacy.aq }, }, - // SDR-specific options { + // SDR-specific options { "profile"s, (int) nv::profile_h264_e::high }, }, {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, // QP rate control fallback "h264_nvenc"s, @@ -581,6 +586,7 @@ namespace video { AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_QSV, AV_PIX_FMT_QSV, AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + AV_PIX_FMT_VUYX, AV_PIX_FMT_XV30, dxgi_init_avcodec_hardware_input_buffer), { // Common options @@ -591,12 +597,23 @@ namespace video { { "low_delay_brc"s, 1 }, { "low_power"s, 1 }, }, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options - {}, + { + // SDR-specific options + { "profile"s, (int) qsv::profile_av1_e::main }, + }, + { + // HDR-specific options + { "profile"s, (int) qsv::profile_av1_e::main }, + }, + { + // YUV444 SDR-specific options + { "profile"s, (int) qsv::profile_av1_e::high }, + }, + { + // YUV444 HDR-specific options + { "profile"s, (int) qsv::profile_av1_e::high }, + }, + {}, // Fallback options std::nullopt, // QP rate control fallback "av1_qsv"s, }, @@ -611,16 +628,24 @@ namespace video { { "recovery_point_sei"s, 0 }, { "pic_timing_sei"s, 0 }, }, - // SDR-specific options { + // SDR-specific options { "profile"s, (int) qsv::profile_hevc_e::main }, }, - // HDR-specific options { + // HDR-specific options { "profile"s, (int) qsv::profile_hevc_e::main_10 }, }, - // Fallback options { + // YUV444 SDR-specific options + { "profile"s, (int) qsv::profile_hevc_e::rext }, + }, + { + // YUV444 HDR-specific options + { "profile"s, (int) qsv::profile_hevc_e::rext }, + }, + { + // Fallback options { "low_power"s, []() { return 
config::video.qsv.qsv_slow_hevc ? 0 : 1; } }, }, std::nullopt, // QP rate control fallback @@ -640,20 +665,24 @@ namespace video { { "pic_timing_sei"s, 0 }, { "max_dec_frame_buffering"s, 1 }, }, - // SDR-specific options { + // SDR-specific options { "profile"s, (int) qsv::profile_h264_e::high }, }, - // HDR-specific options - {}, - // Fallback options + {}, // HDR-specific options { + // YUV444 SDR-specific options + { "profile"s, (int) qsv::profile_h264_e::high_444p }, + }, + {}, // YUV444 HDR-specific options + { + // Fallback options { "low_power"s, 0 }, // Some old/low-end Intel GPUs don't support low power encoding }, std::nullopt, // QP rate control fallback "h264_qsv"s, }, - PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT + PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT | YUV444_SUPPORT }; encoder_t amdvce { @@ -662,6 +691,7 @@ namespace video { AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE, AV_PIX_FMT_D3D11, AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, dxgi_init_avcodec_hardware_input_buffer), { // Common options @@ -676,6 +706,8 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, // QP rate control fallback "av1_amf"s, @@ -698,6 +730,8 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, // QP rate control fallback "hevc_amf"s, @@ -716,12 +750,12 @@ namespace video { { "vbaq"s, &config::video.amd.amd_vbaq }, { "enforce_hrd"s, &config::video.amd.amd_enforce_hrd }, }, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options { + // Fallback options { 
"usage"s, 2 /* AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY */ }, // Workaround for https://github.com/GPUOpen-LibrariesAndSDKs/AMF/issues/410 }, std::nullopt, // QP rate control fallback @@ -737,6 +771,7 @@ namespace video { AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE, AV_PIX_FMT_NONE, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10, nullptr), { // libsvtav1 takes different presets than libx264/libx265. @@ -749,6 +784,8 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options // QP rate control fallback @@ -776,6 +813,8 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, // QP rate control fallback "libx265"s, @@ -788,11 +827,13 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, // QP rate control fallback "libx264"s, }, - H264_ONLY | PARALLEL_ENCODING | ALWAYS_REPROBE + H264_ONLY | PARALLEL_ENCODING | ALWAYS_REPROBE | YUV444_SUPPORT }; #ifdef __linux__ @@ -802,6 +843,7 @@ namespace video { AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_NONE, AV_PIX_FMT_VAAPI, AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, vaapi_init_avcodec_hardware_input_buffer), { // Common options @@ -810,12 +852,12 @@ namespace video { { "async_depth"s, 1 }, { "idr_interval"s, std::numeric_limits::max() }, }, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options { + // Fallback options { "low_power"s, 0 }, // Not all VAAPI drivers expose LP entrypoints }, std::make_optional("qp"s, 
&config::video.qp), @@ -829,12 +871,12 @@ namespace video { { "sei"s, 0 }, { "idr_interval"s, std::numeric_limits::max() }, }, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options { + // Fallback options { "low_power"s, 0 }, // Not all VAAPI drivers expose LP entrypoints }, std::make_optional("qp"s, &config::video.qp), @@ -848,12 +890,12 @@ namespace video { { "sei"s, 0 }, { "idr_interval"s, std::numeric_limits::max() }, }, - // SDR-specific options - {}, - // HDR-specific options - {}, - // Fallback options + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options { + // Fallback options { "low_power"s, 0 }, // Not all VAAPI drivers expose LP entrypoints }, std::make_optional("qp"s, &config::video.qp), @@ -871,6 +913,7 @@ namespace video { AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_HWDEVICE_TYPE_NONE, AV_PIX_FMT_VIDEOTOOLBOX, AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, vt_init_avcodec_hardware_input_buffer), { // Common options @@ -882,6 +925,8 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, "av1_videotoolbox"s, @@ -896,6 +941,8 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options {}, // Fallback options std::nullopt, "hevc_videotoolbox"s, @@ -910,9 +957,12 @@ namespace video { }, {}, // SDR-specific options {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options { + // Fallback options { "flags"s, "-low_delay" }, - }, // Fallback options + }, std::nullopt, "h264_videotoolbox"s, }, @@ -941,6 +991,7 @@ namespace video { int 
active_hevc_mode; int active_av1_mode; bool last_encoder_probe_supported_ref_frames_invalidation = false; + std::array last_encoder_probe_supported_yuv444_for_codec = {}; void reset_display(std::shared_ptr &disp, const platf::mem_type_e &type, const std::string &display_name, const config_t &config) { @@ -1396,6 +1447,11 @@ namespace video { return nullptr; } + if (config.chromaSamplingType == 1 && !video_format[encoder_t::YUV444]) { + BOOST_LOG(error) << video_format.name << ": YUV 4:4:4 not supported"sv; + return nullptr; + } + auto codec = avcodec_find_encoder_by_name(video_format.name.c_str()); if (!codec) { BOOST_LOG(error) << "Couldn't open ["sv << video_format.name << ']'; @@ -1404,7 +1460,11 @@ namespace video { } auto colorspace = encode_device->colorspace; - auto sw_fmt = (colorspace.bit_depth == 10) ? platform_formats->avcodec_pix_fmt_10bit : platform_formats->avcodec_pix_fmt_8bit; + auto sw_fmt = (colorspace.bit_depth == 8 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_8bit : + (colorspace.bit_depth == 8 && config.chromaSamplingType == 1) ? platform_formats->avcodec_pix_fmt_yuv444_8bit : + (colorspace.bit_depth == 10 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_10bit : + (colorspace.bit_depth == 10 && config.chromaSamplingType == 1) ? platform_formats->avcodec_pix_fmt_yuv444_10bit : + AV_PIX_FMT_NONE; // Allow up to 1 retry to apply the set of fallback options. // @@ -1421,16 +1481,25 @@ namespace video { switch (config.videoFormat) { case 0: - ctx->profile = FF_PROFILE_H264_HIGH; + // 10-bit h264 encoding is not supported by our streaming protocol + assert(!config.dynamicRange); + ctx->profile = (config.chromaSamplingType == 1) ? FF_PROFILE_H264_HIGH_444_PREDICTIVE : FF_PROFILE_H264_HIGH; break; case 1: - ctx->profile = config.dynamicRange ? 
FF_PROFILE_HEVC_MAIN_10 : FF_PROFILE_HEVC_MAIN; + if (config.chromaSamplingType == 1) { + // HEVC uses the same RExt profile for both 8 and 10 bit YUV 4:4:4 encoding + ctx->profile = FF_PROFILE_HEVC_REXT; + } + else { + ctx->profile = config.dynamicRange ? FF_PROFILE_HEVC_MAIN_10 : FF_PROFILE_HEVC_MAIN; + } break; case 2: // AV1 supports both 8 and 10 bit encoding with the same Main profile - ctx->profile = FF_PROFILE_AV1_MAIN; + // but YUV 4:4:4 sampling requires High profile + ctx->profile = (config.chromaSamplingType == 1) ? FF_PROFILE_AV1_HIGH : FF_PROFILE_AV1_MAIN; break; } @@ -1561,6 +1630,11 @@ namespace video { for (auto &option : (config.dynamicRange ? video_format.hdr_options : video_format.sdr_options)) { handle_option(option); } + if (config.chromaSamplingType == 1) { + for (auto &option : (config.dynamicRange ? video_format.hdr444_options : video_format.sdr444_options)) { + handle_option(option); + } + } if (retries > 0) { for (auto &option : video_format.fallback_options) { handle_option(option); @@ -1856,7 +1930,24 @@ namespace video { std::unique_ptr result; auto colorspace = colorspace_from_client_config(config, disp.is_hdr()); - auto pix_fmt = (colorspace.bit_depth == 10) ? encoder.platform_formats->pix_fmt_10bit : encoder.platform_formats->pix_fmt_8bit; + + platf::pix_fmt_e pix_fmt; + if (config.chromaSamplingType == 1) { + // YUV 4:4:4 + if (!(encoder.flags & YUV444_SUPPORT)) { + // Encoder can't support YUV 4:4:4 regardless of hardware capabilities + return {}; + } + pix_fmt = (colorspace.bit_depth == 10) ? + encoder.platform_formats->pix_fmt_yuv444_10bit : + encoder.platform_formats->pix_fmt_yuv444_8bit; + } + else { + // YUV 4:2:0 + pix_fmt = (colorspace.bit_depth == 10) ? + encoder.platform_formats->pix_fmt_10bit : + encoder.platform_formats->pix_fmt_8bit; + } { auto encoder_name = config.videoFormat == 0 ? 
encoder.h264.name : @@ -2300,8 +2391,8 @@ namespace video { encoder.av1.capabilities.set(); // First, test encoder viability - config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0 }; - config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0 }; + config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0, 0 }; + config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 0 }; // If the encoder isn't supported at all (not even H.264), bail early reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, config_autoselect); @@ -2420,35 +2511,49 @@ namespace video { encoder.av1.capabilities.reset(); } - std::vector> configs { - { encoder_t::DYNAMIC_RANGE, { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1 } }, - }; + // Test HDR and YUV444 support + { + // H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth + if (encoder.flags & YUV444_SUPPORT) { + config_t config_h264_yuv444 { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 1 }; + encoder.h264[encoder_t::YUV444] = validate_config(disp, encoder, config_h264_yuv444); + } + else { + encoder.h264[encoder_t::YUV444] = false; + } - for (auto &[flag, config] : configs) { - auto h264 = config; - auto hevc = config; - auto av1 = config; - - h264.videoFormat = 0; - hevc.videoFormat = 1; - av1.videoFormat = 2; + const config_t generic_hdr_config = { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1, 0 }; // Reset the display since we're switching from SDR to HDR - reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, config); + reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, generic_hdr_config); if (!disp) { return false; } + auto test_hdr_and_yuv444 = [&](auto &flag_map, auto video_format) { + auto config = generic_hdr_config; + config.videoFormat = video_format; + + if (!flag_map[encoder_t::PASSED]) return; + + // Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported. 
+ config.chromaSamplingType = 1; + if ((encoder.flags & YUV444_SUPPORT) && validate_config(disp, encoder, config) >= 0) { + flag_map[encoder_t::DYNAMIC_RANGE] = true; + flag_map[encoder_t::YUV444] = true; + return; + } + + // Test 4:2:0 HDR + config.chromaSamplingType = 0; + flag_map[encoder_t::DYNAMIC_RANGE] = validate_config(disp, encoder, config) >= 0; + }; + // HDR is not supported with H.264. Don't bother even trying it. - encoder.h264[flag] = flag != encoder_t::DYNAMIC_RANGE && validate_config(disp, encoder, h264) >= 0; + encoder.h264[encoder_t::DYNAMIC_RANGE] = false; - if (encoder.hevc[encoder_t::PASSED]) { - encoder.hevc[flag] = validate_config(disp, encoder, hevc) >= 0; - } - - if (encoder.av1[encoder_t::PASSED]) { - encoder.av1[flag] = validate_config(disp, encoder, av1) >= 0; - } + test_hdr_and_yuv444(encoder.hevc, 1); + test_hdr_and_yuv444(encoder.av1, 2); } encoder.h264[encoder_t::VUI_PARAMETERS] = encoder.h264[encoder_t::VUI_PARAMETERS] && !config::sunshine.flags[config::flag::FORCE_VIDEO_HEADER_REPLACE]; @@ -2605,6 +2710,12 @@ namespace video { auto &encoder = *chosen_encoder; last_encoder_probe_supported_ref_frames_invalidation = (encoder.flags & REF_FRAMES_INVALIDATION); + last_encoder_probe_supported_yuv444_for_codec[0] = encoder.h264[encoder_t::PASSED] && + encoder.h264[encoder_t::YUV444]; + last_encoder_probe_supported_yuv444_for_codec[1] = encoder.hevc[encoder_t::PASSED] && + encoder.hevc[encoder_t::YUV444]; + last_encoder_probe_supported_yuv444_for_codec[2] = encoder.av1[encoder_t::PASSED] && + encoder.av1[encoder_t::YUV444]; BOOST_LOG(debug) << "------ h264 ------"sv; for (int x = 0; x < encoder_t::MAX_FLAGS; ++x) { @@ -2793,6 +2904,10 @@ namespace video { platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) { switch (fmt) { + case AV_PIX_FMT_VUYX: + return platf::pix_fmt_e::ayuv; + case AV_PIX_FMT_XV30: + return platf::pix_fmt_e::y410; case AV_PIX_FMT_YUV420P10: return platf::pix_fmt_e::yuv420p10; case AV_PIX_FMT_YUV420P: diff --git 
a/src/video.h b/src/video.h index b46517c8..0b1baac8 100644 --- a/src/video.h +++ b/src/video.h @@ -39,6 +39,7 @@ namespace video { virtual ~encoder_platform_formats_t() = default; platf::mem_type_e dev_type; platf::pix_fmt_e pix_fmt_8bit, pix_fmt_10bit; + platf::pix_fmt_e pix_fmt_yuv444_8bit, pix_fmt_yuv444_10bit; }; struct encoder_platform_formats_avcodec: encoder_platform_formats_t { @@ -50,21 +51,28 @@ namespace video { const AVPixelFormat &avcodec_dev_pix_fmt, const AVPixelFormat &avcodec_pix_fmt_8bit, const AVPixelFormat &avcodec_pix_fmt_10bit, + const AVPixelFormat &avcodec_pix_fmt_yuv444_8bit, + const AVPixelFormat &avcodec_pix_fmt_yuv444_10bit, const init_buffer_function_t &init_avcodec_hardware_input_buffer_function): avcodec_base_dev_type { avcodec_base_dev_type }, avcodec_derived_dev_type { avcodec_derived_dev_type }, avcodec_dev_pix_fmt { avcodec_dev_pix_fmt }, avcodec_pix_fmt_8bit { avcodec_pix_fmt_8bit }, avcodec_pix_fmt_10bit { avcodec_pix_fmt_10bit }, + avcodec_pix_fmt_yuv444_8bit { avcodec_pix_fmt_yuv444_8bit }, + avcodec_pix_fmt_yuv444_10bit { avcodec_pix_fmt_yuv444_10bit }, init_avcodec_hardware_input_buffer { init_avcodec_hardware_input_buffer_function } { dev_type = map_base_dev_type(avcodec_base_dev_type); pix_fmt_8bit = map_pix_fmt(avcodec_pix_fmt_8bit); pix_fmt_10bit = map_pix_fmt(avcodec_pix_fmt_10bit); + pix_fmt_yuv444_8bit = map_pix_fmt(avcodec_pix_fmt_yuv444_8bit); + pix_fmt_yuv444_10bit = map_pix_fmt(avcodec_pix_fmt_yuv444_10bit); } AVHWDeviceType avcodec_base_dev_type, avcodec_derived_dev_type; AVPixelFormat avcodec_dev_pix_fmt; AVPixelFormat avcodec_pix_fmt_8bit, avcodec_pix_fmt_10bit; + AVPixelFormat avcodec_pix_fmt_yuv444_8bit, avcodec_pix_fmt_yuv444_10bit; init_buffer_function_t init_avcodec_hardware_input_buffer; }; @@ -73,10 +81,14 @@ namespace video { encoder_platform_formats_nvenc( const platf::mem_type_e &dev_type, const platf::pix_fmt_e &pix_fmt_8bit, - const platf::pix_fmt_e &pix_fmt_10bit) { + const platf::pix_fmt_e 
&pix_fmt_10bit, + const platf::pix_fmt_e &pix_fmt_yuv444_8bit, + const platf::pix_fmt_e &pix_fmt_yuv444_10bit) { encoder_platform_formats_t::dev_type = dev_type; encoder_platform_formats_t::pix_fmt_8bit = pix_fmt_8bit; encoder_platform_formats_t::pix_fmt_10bit = pix_fmt_10bit; + encoder_platform_formats_t::pix_fmt_yuv444_8bit = pix_fmt_yuv444_8bit; + encoder_platform_formats_t::pix_fmt_yuv444_10bit = pix_fmt_yuv444_10bit; } }; @@ -87,6 +99,7 @@ namespace video { REF_FRAMES_RESTRICT, ///< Set maximum reference frames. CBR, ///< Some encoders don't support CBR, if not supported attempt constant quantization parameter instead. DYNAMIC_RANGE, ///< HDR support. + YUV444, ///< YUV 4:4:4 support. VUI_PARAMETERS, ///< AMD encoder with VAAPI doesn't add VUI parameters to SPS. MAX_FLAGS ///< Maximum number of flags. }; @@ -101,6 +114,7 @@ namespace video { _CONVERT(REF_FRAMES_RESTRICT); _CONVERT(CBR); _CONVERT(DYNAMIC_RANGE); + _CONVERT(YUV444); _CONVERT(VUI_PARAMETERS); _CONVERT(MAX_FLAGS); } @@ -126,6 +140,8 @@ namespace video { std::vector common_options; std::vector sdr_options; std::vector hdr_options; + std::vector sdr444_options; + std::vector hdr444_options; std::vector fallback_options; // QP option to set in the case that CBR/VBR is not supported @@ -312,11 +328,14 @@ namespace video { /* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */ int dynamicRange; + + int chromaSamplingType; // 0 - 4:2:0, 1 - 4:4:4 }; extern int active_hevc_mode; extern int active_av1_mode; extern bool last_encoder_probe_supported_ref_frames_invalidation; + extern std::array last_encoder_probe_supported_yuv444_for_codec; // 0 - H.264, 1 - HEVC, 2 - AV1 void capture( diff --git a/src/video_colorspace.cpp b/src/video_colorspace.cpp index 5f838052..7b1ddadf 100644 --- a/src/video_colorspace.cpp +++ b/src/video_colorspace.cpp @@ -182,4 +182,109 @@ namespace 
video { return result; } + const color_t * + new_color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace) { + constexpr auto generate_color_vectors = [](const sunshine_colorspace_t &colorspace) -> color_t { + double Kr, Kb; + switch (colorspace.colorspace) { + case colorspace_e::rec601: + Kr = 0.299; + Kb = 0.114; + break; + case colorspace_e::rec709: + default: + Kr = 0.2126; + Kb = 0.0722; + break; + case colorspace_e::bt2020: + case colorspace_e::bt2020sdr: + Kr = 0.2627; + Kb = 0.0593; + break; + } + double Kg = 1.0 - Kr - Kb; + + double y_mult, y_add; + double uv_mult, uv_add; + + // "Matrix coefficients" section of ITU-T H.273 + if (colorspace.full_range) { + y_mult = (1 << colorspace.bit_depth) - 1; + y_add = 0; + uv_mult = (1 << colorspace.bit_depth) - 1; + uv_add = (1 << (colorspace.bit_depth - 1)); + } + else { + y_mult = (1 << (colorspace.bit_depth - 8)) * 219; + y_add = (1 << (colorspace.bit_depth - 8)) * 16; + uv_mult = (1 << (colorspace.bit_depth - 8)) * 224; + uv_add = (1 << (colorspace.bit_depth - 8)) * 128; + } + + // For rounding + y_add += 0.5; + uv_add += 0.5; + + color_t color_vectors; + + color_vectors.color_vec_y[0] = Kr * y_mult; + color_vectors.color_vec_y[1] = Kg * y_mult; + color_vectors.color_vec_y[2] = Kb * y_mult; + color_vectors.color_vec_y[3] = y_add; + + color_vectors.color_vec_u[0] = -0.5 * Kr / (1.0 - Kb) * uv_mult; + color_vectors.color_vec_u[1] = -0.5 * Kg / (1.0 - Kb) * uv_mult; + color_vectors.color_vec_u[2] = 0.5 * uv_mult; + color_vectors.color_vec_u[3] = uv_add; + + color_vectors.color_vec_v[0] = 0.5 * uv_mult; + color_vectors.color_vec_v[1] = -0.5 * Kg / (1.0 - Kr) * uv_mult; + color_vectors.color_vec_v[2] = -0.5 * Kb / (1.0 - Kr) * uv_mult; + color_vectors.color_vec_v[3] = uv_add; + + // Unused + color_vectors.range_y[0] = 1; + color_vectors.range_y[1] = 0; + color_vectors.range_uv[0] = 1; + color_vectors.range_uv[1] = 0; + + return color_vectors; + }; + + static constexpr color_t colors[] = { + 
generate_color_vectors({ colorspace_e::rec601, false, 8 }), + generate_color_vectors({ colorspace_e::rec601, true, 8 }), + generate_color_vectors({ colorspace_e::rec601, false, 10 }), + generate_color_vectors({ colorspace_e::rec601, true, 10 }), + generate_color_vectors({ colorspace_e::rec709, false, 8 }), + generate_color_vectors({ colorspace_e::rec709, true, 8 }), + generate_color_vectors({ colorspace_e::rec709, false, 10 }), + generate_color_vectors({ colorspace_e::rec709, true, 10 }), + generate_color_vectors({ colorspace_e::bt2020, false, 8 }), + generate_color_vectors({ colorspace_e::bt2020, true, 8 }), + generate_color_vectors({ colorspace_e::bt2020, false, 10 }), + generate_color_vectors({ colorspace_e::bt2020, true, 10 }), + }; + + const color_t *result = nullptr; + + switch (colorspace.colorspace) { + case colorspace_e::rec601: + result = &colors[0]; + break; + case colorspace_e::rec709: + default: + result = &colors[4]; + break; + case colorspace_e::bt2020: + case colorspace_e::bt2020sdr: + result = &colors[8]; + break; + } + + if (colorspace.bit_depth == 10) result += 2; + if (colorspace.full_range) result += 1; + + return result; + } } // namespace video diff --git a/src/video_colorspace.h b/src/video_colorspace.h index f3f9f3f3..d6469f99 100644 --- a/src/video_colorspace.h +++ b/src/video_colorspace.h @@ -57,4 +57,17 @@ namespace video { const color_t * color_vectors_from_colorspace(colorspace_e colorspace, bool full_range); + /** + * @brief New version of `color_vectors_from_colorspace()` function that better adheres to the standards. + * Returned vectors are used to perform RGB->YUV conversion. + * Unlike its predecessor, color vectors will produce output in `UINT` range, not `UNORM` range. + * Input is still in `UNORM` range. Returned vectors won't modify color primaries and color + * transfer function. + * @param colorspace Targeted YUV colorspace. + * @return `const color_t*` that contains RGB->YUV transformation vectors. 
+ * Components `range_y` and `range_uv` are there for backwards compatibility + * and can be ignored in the computation. + */ + const color_t * + new_color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace); } // namespace video diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl new file mode 100644 index 00000000..73c45e9b --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl @@ -0,0 +1,3 @@ +#include "include/convert_base.hlsl" + +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl new file mode 100644 index 00000000..820e5128 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl @@ -0,0 +1,3 @@ +#include "include/convert_linear_base.hlsl" + +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl new file mode 100644 index 00000000..33e48145 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl @@ -0,0 +1,10 @@ +cbuffer rotate_texture_steps_cbuffer : register(b1) { + int rotate_texture_steps; +}; + +#include "include/base_vs.hlsl" + +vertex_t main_vs(uint vertex_id : SV_VertexID) +{ + return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps); +} diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl new file mode 100644 index 00000000..b84c6617 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl @@ -0,0 +1,4 
@@ +#include "include/convert_base.hlsl" + +#define Y410 +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl new file mode 100644 index 00000000..f7dbbcb6 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_linear_base.hlsl" + +#define Y410 +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl new file mode 100644 index 00000000..1682be7b --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_perceptual_quantizer_base.hlsl" + +#define Y410 +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl new file mode 100644 index 00000000..d6cca979 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_base.hlsl" + +#define PLANAR_VIEWPORTS +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl new file mode 100644 index 00000000..46032651 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_linear_base.hlsl" + +#define PLANAR_VIEWPORTS +#include "include/convert_yuv444_ps_base.hlsl" diff --git 
a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl new file mode 100644 index 00000000..d390e81e --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_perceptual_quantizer_base.hlsl" + +#define PLANAR_VIEWPORTS +#include "include/convert_yuv444_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl new file mode 100644 index 00000000..566da5d8 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl @@ -0,0 +1,33 @@ +cbuffer rotate_texture_steps_cbuffer : register(b1) { + int rotate_texture_steps; +}; + +cbuffer color_matrix_cbuffer : register(b3) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; + float2 range_y; + float2 range_uv; +}; + +#define PLANAR_VIEWPORTS +#include "include/base_vs.hlsl" + +vertex_t main_vs(uint vertex_id : SV_VertexID) +{ + vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps); + + output.viewport = vertex_id / 3; + + if (output.viewport == 0) { + output.color_vec = color_vec_y; + } + else if (output.viewport == 1) { + output.color_vec = color_vec_u; + } + else { + output.color_vec = color_vec_v; + } + + return output; +} diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl index c04fad39..c39e7c6f 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl @@ -19,7 +19,7 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_ output.viewpoint_pos = float4(-1, 3, 0, 1); tex_coord = float2(0, -1); } - 
else if (vertex_id == 2) { + else { output.viewpoint_pos = float4(3, -1, 0, 1); tex_coord = float2(2, 1); } diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl index 9e4b28f1..cf755c5a 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl @@ -9,4 +9,8 @@ struct vertex_t #else float2 tex_coord : TEXCOORD; #endif +#ifdef PLANAR_VIEWPORTS + uint viewport : SV_ViewportArrayIndex; + nointerpolation float4 color_vec : COLOR0; +#endif }; diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl new file mode 100644 index 00000000..2aa8401e --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl @@ -0,0 +1,39 @@ +Texture2D image : register(t0); +SamplerState def_sampler : register(s0); + +#ifndef PLANAR_VIEWPORTS +cbuffer color_matrix_cbuffer : register(b0) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; + float2 range_y; + float2 range_uv; +}; +#endif + +#include "include/base_vs_types.hlsl" + +#ifdef PLANAR_VIEWPORTS +uint main_ps(vertex_t input) : SV_Target +#else +uint4 main_ps(vertex_t input) : SV_Target +#endif +{ + float3 rgb = CONVERT_FUNCTION(image.Sample(def_sampler, input.tex_coord, 0).rgb); + +#ifdef PLANAR_VIEWPORTS + // Planar R16, 10 most significant bits store the value + return uint(dot(input.color_vec.xyz, rgb) + input.color_vec.w) << 6; +#else + float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; + float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; + float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; + +#ifdef Y410 + return uint4(u, y, v, 0); +#else + // AYUV + return uint4(v, u, y, 0); +#endif +#endif +} From 727ea9037e2e92f56ca2bc0fc610334e9490d32d Mon Sep 17 00:00:00 2001 
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:18:35 -0400 Subject: [PATCH 07/12] docs(docker): add ipc note (#3034) --- DOCKER_README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DOCKER_README.md b/DOCKER_README.md index be27e2fa..2ff7bbf8 100644 --- a/DOCKER_README.md +++ b/DOCKER_README.md @@ -54,6 +54,7 @@ docker run -d \ --device /dev/dri/ \ --name=<name_of_container> \ --restart=unless-stopped \ + --ipc=host \ -e PUID=<uid> \ -e PGID=<gid> \ -e TZ=<timezone> \ @@ -80,6 +81,7 @@ services: - PUID=<uid> - PGID=<gid> - TZ=<timezone> + ipc: host ports: - "47984-47990:47984-47990/tcp" - "48010:48010" @@ -125,6 +127,9 @@ port `47990` (e.g. `http://<host_ip>:47990`). The internal port must be `47990`, | `-e PGID=<gid>` | Group ID | `1001` | False | | `-e TZ=<timezone>` | Lookup [TZ value][1] | `America/New_York` | False | +For additional configuration, it is recommended to reference the *Games on Whales* +[sunshine config](https://github.com/games-on-whales/gow/blob/2e442292d79b9d996f886b8a03d22b6eb6bddf7b/compose/streamers/sunshine.yml).
+ [1]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones #### User / Group Identifiers: From 7c4c905f0433c51a7f7d04ff9bfe19215b508898 Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:41:31 -0400 Subject: [PATCH 08/12] build(macos): fix error in macos-13 build (#3022) Co-authored-by: Vithorio Polten --- .github/workflows/CI.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1ed66a7b..4ac6c217 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -484,6 +484,21 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Fix python + if: matrix.os_name == 'macos' && matrix.os_version == '13' + run: | + rm '/usr/local/bin/2to3' + rm '/usr/local/bin/2to3-3.12' + rm '/usr/local/bin/idle3' + rm '/usr/local/bin/idle3.12' + rm '/usr/local/bin/pydoc3' + rm '/usr/local/bin/pydoc3.12' + rm '/usr/local/bin/python3' + rm '/usr/local/bin/python3-config' + rm '/usr/local/bin/python3.12' + rm '/usr/local/bin/python3.12-config' + brew install python + - name: Configure formula run: | # variables for formula From c8d0d2b6157058a597ac34e85d3f58db661d64bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 19:32:32 -0400 Subject: [PATCH 09/12] build(deps): bump babel from 2.15.0 to 2.16.0 (#2998) Bumps [babel](https://github.com/python-babel/babel) from 2.15.0 to 2.16.0. - [Release notes](https://github.com/python-babel/babel/releases) - [Changelog](https://github.com/python-babel/babel/blob/master/CHANGES.rst) - [Commits](https://github.com/python-babel/babel/compare/v2.15.0...v2.16.0) --- updated-dependencies: - dependency-name: babel dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- scripts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 9cfd158f..cd2ef869 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,2 +1,2 @@ -Babel==2.15.0 +Babel==2.16.0 clang-format From 537e3e6935b60c375de602fed12cb7aacf99f762 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 18 Aug 2024 19:44:24 -0400 Subject: [PATCH 10/12] build(deps): bump third-party/tray from `d9f7e76` to `ebbd14f` (#3010) Bumps [third-party/tray](https://github.com/LizardByte/tray) from `d9f7e76` to `ebbd14f`. - [Commits](https://github.com/LizardByte/tray/compare/d9f7e768cd74390a3b7e68ceed6f9c1a05db9f08...ebbd14fe6af30e61ddbb710251f612d32e371d98) --- updated-dependencies: - dependency-name: third-party/tray dependency-type: direct:production ...
Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- third-party/tray | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third-party/tray b/third-party/tray index d9f7e768..ebbd14fe 160000 --- a/third-party/tray +++ b/third-party/tray @@ -1 +1 @@ -Subproject commit d9f7e768cd74390a3b7e68ceed6f9c1a05db9f08 +Subproject commit ebbd14fe6af30e61ddbb710251f612d32e371d98 From 170f4dd181c6dc57dd14da8ae575c92e429d9997 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:30:13 +0300 Subject: [PATCH 11/12] fix(win/qsv): skip unsupported 4:4:4 codecs (#3029) --- src/platform/windows/display_vram.cpp | 7 +++ src/video.cpp | 29 ++++++++----- src/video.h | 61 +++++++++++++++++---------- 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index ed88e8d5..96ddff84 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -1871,6 +1871,13 @@ namespace platf::dxgi { if (!boost::algorithm::ends_with(name, "_qsv")) { return false; } + if (config.chromaSamplingType == 1) { + if (config.videoFormat == 0 || config.videoFormat == 2) { + // QSV doesn't support 4:4:4 in H.264 or AV1 + return false; + } + // TODO: Blacklist HEVC 4:4:4 based on adapter model + } } else if (adapter_desc.VendorId == 0x10de) { // Nvidia // If it's not an NVENC encoder, it's not compatible with an Nvidia GPU diff --git a/src/video.cpp b/src/video.cpp index 6827b6c7..8c5829a2 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -1434,9 +1434,7 @@ namespace video { bool hardware = platform_formats->avcodec_base_dev_type != AV_HWDEVICE_TYPE_NONE; - auto &video_format = config.videoFormat == 0 ? encoder.h264 : - config.videoFormat == 1 ?
encoder.hevc : - encoder.av1; + auto &video_format = encoder.codec_from_config(config); if (!video_format[encoder_t::PASSED] || !disp->is_codec_supported(video_format.name, config)) { BOOST_LOG(error) << encoder.name << ": "sv << video_format.name << " mode not supported"sv; return nullptr; @@ -1950,10 +1948,7 @@ namespace video { } { - auto encoder_name = config.videoFormat == 0 ? encoder.h264.name : - config.videoFormat == 1 ? encoder.hevc.name : - config.videoFormat == 2 ? encoder.av1.name : - "unknown"; + auto encoder_name = encoder.codec_from_config(config).name; BOOST_LOG(info) << "Creating encoder " << logging::bracket(encoder_name); @@ -2516,7 +2511,8 @@ namespace video { // H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth if (encoder.flags & YUV444_SUPPORT) { config_t config_h264_yuv444 { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 1 }; - encoder.h264[encoder_t::YUV444] = validate_config(disp, encoder, config_h264_yuv444); + encoder.h264[encoder_t::YUV444] = disp->is_codec_supported(encoder.h264.name, config_h264_yuv444) && + validate_config(disp, encoder, config_h264_yuv444) >= 0; } else { encoder.h264[encoder_t::YUV444] = false; @@ -2536,17 +2532,30 @@ namespace video { if (!flag_map[encoder_t::PASSED]) return; + auto encoder_codec_name = encoder.codec_from_config(config).name; + // Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported. 
config.chromaSamplingType = 1; - if ((encoder.flags & YUV444_SUPPORT) && validate_config(disp, encoder, config) >= 0) { + if ((encoder.flags & YUV444_SUPPORT) && + disp->is_codec_supported(encoder_codec_name, config) && + validate_config(disp, encoder, config) >= 0) { flag_map[encoder_t::DYNAMIC_RANGE] = true; flag_map[encoder_t::YUV444] = true; return; } + else { + flag_map[encoder_t::YUV444] = false; + } // Test 4:2:0 HDR config.chromaSamplingType = 0; - flag_map[encoder_t::DYNAMIC_RANGE] = validate_config(disp, encoder, config) >= 0; + if (disp->is_codec_supported(encoder_codec_name, config) && + validate_config(disp, encoder, config) >= 0) { + flag_map[encoder_t::DYNAMIC_RANGE] = true; + } + else { + flag_map[encoder_t::DYNAMIC_RANGE] = false; + } }; // HDR is not supported with H.264. Don't bother even trying it. diff --git a/src/video.h b/src/video.h index 0b1baac8..6a50b2e3 100644 --- a/src/video.h +++ b/src/video.h @@ -17,6 +17,29 @@ extern "C" { struct AVPacket; namespace video { + /* Encoding configuration requested by remote client */ + struct config_t { + int width; // Video width in pixels + int height; // Video height in pixels + int framerate; // Requested framerate, used in individual frame bitrate budget calculation + int bitrate; // Video bitrate in kilobits (1000 bits) for requested framerate + int slicesPerFrame; // Number of slices per frame + int numRefFrames; // Max number of reference frames + + /* Requested color range and SDR encoding colorspace, HDR encoding colorspace is always BT.2020+ST2084 + Color range (encoderCscMode & 0x1) : 0 - limited, 1 - full + SDR encoding colorspace (encoderCscMode >> 1) : 0 - BT.601, 1 - BT.709, 2 - BT.2020 */ + int encoderCscMode; + + int videoFormat; // 0 - H.264, 1 - HEVC, 2 - AV1 + + /* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit + HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */ + int dynamicRange; + + int 
chromaSamplingType; // 0 - 4:2:0, 1 - 4:4:4 + }; + platf::mem_type_e map_base_dev_type(AVHWDeviceType type); platf::pix_fmt_e @@ -163,6 +186,21 @@ namespace video { } } av1, hevc, h264; + const codec_t & + codec_from_config(const config_t &config) const { + switch (config.videoFormat) { + default: + BOOST_LOG(error) << "Unknown video format " << config.videoFormat << ", falling back to H.264"; + // fallthrough + case 0: + return h264; + case 1: + return hevc; + case 2: + return av1; + } + } + uint32_t flags; }; @@ -309,29 +347,6 @@ namespace video { using hdr_info_t = std::unique_ptr; - /* Encoding configuration requested by remote client */ - struct config_t { - int width; // Video width in pixels - int height; // Video height in pixels - int framerate; // Requested framerate, used in individual frame bitrate budget calculation - int bitrate; // Video bitrate in kilobits (1000 bits) for requested framerate - int slicesPerFrame; // Number of slices per frame - int numRefFrames; // Max number of reference frames - - /* Requested color range and SDR encoding colorspace, HDR encoding colorspace is always BT.2020+ST2084 - Color range (encoderCscMode & 0x1) : 0 - limited, 1 - full - SDR encoding colorspace (encoderCscMode >> 1) : 0 - BT.601, 1 - BT.709, 2 - BT.2020 */ - int encoderCscMode; - - int videoFormat; // 0 - H.264, 1 - HEVC, 2 - AV1 - - /* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit - HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */ - int dynamicRange; - - int chromaSamplingType; // 0 - 4:2:0, 1 - 4:4:4 - }; - extern int active_hevc_mode; extern int active_av1_mode; extern bool last_encoder_probe_supported_ref_frames_invalidation; From 17c4b26af0709b5414cc32546dd687beb0e1257c Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Mon, 19 Aug 2024 08:29:49 -0500 Subject: [PATCH 12/12] fix(packaging): apply udev rules for uhid (#3041) --- packaging/linux/AppImage/AppRun | 1 + 
packaging/linux/Arch/sunshine.install | 2 ++ src_assets/linux/misc/postinst | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/packaging/linux/AppImage/AppRun b/packaging/linux/AppImage/AppRun index 404704c3..e90ee3a4 100644 --- a/packaging/linux/AppImage/AppRun +++ b/packaging/linux/AppImage/AppRun @@ -49,6 +49,7 @@ function install() { cat "$SUNSHINE_SHARE_HERE/udev/rules.d/60-sunshine.rules" | sudo tee /etc/udev/rules.d/60-sunshine.rules sudo udevadm control --reload-rules sudo udevadm trigger --property-match=DEVNAME=/dev/uinput + sudo udevadm trigger --property-match=DEVNAME=/dev/uhid # sunshine service mkdir -p ~/.config/systemd/user diff --git a/packaging/linux/Arch/sunshine.install b/packaging/linux/Arch/sunshine.install index a8a700f1..6b274cdf 100644 --- a/packaging/linux/Arch/sunshine.install +++ b/packaging/linux/Arch/sunshine.install @@ -5,7 +5,9 @@ do_setcap() { do_udev_reload() { udevadm control --reload-rules udevadm trigger --property-match=DEVNAME=/dev/uinput + udevadm trigger --property-match=DEVNAME=/dev/uhid modprobe uinput || true + modprobe uhid || true } post_install() { diff --git a/src_assets/linux/misc/postinst b/src_assets/linux/misc/postinst index e406c762..47deb784 100644 --- a/src_assets/linux/misc/postinst +++ b/src_assets/linux/misc/postinst @@ -8,9 +8,10 @@ if [ -x "$path_to_setcap" ] ; then $path_to_setcap cap_sys_admin+p $path_to_sunshine fi -# Trigger udev rule reload for /dev/uinput +# Trigger udev rule reload for /dev/uinput and /dev/uhid path_to_udevadm=$(which udevadm) if [ -x "$path_to_udevadm" ] ; then $path_to_udevadm control --reload-rules $path_to_udevadm trigger --property-match=DEVNAME=/dev/uinput + $path_to_udevadm trigger --property-match=DEVNAME=/dev/uhid fi