From f9c885a414f92d8277337e2fd1283110a0e376bb Mon Sep 17 00:00:00 2001
From: ns6089 <61738816+ns6089@users.noreply.github.com>
Date: Fri, 9 Aug 2024 23:29:17 +0300
Subject: [PATCH 01/12] fix(linux/audio): don't set pulseaudio buffer size
 (#2999)

---
 src/platform/linux/audio.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp
index 32aa36a6..ff231707 100644
--- a/src/platform/linux/audio.cpp
+++ b/src/platform/linux/audio.cpp
@@ -81,9 +81,13 @@ namespace platf {
       channel = position_mapping[*mapping++];
     });
 
-    pa_buffer_attr pa_attr = {};
-    pa_attr.fragsize = frame_size * channels * sizeof(float);
-    pa_attr.maxlength = pa_attr.fragsize * 2;
+    pa_buffer_attr pa_attr = {
+      .maxlength = uint32_t(-1),
+      .tlength = uint32_t(-1),
+      .prebuf = uint32_t(-1),
+      .minreq = uint32_t(-1),
+      .fragsize = uint32_t(frame_size * channels * sizeof(float))
+    };
 
     int status;
 

From 299672795c5ef923e5a000d6e11f9421e3a43fb5 Mon Sep 17 00:00:00 2001
From: LizardByte-bot <108553330+LizardByte-bot@users.noreply.github.com>
Date: Mon, 12 Aug 2024 10:19:25 -0400
Subject: [PATCH 02/12] chore: update global workflows (#3003)

---
 .github/semantic.yml           | 14 ++++++++++++++
 .github/workflows/cpp-lint.yml |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 .github/semantic.yml

diff --git a/.github/semantic.yml b/.github/semantic.yml
new file mode 100644
index 00000000..b5eb70d0
--- /dev/null
+++ b/.github/semantic.yml
@@ -0,0 +1,14 @@
+---
+# This file is centrally managed in https://github.com/<organization>/.github/
+# Don't make changes to this file in this repo as they will be overwritten with changes made to the same file in
+# the above-mentioned repo.
+
+# This is the configuration file for https://github.com/Ezard/semantic-prs
+
+enabled: true
+titleOnly: true  # We only use the PR title as we squash and merge
+commitsOnly: false
+titleAndCommits: false
+anyCommit: false
+allowMergeCommits: false
+allowRevertCommits: false
diff --git a/.github/workflows/cpp-lint.yml b/.github/workflows/cpp-lint.yml
index 5d0df5ad..96cb1d06 100644
--- a/.github/workflows/cpp-lint.yml
+++ b/.github/workflows/cpp-lint.yml
@@ -55,7 +55,7 @@ jobs:
 
       - name: Clang format lint
         if: ${{ steps.find_files.outputs.found_files }}
-        uses: DoozyX/clang-format-lint-action@v0.17
+        uses: DoozyX/clang-format-lint-action@v0.18
         with:
           source: ${{ steps.find_files.outputs.found_files }}
           extensions: 'cpp,h,m,mm'

From a669b36c31fcb339572b5a4ff1741d42de0ab984 Mon Sep 17 00:00:00 2001
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com>
Date: Thu, 15 Aug 2024 18:16:19 -0400
Subject: [PATCH 03/12] fix(build): update cuda compatibilities (#3018)

---
 cmake/compile_definitions/linux.cmake | 27 ++++++---------------------
 docs/getting_started.md               |  4 ++--
 2 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/cmake/compile_definitions/linux.cmake b/cmake/compile_definitions/linux.cmake
index e07c2a55..d90f5dc6 100644
--- a/cmake/compile_definitions/linux.cmake
+++ b/cmake/compile_definitions/linux.cmake
@@ -24,57 +24,42 @@ if(${SUNSHINE_ENABLE_CUDA})
         # https://tech.amikelive.com/node-930/cuda-compatibility-of-nvidia-display-gpu-drivers/
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 6.5)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 10)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_10,code=sm_10")
         elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 6.5)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 50 52)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_50,code=sm_50")
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52")
         endif()
 
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 7.0)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 11)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_11,code=sm_11")
         elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER 7.6)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 60 61 62)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60")
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61")
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_62,code=sm_62")
         endif()
 
+        # https://docs.nvidia.com/cuda/archive/9.2/cuda-compiler-driver-nvcc/index.html
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 20)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_20,code=sm_20")
         elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 70)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_70,code=sm_70")
         endif()
 
+        # https://docs.nvidia.com/cuda/archive/10.0/cuda-compiler-driver-nvcc/index.html
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0)
-            list(APPEND CMAKE_CUDA_ARCHITECTURES 75)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_75,code=sm_75")
+            list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75)
         endif()
 
+        # https://docs.nvidia.com/cuda/archive/11.0/cuda-compiler-driver-nvcc/index.html
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 30)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_30,code=sm_30")
         elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 80)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_80,code=sm_80")
-        endif()
-
-        if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.1)
-            list(APPEND CMAKE_CUDA_ARCHITECTURES 86)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86")
         endif()
 
+        # https://docs.nvidia.com/cuda/archive/11.8.0/cuda-compiler-driver-nvcc/index.html
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8)
-            list(APPEND CMAKE_CUDA_ARCHITECTURES 90)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_90,code=sm_90")
+            list(APPEND CMAKE_CUDA_ARCHITECTURES 86 87 89 90)
         endif()
 
         if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0)
             list(APPEND CMAKE_CUDA_ARCHITECTURES 35)
-            # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_35,code=sm_35")
         endif()
 
         # sort the architectures
diff --git a/docs/getting_started.md b/docs/getting_started.md
index c738992b..a9bd5561 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -43,7 +43,7 @@ CUDA is used for NVFBC capture.
     <tr>
         <td rowspan="3">11.8.0</td>
         <td rowspan="3">450.80.02</td>
-        <td rowspan="3">35;50;52;60;61;62;70;75;80;86;90</td>
+        <td rowspan="3">35;50;52;60;61;62;70;72;75;80;86;87;89;90</td>
         <td>sunshine.AppImage</td>
     </tr>
     <tr>
@@ -55,7 +55,7 @@ CUDA is used for NVFBC capture.
     <tr>
         <td rowspan="2">12.0.0</td>
         <td rowspan="4">525.60.13</td>
-        <td rowspan="4">50;52;60;61;62;70;75;80;86;90</td>
+        <td rowspan="4">50;52;60;61;62;70;72;75;80;86;87;89;90</td>
         <td>sunshine_{arch}.flatpak</td>
     </tr>
     <tr>

From c246c78b13f472e88cd863e0da09cdf2a7c970c6 Mon Sep 17 00:00:00 2001
From: Insanemal <insanemal@gmail.com>
Date: Fri, 16 Aug 2024 11:30:00 +1000
Subject: [PATCH 04/12] fix(linux): ensure NvFBC capture works after multiple
 sessions (#3020)

---
 src/platform/linux/cuda.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp
index b5374b18..5498d9a8 100644
--- a/src/platform/linux/cuda.cpp
+++ b/src/platform/linux/cuda.cpp
@@ -702,6 +702,7 @@ namespace cuda {
 
         NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER };
 
+        ctx_t ctx { handle };
         if (func.nvFBCDestroyHandle(handle, &params)) {
           BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle);
         }

From e8c837f4126f11d4cf073c1b6d23a5e27dfc8775 Mon Sep 17 00:00:00 2001
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com>
Date: Fri, 16 Aug 2024 11:19:57 -0400
Subject: [PATCH 05/12] ci(crowdin): customize PR title (#3031)

---
 crowdin.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crowdin.yml b/crowdin.yml
index 3dd19366..3a5e4281 100644
--- a/crowdin.yml
+++ b/crowdin.yml
@@ -2,6 +2,7 @@
 "base_path": "."
 "base_url": "https://api.crowdin.com"  # optional (for Crowdin Enterprise only)
 "preserve_hierarchy": true  # false will flatten tree on crowdin, but doesn't work with dest option
+"pull_request_title": "chore(l10n): update translations"
 "pull_request_labels": [
   "crowdin",
   "l10n"

From bfdfcebc80b26f532c4ff9875275f21ea5bc3f0f Mon Sep 17 00:00:00 2001
From: ns6089 <61738816+ns6089@users.noreply.github.com>
Date: Fri, 16 Aug 2024 20:41:27 +0300
Subject: [PATCH 06/12] feat(win/video): support native YUV 4:4:4 encoding
 (#2533)

---
 src/nvenc/nvenc_base.cpp                      |  91 ++--
 src/nvenc/nvenc_base.h                        |  83 +++-
 src/nvenc/nvenc_colorspace.h                  |   7 +-
 src/nvenc/nvenc_config.h                      |   5 +-
 src/nvenc/nvenc_d3d11.cpp                     |  64 +--
 src/nvenc/nvenc_d3d11.h                       |  30 +-
 src/nvenc/nvenc_d3d11_native.cpp              |  71 ++++
 src/nvenc/nvenc_d3d11_native.h                |  38 ++
 src/nvenc/nvenc_d3d11_on_cuda.cpp             | 267 ++++++++++++
 src/nvenc/nvenc_d3d11_on_cuda.h               |  96 +++++
 src/nvenc/nvenc_encoded_frame.h               |   7 +-
 src/nvenc/nvenc_utils.cpp                     |  14 +-
 src/nvenc/nvenc_utils.h                       |   2 +-
 src/nvhttp.cpp                                |  15 +
 src/platform/common.h                         |   6 +
 src/platform/windows/display_vram.cpp         | 394 +++++++++++++-----
 src/rtsp.cpp                                  |   2 +
 src/utility.h                                 |   6 +
 src/video.cpp                                 | 329 ++++++++++-----
 src/video.h                                   |  21 +-
 src/video_colorspace.cpp                      | 105 +++++
 src/video_colorspace.h                        |  13 +
 .../convert_yuv444_packed_ayuv_ps.hlsl        |   3 +
 .../convert_yuv444_packed_ayuv_ps_linear.hlsl |   3 +
 .../directx/convert_yuv444_packed_vs.hlsl     |  10 +
 .../convert_yuv444_packed_y410_ps.hlsl        |   4 +
 .../convert_yuv444_packed_y410_ps_linear.hlsl |   4 +
 ...4_packed_y410_ps_perceptual_quantizer.hlsl |   4 +
 .../directx/convert_yuv444_planar_ps.hlsl     |   4 +
 .../convert_yuv444_planar_ps_linear.hlsl      |   4 +
 ...yuv444_planar_ps_perceptual_quantizer.hlsl |   4 +
 .../directx/convert_yuv444_planar_vs.hlsl     |  33 ++
 .../shaders/directx/include/base_vs.hlsl      |   2 +-
 .../directx/include/base_vs_types.hlsl        |   4 +
 .../include/convert_yuv444_ps_base.hlsl       |  39 ++
 35 files changed, 1454 insertions(+), 330 deletions(-)
 create mode 100644 src/nvenc/nvenc_d3d11_native.cpp
 create mode 100644 src/nvenc/nvenc_d3d11_native.h
 create mode 100644 src/nvenc/nvenc_d3d11_on_cuda.cpp
 create mode 100644 src/nvenc/nvenc_d3d11_on_cuda.h
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl
 create mode 100644 src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl

diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp
index c632c254..b69d6f26 100644
--- a/src/nvenc/nvenc_base.cpp
+++ b/src/nvenc/nvenc_base.cpp
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_base.cpp
- * @brief Definitions for base NVENC encoder.
+ * @brief Definitions for abstract platform-agnostic base of standalone NVENC encoder.
  */
 #include "nvenc_base.h"
 
@@ -85,9 +85,8 @@ namespace {
 
 namespace nvenc {
 
-  nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device):
-      device_type(device_type),
-      device(device) {
+  nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type):
+      device_type(device_type) {
   }
 
   nvenc_base::~nvenc_base() {
@@ -115,19 +114,19 @@ namespace nvenc {
     session_params.deviceType = device_type;
     session_params.apiVersion = minimum_api_version;
     if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) {
-      BOOST_LOG(error) << "NvEncOpenEncodeSessionEx failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncOpenEncodeSessionEx() failed: " << last_nvenc_error_string;
       return false;
     }
 
     uint32_t encode_guid_count = 0;
     if (nvenc_failed(nvenc->nvEncGetEncodeGUIDCount(encoder, &encode_guid_count))) {
-      BOOST_LOG(error) << "NvEncGetEncodeGUIDCount failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDCount() failed: " << last_nvenc_error_string;
       return false;
     };
 
     std::vector<GUID> encode_guids(encode_guid_count);
     if (nvenc_failed(nvenc->nvEncGetEncodeGUIDs(encoder, encode_guids.data(), encode_guids.size(), &encode_guid_count))) {
-      BOOST_LOG(error) << "NvEncGetEncodeGUIDs failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncGetEncodeGUIDs() failed: " << last_nvenc_error_string;
       return false;
     }
 
@@ -176,7 +175,7 @@ namespace nvenc {
     };
 
     auto buffer_is_yuv444 = [&]() {
-      return buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
+      return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
     };
 
     {
@@ -220,7 +219,7 @@ namespace nvenc {
 
     NV_ENC_PRESET_CONFIG preset_config = { min_struct_version(NV_ENC_PRESET_CONFIG_VER), { min_struct_version(NV_ENC_CONFIG_VER, 7, 8) } };
     if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) {
-      BOOST_LOG(error) << "NvEncGetEncodePresetConfigEx failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncGetEncodePresetConfigEx() failed: " << last_nvenc_error_string;
       return false;
     }
 
@@ -228,7 +227,6 @@ namespace nvenc {
     enc_config.profileGUID = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID;
     enc_config.gopLength = NVENC_INFINITE_GOPLENGTH;
     enc_config.frameIntervalP = 1;
-    enc_config.rcParams.enableAQ = config.adaptive_quantization;
     enc_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
     enc_config.rcParams.zeroReorderDelay = 1;
     enc_config.rcParams.enableLookahead = 0;
@@ -282,7 +280,7 @@ namespace nvenc {
       }
     };
 
-    auto fill_h264_hevc_vui = [&colorspace](auto &vui_config) {
+    auto fill_h264_hevc_vui = [&](auto &vui_config) {
       vui_config.videoSignalTypePresentFlag = 1;
       vui_config.videoFormat = NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED;
       vui_config.videoFullRangeFlag = colorspace.full_range;
@@ -290,7 +288,7 @@ namespace nvenc {
       vui_config.colourPrimaries = colorspace.primaries;
       vui_config.transferCharacteristics = colorspace.tranfer_function;
       vui_config.colourMatrix = colorspace.matrix;
-      vui_config.chromaSampleLocationFlag = 1;
+      vui_config.chromaSampleLocationFlag = buffer_is_yuv444() ? 0 : 1;
       vui_config.chromaSampleLocationTop = 0;
       vui_config.chromaSampleLocationBot = 0;
     };
@@ -331,7 +329,9 @@ namespace nvenc {
         auto &format_config = enc_config.encodeCodecConfig.av1Config;
         format_config.repeatSeqHdr = 1;
         format_config.idrPeriod = NVENC_INFINITE_GOPLENGTH;
-        format_config.chromaFormatIDC = 1;  // YUV444 not supported by NVENC yet
+        if (buffer_is_yuv444()) {
+          format_config.chromaFormatIDC = 3;
+        }
         format_config.enableBitstreamPadding = config.insert_filler_data;
         if (buffer_is_10bit()) {
           format_config.inputPixelBitDepthMinus8 = 2;
@@ -341,7 +341,7 @@ namespace nvenc {
         format_config.transferCharacteristics = colorspace.tranfer_function;
         format_config.matrixCoefficients = colorspace.matrix;
         format_config.colorRange = colorspace.full_range;
-        format_config.chromaSamplePosition = 1;
+        format_config.chromaSamplePosition = buffer_is_yuv444() ? 0 : 1;
         set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numFwdRefs, 8);
         set_minqp_if_enabled(config.min_qp_av1);
 
@@ -358,7 +358,7 @@ namespace nvenc {
     init_params.encodeConfig = &enc_config;
 
     if (nvenc_failed(nvenc->nvEncInitializeEncoder(encoder, &init_params))) {
-      BOOST_LOG(error) << "NvEncInitializeEncoder failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncInitializeEncoder() failed: " << last_nvenc_error_string;
       return false;
     }
 
@@ -366,14 +366,14 @@ namespace nvenc {
       NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) };
       event_params.completionEvent = async_event_handle;
       if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) {
-        BOOST_LOG(error) << "NvEncRegisterAsyncEvent failed: " << last_error_string;
+        BOOST_LOG(error) << "NvEnc: NvEncRegisterAsyncEvent() failed: " << last_nvenc_error_string;
         return false;
       }
     }
 
     NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = { min_struct_version(NV_ENC_CREATE_BITSTREAM_BUFFER_VER) };
     if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) {
-      BOOST_LOG(error) << "NvEncCreateBitstreamBuffer failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncCreateBitstreamBuffer() failed: " << last_nvenc_error_string;
       return false;
     }
     output_bitstream = create_bitstream_buffer.bitstreamBuffer;
@@ -388,8 +388,13 @@ namespace nvenc {
     }
 
     {
+      auto video_format_string = client_config.videoFormat == 0 ? "H.264 " :
+                                 client_config.videoFormat == 1 ? "HEVC " :
+                                 client_config.videoFormat == 2 ? "AV1 " :
+                                                                  " ";
       std::string extra;
       if (init_params.enableEncodeAsync) extra += " async";
+      if (buffer_is_yuv444()) extra += " yuv444";
       if (buffer_is_10bit()) extra += " 10-bit";
       if (enc_config.rcParams.multiPass != NV_ENC_MULTI_PASS_DISABLED) extra += " two-pass";
       if (config.vbv_percentage_increase > 0 && get_encoder_cap(NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE)) extra += " vbv+" + std::to_string(config.vbv_percentage_increase);
@@ -398,7 +403,8 @@ namespace nvenc {
       if (enc_config.rcParams.enableAQ) extra += " spatial-aq";
       if (enc_config.rcParams.enableMinQP) extra += " qpmin=" + std::to_string(enc_config.rcParams.minQP.qpInterP);
       if (config.insert_filler_data) extra += " filler-data";
-      BOOST_LOG(info) << "NvEnc: created encoder " << quality_preset_string_from_guid(init_params.presetGUID) << extra;
+
+      BOOST_LOG(info) << "NvEnc: created encoder " << video_format_string << quality_preset_string_from_guid(init_params.presetGUID) << extra;
     }
 
     encoder_state = {};
@@ -409,20 +415,28 @@ namespace nvenc {
   void
   nvenc_base::destroy_encoder() {
     if (output_bitstream) {
-      nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream);
+      if (nvenc_failed(nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream))) {
+        BOOST_LOG(error) << "NvEnc: NvEncDestroyBitstreamBuffer() failed: " << last_nvenc_error_string;
+      }
       output_bitstream = nullptr;
     }
     if (encoder && async_event_handle) {
       NV_ENC_EVENT_PARAMS event_params = { min_struct_version(NV_ENC_EVENT_PARAMS_VER) };
       event_params.completionEvent = async_event_handle;
-      nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params);
+      if (nvenc_failed(nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params))) {
+        BOOST_LOG(error) << "NvEnc: NvEncUnregisterAsyncEvent() failed: " << last_nvenc_error_string;
+      }
     }
     if (registered_input_buffer) {
-      nvenc->nvEncUnregisterResource(encoder, registered_input_buffer);
+      if (nvenc_failed(nvenc->nvEncUnregisterResource(encoder, registered_input_buffer))) {
+        BOOST_LOG(error) << "NvEnc: NvEncUnregisterResource() failed: " << last_nvenc_error_string;
+      }
       registered_input_buffer = nullptr;
     }
     if (encoder) {
-      nvenc->nvEncDestroyEncoder(encoder);
+      if (nvenc_failed(nvenc->nvEncDestroyEncoder(encoder))) {
+        BOOST_LOG(error) << "NvEnc: NvEncDestroyEncoder() failed: " << last_nvenc_error_string;
+      }
       encoder = nullptr;
     }
 
@@ -439,14 +453,23 @@ namespace nvenc {
     assert(registered_input_buffer);
     assert(output_bitstream);
 
+    if (!synchronize_input_buffer()) {
+      BOOST_LOG(error) << "NvEnc: failed to synchronize input buffer";
+      return {};
+    }
+
     NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = { min_struct_version(NV_ENC_MAP_INPUT_RESOURCE_VER) };
     mapped_input_buffer.registeredResource = registered_input_buffer;
 
     if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) {
-      BOOST_LOG(error) << "NvEncMapInputResource failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncMapInputResource() failed: " << last_nvenc_error_string;
       return {};
     }
-    auto unmap_guard = util::fail_guard([&] { nvenc->nvEncUnmapInputResource(encoder, &mapped_input_buffer); });
+    auto unmap_guard = util::fail_guard([&] {
+      if (nvenc_failed(nvenc->nvEncUnmapInputResource(encoder, mapped_input_buffer.mappedResource))) {
+        BOOST_LOG(error) << "NvEnc: NvEncUnmapInputResource() failed: " << last_nvenc_error_string;
+      }
+    });
 
     NV_ENC_PIC_PARAMS pic_params = { min_struct_version(NV_ENC_PIC_PARAMS_VER, 4, 6) };
     pic_params.inputWidth = encoder_params.width;
@@ -460,7 +483,7 @@ namespace nvenc {
     pic_params.completionEvent = async_event_handle;
 
     if (nvenc_failed(nvenc->nvEncEncodePicture(encoder, &pic_params))) {
-      BOOST_LOG(error) << "NvEncEncodePicture failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncEncodePicture() failed: " << last_nvenc_error_string;
       return {};
     }
 
@@ -474,7 +497,7 @@ namespace nvenc {
     }
 
     if (nvenc_failed(nvenc->nvEncLockBitstream(encoder, &lock_bitstream))) {
-      BOOST_LOG(error) << "NvEncLockBitstream failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncLockBitstream() failed: " << last_nvenc_error_string;
       return {};
     }
 
@@ -498,7 +521,7 @@ namespace nvenc {
     }
 
     if (nvenc_failed(nvenc->nvEncUnlockBitstream(encoder, lock_bitstream.outputBitstream))) {
-      BOOST_LOG(error) << "NvEncUnlockBitstream failed: " << last_error_string;
+      BOOST_LOG(error) << "NvEnc: NvEncUnlockBitstream() failed: " << last_nvenc_error_string;
     }
 
     encoder_state.frame_size_logger.collect_and_log(encoded_frame.data.size() / 1000.);
@@ -535,7 +558,7 @@ namespace nvenc {
 
     for (auto i = first_frame; i <= last_frame; i++) {
       if (nvenc_failed(nvenc->nvEncInvalidateRefFrames(encoder, i))) {
-        BOOST_LOG(error) << "NvEncInvalidateRefFrames " << i << " failed: " << last_error_string;
+        BOOST_LOG(error) << "NvEnc: NvEncInvalidateRefFrames() " << i << " failed: " << last_nvenc_error_string;
         return false;
       }
     }
@@ -576,20 +599,22 @@ namespace nvenc {
         nvenc_status_case(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
         nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
         nvenc_status_case(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
-        // Newer versions of sdk may add more constants, look for them the end of NVENCSTATUS enum
+        // Newer versions of sdk may add more constants, look for them at the end of NVENCSTATUS enum
 #undef nvenc_status_case
         default:
           return std::to_string(status);
       }
     };
 
-    last_error_string.clear();
+    last_nvenc_error_string.clear();
     if (status != NV_ENC_SUCCESS) {
+      /* This API function gives broken strings more often than not
       if (nvenc && encoder) {
-        last_error_string = nvenc->nvEncGetLastErrorString(encoder);
-        if (!last_error_string.empty()) last_error_string += " ";
+        last_nvenc_error_string = nvenc->nvEncGetLastErrorString(encoder);
+        if (!last_nvenc_error_string.empty()) last_nvenc_error_string += " ";
       }
-      last_error_string += status_string(status);
+      */
+      last_nvenc_error_string += status_string(status);
       return true;
     }
 
diff --git a/src/nvenc/nvenc_base.h b/src/nvenc/nvenc_base.h
index 23976c01..c49aa401 100644
--- a/src/nvenc/nvenc_base.h
+++ b/src/nvenc/nvenc_base.h
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_base.h
- * @brief Declarations for base NVENC encoder.
+ * @brief Declarations for abstract platform-agnostic base of standalone NVENC encoder.
  */
 #pragma once
 
@@ -13,36 +13,98 @@
 
 #include <ffnvcodec/nvEncodeAPI.h>
 
+/**
+ * @brief Standalone NVENC encoder
+ */
 namespace nvenc {
 
+  /**
+   * @brief Abstract platform-agnostic base of standalone NVENC encoder.
+   *        Derived classes perform platform-specific operations.
+   */
   class nvenc_base {
   public:
-    nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device);
+    /**
+     * @param device_type Underlying device type used by derived class.
+     */
+    explicit nvenc_base(NV_ENC_DEVICE_TYPE device_type);
     virtual ~nvenc_base();
 
     nvenc_base(const nvenc_base &) = delete;
     nvenc_base &
     operator=(const nvenc_base &) = delete;
 
+    /**
+     * @brief Create the encoder.
+     * @param config NVENC encoder configuration.
+     * @param client_config Stream configuration requested by the client.
+     * @param colorspace YUV colorspace.
+     * @param buffer_format Platform-agnostic input surface format.
+     * @return `true` on success, `false` on error
+     */
     bool
     create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format);
 
+    /**
+     * @brief Destroy the encoder.
+     *        Derived classes call it in the destructor.
+     */
     void
     destroy_encoder();
 
+    /**
+     * @brief Encode the next frame using platform-specific input surface.
+     * @param frame_index Frame index that uniquely identifies the frame.
+     *        Afterwards serves as parameter for `invalidate_ref_frames()`.
+     *        No restrictions on the first frame index, but later frame indexes must be subsequent.
+     * @param force_idr Whether to encode frame as forced IDR.
+     * @return Encoded frame.
+     */
     nvenc_encoded_frame
     encode_frame(uint64_t frame_index, bool force_idr);
 
+    /**
+     * @brief Perform reference frame invalidation (RFI) procedure.
+     * @param first_frame First frame index of the invalidation range.
+     * @param last_frame Last frame index of the invalidation range.
+     * @return `true` on success, `false` on error.
+     *         After an error, the next frame must be encoded with `force_idr = true`.
+     */
     bool
     invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
 
   protected:
+    /**
+     * @brief Required. Used for loading NvEnc library and setting `nvenc` variable with `NvEncodeAPICreateInstance()`.
+     *        Called during `create_encoder()` if `nvenc` variable is not initialized.
+     * @return `true` on success, `false` on error
+     */
     virtual bool
     init_library() = 0;
 
+    /**
+     * @brief Required. Used for creating outside-facing input surface,
+     *        registering this surface with `nvenc->nvEncRegisterResource()` and setting `registered_input_buffer` variable.
+     *        Called during `create_encoder()`.
+     * @return `true` on success, `false` on error
+     */
     virtual bool
     create_and_register_input_buffer() = 0;
 
+    /**
+     * @brief Optional. Override if you must perform additional operations on the registered input surface at the beginning of `encode_frame()`.
+     *        Typically used for interop copy.
+     * @return `true` on success, `false` on error
+     */
+    virtual bool
+    synchronize_input_buffer() { return true; }
+
+    /**
+     * @brief Optional. Override if you want to create encoder in async mode.
+     *        In this case must also set `async_event_handle` variable.
+     * @param timeout_ms Wait timeout in milliseconds
+     * @return `true` on success, `false` on timeout or error
+     */
     virtual bool
     wait_for_async_event(uint32_t timeout_ms) { return false; }
 
@@ -61,9 +123,6 @@ namespace nvenc {
     min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0);
 
     const NV_ENC_DEVICE_TYPE device_type;
-    void *const device;
-
-    std::unique_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc;
 
     void *encoder = nullptr;
 
@@ -75,11 +134,17 @@ namespace nvenc {
       bool rfi = false;
     } encoder_params;
 
-    // Derived classes set these variables
-    NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr;
-    void *async_event_handle = nullptr;
+    std::string last_nvenc_error_string;
 
-    std::string last_error_string;
+    // Derived classes set these variables
+    void *device = nullptr;  ///< Platform-specific handle of encoding device.
+                             ///< Should be set in constructor or `init_library()`.
+    std::shared_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc;  ///< Function pointers list produced by `NvEncodeAPICreateInstance()`.
+                                                         ///< Should be set in `init_library()`.
+    NV_ENC_REGISTERED_PTR registered_input_buffer = nullptr;  ///< Platform-specific input surface registered with `NvEncRegisterResource()`.
+                                                              ///< Should be set in `create_and_register_input_buffer()`.
+    void *async_event_handle = nullptr;  ///< (optional) Platform-specific handle of the event object.
+                                         ///< Can be set in constructor or `init_library()`, must override `wait_for_async_event()`.
 
   private:
     NV_ENC_OUTPUT_PTR output_bitstream = nullptr;
diff --git a/src/nvenc/nvenc_colorspace.h b/src/nvenc/nvenc_colorspace.h
index 3a37ae34..c9ed5193 100644
--- a/src/nvenc/nvenc_colorspace.h
+++ b/src/nvenc/nvenc_colorspace.h
@@ -1,16 +1,21 @@
 /**
  * @file src/nvenc/nvenc_colorspace.h
- * @brief Declarations for base NVENC colorspace.
+ * @brief Declarations for NVENC YUV colorspace.
  */
 #pragma once
 
 #include <ffnvcodec/nvEncodeAPI.h>
 
 namespace nvenc {
+
+  /**
+   * @brief YUV colorspace and color range.
+   */
   struct nvenc_colorspace_t {
     NV_ENC_VUI_COLOR_PRIMARIES primaries;
     NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function;
     NV_ENC_VUI_MATRIX_COEFFS matrix;
     bool full_range;
   };
+
 }  // namespace nvenc
diff --git a/src/nvenc/nvenc_config.h b/src/nvenc/nvenc_config.h
index 8fcd84a6..213a0d28 100644
--- a/src/nvenc/nvenc_config.h
+++ b/src/nvenc/nvenc_config.h
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_config.h
- * @brief Declarations for base NVENC configuration.
+ * @brief Declarations for NVENC encoder configuration.
  */
 #pragma once
 
@@ -12,6 +12,9 @@ namespace nvenc {
     full_resolution,  ///< Better overall statistics, slower and uses more extra vram
   };
 
+  /**
+   * @brief NVENC encoder configuration.
+   */
   struct nvenc_config {
     // Quality preset from 1 to 7, higher is slower
     int quality_preset = 1;
diff --git a/src/nvenc/nvenc_d3d11.cpp b/src/nvenc/nvenc_d3d11.cpp
index 8a726084..7dd545b4 100644
--- a/src/nvenc/nvenc_d3d11.cpp
+++ b/src/nvenc/nvenc_d3d11.cpp
@@ -1,43 +1,29 @@
 /**
  * @file src/nvenc/nvenc_d3d11.cpp
- * @brief Definitions for base NVENC d3d11.
+ * @brief Definitions for abstract Direct3D11 NVENC encoder.
  */
 #include "src/logging.h"
 
 #ifdef _WIN32
   #include "nvenc_d3d11.h"
 
-  #include "nvenc_utils.h"
-
 namespace nvenc {
 
-  nvenc_d3d11::nvenc_d3d11(ID3D11Device *d3d_device):
-      nvenc_base(NV_ENC_DEVICE_TYPE_DIRECTX, d3d_device),
-      d3d_device(d3d_device) {
-  }
-
   nvenc_d3d11::~nvenc_d3d11() {
-    if (encoder) destroy_encoder();
-
     if (dll) {
       FreeLibrary(dll);
       dll = NULL;
     }
   }
 
-  ID3D11Texture2D *
-  nvenc_d3d11::get_input_texture() {
-    return d3d_input_texture.GetInterfacePtr();
-  }
-
   bool
   nvenc_d3d11::init_library() {
     if (dll) return true;
 
   #ifdef _WIN64
-    auto dll_name = "nvEncodeAPI64.dll";
+    constexpr auto dll_name = "nvEncodeAPI64.dll";
   #else
-    auto dll_name = "nvEncodeAPI.dll";
+    constexpr auto dll_name = "nvEncodeAPI.dll";
   #endif
 
     if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
@@ -45,7 +31,7 @@ namespace nvenc {
         auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
         new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER);
         if (nvenc_failed(create_instance(new_nvenc.get()))) {
-          BOOST_LOG(error) << "NvEncodeAPICreateInstance failed: " << last_error_string;
+          BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string;
         }
         else {
           nvenc = std::move(new_nvenc);
@@ -53,11 +39,11 @@ namespace nvenc {
         }
       }
       else {
-        BOOST_LOG(error) << "No NvEncodeAPICreateInstance in " << dll_name;
+        BOOST_LOG(error) << "NvEnc: No NvEncodeAPICreateInstance() in " << dll_name;
       }
     }
     else {
-      BOOST_LOG(debug) << "Couldn't load NvEnc library " << dll_name;
+      BOOST_LOG(debug) << "NvEnc: Couldn't load NvEnc library " << dll_name;
     }
 
     if (dll) {
@@ -68,43 +54,5 @@ namespace nvenc {
     return false;
   }
 
-  bool
-  nvenc_d3d11::create_and_register_input_buffer() {
-    if (!d3d_input_texture) {
-      D3D11_TEXTURE2D_DESC desc = {};
-      desc.Width = encoder_params.width;
-      desc.Height = encoder_params.height;
-      desc.MipLevels = 1;
-      desc.ArraySize = 1;
-      desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
-      desc.SampleDesc.Count = 1;
-      desc.Usage = D3D11_USAGE_DEFAULT;
-      desc.BindFlags = D3D11_BIND_RENDER_TARGET;
-      if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
-        BOOST_LOG(error) << "NvEnc: couldn't create input texture";
-        return false;
-      }
-    }
-
-    if (!registered_input_buffer) {
-      NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
-      register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
-      register_resource.width = encoder_params.width;
-      register_resource.height = encoder_params.height;
-      register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
-      register_resource.bufferFormat = encoder_params.buffer_format;
-      register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
-
-      if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
-        BOOST_LOG(error) << "NvEncRegisterResource failed: " << last_error_string;
-        return false;
-      }
-
-      registered_input_buffer = register_resource.registeredResource;
-    }
-
-    return true;
-  }
-
 }  // namespace nvenc
 #endif
diff --git a/src/nvenc/nvenc_d3d11.h b/src/nvenc/nvenc_d3d11.h
index eac10165..2d4d4fe7 100644
--- a/src/nvenc/nvenc_d3d11.h
+++ b/src/nvenc/nvenc_d3d11.h
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_d3d11.h
- * @brief Declarations for base NVENC d3d11.
+ * @brief Declarations for abstract Direct3D11 NVENC encoder.
  */
 #pragma once
 #ifdef _WIN32
@@ -14,25 +14,33 @@ namespace nvenc {
 
   _COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
   _COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
+  _COM_SMARTPTR_TYPEDEF(IDXGIDevice, IID_IDXGIDevice);
+  _COM_SMARTPTR_TYPEDEF(IDXGIAdapter, IID_IDXGIAdapter);
 
-  class nvenc_d3d11 final: public nvenc_base {
+  /**
+   * @brief Abstract Direct3D11 NVENC encoder.
+   *        Encapsulates common code used by native and interop implementations.
+   */
+  class nvenc_d3d11: public nvenc_base {
   public:
-    nvenc_d3d11(ID3D11Device *d3d_device);
+    explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type):
+        nvenc_base(device_type) {}
+
     ~nvenc_d3d11();
 
-    ID3D11Texture2D *
-    get_input_texture();
+    /**
+     * @brief Get input surface texture.
+     * @return Input surface texture.
+     */
+    virtual ID3D11Texture2D *
+    get_input_texture() = 0;
 
-  private:
+  protected:
     bool
     init_library() override;
 
-    bool
-    create_and_register_input_buffer() override;
-
+  private:
     HMODULE dll = NULL;
-    const ID3D11DevicePtr d3d_device;
-    ID3D11Texture2DPtr d3d_input_texture;
   };
 
 }  // namespace nvenc
diff --git a/src/nvenc/nvenc_d3d11_native.cpp b/src/nvenc/nvenc_d3d11_native.cpp
new file mode 100644
index 00000000..a563b33d
--- /dev/null
+++ b/src/nvenc/nvenc_d3d11_native.cpp
@@ -0,0 +1,71 @@
+/**
+ * @file src/nvenc/nvenc_d3d11_native.cpp
+ * @brief Definitions for native Direct3D11 NVENC encoder.
+ */
+#ifdef _WIN32
+  #include "nvenc_d3d11_native.h"
+
+  #include "nvenc_utils.h"
+
+namespace nvenc {
+
+  nvenc_d3d11_native::nvenc_d3d11_native(ID3D11Device *d3d_device):
+      nvenc_d3d11(NV_ENC_DEVICE_TYPE_DIRECTX),
+      d3d_device(d3d_device) {
+    device = d3d_device;  // The D3D11 device itself serves as the handle of the encoding device
+  }
+
+  nvenc_d3d11_native::~nvenc_d3d11_native() {
+    if (encoder) destroy_encoder();  // Tear down a still-open encoder before this object's members are released
+  }
+
+  ID3D11Texture2D *
+  nvenc_d3d11_native::get_input_texture() {
+    return d3d_input_texture.GetInterfacePtr();  // Raw pointer held by the smart pointer; null until the texture has been created
+  }
+
+  bool
+  nvenc_d3d11_native::create_and_register_input_buffer() {
+    if (encoder_params.buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {  // Rejected here; the message points to the CUDA interop encoder
+      BOOST_LOG(error) << "NvEnc: 10-bit 4:4:4 encoding is incompatible with D3D11 surface formats, use CUDA interop";
+      return false;
+    }
+
+    if (!d3d_input_texture) {  // Create the input texture once; repeated calls reuse it
+      D3D11_TEXTURE2D_DESC desc = {};
+      desc.Width = encoder_params.width;
+      desc.Height = encoder_params.height;
+      desc.MipLevels = 1;
+      desc.ArraySize = 1;
+      desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
+      desc.SampleDesc.Count = 1;
+      desc.Usage = D3D11_USAGE_DEFAULT;
+      desc.BindFlags = D3D11_BIND_RENDER_TARGET;
+      if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
+        BOOST_LOG(error) << "NvEnc: couldn't create input texture";
+        return false;
+      }
+    }
+
+    if (!registered_input_buffer) {  // Register the texture with NVENC once; repeated calls reuse the handle
+      NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
+      register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+      register_resource.width = encoder_params.width;
+      register_resource.height = encoder_params.height;
+      register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
+      register_resource.bufferFormat = encoder_params.buffer_format;
+      register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
+
+      if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
+        BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
+        return false;
+      }
+
+      registered_input_buffer = register_resource.registeredResource;  // Handle filled in by NvEncRegisterResource()
+    }
+
+    return true;
+  }
+
+}  // namespace nvenc
+#endif
diff --git a/src/nvenc/nvenc_d3d11_native.h b/src/nvenc/nvenc_d3d11_native.h
new file mode 100644
index 00000000..f9d49b18
--- /dev/null
+++ b/src/nvenc/nvenc_d3d11_native.h
@@ -0,0 +1,38 @@
+/**
+ * @file src/nvenc/nvenc_d3d11_native.h
+ * @brief Declarations for native Direct3D11 NVENC encoder.
+ */
+#pragma once
+#ifdef _WIN32
+
+  #include <comdef.h>
+  #include <d3d11.h>
+
+  #include "nvenc_d3d11.h"
+
+namespace nvenc {
+
+  /**
+   * @brief Native Direct3D11 NVENC encoder: the Direct3D11 input texture is registered with NVENC directly.
+   */
+  class nvenc_d3d11_native final: public nvenc_d3d11 {
+  public:
+    /**
+     * @param d3d_device Direct3D11 device used for encoding.
+     */
+    explicit nvenc_d3d11_native(ID3D11Device *d3d_device);
+    ~nvenc_d3d11_native();
+
+    ID3D11Texture2D *
+    get_input_texture() override;
+
+  private:
+    bool
+    create_and_register_input_buffer() override;
+
+    const ID3D11DevicePtr d3d_device;  ///< Device that creates the input texture and acts as the encoding device.
+    ID3D11Texture2DPtr d3d_input_texture;  ///< Input surface, created on first `create_and_register_input_buffer()` call.
+  };
+
+}  // namespace nvenc
+#endif
diff --git a/src/nvenc/nvenc_d3d11_on_cuda.cpp b/src/nvenc/nvenc_d3d11_on_cuda.cpp
new file mode 100644
index 00000000..37fe8963
--- /dev/null
+++ b/src/nvenc/nvenc_d3d11_on_cuda.cpp
@@ -0,0 +1,267 @@
+/**
+ * @file src/nvenc/nvenc_d3d11_on_cuda.cpp
+ * @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces.
+ */
+#ifdef _WIN32
+  #include "nvenc_d3d11_on_cuda.h"
+
+  #include "nvenc_utils.h"
+
+namespace nvenc {
+
+  nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device):
+      nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA),
+      d3d_device(d3d_device) {  // `device` is not set here; init_library() sets it to the CUDA context
+  }
+
+  nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() {
+    if (encoder) destroy_encoder();  // Tear down a still-open encoder before the CUDA resources below are released
+
+    if (cuda_context) {
+      {  // Scope: the context stays pushed only while the resources created in it are released
+        auto autopop_context = push_context();  // NOTE(review): result is not checked; the frees below run even if the push failed
+
+        if (cuda_d3d_input_texture) {
+          if (cuda_failed(cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture))) {
+            BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error;
+          }
+          cuda_d3d_input_texture = nullptr;
+        }
+
+        if (cuda_surface) {
+          if (cuda_failed(cuda_functions.cuMemFree(cuda_surface))) {
+            BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error;
+          }
+          cuda_surface = 0;
+        }
+      }
+
+      if (cuda_failed(cuda_functions.cuCtxDestroy(cuda_context))) {  // Runs after the guard above has popped the context
+        BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error;
+      }
+      cuda_context = nullptr;
+    }
+
+    if (cuda_functions.dll) {  // Unload the CUDA library last, once no more calls through the function table are made
+      FreeLibrary(cuda_functions.dll);
+      cuda_functions = {};
+    }
+  }
+
+  ID3D11Texture2D *
+  nvenc_d3d11_on_cuda::get_input_texture() {
+    return d3d_input_texture.GetInterfacePtr();  // Raw pointer held by the smart pointer; null until the texture has been created
+  }
+
+  bool
+  nvenc_d3d11_on_cuda::init_library() {
+    if (!nvenc_d3d11::init_library()) return false;  // NVENC library is loaded by the D3D11 base class first
+
+    constexpr auto dll_name = "nvcuda.dll";  // NOTE(review): no early-out if CUDA is already initialized; confirm this runs only once
+
+    if ((cuda_functions.dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
+      auto load_function = [&]<typename T>(T &location, auto symbol) -> bool {  // Resolves one export into `location`; false if missing
+        location = (T) GetProcAddress(cuda_functions.dll, symbol);
+        return location != nullptr;
+      };
+      if (!load_function(cuda_functions.cuInit, "cuInit") ||
+          !load_function(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") ||
+          !load_function(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") ||
+          !load_function(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") ||
+          !load_function(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") ||
+          !load_function(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") ||
+          !load_function(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") ||
+          !load_function(cuda_functions.cuMemFree, "cuMemFree_v2") ||
+          !load_function(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") ||
+          !load_function(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") ||
+          !load_function(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") ||
+          !load_function(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") ||
+          !load_function(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") ||
+          !load_function(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2")) {
+        BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name;
+        FreeLibrary(cuda_functions.dll);
+        cuda_functions = {};  // Also clears `dll`, so the context creation below is skipped
+      }
+    }
+    else {
+      BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name;
+    }
+
+    if (cuda_functions.dll) {  // Library loaded and every required export resolved
+      IDXGIDevicePtr dxgi_device;
+      IDXGIAdapterPtr dxgi_adapter;
+      if (d3d_device &&
+          SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) &&
+          SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter))) {
+        CUdevice cuda_device;
+        if (cuda_succeeded(cuda_functions.cuInit(0)) &&
+            cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) &&
+            cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) &&
+            cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context))) {  // Popped again; re-pushed on demand via push_context()
+          device = cuda_context;  // The CUDA context is the encoding device handle used by the base class
+        }
+        else {
+          BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error;
+        }
+      }
+      else {
+        BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop";
+      }
+    }
+
+    return device != nullptr;  // `device` is only assigned once the CUDA context was created and popped successfully
+  }
+
+  bool
+  nvenc_d3d11_on_cuda::create_and_register_input_buffer() {
+    if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {  // Every other buffer format is rejected by this encoder
+      BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding";
+      return false;
+    }
+
+    if (!d3d_input_texture) {  // Create the D3D11 input texture once; repeated calls reuse it
+      D3D11_TEXTURE2D_DESC desc = {};
+      desc.Width = encoder_params.width;
+      desc.Height = encoder_params.height * 3;  // Planar YUV
+      desc.MipLevels = 1;
+      desc.ArraySize = 1;
+      desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
+      desc.SampleDesc.Count = 1;
+      desc.Usage = D3D11_USAGE_DEFAULT;
+      desc.BindFlags = D3D11_BIND_RENDER_TARGET;
+
+      if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
+        BOOST_LOG(error) << "NvEnc: couldn't create input texture";
+        return false;
+      }
+    }
+
+    {  // Scope: CUDA calls below are made with the interop context pushed
+      auto autopop_context = push_context();
+      if (!autopop_context) return false;
+
+      if (!cuda_d3d_input_texture) {  // Register the D3D11 texture with CUDA once
+        if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource(
+              &cuda_d3d_input_texture,
+              d3d_input_texture,
+              CU_GRAPHICS_REGISTER_FLAGS_NONE))) {
+          BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error;
+          return false;
+        }
+      }
+
+      if (!cuda_surface) {  // Allocate the pitched CUDA surface once; it is what NVENC reads from
+        if (cuda_failed(cuda_functions.cuMemAllocPitch(
+              &cuda_surface,
+              &cuda_surface_pitch,
+              // Planar 16-bit YUV
+              encoder_params.width * 2,
+              encoder_params.height * 3, 16))) {
+          BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: error " << last_cuda_error;
+          return false;
+        }
+      }
+    }
+
+    if (!registered_input_buffer) {  // Register the CUDA surface (not the D3D11 texture) with NVENC once
+      NV_ENC_REGISTER_RESOURCE register_resource = { min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4) };
+      register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
+      register_resource.width = encoder_params.width;
+      register_resource.height = encoder_params.height;
+      register_resource.pitch = cuda_surface_pitch;
+      register_resource.resourceToRegister = (void *) cuda_surface;
+      register_resource.bufferFormat = encoder_params.buffer_format;
+      register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
+
+      if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, &register_resource))) {
+        BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
+        return false;
+      }
+
+      registered_input_buffer = register_resource.registeredResource;  // Handle filled in by NvEncRegisterResource()
+    }
+
+    return true;
+  }
+
+  bool
+  nvenc_d3d11_on_cuda::synchronize_input_buffer() {
+    auto autopop_context = push_context();  // Copies the D3D11 input texture into the CUDA surface registered with NVENC
+    if (!autopop_context) return false;
+
+    if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) {
+      BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error;
+      return false;
+    }
+
+    auto unmap = [&]() -> bool {  // Unmaps the texture again; returns false (and logs) on failure
+      if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) {
+        BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error;
+        return false;
+      }
+      return true;
+    };
+    auto unmap_guard = util::fail_guard(unmap);  // presumably invokes unmap() on scope exit unless disabled - confirm in util
+
+    CUarray input_texture_array;
+    if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) {
+      BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error;
+      return false;
+    }
+
+    {  // Array-to-device copy with the same byte width and row count used for cuMemAllocPitch()
+      CUDA_MEMCPY2D copy_params = {};
+      copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY;
+      copy_params.srcArray = input_texture_array;
+      copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+      copy_params.dstDevice = cuda_surface;
+      copy_params.dstPitch = cuda_surface_pitch;
+      // Planar 16-bit YUV
+      copy_params.WidthInBytes = encoder_params.width * 2;
+      copy_params.Height = encoder_params.height * 3;
+
+      if (cuda_failed(cuda_functions.cuMemcpy2D(&copy_params))) {
+        BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error;
+        return false;
+      }
+    }
+
+    unmap_guard.disable();  // Success path: unmap explicitly below so that its result can be returned
+    return unmap();
+  }
+
+  bool
+  nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) {
+    last_cuda_error = result;  // Remembered so that callers can include the code in their log message
+    return result == CUDA_SUCCESS;
+  }
+
+  bool
+  nvenc_d3d11_on_cuda::cuda_failed(CUresult result) {
+    last_cuda_error = result;  // Remembered so that callers can include the code in their log message
+    return result != CUDA_SUCCESS;
+  }
+
+  nvenc_d3d11_on_cuda::autopop_context::~autopop_context() {
+    if (pushed_context) {  // Null means push_context() did not push anything, so there is nothing to pop
+      CUcontext popped_context;  // Receives the popped context; its value is not used
+      if (parent.cuda_failed(parent.cuda_functions.cuCtxPopCurrent(&popped_context))) {
+        BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error;
+      }
+    }
+  }
+
+  nvenc_d3d11_on_cuda::autopop_context
+  nvenc_d3d11_on_cuda::push_context() {
+    if (cuda_context &&
+        cuda_succeeded(cuda_functions.cuCtxPushCurrent(cuda_context))) {
+      return { *this, cuda_context };  // Guard pops the context again in its destructor
+    }
+    else {  // Also taken when `cuda_context` is null; `last_cuda_error` then holds the result of an earlier call
+      BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error;
+      return { *this, nullptr };  // Empty guard: converts to false and pops nothing
+    }
+  }
+
+}  // namespace nvenc
+#endif
diff --git a/src/nvenc/nvenc_d3d11_on_cuda.h b/src/nvenc/nvenc_d3d11_on_cuda.h
new file mode 100644
index 00000000..81114321
--- /dev/null
+++ b/src/nvenc/nvenc_d3d11_on_cuda.h
@@ -0,0 +1,96 @@
+/**
+ * @file src/nvenc/nvenc_d3d11_on_cuda.h
+ * @brief Declarations for CUDA NVENC encoder with Direct3D11 input surfaces.
+ */
+#pragma once
+#ifdef _WIN32
+
+  #include "nvenc_d3d11.h"
+
+  #include <ffnvcodec/dynlink_cuda.h>
+
+namespace nvenc {
+
+  /**
+   * @brief Interop Direct3D11 on CUDA NVENC encoder.
+   *        Input surface is Direct3D11, the encoding device registered with NVENC is a CUDA context.
+   */
+  class nvenc_d3d11_on_cuda final: public nvenc_d3d11 {
+  public:
+    /**
+     * @param d3d_device Direct3D11 device that will create input surface texture.
+     *                   CUDA encoding device will be derived from it.
+     */
+    explicit nvenc_d3d11_on_cuda(ID3D11Device *d3d_device);
+    ~nvenc_d3d11_on_cuda();
+
+    ID3D11Texture2D *
+    get_input_texture() override;
+
+  private:
+    bool
+    init_library() override;  ///< Loads NVENC and CUDA libraries and creates the CUDA interop context.
+
+    bool
+    create_and_register_input_buffer() override;  ///< Creates the D3D11 texture and CUDA surface, registers the latter with NVENC.
+
+    bool
+    synchronize_input_buffer() override;  ///< Copies the D3D11 input texture into the CUDA surface.
+
+    bool
+    cuda_succeeded(CUresult result);  ///< Stores `result` in `last_cuda_error`, returns `true` on `CUDA_SUCCESS`.
+
+    bool
+    cuda_failed(CUresult result);  ///< Stores `result` in `last_cuda_error`, returns `true` on anything but `CUDA_SUCCESS`.
+
+    struct autopop_context {  ///< Scope guard that pops the pushed CUDA context on destruction.
+      autopop_context(nvenc_d3d11_on_cuda &parent, CUcontext pushed_context):
+          parent(parent),
+          pushed_context(pushed_context) {
+      }
+
+      autopop_context(const autopop_context &) = delete;  ///< Non-copyable: a copy would pop the context a second time.
+      ~autopop_context();
+
+      explicit
+      operator bool() const {  ///< `true` when a context was actually pushed.
+        return pushed_context != nullptr;
+      }
+
+      nvenc_d3d11_on_cuda &parent;
+      CUcontext pushed_context = nullptr;
+    };
+
+    autopop_context
+    push_context();  ///< Pushes `cuda_context`; the returned guard converts to `false` on failure.
+
+    const ID3D11DevicePtr d3d_device;  ///< Device that creates the input texture; the CUDA device is derived from its adapter.
+    ID3D11Texture2DPtr d3d_input_texture;  ///< Direct3D11 input surface returned by `get_input_texture()`.
+
+    struct {
+      tcuInit *cuInit;
+      tcuD3D11GetDevice *cuD3D11GetDevice;
+      tcuCtxCreate_v2 *cuCtxCreate;
+      tcuCtxDestroy_v2 *cuCtxDestroy;
+      tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
+      tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
+      tcuMemAllocPitch_v2 *cuMemAllocPitch;
+      tcuMemFree_v2 *cuMemFree;
+      tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
+      tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+      tcuGraphicsMapResources *cuGraphicsMapResources;
+      tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+      tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+      tcuMemcpy2D_v2 *cuMemcpy2D;
+      HMODULE dll;  ///< Handle of the loaded CUDA library; null when it is not loaded.
+    } cuda_functions = {};
+
+    CUresult last_cuda_error = CUDA_SUCCESS;  ///< Result of the most recent call checked via `cuda_succeeded()`/`cuda_failed()`.
+    CUcontext cuda_context = nullptr;  ///< Interop context created in `init_library()`.
+    CUgraphicsResource cuda_d3d_input_texture = nullptr;  ///< `d3d_input_texture` registered with CUDA.
+    CUdeviceptr cuda_surface = 0;  ///< Pitched device memory registered with NVENC as input buffer.
+    size_t cuda_surface_pitch = 0;  ///< Row pitch of `cuda_surface` in bytes, as returned by `cuMemAllocPitch()`.
+  };
+
+}  // namespace nvenc
+#endif
diff --git a/src/nvenc/nvenc_encoded_frame.h b/src/nvenc/nvenc_encoded_frame.h
index 007345a0..46a8e46d 100644
--- a/src/nvenc/nvenc_encoded_frame.h
+++ b/src/nvenc/nvenc_encoded_frame.h
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_encoded_frame.h
- * @brief Declarations for base NVENC encoded frame.
+ * @brief Declarations for NVENC encoded frame.
  */
 #pragma once
 
@@ -8,10 +8,15 @@
 #include <vector>
 
 namespace nvenc {
+
+  /**
+   * @brief Encoded frame.
+   */
   struct nvenc_encoded_frame {
     std::vector<uint8_t> data;
     uint64_t frame_index = 0;
     bool idr = false;
     bool after_ref_frame_invalidation = false;
   };
+
 }  // namespace nvenc
diff --git a/src/nvenc/nvenc_utils.cpp b/src/nvenc/nvenc_utils.cpp
index ff274d53..26e2dc30 100644
--- a/src/nvenc/nvenc_utils.cpp
+++ b/src/nvenc/nvenc_utils.cpp
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_utils.cpp
- * @brief Definitions for base NVENC utilities.
+ * @brief Definitions for NVENC utilities.
  */
 #include <cassert>
 
@@ -18,6 +18,12 @@ namespace nvenc {
       case NV_ENC_BUFFER_FORMAT_NV12:
         return DXGI_FORMAT_NV12;
 
+      case NV_ENC_BUFFER_FORMAT_AYUV:
+        return DXGI_FORMAT_AYUV;
+
+      case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+        return DXGI_FORMAT_R16_UINT;
+
       default:
         return DXGI_FORMAT_UNKNOWN;
     }
@@ -33,6 +39,12 @@ namespace nvenc {
       case platf::pix_fmt_e::p010:
         return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
 
+      case platf::pix_fmt_e::ayuv:
+        return NV_ENC_BUFFER_FORMAT_AYUV;
+
+      case platf::pix_fmt_e::yuv444p16:
+        return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
+
       default:
         return NV_ENC_BUFFER_FORMAT_UNDEFINED;
     }
diff --git a/src/nvenc/nvenc_utils.h b/src/nvenc/nvenc_utils.h
index 09c88ff5..db428676 100644
--- a/src/nvenc/nvenc_utils.h
+++ b/src/nvenc/nvenc_utils.h
@@ -1,6 +1,6 @@
 /**
  * @file src/nvenc/nvenc_utils.h
- * @brief Declarations for base NVENC utilities.
+ * @brief Declarations for NVENC utilities.
  */
 #pragma once
 
diff --git a/src/nvhttp.cpp b/src/nvhttp.cpp
index 085402ff..8ac56797 100644
--- a/src/nvhttp.cpp
+++ b/src/nvhttp.cpp
@@ -720,17 +720,32 @@ namespace nvhttp {
     }
 
     uint32_t codec_mode_flags = SCM_H264;
+    if (video::last_encoder_probe_supported_yuv444_for_codec[0]) {
+      codec_mode_flags |= SCM_H264_HIGH8_444;
+    }
     if (video::active_hevc_mode >= 2) {
       codec_mode_flags |= SCM_HEVC;
+      if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
+        codec_mode_flags |= SCM_HEVC_REXT8_444;
+      }
     }
     if (video::active_hevc_mode >= 3) {
       codec_mode_flags |= SCM_HEVC_MAIN10;
+      if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
+        codec_mode_flags |= SCM_HEVC_REXT10_444;
+      }
     }
     if (video::active_av1_mode >= 2) {
       codec_mode_flags |= SCM_AV1_MAIN8;
+      if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
+        codec_mode_flags |= SCM_AV1_HIGH8_444;
+      }
     }
     if (video::active_av1_mode >= 3) {
       codec_mode_flags |= SCM_AV1_MAIN10;
+      if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
+        codec_mode_flags |= SCM_AV1_HIGH10_444;
+      }
     }
     tree.put("root.ServerCodecModeSupport", codec_mode_flags);
 
diff --git a/src/platform/common.h b/src/platform/common.h
index 24cd658b..5009c183 100644
--- a/src/platform/common.h
+++ b/src/platform/common.h
@@ -209,6 +209,9 @@ namespace platf {
     yuv420p10,  ///< YUV 4:2:0 10-bit
     nv12,  ///< NV12
     p010,  ///< P010
+    ayuv,  ///< AYUV
+    yuv444p16,  ///< Planar 10-bit (shifted to 16-bit) YUV 4:4:4
+    y410,  ///< Y410
     unknown  ///< Unknown
   };
 
@@ -223,6 +226,9 @@ namespace platf {
       _CONVERT(yuv420p10);
       _CONVERT(nv12);
       _CONVERT(p010);
+      _CONVERT(ayuv);
+      _CONVERT(yuv444p16);
+      _CONVERT(y410);
       _CONVERT(unknown);
     }
 #undef _CONVERT
diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp
index 532b80d6..ed88e8d5 100644
--- a/src/platform/windows/display_vram.cpp
+++ b/src/platform/windows/display_vram.cpp
@@ -17,7 +17,8 @@ extern "C" {
 #include "src/config.h"
 #include "src/logging.h"
 #include "src/nvenc/nvenc_config.h"
-#include "src/nvenc/nvenc_d3d11.h"
+#include "src/nvenc/nvenc_d3d11_native.h"
+#include "src/nvenc/nvenc_d3d11_on_cuda.h"
 #include "src/nvenc/nvenc_utils.h"
 #include "src/video.h"
 
@@ -110,6 +111,16 @@ namespace platf::dxgi {
   blob_t convert_yuv420_planar_y_ps_linear_hlsl;
   blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
   blob_t convert_yuv420_planar_y_vs_hlsl;
+  blob_t convert_yuv444_packed_ayuv_ps_hlsl;
+  blob_t convert_yuv444_packed_ayuv_ps_linear_hlsl;
+  blob_t convert_yuv444_packed_vs_hlsl;
+  blob_t convert_yuv444_planar_ps_hlsl;
+  blob_t convert_yuv444_planar_ps_linear_hlsl;
+  blob_t convert_yuv444_planar_ps_perceptual_quantizer_hlsl;
+  blob_t convert_yuv444_packed_y410_ps_hlsl;
+  blob_t convert_yuv444_packed_y410_ps_linear_hlsl;
+  blob_t convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl;
+  blob_t convert_yuv444_planar_vs_hlsl;
   blob_t cursor_ps_hlsl;
   blob_t cursor_ps_normalize_white_hlsl;
   blob_t cursor_vs_hlsl;
@@ -402,18 +413,38 @@ namespace platf::dxgi {
           return -1;
         }
 
-        device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
-        device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
-        device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0);
-        device_ctx->RSSetViewports(1, &outY_view);
-        device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res);
-        device_ctx->Draw(3, 0);
+        auto draw = [&](auto &input, auto &y_or_yuv_viewports, auto &uv_viewport) {
+          device_ctx->PSSetShaderResources(0, 1, &input);
 
-        device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
-        device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
-        device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0);
-        device_ctx->RSSetViewports(1, &outUV_view);
-        device_ctx->Draw(3, 0);
+          // Draw Y/YUV
+          device_ctx->OMSetRenderTargets(1, &out_Y_or_YUV_rtv, nullptr);
+          device_ctx->VSSetShader(convert_Y_or_YUV_vs.get(), nullptr, 0);
+          device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_or_YUV_fp16_ps.get() : convert_Y_or_YUV_ps.get(), nullptr, 0);
+          auto viewport_count = (format == DXGI_FORMAT_R16_UINT) ? 3 : 1;
+          assert(viewport_count <= y_or_yuv_viewports.size());
+          device_ctx->RSSetViewports(viewport_count, y_or_yuv_viewports.data());
+          device_ctx->Draw(3 * viewport_count, 0);  // vertex shader will spread vertices across viewports
+
+          // Draw UV if needed
+          if (out_UV_rtv) {
+            assert(format == DXGI_FORMAT_NV12 || format == DXGI_FORMAT_P010);
+            device_ctx->OMSetRenderTargets(1, &out_UV_rtv, nullptr);
+            device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
+            device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0);
+            device_ctx->RSSetViewports(1, &uv_viewport);
+            device_ctx->Draw(3, 0);
+          }
+        };
+
+        // Clear render target view(s) once so that the aspect ratio mismatch "bars" appear black
+        if (!rtvs_cleared) {
+          auto black = create_black_texture_for_rtv_clear();
+          if (black) draw(black, out_Y_or_YUV_viewports_for_clear, out_UV_viewport_for_clear);
+          rtvs_cleared = true;
+        }
+
+        // Draw captured frame
+        draw(img_ctx.encoder_input_res, out_Y_or_YUV_viewports, out_UV_viewport);
 
         // Release encoder mutex to allow capture code to reuse this image
         img_ctx.encoder_mutex->ReleaseSync(0);
@@ -429,6 +460,12 @@ namespace platf::dxgi {
     apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
       auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
 
+      if (format == DXGI_FORMAT_AYUV ||
+          format == DXGI_FORMAT_R16_UINT ||
+          format == DXGI_FORMAT_Y410) {
+        color_vectors = ::video::new_color_vectors_from_colorspace(colorspace);
+      }
+
       if (!color_vectors) {
         BOOST_LOG(error) << "No vector data for colorspace"sv;
         return;
@@ -440,6 +477,7 @@ namespace platf::dxgi {
         return;
       }
 
+      device_ctx->VSSetConstantBuffers(3, 1, &color_matrix);
       device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
       this->color_matrix = std::move(color_matrix);
     }
@@ -465,8 +503,20 @@ namespace platf::dxgi {
       auto offsetX = (out_width - out_width_f) / 2;
       auto offsetY = (out_height - out_height_f) / 2;
 
-      outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
-      outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
+      out_Y_or_YUV_viewports[0] = { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };  // Y plane
+      out_Y_or_YUV_viewports[1] = out_Y_or_YUV_viewports[0];  // U plane
+      out_Y_or_YUV_viewports[1].TopLeftY += out_height;
+      out_Y_or_YUV_viewports[2] = out_Y_or_YUV_viewports[1];  // V plane
+      out_Y_or_YUV_viewports[2].TopLeftY += out_height;
+
+      out_Y_or_YUV_viewports_for_clear[0] = { 0, 0, (float) out_width, (float) out_height, 0.0f, 1.0f };  // Y plane
+      out_Y_or_YUV_viewports_for_clear[1] = out_Y_or_YUV_viewports_for_clear[0];  // U plane
+      out_Y_or_YUV_viewports_for_clear[1].TopLeftY += out_height;
+      out_Y_or_YUV_viewports_for_clear[2] = out_Y_or_YUV_viewports_for_clear[1];  // V plane
+      out_Y_or_YUV_viewports_for_clear[2].TopLeftY += out_height;
+
+      out_UV_viewport = { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
+      out_UV_viewport_for_clear = { 0, 0, (float) out_width / 2, (float) out_height / 2, 0.0f, 1.0f };
 
       float subsample_offset_in[16 / sizeof(float)] { 1.0f / (float) out_width_f, 1.0f / (float) out_height_f };  // aligned to 16-byte
       subsample_offset = make_buffer(device.get(), subsample_offset_in);
@@ -488,36 +538,106 @@ namespace platf::dxgi {
         device_ctx->VSSetConstantBuffers(1, 1, &rotation);
       }
 
-      D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc {
-        format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM,
-        D3D11_RTV_DIMENSION_TEXTURE2D
+      DXGI_FORMAT rtv_Y_or_YUV_format = DXGI_FORMAT_UNKNOWN;
+      DXGI_FORMAT rtv_UV_format = DXGI_FORMAT_UNKNOWN;
+      bool rtv_simple_clear = false;
+
+      switch (format) {
+        case DXGI_FORMAT_NV12:
+          rtv_Y_or_YUV_format = DXGI_FORMAT_R8_UNORM;
+          rtv_UV_format = DXGI_FORMAT_R8G8_UNORM;
+          rtv_simple_clear = true;
+          break;
+
+        case DXGI_FORMAT_P010:
+          rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UNORM;
+          rtv_UV_format = DXGI_FORMAT_R16G16_UNORM;
+          rtv_simple_clear = true;
+          break;
+
+        case DXGI_FORMAT_AYUV:
+          rtv_Y_or_YUV_format = DXGI_FORMAT_R8G8B8A8_UINT;
+          break;
+
+        case DXGI_FORMAT_R16_UINT:
+          rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UINT;
+          break;
+
+        case DXGI_FORMAT_Y410:
+          rtv_Y_or_YUV_format = DXGI_FORMAT_R10G10B10A2_UINT;
+          break;
+
+        default:
+          BOOST_LOG(error) << "Unable to create render target views because of the unrecognized surface format";
+          return -1;
+      }
+
+      auto create_rtv = [&](auto &rt, DXGI_FORMAT rt_format) -> bool {
+        D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
+        rtv_desc.Format = rt_format;
+        rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
+
+        auto status = device->CreateRenderTargetView(output_texture.get(), &rtv_desc, &rt);
+        if (FAILED(status)) {
+          BOOST_LOG(error) << "Failed to create render target view: " << util::log_hex(status);
+          return false;
+        }
+
+        return true;
       };
 
-      auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
+      // Create Y/YUV render target view
+      if (!create_rtv(out_Y_or_YUV_rtv, rtv_Y_or_YUV_format)) return -1;
+
+      // Create UV render target view if needed
+      if (rtv_UV_format != DXGI_FORMAT_UNKNOWN && !create_rtv(out_UV_rtv, rtv_UV_format)) return -1;
+
+      if (rtv_simple_clear) {
+        // Clear the RTVs to ensure the aspect ratio padding is black
+        const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+        device_ctx->ClearRenderTargetView(out_Y_or_YUV_rtv.get(), y_black);
+        if (out_UV_rtv) {
+          const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
+          device_ctx->ClearRenderTargetView(out_UV_rtv.get(), uv_black);
+        }
+        rtvs_cleared = true;
       }
-
-      nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
-
-      status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
+      else {
+        // Can't use ClearRenderTargetView(), will clear on first convert()
+        rtvs_cleared = false;
       }
 
-      // Clear the RTVs to ensure the aspect ratio padding is black
-      const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
-      device_ctx->ClearRenderTargetView(nv12_Y_rt.get(), y_black);
-      const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
-      device_ctx->ClearRenderTargetView(nv12_UV_rt.get(), uv_black);
-
       return 0;
     }
 
     int
     init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
+      switch (pix_fmt) {
+        case pix_fmt_e::nv12:
+          format = DXGI_FORMAT_NV12;
+          break;
+
+        case pix_fmt_e::p010:
+          format = DXGI_FORMAT_P010;
+          break;
+
+        case pix_fmt_e::ayuv:
+          format = DXGI_FORMAT_AYUV;
+          break;
+
+        case pix_fmt_e::yuv444p16:
+          format = DXGI_FORMAT_R16_UINT;
+          break;
+
+        case pix_fmt_e::y410:
+          format = DXGI_FORMAT_Y410;
+          break;
+
+        default:
+          BOOST_LOG(error) << "D3D11 backend doesn't support pixel format: " << from_pix_fmt(pix_fmt);
+          return -1;
+      }
+
       D3D_FEATURE_LEVEL featureLevels[] {
         D3D_FEATURE_LEVEL_11_1,
         D3D_FEATURE_LEVEL_11_0,
@@ -556,61 +676,82 @@ namespace platf::dxgi {
         BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
       }
 
-      format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
-      status = device->CreateVertexShader(convert_yuv420_planar_y_vs_hlsl->GetBufferPointer(), convert_yuv420_planar_y_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
-      if (status) {
-        BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
+#define create_vertex_shader_helper(x, y)                                                                    \
+  if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
+    BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status);            \
+    return -1;                                                                                               \
+  }
+#define create_pixel_shader_helper(x, y)                                                                    \
+  if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
+    BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status);            \
+    return -1;                                                                                              \
+  }
 
-      status = device->CreateVertexShader(convert_yuv420_packed_uv_type0_vs_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs);
-      if (status) {
-        BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
+      switch (format) {
+        case DXGI_FORMAT_NV12:
+          // Semi-planar 8-bit YUV 4:2:0
+          create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
+          create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
+          create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
+          create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
+          create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
+          create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
+          break;
 
-      // If the display is in HDR and we're streaming HDR, we'll be converting scRGB to SMPTE 2084 PQ.
-      if (format == DXGI_FORMAT_P010 && display->is_hdr()) {
-        status = device->CreatePixelShader(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
-        if (status) {
-          BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
+        case DXGI_FORMAT_P010:
+          // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
+          create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
+          create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
+          create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
+          create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
+          if (display->is_hdr()) {
+            create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
+            create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
+          }
+          else {
+            create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
+            create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
+          }
+          break;
+
+        case DXGI_FORMAT_R16_UINT:
+          // Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
+          create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
+          create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
+          if (display->is_hdr()) {
+            create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
+          }
+          else {
+            create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
+          }
+          break;
+
+        case DXGI_FORMAT_AYUV:
+          // Packed 8-bit YUV 4:4:4
+          create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
+          create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
+          create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
+          break;
+
+        case DXGI_FORMAT_Y410:
+          // Packed 10-bit YUV 4:4:4
+          create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
+          create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
+          if (display->is_hdr()) {
+            create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
+          }
+          else {
+            create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
+          }
+          break;
+
+        default:
+          BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
           return -1;
-        }
-
-        status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
-        if (status) {
-          BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
-          return -1;
-        }
-      }
-      else {
-        // If the display is in Advanced Color mode, the desktop format will be scRGB FP16.
-        // scRGB uses linear gamma, so we must use our linear to sRGB conversion shaders.
-        status = device->CreatePixelShader(convert_yuv420_planar_y_ps_linear_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_linear_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
-        if (status) {
-          BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
-          return -1;
-        }
-
-        status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_linear_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_linear_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
-        if (status) {
-          BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
-          return -1;
-        }
       }
 
-      // These shaders consume standard 8-bit sRGB input
-      status = device->CreatePixelShader(convert_yuv420_planar_y_ps_hlsl->GetBufferPointer(), convert_yuv420_planar_y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps);
-      if (status) {
-        BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
-
-      status = device->CreatePixelShader(convert_yuv420_packed_uv_type0_ps_hlsl->GetBufferPointer(), convert_yuv420_packed_uv_type0_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps);
-      if (status) {
-        BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
+#undef create_vertex_shader_helper
+#undef create_pixel_shader_helper
 
       auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
       if (!default_color_vectors) {
@@ -623,6 +764,7 @@ namespace platf::dxgi {
         BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
         return -1;
       }
+      device_ctx->VSSetConstantBuffers(3, 1, &color_matrix);
       device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
 
       this->display = std::dynamic_pointer_cast<display_base_t>(display);
@@ -653,7 +795,7 @@ namespace platf::dxgi {
 
       device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
       device_ctx->PSSetSamplers(0, 1, &sampler_linear);
-      device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+      device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
 
       return 0;
     }
@@ -725,6 +867,41 @@ namespace platf::dxgi {
       return 0;
     }
 
+    shader_res_t
+    create_black_texture_for_rtv_clear() {
+      constexpr auto width = 32;
+      constexpr auto height = 32;
+
+      D3D11_TEXTURE2D_DESC texture_desc = {};
+      texture_desc.Width = width;
+      texture_desc.Height = height;
+      texture_desc.MipLevels = 1;
+      texture_desc.ArraySize = 1;
+      texture_desc.SampleDesc.Count = 1;
+      texture_desc.Usage = D3D11_USAGE_IMMUTABLE;
+      texture_desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+      texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+
+      std::vector<uint8_t> mem(4 * width * height, 0);
+      D3D11_SUBRESOURCE_DATA texture_data = { mem.data(), 4 * width, 0 };
+
+      texture2d_t texture;
+      auto status = device->CreateTexture2D(&texture_desc, &texture_data, &texture);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Failed to create black texture: " << util::log_hex(status);
+        return {};
+      }
+
+      shader_res_t resource_view;
+      status = device->CreateShaderResourceView(texture.get(), nullptr, &resource_view);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Failed to create black texture resource view: " << util::log_hex(status);
+        return {};
+      }
+
+      return resource_view;
+    }
+
     ::video::color_t *color_p;
 
     buf_t subsample_offset;
@@ -733,8 +910,9 @@ namespace platf::dxgi {
     blend_t blend_disable;
     sampler_state_t sampler_linear;
 
-    render_target_t nv12_Y_rt;
-    render_target_t nv12_UV_rt;
+    render_target_t out_Y_or_YUV_rtv;
+    render_target_t out_UV_rtv;
+    bool rtvs_cleared = false;
 
     // d3d_img_t::id -> encoder_img_ctx_t
     // These store the encoder textures for each img_t that passes through
@@ -744,15 +922,16 @@ namespace platf::dxgi {
 
     std::shared_ptr<display_base_t> display;
 
+    vs_t convert_Y_or_YUV_vs;
+    ps_t convert_Y_or_YUV_ps;
+    ps_t convert_Y_or_YUV_fp16_ps;
+
     vs_t convert_UV_vs;
     ps_t convert_UV_ps;
     ps_t convert_UV_fp16_ps;
-    ps_t convert_Y_ps;
-    ps_t convert_Y_fp16_ps;
-    vs_t scene_vs;
 
-    D3D11_VIEWPORT outY_view;
-    D3D11_VIEWPORT outUV_view;
+    std::array<D3D11_VIEWPORT, 3> out_Y_or_YUV_viewports, out_Y_or_YUV_viewports_for_clear;
+    D3D11_VIEWPORT out_UV_viewport, out_UV_viewport_for_clear;
 
     DXGI_FORMAT format;
 
@@ -871,7 +1050,12 @@ namespace platf::dxgi {
 
       if (base.init(display, adapter_p, pix_fmt)) return false;
 
-      nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
+      if (pix_fmt == pix_fmt_e::yuv444p16) {
+        nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11_on_cuda>(base.device.get());
+      }
+      else {
+        nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11_native>(base.device.get());
+      }
       nvenc = nvenc_d3d.get();
 
       return true;
@@ -1409,7 +1593,7 @@ namespace platf::dxgi {
 
     device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
     device_ctx->PSSetSamplers(0, 1, &sampler_linear);
-    device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+    device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
 
     return 0;
   }
@@ -1703,20 +1887,10 @@ namespace platf::dxgi {
 
   std::unique_ptr<avcodec_encode_device_t>
   display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
-    if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
-      BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
-
-      return nullptr;
-    }
-
     auto device = std::make_unique<d3d_avcodec_encode_device_t>();
-
-    auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
-
-    if (ret) {
+    if (device->init(shared_from_this(), adapter.get(), pix_fmt) != 0) {
       return nullptr;
     }
-
     return device;
   }
 
@@ -1746,6 +1920,16 @@ namespace platf::dxgi {
     compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
     compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);
     compile_vertex_shader_helper(convert_yuv420_planar_y_vs);
+    compile_pixel_shader_helper(convert_yuv444_packed_ayuv_ps);
+    compile_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear);
+    compile_vertex_shader_helper(convert_yuv444_packed_vs);
+    compile_pixel_shader_helper(convert_yuv444_planar_ps);
+    compile_pixel_shader_helper(convert_yuv444_planar_ps_linear);
+    compile_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer);
+    compile_pixel_shader_helper(convert_yuv444_packed_y410_ps);
+    compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear);
+    compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer);
+    compile_vertex_shader_helper(convert_yuv444_planar_vs);
     compile_pixel_shader_helper(cursor_ps);
     compile_pixel_shader_helper(cursor_ps_normalize_white);
     compile_vertex_shader_helper(cursor_vs);
diff --git a/src/rtsp.cpp b/src/rtsp.cpp
index 3528c258..3f146937 100644
--- a/src/rtsp.cpp
+++ b/src/rtsp.cpp
@@ -978,6 +978,7 @@ namespace rtsp_stream {
     args.try_emplace("x-nv-aqos.qosTrafficType"sv, "4"sv);
     args.try_emplace("x-ml-video.configuredBitrateKbps"sv, "0"sv);
     args.try_emplace("x-ss-general.encryptionEnabled"sv, "0"sv);
+    args.try_emplace("x-ss-video[0].chromaSamplingType"sv, "0"sv);
 
     stream::config_t config;
 
@@ -1013,6 +1014,7 @@ namespace rtsp_stream {
       config.monitor.encoderCscMode = util::from_view(args.at("x-nv-video[0].encoderCscMode"sv));
       config.monitor.videoFormat = util::from_view(args.at("x-nv-vqos[0].bitStreamFormat"sv));
       config.monitor.dynamicRange = util::from_view(args.at("x-nv-video[0].dynamicRangeMode"sv));
+      config.monitor.chromaSamplingType = util::from_view(args.at("x-ss-video[0].chromaSamplingType"sv));
 
       configuredBitrateKbps = util::from_view(args.at("x-ml-video.configuredBitrateKbps"sv));
     }
diff --git a/src/utility.h b/src/utility.h
index c3b4f3d8..e9adefd8 100644
--- a/src/utility.h
+++ b/src/utility.h
@@ -267,6 +267,12 @@ namespace util {
     return Hex<T>(elem, rev);
   }
 
+  template <typename T>
+  std::string
+  log_hex(const T &value) {
+    return "0x" + Hex<T>(value, false).to_string();
+  }
+
   template <class It>
   std::string
   hex_vec(It begin, It end, bool rev = false) {
diff --git a/src/video.cpp b/src/video.cpp
index 908b7fa9..6827b6c7 100644
--- a/src/video.cpp
+++ b/src/video.cpp
@@ -53,31 +53,36 @@ namespace video {
   namespace nv {
 
     enum class profile_h264_e : int {
-      baseline,  ///< Baseline profile
-      main,  ///< Main profile
-      high,  ///< High profile
-      high_444p,  ///< High 4:4:4 Predictive profile
+      high = 2,  ///< High profile
+      high_444p = 3,  ///< High 4:4:4 Predictive profile
     };
 
     enum class profile_hevc_e : int {
-      main,  ///< Main profile
-      main_10,  ///< Main 10 profile
-      rext,  ///< Rext profile
+      main = 0,  ///< Main profile
+      main_10 = 1,  ///< Main 10 profile
+      rext = 2,  ///< Rext profile
     };
+
   }  // namespace nv
 
   namespace qsv {
 
     enum class profile_h264_e : int {
-      baseline = 66,  ///< Baseline profile
-      main = 77,  ///< Main profile
       high = 100,  ///< High profile
+      high_444p = 244,  ///< High 4:4:4 Predictive profile
     };
 
     enum class profile_hevc_e : int {
       main = 1,  ///< Main profile
       main_10 = 2,  ///< Main 10 profile
+      rext = 4,  ///< RExt profile
     };
+
+    enum class profile_av1_e : int {
+      main = 1,  ///< Main profile
+      high = 2,  ///< High profile
+    };
+
   }  // namespace qsv
 
   util::Either<avcodec_buffer_t, int>
@@ -274,6 +279,7 @@ namespace video {
     NO_RC_BUF_LIMIT = 1 << 7,  ///< Don't set rc_buffer_size
     REF_FRAMES_INVALIDATION = 1 << 8,  ///< Support reference frames invalidation
     ALWAYS_REPROBE = 1 << 9,  ///< This is an encoder of last resort and we want to aggressively probe for a better one
+    YUV444_SUPPORT = 1 << 10,  ///< Encoder may support 4:4:4 chroma sampling depending on hardware
   };
 
   class avcodec_encode_session_t: public encode_session_t {
@@ -447,44 +453,39 @@ namespace video {
     "nvenc"sv,
     std::make_unique<encoder_platform_formats_nvenc>(
       platf::mem_type_e::dxgi,
-      platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010),
+      platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010,
+      platf::pix_fmt_e::ayuv, platf::pix_fmt_e::yuv444p16),
     {
-      // Common options
-      {},
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
-      {},
+      {},  // Common options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
+      {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "av1_nvenc"s,
     },
     {
-      // Common options
-      {},
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
-      {},
+      {},  // Common options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
+      {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "hevc_nvenc"s,
     },
     {
-      // Common options
-      {},
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
-      {},
+      {},  // Common options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
+      {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "h264_nvenc"s,
     },
-    PARALLEL_ENCODING | REF_FRAMES_INVALIDATION  // flags
+    PARALLEL_ENCODING | REF_FRAMES_INVALIDATION | YUV444_SUPPORT  // flags
   };
 #elif !defined(__APPLE__)
   encoder_t nvenc {
@@ -498,6 +499,7 @@ namespace video {
       AV_PIX_FMT_CUDA,
   #endif
       AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
+      AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
   #ifdef _WIN32
       dxgi_init_avcodec_hardware_input_buffer
   #else
@@ -516,12 +518,11 @@ namespace video {
         { "multipass"s, &config::video.nv_legacy.multipass },
         { "aq"s, &config::video.nv_legacy.aq },
       },
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
-      {},
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
+      {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "av1_nvenc"s,
     },
@@ -537,14 +538,16 @@ namespace video {
         { "multipass"s, &config::video.nv_legacy.multipass },
         { "aq"s, &config::video.nv_legacy.aq },
       },
-      // SDR-specific options
       {
+        // SDR-specific options
         { "profile"s, (int) nv::profile_hevc_e::main },
       },
-      // HDR-specific options
       {
+        // HDR-specific options
         { "profile"s, (int) nv::profile_hevc_e::main_10 },
       },
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "hevc_nvenc"s,
@@ -561,11 +564,13 @@ namespace video {
         { "multipass"s, &config::video.nv_legacy.multipass },
         { "aq"s, &config::video.nv_legacy.aq },
       },
-      // SDR-specific options
       {
+        // SDR-specific options
         { "profile"s, (int) nv::profile_h264_e::high },
       },
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "h264_nvenc"s,
@@ -581,6 +586,7 @@ namespace video {
       AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_QSV,
       AV_PIX_FMT_QSV,
       AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
+      AV_PIX_FMT_VUYX, AV_PIX_FMT_XV30,
       dxgi_init_avcodec_hardware_input_buffer),
     {
       // Common options
@@ -591,12 +597,23 @@ namespace video {
         { "low_delay_brc"s, 1 },
         { "low_power"s, 1 },
       },
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
-      {},
+      {
+        // SDR-specific options
+        { "profile"s, (int) qsv::profile_av1_e::main },
+      },
+      {
+        // HDR-specific options
+        { "profile"s, (int) qsv::profile_av1_e::main },
+      },
+      {
+        // YUV444 SDR-specific options
+        { "profile"s, (int) qsv::profile_av1_e::high },
+      },
+      {
+        // YUV444 HDR-specific options
+        { "profile"s, (int) qsv::profile_av1_e::high },
+      },
+      {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "av1_qsv"s,
     },
@@ -611,16 +628,24 @@ namespace video {
         { "recovery_point_sei"s, 0 },
         { "pic_timing_sei"s, 0 },
       },
-      // SDR-specific options
       {
+        // SDR-specific options
         { "profile"s, (int) qsv::profile_hevc_e::main },
       },
-      // HDR-specific options
       {
+        // HDR-specific options
         { "profile"s, (int) qsv::profile_hevc_e::main_10 },
       },
-      // Fallback options
       {
+        // YUV444 SDR-specific options
+        { "profile"s, (int) qsv::profile_hevc_e::rext },
+      },
+      {
+        // YUV444 HDR-specific options
+        { "profile"s, (int) qsv::profile_hevc_e::rext },
+      },
+      {
+        // Fallback options
         { "low_power"s, []() { return config::video.qsv.qsv_slow_hevc ? 0 : 1; } },
       },
       std::nullopt,  // QP rate control fallback
@@ -640,20 +665,24 @@ namespace video {
         { "pic_timing_sei"s, 0 },
         { "max_dec_frame_buffering"s, 1 },
       },
-      // SDR-specific options
       {
+        // SDR-specific options
         { "profile"s, (int) qsv::profile_h264_e::high },
       },
-      // HDR-specific options
-      {},
-      // Fallback options
+      {},  // HDR-specific options
       {
+        // YUV444 SDR-specific options
+        { "profile"s, (int) qsv::profile_h264_e::high_444p },
+      },
+      {},  // YUV444 HDR-specific options
+      {
+        // Fallback options
         { "low_power"s, 0 },  // Some old/low-end Intel GPUs don't support low power encoding
       },
       std::nullopt,  // QP rate control fallback
       "h264_qsv"s,
     },
-    PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT
+    PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT | YUV444_SUPPORT
   };
 
   encoder_t amdvce {
@@ -662,6 +691,7 @@ namespace video {
       AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE,
       AV_PIX_FMT_D3D11,
       AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
+      AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
       dxgi_init_avcodec_hardware_input_buffer),
     {
       // Common options
@@ -676,6 +706,8 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "av1_amf"s,
@@ -698,6 +730,8 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "hevc_amf"s,
@@ -716,12 +750,12 @@ namespace video {
         { "vbaq"s, &config::video.amd.amd_vbaq },
         { "enforce_hrd"s, &config::video.amd.amd_enforce_hrd },
       },
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {
+        // Fallback options
         { "usage"s, 2 /* AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY */ },  // Workaround for https://github.com/GPUOpen-LibrariesAndSDKs/AMF/issues/410
       },
       std::nullopt,  // QP rate control fallback
@@ -737,6 +771,7 @@ namespace video {
       AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE,
       AV_PIX_FMT_NONE,
       AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
+      AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10,
       nullptr),
     {
       // libsvtav1 takes different presets than libx264/libx265.
@@ -749,6 +784,8 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
 
       // QP rate control fallback
@@ -776,6 +813,8 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "libx265"s,
@@ -788,11 +827,13 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,  // QP rate control fallback
       "libx264"s,
     },
-    H264_ONLY | PARALLEL_ENCODING | ALWAYS_REPROBE
+    H264_ONLY | PARALLEL_ENCODING | ALWAYS_REPROBE | YUV444_SUPPORT
   };
 
 #ifdef __linux__
@@ -802,6 +843,7 @@ namespace video {
       AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_NONE,
       AV_PIX_FMT_VAAPI,
       AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
+      AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
       vaapi_init_avcodec_hardware_input_buffer),
     {
       // Common options
@@ -810,12 +852,12 @@ namespace video {
         { "async_depth"s, 1 },
         { "idr_interval"s, std::numeric_limits<int>::max() },
       },
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {
+        // Fallback options
         { "low_power"s, 0 },  // Not all VAAPI drivers expose LP entrypoints
       },
       std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
@@ -829,12 +871,12 @@ namespace video {
         { "sei"s, 0 },
         { "idr_interval"s, std::numeric_limits<int>::max() },
       },
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {
+        // Fallback options
         { "low_power"s, 0 },  // Not all VAAPI drivers expose LP entrypoints
       },
       std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
@@ -848,12 +890,12 @@ namespace video {
         { "sei"s, 0 },
         { "idr_interval"s, std::numeric_limits<int>::max() },
       },
-      // SDR-specific options
-      {},
-      // HDR-specific options
-      {},
-      // Fallback options
+      {},  // SDR-specific options
+      {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {
+        // Fallback options
         { "low_power"s, 0 },  // Not all VAAPI drivers expose LP entrypoints
       },
       std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
@@ -871,6 +913,7 @@ namespace video {
       AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_HWDEVICE_TYPE_NONE,
       AV_PIX_FMT_VIDEOTOOLBOX,
       AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
+      AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
       vt_init_avcodec_hardware_input_buffer),
     {
       // Common options
@@ -882,6 +925,8 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,
       "av1_videotoolbox"s,
@@ -896,6 +941,8 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {},  // Fallback options
       std::nullopt,
       "hevc_videotoolbox"s,
@@ -910,9 +957,12 @@ namespace video {
       },
       {},  // SDR-specific options
       {},  // HDR-specific options
+      {},  // YUV444 SDR-specific options
+      {},  // YUV444 HDR-specific options
       {
+        // Fallback options
         { "flags"s, "-low_delay" },
-      },  // Fallback options
+      },
       std::nullopt,
       "h264_videotoolbox"s,
     },
@@ -941,6 +991,7 @@ namespace video {
   int active_hevc_mode;
   int active_av1_mode;
   bool last_encoder_probe_supported_ref_frames_invalidation = false;
+  std::array<bool, 3> last_encoder_probe_supported_yuv444_for_codec = {};
 
   void
   reset_display(std::shared_ptr<platf::display_t> &disp, const platf::mem_type_e &type, const std::string &display_name, const config_t &config) {
@@ -1396,6 +1447,11 @@ namespace video {
       return nullptr;
     }
 
+    if (config.chromaSamplingType == 1 && !video_format[encoder_t::YUV444]) {
+      BOOST_LOG(error) << video_format.name << ": YUV 4:4:4 not supported"sv;
+      return nullptr;
+    }
+
     auto codec = avcodec_find_encoder_by_name(video_format.name.c_str());
     if (!codec) {
       BOOST_LOG(error) << "Couldn't open ["sv << video_format.name << ']';
@@ -1404,7 +1460,11 @@ namespace video {
     }
 
     auto colorspace = encode_device->colorspace;
-    auto sw_fmt = (colorspace.bit_depth == 10) ? platform_formats->avcodec_pix_fmt_10bit : platform_formats->avcodec_pix_fmt_8bit;
+    auto sw_fmt = (colorspace.bit_depth == 8 && config.chromaSamplingType == 0)  ? platform_formats->avcodec_pix_fmt_8bit :
+                  (colorspace.bit_depth == 8 && config.chromaSamplingType == 1)  ? platform_formats->avcodec_pix_fmt_yuv444_8bit :
+                  (colorspace.bit_depth == 10 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_10bit :
+                  (colorspace.bit_depth == 10 && config.chromaSamplingType == 1) ? platform_formats->avcodec_pix_fmt_yuv444_10bit :
+                                                                                   AV_PIX_FMT_NONE;
 
     // Allow up to 1 retry to apply the set of fallback options.
     //
@@ -1421,16 +1481,25 @@ namespace video {
 
       switch (config.videoFormat) {
         case 0:
-          ctx->profile = FF_PROFILE_H264_HIGH;
+          // 10-bit h264 encoding is not supported by our streaming protocol
+          assert(!config.dynamicRange);
+          ctx->profile = (config.chromaSamplingType == 1) ? FF_PROFILE_H264_HIGH_444_PREDICTIVE : FF_PROFILE_H264_HIGH;
           break;
 
         case 1:
-          ctx->profile = config.dynamicRange ? FF_PROFILE_HEVC_MAIN_10 : FF_PROFILE_HEVC_MAIN;
+          if (config.chromaSamplingType == 1) {
+            // HEVC uses the same RExt profile for both 8 and 10 bit YUV 4:4:4 encoding
+            ctx->profile = FF_PROFILE_HEVC_REXT;
+          }
+          else {
+            ctx->profile = config.dynamicRange ? FF_PROFILE_HEVC_MAIN_10 : FF_PROFILE_HEVC_MAIN;
+          }
           break;
 
         case 2:
           // AV1 supports both 8 and 10 bit encoding with the same Main profile
-          ctx->profile = FF_PROFILE_AV1_MAIN;
+          // but YUV 4:4:4 sampling requires High profile
+          ctx->profile = (config.chromaSamplingType == 1) ? FF_PROFILE_AV1_HIGH : FF_PROFILE_AV1_MAIN;
           break;
       }
 
@@ -1561,6 +1630,11 @@ namespace video {
       for (auto &option : (config.dynamicRange ? video_format.hdr_options : video_format.sdr_options)) {
         handle_option(option);
       }
+      if (config.chromaSamplingType == 1) {
+        for (auto &option : (config.dynamicRange ? video_format.hdr444_options : video_format.sdr444_options)) {
+          handle_option(option);
+        }
+      }
       if (retries > 0) {
         for (auto &option : video_format.fallback_options) {
           handle_option(option);
@@ -1856,7 +1930,24 @@ namespace video {
     std::unique_ptr<platf::encode_device_t> result;
 
     auto colorspace = colorspace_from_client_config(config, disp.is_hdr());
-    auto pix_fmt = (colorspace.bit_depth == 10) ? encoder.platform_formats->pix_fmt_10bit : encoder.platform_formats->pix_fmt_8bit;
+
+    platf::pix_fmt_e pix_fmt;
+    if (config.chromaSamplingType == 1) {
+      // YUV 4:4:4
+      if (!(encoder.flags & YUV444_SUPPORT)) {
+        // Encoder can't support YUV 4:4:4 regardless of hardware capabilities
+        return {};
+      }
+      pix_fmt = (colorspace.bit_depth == 10) ?
+                  encoder.platform_formats->pix_fmt_yuv444_10bit :
+                  encoder.platform_formats->pix_fmt_yuv444_8bit;
+    }
+    else {
+      // YUV 4:2:0
+      pix_fmt = (colorspace.bit_depth == 10) ?
+                  encoder.platform_formats->pix_fmt_10bit :
+                  encoder.platform_formats->pix_fmt_8bit;
+    }
 
     {
       auto encoder_name = config.videoFormat == 0 ? encoder.h264.name :
@@ -2300,8 +2391,8 @@ namespace video {
     encoder.av1.capabilities.set();
 
     // First, test encoder viability
-    config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0 };
-    config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0 };
+    config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0, 0 };
+    config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 0 };
 
     // If the encoder isn't supported at all (not even H.264), bail early
     reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, config_autoselect);
@@ -2420,35 +2511,49 @@ namespace video {
       encoder.av1.capabilities.reset();
     }
 
-    std::vector<std::pair<encoder_t::flag_e, config_t>> configs {
-      { encoder_t::DYNAMIC_RANGE, { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1 } },
-    };
+    // Test HDR and YUV444 support
+    {
+      // H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth
+      if (encoder.flags & YUV444_SUPPORT) {
+        config_t config_h264_yuv444 { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 1 };  // 8-bit SDR; trailing 1 is chromaSamplingType (4:4:4)
+        encoder.h264[encoder_t::YUV444] = validate_config(disp, encoder, config_h264_yuv444) >= 0;  // non-negative result == probe succeeded, same check as the HEVC/AV1 probes
+      }
+      else {
+        encoder.h264[encoder_t::YUV444] = false;
+      }
 
-    for (auto &[flag, config] : configs) {
-      auto h264 = config;
-      auto hevc = config;
-      auto av1 = config;
-
-      h264.videoFormat = 0;
-      hevc.videoFormat = 1;
-      av1.videoFormat = 2;
+      const config_t generic_hdr_config = { 1920, 1080, 60, 1000, 1, 0, 3, 1, 1, 0 };
 
       // Reset the display since we're switching from SDR to HDR
-      reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, config);
+      reset_display(disp, encoder.platform_formats->dev_type, config::video.output_name, generic_hdr_config);
       if (!disp) {
         return false;
       }
 
+      auto test_hdr_and_yuv444 = [&](auto &flag_map, auto video_format) {
+        auto config = generic_hdr_config;
+        config.videoFormat = video_format;
+
+        if (!flag_map[encoder_t::PASSED]) return;
+
+        // Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported.
+        config.chromaSamplingType = 1;
+        if ((encoder.flags & YUV444_SUPPORT) && validate_config(disp, encoder, config) >= 0) {
+          flag_map[encoder_t::DYNAMIC_RANGE] = true;
+          flag_map[encoder_t::YUV444] = true;
+          return;
+        }
+
+        // Test 4:2:0 HDR
+        config.chromaSamplingType = 0;
+        flag_map[encoder_t::DYNAMIC_RANGE] = validate_config(disp, encoder, config) >= 0;
+      };
+
       // HDR is not supported with H.264. Don't bother even trying it.
-      encoder.h264[flag] = flag != encoder_t::DYNAMIC_RANGE && validate_config(disp, encoder, h264) >= 0;
+      encoder.h264[encoder_t::DYNAMIC_RANGE] = false;
 
-      if (encoder.hevc[encoder_t::PASSED]) {
-        encoder.hevc[flag] = validate_config(disp, encoder, hevc) >= 0;
-      }
-
-      if (encoder.av1[encoder_t::PASSED]) {
-        encoder.av1[flag] = validate_config(disp, encoder, av1) >= 0;
-      }
+      test_hdr_and_yuv444(encoder.hevc, 1);
+      test_hdr_and_yuv444(encoder.av1, 2);
     }
 
     encoder.h264[encoder_t::VUI_PARAMETERS] = encoder.h264[encoder_t::VUI_PARAMETERS] && !config::sunshine.flags[config::flag::FORCE_VIDEO_HEADER_REPLACE];
@@ -2605,6 +2710,12 @@ namespace video {
     auto &encoder = *chosen_encoder;
 
     last_encoder_probe_supported_ref_frames_invalidation = (encoder.flags & REF_FRAMES_INVALIDATION);
+    last_encoder_probe_supported_yuv444_for_codec[0] = encoder.h264[encoder_t::PASSED] &&
+                                                       encoder.h264[encoder_t::YUV444];
+    last_encoder_probe_supported_yuv444_for_codec[1] = encoder.hevc[encoder_t::PASSED] &&
+                                                       encoder.hevc[encoder_t::YUV444];
+    last_encoder_probe_supported_yuv444_for_codec[2] = encoder.av1[encoder_t::PASSED] &&
+                                                       encoder.av1[encoder_t::YUV444];
 
     BOOST_LOG(debug) << "------  h264 ------"sv;
     for (int x = 0; x < encoder_t::MAX_FLAGS; ++x) {
@@ -2793,6 +2904,10 @@ namespace video {
   platf::pix_fmt_e
   map_pix_fmt(AVPixelFormat fmt) {
     switch (fmt) {
+      case AV_PIX_FMT_VUYX:
+        return platf::pix_fmt_e::ayuv;
+      case AV_PIX_FMT_XV30:
+        return platf::pix_fmt_e::y410;
       case AV_PIX_FMT_YUV420P10:
         return platf::pix_fmt_e::yuv420p10;
       case AV_PIX_FMT_YUV420P:
diff --git a/src/video.h b/src/video.h
index b46517c8..0b1baac8 100644
--- a/src/video.h
+++ b/src/video.h
@@ -39,6 +39,7 @@ namespace video {
     virtual ~encoder_platform_formats_t() = default;
     platf::mem_type_e dev_type;
     platf::pix_fmt_e pix_fmt_8bit, pix_fmt_10bit;
+    platf::pix_fmt_e pix_fmt_yuv444_8bit, pix_fmt_yuv444_10bit;
   };
 
   struct encoder_platform_formats_avcodec: encoder_platform_formats_t {
@@ -50,21 +51,28 @@ namespace video {
       const AVPixelFormat &avcodec_dev_pix_fmt,
       const AVPixelFormat &avcodec_pix_fmt_8bit,
       const AVPixelFormat &avcodec_pix_fmt_10bit,
+      const AVPixelFormat &avcodec_pix_fmt_yuv444_8bit,
+      const AVPixelFormat &avcodec_pix_fmt_yuv444_10bit,
       const init_buffer_function_t &init_avcodec_hardware_input_buffer_function):
         avcodec_base_dev_type { avcodec_base_dev_type },
         avcodec_derived_dev_type { avcodec_derived_dev_type },
         avcodec_dev_pix_fmt { avcodec_dev_pix_fmt },
         avcodec_pix_fmt_8bit { avcodec_pix_fmt_8bit },
         avcodec_pix_fmt_10bit { avcodec_pix_fmt_10bit },
+        avcodec_pix_fmt_yuv444_8bit { avcodec_pix_fmt_yuv444_8bit },
+        avcodec_pix_fmt_yuv444_10bit { avcodec_pix_fmt_yuv444_10bit },
         init_avcodec_hardware_input_buffer { init_avcodec_hardware_input_buffer_function } {
       dev_type = map_base_dev_type(avcodec_base_dev_type);
       pix_fmt_8bit = map_pix_fmt(avcodec_pix_fmt_8bit);
       pix_fmt_10bit = map_pix_fmt(avcodec_pix_fmt_10bit);
+      pix_fmt_yuv444_8bit = map_pix_fmt(avcodec_pix_fmt_yuv444_8bit);
+      pix_fmt_yuv444_10bit = map_pix_fmt(avcodec_pix_fmt_yuv444_10bit);
     }
 
     AVHWDeviceType avcodec_base_dev_type, avcodec_derived_dev_type;
     AVPixelFormat avcodec_dev_pix_fmt;
     AVPixelFormat avcodec_pix_fmt_8bit, avcodec_pix_fmt_10bit;
+    AVPixelFormat avcodec_pix_fmt_yuv444_8bit, avcodec_pix_fmt_yuv444_10bit;
 
     init_buffer_function_t init_avcodec_hardware_input_buffer;
   };
@@ -73,10 +81,14 @@ namespace video {
     encoder_platform_formats_nvenc(
       const platf::mem_type_e &dev_type,
       const platf::pix_fmt_e &pix_fmt_8bit,
-      const platf::pix_fmt_e &pix_fmt_10bit) {
+      const platf::pix_fmt_e &pix_fmt_10bit,
+      const platf::pix_fmt_e &pix_fmt_yuv444_8bit,
+      const platf::pix_fmt_e &pix_fmt_yuv444_10bit) {
       encoder_platform_formats_t::dev_type = dev_type;
       encoder_platform_formats_t::pix_fmt_8bit = pix_fmt_8bit;
       encoder_platform_formats_t::pix_fmt_10bit = pix_fmt_10bit;
+      encoder_platform_formats_t::pix_fmt_yuv444_8bit = pix_fmt_yuv444_8bit;
+      encoder_platform_formats_t::pix_fmt_yuv444_10bit = pix_fmt_yuv444_10bit;
     }
   };
 
@@ -87,6 +99,7 @@ namespace video {
       REF_FRAMES_RESTRICT,  ///< Set maximum reference frames.
       CBR,  ///< Some encoders don't support CBR, if not supported attempt constant quantization parameter instead.
       DYNAMIC_RANGE,  ///< HDR support.
+      YUV444,  ///< YUV 4:4:4 support.
       VUI_PARAMETERS,  ///< AMD encoder with VAAPI doesn't add VUI parameters to SPS.
       MAX_FLAGS  ///< Maximum number of flags.
     };
@@ -101,6 +114,7 @@ namespace video {
         _CONVERT(REF_FRAMES_RESTRICT);
         _CONVERT(CBR);
         _CONVERT(DYNAMIC_RANGE);
+        _CONVERT(YUV444);
         _CONVERT(VUI_PARAMETERS);
         _CONVERT(MAX_FLAGS);
       }
@@ -126,6 +140,8 @@ namespace video {
       std::vector<option_t> common_options;
       std::vector<option_t> sdr_options;
       std::vector<option_t> hdr_options;
+      std::vector<option_t> sdr444_options;
+      std::vector<option_t> hdr444_options;
       std::vector<option_t> fallback_options;
 
       // QP option to set in the case that CBR/VBR is not supported
@@ -312,11 +328,14 @@ namespace video {
     /* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit
        HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */
     int dynamicRange;
+
+    int chromaSamplingType;  // 0 - 4:2:0, 1 - 4:4:4
   };
 
   extern int active_hevc_mode;
   extern int active_av1_mode;
   extern bool last_encoder_probe_supported_ref_frames_invalidation;
+  extern std::array<bool, 3> last_encoder_probe_supported_yuv444_for_codec;  // 0 - H.264, 1 - HEVC, 2 - AV1
 
   void
   capture(
diff --git a/src/video_colorspace.cpp b/src/video_colorspace.cpp
index 5f838052..7b1ddadf 100644
--- a/src/video_colorspace.cpp
+++ b/src/video_colorspace.cpp
@@ -182,4 +182,109 @@ namespace video {
     return result;
   }
 
+  const color_t *
+  new_color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace) {
+    constexpr auto generate_color_vectors = [](const sunshine_colorspace_t &colorspace) -> color_t {
+      double Kr, Kb;
+      switch (colorspace.colorspace) {
+        case colorspace_e::rec601:
+          Kr = 0.299;
+          Kb = 0.114;
+          break;
+        case colorspace_e::rec709:
+        default:
+          Kr = 0.2126;
+          Kb = 0.0722;
+          break;
+        case colorspace_e::bt2020:
+        case colorspace_e::bt2020sdr:
+          Kr = 0.2627;
+          Kb = 0.0593;
+          break;
+      }
+      double Kg = 1.0 - Kr - Kb;
+
+      double y_mult, y_add;
+      double uv_mult, uv_add;
+
+      // "Matrix coefficients" section of ITU-T H.273
+      if (colorspace.full_range) {
+        y_mult = (1 << colorspace.bit_depth) - 1;
+        y_add = 0;
+        uv_mult = (1 << colorspace.bit_depth) - 1;
+        uv_add = (1 << (colorspace.bit_depth - 1));
+      }
+      else {
+        y_mult = (1 << (colorspace.bit_depth - 8)) * 219;
+        y_add = (1 << (colorspace.bit_depth - 8)) * 16;
+        uv_mult = (1 << (colorspace.bit_depth - 8)) * 224;
+        uv_add = (1 << (colorspace.bit_depth - 8)) * 128;
+      }
+
+      // For rounding
+      y_add += 0.5;
+      uv_add += 0.5;
+
+      color_t color_vectors;
+
+      color_vectors.color_vec_y[0] = Kr * y_mult;
+      color_vectors.color_vec_y[1] = Kg * y_mult;
+      color_vectors.color_vec_y[2] = Kb * y_mult;
+      color_vectors.color_vec_y[3] = y_add;
+
+      color_vectors.color_vec_u[0] = -0.5 * Kr / (1.0 - Kb) * uv_mult;
+      color_vectors.color_vec_u[1] = -0.5 * Kg / (1.0 - Kb) * uv_mult;
+      color_vectors.color_vec_u[2] = 0.5 * uv_mult;
+      color_vectors.color_vec_u[3] = uv_add;
+
+      color_vectors.color_vec_v[0] = 0.5 * uv_mult;
+      color_vectors.color_vec_v[1] = -0.5 * Kg / (1.0 - Kr) * uv_mult;
+      color_vectors.color_vec_v[2] = -0.5 * Kb / (1.0 - Kr) * uv_mult;
+      color_vectors.color_vec_v[3] = uv_add;
+
+      // Unused
+      color_vectors.range_y[0] = 1;
+      color_vectors.range_y[1] = 0;
+      color_vectors.range_uv[0] = 1;
+      color_vectors.range_uv[1] = 0;
+
+      return color_vectors;
+    };
+
+    static constexpr color_t colors[] = {
+      generate_color_vectors({ colorspace_e::rec601, false, 8 }),
+      generate_color_vectors({ colorspace_e::rec601, true, 8 }),
+      generate_color_vectors({ colorspace_e::rec601, false, 10 }),
+      generate_color_vectors({ colorspace_e::rec601, true, 10 }),
+      generate_color_vectors({ colorspace_e::rec709, false, 8 }),
+      generate_color_vectors({ colorspace_e::rec709, true, 8 }),
+      generate_color_vectors({ colorspace_e::rec709, false, 10 }),
+      generate_color_vectors({ colorspace_e::rec709, true, 10 }),
+      generate_color_vectors({ colorspace_e::bt2020, false, 8 }),
+      generate_color_vectors({ colorspace_e::bt2020, true, 8 }),
+      generate_color_vectors({ colorspace_e::bt2020, false, 10 }),
+      generate_color_vectors({ colorspace_e::bt2020, true, 10 }),
+    };
+
+    const color_t *result = nullptr;
+
+    switch (colorspace.colorspace) {
+      case colorspace_e::rec601:
+        result = &colors[0];
+        break;
+      case colorspace_e::rec709:
+      default:
+        result = &colors[4];
+        break;
+      case colorspace_e::bt2020:
+      case colorspace_e::bt2020sdr:
+        result = &colors[8];
+        break;
+    }
+
+    if (colorspace.bit_depth == 10) result += 2;
+    if (colorspace.full_range) result += 1;
+
+    return result;
+  }
 }  // namespace video
diff --git a/src/video_colorspace.h b/src/video_colorspace.h
index f3f9f3f3..d6469f99 100644
--- a/src/video_colorspace.h
+++ b/src/video_colorspace.h
@@ -57,4 +57,17 @@ namespace video {
   const color_t *
   color_vectors_from_colorspace(colorspace_e colorspace, bool full_range);
 
+  /**
+   * @brief New version of `color_vectors_from_colorspace()` function that better adheres to the standards.
+   *        Returned vectors are used to perform RGB->YUV conversion.
+   *        Unlike its predecessor, color vectors will produce output in `UINT` range, not `UNORM` range.
+   *        Input is still in `UNORM` range. Returned vectors won't modify color primaries and color
+   *        transfer function.
+   * @param colorspace Targeted YUV colorspace.
+   * @return `const color_t*` that contains RGB->YUV transformation vectors.
+   *         Components `range_y` and `range_uv` are there for backwards compatibility
+   *         and can be ignored in the computation.
+   */
+  const color_t *
+  new_color_vectors_from_colorspace(const sunshine_colorspace_t &colorspace);
 }  // namespace video
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl
new file mode 100644
index 00000000..73c45e9b
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps.hlsl
@@ -0,0 +1,3 @@
+#include "include/convert_base.hlsl"
+
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl
new file mode 100644
index 00000000..820e5128
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_ayuv_ps_linear.hlsl
@@ -0,0 +1,3 @@
+#include "include/convert_linear_base.hlsl"
+
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl
new file mode 100644
index 00000000..33e48145
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl
@@ -0,0 +1,10 @@
+cbuffer rotate_texture_steps_cbuffer : register(b1) {
+    int rotate_texture_steps;
+};
+
+#include "include/base_vs.hlsl"
+
+vertex_t main_vs(uint vertex_id : SV_VertexID)
+{
+    return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
+}
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl
new file mode 100644
index 00000000..b84c6617
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps.hlsl
@@ -0,0 +1,4 @@
+#include "include/convert_base.hlsl"
+
+#define Y410
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl
new file mode 100644
index 00000000..f7dbbcb6
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_linear.hlsl
@@ -0,0 +1,4 @@
+#include "include/convert_linear_base.hlsl"
+
+#define Y410
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl
new file mode 100644
index 00000000..1682be7b
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_y410_ps_perceptual_quantizer.hlsl
@@ -0,0 +1,4 @@
+#include "include/convert_perceptual_quantizer_base.hlsl"
+
+#define Y410
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl
new file mode 100644
index 00000000..d6cca979
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps.hlsl
@@ -0,0 +1,4 @@
+#include "include/convert_base.hlsl"
+
+#define PLANAR_VIEWPORTS
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl
new file mode 100644
index 00000000..46032651
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_linear.hlsl
@@ -0,0 +1,4 @@
+#include "include/convert_linear_base.hlsl"
+
+#define PLANAR_VIEWPORTS
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl
new file mode 100644
index 00000000..d390e81e
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_ps_perceptual_quantizer.hlsl
@@ -0,0 +1,4 @@
+#include "include/convert_perceptual_quantizer_base.hlsl"
+
+#define PLANAR_VIEWPORTS
+#include "include/convert_yuv444_ps_base.hlsl"
diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl
new file mode 100644
index 00000000..566da5d8
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl
@@ -0,0 +1,33 @@
+cbuffer rotate_texture_steps_cbuffer : register(b1) {
+    int rotate_texture_steps;
+};
+
+cbuffer color_matrix_cbuffer : register(b3) {
+    float4 color_vec_y;
+    float4 color_vec_u;
+    float4 color_vec_v;
+    float2 range_y;
+    float2 range_uv;
+};
+
+#define PLANAR_VIEWPORTS
+#include "include/base_vs.hlsl"
+
+vertex_t main_vs(uint vertex_id : SV_VertexID)
+{
+    vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps);
+
+    output.viewport = vertex_id / 3;
+
+    if (output.viewport == 0) {
+        output.color_vec = color_vec_y;
+    }
+    else if (output.viewport == 1) {
+        output.color_vec = color_vec_u;
+    }
+    else {
+        output.color_vec = color_vec_v;
+    }
+
+    return output;
+}
diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl
index c04fad39..c39e7c6f 100644
--- a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl
+++ b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl
@@ -19,7 +19,7 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_
         output.viewpoint_pos = float4(-1, 3, 0, 1);
         tex_coord = float2(0, -1);
     }
-    else if (vertex_id == 2) {
+    else {
         output.viewpoint_pos = float4(3, -1, 0, 1);
         tex_coord = float2(2, 1);
     }
diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl
index 9e4b28f1..cf755c5a 100644
--- a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl
+++ b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl
@@ -9,4 +9,8 @@ struct vertex_t
 #else
     float2 tex_coord : TEXCOORD;
 #endif
+#ifdef PLANAR_VIEWPORTS
+    uint viewport : SV_ViewportArrayIndex;
+    nointerpolation float4 color_vec : COLOR0;
+#endif
 };
diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl
new file mode 100644
index 00000000..2aa8401e
--- /dev/null
+++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv444_ps_base.hlsl
@@ -0,0 +1,39 @@
+Texture2D image : register(t0);
+SamplerState def_sampler : register(s0);
+
+#ifndef PLANAR_VIEWPORTS
+cbuffer color_matrix_cbuffer : register(b0) {
+    float4 color_vec_y;
+    float4 color_vec_u;
+    float4 color_vec_v;
+    float2 range_y;
+    float2 range_uv;
+};
+#endif
+
+#include "include/base_vs_types.hlsl"
+
+#ifdef PLANAR_VIEWPORTS
+uint main_ps(vertex_t input) : SV_Target
+#else
+uint4 main_ps(vertex_t input) : SV_Target
+#endif
+{
+    float3 rgb = CONVERT_FUNCTION(image.Sample(def_sampler, input.tex_coord, 0).rgb);
+
+#ifdef PLANAR_VIEWPORTS
+    // Planar R16, 10 most significant bits store the value
+    return uint(dot(input.color_vec.xyz, rgb) + input.color_vec.w) << 6;
+#else
+    float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
+    float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
+    float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
+
+#ifdef Y410
+    return uint4(u, y, v, 0);
+#else
+    // AYUV
+    return uint4(v, u, y, 0);
+#endif
+#endif
+}

From 727ea9037e2e92f56ca2bc0fc610334e9490d32d Mon Sep 17 00:00:00 2001
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com>
Date: Fri, 16 Aug 2024 17:18:35 -0400
Subject: [PATCH 07/12] docs(docker): add ipc note (#3034)

---
 DOCKER_README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/DOCKER_README.md b/DOCKER_README.md
index be27e2fa..2ff7bbf8 100644
--- a/DOCKER_README.md
+++ b/DOCKER_README.md
@@ -54,6 +54,7 @@ docker run -d \
   --device /dev/dri/ \
   --name=<image_name> \
   --restart=unless-stopped \
+  --ipc=host \
   -e PUID=<uid> \
   -e PGID=<gid> \
   -e TZ=<timezone> \
@@ -80,6 +81,7 @@ services:
       - PUID=<uid>
       - PGID=<gid>
       - TZ=<timezone>
+    ipc: host
     ports:
       - "47984-47990:47984-47990/tcp"
       - "48010:48010"
@@ -125,6 +127,9 @@ port `47990` (e.g. `http://<host_ip>:47990`). The internal port must be `47990`,
 | `-e PGID=<gid>`             | Group ID             | `1001`             | False    |
 | `-e TZ=<timezone>`          | Lookup [TZ value][1] | `America/New_York` | False    |
 
+For additional configuration, it is recommended to reference the *Games on Whales*
+[sunshine config](https://github.com/games-on-whales/gow/blob/2e442292d79b9d996f886b8a03d22b6eb6bddf7b/compose/streamers/sunshine.yml).
+
 [1]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
 
 #### User / Group Identifiers:

From 7c4c905f0433c51a7f7d04ff9bfe19215b508898 Mon Sep 17 00:00:00 2001
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com>
Date: Fri, 16 Aug 2024 18:41:31 -0400
Subject: [PATCH 08/12] build(macos): fix error in macos-13 build (#3022)

Co-authored-by: Vithorio Polten <reach@vithor.io>
---
 .github/workflows/CI.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 1ed66a7b..4ac6c217 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -484,6 +484,21 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Fix python  # removes the listed Python 3.12 files from /usr/local/bin, then installs brew's python
+        if: matrix.os_name == 'macos' && matrix.os_version == '13'
+        run: |
+          rm -f '/usr/local/bin/2to3'  # -f: a file missing from the runner image must not fail the step
+          rm -f '/usr/local/bin/2to3-3.12'
+          rm -f '/usr/local/bin/idle3'
+          rm -f '/usr/local/bin/idle3.12'
+          rm -f '/usr/local/bin/pydoc3'
+          rm -f '/usr/local/bin/pydoc3.12'
+          rm -f '/usr/local/bin/python3'
+          rm -f '/usr/local/bin/python3-config'
+          rm -f '/usr/local/bin/python3.12'
+          rm -f '/usr/local/bin/python3.12-config'
+          brew install python  # NOTE(review): presumably the removals let brew link its own python; confirm
+
       - name: Configure formula
         run: |
           # variables for formula

From c8d0d2b6157058a597ac34e85d3f58db661d64bf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 16 Aug 2024 19:32:32 -0400
Subject: [PATCH 09/12] build(deps): bump babel from 2.15.0 to 2.16.0 (#2998)

Bumps [babel](https://github.com/python-babel/babel) from 2.15.0 to 2.16.0.
- [Release notes](https://github.com/python-babel/babel/releases)
- [Changelog](https://github.com/python-babel/babel/blob/master/CHANGES.rst)
- [Commits](https://github.com/python-babel/babel/compare/v2.15.0...v2.16.0)

---
updated-dependencies:
- dependency-name: babel
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 scripts/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 9cfd158f..cd2ef869 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,2 +1,2 @@
-Babel==2.15.0
+Babel==2.16.0
 clang-format

From 537e3e6935b60c375de602fed12cb7aacf99f762 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 18 Aug 2024 19:44:24 -0400
Subject: [PATCH 10/12] build(deps): bump third-party/tray from `d9f7e76` to
 `ebbd14f` (#3010)

Bumps [third-party/tray](https://github.com/LizardByte/tray) from `d9f7e76` to `ebbd14f`.
- [Commits](https://github.com/LizardByte/tray/compare/d9f7e768cd74390a3b7e68ceed6f9c1a05db9f08...ebbd14fe6af30e61ddbb710251f612d32e371d98)

---
updated-dependencies:
- dependency-name: third-party/tray
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 third-party/tray | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third-party/tray b/third-party/tray
index d9f7e768..ebbd14fe 160000
--- a/third-party/tray
+++ b/third-party/tray
@@ -1 +1 @@
-Subproject commit d9f7e768cd74390a3b7e68ceed6f9c1a05db9f08
+Subproject commit ebbd14fe6af30e61ddbb710251f612d32e371d98

From 170f4dd181c6dc57dd14da8ae575c92e429d9997 Mon Sep 17 00:00:00 2001
From: ns6089 <61738816+ns6089@users.noreply.github.com>
Date: Mon, 19 Aug 2024 15:30:13 +0300
Subject: [PATCH 11/12] fix(win/qsv): skip unsupported 4:4:4 codecs (#3029)

---
 src/platform/windows/display_vram.cpp |  7 +++
 src/video.cpp                         | 29 ++++++++-----
 src/video.h                           | 61 +++++++++++++++++----------
 3 files changed, 64 insertions(+), 33 deletions(-)

diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp
index ed88e8d5..96ddff84 100644
--- a/src/platform/windows/display_vram.cpp
+++ b/src/platform/windows/display_vram.cpp
@@ -1871,6 +1871,13 @@ namespace platf::dxgi {
       if (!boost::algorithm::ends_with(name, "_qsv")) {
         return false;
       }
+      if (config.chromaSamplingType == 1) {
+        if (config.videoFormat == 0 || config.videoFormat == 2) {
+          // QSV doesn't support 4:4:4 in H.264 or AV1
+          return false;
+        }
+        // TODO: Blacklist HEVC 4:4:4 based on adapter model
+      }
     }
     else if (adapter_desc.VendorId == 0x10de) {  // Nvidia
       // If it's not an NVENC encoder, it's not compatible with an Nvidia GPU
diff --git a/src/video.cpp b/src/video.cpp
index 6827b6c7..8c5829a2 100644
--- a/src/video.cpp
+++ b/src/video.cpp
@@ -1434,9 +1434,7 @@ namespace video {
 
     bool hardware = platform_formats->avcodec_base_dev_type != AV_HWDEVICE_TYPE_NONE;
 
-    auto &video_format = config.videoFormat == 0 ? encoder.h264 :
-                         config.videoFormat == 1 ? encoder.hevc :
-                                                   encoder.av1;
+    auto &video_format = encoder.codec_from_config(config);
     if (!video_format[encoder_t::PASSED] || !disp->is_codec_supported(video_format.name, config)) {
       BOOST_LOG(error) << encoder.name << ": "sv << video_format.name << " mode not supported"sv;
       return nullptr;
@@ -1950,10 +1948,7 @@ namespace video {
     }
 
     {
-      auto encoder_name = config.videoFormat == 0 ? encoder.h264.name :
-                          config.videoFormat == 1 ? encoder.hevc.name :
-                          config.videoFormat == 2 ? encoder.av1.name :
-                                                    "unknown";
+      auto encoder_name = encoder.codec_from_config(config).name;
 
       BOOST_LOG(info) << "Creating encoder " << logging::bracket(encoder_name);
 
@@ -2516,7 +2511,8 @@ namespace video {
       // H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth
       if (encoder.flags & YUV444_SUPPORT) {
         config_t config_h264_yuv444 { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0, 1 };
-        encoder.h264[encoder_t::YUV444] = validate_config(disp, encoder, config_h264_yuv444);
+        encoder.h264[encoder_t::YUV444] = disp->is_codec_supported(encoder.h264.name, config_h264_yuv444) &&
+                                          validate_config(disp, encoder, config_h264_yuv444) >= 0;
       }
       else {
         encoder.h264[encoder_t::YUV444] = false;
@@ -2536,17 +2532,30 @@ namespace video {
 
         if (!flag_map[encoder_t::PASSED]) return;
 
+        auto encoder_codec_name = encoder.codec_from_config(config).name;
+
         // Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported.
         config.chromaSamplingType = 1;
-        if ((encoder.flags & YUV444_SUPPORT) && validate_config(disp, encoder, config) >= 0) {
+        if ((encoder.flags & YUV444_SUPPORT) &&
+            disp->is_codec_supported(encoder_codec_name, config) &&
+            validate_config(disp, encoder, config) >= 0) {
           flag_map[encoder_t::DYNAMIC_RANGE] = true;
           flag_map[encoder_t::YUV444] = true;
           return;
         }
+        else {
+          flag_map[encoder_t::YUV444] = false;
+        }
 
         // Test 4:2:0 HDR
         config.chromaSamplingType = 0;
-        flag_map[encoder_t::DYNAMIC_RANGE] = validate_config(disp, encoder, config) >= 0;
+        if (disp->is_codec_supported(encoder_codec_name, config) &&
+            validate_config(disp, encoder, config) >= 0) {
+          flag_map[encoder_t::DYNAMIC_RANGE] = true;
+        }
+        else {
+          flag_map[encoder_t::DYNAMIC_RANGE] = false;
+        }
       };
 
       // HDR is not supported with H.264. Don't bother even trying it.
diff --git a/src/video.h b/src/video.h
index 0b1baac8..6a50b2e3 100644
--- a/src/video.h
+++ b/src/video.h
@@ -17,6 +17,29 @@ extern "C" {
 struct AVPacket;
 namespace video {
 
+  /* Encoding configuration requested by remote client */
+  struct config_t {
+    int width;  // Video width in pixels
+    int height;  // Video height in pixels
+    int framerate;  // Requested framerate, used in individual frame bitrate budget calculation
+    int bitrate;  // Video bitrate in kilobits (1000 bits) for requested framerate
+    int slicesPerFrame;  // Number of slices per frame
+    int numRefFrames;  // Max number of reference frames
+
+    /* Requested color range and SDR encoding colorspace; the HDR encoding colorspace is always BT.2020+ST2084.
+       Color range (encoderCscMode & 0x1) : 0 - limited, 1 - full
+       SDR encoding colorspace (encoderCscMode >> 1) : 0 - BT.601, 1 - BT.709, 2 - BT.2020 */
+    int encoderCscMode;
+
+    int videoFormat;  // 0 - H.264, 1 - HEVC, 2 - AV1 (mapped to a codec entry by codec_from_config())
+
+    /* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit
+       HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */
+    int dynamicRange;
+
+    int chromaSamplingType;  // 0 - 4:2:0, 1 - 4:4:4
+  };
+
   platf::mem_type_e
   map_base_dev_type(AVHWDeviceType type);
   platf::pix_fmt_e
@@ -163,6 +186,21 @@ namespace video {
       }
     } av1, hevc, h264;
 
+    // Pick the codec entry selected by config.videoFormat (0 - h264, 1 - hevc, 2 - av1); other values log an error and yield h264.
+    const codec_t &
+    codec_from_config(const config_t &config) const {
+      if (config.videoFormat == 1) {
+        return hevc;
+      }
+      if (config.videoFormat == 2) {
+        return av1;
+      }
+      if (config.videoFormat != 0) {
+        BOOST_LOG(error) << "Unknown video format " << config.videoFormat << ", falling back to H.264";
+      }
+      return h264;
+    }
+
     uint32_t flags;
   };
 
@@ -309,29 +347,6 @@ namespace video {
 
   using hdr_info_t = std::unique_ptr<hdr_info_raw_t>;
 
-  /* Encoding configuration requested by remote client */
-  struct config_t {
-    int width;  // Video width in pixels
-    int height;  // Video height in pixels
-    int framerate;  // Requested framerate, used in individual frame bitrate budget calculation
-    int bitrate;  // Video bitrate in kilobits (1000 bits) for requested framerate
-    int slicesPerFrame;  // Number of slices per frame
-    int numRefFrames;  // Max number of reference frames
-
-    /* Requested color range and SDR encoding colorspace, HDR encoding colorspace is always BT.2020+ST2084
-       Color range (encoderCscMode & 0x1) : 0 - limited, 1 - full
-       SDR encoding colorspace (encoderCscMode >> 1) : 0 - BT.601, 1 - BT.709, 2 - BT.2020 */
-    int encoderCscMode;
-
-    int videoFormat;  // 0 - H.264, 1 - HEVC, 2 - AV1
-
-    /* Encoding color depth (bit depth): 0 - 8-bit, 1 - 10-bit
-       HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */
-    int dynamicRange;
-
-    int chromaSamplingType;  // 0 - 4:2:0, 1 - 4:4:4
-  };
-
   extern int active_hevc_mode;
   extern int active_av1_mode;
   extern bool last_encoder_probe_supported_ref_frames_invalidation;

From 17c4b26af0709b5414cc32546dd687beb0e1257c Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Mon, 19 Aug 2024 08:29:49 -0500
Subject: [PATCH 12/12] fix(packaging): apply udev rules for uhid (#3041)

---
 packaging/linux/AppImage/AppRun       | 1 +
 packaging/linux/Arch/sunshine.install | 2 ++
 src_assets/linux/misc/postinst        | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/packaging/linux/AppImage/AppRun b/packaging/linux/AppImage/AppRun
index 404704c3..e90ee3a4 100644
--- a/packaging/linux/AppImage/AppRun
+++ b/packaging/linux/AppImage/AppRun
@@ -49,6 +49,7 @@ function install() {
   cat "$SUNSHINE_SHARE_HERE/udev/rules.d/60-sunshine.rules" | sudo tee /etc/udev/rules.d/60-sunshine.rules
   sudo udevadm control --reload-rules
   sudo udevadm trigger --property-match=DEVNAME=/dev/uinput
+  sudo udevadm trigger --property-match=DEVNAME=/dev/uhid
 
   # sunshine service
   mkdir -p ~/.config/systemd/user
diff --git a/packaging/linux/Arch/sunshine.install b/packaging/linux/Arch/sunshine.install
index a8a700f1..6b274cdf 100644
--- a/packaging/linux/Arch/sunshine.install
+++ b/packaging/linux/Arch/sunshine.install
@@ -5,7 +5,9 @@ do_setcap() {
 do_udev_reload() {
   udevadm control --reload-rules
   udevadm trigger --property-match=DEVNAME=/dev/uinput
+  udevadm trigger --property-match=DEVNAME=/dev/uhid
   modprobe uinput || true
+  modprobe uhid || true
 }
 
 post_install() {
diff --git a/src_assets/linux/misc/postinst b/src_assets/linux/misc/postinst
index e406c762..47deb784 100644
--- a/src_assets/linux/misc/postinst
+++ b/src_assets/linux/misc/postinst
@@ -8,9 +8,10 @@ if [ -x "$path_to_setcap" ] ; then
         $path_to_setcap cap_sys_admin+p $path_to_sunshine
 fi
 
-# Trigger udev rule reload for /dev/uinput
+# Trigger udev rule reload for /dev/uinput and /dev/uhid
 path_to_udevadm=$(which udevadm)
 if [ -x "$path_to_udevadm" ] ; then
   $path_to_udevadm control --reload-rules
   $path_to_udevadm trigger --property-match=DEVNAME=/dev/uinput
+  $path_to_udevadm trigger --property-match=DEVNAME=/dev/uhid
 fi