Add standalone NVENC encoder

This commit is contained in:
ns6089
2023-04-25 16:38:37 +03:00
committed by Cameron Gutman
parent 7fe52bc5f8
commit 68fa43a61c
34 changed files with 2124 additions and 642 deletions

View File

@@ -13,6 +13,7 @@
#include "src/main.h"
#include "src/thread_safe.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
extern "C" {
#include <moonlight-common-c/src/Limelight.h>
@@ -45,6 +46,9 @@ namespace boost {
namespace video {
struct config_t;
} // namespace video
namespace nvenc {
class nvenc_base;
}
namespace platf {
// Limited by bits in activeGamepadMask
@@ -344,15 +348,28 @@ namespace platf {
std::optional<null_t> null;
};
struct hwdevice_t {
// Abstract base for all encode devices (replaces the old hwdevice_t).
// An encode device converts captured frames into encoder input.
struct encode_device_t {
virtual ~encode_device_t() = default;
// Convert the captured image into the device's output surface.
// Implementations below return -1 on failure (confirm exact contract at call sites).
virtual int
convert(platf::img_t &img) = 0;
// Target colorspace; derived classes read this when applying color conversion.
video::sunshine_colorspace_t colorspace;
};
struct avcodec_encode_device_t: encode_device_t {
void *data {};
AVFrame *frame {};
virtual int
convert(platf::img_t &img) {
int
convert(platf::img_t &img) override {
return -1;
}
virtual void
apply_colorspace() {
}
/**
* implementations must take ownership of 'frame'
*/
@@ -362,9 +379,6 @@ namespace platf {
return -1;
};
virtual void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {};
/**
* Implementations may set parameters during initialization of the hwframes context
*/
@@ -378,8 +392,13 @@ namespace platf {
prepare_to_derive_context(int hw_device_type) {
return 0;
};
};
virtual ~hwdevice_t() = default;
// Encode device variant backed by the standalone NVENC encoder
// (bypasses avcodec entirely).
struct nvenc_encode_device_t: encode_device_t {
// Create/configure the NVENC session for the given client stream config
// and colorspace. Returns true on success.
virtual bool
init_encoder(const video::config_t &client_config, const video::sunshine_colorspace_t &colorspace) = 0;
// Non-owning pointer to the NVENC backend; set by the platform
// implementation (see d3d_nvenc_encode_device_t::init_device).
nvenc::nvenc_base *nvenc = nullptr;
};
enum class capture_e : int {
@@ -440,9 +459,14 @@ namespace platf {
virtual int
dummy_img(img_t *img) = 0;
virtual std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) {
return std::make_shared<hwdevice_t>();
virtual std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) {
return nullptr;
}
virtual std::unique_ptr<nvenc_encode_device_t>
make_nvenc_encode_device(pix_fmt_e pix_fmt) {
return nullptr;
}
virtual bool

View File

@@ -88,7 +88,7 @@ namespace cuda {
return 0;
}
class cuda_t: public platf::hwdevice_t {
class cuda_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height) {
@@ -145,8 +145,8 @@ namespace cuda {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
auto tex = tex_t::make(height, width * 4);
if (!tex) {
@@ -223,19 +223,19 @@ namespace cuda {
}
};
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
if (init()) {
return nullptr;
}
std::shared_ptr<cuda_t> cuda;
std::unique_ptr<cuda_t> cuda;
if (vram) {
cuda = std::make_shared<cuda_vram_t>();
cuda = std::make_unique<cuda_vram_t>();
}
else {
cuda = std::make_shared<cuda_ram_t>();
cuda = std::make_unique<cuda_ram_t>();
}
if (cuda->init(width, height)) {
@@ -675,9 +675,9 @@ namespace cuda {
return platf::capture_e::ok;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
return ::cuda::make_hwdevice(width, height, true);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) {
return ::cuda::make_avcodec_encode_device(width, height, true);
}
std::shared_ptr<platf::img_t>

View File

@@ -56,12 +56,11 @@ public:
};
} // namespace platf
namespace video {
using __float4 = float[4];
using __float3 = float[3];
using __float2 = float[2];
// End special declarations
struct alignas(16) color_t {
namespace cuda {
struct alignas(16) cuda_color_t {
float4 color_vec_y;
float4 color_vec_u;
float4 color_vec_v;
@@ -69,22 +68,8 @@ struct alignas(16) color_t {
float2 range_uv;
};
struct alignas(16) color_extern_t {
__float4 color_vec_y;
__float4 color_vec_u;
__float4 color_vec_v;
__float2 range_y;
__float2 range_uv;
};
static_assert(sizeof(video::color_t) == sizeof(cuda::cuda_color_t), "color matrix struct mismatch");
static_assert(sizeof(video::color_t) == sizeof(video::color_extern_t), "color matrix struct mismatch");
extern color_t colors[6];
} // namespace video
// End special declarations
namespace cuda {
auto constexpr INVALID_TEXTURE = std::numeric_limits<cudaTextureObject_t>::max();
template<class T>
@@ -144,7 +129,7 @@ inline __device__ float3 bgra_to_rgb(float4 vec) {
return make_float3(vec.z, vec.y, vec.x);
}
inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float2 calcUV(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_u = color_matrix->color_vec_u;
float4 vec_v = color_matrix->color_vec_v;
@@ -157,7 +142,7 @@ inline __device__ float2 calcUV(float3 pixel, const video::color_t *const color_
return make_float2(u, v);
}
inline __device__ float calcY(float3 pixel, const video::color_t *const color_matrix) {
inline __device__ float calcY(float3 pixel, const cuda_color_t *const color_matrix) {
float4 vec_y = color_matrix->color_vec_y;
return (dot(pixel, make_float3(vec_y)) + vec_y.w) * color_matrix->range_y.x + color_matrix->range_y.y;
@@ -166,7 +151,7 @@ inline __device__ float calcY(float3 pixel, const video::color_t *const color_ma
__global__ void RGBA_to_NV12(
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
float scale, const viewport_t viewport, const video::color_t *const color_matrix) {
float scale, const viewport_t viewport, const cuda_color_t *const color_matrix) {
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
int idY = (threadIdx.y + blockDim.y * blockIdx.y) * 2;
@@ -297,7 +282,7 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device");
CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
auto ptr = make_ptr<video::color_t>();
auto ptr = make_ptr<cuda_color_t>();
if(!ptr) {
return std::nullopt;
}
@@ -316,32 +301,13 @@ int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std:
dim3 block(threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (cuda_color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
}
void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch(colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
color_p = &video::colors[0];
};
if(color_range > 1) {
// Full range
++color_p;
}
// Upload the color conversion matrix for the requested colorspace to the GPU.
// Replaces the old set_colorspace(colorspace, color_range) switch: the
// vector lookup is now centralized in video::color_vectors_from_colorspace().
void sws_t::apply_colorspace(const video::sunshine_colorspace_t& colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
// Best-effort copy; CU_CHECK_IGNORE logs but does not propagate the error.
CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda");
}

View File

@@ -6,6 +6,8 @@
#if defined(SUNSHINE_BUILD_CUDA)
#include "src/video_colorspace.h"
#include <cstdint>
#include <memory>
#include <optional>
@@ -13,7 +15,7 @@
#include <vector>
namespace platf {
class hwdevice_t;
class avcodec_encode_device_t;
class img_t;
} // namespace platf
@@ -23,8 +25,8 @@ namespace cuda {
std::vector<std::string>
display_names();
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
int
init();
} // namespace cuda
@@ -109,7 +111,7 @@ namespace cuda {
convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
int
load_ram(platf::img_t &img, cudaArray_t array);

View File

@@ -607,27 +607,8 @@ namespace egl {
}
void
sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
video::color_t *color_p;
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &video::colors[0];
};
if (color_range > 1) {
// Full range
++color_p;
}
sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
std::string_view members[] {
util::view(color_p->color_vec_y),
@@ -741,7 +722,7 @@ namespace egl {
gl::ctx.UseProgram(sws.program[1].handle());
gl::ctx.Uniform1fv(loc_width_i, 1, &width_i);
auto color_p = &video::colors[0];
auto color_p = video::color_vectors_from_colorspace(video::colorspace_e::rec601, false);
std::pair<const char *, std::string_view> members[] {
std::make_pair("color_vec_y", util::view(color_p->color_vec_y)),
std::make_pair("color_vec_u", util::view(color_p->color_vec_u)),

View File

@@ -14,6 +14,7 @@
#include "src/main.h"
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video_colorspace.h"
#define SUNSHINE_STRINGIFY_HELPER(x) #x
#define SUNSHINE_STRINGIFY(x) SUNSHINE_STRINGIFY_HELPER(x)
@@ -327,7 +328,7 @@ namespace egl {
load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
apply_colorspace(const video::sunshine_colorspace_t &colorspace);
// The first texture is the monitor image.
// The second texture is the cursor image

View File

@@ -768,13 +768,13 @@ namespace platf {
return capture_e::ok;
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
capture_e
@@ -843,10 +843,10 @@ namespace platf {
display_vram_t(mem_type_e mem_type):
display_t(mem_type) {}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
return va::make_avcodec_encode_device(width, height, dup(card.fd.el), img_offset_x, img_offset_y, true);
}
BOOST_LOG(error) << "Unsupported pixel format for egl::display_vram_t: "sv << platf::from_pix_fmt(pix_fmt);

View File

@@ -290,9 +290,9 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf);
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf);
class va_t: public platf::hwdevice_t {
class va_t: public platf::avcodec_encode_device_t {
public:
int
init(int in_width, int in_height, file_t &&render_device) {
@@ -304,7 +304,7 @@ namespace va {
return -1;
}
this->data = (void *) vaapi_make_hwdevice_ctx;
this->data = (void *) vaapi_init_avcodec_hardware_input_buffer;
gbm.reset(gbm::create_device(file.el));
if (!gbm) {
@@ -398,8 +398,8 @@ namespace va {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
apply_colorspace() override {
sws.apply_colorspace(colorspace);
}
va::display_t::pointer va_display;
@@ -526,7 +526,7 @@ namespace va {
}
int
vaapi_make_hwdevice_ctx(platf::hwdevice_t *base, AVBufferRef **hw_device_buf) {
vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *base, AVBufferRef **hw_device_buf) {
if (!va::initialize) {
BOOST_LOG(warning) << "libva not loaded"sv;
return -1;
@@ -653,10 +653,10 @@ namespace va {
return true;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram) {
if (vram) {
auto egl = std::make_shared<va::va_vram_t>();
auto egl = std::make_unique<va::va_vram_t>();
if (egl->init(width, height, std::move(card), offset_x, offset_y)) {
return nullptr;
}
@@ -665,7 +665,7 @@ namespace va {
}
else {
auto egl = std::make_shared<va::va_ram_t>();
auto egl = std::make_unique<va::va_ram_t>();
if (egl->init(width, height, std::move(card))) {
return nullptr;
}
@@ -674,8 +674,8 @@ namespace va {
}
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram) {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram) {
auto render_device = config::video.adapter_name.empty() ? "/dev/dri/renderD128" : config::video.adapter_name.c_str();
file_t file = open(render_device, O_RDWR);
@@ -686,11 +686,11 @@ namespace va {
return nullptr;
}
return make_hwdevice(width, height, std::move(file), offset_x, offset_y, vram);
return make_avcodec_encode_device(width, height, std::move(file), offset_x, offset_y, vram);
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram) {
return make_hwdevice(width, height, 0, 0, vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
return make_avcodec_encode_device(width, height, 0, 0, vram);
}
} // namespace va

View File

@@ -18,12 +18,12 @@ namespace va {
* offset_y --> Vertical offset of the image in the texture
* file_t card --> The file descriptor of the render device used for encoding
*/
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, int offset_x, int offset_y, bool vram);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, int offset_x, int offset_y, bool vram);
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, file_t &&card, int offset_x, int offset_y, bool vram);
// Ensure the render device pointed to by fd is capable of encoding h264 with the hevc_mode configured
bool

View File

@@ -215,13 +215,13 @@ namespace wl {
return 0;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
std::shared_ptr<platf::img_t>
@@ -323,13 +323,13 @@ namespace wl {
return img;
}
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(platf::pix_fmt_e pix_fmt) override {
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override {
if (mem_type == platf::mem_type_e::vaapi) {
return va::make_hwdevice(width, height, 0, 0, true);
return va::make_avcodec_encode_device(width, height, 0, 0, true);
}
return std::make_shared<platf::hwdevice_t>();
return std::make_unique<platf::avcodec_encode_device_t>();
}
int

View File

@@ -553,19 +553,19 @@ namespace platf {
return std::make_shared<x11_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (mem_type == mem_type_e::vaapi) {
return va::make_hwdevice(width, height, false);
return va::make_avcodec_encode_device(width, height, false);
}
#ifdef SUNSHINE_BUILD_CUDA
if (mem_type == mem_type_e::cuda) {
return cuda::make_hwdevice(width, height, false);
return cuda::make_avcodec_encode_device(width, height, false);
}
#endif
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
int

View File

@@ -94,15 +94,15 @@ namespace platf {
return std::make_shared<av_img_t>();
}
std::shared_ptr<hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override {
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override {
if (pix_fmt == pix_fmt_e::yuv420p) {
av_capture.pixelFormat = kCVPixelFormatType_32BGRA;
return std::make_shared<hwdevice_t>();
return std::make_unique<avcodec_encode_device_t>();
}
else if (pix_fmt == pix_fmt_e::nv12) {
auto device = std::make_shared<nv12_zero_device>();
auto device = std::make_unique<nv12_zero_device>();
device->init(static_cast<void *>(av_capture), setResolution, setPixelFormat);

View File

@@ -70,10 +70,6 @@ namespace platf {
return 0;
}
void
nv12_zero_device::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {
}
int
nv12_zero_device::init(void *display, resolution_fn_t resolution_fn, pixel_format_fn_t pixel_format_fn) {
pixel_format_fn(display, '420v');

View File

@@ -8,7 +8,7 @@
namespace platf {
class nv12_zero_device: public hwdevice_t {
class nv12_zero_device: public avcodec_encode_device_t {
// display holds a pointer to an av_video object. Since the namespaces of AVFoundation
// and FFMPEG collide, we need this opaque pointer and cannot use the definition
void *display;
@@ -27,8 +27,6 @@ namespace platf {
convert(img_t &img);
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx);
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
};
} // namespace platf

View File

@@ -13,6 +13,7 @@
#include "src/platform/common.h"
#include "src/utility.h"
#include "src/video.h"
namespace platf::dxgi {
extern const char *format_str[];
@@ -215,8 +216,11 @@ namespace platf::dxgi {
int
init(const ::video::config_t &config, const std::string &display_name);
std::shared_ptr<platf::hwdevice_t>
make_hwdevice(pix_fmt_e pix_fmt) override;
std::unique_ptr<avcodec_encode_device_t>
make_avcodec_encode_device(pix_fmt_e pix_fmt) override;
std::unique_ptr<nvenc_encode_device_t>
make_nvenc_encode_device(pix_fmt_e pix_fmt) override;
sampler_state_t sampler_linear;

View File

@@ -16,7 +16,11 @@ extern "C" {
#include "display.h"
#include "misc.h"
#include "src/config.h"
#include "src/main.h"
#include "src/nvenc/nvenc_config.h"
#include "src/nvenc/nvenc_d3d11.h"
#include "src/nvenc/nvenc_utils.h"
#include "src/video.h"
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
@@ -361,10 +365,10 @@ namespace platf::dxgi {
return compile_shader(file, "main_vs", "vs_5_0");
}
class hwdevice_t: public platf::hwdevice_t {
class d3d_base_encode_device final {
public:
int
convert(platf::img_t &img_base) override {
convert(platf::img_t &img_base) {
// Garbage collect mapped capture images whose weak references have expired
for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) {
if (it->second.img_weak.expired()) {
@@ -413,28 +417,15 @@ namespace platf::dxgi {
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &::video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &::video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &::video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &::video::colors[0];
};
apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
if (color_range > 1) {
// Full range
++color_p;
if (!color_vectors) {
BOOST_LOG(error) << "No vector data for colorspace"sv;
return;
}
auto color_matrix = make_buffer((device_t::pointer) data, *color_p);
auto color_matrix = make_buffer(device.get(), *color_vectors);
if (!color_matrix) {
BOOST_LOG(warning) << "Failed to create color matrix"sv;
return;
@@ -445,78 +436,14 @@ namespace platf::dxgi {
this->color_matrix = std::move(color_matrix);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
init_output(ID3D11Texture2D *frame_texture, int width, int height) {
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
output_texture.reset(frame_texture);
auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
auto out_width = frame->width;
auto out_height = frame->height;
auto out_width = width;
auto out_height = height;
float in_width = display->width;
float in_height = display->height;
@@ -533,10 +460,6 @@ namespace platf::dxgi {
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
hwframe_texture.reset(frame_texture);
float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; // aligned to 16-byte
info_scene = make_buffer(device.get(), info_in);
@@ -550,7 +473,7 @@ namespace platf::dxgi {
D3D11_RTV_DIMENSION_TEXTURE2D
};
auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -558,7 +481,7 @@ namespace platf::dxgi {
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -574,9 +497,7 @@ namespace platf::dxgi {
}
int
init(
std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
pix_fmt_e pix_fmt) {
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
@@ -615,8 +536,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}
data = device.get();
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
if (status) {
@@ -673,7 +592,13 @@ namespace platf::dxgi {
return -1;
}
color_matrix = make_buffer(device.get(), ::video::colors[0]);
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
return -1;
}
color_matrix = make_buffer(device.get(), *default_color_vectors);
if (!color_matrix) {
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
return -1;
@@ -721,7 +646,6 @@ namespace platf::dxgi {
return 0;
}
private:
struct encoder_img_ctx_t {
// Used to determine if the underlying texture changes.
// Not safe for actual use by the encoder!
@@ -789,9 +713,6 @@ namespace platf::dxgi {
return 0;
}
public:
frame_t hwframe;
::video::color_t *color_p;
buf_t info_scene;
@@ -805,9 +726,6 @@ namespace platf::dxgi {
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
// The image referenced by hwframe
texture2d_t hwframe_texture;
// d3d_img_t::id -> encoder_img_ctx_t
// These store the encoder textures for each img_t that passes through
// convert(). We can't store them in the img_t itself because it is shared
@@ -830,6 +748,149 @@ namespace platf::dxgi {
device_t device;
device_ctx_t device_ctx;
texture2d_t output_texture;
};
// avcodec-facing D3D11 encode device. Thin adapter that forwards the
// conversion/colorspace work to the shared d3d_base_encode_device and
// handles the avcodec-specific frame/hwframes plumbing.
class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
public:
// Initialize the underlying D3D11 device; exposes the raw device via
// the inherited 'data' pointer for avcodec hwdevice creation.
int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
int result = base.init(display, adapter_p, pix_fmt);
data = base.device.get();
return result;
}
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
// Applies the inherited 'colorspace' member to the base device.
void
apply_colorspace() override {
base.apply_colorspace(colorspace);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
// Takes ownership of 'frame' (per the encode_device contract) and wires
// its backing D3D11 texture into the base device's render targets.
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
return base.init_output(frame_texture, frame->width, frame->height);
}
private:
// Shared conversion pipeline (device, shaders, render targets).
d3d_base_encode_device base;
// Owning reference to the avcodec frame set via set_frame().
frame_t hwframe;
};
// NVENC-facing D3D11 encode device: feeds converted frames directly into
// the standalone NVENC encoder instead of going through avcodec.
class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
public:
// Create the D3D11 device and the NVENC wrapper; validates that the
// requested pixel format maps to an NVENC buffer format. Returns false on failure.
bool
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
return false;
}
if (base.init(display, adapter_p, pix_fmt)) return false;
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
// Expose the backend through the base-class non-owning pointer.
nvenc = nvenc_d3d.get();
return true;
}
// Configure and start the NVENC session, then point the conversion
// pipeline's output at NVENC's input texture.
bool
init_encoder(const ::video::config_t &client_config, const ::video::sunshine_colorspace_t &colorspace) override {
if (!nvenc_d3d) return false;
nvenc::nvenc_config nvenc_config;
// Presets are numbered 11..17 in config; map to NVENC's 0-based scale,
// defaulting to 1 when unset (confirm mapping against nvenc_config docs).
nvenc_config.quality_preset = config::video.nv.nv_preset ? (*config::video.nv.nv_preset - 11) : 1;
nvenc_config.h264_cavlc = (config::video.nv.nv_coder == NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC);
auto nvenc_colorspace = nvenc::nvenc_colorspace_from_sunshine_colorspace(colorspace);
if (!nvenc_d3d->create_encoder(nvenc_config, client_config, nvenc_colorspace, buffer_format)) return false;
base.apply_colorspace(colorspace);
return base.init_output(nvenc_d3d->get_input_texture(), client_config.width, client_config.height) == 0;
}
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
private:
// Shared conversion pipeline (device, shaders, render targets).
d3d_base_encode_device base;
// Owning pointer to the NVENC backend; 'nvenc' (base class) aliases it.
std::unique_ptr<nvenc::nvenc_d3d11> nvenc_d3d;
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
};
bool
@@ -1464,26 +1525,32 @@ namespace platf::dxgi {
};
}
std::shared_ptr<platf::hwdevice_t>
display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) {
std::unique_ptr<avcodec_encode_device_t>
display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
return nullptr;
}
auto hwdevice = std::make_shared<hwdevice_t>();
auto device = std::make_unique<d3d_avcodec_encode_device_t>();
auto ret = hwdevice->init(
shared_from_this(),
adapter.get(),
pix_fmt);
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
if (ret) {
return nullptr;
}
return hwdevice;
return device;
}
// Factory for the standalone-NVENC encode path on this display.
// Returns nullptr when device creation fails (e.g. unsupported pixel
// format or NVENC initialization failure inside init_device()).
std::unique_ptr<nvenc_encode_device_t>
display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) {
auto device = std::make_unique<d3d_nvenc_encode_device_t>();
if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
return nullptr;
}
return device;
}
int