From f2636b163e8bbe65deadda3ebf4e338e891e2644 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 2 Apr 2020 20:13:44 +0200
Subject: [PATCH 01/25] General structure complete

---
 sunshine/platform/common.h         |  17 +-
 sunshine/platform/windows_dxgi.cpp | 502 ++++++++++++++++++++++-------
 sunshine/video.cpp                 |  78 +++--
 3 files changed, 442 insertions(+), 155 deletions(-)
diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index fc35ab75..4ec9923c 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -28,6 +28,11 @@ constexpr std::uint16_t B            = 0x2000;
 constexpr std::uint16_t X            = 0x4000;
 constexpr std::uint16_t Y            = 0x8000;
 
+enum class pix_fmt_e {
+  yuv420p,
+  yuv420p10
+};
+
 struct gamepad_state_t {
   std::uint16_t buttonFlags;
   std::uint8_t lt;
@@ -58,6 +63,16 @@ public:
   virtual ~img_t() = default;
 };
 
+struct hwdevice_ctx_t {
+  std::shared_ptr<void> hwdevice;
+
+  virtual const platf::img_t*const convert(platf::img_t &img) {
+    return nullptr;
+  }
+
+  virtual ~hwdevice_ctx_t() = default;
+};
+
 enum class capture_e : int {
   ok,
   reinit,
@@ -80,7 +95,7 @@ public:
     return 0;
   }
 
-  virtual std::shared_ptr<void> get_hwdevice() {
+  virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
     return nullptr;
   }
 
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 5362b08d..2fed8e13 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -38,6 +38,15 @@ using dup_t        = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputD
 using texture2d_t  = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
 using resource_t   = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
 
+namespace video {
+using device_t         = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
+using ctx_t            = util::safe_ptr<ID3D11VideoContext, Release<ID3D11VideoContext>>;
+using processor_t      = util::safe_ptr<ID3D11VideoProcessor, Release<ID3D11VideoProcessor>>;
+using processor_out_t  = util::safe_ptr<ID3D11VideoProcessorOutputView, Release<ID3D11VideoProcessorOutputView>>;
+using processor_in_t   = util::safe_ptr<ID3D11VideoProcessorInputView, Release<ID3D11VideoProcessorInputView>>;
+using processor_enum_t = util::safe_ptr<ID3D11VideoProcessorEnumerator, Release<ID3D11VideoProcessorEnumerator>>;
+}
+
 extern const char *format_str[];
 
 class duplication_t {
@@ -105,7 +114,6 @@ public:
   }
 };
 
-class display_t;
 struct img_t : public ::platf::img_t  {
   ~img_t() override {
     delete[] data;
@@ -113,6 +121,11 @@ struct img_t : public ::platf::img_t  {
   }
 };
 
+struct img_d3d_t : public ::platf::img_t {
+  std::shared_ptr<platf::display_t> display;
+  texture2d_t texture;
+};
+
 struct cursor_t {
   std::vector<std::uint8_t> img_data;
 
@@ -273,118 +286,121 @@ void blend_cursor(const cursor_t &cursor, img_t &img) {
   }
 }
 
-class display_t : public ::platf::display_t, public std::enable_shared_from_this<display_t> {
+class hwdevice_ctx_t : public platf::hwdevice_ctx_t {
 public:
-  capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
-    auto img = (img_t*)img_base;
+  const platf::img_t*const convert(platf::img_t &img_base) override {
+    auto &img = (img_d3d_t&)img_base;
 
+    auto it = texture_to_processor_in.find(img.texture.get());
+    if(it == std::end(texture_to_processor_in)) {
+      D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } };
+
+      video::processor_in_t::pointer processor_in_p;
+      auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p);
+      if(FAILED(status)) {
+        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
+        return nullptr;
+      }
+      it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first;
+    }
+    auto &processor_in = it->second;
+
+    D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
+    auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed size and color conversion 0x["sv << util::hex(status).to_string_view() << ']';
+      return nullptr;
+    }
+
+    return &img;
+  }
+
+  int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
     HRESULT status;
 
-    DXGI_OUTDUPL_FRAME_INFO frame_info;
+    video::device_t::pointer vdevice_p;
+    status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query ID3D11VideoDevice interface [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    device.reset(vdevice_p);
 
-    resource_t::pointer res_p {};
-    auto capture_status = dup.next_frame(frame_info, &res_p);
-    resource_t res{res_p};
+    video::ctx_t::pointer ctx_p;
+    status = device_ctx_p->QueryInterface(IID_ID3D11VideoContext, (void**)&ctx_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query ID3D11VideoContext interface [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    ctx.reset(ctx_p);
 
-    if (capture_status != capture_e::ok) {
-      return capture_status;
+    D3D11_VIDEO_PROCESSOR_CONTENT_DESC contentDesc {
+      D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE,
+      { 1, 1 }, (UINT)in_width, (UINT)in_height,
+      { 1, 1 }, (UINT)out_width, (UINT)out_height,
+      D3D11_VIDEO_USAGE_PLAYBACK_NORMAL
+    };
+
+    video::processor_enum_t::pointer vp_e_p;
+    status = device->CreateVideoProcessorEnumerator(&contentDesc, &vp_e_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create video processor enumerator [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    processor_e.reset(vp_e_p);
+
+    video::processor_t::pointer processor_p;
+    status = device->CreateVideoProcessor(processor_e.get(), 0, &processor_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create video processor [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    processor.reset(processor_p);
+
+    D3D11_TEXTURE2D_DESC t {};
+    t.Width  = out_width;
+    t.Height = out_height;
+    t.MipLevels = 1;
+    t.ArraySize = 1;
+    t.SampleDesc.Count = 1;
+    t.Usage = D3D11_USAGE_DEFAULT;
+    t.Format = DXGI_FORMAT_420_OPAQUE;
+
+    dxgi::texture2d_t::pointer tex_p {};
+    status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
     }
 
-    if(frame_info.PointerShapeBufferSize > 0) {
-      auto &img_data = cursor.img_data;
+    img.texture.reset(tex_p);
+    img.display = std::move(display);
+    img.width = out_width;
+    img.height = out_height;
 
-      img_data.resize(frame_info.PointerShapeBufferSize);
-
-      UINT dummy;
-      status = dup.dup->GetFramePointerShape(img_data.size(), img_data.data(), &dummy, &cursor.shape_info);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
-
-        return capture_e::error;
-      }
+    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D };
+    video::processor_out_t::pointer processor_out_p;
+    device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
     }
+    processor_out.reset(processor_out_p);
 
-    if(frame_info.LastMouseUpdateTime.QuadPart) {
-      cursor.x = frame_info.PointerPosition.Position.x;
-      cursor.y = frame_info.PointerPosition.Position.y;
-      cursor.visible = frame_info.PointerPosition.Visible;
-    }
-
-    // If frame has been updated
-    if (frame_info.LastPresentTime.QuadPart != 0) {
-      {
-        texture2d_t::pointer src_p {};
-        status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
-        texture2d_t src{src_p};
-
-        if (FAILED(status)) {
-          BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
-          return capture_e::error;
-        }
-
-        //Copy from GPU to CPU
-        device_ctx->CopyResource(texture.get(), src.get());
-      }
-
-      if(img_info.pData) {
-        device_ctx->Unmap(texture.get(), 0);
-        img_info.pData = nullptr;
-      }
-
-      status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to map texture [0x"sv << util::hex(status).to_string_view() << ']';
-
-        return capture_e::error;
-      }
-    }
-
-    const bool mouse_update = 
-      (frame_info.LastMouseUpdateTime.QuadPart || frame_info.PointerShapeBufferSize > 0) &&
-      (cursor_visible && cursor.visible);
-
-    const bool update_flag = frame_info.LastPresentTime.QuadPart != 0 || mouse_update;
-
-    if(!update_flag) {
-      return capture_e::timeout;
-    }
-
-    if(img->width != width || img->height != height) {
-      delete[] img->data;
-      img->data = new std::uint8_t[height * img_info.RowPitch];
-
-      img->width = width;
-      img->height = height;
-      img->row_pitch = img_info.RowPitch;
-    }
-
-    std::copy_n((std::uint8_t*)img_info.pData, height * img_info.RowPitch, (std::uint8_t*)img->data);
-
-    if(cursor_visible && cursor.visible) {
-      blend_cursor(cursor, *img);
-    }
-
-    return capture_e::ok;
+    return 0;
   }
 
-  std::shared_ptr<::platf::img_t> alloc_img() override {
-    auto img = std::make_shared<img_t>();
-
-    img->data         = nullptr;
-    img->row_pitch    = 0;
-    img->pixel_pitch  = 4;
-    img->width        = 0;
-    img->height       = 0;
-
-    return img;
-  }
-
-  int dummy_img(platf::img_t *img, int &) override {
-    auto dummy_data_p = new int[1];
-
-    return platf::display_t::dummy_img(img, *dummy_data_p);
-  }
+  img_d3d_t img;
+  video::device_t device;
+  video::ctx_t ctx;
+  video::processor_enum_t processor_e;
+  video::processor_t processor;
+  video::processor_out_t processor_out;
+  std::unordered_map<texture2d_t::pointer, video::processor_in_t> texture_to_processor_in;
+};
 
+class display_base_t : public ::platf::display_t {
+public:
   int init() {
 /* Uncomment when use of IDXGIOutput5 is implemented
   std::call_once(windows_cpp_once_flag, []() {
@@ -506,7 +522,8 @@ public:
     adapter->GetDesc(&adapter_desc);
 
     auto description = converter.to_bytes(adapter_desc.Description);
-    BOOST_LOG(info) << std::endl
+    BOOST_LOG(info)
+      << std::endl
       << "Device Description : " << description << std::endl
       << "Device Vendor ID   : 0x"sv << util::hex(adapter_desc.VendorId).to_string_view() << std::endl
       << "Device Device ID   : 0x"sv << util::hex(adapter_desc.DeviceId).to_string_view() << std::endl
@@ -580,6 +597,139 @@ public:
 
     BOOST_LOG(debug) << "Source format ["sv << format_str[dup_desc.ModeDesc.Format] << ']';
 
+    return 0;
+  }
+
+  factory1_t factory;
+  adapter_t adapter;
+  output_t output;
+  device_t device;
+  device_ctx_t device_ctx;
+  duplication_t dup;
+
+  int width, height;
+
+  DXGI_FORMAT format;
+  D3D_FEATURE_LEVEL feature_level;
+};
+
+class display_cpu_t : public display_base_t {
+public:
+  capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
+    auto img = (img_t*)img_base;
+
+    HRESULT status;
+
+    DXGI_OUTDUPL_FRAME_INFO frame_info;
+
+    resource_t::pointer res_p {};
+    auto capture_status = dup.next_frame(frame_info, &res_p);
+    resource_t res{res_p};
+
+    if (capture_status != capture_e::ok) {
+      return capture_status;
+    }
+
+    if(frame_info.PointerShapeBufferSize > 0) {
+      auto &img_data = cursor.img_data;
+
+      img_data.resize(frame_info.PointerShapeBufferSize);
+
+      UINT dummy;
+      status = dup.dup->GetFramePointerShape(img_data.size(), img_data.data(), &dummy, &cursor.shape_info);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
+
+        return capture_e::error;
+      }
+    }
+
+    if(frame_info.LastMouseUpdateTime.QuadPart) {
+      cursor.x = frame_info.PointerPosition.Position.x;
+      cursor.y = frame_info.PointerPosition.Position.y;
+      cursor.visible = frame_info.PointerPosition.Visible;
+    }
+
+    // If frame has been updated
+    if (frame_info.LastPresentTime.QuadPart != 0) {
+      {
+        texture2d_t::pointer src_p {};
+        status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
+        texture2d_t src{src_p};
+
+        if (FAILED(status)) {
+          BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
+          return capture_e::error;
+        }
+
+        //Copy from GPU to CPU
+        device_ctx->CopyResource(texture.get(), src.get());
+      }
+
+      if(img_info.pData) {
+        device_ctx->Unmap(texture.get(), 0);
+        img_info.pData = nullptr;
+      }
+
+      status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Failed to map texture [0x"sv << util::hex(status).to_string_view() << ']';
+
+        return capture_e::error;
+      }
+    }
+
+    const bool mouse_update = 
+      (frame_info.LastMouseUpdateTime.QuadPart || frame_info.PointerShapeBufferSize > 0) &&
+      (cursor_visible && cursor.visible);
+
+    const bool update_flag = frame_info.LastPresentTime.QuadPart != 0 || mouse_update;
+
+    if(!update_flag) {
+      return capture_e::timeout;
+    }
+
+    if(img->width != width || img->height != height) {
+      delete[] img->data;
+      img->data = new std::uint8_t[height * img_info.RowPitch];
+
+      img->width = width;
+      img->height = height;
+      img->row_pitch = img_info.RowPitch;
+    }
+
+    std::copy_n((std::uint8_t*)img_info.pData, height * img_info.RowPitch, (std::uint8_t*)img->data);
+
+    if(cursor_visible && cursor.visible) {
+      blend_cursor(cursor, *img);
+    }
+
+    return capture_e::ok;
+  }
+
+  std::shared_ptr<platf::img_t> alloc_img() override {
+    auto img = std::make_shared<img_t>();
+
+    img->data         = nullptr;
+    img->row_pitch    = 0;
+    img->pixel_pitch  = 4;
+    img->width        = 0;
+    img->height       = 0;
+
+    return img;
+  }
+
+  int dummy_img(platf::img_t *img, int &) override {
+    auto dummy_data_p = new int[1];
+
+    return platf::display_t::dummy_img(img, *dummy_data_p);
+  }
+
+  int init() {
+    if(display_base_t::init()) {
+      return -1;
+    }
+
     D3D11_TEXTURE2D_DESC t {};
     t.Width  = width;
     t.Height = height;
@@ -591,7 +741,7 @@ public:
     t.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
 
     dxgi::texture2d_t::pointer tex_p {};
-    status = device->CreateTexture2D(&t, nullptr, &tex_p);
+    auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
 
     texture.reset(tex_p);
 
@@ -603,27 +753,128 @@ public:
     // map the texture simply to get the pitch and stride
     status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
     if(FAILED(status)) {
-      BOOST_LOG(error) << "Error: Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
 
     return 0;
   }
 
-  factory1_t factory;
-  adapter_t adapter;
-  output_t output;
-  device_t device;
-  device_ctx_t device_ctx;
-  duplication_t dup;
   cursor_t cursor;
-  texture2d_t texture;
-
-  int width, height;
-
-  DXGI_FORMAT format;
-  D3D_FEATURE_LEVEL feature_level;
   D3D11_MAPPED_SUBRESOURCE img_info;
+  texture2d_t texture;
+};
+
+class display_gpu_t : public display_base_t, public std::enable_shared_from_this<display_gpu_t> {
+  capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
+    auto img = (img_d3d_t*)img_base;
+
+    HRESULT status;
+
+    DXGI_OUTDUPL_FRAME_INFO frame_info;
+
+    resource_t::pointer res_p {};
+    auto capture_status = dup.next_frame(frame_info, &res_p);
+    resource_t res{res_p};
+
+    if (capture_status != capture_e::ok) {
+      return capture_status;
+    }
+
+    const bool update_flag = frame_info.LastPresentTime.QuadPart != 0;
+    if(!update_flag) {
+      return capture_e::timeout;
+    }
+
+    texture2d_t::pointer src_p{};
+    status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
+
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
+      return capture_e::error;
+    }
+
+    img->row_pitch = 0;
+    img->width     = width;
+    img->height    = height;
+    img->data      = (std::uint8_t*)src_p;
+    img->texture.reset(src_p);
+
+    return capture_e::ok;
+  }
+
+  std::shared_ptr<platf::img_t> alloc_img() override {
+    auto img = std::make_shared<img_d3d_t>();
+
+    img->data        = nullptr;
+    img->row_pitch   = 0;
+    img->pixel_pitch = 4;
+    img->width       = 0;
+    img->height      = 0;
+    img->display     = shared_from_this();
+
+    return img;
+  }
+
+  int dummy_img(platf::img_t *img_base, int &dummy_data_p) override {
+    auto img = (img_d3d_t*)img_base;
+
+    D3D11_TEXTURE2D_DESC t {};
+    t.Width  = 1;
+    t.Height = 1;
+    t.MipLevels = 1;
+    t.ArraySize = 1;
+    t.SampleDesc.Count = 1;
+    t.Usage = D3D11_USAGE_DEFAULT;
+    t.Format = format;
+
+    D3D11_SUBRESOURCE_DATA data {
+      &dummy_data_p,
+      (UINT)img->row_pitch,
+      0
+    };
+
+    dxgi::texture2d_t::pointer tex_p {};
+    auto status = device->CreateTexture2D(&t, &data, &tex_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    img->texture.reset(tex_p);
+
+    D3D11_MAPPED_SUBRESOURCE img_info {};
+    // map the texture simply to get the pitch and stride
+    status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    img->row_pitch = img_info.RowPitch;
+    img->height    = 1;
+    img->width     = 1;
+    img->data      = (std::uint8_t*)img->texture.get();
+
+    device_ctx->Unmap(img->texture.get(), 0);
+    return 0;
+  }
+
+  std::shared_ptr<platf::hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override {
+    auto hwdevice = std::make_shared<hwdevice_ctx_t>();
+
+    auto ret = hwdevice->init(
+      shared_from_this(),
+      device.get(),
+      device_ctx.get(),
+      this->width, this->height,
+      width, height);
+
+    if(ret) {
+      return nullptr;
+    }
+
+    return hwdevice;
+  }
 };
 
 const char *format_str[] = {
@@ -755,16 +1006,21 @@ const char *format_str[] = {
 
 namespace platf {
 std::shared_ptr<display_t> display(int hwdevice_type) {
-  if(hwdevice_type != AV_HWDEVICE_TYPE_NONE) {
-    return nullptr;
+  if(hwdevice_type == AV_HWDEVICE_TYPE_D3D11VA) {
+    auto disp = std::make_shared<dxgi::display_gpu_t>();
+
+    if(!disp->init()) {
+      return disp;
+    }
+  }
+  else {
+    auto disp = std::make_shared<dxgi::display_cpu_t>();
+
+    if(!disp->init()) {
+      return disp;
+    }
   }
 
-  auto disp = std::make_shared<dxgi::display_t>();
-
-  if (disp->init()) {
-    return nullptr;
-  }
-
-  return disp;
+  return nullptr;
 }
 }
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index eeddcfc4..bf0ff6c9 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -42,8 +42,8 @@ using buffer_t    = util::safe_ptr<AVBufferRef, free_buffer>;
 using sws_t       = util::safe_ptr<SwsContext, sws_freeContext>;
 using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
 
-void sw_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame);
-void nv_d3d_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame);
+void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
+void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
 
 struct encoder_t {
   struct option_t {
@@ -68,7 +68,7 @@ struct encoder_t {
 
   bool system_memory;
 
-  std::function<void(sws_t &, platf::img_t&, frame_t&)> img_to_frame;
+  std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
 };
 
 struct session_t {
@@ -212,7 +212,11 @@ void captureThread(
     next_frame += delay;
 
     auto &img = *round_robin++;
-    auto status = disp->snapshot(img.get(), display_cursor);
+    platf::capture_e status;
+    {
+      auto lg = display_wp.lock();
+      status = disp->snapshot(img.get(), display_cursor);
+    }
     switch (status) {
       case platf::capture_e::reinit: {
         reinit_event.raise(true);
@@ -553,13 +557,14 @@ void encode_run(
   idr_event_t idr_events,
   img_event_t images,
   config_t config,
-  platf::display_t &display,
+  platf::hwdevice_ctx_t *hwdevice_ctx,
   safe::signal_t &reinit_event,
   const encoder_t &encoder,
   void *channel_data) {
 
-  auto hwdevice = display.get_hwdevice();
-  auto session = make_session(encoder, config, hwdevice.get());
+  void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
+
+  auto session = make_session(encoder, config, hwdevice);
   if(!session) {
     return;
   }
@@ -616,9 +621,15 @@ void encode_run(
                                      sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
                                      0, 1 << 16, 1 << 16);
           }
-        }
 
-        encoder.img_to_frame(sws, *img, session->frame);
+          encoder.img_to_frame(sws, *img, session->frame);
+        }
+        else {
+          auto converted_img = hwdevice_ctx->convert(*img);
+ 
+          encoder.img_to_frame(sws, *converted_img, session->frame);
+
+        }
       }
       else if(images->running()) {
         continue;
@@ -667,38 +678,45 @@ void capture(
   int key_frame_nr = 1;
   while(!shutdown_event->peek() && images->running()) {
     // Wait for the display to be ready
-    std::shared_ptr<platf::display_t> display;
+    std::shared_ptr<platf::hwdevice_ctx_t> hwdevice_ctx;
     {
       auto lg = ref->display_wp.lock();
       if(ref->display_wp->expired()) {
         continue;
       }
 
-      display = ref->display_wp->lock();
+      auto display = ref->display_wp->lock();
+
+      auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
+      hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
     }
 
-    encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, *display, ref->reinit_event, *ref->encoder_p, channel_data);
+    encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
   }
 
   images->stop();
 }
 
-bool validate_config(const encoder_t &encoder, const config_t &config, platf::display_t &disp) {
-  // Ensure everything but software fails succesfully, it's not ready yet
-  if(encoder.dev_type != AV_HWDEVICE_TYPE_NONE) {
+bool validate_config(const encoder_t &encoder, const config_t &config) {
+  auto disp = platf::display(encoder.dev_type);
+  if(!disp) {
     return false;
   }
 
-  auto hwdevice = disp.get_hwdevice();
 
-  auto session = make_session(encoder, config, hwdevice.get());
+  auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
+  auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+
+  void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
+
+  auto session = make_session(encoder, config, hwdevice);
   if(!session) {
     return false;
   }
 
   int dummy_data;
-  auto img = disp.alloc_img();
-  disp.dummy_img(img.get(), dummy_data);
+  auto img = disp->alloc_img();
+  disp->dummy_img(img.get(), dummy_data);
 
   sws_t sws;
   if(encoder.system_memory) {
@@ -712,10 +730,13 @@ bool validate_config(const encoder_t &encoder, const config_t &config, platf::di
                              sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
                              0, 1 << 16, 1 << 16);
 
-
+    encoder.img_to_frame(sws, *img, session->frame);
   }
+  else {
+    auto converted_img = hwdevice_ctx->convert(*img);
 
-  encoder.img_to_frame(sws, *img, session->frame);
+    encoder.img_to_frame(sws, *converted_img, session->frame);
+  }
 
   session->frame->pict_type = AV_PICTURE_TYPE_I;
 
@@ -747,17 +768,12 @@ bool validate_encoder(const encoder_t &encoder) {
     1,
     1,
     1,
-    1
+    0
   };
 
-  auto disp = platf::display(encoder.dev_type);
-  if(!disp) {
-    return false;
-  }
-
   return
-    validate_config(encoder, config_h264, *disp) &&
-    validate_config(encoder, config_hevc, *disp);
+    validate_config(encoder, config_h264) &&
+    validate_config(encoder, config_hevc);
 }
 
 void init() {
@@ -776,7 +792,7 @@ void init() {
   }
 }
 
-void sw_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame) {
+void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
   av_frame_make_writable(frame.get());
 
   const int linesizes[2] {
@@ -792,7 +808,7 @@ void sw_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame) {
   }
 }
 
-void nv_d3d_img_to_frame(sws_t &sws, platf::img_t &img, frame_t &frame) {
+void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
   frame->data[0] = img.data;
   frame->data[1] = 0;
 

From afbca0f6cded67b9d63527658d8f7f02a76136fa Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Mon, 6 Apr 2020 23:15:03 +0300
Subject: [PATCH 02/25] initialize nvenc

---
 pre-compiled                       |   2 +-
 sunshine/platform/common.h         |   4 +-
 sunshine/platform/windows_dxgi.cpp |  44 ++++----
 sunshine/video.cpp                 | 165 ++++++++++++++++++-----------
 4 files changed, 129 insertions(+), 86 deletions(-)

diff --git a/pre-compiled b/pre-compiled
index 51f776db..afd9a9bb 160000
--- a/pre-compiled
+++ b/pre-compiled
@@ -1 +1 @@
-Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636
+Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416
diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index 4ec9923c..2028a554 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -64,7 +64,7 @@ public:
 };
 
 struct hwdevice_ctx_t {
-  std::shared_ptr<void> hwdevice;
+  void *hwdevice {};
 
   virtual const platf::img_t*const convert(platf::img_t &img) {
     return nullptr;
@@ -96,7 +96,7 @@ public:
   }
 
   virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
-    return nullptr;
+    return std::make_shared<hwdevice_ctx_t>();
   }
 
   virtual ~display_t() = default;
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 2fed8e13..4729c72e 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -124,6 +124,8 @@ struct img_t : public ::platf::img_t  {
 struct img_d3d_t : public ::platf::img_t {
   std::shared_ptr<platf::display_t> display;
   texture2d_t texture;
+
+  ~img_d3d_t() override = default;
 };
 
 struct cursor_t {
@@ -308,11 +310,11 @@ public:
     D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
     auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
     if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed size and color conversion 0x["sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
       return nullptr;
     }
 
-    return &img;
+    return &this->img;
   }
 
   int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
@@ -364,7 +366,8 @@ public:
     t.ArraySize = 1;
     t.SampleDesc.Count = 1;
     t.Usage = D3D11_USAGE_DEFAULT;
-    t.Format = DXGI_FORMAT_420_OPAQUE;
+    t.Format = DXGI_FORMAT_NV12;
+    t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
 
     dxgi::texture2d_t::pointer tex_p {};
     status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
@@ -377,19 +380,30 @@ public:
     img.display = std::move(display);
     img.width = out_width;
     img.height = out_height;
+    img.data = (std::uint8_t*)tex_p;
+    img.row_pitch = out_width;
+    img.pixel_pitch = 1;
 
-    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D };
+    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
     video::processor_out_t::pointer processor_out_p;
-    device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
+    status = device->CreateVideoProcessorOutputView(tex_p, processor_e.get(), &output_desc, &processor_out_p);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
     processor_out.reset(processor_out_p);
 
+    device_p->AddRef();
+    hwdevice = device_p;
     return 0;
   }
 
+  ~hwdevice_ctx_t() override {
+    if(hwdevice) {
+      ((ID3D11Device*)hwdevice)->Release();
+    }
+  }
+
   img_d3d_t img;
   video::device_t device;
   video::ctx_t ctx;
@@ -837,25 +851,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
     dxgi::texture2d_t::pointer tex_p {};
     auto status = device->CreateTexture2D(&t, &data, &tex_p);
     if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
     img->texture.reset(tex_p);
 
-    D3D11_MAPPED_SUBRESOURCE img_info {};
-    // map the texture simply to get the pitch and stride
-    status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
+    img->height      = 1;
+    img->width       = 1;
+    img->data        = (std::uint8_t*)tex_p;
+    img->row_pitch   = 4;
+    img->pixel_pitch = 4;
 
-    img->row_pitch = img_info.RowPitch;
-    img->height    = 1;
-    img->width     = 1;
-    img->data      = (std::uint8_t*)img->texture.get();
-
-    device_ctx->Unmap(img->texture.get(), 0);
     return 0;
   }
 
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index bf0ff6c9..680b3d61 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -7,6 +7,7 @@
 
 extern "C" {
 #include <libswscale/swscale.h>
+#include <libavutil/hwcontext_d3d11va.h>
 }
 
 #include "platform/common.h"
@@ -43,7 +44,9 @@ using sws_t       = util::safe_ptr<SwsContext, sws_freeContext>;
 using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
 
 void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
+
 void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
+util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
 
 struct encoder_t {
   struct option_t {
@@ -58,8 +61,10 @@ struct encoder_t {
   } profile;
 
   AVHWDeviceType dev_type;
+  AVPixelFormat dev_pix_fmt;
 
-  AVPixelFormat pix_fmt;
+  AVPixelFormat static_pix_fmt;
+  AVPixelFormat dynamic_pix_fmt;
 
   struct {
     std::vector<option_t> options;
@@ -69,6 +74,7 @@ struct encoder_t {
   bool system_memory;
 
   std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
+  std::function<util::Either<buffer_t, int>(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx;
 };
 
 struct session_t {
@@ -87,23 +93,24 @@ static encoder_t nvenc {
   { 2, 0, 1 },
   AV_HWDEVICE_TYPE_D3D11VA,
   AV_PIX_FMT_D3D11,
+  AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
   {
-    { {"force-idr"s, 1} }, "nvenc_hevc"s
+    { {"force-idr"s, 1} }, "hevc_nvenc"s
   },
   {
-    { {"force-idr"s, 1} }, "nvenc_h264"s
+    { {"force-idr"s, 1} }, "h264_nvenc"s
   },
   false,
 
-  nv_d3d_img_to_frame
-
-  // D3D11Device
+  nv_d3d_img_to_frame,
+  nv_d3d_make_hwdevice_ctx
 };
 
 static encoder_t software {
   { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
   AV_HWDEVICE_TYPE_NONE,
   AV_PIX_FMT_NONE,
+  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
   {
     // x265's Info SEI is so long that it causes the IDR picture data to be
     // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
@@ -123,9 +130,8 @@ static encoder_t software {
   },
   true,
 
-  sw_img_to_frame
-
-  // nullptr
+  sw_img_to_frame,
+  nullptr
 };
 
 static std::vector<encoder_t> encoders {
@@ -192,7 +198,9 @@ void captureThread(
     }
   }
   auto &dummy_img = imgs.front();
-  disp->dummy_img(dummy_img.get(), dummy_data);
+  if(disp->dummy_img(dummy_img.get(), dummy_data)) {
+    return;
+  }
 
   auto next_frame = std::chrono::steady_clock::now();
   while(capture_ctx_queue->running()) {
@@ -257,7 +265,9 @@ void captureThread(
             return;
           }
         }
-        disp->dummy_img(dummy_img.get(), dummy_data);
+        if(disp->dummy_img(dummy_img.get(), dummy_data)) {
+          return;
+        }
 
         reinit_event.reset();
         continue;
@@ -292,13 +302,22 @@ void captureThread(
   }
 }
 
-util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type) {
+util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) {
   buffer_t ctx;
 
-  AVBufferRef *ref;
-  auto err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
+  int err;
+  if(hwdevice_ctx) {
+    ctx.reset(av_hwdevice_ctx_alloc(type));
+    ((AVHWDeviceContext*)ctx.get())->hwctx = hwdevice_ctx;
+
+    err = av_hwdevice_ctx_init(ctx.get());
+  }
+  else {
+    AVBufferRef *ref  {};
+    err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
+    ctx.reset(ref);
+  }
 
-  ctx.reset(ref);
   if(err < 0) {
     return err;
   }
@@ -314,7 +333,7 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
   frame_ctx->sw_format = format;
   frame_ctx->height    = ctx->height;
   frame_ctx->width     = ctx->width;
-  frame_ctx->initial_pool_size = 20;
+  frame_ctx->initial_pool_size = 0;
 
   if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
     return err;
@@ -331,7 +350,9 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
   /* send the frame to the encoder */
   auto ret = avcodec_send_frame(ctx.get(), frame.get());
   if (ret < 0) {
-    BOOST_LOG(error) << "Could not send a frame for encoding"sv;
+    char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
+    BOOST_LOG(error) << "Could not send a frame for encoding: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, ret);
+
     return -1;
   }
 
@@ -375,7 +396,7 @@ void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
   capture_thread_ctx.capture_thread.join();
 }
 
-std::optional<session_t>  make_session(const encoder_t &encoder, const config_t &config, void *device_ctx) {
+std::optional<session_t>  make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) {
   bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
 
   auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
@@ -387,21 +408,6 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     return std::nullopt;
   }
 
-  buffer_t hwdevice;
-  if(hardware) {
-    auto buf_or_error = hwdevice_ctx(encoder.dev_type);
-    if(buf_or_error.has_right()) {
-      auto err = buf_or_error.right();
-
-      char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
-      BOOST_LOG(error) << "Failed to create FFMpeg "sv << video_format.name << ": "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
-
-      return std::nullopt;;
-    }
-
-    hwdevice = std::move(buf_or_error.left());
-  }
-
   ctx_t ctx {avcodec_alloc_context3(codec) };
   ctx->width = config.width;
   ctx->height = config.height;
@@ -463,21 +469,23 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
 
   AVPixelFormat sw_fmt;
   if(config.dynamicRange == 0) {
-    sw_fmt = AV_PIX_FMT_YUV420P;
+    sw_fmt = encoder.static_pix_fmt;
   }
   else {
-    sw_fmt = AV_PIX_FMT_YUV420P10;
+    sw_fmt = encoder.dynamic_pix_fmt;
   }
 
+  buffer_t hwdevice;
   if(hardware) {
-    ctx->pix_fmt = encoder.pix_fmt;
+    ctx->pix_fmt = encoder.dev_pix_fmt;
 
-    ((AVHWFramesContext *)ctx->hw_frames_ctx->data)->device_ctx = (AVHWDeviceContext*)device_ctx;
-
-    if(auto err = hwframe_ctx(ctx, hwdevice, sw_fmt); err < 0) {
-      char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
-      BOOST_LOG(error) << "Failed to initialize hardware frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
+    auto buf_or_error = encoder.make_hwdevice_ctx(device_ctx);
+    if(buf_or_error.has_right()) {
+      return std::nullopt;
+    }
 
+    hwdevice = std::move(buf_or_error.left());
+    if(hwframe_ctx(ctx, hwdevice, sw_fmt)) {
       return std::nullopt;
     }
   }
@@ -516,9 +524,6 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     av_dict_set_int(&options, "qp", config::video.qp, 0);
   }
 
-  av_dict_set(&options, "preset", config::video.preset.c_str(), 0);
-  av_dict_set(&options, "tune", config::video.tune.c_str(), 0);
-
   auto codec_handle = open_codec(ctx, codec, &options);
 
   frame_t frame {av_frame_alloc() };
@@ -528,15 +533,9 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
 
 
   if(hardware) {
-    auto err = av_hwframe_get_buffer(ctx->hw_frames_ctx, frame.get(), 0);
-    if(err < 0) {
-      char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
-      BOOST_LOG(error) << "Coudn't create hardware frame: "sv <<  av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err) << std::endl;
-
-      return std::nullopt;
-    }
+    frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
   }
-  else {
+  else /* software */ {
     av_frame_get_buffer(frame.get(), 0);
   }
 
@@ -562,9 +561,7 @@ void encode_run(
   const encoder_t &encoder,
   void *channel_data) {
 
-  void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
-
-  auto session = make_session(encoder, config, hwdevice);
+  auto session = make_session(encoder, config, hwdevice_ctx);
   if(!session) {
     return;
   }
@@ -626,6 +623,9 @@ void encode_run(
         }
         else {
           auto converted_img = hwdevice_ctx->convert(*img);
+          if(!converted_img) {
+            return;
+          }
  
           encoder.img_to_frame(sws, *converted_img, session->frame);
 
@@ -657,6 +657,10 @@ void capture(
   void *channel_data) {
 
   auto images = std::make_shared<img_event_t::element_type>();
+  auto lg = util::fail_guard([&]() {
+    images->stop();
+    shutdown_event->raise(true);
+  });
 
   // Keep a reference counter to ensure the Fcapture thread only runs when other threads have a reference to the capture thread
   static auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
@@ -689,12 +693,13 @@ void capture(
 
       auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
       hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+      if(!hwdevice_ctx) {
+        return;
+      }
     }
 
     encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
   }
-
-  images->stop();
 }
 
 bool validate_config(const encoder_t &encoder, const config_t &config) {
@@ -706,17 +711,20 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
 
   auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
   auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+  if(!hwdevice_ctx) {
+    return false;
+  }
 
-  void *hwdevice = hwdevice_ctx ? hwdevice_ctx->hwdevice.get() : nullptr;
-
-  auto session = make_session(encoder, config, hwdevice);
+  auto session = make_session(encoder, config, hwdevice_ctx.get());
   if(!session) {
     return false;
   }
 
   int dummy_data;
   auto img = disp->alloc_img();
-  disp->dummy_img(img.get(), dummy_data);
+  if(disp->dummy_img(img.get(), dummy_data)) {
+    return false;
+  }
 
   sws_t sws;
   if(encoder.system_memory) {
@@ -734,6 +742,9 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
   }
   else {
     auto converted_img = hwdevice_ctx->convert(*img);
+    if(!converted_img) {
+      return false;
+    }
 
     encoder.img_to_frame(sws, *converted_img, session->frame);
   }
@@ -754,7 +765,7 @@ bool validate_encoder(const encoder_t &encoder) {
     60,
     1000,
     1,
-    1,
+    0,
     1,
     0,
     0
@@ -765,7 +776,7 @@ bool validate_encoder(const encoder_t &encoder) {
     60,
     1000,
     1,
-    1,
+    0,
     1,
     1,
     0
@@ -809,13 +820,39 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
 }
 
 void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
+  // Need to have something refcounted
+  if(!frame->buf[0]) {
+    frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor*));
+  }
+
+  auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data;
+  desc->texture = (ID3D11Texture2D*)img.data;
+  desc->index = 0;
+
   frame->data[0] = img.data;
   frame->data[1] = 0;
 
   frame->linesize[0] = img.row_pitch;
-  frame->linesize[1] = 0;
 
   frame->height = img.height;
   frame->width = img.width;
 }
+
+util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
+  buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
+  auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
+
+  std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
+  std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice);
+
+  auto err = av_hwdevice_ctx_init(ctx_buf.get());
+  if(err) {
+    char err_str[AV_ERROR_MAX_STRING_SIZE] {0};
+    BOOST_LOG(error) << "Failed to create FFMpeg nvenc: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
+
+    return err;
+  }
+
+  return ctx_buf;
+}
 }

From 8e3df43cafbc426158b3782f7888df00e1644150 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Tue, 7 Apr 2020 00:34:52 +0300
Subject: [PATCH 03/25] Pass both nvenc and software in validation

---
 sunshine/platform/windows_dxgi.cpp |   2 +-
 sunshine/video.cpp                 | 115 +++++++++++++++++++++--------
 2 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 4729c72e..2e5dcb7a 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -833,6 +833,7 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
   int dummy_img(platf::img_t *img_base, int &dummy_data_p) override {
     auto img = (img_d3d_t*)img_base;
 
+    img->row_pitch = 4;
     D3D11_TEXTURE2D_DESC t {};
     t.Width  = 1;
     t.Height = 1;
@@ -859,7 +860,6 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
     img->height      = 1;
     img->width       = 1;
     img->data        = (std::uint8_t*)tex_p;
-    img->row_pitch   = 4;
     img->pixel_pitch = 4;
 
     return 0;
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 680b3d61..3385c529 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -4,6 +4,7 @@
 
 #include <atomic>
 #include <thread>
+#include <bitset>
 
 extern "C" {
 #include <libswscale/swscale.h>
@@ -49,6 +50,13 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
 util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
 
 struct encoder_t {
+  enum flag_e {
+    PASSED, // Is supported
+    REF_FRAMES_RESTRICT, // Set maximum reference frames
+    REF_FRAMES_AUTOSELECT, // Allow encoder to select maximum reference frames (If !REF_FRAMES_RESTRICT --> REF_FRAMES_AUTOSELECT)
+    MAX_FLAGS
+  };
+
   struct option_t {
     std::string name;
     std::variant<int, int*, std::string, std::string*> value;
@@ -69,6 +77,11 @@ struct encoder_t {
   struct {
     std::vector<option_t> options;
     std::string name;
+    std::bitset<MAX_FLAGS> capabilities;
+
+    std::bitset<MAX_FLAGS>::reference operator[](flag_e flag) {
+      return capabilities[(std::size_t)flag];
+    }
   } hevc, h264;
 
   bool system_memory;
@@ -158,6 +171,19 @@ struct capture_thread_ctx_t {
   return codec_t { ctx.get() };
 }
 
+void reset_display(std::shared_ptr<platf::display_t> &disp, AVHWDeviceType type) {
+  // We try this twice, in case we still get an error on reinitialization
+  for(int x = 0; x < 2; ++x) {
+    disp.reset();
+    disp = platf::display(type);
+    if(disp) {
+      break;
+    }
+ 
+    std::this_thread::sleep_for(200ms);
+  }
+}
+
 void captureThread(
   std::shared_ptr<safe::queue_t<capture_ctx_t>> capture_ctx_queue,
   util::sync_t<std::weak_ptr<platf::display_t>> &display_wp,
@@ -234,24 +260,15 @@ void captureThread(
           img.reset();
         }
 
-        // We try this twice, in case we still get an error on reinitialization
-        for(int x = 0; x < 2; ++x) {
-          // Some classes of display cannot have multiple instances at once
-          disp.reset();
+        // Some classes of display cannot have multiple instances at once
+        disp.reset();
 
-          // display_wp is modified in this thread only
-          while(!display_wp->expired()) {
-            std::this_thread::sleep_for(100ms);
-          }
-
-          disp = platf::display(encoder.dev_type);
-          if(disp) {
-            break;
-          }
-
-          std::this_thread::sleep_for(200ms);
+        // display_wp is modified in this thread only
+        while(!display_wp->expired()) {
+          std::this_thread::sleep_for(100ms);
         }
 
+        reset_display(disp, encoder.dev_type);
         if(!disp) {
           return;
         }
@@ -702,8 +719,8 @@ void capture(
   }
 }
 
-bool validate_config(const encoder_t &encoder, const config_t &config) {
-  auto disp = platf::display(encoder.dev_type);
+bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &encoder, const config_t &config) {
+  reset_display(disp, encoder.dev_type);
   if(!disp) {
     return false;
   }
@@ -759,8 +776,21 @@ bool validate_config(const encoder_t &encoder, const config_t &config) {
   return true;
 }
 
-bool validate_encoder(const encoder_t &encoder) {
-  config_t config_h264 {
+bool validate_encoder(encoder_t &encoder) {
+  std::shared_ptr<platf::display_t> disp;
+  // First, test encoder viability
+  config_t config_max_ref_frames {
+    1920, 1080,
+    60,
+    1000,
+    1,
+    1,
+    1,
+    0,
+    0
+  };
+
+  config_t config_autoselect {
     1920, 1080,
     60,
     1000,
@@ -771,20 +801,41 @@ bool validate_encoder(const encoder_t &encoder) {
     0
   };
 
-  config_t config_hevc {
-    1920, 1080,
-    60,
-    1000,
-    1,
-    0,
-    1,
-    1,
-    0
-  };
+  auto max_ref_frames_h264 = validate_config(disp, encoder, config_max_ref_frames);
+  auto autoselect_h264     = validate_config(disp, encoder, config_autoselect);
 
-  return
-    validate_config(encoder, config_h264) &&
-    validate_config(encoder, config_hevc);
+  if(!max_ref_frames_h264 && !autoselect_h264) {
+    return false;
+  }
+
+  auto max_ref_frames_hevc = validate_config(disp, encoder, config_max_ref_frames);
+  auto autoselect_hevc     = validate_config(disp, encoder, config_autoselect);
+
+  encoder.h264[encoder_t::REF_FRAMES_RESTRICT] = max_ref_frames_h264;
+  encoder.h264[encoder_t::REF_FRAMES_AUTOSELECT] = autoselect_h264;
+  encoder.h264[encoder_t::PASSED] = true;
+  encoder.hevc[encoder_t::REF_FRAMES_RESTRICT] = max_ref_frames_hevc;
+  encoder.hevc[encoder_t::REF_FRAMES_AUTOSELECT] = autoselect_hevc;
+  encoder.hevc[encoder_t::PASSED] = max_ref_frames_hevc || autoselect_hevc;
+
+  std::vector<std::pair<encoder_t::flag_e, config_t>> configs; 
+  for(auto &[flag, config] : configs) {
+    auto h264 = config;
+    auto hevc = config;
+
+    h264.videoFormat = 0;
+    hevc.videoFormat = 1;
+
+    if(validate_config(disp, encoder, h264)) {
+      encoder.h264[flag] = true;
+    }
+
+    if(validate_config(disp, encoder, hevc)) {
+      encoder.hevc[flag] = true;
+    }
+  }
+  
+  return true;
 }
 
 void init() {

From ceb784c648b2e35ec6211df26fc214ad2c5e2e06 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Tue, 7 Apr 2020 14:54:56 +0300
Subject: [PATCH 04/25] Test capabilities of the encoders

---
 sunshine/video.cpp | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 3385c529..7896ad01 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -79,6 +79,10 @@ struct encoder_t {
     std::string name;
     std::bitset<MAX_FLAGS> capabilities;
 
+    bool operator[](flag_e flag) const {
+      return capabilities[(std::size_t)flag];
+    }
+
     std::bitset<MAX_FLAGS>::reference operator[](flag_e flag) {
       return capabilities[(std::size_t)flag];
     }
@@ -392,7 +396,7 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
 }
 
 int start_capture(capture_thread_ctx_t &capture_thread_ctx) {
-  capture_thread_ctx.encoder_p = &software;
+  capture_thread_ctx.encoder_p = &encoders.front();
   capture_thread_ctx.reinit_event.reset();
 
   capture_thread_ctx.capture_ctx_queue = std::make_shared<safe::queue_t<capture_ctx_t>>();
@@ -417,6 +421,7 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
   bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
 
   auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
+  assert(video_format[encoder_t::PASSED]);
 
   auto codec = avcodec_find_encoder_by_name(video_format.name.c_str());
   if(!codec) {
@@ -448,8 +453,13 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
   ctx->gop_size = std::numeric_limits<int>::max();
   ctx->keyint_min = ctx->gop_size;
 
-  // Some client decoders have limits on the number of reference frames
-  ctx->refs = config.numRefFrames;
+  if(config.numRefFrames == 0) {
+    ctx->refs = video_format[encoder_t::REF_FRAMES_AUTOSELECT] ? 0 : 1;
+  }
+  else {
+    // Some client decoders have limits on the number of reference frames
+    ctx->refs = video_format[encoder_t::REF_FRAMES_RESTRICT] ? config.numRefFrames : 0;
+  }
 
   ctx->flags |= (AV_CODEC_FLAG_CLOSED_GOP | AV_CODEC_FLAG_LOW_DELAY);
   ctx->flags2 |= AV_CODEC_FLAG2_FAST;
@@ -778,6 +788,10 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
 
 bool validate_encoder(encoder_t &encoder) {
   std::shared_ptr<platf::display_t> disp;
+
+  encoder.h264.capabilities.set();
+  encoder.hevc.capabilities.set();
+
   // First, test encoder viability
   config_t config_max_ref_frames {
     1920, 1080,
@@ -808,6 +822,9 @@ bool validate_encoder(encoder_t &encoder) {
     return false;
   }
 
+  config_max_ref_frames.videoFormat = 1;
+  config_autoselect.videoFormat = 1;
+
   auto max_ref_frames_hevc = validate_config(disp, encoder, config_max_ref_frames);
   auto autoselect_hevc     = validate_config(disp, encoder, config_autoselect);
 

From 65f44cc88592b31b493a4e1acc324bb66f9aa468 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Tue, 7 Apr 2020 18:57:59 +0300
Subject: [PATCH 05/25] Fix encoder flags not set properly

---
 sunshine/platform/windows_dxgi.cpp | 15 ++++++++-------
 sunshine/video.cpp                 | 22 +++++++++-------------
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 2e5dcb7a..978a2a7b 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -808,7 +808,7 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
       return capture_e::error;
     }
 
-    img->row_pitch = 0;
+    img->row_pitch = width * 4;
     img->width     = width;
     img->height    = height;
     img->data      = (std::uint8_t*)src_p;
@@ -833,18 +833,19 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
   int dummy_img(platf::img_t *img_base, int &dummy_data_p) override {
     auto img = (img_d3d_t*)img_base;
 
-    img->row_pitch = 4;
+    img->row_pitch = width * 4;
     D3D11_TEXTURE2D_DESC t {};
-    t.Width  = 1;
-    t.Height = 1;
+    t.Width  = width;
+    t.Height = height;
     t.MipLevels = 1;
     t.ArraySize = 1;
     t.SampleDesc.Count = 1;
     t.Usage = D3D11_USAGE_DEFAULT;
     t.Format = format;
 
+    auto dummy_data = std::make_unique<int[]>(width * height);
     D3D11_SUBRESOURCE_DATA data {
-      &dummy_data_p,
+      dummy_data.get(),
       (UINT)img->row_pitch,
       0
     };
@@ -857,8 +858,8 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
     }
     img->texture.reset(tex_p);
 
-    img->height      = 1;
-    img->width       = 1;
+    img->height      = height;
+    img->width       = width;
     img->data        = (std::uint8_t*)tex_p;
     img->pixel_pitch = 4;
 
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 7896ad01..d2a1fa45 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -250,6 +250,7 @@ void captureThread(
     next_frame += delay;
 
     auto &img = *round_robin++;
+    while(img.use_count() > 1) {}
     platf::capture_e status;
     {
       auto lg = display_wp.lock();
@@ -626,6 +627,7 @@ void encode_run(
     // When Moonlight request an IDR frame, send frames even if there is no new captured frame
     if(frame_nr > (key_frame_nr + config.framerate) || images->peek()) {
       if(auto img = images->pop(delay)) {
+        const platf::img_t *img_p;
         if(encoder.system_memory) {
           auto new_width  = img->width;
           auto new_height = img->height;
@@ -646,17 +648,16 @@ void encode_run(
                                      0, 1 << 16, 1 << 16);
           }
 
-          encoder.img_to_frame(sws, *img, session->frame);
+          img_p = img;
         }
         else {
-          auto converted_img = hwdevice_ctx->convert(*img);
-          if(!converted_img) {
+          img_p = hwdevice_ctx->convert(*img);
+          if(!img_p) {
             return;
           }
- 
-          encoder.img_to_frame(sws, *converted_img, session->frame);
-
         }
+
+        encoder.img_to_frame(sws, *img_p, session->frame);
       }
       else if(images->running()) {
         continue;
@@ -843,13 +844,8 @@ bool validate_encoder(encoder_t &encoder) {
     h264.videoFormat = 0;
     hevc.videoFormat = 1;
 
-    if(validate_config(disp, encoder, h264)) {
-      encoder.h264[flag] = true;
-    }
-
-    if(validate_config(disp, encoder, hevc)) {
-      encoder.hevc[flag] = true;
-    }
+    encoder.h264[flag] = validate_config(disp, encoder, h264);
+    encoder.hevc[flag] = validate_config(disp, encoder, hevc);
   }
   
   return true;

From c21038af88e086f947cfbdd14fbc162fed8b83c3 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Wed, 8 Apr 2020 02:15:08 +0300
Subject: [PATCH 06/25] Encode video with nvenc

---
 sunshine/platform/common.h         |  4 ++
 sunshine/platform/windows_dxgi.cpp | 76 +++++++++++++++++++++---------
 sunshine/video.cpp                 | 64 ++++++++++++++-----------
 3 files changed, 93 insertions(+), 51 deletions(-)

diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index 2028a554..60d791b0 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -6,6 +6,7 @@
 #define SUNSHINE_COMMON_H
 
 #include <string>
+#include <mutex>
 #include "sunshine/utility.h"
 
 struct sockaddr;
@@ -66,6 +67,9 @@ public:
 struct hwdevice_ctx_t {
   void *hwdevice {};
 
+  // Could be nullptr, depends on the encoder
+  std::shared_ptr<std::recursive_mutex> lock;
+
   virtual const platf::img_t*const convert(platf::img_t &img) {
     return nullptr;
   }
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 978a2a7b..d461fa16 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -308,6 +308,7 @@ public:
     auto &processor_in = it->second;
 
     D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
+    std::lock_guard lg { *lock };
     auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
@@ -317,9 +318,12 @@ public:
     return &this->img;
   }
 
-  int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
+  int init(std::shared_ptr<std::recursive_mutex> &lock, std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
     HRESULT status;
 
+    this->lock = lock;
+    std::lock_guard lg { *lock };
+
     video::device_t::pointer vdevice_p;
     status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
     if(FAILED(status)) {
@@ -531,7 +535,6 @@ public:
       return -1;
     }
 
-
     DXGI_ADAPTER_DESC adapter_desc;
     adapter->GetDesc(&adapter_desc);
 
@@ -780,6 +783,7 @@ public:
 };
 
 class display_gpu_t : public display_base_t, public std::enable_shared_from_this<display_gpu_t> {
+public:
   capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
     auto img = (img_d3d_t*)img_base;
 
@@ -787,6 +791,7 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
 
     DXGI_OUTDUPL_FRAME_INFO frame_info;
 
+    std::lock_guard lg { *lock };
     resource_t::pointer res_p {};
     auto capture_status = dup.next_frame(frame_info, &res_p);
     resource_t res{res_p};
@@ -800,7 +805,7 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
       return capture_e::timeout;
     }
 
-    texture2d_t::pointer src_p{};
+    texture2d_t::pointer src_p {};
     status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
 
     if (FAILED(status)) {
@@ -808,11 +813,9 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
       return capture_e::error;
     }
 
-    img->row_pitch = width * 4;
-    img->width     = width;
-    img->height    = height;
-    img->data      = (std::uint8_t*)src_p;
-    img->texture.reset(src_p);
+    texture2d_t src { src_p };
+
+    device_ctx->CopyResource(img->texture.get(), src.get());
 
     return capture_e::ok;
   }
@@ -820,20 +823,6 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
   std::shared_ptr<platf::img_t> alloc_img() override {
     auto img = std::make_shared<img_d3d_t>();
 
-    img->data        = nullptr;
-    img->row_pitch   = 0;
-    img->pixel_pitch = 4;
-    img->width       = 0;
-    img->height      = 0;
-    img->display     = shared_from_this();
-
-    return img;
-  }
-
-  int dummy_img(platf::img_t *img_base, int &dummy_data_p) override {
-    auto img = (img_d3d_t*)img_base;
-
-    img->row_pitch = width * 4;
     D3D11_TEXTURE2D_DESC t {};
     t.Width  = width;
     t.Height = height;
@@ -843,6 +832,28 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
     t.Usage = D3D11_USAGE_DEFAULT;
     t.Format = format;
 
+    dxgi::texture2d_t::pointer tex_p {};
+    auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']';
+      return nullptr;
+    }
+
+    img->data        = (std::uint8_t*)tex_p;
+    img->row_pitch   = 0;
+    img->pixel_pitch = 4;
+    img->width       = 0;
+    img->height      = 0;
+    img->texture.reset(tex_p);
+    img->display     = shared_from_this();
+
+    return img;
+  }
+
+  int dummy_img(platf::img_t *img_base, int &dummy_data_p) override {
+    auto img = (img_d3d_t*)img_base;
+
+    img->row_pitch = width * 4;
     auto dummy_data = std::make_unique<int[]>(width * height);
     D3D11_SUBRESOURCE_DATA data {
       dummy_data.get(),
@@ -850,14 +861,24 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
       0
     };
 
+    D3D11_TEXTURE2D_DESC t {};
+    t.Width  = width;
+    t.Height = height;
+    t.MipLevels = 1;
+    t.ArraySize = 1;
+    t.SampleDesc.Count = 1;
+    t.Usage = D3D11_USAGE_DEFAULT;
+    t.Format = format;
+
     dxgi::texture2d_t::pointer tex_p {};
     auto status = device->CreateTexture2D(&t, &data, &tex_p);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
-    img->texture.reset(tex_p);
 
+    img->data        = (std::uint8_t*)tex_p;
+    img->texture.reset(tex_p);
     img->height      = height;
     img->width       = width;
     img->data        = (std::uint8_t*)tex_p;
@@ -866,10 +887,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
     return 0;
   }
 
+  int init() {
+    lock = std::make_shared<std::recursive_mutex>();
+    std::lock_guard lg { *lock };
+    return display_base_t::init();
+  }
+
   std::shared_ptr<platf::hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override {
     auto hwdevice = std::make_shared<hwdevice_ctx_t>();
 
     auto ret = hwdevice->init(
+      lock,
       shared_from_this(),
       device.get(),
       device_ctx.get(),
@@ -882,6 +910,8 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this
 
     return hwdevice;
   }
+
+  std::shared_ptr<std::recursive_mutex> lock;
 };
 
 const char *format_str[] = {
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index d2a1fa45..1d5ae97d 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -38,7 +38,6 @@ void free_packet(AVPacket *packet) {
 }
 
 using ctx_t       = util::safe_ptr<AVCodecContext, free_ctx>;
-using codec_t     = util::safe_ptr_v2<AVCodecContext, int, avcodec_close>;
 using frame_t     = util::safe_ptr<AVFrame, free_frame>;
 using buffer_t    = util::safe_ptr<AVBufferRef, free_buffer>;
 using sws_t       = util::safe_ptr<SwsContext, sws_freeContext>;
@@ -98,7 +97,6 @@ struct session_t {
   buffer_t hwdevice;
 
   ctx_t ctx;
-  codec_t codec_handle;
 
   frame_t frame;
 
@@ -112,10 +110,10 @@ static encoder_t nvenc {
   AV_PIX_FMT_D3D11,
   AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
   {
-    { {"force-idr"s, 1} }, "hevc_nvenc"s
+    { {"forced-idr"s, 1} }, "hevc_nvenc"s
   },
   {
-    { {"force-idr"s, 1} }, "h264_nvenc"s
+    { {"forced-idr"s, 1}, { "preset"s , 9} }, "h264_nvenc"s
   },
   false,
 
@@ -169,12 +167,6 @@ struct capture_thread_ctx_t {
   util::sync_t<std::weak_ptr<platf::display_t>> display_wp;
 };
 
-[[nodiscard]] codec_t open_codec(ctx_t &ctx, AVCodec *codec, AVDictionary **options) {
-  avcodec_open2(ctx.get(), codec, options);
-
-  return codec_t { ctx.get() };
-}
-
 void reset_display(std::shared_ptr<platf::display_t> &disp, AVHWDeviceType type) {
   // We try this twice, in case we still get an error on reinitialization
   for(int x = 0; x < 2; ++x) {
@@ -251,11 +243,8 @@ void captureThread(
 
     auto &img = *round_robin++;
     while(img.use_count() > 1) {}
-    platf::capture_e status;
-    {
-      auto lg = display_wp.lock();
-      status = disp->snapshot(img.get(), display_cursor);
-    }
+
+    auto status = disp->snapshot(img.get(), display_cursor);
     switch (status) {
       case platf::capture_e::reinit: {
         reinit_event.raise(true);
@@ -552,7 +541,7 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     av_dict_set_int(&options, "qp", config::video.qp, 0);
   }
 
-  auto codec_handle = open_codec(ctx, codec, &options);
+  avcodec_open2(ctx.get(), codec, &options);
 
   frame_t frame {av_frame_alloc() };
   frame->format = ctx->pix_fmt;
@@ -570,7 +559,6 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
   return std::make_optional(session_t {
     std::move(hwdevice),
     std::move(ctx),
-    std::move(codec_handle),
     std::move(frame),
     sw_fmt,
     sws_color_space
@@ -648,7 +636,7 @@ void encode_run(
                                      0, 1 << 16, 1 << 16);
           }
 
-          img_p = img;
+          img_p = img.get();
         }
         else {
           img_p = hwdevice_ctx->convert(*img);
@@ -667,7 +655,16 @@ void encode_run(
       }
     }
 
-    if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) {
+    int err;
+    if(hwdevice_ctx && hwdevice_ctx->lock) {
+      std::lock_guard lg { *hwdevice_ctx->lock };
+      err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data);
+    }
+    else {
+      err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data);
+    }
+    
+    if(err) {
       BOOST_LOG(fatal) << "Could not encode video packet"sv;
       log_flush();
       std::abort();
@@ -710,20 +707,20 @@ void capture(
   int key_frame_nr = 1;
   while(!shutdown_event->peek() && images->running()) {
     // Wait for the display to be ready
-    std::shared_ptr<platf::hwdevice_ctx_t> hwdevice_ctx;
+    std::shared_ptr<platf::display_t> display;
     {
       auto lg = ref->display_wp.lock();
       if(ref->display_wp->expired()) {
         continue;
       }
 
-      auto display = ref->display_wp->lock();
+      display = ref->display_wp->lock();
+    }
 
-      auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
-      hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
-      if(!hwdevice_ctx) {
-        return;
-      }
+    auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
+    auto hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+    if(!hwdevice_ctx) {
+      return;
     }
 
     encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
@@ -886,7 +883,7 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
 void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
   // Need to have something refcounted
   if(!frame->buf[0]) {
-    frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor*));
+    frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor));
   }
 
   auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data;
@@ -902,13 +899,24 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
   frame->width = img.width;
 }
 
+void nvenc_lock(void *lock_p) {
+  ((std::recursive_mutex*)lock_p)->lock();
+}
+void nvenc_unlock(void *lock_p) {
+  ((std::recursive_mutex*)lock_p)->unlock();
+}
+
 util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
   buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
   auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
-
+  
   std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
   std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice);
 
+  ctx->lock_ctx = hwdevice_ctx->lock.get();
+  ctx->lock = nvenc_lock;
+  ctx->unlock = nvenc_unlock;
+
   auto err = av_hwdevice_ctx_init(ctx_buf.get());
   if(err) {
     char err_str[AV_ERROR_MAX_STRING_SIZE] {0};

From 7edaa0cce0dfc6e486327743f70885b79c7f4aca Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Fri, 10 Apr 2020 15:39:50 +0300
Subject: [PATCH 07/25] Encode with nvenc smoothly

---
 sunshine/platform/common.h         |  18 +--
 sunshine/platform/windows.cpp      |   4 +
 sunshine/platform/windows_dxgi.cpp |  77 +++++-----
 sunshine/round_robin.h             |   8 ++
 sunshine/stream.cpp                |  28 ++--
 sunshine/video.cpp                 | 220 +++++++++++++++++++++--------
 6 files changed, 228 insertions(+), 127 deletions(-)

diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index 60d791b0..d8a8171e 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -67,12 +67,10 @@ public:
 struct hwdevice_ctx_t {
   void *hwdevice {};
 
-  // Could be nullptr, depends on the encoder
-  std::shared_ptr<std::recursive_mutex> lock;
-
   virtual const platf::img_t*const convert(platf::img_t &img) {
     return nullptr;
   }
+  virtual void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {};
 
   virtual ~hwdevice_ctx_t() = default;
 };
@@ -86,18 +84,10 @@ enum class capture_e : int {
 
 class display_t {
 public:
-  virtual capture_e snapshot(img_t *img, bool cursor) = 0;
+  virtual capture_e snapshot(img_t *img, std::chrono::milliseconds timeout, bool cursor) = 0;
   virtual std::shared_ptr<img_t> alloc_img() = 0;
 
-  virtual int dummy_img(img_t *img, int &dummy_data_p) {
-    img->row_pitch   = 4;
-    img->height      = 1;
-    img->width       = 1;
-    img->pixel_pitch = 4;
-    img->data        = (std::uint8_t*)&dummy_data_p;
-
-    return 0;
-  }
+  virtual int dummy_img(img_t *img) = 0;
 
   virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
     return std::make_shared<hwdevice_ctx_t>();
@@ -137,6 +127,8 @@ int alloc_gamepad(input_t &input, int nr);
 void free_gamepad(input_t &input, int nr);
 
 [[nodiscard]] std::unique_ptr<deinit_t> init();
+
+int thread_priority();
 }
 
 #endif //SUNSHINE_COMMON_H
diff --git a/sunshine/platform/windows.cpp b/sunshine/platform/windows.cpp
index 98436ec7..3a113df8 100755
--- a/sunshine/platform/windows.cpp
+++ b/sunshine/platform/windows.cpp
@@ -331,6 +331,10 @@ void gamepad(input_t &input, int nr, const gamepad_state_t &gamepad_state) {
   }
 }
 
+int thread_priority()  {
+  return SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST) ? 0 : 1;
+}
+
 void freeInput(void *p) {
   auto vigem = (vigem_t*)p;
 
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index d461fa16..9e95942a 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -8,6 +8,7 @@ extern "C" {
 
 #include <dxgi.h>
 #include <d3d11.h>
+#include <d3d11_4.h>
 #include <d3dcommon.h>
 #include <dxgi1_2.h>
 
@@ -26,17 +27,17 @@ void Release(T *dxgi) {
   dxgi->Release();
 }
 
-using factory1_t   = util::safe_ptr<IDXGIFactory1, Release<IDXGIFactory1>>;
-using dxgi_t       = util::safe_ptr<IDXGIDevice, Release<IDXGIDevice>>;
-using dxgi1_t      = util::safe_ptr<IDXGIDevice1, Release<IDXGIDevice1>>;
-using device_t     = util::safe_ptr<ID3D11Device, Release<ID3D11Device>>;
-using device_ctx_t = util::safe_ptr<ID3D11DeviceContext, Release<ID3D11DeviceContext>>;
-using adapter_t    = util::safe_ptr<IDXGIAdapter1, Release<IDXGIAdapter1>>;
-using output_t     = util::safe_ptr<IDXGIOutput, Release<IDXGIOutput>>;
-using output1_t    = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
-using dup_t        = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
-using texture2d_t  = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
-using resource_t   = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
+using factory1_t    = util::safe_ptr<IDXGIFactory1, Release<IDXGIFactory1>>;
+using dxgi_t        = util::safe_ptr<IDXGIDevice, Release<IDXGIDevice>>;
+using dxgi1_t       = util::safe_ptr<IDXGIDevice1, Release<IDXGIDevice1>>;
+using device_t      = util::safe_ptr<ID3D11Device, Release<ID3D11Device>>;
+using device_ctx_t  = util::safe_ptr<ID3D11DeviceContext, Release<ID3D11DeviceContext>>;
+using adapter_t     = util::safe_ptr<IDXGIAdapter1, Release<IDXGIAdapter1>>;
+using output_t      = util::safe_ptr<IDXGIOutput, Release<IDXGIOutput>>;
+using output1_t     = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
+using dup_t         = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
+using texture2d_t   = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
+using resource_t    = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
 
 namespace video {
 using device_t         = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
@@ -54,13 +55,13 @@ public:
   dup_t dup;
   bool has_frame {};
 
-  capture_e next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, resource_t::pointer *res_p) {
+  capture_e next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, std::chrono::milliseconds timeout, resource_t::pointer *res_p) {
     auto capture_status = release_frame();
     if(capture_status != capture_e::ok) {
       return capture_status;
     }
 
-    auto status = dup->AcquireNextFrame(1000, &frame_info, res_p);
+    auto status = dup->AcquireNextFrame(timeout.count(), &frame_info, res_p);
 
     switch(status) {
       case S_OK:
@@ -300,7 +301,8 @@ public:
       video::processor_in_t::pointer processor_in_p;
       auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p);
       if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
+        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv
+         << util::hex(status).to_string_view() << ']';
         return nullptr;
       }
       it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first;
@@ -308,7 +310,7 @@ public:
     auto &processor_in = it->second;
 
     D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
-    std::lock_guard lg { *lock };
+
     auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
@@ -318,11 +320,13 @@ public:
     return &this->img;
   }
 
-  int init(std::shared_ptr<std::recursive_mutex> &lock, std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
-    HRESULT status;
+  void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
+    colorspace |= (color_range >> 4);
+    ctx->VideoProcessorSetOutputColorSpace(processor.get(), (D3D11_VIDEO_PROCESSOR_COLOR_SPACE*)&colorspace);
+  }
 
-    this->lock = lock;
-    std::lock_guard lg { *lock };
+  int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
+    HRESULT status;
 
     video::device_t::pointer vdevice_p;
     status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
@@ -344,7 +348,7 @@ public:
       D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE,
       { 1, 1 }, (UINT)in_width, (UINT)in_height,
       { 1, 1 }, (UINT)out_width, (UINT)out_height,
-      D3D11_VIDEO_USAGE_PLAYBACK_NORMAL
+      D3D11_VIDEO_USAGE_OPTIMAL_QUALITY
     };
 
     video::processor_enum_t::pointer vp_e_p;
@@ -632,7 +636,7 @@ public:
 
 class display_cpu_t : public display_base_t {
 public:
-  capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
+  capture_e snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) override {
     auto img = (img_t*)img_base;
 
     HRESULT status;
@@ -640,7 +644,7 @@ public:
     DXGI_OUTDUPL_FRAME_INFO frame_info;
 
     resource_t::pointer res_p {};
-    auto capture_status = dup.next_frame(frame_info, &res_p);
+    auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
     resource_t res{res_p};
 
     if (capture_status != capture_e::ok) {
@@ -736,10 +740,14 @@ public:
     return img;
   }
 
-  int dummy_img(platf::img_t *img, int &) override {
-    auto dummy_data_p = new int[1];
+  int dummy_img(platf::img_t *img) override {
+    img->data        = new std::uint8_t[4];
+    img->row_pitch   = 4;
+    img->pixel_pitch = 4;
+    img->width       = 1;
+    img->height      = 1;
 
-    return platf::display_t::dummy_img(img, *dummy_data_p);
+    return 0;
   }
 
   int init() {
@@ -784,23 +792,22 @@ public:
 
 class display_gpu_t : public display_base_t, public std::enable_shared_from_this<display_gpu_t> {
 public:
-  capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
+  capture_e snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) override {
     auto img = (img_d3d_t*)img_base;
 
     HRESULT status;
 
     DXGI_OUTDUPL_FRAME_INFO frame_info;
 
-    std::lock_guard lg { *lock };
     resource_t::pointer res_p {};
-    auto capture_status = dup.next_frame(frame_info, &res_p);
+    auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
     resource_t res{res_p};
 
     if (capture_status != capture_e::ok) {
       return capture_status;
     }
 
-    const bool update_flag = frame_info.LastPresentTime.QuadPart != 0;
+    const bool update_flag = frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0;
     if(!update_flag) {
       return capture_e::timeout;
     }
@@ -814,7 +821,6 @@ public:
     }
 
     texture2d_t src { src_p };
-
     device_ctx->CopyResource(img->texture.get(), src.get());
 
     return capture_e::ok;
@@ -850,7 +856,7 @@ public:
     return img;
   }
 
-  int dummy_img(platf::img_t *img_base, int &dummy_data_p) override {
+  int dummy_img(platf::img_t *img_base) override {
     auto img = (img_d3d_t*)img_base;
 
     img->row_pitch = width * 4;
@@ -887,17 +893,10 @@ public:
     return 0;
   }
 
-  int init() {
-    lock = std::make_shared<std::recursive_mutex>();
-    std::lock_guard lg { *lock };
-    return display_base_t::init();
-  }
-
   std::shared_ptr<platf::hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override {
     auto hwdevice = std::make_shared<hwdevice_ctx_t>();
 
     auto ret = hwdevice->init(
-      lock,
       shared_from_this(),
       device.get(),
       device_ctx.get(),
@@ -910,8 +909,6 @@ public:
 
     return hwdevice;
   }
-
-  std::shared_ptr<std::recursive_mutex> lock;
 };
 
 const char *format_str[] = {
diff --git a/sunshine/round_robin.h b/sunshine/round_robin.h
index b3ae4475..47e125db 100755
--- a/sunshine/round_robin.h
+++ b/sunshine/round_robin.h
@@ -118,6 +118,14 @@ public:
     }
   }
 
+  void dec() {
+    if(_pos == _begin)  {
+      _pos = _end;
+    }
+    
+    --_pos;
+  }
+
   bool eq(const round_robin_t &other) const {
     return *_pos == *other._pos;
   }
diff --git a/sunshine/stream.cpp b/sunshine/stream.cpp
index c692d7c0..17013b11 100644
--- a/sunshine/stream.cpp
+++ b/sunshine/stream.cpp
@@ -604,21 +604,21 @@ void videoBroadcastThread(safe::signal_t *shutdown_event, udp::socket &sock, vid
 
     payload = {(char *) payload_new.data(), payload_new.size()};
 
-    // make sure moonlight recognizes the nalu code for IDR frames
-    if (packet->flags & AV_PKT_FLAG_KEY) {
-      // TODO: Not all encoders encode their IDR frames with the 4 byte NALU prefix
-      std::string_view frame_old = "\000\000\001e"sv;
-      std::string_view frame_new = "\000\000\000\001e"sv;
-      if(session->config.monitor.videoFormat != 0) {
-        frame_old = "\000\000\001("sv;
-        frame_new = "\000\000\000\001("sv;
-      }
+    // // make sure moonlight recognizes the nalu code for IDR frames
+    // if (packet->flags & AV_PKT_FLAG_KEY) {
+    //   // TODO: Not all encoders encode their IDR frames with the 4 byte NALU prefix
+    //   std::string_view frame_old = "\000\000\001e"sv;
+    //   std::string_view frame_new = "\000\000\000\001e"sv;
+    //   if(session->config.monitor.videoFormat != 0) {
+    //     frame_old = "\000\000\001("sv;
+    //     frame_new = "\000\000\000\001("sv;
+    //   }
 
-      assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) ==
-             std::end(payload));
-      payload_new = replace(payload, frame_old, frame_new);
-      payload = {(char *) payload_new.data(), payload_new.size()};
-    }
+    //   assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) ==
+    //          std::end(payload));
+    //   payload_new = replace(payload, frame_old, frame_new);
+    //   payload = {(char *) payload_new.data(), payload_new.size()};
+    // }
 
     // insert packet headers
     auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE;
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 1d5ae97d..3530f481 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -113,7 +113,12 @@ static encoder_t nvenc {
     { {"forced-idr"s, 1} }, "hevc_nvenc"s
   },
   {
-    { {"forced-idr"s, 1}, { "preset"s , 9} }, "h264_nvenc"s
+    {
+      { "forced-idr"s, 1},
+      { "profile"s, "high"s },
+      { "preset"s , "llhp" },
+      { "rc"s, "cbr_ld_hq"s },
+    }, "h264_nvenc"s
   },
   false,
 
@@ -209,9 +214,8 @@ void captureThread(
   display_wp = disp;
 
   std::vector<std::shared_ptr<platf::img_t>> imgs(12);
-  auto round_robin = util::make_round_robin<std::shared_ptr<platf::img_t>>(std::begin(imgs) +1, std::end(imgs));
+  auto round_robin = util::make_round_robin<std::shared_ptr<platf::img_t>>(std::begin(imgs), std::end(imgs));
 
-  int dummy_data = 0;
   for(auto &img : imgs) {
     img = disp->alloc_img();
     if(!img) {
@@ -219,9 +223,11 @@ void captureThread(
       return;
     }
   }
-  auto &dummy_img = imgs.front();
-  if(disp->dummy_img(dummy_img.get(), dummy_data)) {
-    return;
+
+  if(auto capture_ctx = capture_ctx_queue->pop())  {
+    capture_ctxs.emplace_back(std::move(*capture_ctx));
+
+    delay = capture_ctxs.back().delay;
   }
 
   auto next_frame = std::chrono::steady_clock::now();
@@ -229,22 +235,15 @@ void captureThread(
     while(capture_ctx_queue->peek()) {
       capture_ctxs.emplace_back(std::move(*capture_ctx_queue->pop()));
 
-      // Temporary image to ensure something is send to Moonlight even if no frame has been captured yet.
-      capture_ctxs.back().images->raise(dummy_img);
-
       delay = std::min(delay, capture_ctxs.back().delay);
     }
 
     auto now = std::chrono::steady_clock::now();
-    if(next_frame > now) {
-      std::this_thread::sleep_until(next_frame);
-    }
-    next_frame += delay;
 
     auto &img = *round_robin++;
     while(img.use_count() > 1) {}
 
-    auto status = disp->snapshot(img.get(), display_cursor);
+    auto status = disp->snapshot(img.get(), 1000ms, display_cursor);
     switch (status) {
       case platf::capture_e::reinit: {
         reinit_event.raise(true);
@@ -276,16 +275,14 @@ void captureThread(
             return;
           }
         }
-        if(disp->dummy_img(dummy_img.get(), dummy_data)) {
-          return;
-        }
 
         reinit_event.reset();
         continue;
       }
       case platf::capture_e::error:
-       return;
+        return;
       case platf::capture_e::timeout:
+        std::this_thread::sleep_for(1ms);
         continue;
       case platf::capture_e::ok:
         break;
@@ -310,9 +307,36 @@ void captureThread(
       capture_ctx->images->raise(img);
       ++capture_ctx;
     })
+
+    if(next_frame > now) {
+      std::this_thread::sleep_until(next_frame);
+    }
+    next_frame += delay;
   }
 }
 
+int start_capture(capture_thread_ctx_t &capture_thread_ctx) {
+  capture_thread_ctx.encoder_p = &encoders.front();
+  capture_thread_ctx.reinit_event.reset();
+
+  capture_thread_ctx.capture_ctx_queue = std::make_shared<safe::queue_t<capture_ctx_t>>();
+
+  capture_thread_ctx.capture_thread = std::thread {
+    captureThread,
+    capture_thread_ctx.capture_ctx_queue,
+    std::ref(capture_thread_ctx.display_wp),
+    std::ref(capture_thread_ctx.reinit_event),
+    std::ref(*capture_thread_ctx.encoder_p)
+  };
+
+  return 0;
+}
+void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
+  capture_thread_ctx.capture_ctx_queue->stop();
+
+  capture_thread_ctx.capture_thread.join();
+}
+
 util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) {
   buffer_t ctx;
 
@@ -385,28 +409,6 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
   return 0;
 }
 
-int start_capture(capture_thread_ctx_t &capture_thread_ctx) {
-  capture_thread_ctx.encoder_p = &encoders.front();
-  capture_thread_ctx.reinit_event.reset();
-
-  capture_thread_ctx.capture_ctx_queue = std::make_shared<safe::queue_t<capture_ctx_t>>();
-
-  capture_thread_ctx.capture_thread = std::thread {
-    captureThread,
-    capture_thread_ctx.capture_ctx_queue,
-    std::ref(capture_thread_ctx.display_wp),
-    std::ref(capture_thread_ctx.reinit_event),
-    std::ref(*capture_thread_ctx.encoder_p)
-  };
-
-  return 0;
-}
-void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
-  capture_thread_ctx.capture_ctx_queue->stop();
-
-  capture_thread_ctx.capture_thread.join();
-}
-
 std::optional<session_t>  make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) {
   bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
 
@@ -505,6 +507,8 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     if(hwframe_ctx(ctx, hwdevice, sw_fmt)) {
       return std::nullopt;
     }
+
+    ctx->slices = config.slicesPerFrame;
   }
   else /* software */ {
     ctx->pix_fmt = sw_fmt;
@@ -530,7 +534,7 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
   if(config.bitrate > 500) {
     auto bitrate = config.bitrate * 1000;
     ctx->rc_max_rate = bitrate;
-    ctx->rc_buffer_size = bitrate / 100;
+    ctx->rc_buffer_size = bitrate / config.framerate;
     ctx->bit_rate = bitrate;
     ctx->rc_min_rate = bitrate;
   }
@@ -582,6 +586,8 @@ void encode_run(
     return;
   }
 
+  hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
+
   auto delay = std::chrono::floor<std::chrono::nanoseconds>(1s) / config.framerate;
 
   auto img_width  = 0;
@@ -654,17 +660,8 @@ void encode_run(
         break;
       }
     }
-
-    int err;
-    if(hwdevice_ctx && hwdevice_ctx->lock) {
-      std::lock_guard lg { *hwdevice_ctx->lock };
-      err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data);
-    }
-    else {
-      err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data);
-    }
     
-    if(err) {
+    if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) {
       BOOST_LOG(fatal) << "Could not encode video packet"sv;
       log_flush();
       std::abort();
@@ -681,6 +678,110 @@ void capture(
   config_t config,
   void *channel_data) {
 
+  auto lg = util::fail_guard([&]() {
+    shutdown_event->raise(true);
+  });
+
+  const auto &encoder = encoders.front();
+  auto disp = platf::display(encoder.dev_type);
+  if(!disp) {
+    return;
+  }
+
+  auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
+  auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+  if(!hwdevice_ctx) {
+    return;
+  }
+
+  auto session = make_session(encoder, config, hwdevice_ctx.get());
+  if(!session) {
+    return;
+  }
+  hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
+
+  auto img = disp->alloc_img();
+  if(disp->dummy_img(img.get())) {
+    return;
+  }
+
+  const platf::img_t* img_p = hwdevice_ctx->convert(*img);
+  if(!img_p) {
+    return;
+  }
+
+  sws_t sws;
+  encoder.img_to_frame(sws, *img_p, session->frame);
+
+  std::vector<std::shared_ptr<platf::img_t>> imgs(12);
+  for(auto &img : imgs) {
+    img = disp->alloc_img();
+  }
+
+  auto round_robin = util::make_round_robin<std::shared_ptr<platf::img_t>>(std::begin(imgs), std::end(imgs));
+
+  int frame_nr = 1;
+  int key_frame_nr = 1;
+
+  auto max_delay = 1000ms / config.framerate;
+  
+  std::shared_ptr<platf::img_t> img_tmp;
+  auto next_frame = std::chrono::steady_clock::now();
+  while(!shutdown_event->peek()) {
+    if(idr_events->peek()) {
+      session->frame->pict_type = AV_PICTURE_TYPE_I;
+
+      auto event = idr_events->pop();
+      TUPLE_2D_REF(_, end, *event);
+
+      frame_nr = end;
+      key_frame_nr = end + config.framerate;
+    }
+    else if(frame_nr == key_frame_nr) {
+      session->frame->pict_type = AV_PICTURE_TYPE_I;
+    }
+
+    auto delay = std::max(0ms, std::chrono::duration_cast<std::chrono::milliseconds>(next_frame - std::chrono::steady_clock::now()));
+
+    auto status = disp->snapshot(round_robin->get(), delay, display_cursor);
+    switch(status)  {
+      case platf::capture_e::reinit:
+        return;
+      case platf::capture_e::error:
+        return;
+      case platf::capture_e::timeout:
+        next_frame += max_delay;
+        if(!img_tmp && frame_nr > (key_frame_nr + config.framerate))  {
+          continue;
+        }
+
+        break;
+      case platf::capture_e::ok:
+        img_tmp = *round_robin++;
+        break;
+    }
+
+    if(img_tmp) {
+      img_p = hwdevice_ctx->convert(*img_tmp);
+      img_tmp.reset();
+    }
+    
+    if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) {
+      BOOST_LOG(fatal) << "Could not encode video packet"sv;
+      log_flush();
+      std::abort();
+    }
+
+    session->frame->pict_type = AV_PICTURE_TYPE_NONE;
+  }
+}
+void capture_async(
+  safe::signal_t *shutdown_event,
+  packet_queue_t packets,
+  idr_event_t idr_events,
+  config_t config,
+  void *channel_data) {
+
   auto images = std::make_shared<img_event_t::element_type>();
   auto lg = util::fail_guard([&]() {
     images->stop();
@@ -723,6 +824,12 @@ void capture(
       return;
     }
 
+    auto dummy_img = display->alloc_img();
+    if(display->dummy_img(dummy_img.get())) {
+      return;
+    }
+    images->raise(std::move(dummy_img));
+
     encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
   }
 }
@@ -733,7 +840,6 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
     return false;
   }
 
-
   auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
   auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
   if(!hwdevice_ctx) {
@@ -744,10 +850,10 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
   if(!session) {
     return false;
   }
+  hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
 
-  int dummy_data;
   auto img = disp->alloc_img();
-  if(disp->dummy_img(img.get(), dummy_data)) {
+  if(disp->dummy_img(img.get())) {
     return false;
   }
 
@@ -900,10 +1006,8 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
 }
 
 void nvenc_lock(void *lock_p) {
-  ((std::recursive_mutex*)lock_p)->lock();
 }
 void nvenc_unlock(void *lock_p) {
-  ((std::recursive_mutex*)lock_p)->unlock();
 }
 
 util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
@@ -913,10 +1017,6 @@ util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwde
   std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
   std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice);
 
-  ctx->lock_ctx = hwdevice_ctx->lock.get();
-  ctx->lock = nvenc_lock;
-  ctx->unlock = nvenc_unlock;
-
   auto err = av_hwdevice_ctx_init(ctx_buf.get());
   if(err) {
     char err_str[AV_ERROR_MAX_STRING_SIZE] {0};

From 679f74e53c927ca7d558dcf899dbfc60f54affc6 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Sun, 12 Apr 2020 02:33:17 +0300
Subject: [PATCH 08/25] Fix multicasting for nvenc

---
 sunshine/platform/common.h         |  12 +-
 sunshine/platform/windows_dxgi.cpp |  31 ++-
 sunshine/thread_safe.h             |  31 ++-
 sunshine/video.cpp                 | 352 +++++++++++++++++++++++------
 4 files changed, 332 insertions(+), 94 deletions(-)

diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index d8a8171e..e1b3c500 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -29,9 +29,17 @@ constexpr std::uint16_t B            = 0x2000;
 constexpr std::uint16_t X            = 0x4000;
 constexpr std::uint16_t Y            = 0x8000;
 
+enum class dev_type_e {
+  none,
+  dxgi,
+  unknown
+};
+
 enum class pix_fmt_e {
   yuv420p,
-  yuv420p10
+  yuv420p10,
+  nv12,
+  unknown
 };
 
 struct gamepad_state_t {
@@ -114,7 +122,7 @@ std::string from_sockaddr(const sockaddr *const);
 std::pair<std::uint16_t, std::string> from_sockaddr_ex(const sockaddr *const);
 
 std::unique_ptr<mic_t> microphone(std::uint32_t sample_rate);
-std::shared_ptr<display_t> display(int hwdevice_type);
+std::shared_ptr<display_t> display(dev_type_e hwdevice_type);
 
 input_t input();
 void move_mouse(input_t &input, int deltaX, int deltaY);
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 9e95942a..3988b344 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -38,6 +38,7 @@ using output1_t     = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
 using dup_t         = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
 using texture2d_t   = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
 using resource_t    = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
+using multithread_t = util::safe_ptr<ID3D11Multithread, Release<ID3D11Multithread>>;
 
 namespace video {
 using device_t         = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
@@ -894,6 +895,12 @@ public:
   }
 
   std::shared_ptr<platf::hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override {
+    if(pix_fmt != platf::pix_fmt_e::nv12) {
+      BOOST_LOG(error) << "display_gpu_t doesn't support pixel format ["sv << (int)pix_fmt << ']';
+
+      return nullptr;
+    }
+
     auto hwdevice = std::make_shared<hwdevice_ctx_t>();
 
     auto ret = hwdevice->init(
@@ -909,6 +916,24 @@ public:
 
     return hwdevice;
   }
+
+  int init() {
+    if(display_base_t::init()) {
+      return -1;
+    }
+
+    multithread_t::pointer multithread_p  {};
+    auto status = device->QueryInterface(__uuidof(multithread_t::element_type), (void**)&multithread_p);
+    multithread_t multithread { multithread_p };
+
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Couldn't query Multithread interface [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    multithread->SetMultithreadProtected(true);
+
+    return 0;
+  }
 };
 
 const char *format_str[] = {
@@ -1039,15 +1064,15 @@ const char *format_str[] = {
 }
 
 namespace platf {
-std::shared_ptr<display_t> display(int hwdevice_type) {
-  if(hwdevice_type == AV_HWDEVICE_TYPE_D3D11VA) {
+std::shared_ptr<display_t> display(platf::dev_type_e hwdevice_type) {
+  if(hwdevice_type == platf::dev_type_e::dxgi) {
     auto disp = std::make_shared<dxgi::display_gpu_t>();
 
     if(!disp->init()) {
       return disp;
     }
   }
-  else {
+  else if(hwdevice_type == platf::dev_type_e::none) {
     auto disp = std::make_shared<dxgi::display_cpu_t>();
 
     if(!disp->init()) {
diff --git a/sunshine/thread_safe.h b/sunshine/thread_safe.h
index c593fcff..1d4f12e6 100644
--- a/sunshine/thread_safe.h
+++ b/sunshine/thread_safe.h
@@ -33,7 +33,7 @@ public:
 
   // pop and view shoud not be used interchangebly
   status_t pop() {
-    std::unique_lock ul{_lock};
+    std::unique_lock ul{ _lock };
 
     if (!_continue) {
       return util::false_v<status_t>;
@@ -55,7 +55,7 @@ public:
   // pop and view shoud not be used interchangebly
   template<class Rep, class Period>
   status_t pop(std::chrono::duration<Rep, Period> delay) {
-    std::unique_lock ul{_lock};
+    std::unique_lock ul{ _lock };
 
     if (!_continue) {
       return util::false_v<status_t>;
@@ -74,7 +74,7 @@ public:
 
   // pop and view shoud not be used interchangebly
   const status_t &view() {
-    std::unique_lock ul{_lock};
+    std::unique_lock ul{ _lock };
 
     if (!_continue) {
       return util::false_v<status_t>;
@@ -98,7 +98,7 @@ public:
   }
 
   void stop() {
-    std::lock_guard lg{_lock};
+    std::lock_guard lg{ _lock };
 
     _continue = false;
 
@@ -106,7 +106,7 @@ public:
   }
 
   void reset() {
-    std::lock_guard lg{_lock};
+    std::lock_guard lg{ _lock };
 
     _continue = true;
 
@@ -118,8 +118,8 @@ public:
   }
 private:
 
-  bool _continue{true};
-  status_t _status;
+  bool _continue { true };
+  status_t _status { util::false_v<status_t> };
 
   std::condition_variable _cv;
   std::mutex _lock;
@@ -170,7 +170,7 @@ public:
   }
 
   status_t pop() {
-    std::unique_lock ul{_lock};
+    std::unique_lock ul{ _lock };
 
     if (!_continue) {
       return util::false_v<status_t>;
@@ -191,11 +191,12 @@ public:
   }
 
   std::vector<T> &unsafe() {
+    std::lock_guard lg { _lock }; // must be named: an unnamed temporary unlocks at the end of its own statement. The returned reference is still used without the lock.
     return _queue;
   }
 
   void stop() {
-    std::lock_guard lg{_lock};
+    std::lock_guard lg{ _lock };
 
     _continue = false;
 
@@ -208,7 +209,7 @@ public:
 
 private:
 
-  bool _continue{true};
+  bool _continue{ true };
 
   std::mutex _lock;
   std::condition_variable _cv;
@@ -274,9 +275,8 @@ public:
 
     void release() {
       std::lock_guard lg { owner->_lock };
-      auto c = owner->_count.fetch_sub(1, std::memory_order_acquire);
 
-      if(c - 1 == 0) {
+      if(!--owner->_count) {
         owner->_destruct(*get());
         (*this)->~element_type();
       }
@@ -296,10 +296,9 @@ public:
   template<class FC, class FD>
   shared_t(FC && fc, FD &&fd) : _construct { std::forward<FC>(fc) }, _destruct { std::forward<FD>(fd) } {}
   [[nodiscard]] ptr_t ref() {
-    auto c = _count.fetch_add(1, std::memory_order_acquire);
-    if(!c) {
-      std::lock_guard lg { _lock };
+    std::lock_guard lg { _lock };
 
+    if(!_count++) {
       new(_object_buf.data()) element_type;
       if(_construct(*reinterpret_cast<element_type*>(_object_buf.data()))) {
         return ptr_t { nullptr };
@@ -314,7 +313,7 @@ private:
 
   std::array<std::uint8_t, sizeof(element_type)> _object_buf;
 
-  std::atomic<std::uint32_t> _count;
+  std::uint32_t _count { 0 }; // ref() tests `!_count++`, so the counter has to start at zero; guarded by _lock
   std::mutex _lock;
 };
 
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 3530f481..d4bbc936 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -37,6 +37,36 @@ void free_packet(AVPacket *packet) {
   av_packet_free(&packet);
 }
 
+namespace nv {
+enum class preset_e : int {
+    _default = 0,
+    slow,
+    medium,
+    fast,
+    hp,
+    hq,
+    bd,
+    ll_default,
+    llhq,
+    llhp,
+    lossless_default, // lossless presets must be the last ones
+    lossless_hp,
+};
+
+enum class profile_h264_e : int {
+  baseline,
+  main,
+  high,
+  high_444p,
+};
+
+enum class profile_hevc_e : int {
+  main,
+  main_10,
+  rext,
+};
+}
+
 using ctx_t       = util::safe_ptr<AVCodecContext, free_ctx>;
 using frame_t     = util::safe_ptr<AVFrame, free_frame>;
 using buffer_t    = util::safe_ptr<AVBufferRef, free_buffer>;
@@ -104,8 +134,41 @@ struct session_t {
   int sws_color_format;
 };
 
+struct encode_session_ctx_t { // Per-stream state that capture() hands to the synchronized capture thread
+  safe::signal_t *shutdown_event; // peeked by encode_run_sync; when set, the stream's session is removed
+  safe::signal_t *join_event; // raised once the stream has been dropped so the thread blocked in capture() can continue
+  packet_queue_t packets; // forwarded to encode()
+  idr_event_t idr_events; // a pending event makes the next frame an I-frame
+  config_t config;
+  int frame_nr; // passed to encode() and incremented for every encoded frame
+  int key_frame_nr; // when frame_nr equals this value the frame is marked as an I-frame
+  void *channel_data; // forwarded to encode() unchanged
+};
+
+struct encode_session_t { // Encoder-side state built from an encode_session_ctx_t by make_session_from_ctx
+  encode_session_ctx_t *ctx; // non-owning; points at the context this session was created from
+  
+  std::chrono::steady_clock::time_point next_frame; // a frame is encoded once the current time has passed this point
+  std::chrono::milliseconds delay; // 1000ms / config.framerate; added to next_frame after each due frame
+
+  platf::img_t *img_tmp; // most recent image not yet converted; reset to nullptr after conversion
+  std::shared_ptr<platf::hwdevice_ctx_t> hwdevice;
+  session_t session;
+};
+
+using encode_session_ctx_queue_t = safe::queue_t<encode_session_ctx_t>;
+using encode_e = platf::capture_e;
+
+struct capture_synced_ctx_t {
+  encode_session_ctx_queue_t encode_session_ctx_queue;
+};
+
+int start_capture_sync(capture_synced_ctx_t &ctx);
+void end_capture_sync(capture_synced_ctx_t &ctx);
+auto capture_thread_sync = safe::make_shared<capture_synced_ctx_t>(start_capture_sync, end_capture_sync);
+
 static encoder_t nvenc {
-  { 2, 0, 1 },
+  { (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 },
   AV_HWDEVICE_TYPE_D3D11VA,
   AV_PIX_FMT_D3D11,
   AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
@@ -115,9 +178,7 @@ static encoder_t nvenc {
   {
     {
       { "forced-idr"s, 1},
-      { "profile"s, "high"s },
-      { "preset"s , "llhp" },
-      { "rc"s, "cbr_ld_hq"s },
+      { "preset"s , (int)nv::preset_e::llhq },
     }, "h264_nvenc"s
   },
   false,
@@ -172,11 +233,39 @@ struct capture_thread_ctx_t {
   util::sync_t<std::weak_ptr<platf::display_t>> display_wp;
 };
 
+// Translates FFmpeg's AVHWDeviceType into the platform layer's dev_type_e; unmapped types become dev_type_e::unknown.
+platf::dev_type_e map_dev_type(AVHWDeviceType type) {
+  switch(type) {
+    case AV_HWDEVICE_TYPE_D3D11VA:
+      return platf::dev_type_e::dxgi;
+    case AV_HWDEVICE_TYPE_NONE: // the AVHWDeviceType enumerator, not AVPictureType's AV_PICTURE_TYPE_NONE
+      return platf::dev_type_e::none;
+    default:
+      break;
+  }
+  return platf::dev_type_e::unknown;
+}
+
+// Translates an FFmpeg pixel format into the platform layer's pix_fmt_e.
+// Anything other than YUV420P, YUV420P10 and NV12 is reported as pix_fmt_e::unknown.
+platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) {
+  if(fmt == AV_PIX_FMT_YUV420P) {
+    return platf::pix_fmt_e::yuv420p;
+  }
+  if(fmt == AV_PIX_FMT_YUV420P10) {
+    return platf::pix_fmt_e::yuv420p10;
+  }
+  if(fmt == AV_PIX_FMT_NV12) {
+    return platf::pix_fmt_e::nv12;
+  }
+  return platf::pix_fmt_e::unknown;
+}
+
 void reset_display(std::shared_ptr<platf::display_t> &disp, AVHWDeviceType type) {
   // We try this twice, in case we still get an error on reinitialization
   for(int x = 0; x < 2; ++x) {
     disp.reset();
-    disp = platf::display(type);
+    disp = platf::display(map_dev_type(type));
     if(disp) {
       break;
     }
@@ -207,7 +296,7 @@ void captureThread(
 
   std::chrono::nanoseconds delay = 1s;
 
-  auto disp = platf::display(encoder.dev_type);
+  auto disp = platf::display(map_dev_type(encoder.dev_type));
   if(!disp) {
     return;
   }
@@ -585,7 +674,6 @@ void encode_run(
   if(!session) {
     return;
   }
-
   hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
 
   auto delay = std::chrono::floor<std::chrono::nanoseconds>(1s) / config.framerate;
@@ -671,47 +759,41 @@ void encode_run(
   }
 }
 
-void capture(
-  safe::signal_t *shutdown_event,
-  packet_queue_t packets,
-  idr_event_t idr_events,
-  config_t config,
-  void *channel_data) {
+std::optional<encode_session_t> make_session_from_ctx(platf::display_t *disp, const encoder_t &encoder, platf::img_t &img, encode_session_ctx_t &ctx) {
+  encode_session_t encode_session;
 
-  auto lg = util::fail_guard([&]() {
-    shutdown_event->raise(true);
-  });
+  encode_session.ctx = &ctx;
+  encode_session.next_frame = std::chrono::steady_clock::now();
 
-  const auto &encoder = encoders.front();
-  auto disp = platf::display(encoder.dev_type);
-  if(!disp) {
-    return;
-  }
+  encode_session.delay = 1000ms / ctx.config.framerate;
 
-  auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
-  auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
+  auto pix_fmt = ctx.config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
+  auto hwdevice_ctx = disp->make_hwdevice_ctx(ctx.config.width, ctx.config.height, pix_fmt);
   if(!hwdevice_ctx) {
-    return;
+    return std::nullopt;
   }
 
-  auto session = make_session(encoder, config, hwdevice_ctx.get());
+  auto session = make_session(encoder, ctx.config, hwdevice_ctx.get());
   if(!session) {
-    return;
+    return std::nullopt;
   }
   hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
 
-  auto img = disp->alloc_img();
-  if(disp->dummy_img(img.get())) {
-    return;
-  }
+  encode_session.img_tmp = &img;
+  encode_session.hwdevice = std::move(hwdevice_ctx);
+  encode_session.session = std::move(*session);
 
-  const platf::img_t* img_p = hwdevice_ctx->convert(*img);
-  if(!img_p) {
-    return;
-  }
+  return std::move(encode_session);
+}
 
-  sws_t sws;
-  encoder.img_to_frame(sws, *img_p, session->frame);
+encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &encode_session_ctxs, encode_session_ctx_queue_t &encode_session_ctx_queue) {
+  const auto &encoder = encoders.front();
+
+  std::shared_ptr<platf::display_t> disp;
+  reset_display(disp, encoder.dev_type);
+  if(!disp) {
+    return encode_e::error;
+  }
 
   std::vector<std::shared_ptr<platf::img_t>> imgs(12);
   for(auto &img : imgs) {
@@ -719,26 +801,41 @@ void capture(
   }
 
   auto round_robin = util::make_round_robin<std::shared_ptr<platf::img_t>>(std::begin(imgs), std::end(imgs));
-
-  int frame_nr = 1;
-  int key_frame_nr = 1;
-
-  auto max_delay = 1000ms / config.framerate;
   
-  std::shared_ptr<platf::img_t> img_tmp;
-  auto next_frame = std::chrono::steady_clock::now();
-  while(!shutdown_event->peek()) {
-    if(idr_events->peek()) {
-      session->frame->pict_type = AV_PICTURE_TYPE_I;
+  auto dummy_img = disp->alloc_img();
+  auto img_tmp = dummy_img.get();
+  if(disp->dummy_img(img_tmp)) {
+    return encode_e::error;
+  }
 
-      auto event = idr_events->pop();
-      TUPLE_2D_REF(_, end, *event);
-
-      frame_nr = end;
-      key_frame_nr = end + config.framerate;
+  std::vector<encode_session_t> encode_sessions;
+  for(auto &ctx : encode_session_ctxs) {
+    auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *ctx);
+    if(!encode_session) {
+      return encode_e::error;
     }
-    else if(frame_nr == key_frame_nr) {
-      session->frame->pict_type = AV_PICTURE_TYPE_I;
+
+    encode_sessions.emplace_back(std::move(*encode_session));
+  }
+
+  auto next_frame = std::chrono::steady_clock::now();
+  while(encode_session_ctx_queue.running()) {
+    while(encode_session_ctx_queue.peek()) {
+      auto encode_session_ctx = encode_session_ctx_queue.pop();
+      if(!encode_session_ctx)  {
+        return encode_e::ok;
+      }
+
+      encode_session_ctxs.emplace_back(std::make_unique<encode_session_ctx_t>(std::move(*encode_session_ctx)));
+
+      auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *encode_session_ctxs.back());
+      if(!encode_session) {
+        return encode_e::error;
+      }
+
+      encode_sessions.emplace_back(std::move(*encode_session));
+
+      next_frame = std::chrono::steady_clock::now();
     }
 
     auto delay = std::max(0ms, std::chrono::duration_cast<std::chrono::milliseconds>(next_frame - std::chrono::steady_clock::now()));
@@ -746,35 +843,144 @@ void capture(
     auto status = disp->snapshot(round_robin->get(), delay, display_cursor);
     switch(status)  {
       case platf::capture_e::reinit:
-        return;
       case platf::capture_e::error:
-        return;
+        return status;
       case platf::capture_e::timeout:
-        next_frame += max_delay;
-        if(!img_tmp && frame_nr > (key_frame_nr + config.framerate))  {
-          continue;
-        }
-
         break;
       case platf::capture_e::ok:
-        img_tmp = *round_robin++;
+        img_tmp = round_robin->get();
+        ++round_robin;
         break;
     }
-
-    if(img_tmp) {
-      img_p = hwdevice_ctx->convert(*img_tmp);
-      img_tmp.reset();
-    }
     
-    if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) {
-      BOOST_LOG(fatal) << "Could not encode video packet"sv;
-      log_flush();
-      std::abort();
+    auto now = std::chrono::steady_clock::now();
+    
+    next_frame = now + 1s;
+    {auto pos = std::begin(encode_sessions);while( pos != std::end(encode_sessions)) {
+      auto ctx = pos->ctx;
+      if(ctx->shutdown_event->peek()) {
+        // Let waiting thread know it can delete shutdown_event
+        ctx->join_event->raise(true);
+
+        //FIXME: Causes segfault even if (pos + 1) != std::end()
+        // *pos = std::move(*(pos + 1));
+
+        {encode_session_t t { std::move(*pos) };}
+        
+        //FIXME: encode_session_t = std::move(encode_session_t) <=> segfault
+        pos = encode_sessions.erase(pos);
+        encode_session_ctxs.erase(std::find_if(std::begin(encode_session_ctxs), std::end(encode_session_ctxs), [&ctx_p=ctx](auto &ctx) {
+          return ctx.get() == ctx_p;
+        }));
+
+        if(encode_sessions.empty()) {
+          return encode_e::ok;
+        }
+
+        continue;
+      }
+
+      if(ctx->idr_events->peek()) {
+        pos->session.frame->pict_type = AV_PICTURE_TYPE_I;
+
+        auto event = ctx->idr_events->pop();
+        auto end = event->second;
+
+        ctx->frame_nr = end;
+        ctx->key_frame_nr = end + ctx->config.framerate;
+      }
+      else if(ctx->frame_nr == ctx->key_frame_nr) {
+        pos->session.frame->pict_type = AV_PICTURE_TYPE_I;
+      }
+
+      if(img_tmp) {
+        pos->img_tmp = img_tmp;
+      }
+
+      auto timeout = now > pos->next_frame;
+      if(timeout) {
+        pos->next_frame += pos->delay;
+      }
+      
+      next_frame = std::min(next_frame, pos->next_frame);
+
+      if(!timeout) {
+        ++pos;
+        continue;
+      }
+
+      sws_t sws;
+      if(pos->img_tmp) {
+        auto img_p = pos->hwdevice->convert(*pos->img_tmp);
+        pos->img_tmp = nullptr;
+
+        encoder.img_to_frame(sws, *img_p, pos->session.frame);
+      }
+
+      if(encode(ctx->frame_nr++, pos->session.ctx, pos->session.frame, ctx->packets, ctx->channel_data)) {
+        BOOST_LOG(fatal) << "Could not encode video packet"sv;
+        log_flush();
+        std::abort();
+      }
+
+      pos->session.frame->pict_type = AV_PICTURE_TYPE_NONE;
+
+      ++pos;
+    }}
+
+    img_tmp = nullptr;
+  }
+
+  return encode_e::ok;
+}
+
+void captureThreadSync() {
+  auto ref = capture_thread_sync.ref();
+
+  std::vector<std::unique_ptr<encode_session_ctx_t>> encode_session_ctxs;
+
+  auto &ctx = ref->encode_session_ctx_queue;
+  auto lg = util::fail_guard([&]() {
+    ctx.stop();
+
+    for(auto &ctx : encode_session_ctxs) {
+      ctx->shutdown_event->raise(true);
+      ctx->join_event->raise(true);
     }
 
-    session->frame->pict_type = AV_PICTURE_TYPE_NONE;
-  }
+    for(auto &ctx : ctx.unsafe()) {
+      ctx.shutdown_event->raise(true);
+      ctx.join_event->raise(true);
+    }
+  });
+
+  while(encode_run_sync(encode_session_ctxs, ctx) == encode_e::reinit);
 }
+
+int start_capture_sync(capture_synced_ctx_t &ctx) { // construct callback handed to safe::make_shared for capture_thread_sync
+  std::thread { &captureThreadSync }.detach(); // the thread is never joined; `ctx` itself is not used here
+  return 0;
+}
+
+void end_capture_sync(capture_synced_ctx_t &ctx) {} // destruct callback handed to safe::make_shared; performs no work
+
+void capture( // Queues this stream for the synchronized capture thread, then waits on join_event
+  safe::signal_t *shutdown_event,
+  packet_queue_t packets,
+  idr_event_t idr_events,
+  config_t config,
+  void *channel_data) {
+  
+  safe::signal_t join_event;
+  auto ref = capture_thread_sync.ref(); // ref() invokes start_capture_sync when the reference count was zero
+  ref->encode_session_ctx_queue.raise(encode_session_ctx_t {
+    shutdown_event, &join_event, packets, idr_events, config, 1, 1, channel_data // the two literals initialise frame_nr and key_frame_nr
+  });
+
+  // Wait for join signal
+  join_event.view(); // join_event lives on this stack frame, so it must stay alive until it has been raised
+}
+
 void capture_async(
   safe::signal_t *shutdown_event,
   packet_queue_t packets,
@@ -840,7 +1046,7 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
     return false;
   }
 
-  auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
+  auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
   auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
   if(!hwdevice_ctx) {
     return false;

From c7a72553c4b61590b7ceffb49b04bddcd887e694 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Tue, 14 Apr 2020 00:15:24 +0300
Subject: [PATCH 09/25] Configure settings nvenc

---
 assets/sunshine.conf               |  60 ++++-
 sunshine/config.cpp                | 153 ++++++++++-
 sunshine/config.h                  |  18 +-
 sunshine/main.cpp                  |   4 +-
 sunshine/nvhttp.cpp                |   8 +-
 sunshine/platform/windows_dxgi.cpp |  19 --
 sunshine/rtsp.cpp                  |   6 +-
 sunshine/video.cpp                 | 395 +++++++++++++++++------------
 sunshine/video.h                   |   2 +-
 9 files changed, 454 insertions(+), 211 deletions(-)

diff --git a/assets/sunshine.conf b/assets/sunshine.conf
index daaf000b..3fb5cf17 100644
--- a/assets/sunshine.conf
+++ b/assets/sunshine.conf
@@ -105,20 +105,64 @@
 # Increasing the value slightly reduces encoding efficiency, but the tradeoff is usually
 # worth it to gain the use of more CPU cores for encoding. The ideal value is the lowest
 # value that can reliably encode at your desired streaming settings on your hardware.
-# min_threads = 2
+# min_threads = 1
 
 # Allows the client to request HEVC Main or HEVC Main10 video streams.
-# HEVC is more CPU-intensive to encode, so enabling this may reduce performance.
-# If set to 0 (default), Sunshine will not advertise support for HEVC
-# If set to 1, Sunshine will advertise support for HEVC Main profile
-# If set to 2, Sunshine will advertise support for HEVC Main and Main10 (HDR) profiles
-# hevc_mode = 2
+# HEVC is more CPU-intensive to encode, so enabling this may reduce performance when using software encoding.
+# If set to 0 (default), Sunshine will specify support for HEVC based on encoder
+# If set to 1, Sunshine will not advertise support for HEVC
+# If set to 2, Sunshine will advertise support for HEVC Main profile
+# If set to 3, Sunshine will advertise support for HEVC Main and Main10 (HDR) profiles
+# hevc_mode = 0
 
+# Force a specific encoder, otherwise Sunshine will use the first encoder that is available
+# supported encoders:
+#   nvenc
+#   software
+#
+# encoder = nvenc
+##################################### Software #####################################
 # See x264 --fullhelp for the different presets
-# preset  = superfast
-# tune    = zerolatency
+# sw_preset  = superfast
+# sw_tune    = zerolatency
 #
+
+##################################### NVENC #####################################
+###### presets ###########
+# default
+# hp     -- high performance
+# hq     -- high quality
+# slow   -- hq 2 passes
+# medium -- hq 1 pass
+# fast   -- hp 1 pass
+# bd
+# ll_default -- low latency
+# llhq
+# llhp
+# lossless_default
+# lossless_hp
+##########################
+# nv_preset = llhq
 #
+####### rate control #####
+# auto      -- let ffmpeg decide rate control
+# constqp   -- constant QP mode
+# vbr       -- variable bitrate
+# cbr       -- constant bitrate
+# cbr_hq    -- cbr high quality
+# cbr_ld_hq -- cbr low delay high quality
+# vbr_hq    -- vbr high quality
+##########################
+# nv_rc = auto
+
+###### h264 entropy ######
+# auto -- let ffmpeg nvenc decide the entropy encoding
+# cabac
+# cavlc
+##########################
+# nv_coder = auto
+
+
 ##############################################
 # Some configurable parameters, are merely toggles for specific features
 # The first occurrence turns it on, the second occurence turns it off, the third occurence turns it on again, etc, etc
diff --git a/sunshine/config.cpp b/sunshine/config.cpp
index 5eeabf71..2b027e1c 100644
--- a/sunshine/config.cpp
+++ b/sunshine/config.cpp
@@ -15,17 +15,97 @@
 #define APPS_JSON_PATH SUNSHINE_ASSETS_DIR "/" APPS_JSON
 namespace config {
 using namespace std::literals;
+
+namespace nv {
+enum preset_e : int { // values the nv_preset option is parsed into (see preset_from_view)
+  _default = 0, // selected by the config text "default"
+  slow,
+  medium,
+  fast,
+  hp,
+  hq,
+  bd,
+  ll_default,
+  llhq,
+  llhp,
+  lossless_default, // lossless presets must be the last ones
+  lossless_hp,
+};
+
+enum rc_e : int { // values the nv_rc option is parsed into (see rc_from_view); NOTE(review): numeric values presumably mirror the encoder's rate-control flags — confirm
+  constqp   = 0x0,       /**< Constant QP mode */
+  vbr       = 0x1,       /**< Variable bitrate mode */
+  cbr       = 0x2,       /**< Constant bitrate mode */
+  cbr_ld_hq = 0x8,       /**< low-delay CBR, high quality */
+  cbr_hq    = 0x10,      /**< CBR, high quality (slower) */
+  vbr_hq    = 0x20       /**< VBR, high quality (slower) */
+};
+
+enum coder_e : int { // values coder_from_view produces for the nv_coder option
+  _auto = 0, // selected by the config text "auto"
+  cabac, // selected by "cabac" or "ac"
+  cavlc // selected by "cavlc" or "vlc"
+};
+
+std::optional<preset_e> preset_from_view(const std::string_view &preset) { // nv_preset text -> preset_e; std::nullopt when the name is not recognised
+#define _CONVERT_(x) if(preset == #x##sv) return x
+  _CONVERT_(slow);
+  _CONVERT_(medium);
+  _CONVERT_(fast);
+  _CONVERT_(hp);
+  _CONVERT_(hq); // listed in sunshine.conf and present in preset_e, so it has to be accepted here as well
+  _CONVERT_(bd);
+  _CONVERT_(ll_default);
+  _CONVERT_(llhq);
+  _CONVERT_(llhp);
+  _CONVERT_(lossless_default);
+  _CONVERT_(lossless_hp);
+#undef _CONVERT_
+  return preset == "default"sv ? std::optional<preset_e> { _default } : std::nullopt; // "default" cannot use the macro: the enumerator is spelled _default
+}
+
+// Maps an nv_rc config string to rc_e; every other text yields std::nullopt.
+std::optional<rc_e> rc_from_view(const std::string_view &rc) {
+  if(rc == "constqp"sv) return constqp;
+  if(rc == "vbr"sv) return vbr;
+  if(rc == "cbr"sv) return cbr;
+  if(rc == "cbr_hq"sv) return cbr_hq;
+  if(rc == "vbr_hq"sv) return vbr_hq;
+  if(rc == "cbr_ld_hq"sv) return cbr_ld_hq;
+
+  return std::nullopt;
+}
+
+// Maps an nv_coder config string to a coder_e value, or -1 when the text is not recognised.
+int coder_from_view(const std::string_view &coder) {
+  const bool is_cabac = coder == "cabac"sv || coder == "ac"sv;
+  const bool is_cavlc = coder == "cavlc"sv || coder == "vlc"sv;
+  if(coder == "auto"sv) return _auto;
+  return is_cabac ? cabac : is_cavlc ? cavlc : -1;
+}
+}
+
 video_t video {
   0, // crf
   28, // qp
 
-  2, // min_threads
-
   0, // hevc_mode
-  "superfast"s, // preset
-  "zerolatency"s, // tune
+
+  1, // min_threads
+  {
+    "superfast"s, // preset
+    "zerolatency"s, // tune
+  }, // software
+
+  {
+    nv::llhq,
+    std::nullopt,
+    -1
+  }, // nv
+
+  {}, // encoder
   {}, // adapter_name
-  {} // output_name
+  {}  // output_name
 };
 
 audio_t audio {};
@@ -138,6 +218,37 @@ void int_f(std::unordered_map<std::string, std::string> &vars, const std::string
   vars.erase(it);
 }
 
+// Parses the text stored under `name` into `input` and drops that entry from `vars`; a missing key leaves `input` untouched.
+void int_f(std::unordered_map<std::string, std::string> &vars, const std::string &name, std::optional<int> &input) {
+  const auto entry = vars.find(name);
+  if(entry != std::end(vars)) {
+    auto &text = entry->second;
+
+    input = util::from_chars(&text[0], &text[0] + text.size());
+
+    // The key has been consumed
+    vars.erase(entry);
+  }
+}
+
+// Converts the text that string_f yields for `name` with `f` and stores the result; `input` is untouched when that text is empty.
+template<class F>
+void int_f(std::unordered_map<std::string, std::string> &vars, const std::string &name, int &input, F &&f) {
+  std::string text;
+  string_f(vars, name, text);
+  if(text.empty()) return;
+  input = f(text);
+}
+
+// Optional-valued variant: converts the text that string_f yields for `name` with `f`; `input` is untouched when that text is empty.
+template<class F>
+void int_f(std::unordered_map<std::string, std::string> &vars, const std::string &name, std::optional<int> &input, F &&f) {
+  std::string text;
+  string_f(vars, name, text);
+  if(text.empty()) return;
+  input = f(text);
+}
+
 void int_between_f(std::unordered_map<std::string, std::string> &vars, const std::string &name, int &input, const std::pair<int, int> &range) {
   int temp = input;
 
@@ -149,6 +260,28 @@ void int_between_f(std::unordered_map<std::string, std::string> &vars, const std
   }
 }
 
+// Interprets `boolean` case-insensitively as a flag. The argument is lower-cased in place.
+bool to_bool(std::string &boolean) {
+  std::transform(std::begin(boolean), std::end(boolean), std::begin(boolean), [](unsigned char ch) { return (char)std::tolower(ch); }); // for_each would discard the result; unsigned char keeps tolower defined for non-ASCII bytes
+  return
+    boolean == "true"sv   ||
+    boolean == "yes"sv    ||
+    boolean == "enable"sv ||
+    (std::find(std::begin(boolean), std::end(boolean), '1') != std::end(boolean)); // any '1' character anywhere in the text counts as true
+}
+void bool_f(std::unordered_map<std::string, std::string> &vars, const std::string  &name, int &input) { // Stores 1 or 0 in `input` according to to_bool(); `input` is untouched when no text was obtained
+  std::string tmp;
+  string_restricted_f(vars, name, tmp, { // NOTE(review): presumably only the listed spellings are accepted — confirm against string_restricted_f
+    "enable"sv, "dis" // NOTE(review): "dis" looks truncated (perhaps "disable"), and "true"/"yes"/"1" handled by to_bool are not listed — confirm
+  });
+
+  if(tmp.empty()) {
+    return;
+  }
+
+  input = to_bool(tmp) ? 1 : 0;
+}
+
 void print_help(const char *name) {
   std::cout <<
     "Usage: "sv << name << " [options] [/path/to/configuration_file]"sv << std::endl <<
@@ -190,10 +323,14 @@ void apply_config(std::unordered_map<std::string, std::string> &&vars) {
   int_f(vars, "qp", video.qp);
   int_f(vars, "min_threads", video.min_threads);
   int_between_f(vars, "hevc_mode", video.hevc_mode, {
-    0, 2
+    0, 3
   });
-  string_f(vars, "preset", video.preset);
-  string_f(vars, "tune", video.tune);
+  string_f(vars, "sw_preset", video.sw.preset);
+  string_f(vars, "sw_tune", video.sw.tune);
+  int_f(vars, "nv_preset", video.nv.preset, nv::preset_from_view);
+  int_f(vars, "nv_rc", video.nv.rc, nv::rc_from_view); // rate control has its own field; writing it to nv.preset would clobber the preset
+  int_f(vars, "nv_coder", video.nv.coder, nv::coder_from_view);
+  string_f(vars, "encoder", video.encoder);
   string_f(vars, "adapter_name", video.adapter_name);
   string_f(vars, "output_name", video.output_name);
 
diff --git a/sunshine/config.h b/sunshine/config.h
index 93af19ee..419a5933 100644
--- a/sunshine/config.h
+++ b/sunshine/config.h
@@ -4,6 +4,7 @@
 #include <chrono>
 #include <string>
 #include <bitset>
+#include <optional>
 
 namespace config {
 struct video_t {
@@ -11,12 +12,21 @@ struct video_t {
   int crf; // higher == more compression and less quality
   int qp; // higher == more compression and less quality, ignored if crf != 0
 
-  int min_threads; // Minimum number of threads/slices for CPU encoding
-
   int hevc_mode;
-  std::string preset;
-  std::string tune;
 
+  int min_threads; // Minimum number of threads/slices for CPU encoding
+  struct {
+    std::string preset;
+    std::string tune;
+  } sw;
+
+  struct {
+    std::optional<int> preset;
+    std::optional<int> rc;
+    int coder;
+  } nv;
+
+  std::string encoder;
   std::string adapter_name;
   std::string output_name;
 };
diff --git a/sunshine/main.cpp b/sunshine/main.cpp
index 6d0e6e65..c21f81fc 100644
--- a/sunshine/main.cpp
+++ b/sunshine/main.cpp
@@ -140,7 +140,9 @@ int main(int argc, char *argv[]) {
   auto deinit_guard = platf::init();
   input::init();
   reed_solomon_init();
-  video::init();
+  if(video::init()) {
+    return 2;
+  }
 
   task_pool.start(1);
 
diff --git a/sunshine/nvhttp.cpp b/sunshine/nvhttp.cpp
index fcbd69bb..261ec05d 100644
--- a/sunshine/nvhttp.cpp
+++ b/sunshine/nvhttp.cpp
@@ -464,13 +464,13 @@ void serverinfo(std::shared_ptr<typename SimpleWeb::ServerBase<T>::Response> res
   tree.put("root.GfeVersion", GFE_VERSION);
   tree.put("root.uniqueid", unique_id);
   tree.put("root.mac", platf::get_mac_address(request->local_endpoint_address()));
-  tree.put("root.MaxLumaPixelsHEVC", config::video.hevc_mode > 0 ? "1869449984" : "0");
+  tree.put("root.MaxLumaPixelsHEVC", config::video.hevc_mode > 1 ? "1869449984" : "0");
   tree.put("root.LocalIP", request->local_endpoint_address());
 
-  if(config::video.hevc_mode == 2) {
+  if(config::video.hevc_mode == 3) {
     tree.put("root.ServerCodecModeSupport", "3843");
   }
-  else if(config::video.hevc_mode == 1) {
+  else if(config::video.hevc_mode == 2) {
     tree.put("root.ServerCodecModeSupport", "259");
   }
   else {
@@ -522,7 +522,7 @@ void applist(resp_https_t response, req_https_t request) {
   for(auto &proc : proc::proc.get_apps()) {
     pt::ptree app;
 
-    app.put("IsHdrSupported"s, config::video.hevc_mode == 2 ? 1 : 0);
+    app.put("IsHdrSupported"s, config::video.hevc_mode == 3 ? 1 : 0);
     app.put("AppTitle"s, proc.name);
     app.put("ID"s, ++x);
 
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 3988b344..2278a148 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -888,7 +888,6 @@ public:
     img->texture.reset(tex_p);
     img->height      = height;
     img->width       = width;
-    img->data        = (std::uint8_t*)tex_p;
     img->pixel_pitch = 4;
 
     return 0;
@@ -916,24 +915,6 @@ public:
 
     return hwdevice;
   }
-
-  int init() {
-    if(display_base_t::init()) {
-      return -1;
-    }
-
-    multithread_t::pointer multithread_p  {};
-    auto status = device->QueryInterface(__uuidof(multithread_t::element_type), (void**)&multithread_p);
-    multithread_t multithread { multithread_p };
-
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Couldn't query Multithread interface [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-    multithread->SetMultithreadProtected(true);
-
-    return 0;
-  }
 };
 
 const char *format_str[] = {
diff --git a/sunshine/rtsp.cpp b/sunshine/rtsp.cpp
index 565b3cde..70aac786 100644
--- a/sunshine/rtsp.cpp
+++ b/sunshine/rtsp.cpp
@@ -97,7 +97,7 @@ public:
             std::vector<char> full_payload;
 
             auto old_msg = std::move(_queue_packet);
-            TUPLE_2D_REF(_, old_packet, old_msg);
+            auto &old_packet = old_msg.second;
 
             std::string_view new_payload{(char *) packet->data, packet->dataLength};
             std::string_view old_payload{(char *) old_packet->data, old_packet->dataLength};
@@ -274,7 +274,7 @@ void cmd_describe(rtsp_server_t *server, net::peer_t peer, msg_t&& req) {
   option.content = const_cast<char*>(seqn_str.c_str());
 
   std::string_view payload;
-  if(config::video.hevc_mode == 0) {
+  if(config::video.hevc_mode == 1) {
     payload = "surround-params=NONE"sv;
   }
   else {
@@ -404,7 +404,7 @@ void cmd_announce(rtsp_server_t *server, net::peer_t peer, msg_t &&req) {
     return;
   }
 
-  if(config.monitor.videoFormat != 0 && config::video.hevc_mode == 0) {
+  if(config.monitor.videoFormat != 0 && config::video.hevc_mode == 1) {
     BOOST_LOG(warning) << "HEVC is disabled, yet the client requested HEVC"sv;
 
     respond(server->host(), peer, &option, 400, "BAD REQUEST", req->sequenceNumber, {});
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index d4bbc936..6e6933c9 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -38,20 +38,6 @@ void free_packet(AVPacket *packet) {
 }
 
 namespace nv {
-enum class preset_e : int {
-    _default = 0,
-    slow,
-    medium,
-    fast,
-    hp,
-    hq,
-    bd,
-    ll_default,
-    llhq,
-    llhp,
-    lossless_default, // lossless presets must be the last ones
-    lossless_hp,
-};
 
 enum class profile_h264_e : int {
   baseline,
@@ -79,16 +65,23 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
 util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
 
 struct encoder_t {
+  std::string_view name;
   enum flag_e {
     PASSED, // Is supported
     REF_FRAMES_RESTRICT, // Set maximum reference frames
     REF_FRAMES_AUTOSELECT, // Allow encoder to select maximum reference frames (If !REF_FRAMES_RESTRICT --> REF_FRAMES_AUTOSELECT)
+    DYNAMIC_RANGE,
     MAX_FLAGS
   };
 
   struct option_t {
+    KITTY_DEFAULT_CONSTR(option_t)
+    option_t(const option_t &) = default;
+
     std::string name;
-    std::variant<int, int*, std::string, std::string*> value;
+    std::variant<int, int*, std::optional<int>*, std::string, std::string*> value;
+
+    option_t(std::string &&name, decltype(value) &&value) : name { std::move(name) }, value  { std::move(value) } {}
   };
 
   struct {
@@ -105,6 +98,8 @@ struct encoder_t {
 
   struct {
     std::vector<option_t> options;
+    std::optional<option_t> crf, qp;
+
     std::string name;
     std::bitset<MAX_FLAGS> capabilities;
 
@@ -118,12 +113,27 @@ struct encoder_t {
   } hevc, h264;
 
   bool system_memory;
+  bool hevc_mode;
 
   std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
   std::function<util::Either<buffer_t, int>(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx;
 };
 
 struct session_t {
+  session_t() = default;
+  session_t(session_t&&) = default;
+
+  // Ensure objects are destroyed in the correct order
+  session_t &operator=(session_t &&other) {
+    sws_color_format = other.sws_color_format;
+    sw_format        = other.sw_format;
+    frame            = std::move(other.frame);
+    ctx              = std::move(other.ctx);
+    hwdevice         = std::move(other.hwdevice);
+
+    return *this;
+  }
+
   buffer_t hwdevice;
 
   ctx_t ctx;
@@ -159,66 +169,6 @@ struct encode_session_t {
 using encode_session_ctx_queue_t = safe::queue_t<encode_session_ctx_t>;
 using encode_e = platf::capture_e;
 
-struct capture_synced_ctx_t {
-  encode_session_ctx_queue_t encode_session_ctx_queue;
-};
-
-int start_capture_sync(capture_synced_ctx_t &ctx);
-void end_capture_sync(capture_synced_ctx_t &ctx);
-auto capture_thread_sync = safe::make_shared<capture_synced_ctx_t>(start_capture_sync, end_capture_sync);
-
-static encoder_t nvenc {
-  { (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 },
-  AV_HWDEVICE_TYPE_D3D11VA,
-  AV_PIX_FMT_D3D11,
-  AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
-  {
-    { {"forced-idr"s, 1} }, "hevc_nvenc"s
-  },
-  {
-    {
-      { "forced-idr"s, 1},
-      { "preset"s , (int)nv::preset_e::llhq },
-    }, "h264_nvenc"s
-  },
-  false,
-
-  nv_d3d_img_to_frame,
-  nv_d3d_make_hwdevice_ctx
-};
-
-static encoder_t software {
-  { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
-  AV_HWDEVICE_TYPE_NONE,
-  AV_PIX_FMT_NONE,
-  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
-  {
-    // x265's Info SEI is so long that it causes the IDR picture data to be
-    // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
-    // It also looks like gop_size isn't passed on to x265, so we have to set
-    // 'keyint=-1' in the parameters ourselves.
-    {
-      { "x265-params"s, "info=0:keyint=-1"s },
-      { "preset"s, &config::video.preset },
-      { "tune"s, &config::video.tune }
-    }, "libx265"s
-  },
-  {
-    {
-      { "preset"s, &config::video.preset },
-      { "tune"s, &config::video.tune }
-    }, "libx264"s
-  },
-  true,
-
-  sw_img_to_frame,
-  nullptr
-};
-
-static std::vector<encoder_t> encoders {
-  nvenc, software
-};
-
 struct capture_ctx_t {
   img_event_t images;
   std::chrono::nanoseconds delay;
@@ -233,6 +183,91 @@ struct capture_thread_ctx_t {
   util::sync_t<std::weak_ptr<platf::display_t>> display_wp;
 };
 
+struct capture_synced_ctx_t {
+  encode_session_ctx_queue_t encode_session_ctx_queue;
+};
+
+int start_capture_sync(capture_synced_ctx_t &ctx);
+void end_capture_sync(capture_synced_ctx_t &ctx);
+int start_capture(capture_thread_ctx_t &ctx);
+void end_capture(capture_thread_ctx_t &ctx);
+
+// Keep a reference counter to ensure the capture thread only runs when other threads have a reference to the capture thread
+auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
+auto capture_thread_sync = safe::make_shared<capture_synced_ctx_t>(start_capture_sync, end_capture_sync);
+
+static encoder_t nvenc {
+  "nvenc"sv,
+  { (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 },
+  AV_HWDEVICE_TYPE_D3D11VA,
+  AV_PIX_FMT_D3D11,
+  AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
+  {
+    {
+      { "forced-idr"s, 1 },
+      { "zerolatency"s, 1 },
+      { "preset"s, &config::video.nv.preset },
+      { "rc"s, &config::video.nv.rc }
+    },
+    std::nullopt, std::nullopt,
+    "hevc_nvenc"s,
+  },
+  {
+    {
+      { "forced-idr"s, 1 },
+      { "zerolatency"s, 1 },
+      { "preset"s, &config::video.nv.preset },
+      { "rc"s, &config::video.nv.rc },
+      { "coder"s, &config::video.nv.coder }
+    },
+    std::nullopt, std::make_optional<encoder_t::option_t>({"qp"s, &config::video.qp}),
+    "h264_nvenc"s
+  },
+  false,
+  true,
+
+  nv_d3d_img_to_frame,
+  nv_d3d_make_hwdevice_ctx
+};
+
+static encoder_t software {
+  "software"sv,
+  { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 },
+  AV_HWDEVICE_TYPE_NONE,
+  AV_PIX_FMT_NONE,
+  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
+  {
+    // x265's Info SEI is so long that it causes the IDR picture data to be
+    // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic.
+    // It also looks like gop_size isn't passed on to x265, so we have to set
+    // 'keyint=-1' in the parameters ourselves.
+    {
+      { "x265-params"s, "info=0:keyint=-1"s },
+      { "preset"s, &config::video.sw.preset },
+      { "tune"s, &config::video.sw.tune }
+    },
+    std::make_optional<encoder_t::option_t>("crf"s, &config::video.crf), std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
+    "libx265"s
+  },
+  {
+    {
+      { "preset"s, &config::video.sw.preset },
+      { "tune"s, &config::video.sw.tune }
+    },
+    std::make_optional<encoder_t::option_t>("crf"s, &config::video.crf), std::make_optional<encoder_t::option_t>("qp"s, &config::video.qp),
+    "libx264"s
+  },
+  true,
+  false,
+
+  sw_img_to_frame,
+  nullptr
+};
+
+static std::vector<encoder_t> encoders {
+  nvenc, software
+};
+
 platf::dev_type_e map_dev_type(AVHWDeviceType type) {
   switch(type) {
     case AV_HWDEVICE_TYPE_D3D11VA:
@@ -502,7 +537,15 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
   bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
 
   auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
-  assert(video_format[encoder_t::PASSED]);
+  if(!video_format[encoder_t::PASSED]) {
+    BOOST_LOG(error) << encoder.name << ": "sv << video_format.name  << " mode not supported"sv;
+    return std::nullopt;
+  }
+
+  if(config.dynamicRange && !video_format[encoder_t::DYNAMIC_RANGE]) {
+    BOOST_LOG(error) << video_format.name << ": dynamic range not supported"sv;
+    return std::nullopt;
+  }
 
   auto codec = avcodec_find_encoder_by_name(video_format.name.c_str());
   if(!codec) {
@@ -606,18 +649,24 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     // most efficient encode, but we may want to provide more slices than
     // requested to ensure we have enough parallelism for good performance.
     ctx->slices = std::max(config.slicesPerFrame, config::video.min_threads);
-    ctx->thread_type = FF_THREAD_SLICE;
-    ctx->thread_count = ctx->slices;
   }
 
+  ctx->thread_type = FF_THREAD_SLICE;
+  ctx->thread_count = ctx->slices;
+
   AVDictionary *options {nullptr};
-  for(auto &option : video_format.options) {
+  auto handle_option = [&options](const encoder_t::option_t &option) {
     std::visit(util::overloaded {
       [&](int v) { av_dict_set_int(&options, option.name.c_str(), v, 0); },
       [&](int *v) { av_dict_set_int(&options, option.name.c_str(), *v, 0); },
+      [&](std::optional<int> *v) { if(*v) av_dict_set_int(&options, option.name.c_str(), **v, 0); },
       [&](const std::string &v) { av_dict_set(&options, option.name.c_str(), v.c_str(), 0); },
-      [&](std::string *v) { av_dict_set(&options, option.name.c_str(), v->c_str(), 0); }
+      [&](std::string *v) { if(!v->empty()) av_dict_set(&options, option.name.c_str(), v->c_str(), 0); }
     }, option.value);
+  };
+
+  for(auto &option : video_format.options) {
+    handle_option(option);
   }
 
   if(config.bitrate > 500) {
@@ -627,11 +676,15 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     ctx->bit_rate = bitrate;
     ctx->rc_min_rate = bitrate;
   }
-  else if(config::video.crf != 0) {
-    av_dict_set_int(&options, "crf", config::video.crf, 0);
+  else if(video_format.crf && config::video.crf != 0) {
+    handle_option(*video_format.crf);
+  }
+  else if(video_format.qp) {
+    handle_option(*video_format.qp);
   }
   else {
-    av_dict_set_int(&options, "qp", config::video.qp, 0);
+    BOOST_LOG(error) << "Couldn't set video quality: encoder "sv << encoder.name << " doesn't support either crf or qp"sv;
+    return std::nullopt;
   }
 
   avcodec_open2(ctx.get(), codec, &options);
@@ -694,8 +747,11 @@ void encode_run(
       session->frame->pict_type = AV_PICTURE_TYPE_I;
 
       auto event = idr_events->pop();
-      TUPLE_2D_REF(_, end, *event);
+      if(!event) {
+        return;
+      }
 
+      auto end = event->second;
       frame_nr = end;
       key_frame_nr = end + config.framerate;
     }
@@ -750,9 +806,8 @@ void encode_run(
     }
     
     if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) {
-      BOOST_LOG(fatal) << "Could not encode video packet"sv;
-      log_flush();
-      std::abort();
+      BOOST_LOG(error) << "Could not encode video packet"sv;
+      return;
     }
 
     session->frame->pict_type = AV_PICTURE_TYPE_NONE;
@@ -856,18 +911,12 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
     auto now = std::chrono::steady_clock::now();
     
     next_frame = now + 1s;
-    {auto pos = std::begin(encode_sessions);while( pos != std::end(encode_sessions)) {
+    KITTY_WHILE_LOOP(auto pos = std::begin(encode_sessions), pos != std::end(encode_sessions), {
       auto ctx = pos->ctx;
       if(ctx->shutdown_event->peek()) {
         // Let waiting thread know it can delete shutdown_event
         ctx->join_event->raise(true);
-
-        //FIXME: Causes segfault even if (pos + 1) != std::end()
-        // *pos = std::move(*(pos + 1));
-
-        {encode_session_t t { std::move(*pos) };}
         
-        //FIXME: encode_session_t = std::move(encode_session_t) <=> segfault
         pos = encode_sessions.erase(pos);
         encode_session_ctxs.erase(std::find_if(std::begin(encode_session_ctxs), std::end(encode_session_ctxs), [&ctx_p=ctx](auto &ctx) {
           return ctx.get() == ctx_p;
@@ -918,15 +967,16 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
       }
 
       if(encode(ctx->frame_nr++, pos->session.ctx, pos->session.frame, ctx->packets, ctx->channel_data)) {
-        BOOST_LOG(fatal) << "Could not encode video packet"sv;
-        log_flush();
-        std::abort();
+        BOOST_LOG(error) << "Could not encode video packet"sv;
+        ctx->shutdown_event->raise(true);
+
+        continue;
       }
 
       pos->session.frame->pict_type = AV_PICTURE_TYPE_NONE;
 
       ++pos;
-    }}
+    })
 
     img_tmp = nullptr;
   }
@@ -964,28 +1014,11 @@ int start_capture_sync(capture_synced_ctx_t &ctx) {
 
 void end_capture_sync(capture_synced_ctx_t &ctx) {}
 
-void capture(
-  safe::signal_t *shutdown_event,
-  packet_queue_t packets,
-  idr_event_t idr_events,
-  config_t config,
-  void *channel_data) {
-  
-  safe::signal_t join_event;
-  auto ref = capture_thread_sync.ref();
-  ref->encode_session_ctx_queue.raise(encode_session_ctx_t {
-    shutdown_event, &join_event, packets, idr_events, config, 1, 1, channel_data
-  });
-
-  // Wait for join signal
-  join_event.view();
-}
-
 void capture_async(
   safe::signal_t *shutdown_event,
-  packet_queue_t packets,
-  idr_event_t idr_events,
-  config_t config,
+  packet_queue_t &packets,
+  idr_event_t &idr_events,
+  config_t &config,
   void *channel_data) {
 
   auto images = std::make_shared<img_event_t::element_type>();
@@ -994,8 +1027,6 @@ void capture_async(
     shutdown_event->raise(true);
   });
 
-  // Keep a reference counter to ensure the Fcapture thread only runs when other threads have a reference to the capture thread
-  static auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
   auto ref = capture_thread.ref();
   if(!ref) {
     return;
@@ -1040,6 +1071,28 @@ void capture_async(
   }
 }
 
+void capture(
+  safe::signal_t *shutdown_event,
+  packet_queue_t packets,
+  idr_event_t idr_events,
+  config_t config,
+  void *channel_data) {
+  
+  if(encoders.front().system_memory) {
+    capture_async(shutdown_event, packets, idr_events, config, channel_data);
+  }
+  else {
+    safe::signal_t join_event;
+    auto ref = capture_thread_sync.ref();
+    ref->encode_session_ctx_queue.raise(encode_session_ctx_t {
+      shutdown_event, &join_event, packets, idr_events, config, 1, 1, channel_data
+    }); 
+
+    // Wait for join signal
+    join_event.view();
+  }
+}
+
 bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &encoder, const config_t &config) {
   reset_display(disp, encoder.dev_type);
   if(!disp) {
@@ -1099,31 +1152,15 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
 bool validate_encoder(encoder_t &encoder) {
   std::shared_ptr<platf::display_t> disp;
 
+  auto force_hevc = config::video.hevc_mode >= 2;
+  auto test_hevc = force_hevc || (config::video.hevc_mode == 0 && encoder.hevc_mode);
+
   encoder.h264.capabilities.set();
   encoder.hevc.capabilities.set();
 
   // First, test encoder viability
-  config_t config_max_ref_frames {
-    1920, 1080,
-    60,
-    1000,
-    1,
-    1,
-    1,
-    0,
-    0
-  };
-
-  config_t config_autoselect {
-    1920, 1080,
-    60,
-    1000,
-    1,
-    0,
-    1,
-    0,
-    0
-  };
+  config_t config_max_ref_frames { 1920, 1080, 60, 1000, 1, 1, 1, 0, 0 };
+  config_t config_autoselect     { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0 };
 
   auto max_ref_frames_h264 = validate_config(disp, encoder, config_max_ref_frames);
   auto autoselect_h264     = validate_config(disp, encoder, config_autoselect);
@@ -1132,20 +1169,30 @@ bool validate_encoder(encoder_t &encoder) {
     return false;
   }
 
-  config_max_ref_frames.videoFormat = 1;
-  config_autoselect.videoFormat = 1;
-
-  auto max_ref_frames_hevc = validate_config(disp, encoder, config_max_ref_frames);
-  auto autoselect_hevc     = validate_config(disp, encoder, config_autoselect);
-
   encoder.h264[encoder_t::REF_FRAMES_RESTRICT] = max_ref_frames_h264;
   encoder.h264[encoder_t::REF_FRAMES_AUTOSELECT] = autoselect_h264;
   encoder.h264[encoder_t::PASSED] = true;
-  encoder.hevc[encoder_t::REF_FRAMES_RESTRICT] = max_ref_frames_hevc;
-  encoder.hevc[encoder_t::REF_FRAMES_AUTOSELECT] = autoselect_hevc;
-  encoder.hevc[encoder_t::PASSED] = max_ref_frames_hevc || autoselect_hevc;
 
-  std::vector<std::pair<encoder_t::flag_e, config_t>> configs; 
+  if(test_hevc) {
+    config_max_ref_frames.videoFormat = 1;
+    config_autoselect.videoFormat = 1;
+
+    auto max_ref_frames_hevc = validate_config(disp, encoder, config_max_ref_frames);
+    auto autoselect_hevc     = validate_config(disp, encoder, config_autoselect);
+
+    // If HEVC must be supported, but it is not supported
+    if(force_hevc && !max_ref_frames_hevc && !autoselect_hevc) {
+      return false;
+    }
+
+    encoder.hevc[encoder_t::REF_FRAMES_RESTRICT] = max_ref_frames_hevc;
+    encoder.hevc[encoder_t::REF_FRAMES_AUTOSELECT] = autoselect_hevc;
+  }
+  encoder.hevc[encoder_t::PASSED] = test_hevc;
+
+  std::vector<std::pair<encoder_t::flag_e, config_t>> configs {
+    { encoder_t::DYNAMIC_RANGE, { 1920, 1080, 60, 1000, 1, 0, 1, 1, 1 } }
+  };
   for(auto &[flag, config] : configs) {
     auto h264 = config;
     auto hevc = config;
@@ -1154,26 +1201,53 @@ bool validate_encoder(encoder_t &encoder) {
     hevc.videoFormat = 1;
 
     encoder.h264[flag] = validate_config(disp, encoder, h264);
-    encoder.hevc[flag] = validate_config(disp, encoder, hevc);
+    if(test_hevc && encoder.hevc[encoder_t::PASSED]) {
+      encoder.hevc[flag] = validate_config(disp, encoder, hevc);
+    }
   }
-  
+
   return true;
 }
 
-void init() {
+int init() {
   KITTY_WHILE_LOOP(auto pos = std::begin(encoders), pos != std::end(encoders), {
-    if(!validate_encoder(*pos)) {
+    if(
+      (!config::video.encoder.empty() && pos->name != config::video.encoder)  ||
+      !validate_encoder(*pos)                                                 ||
+      (config::video.hevc_mode == 3 && !pos->hevc[encoder_t::DYNAMIC_RANGE])
+    ) {
       pos = encoders.erase(pos);
 
       continue;
     }
 
-    ++pos;
+    break;
   })
 
-  for(auto &encoder : encoders) {
-    BOOST_LOG(info) << "Found encoder ["sv << encoder.h264.name << ", "sv << encoder.hevc.name << ']';
+  if(encoders.empty()) {
+    if(config::video.encoder.empty())  {
+      BOOST_LOG(fatal) << "Couldn't find any encoder"sv;
+    }
+    else {
+      BOOST_LOG(fatal) << "Couldn't find any encoder matching ["sv << config::video.encoder << ']';
+    }
+
+    return -1;
   }
+
+  auto &encoder = encoders.front();
+  if(encoder.hevc[encoder_t::PASSED]) {
+    BOOST_LOG(info) << "Found encoder "sv << encoder.name << ": ["sv << encoder.h264.name << ", "sv << encoder.hevc.name << ']';
+  }
+  else {
+    BOOST_LOG(info) << "Found encoder "sv  << encoder.name << ": ["sv << encoder.h264.name << ']';
+  }
+
+  if(config::video.hevc_mode == 0) {
+    config::video.hevc_mode = encoder.hevc[encoder_t::PASSED] ? (encoder.hevc[encoder_t::DYNAMIC_RANGE] ? 3 : 2) : 1;
+  }
+
+  return 0;
 }
 
 void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
@@ -1211,11 +1285,6 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
   frame->width = img.width;
 }
 
-void nvenc_lock(void *lock_p) {
-}
-void nvenc_unlock(void *lock_p) {
-}
-
 util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
   buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
   auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
diff --git a/sunshine/video.h b/sunshine/video.h
index fc3c1426..e4560e05 100644
--- a/sunshine/video.h
+++ b/sunshine/video.h
@@ -57,7 +57,7 @@ void capture(
   config_t config,
   void *channel_data);
 
-void init();
+int init();
 }
 
 #endif //SUNSHINE_VIDEO_H

From ad7f93c3cba9955334999326271acfd8eaa0f668 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Tue, 14 Apr 2020 00:59:43 +0300
Subject: [PATCH 10/25] Switch between nvenc and software encoding

---
 sunshine/stream.cpp | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/sunshine/stream.cpp b/sunshine/stream.cpp
index 17013b11..e3f504e0 100644
--- a/sunshine/stream.cpp
+++ b/sunshine/stream.cpp
@@ -604,21 +604,19 @@ void videoBroadcastThread(safe::signal_t *shutdown_event, udp::socket &sock, vid
 
     payload = {(char *) payload_new.data(), payload_new.size()};
 
-    // // make sure moonlight recognizes the nalu code for IDR frames
-    // if (packet->flags & AV_PKT_FLAG_KEY) {
-    //   // TODO: Not all encoders encode their IDR frames with the 4 byte NALU prefix
-    //   std::string_view frame_old = "\000\000\001e"sv;
-    //   std::string_view frame_new = "\000\000\000\001e"sv;
-    //   if(session->config.monitor.videoFormat != 0) {
-    //     frame_old = "\000\000\001("sv;
-    //     frame_new = "\000\000\000\001("sv;
-    //   }
+    // make sure moonlight recognizes the nalu code for IDR frames
+    if (packet->flags & AV_PKT_FLAG_KEY) {
+      // TODO: Not all encoders encode their IDR frames with the 4 byte NALU prefix
+      std::string_view frame_old = "\000\000\001e"sv;
+      std::string_view frame_new = "\000\000\000\001e"sv;
+      if(session->config.monitor.videoFormat != 0) {
+        frame_old = "\000\000\001("sv;
+        frame_new = "\000\000\000\001("sv;
+      }
 
-    //   assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) ==
-    //          std::end(payload));
-    //   payload_new = replace(payload, frame_old, frame_new);
-    //   payload = {(char *) payload_new.data(), payload_new.size()};
-    // }
+      payload_new = replace(payload, frame_old, frame_new);
+      payload = {(char *) payload_new.data(), payload_new.size()};
+    }
 
     // insert packet headers
     auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE;

From 525e8b3c6d5293c8854067a0189f1b759a759d42 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Wed, 15 Apr 2020 19:16:20 +0200
Subject: [PATCH 11/25] Refactor video.cpp

---
 pre-compiled                       |   2 +-
 sunshine/platform/common.h         |  18 +-
 sunshine/platform/windows_dxgi.cpp |  49 +--
 sunshine/utility.h                 |  66 ++++
 sunshine/video.cpp                 | 522 +++++++++++++++--------------
 5 files changed, 367 insertions(+), 290 deletions(-)

diff --git a/pre-compiled b/pre-compiled
index afd9a9bb..8ec14fd4 160000
--- a/pre-compiled
+++ b/pre-compiled
@@ -1 +1 @@
-Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416
+Subproject commit 8ec14fd4a40d85443084b283ab24415d729984cb
diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index e1b3c500..e0129060 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -72,15 +72,17 @@ public:
   virtual ~img_t() = default;
 };
 
-struct hwdevice_ctx_t {
-  void *hwdevice {};
+struct hwdevice_t {
+  void *data {};
+  platf::img_t *img {};
 
-  virtual const platf::img_t*const convert(platf::img_t &img) {
-    return nullptr;
+  virtual int convert(platf::img_t &img) {
+    return -1;
   }
+
   virtual void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {};
 
-  virtual ~hwdevice_ctx_t() = default;
+  virtual ~hwdevice_t() = default;
 };
 
 enum class capture_e : int {
@@ -97,11 +99,13 @@ public:
 
   virtual int dummy_img(img_t *img) = 0;
 
-  virtual std::shared_ptr<hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) {
-    return std::make_shared<hwdevice_ctx_t>();
+  virtual std::shared_ptr<hwdevice_t> make_hwdevice(int width, int height, pix_fmt_e pix_fmt) {
+    return std::make_shared<hwdevice_t>();
   }
 
   virtual ~display_t() = default;
+
+  int width, height;
 };
 
 class mic_t {
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 2278a148..c34931d1 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -290,9 +290,9 @@ void blend_cursor(const cursor_t &cursor, img_t &img) {
   }
 }
 
-class hwdevice_ctx_t : public platf::hwdevice_ctx_t {
+class hwdevice_t : public platf::hwdevice_t {
 public:
-  const platf::img_t*const convert(platf::img_t &img_base) override {
+  int convert(platf::img_t &img_base) override {
     auto &img = (img_d3d_t&)img_base;
 
     auto it = texture_to_processor_in.find(img.texture.get());
@@ -304,7 +304,7 @@ public:
       if(FAILED(status)) {
         BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv
          << util::hex(status).to_string_view() << ']';
-        return nullptr;
+        return -1;
       }
       it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first;
     }
@@ -315,10 +315,10 @@ public:
     auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
-      return nullptr;
+      return -1;
     }
 
-    return &this->img;
+    return 0;
   }
 
   void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
@@ -329,6 +329,8 @@ public:
   int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
     HRESULT status;
 
+    platf::hwdevice_t::img = &img;
+
     video::device_t::pointer vdevice_p;
     status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
     if(FAILED(status)) {
@@ -403,13 +405,13 @@ public:
     processor_out.reset(processor_out_p);
 
     device_p->AddRef();
-    hwdevice = device_p;
+    data = device_p;
     return 0;
   }
 
-  ~hwdevice_ctx_t() override {
-    if(hwdevice) {
-      ((ID3D11Device*)hwdevice)->Release();
+  ~hwdevice_t() override {
+    if(data) {
+      ((ID3D11Device*)data)->Release();
     }
   }
 
@@ -629,8 +631,6 @@ public:
   device_ctx_t device_ctx;
   duplication_t dup;
 
-  int width, height;
-
   DXGI_FORMAT format;
   D3D_FEATURE_LEVEL feature_level;
 };
@@ -711,15 +711,6 @@ public:
       return capture_e::timeout;
     }
 
-    if(img->width != width || img->height != height) {
-      delete[] img->data;
-      img->data = new std::uint8_t[height * img_info.RowPitch];
-
-      img->width = width;
-      img->height = height;
-      img->row_pitch = img_info.RowPitch;
-    }
-
     std::copy_n((std::uint8_t*)img_info.pData, height * img_info.RowPitch, (std::uint8_t*)img->data);
 
     if(cursor_visible && cursor.visible) {
@@ -732,22 +723,16 @@ public:
   std::shared_ptr<platf::img_t> alloc_img() override {
     auto img = std::make_shared<img_t>();
 
-    img->data         = nullptr;
-    img->row_pitch    = 0;
     img->pixel_pitch  = 4;
-    img->width        = 0;
-    img->height       = 0;
+    img->row_pitch    = img->pixel_pitch * width;
+    img->width        = width;
+    img->height       = height;
+    img->data         = new std::uint8_t[img->row_pitch * height];
 
     return img;
   }
 
   int dummy_img(platf::img_t *img) override {
-    img->data        = new std::uint8_t[4];
-    img->row_pitch   = 4;
-    img->pixel_pitch = 4;
-    img->width       = 1;
-    img->height      = 1;
-
     return 0;
   }
 
@@ -893,14 +878,14 @@ public:
     return 0;
   }
 
-  std::shared_ptr<platf::hwdevice_ctx_t> make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override {
+  std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, pix_fmt_e pix_fmt) override {
     if(pix_fmt != platf::pix_fmt_e::nv12) {
       BOOST_LOG(error) << "display_gpu_t doesn't support pixel format ["sv << (int)pix_fmt << ']';
 
       return nullptr;
     }
 
-    auto hwdevice = std::make_shared<hwdevice_ctx_t>();
+    auto hwdevice = std::make_shared<hwdevice_t>();
 
     auto ret = hwdevice->init(
       shared_from_this(),
diff --git a/sunshine/utility.h b/sunshine/utility.h
index 1ac3d5f6..e3dec5e0 100644
--- a/sunshine/utility.h
+++ b/sunshine/utility.h
@@ -436,6 +436,72 @@ public:
   }
 };
 
+
+// Pointer that either owns its pointee (adopted from a std::unique_ptr) or only borrows it (raw pointer).
+// An owned pointee is deleted when the wrap_ptr is destroyed or when the pointee is replaced.
+template<class T>
+class wrap_ptr {
+public:
+  using element_type = T;
+  using pointer = element_type*;
+  using reference = element_type&;
+
+  wrap_ptr() : _own_ptr { false }, _p { nullptr } {}
+  wrap_ptr(pointer p) : _own_ptr { false }, _p { p } {}
+  wrap_ptr(std::unique_ptr<element_type> &&uniq_p) : _own_ptr { true }, _p { uniq_p.release() } {}
+
+  // Ownership (if any) moves to *this; other keeps pointing at the pointee as a borrower
+  wrap_ptr(wrap_ptr &&other) noexcept : _own_ptr { other._own_ptr }, _p { other._p } {
+    other._own_ptr = false;
+  }
+
+  // Deletes the pointee only when it is owned
+  ~wrap_ptr() { reset(nullptr, false); }
+
+  wrap_ptr &operator=(wrap_ptr &&other) noexcept {
+    // Self-assignment must not delete the pointee that is about to be kept
+    if(this != &other) {
+      reset(other._p, other._own_ptr);
+      other._own_ptr = false;
+    }
+
+    return *this;
+  }
+
+  template<class V>
+  wrap_ptr &operator=(std::unique_ptr<V> &&uniq_ptr) {
+    static_assert(std::is_base_of_v<element_type, V>, "element_type must be base class of V");
+    reset(uniq_ptr.release(), true);
+
+    return *this;
+  }
+
+  wrap_ptr &operator=(pointer p) {
+    reset(p, false);
+
+    return *this;
+  }
+
+  // Constness of the wrapper does not propagate to the pointee
+  reference operator*() const {
+    return *_p;
+  }
+  pointer operator->() const {
+    return _p;
+  }
+
+private:
+  // Point at p with the given ownership; a replaced owned pointee is deleted, re-assigning the current one keeps ownership
+  void reset(pointer p, bool own) {
+    if(_p == p) { _own_ptr = _own_ptr || own; return; }
+    if(_own_ptr) delete _p;
+    _p = p; _own_ptr = own;
+  }
+
+  bool _own_ptr;
+  pointer _p;
+};
+
 template<class T>
 class buffer_t {
 public:
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 6e6933c9..1325bce4 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -8,7 +8,6 @@
 
 extern "C" {
 #include <libswscale/swscale.h>
-#include <libavutil/hwcontext_d3d11va.h>
 }
 
 #include "platform/common.h"
@@ -18,6 +17,12 @@ extern "C" {
 #include "video.h"
 #include "main.h"
 
+#ifdef _WIN32
+extern "C" {
+#include <libavutil/hwcontext_d3d11va.h>
+}
+#endif
+
 namespace video {
 using namespace std::literals;
 
@@ -59,10 +64,61 @@ using buffer_t    = util::safe_ptr<AVBufferRef, free_buffer>;
 using sws_t       = util::safe_ptr<SwsContext, sws_freeContext>;
 using img_event_t = std::shared_ptr<safe::event_t<std::shared_ptr<platf::img_t>>>;
 
-void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
+platf::dev_type_e map_dev_type(AVHWDeviceType type);
+platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt);
 
-void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame);
-util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx);
+void sw_img_to_frame(const platf::img_t &img, frame_t &frame);
+void nv_d3d_img_to_frame(const platf::img_t &img, frame_t &frame);
+util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx);
+
+util::Either<buffer_t, int> make_hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx);
+int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format);
+
+// Converts captured BGR0 images into the encoder's AVFrame (stored in data) using libswscale
+class swdevice_t : public platf::hwdevice_t {
+public:
+  // Scales/converts img into the frame; returns 0 on success, -1 when the conversion fails
+  int convert(platf::img_t &img) override {
+    auto frame = (AVFrame *)data;
+    av_frame_make_writable(frame);
+
+    // sws_scale() takes per-plane arrays; BGR0 is packed, so only the first entry is populated
+    const std::uint8_t *const planes[4] { (const std::uint8_t *)img.data, nullptr, nullptr, nullptr };
+    const int linesizes[4] { img.row_pitch, 0, 0, 0 };
+
+    int ret = sws_scale(sws.get(), planes, linesizes, 0, img.height, frame->data, frame->linesize);
+    if(ret <= 0) {
+      BOOST_LOG(fatal) << "Couldn't convert image to required format and/or size"sv;
+      return -1;
+    }
+
+    return 0;
+  }
+
+  void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
+    sws_setColorspaceDetails(sws.get(),
+      sws_getCoefficients(SWS_CS_DEFAULT), 0,
+      sws_getCoefficients(colorspace), color_range - 1,
+      0, 1 << 16, 1 << 16
+    );
+  }
+
+  // Creates the BGR0 -> format scaler and stores frame as the convert() target; returns 0 on success, -1 otherwise
+  int init(int in_width, int in_height, int out_width, int out_height, AVFrame *frame, AVPixelFormat format) {
+    sws.reset(sws_getContext(
+      in_width, in_height, AV_PIX_FMT_BGR0,
+      out_width, out_height, format,
+      SWS_LANCZOS | SWS_ACCURATE_RND,
+      nullptr, nullptr, nullptr
+    ));
+    data = frame;
+    return sws ? 0 : -1;
+  }
+
+  ~swdevice_t() override {}
+
+  sws_t sws;
+};
 
 struct encoder_t {
   std::string_view name;
@@ -115,36 +171,34 @@ struct encoder_t {
   bool system_memory;
   bool hevc_mode;
 
-  std::function<void(sws_t &, const platf::img_t&, frame_t&)> img_to_frame;
-  std::function<util::Either<buffer_t, int>(platf::hwdevice_ctx_t *hwdevice)> make_hwdevice_ctx;
+  std::function<void(const platf::img_t&, frame_t&)> img_to_frame;
+  std::function<util::Either<buffer_t, int>(platf::hwdevice_t *hwdevice)> make_hwdevice_ctx;
 };
 
-struct session_t {
+class session_t {
+public:
   session_t() = default;
-  session_t(session_t&&) = default;
+  session_t(ctx_t &&ctx, frame_t &&frame, util::wrap_ptr<platf::hwdevice_t> &&device) :
+    ctx { std::move(ctx) }, frame { std::move(frame) }, device { std::move(device) } {}
+
+  session_t(session_t &&other) :
+    ctx { std::move(other.ctx) }, frame { std::move(other.frame) }, device { std::move(other.device) } {}
 
   // Ensure objects are destroyed in the correct order
   session_t &operator=(session_t &&other) {
-    sws_color_format = other.sws_color_format;
-    sw_format        = other.sw_format;
-    frame            = std::move(other.frame);
-    ctx              = std::move(other.ctx);
-    hwdevice         = std::move(other.hwdevice);
+    device = std::move(other.device);
+    frame = std::move(other.frame);
+    ctx   = std::move(other.ctx);
 
     return *this;
   }
 
-  buffer_t hwdevice;
-
   ctx_t ctx;
-
   frame_t frame;
-
-  AVPixelFormat sw_format;
-  int sws_color_format;
+  util::wrap_ptr<platf::hwdevice_t> device;
 };
 
-struct encode_session_ctx_t {
+struct sync_session_ctx_t {
   safe::signal_t *shutdown_event;
   safe::signal_t *join_event;
   packet_queue_t packets;
@@ -155,18 +209,18 @@ struct encode_session_ctx_t {
   void *channel_data;
 };
 
-struct encode_session_t {
-  encode_session_ctx_t *ctx;
+struct sync_session_t {
+  sync_session_ctx_t *ctx;
   
   std::chrono::steady_clock::time_point next_frame;
   std::chrono::milliseconds delay;
 
   platf::img_t *img_tmp;
-  std::shared_ptr<platf::hwdevice_ctx_t> hwdevice;
+  std::shared_ptr<platf::hwdevice_t> hwdevice;
   session_t session;
 };
 
-using encode_session_ctx_queue_t = safe::queue_t<encode_session_ctx_t>;
+using encode_session_ctx_queue_t = safe::queue_t<sync_session_ctx_t>;
 using encode_e = platf::capture_e;
 
 struct capture_ctx_t {
@@ -174,7 +228,7 @@ struct capture_ctx_t {
   std::chrono::nanoseconds delay;
 };
 
-struct capture_thread_ctx_t {
+struct capture_thread_async_ctx_t {
   std::shared_ptr<safe::queue_t<capture_ctx_t>> capture_ctx_queue;
   std::thread capture_thread;
 
@@ -183,19 +237,20 @@ struct capture_thread_ctx_t {
   util::sync_t<std::weak_ptr<platf::display_t>> display_wp;
 };
 
-struct capture_synced_ctx_t {
+struct capture_thread_sync_ctx_t {
   encode_session_ctx_queue_t encode_session_ctx_queue;
 };
 
-int start_capture_sync(capture_synced_ctx_t &ctx);
-void end_capture_sync(capture_synced_ctx_t &ctx);
-int start_capture(capture_thread_ctx_t &ctx);
-void end_capture(capture_thread_ctx_t &ctx);
+int start_capture_sync(capture_thread_sync_ctx_t &ctx);
+void end_capture_sync(capture_thread_sync_ctx_t &ctx);
+int start_capture_async(capture_thread_async_ctx_t &ctx);
+void end_capture_async(capture_thread_async_ctx_t &ctx);
 
 // Keep a reference counter to ensure the capture thread only runs when other threads have a reference to the capture thread
-auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
-auto capture_thread_sync = safe::make_shared<capture_synced_ctx_t>(start_capture_sync, end_capture_sync);
+auto capture_thread_async = safe::make_shared<capture_thread_async_ctx_t>(start_capture_async, end_capture_async);
+auto capture_thread_sync = safe::make_shared<capture_thread_sync_ctx_t>(start_capture_sync, end_capture_sync);
 
+#ifdef _WIN32
 static encoder_t nvenc {
   "nvenc"sv,
   { (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 },
@@ -229,6 +284,7 @@ static encoder_t nvenc {
   nv_d3d_img_to_frame,
   nv_d3d_make_hwdevice_ctx
 };
+#endif
 
 static encoder_t software {
   "software"sv,
@@ -265,37 +321,12 @@ static encoder_t software {
 };
 
 static std::vector<encoder_t> encoders {
-  nvenc, software
+#ifdef _WIN32
+  nvenc,
+#endif
+  software
 };
 
-platf::dev_type_e map_dev_type(AVHWDeviceType type) {
-  switch(type) {
-    case AV_HWDEVICE_TYPE_D3D11VA:
-      return platf::dev_type_e::dxgi;
-    case AV_PICTURE_TYPE_NONE:
-      return platf::dev_type_e::none;
-    default:
-      return platf::dev_type_e::unknown;
-  }
-
-  return platf::dev_type_e::unknown;
-}
-
-platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) {
-  switch(fmt) {
-    case AV_PIX_FMT_YUV420P10:
-      return platf::pix_fmt_e::yuv420p10;
-    case AV_PIX_FMT_YUV420P:
-      return platf::pix_fmt_e::yuv420p;
-    case AV_PIX_FMT_NV12:
-      return platf::pix_fmt_e::nv12;
-    default:
-      return platf::pix_fmt_e::unknown;
-  }
-
-  return platf::pix_fmt_e::unknown;
-}
-
 void reset_display(std::shared_ptr<platf::display_t> &disp, AVHWDeviceType type) {
   // We try this twice, in case we still get an error on reinitialization
   for(int x = 0; x < 2; ++x) {
@@ -439,70 +470,6 @@ void captureThread(
   }
 }
 
-int start_capture(capture_thread_ctx_t &capture_thread_ctx) {
-  capture_thread_ctx.encoder_p = &encoders.front();
-  capture_thread_ctx.reinit_event.reset();
-
-  capture_thread_ctx.capture_ctx_queue = std::make_shared<safe::queue_t<capture_ctx_t>>();
-
-  capture_thread_ctx.capture_thread = std::thread {
-    captureThread,
-    capture_thread_ctx.capture_ctx_queue,
-    std::ref(capture_thread_ctx.display_wp),
-    std::ref(capture_thread_ctx.reinit_event),
-    std::ref(*capture_thread_ctx.encoder_p)
-  };
-
-  return 0;
-}
-void end_capture(capture_thread_ctx_t &capture_thread_ctx) {
-  capture_thread_ctx.capture_ctx_queue->stop();
-
-  capture_thread_ctx.capture_thread.join();
-}
-
-util::Either<buffer_t, int> hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) {
-  buffer_t ctx;
-
-  int err;
-  if(hwdevice_ctx) {
-    ctx.reset(av_hwdevice_ctx_alloc(type));
-    ((AVHWDeviceContext*)ctx.get())->hwctx = hwdevice_ctx;
-
-    err = av_hwdevice_ctx_init(ctx.get());
-  }
-  else {
-    AVBufferRef *ref  {};
-    err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
-    ctx.reset(ref);
-  }
-
-  if(err < 0) {
-    return err;
-  }
-
-  return ctx;
-}
-
-int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
-  buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get())};
-
-  auto frame_ctx = (AVHWFramesContext*)frame_ref->data;
-  frame_ctx->format    = ctx->pix_fmt;
-  frame_ctx->sw_format = format;
-  frame_ctx->height    = ctx->height;
-  frame_ctx->width     = ctx->width;
-  frame_ctx->initial_pool_size = 0;
-
-  if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
-    return err;
-  }
-
-  ctx->hw_frames_ctx = av_buffer_ref(frame_ref.get());
-
-  return 0;
-}
-
 int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets, void *channel_data) {
   frame->pts = frame_nr;
 
@@ -533,7 +500,7 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets
   return 0;
 }
 
-std::optional<session_t>  make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) {
+std::optional<session_t> make_session(const encoder_t &encoder, const config_t &config, int width, int height, platf::hwdevice_t *hwdevice) {
   bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE;
 
   auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
@@ -554,7 +521,7 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     return std::nullopt;
   }
 
-  ctx_t ctx {avcodec_alloc_context3(codec) };
+  ctx_t ctx { avcodec_alloc_context3(codec) };
   ctx->width = config.width;
   ctx->height = config.height;
   ctx->time_base = AVRational{1, config.framerate};
@@ -626,17 +593,17 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     sw_fmt = encoder.dynamic_pix_fmt;
   }
 
-  buffer_t hwdevice;
+  buffer_t hwdevice_ctx;
   if(hardware) {
     ctx->pix_fmt = encoder.dev_pix_fmt;
 
-    auto buf_or_error = encoder.make_hwdevice_ctx(device_ctx);
+    auto buf_or_error = encoder.make_hwdevice_ctx(hwdevice);
     if(buf_or_error.has_right()) {
       return std::nullopt;
     }
 
-    hwdevice = std::move(buf_or_error.left());
-    if(hwframe_ctx(ctx, hwdevice, sw_fmt)) {
+    hwdevice_ctx = std::move(buf_or_error.left());
+    if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt)) {
       return std::nullopt;
     }
 
@@ -691,7 +658,7 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
 
   frame_t frame {av_frame_alloc() };
   frame->format = ctx->pix_fmt;
-  frame->width = ctx->width;
+  frame->width  = ctx->width;
   frame->height = ctx->height;
 
 
@@ -702,12 +669,26 @@ std::optional<session_t>  make_session(const encoder_t &encoder, const config_t
     av_frame_get_buffer(frame.get(), 0);
   }
 
+  util::wrap_ptr<platf::hwdevice_t> device;
+
+  if(!hwdevice->data) {
+    auto device_tmp = std::make_unique<swdevice_t>();
+
+    if(device_tmp->init(width, height, config.width, config.height, frame.get(), sw_fmt)) {
+      return std::nullopt;
+    }
+
+    device = std::move(device_tmp);
+  }
+  else {
+    device = hwdevice;
+  }
+
+  device->set_colorspace(sws_color_space, ctx->color_range);
   return std::make_optional(session_t {
-    std::move(hwdevice),
     std::move(ctx),
     std::move(frame),
-    sw_fmt,
-    sws_color_space
+    std::move(device)
   });
 }
 
@@ -718,25 +699,19 @@ void encode_run(
   idr_event_t idr_events,
   img_event_t images,
   config_t config,
-  platf::hwdevice_ctx_t *hwdevice_ctx,
+  int width, int height,
+  platf::hwdevice_t *hwdevice,
   safe::signal_t &reinit_event,
   const encoder_t &encoder,
   void *channel_data) {
 
-  auto session = make_session(encoder, config, hwdevice_ctx);
+  auto session = make_session(encoder, config, width, height, hwdevice);
   if(!session) {
     return;
   }
-  hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
 
   auto delay = std::chrono::floor<std::chrono::nanoseconds>(1s) / config.framerate;
 
-  auto img_width  = 0;
-  auto img_height = 0;
-
-  // Initiate scaling context with correct height and width
-  sws_t sws;
-
   auto next_frame = std::chrono::steady_clock::now();
   while(true) {
     if(shutdown_event->peek() || reinit_event.peek() || !images->running()) {
@@ -765,37 +740,9 @@ void encode_run(
     // When Moonlight request an IDR frame, send frames even if there is no new captured frame
     if(frame_nr > (key_frame_nr + config.framerate) || images->peek()) {
       if(auto img = images->pop(delay)) {
-        const platf::img_t *img_p;
-        if(encoder.system_memory) {
-          auto new_width  = img->width;
-          auto new_height = img->height;
+        session->device->convert(*img);
 
-          if(img_width != new_width || img_height != new_height) {
-            img_width  = new_width;
-            img_height = new_height;
-
-            sws.reset(
-              sws_getContext(
-                img_width, img_height, AV_PIX_FMT_BGR0,
-                session->ctx->width, session->ctx->height, session->sw_format,
-                SWS_LANCZOS | SWS_ACCURATE_RND,
-                nullptr, nullptr, nullptr));
-
-            sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0,
-                                     sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
-                                     0, 1 << 16, 1 << 16);
-          }
-
-          img_p = img.get();
-        }
-        else {
-          img_p = hwdevice_ctx->convert(*img);
-          if(!img_p) {
-            return;
-          }
-        }
-
-        encoder.img_to_frame(sws, *img_p, session->frame);
+        encoder.img_to_frame(*session->device->img, session->frame);
       }
       else if(images->running()) {
         continue;
@@ -814,8 +761,8 @@ void encode_run(
   }
 }
 
-std::optional<encode_session_t> make_session_from_ctx(platf::display_t *disp, const encoder_t &encoder, platf::img_t &img, encode_session_ctx_t &ctx) {
-  encode_session_t encode_session;
+std::optional<sync_session_t> make_synced_session(platf::display_t *disp, const encoder_t &encoder, platf::img_t &img, sync_session_ctx_t &ctx) {
+  sync_session_t encode_session;
 
   encode_session.ctx = &ctx;
   encode_session.next_frame = std::chrono::steady_clock::now();
@@ -823,25 +770,24 @@ std::optional<encode_session_t> make_session_from_ctx(platf::display_t *disp, co
   encode_session.delay = 1000ms / ctx.config.framerate;
 
   auto pix_fmt = ctx.config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
-  auto hwdevice_ctx = disp->make_hwdevice_ctx(ctx.config.width, ctx.config.height, pix_fmt);
-  if(!hwdevice_ctx) {
+  auto hwdevice = disp->make_hwdevice(ctx.config.width, ctx.config.height, pix_fmt);
+  if(!hwdevice) {
     return std::nullopt;
   }
 
-  auto session = make_session(encoder, ctx.config, hwdevice_ctx.get());
+  auto session = make_session(encoder, ctx.config, img.width, img.height, hwdevice.get());
   if(!session) {
     return std::nullopt;
   }
-  hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
 
   encode_session.img_tmp = &img;
-  encode_session.hwdevice = std::move(hwdevice_ctx);
+  encode_session.hwdevice = std::move(hwdevice);
   encode_session.session = std::move(*session);
 
   return std::move(encode_session);
 }
 
-encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &encode_session_ctxs, encode_session_ctx_queue_t &encode_session_ctx_queue) {
+encode_e encode_run_sync(std::vector<std::unique_ptr<sync_session_ctx_t>> &synced_session_ctxs, encode_session_ctx_queue_t &encode_session_ctx_queue) {
   const auto &encoder = encoders.front();
 
   std::shared_ptr<platf::display_t> disp;
@@ -863,14 +809,14 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
     return encode_e::error;
   }
 
-  std::vector<encode_session_t> encode_sessions;
-  for(auto &ctx : encode_session_ctxs) {
-    auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *ctx);
-    if(!encode_session) {
+  std::vector<sync_session_t> synced_sessions;
+  for(auto &ctx : synced_session_ctxs) {
+    auto synced_session = make_synced_session(disp.get(), encoder, *dummy_img, *ctx);
+    if(!synced_session) {
       return encode_e::error;
     }
 
-    encode_sessions.emplace_back(std::move(*encode_session));
+    synced_sessions.emplace_back(std::move(*synced_session));
   }
 
   auto next_frame = std::chrono::steady_clock::now();
@@ -881,14 +827,14 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
         return encode_e::ok;
       }
 
-      encode_session_ctxs.emplace_back(std::make_unique<encode_session_ctx_t>(std::move(*encode_session_ctx)));
+      synced_session_ctxs.emplace_back(std::make_unique<sync_session_ctx_t>(std::move(*encode_session_ctx)));
 
-      auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *encode_session_ctxs.back());
+      auto encode_session = make_synced_session(disp.get(), encoder, *dummy_img, *synced_session_ctxs.back());
       if(!encode_session) {
         return encode_e::error;
       }
 
-      encode_sessions.emplace_back(std::move(*encode_session));
+      synced_sessions.emplace_back(std::move(*encode_session));
 
       next_frame = std::chrono::steady_clock::now();
     }
@@ -911,18 +857,18 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
     auto now = std::chrono::steady_clock::now();
     
     next_frame = now + 1s;
-    KITTY_WHILE_LOOP(auto pos = std::begin(encode_sessions), pos != std::end(encode_sessions), {
+    KITTY_WHILE_LOOP(auto pos = std::begin(synced_sessions), pos != std::end(synced_sessions), {
       auto ctx = pos->ctx;
       if(ctx->shutdown_event->peek()) {
         // Let waiting thread know it can delete shutdown_event
         ctx->join_event->raise(true);
         
-        pos = encode_sessions.erase(pos);
-        encode_session_ctxs.erase(std::find_if(std::begin(encode_session_ctxs), std::end(encode_session_ctxs), [&ctx_p=ctx](auto &ctx) {
+        pos = synced_sessions.erase(pos);
+        synced_session_ctxs.erase(std::find_if(std::begin(synced_session_ctxs), std::end(synced_session_ctxs), [&ctx_p=ctx](auto &ctx) {
           return ctx.get() == ctx_p;
         }));
 
-        if(encode_sessions.empty()) {
+        if(synced_sessions.empty()) {
           return encode_e::ok;
         }
 
@@ -960,10 +906,15 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
 
       sws_t sws;
       if(pos->img_tmp) {
-        auto img_p = pos->hwdevice->convert(*pos->img_tmp);
+        if(pos->hwdevice->convert(*pos->img_tmp)) {
+          BOOST_LOG(error) << "Could not convert image"sv;
+          ctx->shutdown_event->raise(true);
+
+          continue;
+        }
         pos->img_tmp = nullptr;
 
-        encoder.img_to_frame(sws, *img_p, pos->session.frame);
+        encoder.img_to_frame(*pos->hwdevice->img, pos->session.frame);
       }
 
       if(encode(ctx->frame_nr++, pos->session.ctx, pos->session.frame, ctx->packets, ctx->channel_data)) {
@@ -987,13 +938,13 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<encode_session_ctx_t>> &enc
 void captureThreadSync() {
   auto ref = capture_thread_sync.ref();
 
-  std::vector<std::unique_ptr<encode_session_ctx_t>> encode_session_ctxs;
+  std::vector<std::unique_ptr<sync_session_ctx_t>> synced_session_ctxs;
 
   auto &ctx = ref->encode_session_ctx_queue;
   auto lg = util::fail_guard([&]() {
     ctx.stop();
 
-    for(auto &ctx : encode_session_ctxs) {
+    for(auto &ctx : synced_session_ctxs) {
       ctx->shutdown_event->raise(true);
       ctx->join_event->raise(true);
     }
@@ -1004,16 +955,9 @@ void captureThreadSync() {
     }
   });
 
-  while(encode_run_sync(encode_session_ctxs, ctx) == encode_e::reinit);
+  while(encode_run_sync(synced_session_ctxs, ctx) == encode_e::reinit);
 }
 
-int start_capture_sync(capture_synced_ctx_t &ctx) {
-  std::thread { &captureThreadSync }.detach();
-  return 0;
-}
-
-void end_capture_sync(capture_synced_ctx_t &ctx) {}
-
 void capture_async(
   safe::signal_t *shutdown_event,
   packet_queue_t &packets,
@@ -1027,7 +971,7 @@ void capture_async(
     shutdown_event->raise(true);
   });
 
-  auto ref = capture_thread.ref();
+  auto ref = capture_thread_async.ref();
   if(!ref) {
     return;
   }
@@ -1056,8 +1000,8 @@ void capture_async(
     }
 
     auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10;
-    auto hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt);
-    if(!hwdevice_ctx) {
+    auto hwdevice = display->make_hwdevice(config.width, config.height, pix_fmt);
+    if(!hwdevice) {
       return;
     }
 
@@ -1067,7 +1011,14 @@ void capture_async(
     }
     images->raise(std::move(dummy_img));
 
-    encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data);
+    encode_run(
+      frame_nr, key_frame_nr,
+      shutdown_event,
+      packets, idr_events, images,
+      config, display->width, display->height,
+      hwdevice.get(),
+      ref->reinit_event, *ref->encoder_p,
+      channel_data);
   }
 }
 
@@ -1084,7 +1035,7 @@ void capture(
   else {
     safe::signal_t join_event;
     auto ref = capture_thread_sync.ref();
-    ref->encode_session_ctx_queue.raise(encode_session_ctx_t {
+    ref->encode_session_ctx_queue.raise(sync_session_ctx_t {
       shutdown_event, &join_event, packets, idr_events, config, 1, 1, channel_data
     }); 
 
@@ -1100,44 +1051,25 @@ bool validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &e
   }
 
   auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
-  auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt);
-  if(!hwdevice_ctx) {
+  auto hwdevice = disp->make_hwdevice(config.width, config.height, pix_fmt);
+  if(!hwdevice) {
     return false;
   }
 
-  auto session = make_session(encoder, config, hwdevice_ctx.get());
+  auto session = make_session(encoder, config, disp->width, disp->height, hwdevice.get());
   if(!session) {
     return false;
   }
-  hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range);
 
   auto img = disp->alloc_img();
   if(disp->dummy_img(img.get())) {
     return false;
   }
-
-  sws_t sws;
-  if(encoder.system_memory) {
-    sws.reset(sws_getContext(
-      img->width, img->height, AV_PIX_FMT_BGR0,
-      session->ctx->width, session->ctx->height, session->sw_format,
-      SWS_LANCZOS | SWS_ACCURATE_RND,
-      nullptr, nullptr, nullptr));
-
-    sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0,
-                             sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
-                             0, 1 << 16, 1 << 16);
-
-    encoder.img_to_frame(sws, *img, session->frame);
+  if(session->device->convert(*img)) {
+    return false;
   }
-  else {
-    auto converted_img = hwdevice_ctx->convert(*img);
-    if(!converted_img) {
-      return false;
-    }
 
-    encoder.img_to_frame(sws, *converted_img, session->frame);
-  }
+  encoder.img_to_frame(*hwdevice->img, session->frame);
 
   session->frame->pict_type = AV_PICTURE_TYPE_I;
 
@@ -1250,23 +1182,56 @@ int init() {
   return 0;
 }
 
-void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
-  av_frame_make_writable(frame.get());
+util::Either<buffer_t, int> make_hwdevice_ctx(AVHWDeviceType type, void *hwdevice) {
+  buffer_t ctx;
 
-  const int linesizes[2] {
-    img.row_pitch, 0
-  };
+  int err;
+  if(hwdevice) {
+    ctx.reset(av_hwdevice_ctx_alloc(type));
+    ((AVHWDeviceContext*)ctx.get())->hwctx = hwdevice;
 
-  int ret = sws_scale(sws.get(), (std::uint8_t*const*)&img.data, linesizes, 0, img.height, frame->data, frame->linesize);
-  if(ret <= 0) {
-    BOOST_LOG(fatal) << "Couldn't convert image to required format and/or size"sv;
-
-    log_flush();
-    std::abort();
+    err = av_hwdevice_ctx_init(ctx.get());
   }
+  else {
+    AVBufferRef *ref  {};
+    err = av_hwdevice_ctx_create(&ref, type, nullptr, nullptr, 0);
+    ctx.reset(ref);
+  }
+
+  if(err < 0) {
+    return err;
+  }
+
+  return ctx;
 }
 
-void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
+int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
+  buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get())};
+
+  auto frame_ctx = (AVHWFramesContext*)frame_ref->data;
+  frame_ctx->format    = ctx->pix_fmt;
+  frame_ctx->sw_format = format;
+  frame_ctx->height    = ctx->height;
+  frame_ctx->width     = ctx->width;
+  frame_ctx->initial_pool_size = 0;
+
+  if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
+    return err;
+  }
+
+  ctx->hw_frames_ctx = av_buffer_ref(frame_ref.get());
+
+  return 0;
+}
+
+void sw_img_to_frame(const platf::img_t &img, frame_t &frame) {}
+
+#ifdef _WIN32
+void nv_d3d_img_to_frame(const platf::img_t &img, frame_t &frame) {
+  if(img.data == frame->data[0]) {
+    return;
+  }
+  
   // Need to have something refcounted
   if(!frame->buf[0]) {
     frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor));
@@ -1285,12 +1250,12 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) {
   frame->width = img.width;
 }
 
-util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) {
+util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) {
   buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
   auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
   
   std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
-  std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice);
+  std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->data);
 
   auto err = av_hwdevice_ctx_init(ctx_buf.get());
   if(err) {
@@ -1302,4 +1267,61 @@ util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwde
 
   return ctx_buf;
 }
+#endif
+
+int start_capture_async(capture_thread_async_ctx_t &capture_thread_ctx) {
+  capture_thread_ctx.encoder_p = &encoders.front();
+  capture_thread_ctx.reinit_event.reset();
+
+  capture_thread_ctx.capture_ctx_queue = std::make_shared<safe::queue_t<capture_ctx_t>>();
+
+  capture_thread_ctx.capture_thread = std::thread {
+    captureThread,
+    capture_thread_ctx.capture_ctx_queue,
+    std::ref(capture_thread_ctx.display_wp),
+    std::ref(capture_thread_ctx.reinit_event),
+    std::ref(*capture_thread_ctx.encoder_p)
+  };
+
+  return 0;
+}
+void end_capture_async(capture_thread_async_ctx_t &capture_thread_ctx) {
+  capture_thread_ctx.capture_ctx_queue->stop();
+
+  capture_thread_ctx.capture_thread.join();
+}
+
+int start_capture_sync(capture_thread_sync_ctx_t &ctx) {
+  std::thread { &captureThreadSync }.detach();
+  return 0;
+}
+void end_capture_sync(capture_thread_sync_ctx_t &ctx) {}
+
+platf::dev_type_e map_dev_type(AVHWDeviceType type) {
+  switch(type) {
+    case AV_HWDEVICE_TYPE_D3D11VA:
+      return platf::dev_type_e::dxgi;
+    case AV_PICTURE_TYPE_NONE:
+      return platf::dev_type_e::none;
+    default:
+      return platf::dev_type_e::unknown;
+  }
+
+  return platf::dev_type_e::unknown;
+}
+
+platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) {
+  switch(fmt) {
+    case AV_PIX_FMT_YUV420P10:
+      return platf::pix_fmt_e::yuv420p10;
+    case AV_PIX_FMT_YUV420P:
+      return platf::pix_fmt_e::yuv420p;
+    case AV_PIX_FMT_NV12:
+      return platf::pix_fmt_e::nv12;
+    default:
+      return platf::pix_fmt_e::unknown;
+  }
+
+  return platf::pix_fmt_e::unknown;
+}
 }

From 0b1a69a067cde710371bf0574733201e7ce09227 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Wed, 15 Apr 2020 21:07:00 +0200
Subject: [PATCH 12/25] Ensure it compiles on Linux again

---
 pre-compiled                |  2 +-
 sunshine/platform/common.h  |  2 --
 sunshine/platform/linux.cpp | 57 +++++++++++++++++++++++--------------
 3 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/pre-compiled b/pre-compiled
index 8ec14fd4..51f776db 160000
--- a/pre-compiled
+++ b/pre-compiled
@@ -1 +1 @@
-Subproject commit 8ec14fd4a40d85443084b283ab24415d729984cb
+Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636
diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index e0129060..c2983388 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -139,8 +139,6 @@ int alloc_gamepad(input_t &input, int nr);
 void free_gamepad(input_t &input, int nr);
 
 [[nodiscard]] std::unique_ptr<deinit_t> init();
-
-int thread_priority();
 }
 
 #endif //SUNSHINE_COMMON_H
diff --git a/sunshine/platform/linux.cpp b/sunshine/platform/linux.cpp
index 46f043e2..1c82a892 100644
--- a/sunshine/platform/linux.cpp
+++ b/sunshine/platform/linux.cpp
@@ -145,14 +145,22 @@ struct x11_attr_t : public display_t {
     xwindow = DefaultRootWindow(xdisplay.get());
 
     refresh();
+
+    width  = xattr.width;
+    height = xattr.height;
   }
 
   void refresh() {
     XGetWindowAttributes(xdisplay.get(), xwindow, &xattr);
   }
 
-  capture_e snapshot(img_t *img_out_base, bool cursor) override {
+  capture_e snapshot(img_t *img_out_base, std::chrono::milliseconds timeout, bool cursor) override {
     refresh();
+
+    if(width != xattr.width || height != xattr.height) {
+      return capture_e::reinit;
+    }
+    
     XImage *img { XGetImage(
       xdisplay.get(),
       xwindow,
@@ -180,6 +188,11 @@ struct x11_attr_t : public display_t {
     return std::make_shared<x11_img_t>();
   }
 
+  int dummy_img(img_t *img) override {
+    snapshot(img, 0s, true);
+    return 0;
+  }
+
   xdisplay_t xdisplay;
   Window xwindow;
   XWindowAttributes xattr;
@@ -210,8 +223,8 @@ struct shm_attr_t : public x11_attr_t {
     while(!task_pool.cancel(refresh_task_id));
   }
 
-  capture_e snapshot(img_t *img, bool cursor) override {
-    if(display->width_in_pixels != xattr.width || display->height_in_pixels != xattr.height) {
+  capture_e snapshot(img_t *img, std::chrono::milliseconds timeout, bool cursor) override {
+    if(width != xattr.width || height != xattr.height) {
       return capture_e::reinit;
     }
 
@@ -219,7 +232,7 @@ struct shm_attr_t : public x11_attr_t {
       xcb.get(),
       display->root,
       0, 0,
-      display->width_in_pixels, display->height_in_pixels,
+      width, height,
       ~0,
       XCB_IMAGE_FORMAT_Z_PIXMAP,
       seg,
@@ -232,16 +245,6 @@ struct shm_attr_t : public x11_attr_t {
       return capture_e::reinit;
     }
 
-    if(img->width != display->width_in_pixels || img->height != display->height_in_pixels) {
-      delete[] img->data;
-
-      img->data = new std::uint8_t[frame_size()];
-      img->width = display->width_in_pixels;
-      img->height = display->height_in_pixels;
-      img->pixel_pitch = 4;
-      img->row_pitch = img->width * img->pixel_pitch;
-    }
-
     std::copy_n((std::uint8_t*)data.data, frame_size(), img->data);
 
     if(cursor) {
@@ -252,13 +255,18 @@ struct shm_attr_t : public x11_attr_t {
   }
 
   std::shared_ptr<img_t> alloc_img() override {
-    return std::make_shared<shm_img_t>();
+    auto img = std::make_shared<shm_img_t>();
+    img->width  = width;
+    img->height = height;
+    img->pixel_pitch = 4;
+    img->row_pitch = img->pixel_pitch * width;
+    img->data = new std::uint8_t[height * img->row_pitch];
+
+    return img;
   }
 
-  int dummy_img(platf::img_t *img, int &) override {
-    auto dummy_data_p = new int[1];
-
-    return platf::display_t::dummy_img(img, *dummy_data_p);
+  int dummy_img(platf::img_t *img) override {
+    return 0;
   }
 
   int init() {
@@ -293,11 +301,14 @@ struct shm_attr_t : public x11_attr_t {
       return -1;
     }
 
+    width  = display->width_in_pixels;
+    height = display->height_in_pixels;
+
     return 0;
   }
 
   std::uint32_t frame_size() {
-    return display->height_in_pixels * display->width_in_pixels * 4;
+    return width * height * 4;
   }
 };
 
@@ -331,7 +342,11 @@ std::shared_ptr<display_t> shm_display() {
   return shm;
 }
 
-std::shared_ptr<display_t> display(int hwdevice_type) {
+std::shared_ptr<display_t> display(platf::dev_type_e hwdevice_type) {
+  if(hwdevice_type != platf::dev_type_e::none) {
+    return nullptr;
+  }
+
   auto shm_disp = shm_display();
 
   if(!shm_disp) {

From c7d6e959e0c97fa28892e08ca4c1cf7b3b2f8d36 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 16 Apr 2020 15:35:12 +0200
Subject: [PATCH 13/25] Fix stream not closing properly when exiting app

---
 sunshine/platform/linux.cpp | 2 +-
 sunshine/round_robin.h      | 0
 sunshine/stream.cpp         | 7 +++++--
 3 files changed, 6 insertions(+), 3 deletions(-)
 mode change 100755 => 100644 sunshine/round_robin.h

diff --git a/sunshine/platform/linux.cpp b/sunshine/platform/linux.cpp
index 1c82a892..137ae0e4 100644
--- a/sunshine/platform/linux.cpp
+++ b/sunshine/platform/linux.cpp
@@ -160,7 +160,7 @@ struct x11_attr_t : public display_t {
     if(width != xattr.width || height != xattr.height) {
       return capture_e::reinit;
     }
-    
+
     XImage *img { XGetImage(
       xdisplay.get(),
       xwindow,
diff --git a/sunshine/round_robin.h b/sunshine/round_robin.h
old mode 100755
new mode 100644
diff --git a/sunshine/stream.cpp b/sunshine/stream.cpp
index e3f504e0..08899f80 100644
--- a/sunshine/stream.cpp
+++ b/sunshine/stream.cpp
@@ -494,8 +494,11 @@ void controlBroadcastThread(safe::signal_t *shutdown_event, control_server_t *se
 
       server->send(std::string_view {(char*)payload.data(), payload.size()});
 
-      shutdown_event->raise(true);
-      continue;
+      auto lg = server->_map_addr_session.lock();
+      for(auto pos = std::begin(*server->_map_addr_session); pos != std::end(*server->_map_addr_session); ++pos) {
+        auto session = pos->second.second;
+        session->shutdown_event.raise(true);
+      }
     }
 
     server->iterate(500ms);

From 87f3ab01819e389540d524f81f3986563cdf4c58 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Fri, 17 Apr 2020 12:28:23 +0200
Subject: [PATCH 14/25] Fix nvenc

---
 pre-compiled       | 2 +-
 sunshine/video.cpp | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pre-compiled b/pre-compiled
index 51f776db..8ec14fd4 160000
--- a/pre-compiled
+++ b/pre-compiled
@@ -1 +1 @@
-Subproject commit 51f776dbd4b2ead239a966406447d12f7e942636
+Subproject commit 8ec14fd4a40d85443084b283ab24415d729984cb
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 1325bce4..6246f31d 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -1255,7 +1255,10 @@ util::Either<buffer_t, int> nv_d3d_make_hwdevice_ctx(platf::hwdevice_t *hwdevice
   auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx;
   
   std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0);
-  std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->data);
+
+  auto device = (ID3D11Device*)hwdevice_ctx->data;
+  device->AddRef();
+  ctx->device = device;
 
   auto err = av_hwdevice_ctx_init(ctx_buf.get());
   if(err) {

From 5a4055f3131e37c64c699f58d4c8f1a007064b60 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Fri, 17 Apr 2020 18:42:55 +0200
Subject: [PATCH 15/25] Pair and connect with Moonlight-iOS

---
 sunshine/nvhttp.cpp | 12 ++++++++++--
 sunshine/utility.h  | 10 +++++++++-
 sunshine/video.cpp  |  6 ++++--
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/sunshine/nvhttp.cpp b/sunshine/nvhttp.cpp
index 261ec05d..9fcd038e 100644
--- a/sunshine/nvhttp.cpp
+++ b/sunshine/nvhttp.cpp
@@ -168,7 +168,15 @@ void update_id_client(const std::string &uniqueID, std::string &&cert, op_e op)
 }
 
 void getservercert(pair_session_t &sess, pt::ptree &tree, const std::string &pin) {
-  auto salt = util::from_hex<std::array<uint8_t, 16>>(sess.async_insert_pin.salt, true);
+  if(sess.async_insert_pin.salt.size() < 32) {
+    tree.put("root.paired", 0);
+    tree.put("root.<xmlattr>.status_code", 400);
+    return;
+  }
+
+  std::string_view salt_view { sess.async_insert_pin.salt.data(), 32 };
+  
+  auto salt = util::from_hex<std::array<uint8_t, 16>>(salt_view, true);
 
   auto key = crypto::gen_aes_key(*salt, pin);
   sess.cipher_key = std::make_unique<crypto::aes_t>(key);
@@ -484,7 +492,7 @@ void serverinfo(std::shared_ptr<typename SimpleWeb::ServerBase<T>::Response> res
   auto current_appid = proc::proc.running();
   tree.put("root.PairStatus", pair_status);
   tree.put("root.currentgame", current_appid >= 0 ? current_appid + 1 : 0);
-  tree.put("root.state", "_SERVER_BUSY"); 
+  tree.put("root.state", current_appid >= 0 ? "_SERVER_BUSY" : "_SERVER_FREE");
 
   std::ostringstream data;
 
diff --git a/sunshine/utility.h b/sunshine/utility.h
index e3dec5e0..19f9a985 100644
--- a/sunshine/utility.h
+++ b/sunshine/utility.h
@@ -451,7 +451,7 @@ public:
     other._own_ptr = false;
   }
 
-  wrap_ptr &operator=(wrap_ptr &&other) {
+  wrap_ptr &operator=(wrap_ptr &&other) noexcept {
     if(_own_ptr) {
       delete _p;
     }
@@ -484,6 +484,14 @@ public:
     return *this;
   }
 
+  ~wrap_ptr() {
+    if(_own_ptr) {
+      delete _p;
+    }
+
+    _own_ptr = false;
+  }
+
   const reference operator*() const {
     return *_p;
   }
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 1325bce4..6ed2085d 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -545,7 +545,7 @@ std::optional<session_t> make_session(const encoder_t &encoder, const config_t &
   ctx->keyint_min = ctx->gop_size;
 
   if(config.numRefFrames == 0) {
-    ctx->refs = video_format[encoder_t::REF_FRAMES_AUTOSELECT] ? 0 : 1;
+    ctx->refs = video_format[encoder_t::REF_FRAMES_AUTOSELECT] ? 0 : 16;
   }
   else {
     // Some client decoders have limits on the number of reference frames
@@ -987,6 +987,7 @@ void capture_async(
 
   int frame_nr = 1;
   int key_frame_nr = 1;
+
   while(!shutdown_event->peek() && images->running()) {
     // Wait for the display to be ready
     std::shared_ptr<platf::display_t> display;
@@ -1028,7 +1029,8 @@ void capture(
   idr_event_t idr_events,
   config_t config,
   void *channel_data) {
-  
+
+  idr_events->raise(std::make_pair(0, 1));
   if(encoders.front().system_memory) {
     capture_async(shutdown_event, packets, idr_events, config, channel_data);
   }

From dd13131fe68b903f39c8e8a6e252b3e8465cbee5 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Fri, 17 Apr 2020 19:18:55 +0200
Subject: [PATCH 16/25] Fix video freezing when resizing display with 2 or more
 sessions

---
 sunshine/video.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 6ed2085d..ca42de3a 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -904,7 +904,6 @@ encode_e encode_run_sync(std::vector<std::unique_ptr<sync_session_ctx_t>> &synce
         continue;
       }
 
-      sws_t sws;
       if(pos->img_tmp) {
         if(pos->hwdevice->convert(*pos->img_tmp)) {
           BOOST_LOG(error) << "Could not convert image"sv;
@@ -989,6 +988,11 @@ void capture_async(
   int key_frame_nr = 1;
 
   while(!shutdown_event->peek() && images->running()) {
+    // Wait for the main capture event when the display is being reinitialized
+    if(ref->reinit_event.peek()) {
+      std::this_thread::sleep_for(100ms);
+      continue;
+    }
     // Wait for the display to be ready
     std::shared_ptr<platf::display_t> display;
     {

From 2f978b3159fa276f9f9341f35dbd4e419c739539 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Fri, 17 Apr 2020 21:57:27 +0200
Subject: [PATCH 17/25] update pre-compiled

---
 pre-compiled | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pre-compiled b/pre-compiled
index 8ec14fd4..afd9a9bb 160000
--- a/pre-compiled
+++ b/pre-compiled
@@ -1 +1 @@
-Subproject commit 8ec14fd4a40d85443084b283ab24415d729984cb
+Subproject commit afd9a9bbfc6ee1a064b0c1f9210bc20b2170c416

From 70bf11ec27d12984f37afc5af18bdc02abec787c Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Sun, 19 Apr 2020 00:10:47 +0300
Subject: [PATCH 18/25] Increase accuracy of fps for nvenc and proper pixel
 format

---
 sunshine/platform/common.h         |  1 +
 sunshine/platform/windows_dxgi.cpp | 17 ++++++++++++-----
 sunshine/video.cpp                 |  8 +++++---
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h
index c2983388..140b54b1 100644
--- a/sunshine/platform/common.h
+++ b/sunshine/platform/common.h
@@ -39,6 +39,7 @@ enum class pix_fmt_e {
   yuv420p,
   yuv420p10,
   nv12,
+  p010,
   unknown
 };
 
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index c34931d1..7832d127 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -326,7 +326,11 @@ public:
     ctx->VideoProcessorSetOutputColorSpace(processor.get(), (D3D11_VIDEO_PROCESSOR_COLOR_SPACE*)&colorspace);
   }
 
-  int init(std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) {
+  int init(
+    std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p,
+    int in_width, int in_height, int out_width, int out_height,
+    pix_fmt_e pix_fmt
+  ) {
     HRESULT status;
 
     platf::hwdevice_t::img = &img;
@@ -377,13 +381,13 @@ public:
     t.ArraySize = 1;
     t.SampleDesc.Count = 1;
     t.Usage = D3D11_USAGE_DEFAULT;
-    t.Format = DXGI_FORMAT_NV12;
+    t.Format = pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010;
     t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
 
     dxgi::texture2d_t::pointer tex_p {};
     status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
     if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Failed to create video output texture [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
 
@@ -823,6 +827,7 @@ public:
     t.SampleDesc.Count = 1;
     t.Usage = D3D11_USAGE_DEFAULT;
     t.Format = format;
+    t.BindFlags = D3D11_BIND_RENDER_TARGET;
 
     dxgi::texture2d_t::pointer tex_p {};
     auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
@@ -861,6 +866,7 @@ public:
     t.SampleDesc.Count = 1;
     t.Usage = D3D11_USAGE_DEFAULT;
     t.Format = format;
+    t.BindFlags = D3D11_BIND_RENDER_TARGET;
 
     dxgi::texture2d_t::pointer tex_p {};
     auto status = device->CreateTexture2D(&t, &data, &tex_p);
@@ -879,7 +885,7 @@ public:
   }
 
   std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, pix_fmt_e pix_fmt) override {
-    if(pix_fmt != platf::pix_fmt_e::nv12) {
+    if(pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
       BOOST_LOG(error) << "display_gpu_t doesn't support pixel format ["sv << (int)pix_fmt << ']';
 
       return nullptr;
@@ -892,7 +898,8 @@ public:
       device.get(),
       device_ctx.get(),
       this->width, this->height,
-      width, height);
+      width, height,
+      pix_fmt);
 
     if(ret) {
       return nullptr;
diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index afdc4a39..c11c353d 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -213,7 +213,7 @@ struct sync_session_t {
   sync_session_ctx_t *ctx;
   
   std::chrono::steady_clock::time_point next_frame;
-  std::chrono::milliseconds delay;
+  std::chrono::nanoseconds delay;
 
   platf::img_t *img_tmp;
   std::shared_ptr<platf::hwdevice_t> hwdevice;
@@ -256,7 +256,7 @@ static encoder_t nvenc {
   { (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 },
   AV_HWDEVICE_TYPE_D3D11VA,
   AV_PIX_FMT_D3D11,
-  AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
+  AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
   {
     {
       { "forced-idr"s, 1 },
@@ -767,7 +767,7 @@ std::optional<sync_session_t> make_synced_session(platf::display_t *disp, const
   encode_session.ctx = &ctx;
   encode_session.next_frame = std::chrono::steady_clock::now();
 
-  encode_session.delay = 1000ms / ctx.config.framerate;
+  encode_session.delay = std::chrono::nanoseconds { 1s } / ctx.config.framerate;
 
   auto pix_fmt = ctx.config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
   auto hwdevice = disp->make_hwdevice(ctx.config.width, ctx.config.height, pix_fmt);
@@ -1327,6 +1327,8 @@ platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) {
       return platf::pix_fmt_e::yuv420p;
     case AV_PIX_FMT_NV12:
       return platf::pix_fmt_e::nv12;
+    case AV_PIX_FMT_P010:
+      return platf::pix_fmt_e::p010;
     default:
       return platf::pix_fmt_e::unknown;
   }

From 17e9b803db3ff3125450d798b95dd87aec345eb2 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Wed, 22 Apr 2020 00:07:26 +0300
Subject: [PATCH 19/25] Display cursor type color with nvenc

---
 sunshine/platform/windows_dxgi.cpp | 172 +++++++++++++++++++++++++++--
 1 file changed, 165 insertions(+), 7 deletions(-)

diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 7832d127..4417bf2f 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -138,6 +138,12 @@ struct cursor_t {
   bool visible;
 };
 
+struct gpu_cursor_t {
+  texture2d_t texture;
+
+  LONG width, height;
+};
+
 void blend_cursor_monochrome(const cursor_t &cursor, img_t &img) {
   int height = cursor.shape_info.Height / 2;
   int width  = cursor.shape_info.Width;
@@ -290,8 +296,63 @@ void blend_cursor(const cursor_t &cursor, img_t &img) {
   }
 }
 
+std::vector<std::uint8_t> make_cursor_image(std::vector<std::uint8_t> &&img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info)  {
+  switch(shape_info.Type) {
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
+      return std::move(img_data);
+    default:
+      break;
+  }
+
+  shape_info.Height /= 2;
+
+  std::vector<std::uint8_t> cursor_img;
+  cursor_img.resize(shape_info.Width * shape_info.Height * 4);
+  std::fill_n((std::uint32_t*)cursor_img.data(), cursor_img.size() / sizeof(std::uint32_t), 0x99888888);
+
+  return cursor_img;
+}
+
 class hwdevice_t : public platf::hwdevice_t {
 public:
+  hwdevice_t(std::vector<hwdevice_t*> *hwdevices_p) : hwdevices_p { hwdevices_p } {}
+  hwdevice_t() = delete;
+
+  void set_cursor_pos(LONG rel_x, LONG rel_y, bool visible) {
+    LONG x = ((float)rel_x) / in_width * out_width;
+    LONG y = ((float)rel_y) / in_height * out_height;
+
+    // Ensure it's within bounds
+    auto left   = std::min<LONG>(out_width, std::max<LONG>(0, x));
+    auto top    = std::min<LONG>(out_height, std::max<LONG>(0, y));
+    auto right  = std::max<LONG>(0, std::min<LONG>(out_width, x + cursor_width));
+    auto bottom = std::max<LONG>(0, std::min<LONG>(out_height, y + cursor_height));
+
+    RECT rect { left, top, right, bottom };
+    ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect);
+
+    cursor_visible = visible;
+  }
+
+  int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) {
+    D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } };
+
+    video::processor_in_t::pointer processor_in_p;
+    auto status = device->CreateVideoProcessorInputView(texture, processor_e.get(), &input_desc, &processor_in_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create cursor VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    cursor_in.reset(processor_in_p);
+
+    cursor_width = ((float)width) / in_width * out_width;
+    cursor_height = ((float)height) / in_height * out_height;
+
+    return 0;
+  }
+
   int convert(platf::img_t &img_base) override {
     auto &img = (img_d3d_t&)img_base;
 
@@ -302,17 +363,19 @@ public:
       video::processor_in_t::pointer processor_in_p;
       auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p);
       if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv
-         << util::hex(status).to_string_view() << ']';
+        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
         return -1;
       }
       it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first;
     }
     auto &processor_in = it->second;
 
-    D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr };
+    D3D11_VIDEO_PROCESSOR_STREAM stream[] {
+      { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr },
+      { TRUE, 0, 0, 0, 0, nullptr, cursor_in.get(), nullptr }
+    };
 
-    auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream);
+    auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, cursor_visible ? 2 : 1, stream);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
@@ -333,8 +396,15 @@ public:
   ) {
     HRESULT status;
 
+    cursor_visible = false;
+
     platf::hwdevice_t::img = &img;
 
+    this->out_width  = out_width;
+    this->out_height = out_height;
+    this->in_width   = in_width;
+    this->in_height  = in_height;
+
     video::device_t::pointer vdevice_p;
     status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
     if(FAILED(status)) {
@@ -401,13 +471,16 @@ public:
 
     D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
     video::processor_out_t::pointer processor_out_p;
-    status = device->CreateVideoProcessorOutputView(tex_p, processor_e.get(), &output_desc, &processor_out_p);
+    status = device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
     if(FAILED(status)) {
       BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
       return -1;
     }
     processor_out.reset(processor_out_p);
 
+    // Tell video processor alpha values need to be enabled
+    ctx->VideoProcessorSetStreamAlpha(processor.get(), 1, TRUE, 1.0f);
+
     device_p->AddRef();
     data = device_p;
     return 0;
@@ -417,6 +490,11 @@ public:
     if(data) {
       ((ID3D11Device*)data)->Release();
     }
+
+    auto it = std::find(std::begin(*hwdevices_p), std::end(*hwdevices_p), this);
+    if(it != std::end(*hwdevices_p)) {
+      hwdevices_p->erase(it);
+    }
   }
 
   img_d3d_t img;
@@ -426,6 +504,16 @@ public:
   video::processor_t processor;
   video::processor_out_t processor_out;
   std::unordered_map<texture2d_t::pointer, video::processor_in_t> texture_to_processor_in;
+
+  video::processor_in_t cursor_in;
+
+  bool cursor_visible;
+  LONG cursor_width, cursor_height;
+
+  float out_width, out_height;
+  float in_width, in_height;
+
+  std::vector<hwdevice_t*> *hwdevices_p;
 };
 
 class display_base_t : public ::platf::display_t {
@@ -797,11 +885,72 @@ public:
       return capture_status;
     }
 
-    const bool update_flag = frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0;
+    const bool update_flag =
+      frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0 ||
+      frame_info.LastMouseUpdateTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0;
+
     if(!update_flag) {
       return capture_e::timeout;
     }
 
+    if(frame_info.PointerShapeBufferSize > 0) {
+      DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info {};
+
+      std::vector<std::uint8_t> img_data;
+      img_data.resize(frame_info.PointerShapeBufferSize);
+
+      UINT dummy;
+      status = dup.dup->GetFramePointerShape(img_data.size(), img_data.data(), &dummy, &shape_info);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
+
+        return capture_e::error;
+      }
+
+      auto cursor_img = make_cursor_image(std::move(img_data), shape_info);
+
+      D3D11_SUBRESOURCE_DATA data {
+        cursor_img.data(),
+        4 * shape_info.Width,
+        0
+      };
+
+      // Create texture for cursor
+      D3D11_TEXTURE2D_DESC t {};
+      t.Width  = shape_info.Width;
+      t.Height = cursor_img.size() / data.SysMemPitch;
+      t.MipLevels = 1;
+      t.ArraySize = 1;
+      t.SampleDesc.Count = 1;
+      t.Usage = D3D11_USAGE_DEFAULT;
+      t.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+      t.BindFlags = D3D11_BIND_RENDER_TARGET;
+
+      dxgi::texture2d_t::pointer tex_p {};
+      auto status = device->CreateTexture2D(&t, &data, &tex_p);
+      if(FAILED(status)) {
+        BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
+        return capture_e::error;
+      }
+      texture2d_t texture { tex_p };
+
+      for(auto *hwdevice : hwdevices) {
+        if(hwdevice->set_cursor_texture(tex_p, t.Width, t.Height)) {
+          return capture_e::error;
+        }
+      }
+
+      cursor.texture = std::move(texture);
+      cursor.width   = t.Width;
+      cursor.height  = t.Height;
+    }
+
+    if(frame_info.LastMouseUpdateTime.QuadPart) {
+      for(auto *hwdevice : hwdevices) {
+        hwdevice->set_cursor_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, frame_info.PointerPosition.Visible && cursor_visible);
+      }
+    }
+
     texture2d_t::pointer src_p {};
     status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
 
@@ -891,7 +1040,7 @@ public:
       return nullptr;
     }
 
-    auto hwdevice = std::make_shared<hwdevice_t>();
+    auto hwdevice = std::make_shared<hwdevice_t>(&hwdevices);
 
     auto ret = hwdevice->init(
       shared_from_this(),
@@ -905,8 +1054,17 @@ public:
       return nullptr;
     }
 
+    if(cursor.texture && hwdevice->set_cursor_texture(cursor.texture.get(), cursor.width, cursor.height)) {
+      return nullptr;
+    }
+
+    hwdevices.emplace_back(hwdevice.get());
+
     return hwdevice;
   }
+
+  gpu_cursor_t cursor;
+  std::vector<hwdevice_t*> hwdevices;
 };
 
 const char *format_str[] = {

From 519f7a8bf18962050f44bfe3457d0694f6113014 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Wed, 22 Apr 2020 22:55:33 +0300
Subject: [PATCH 20/25] convert pointer shape monochrome to color

---
 sunshine/platform/windows_dxgi.cpp | 75 ++++++++++++++++++++++++++----
 1 file changed, 67 insertions(+), 8 deletions(-)

diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 4417bf2f..a2966831 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -296,7 +296,7 @@ void blend_cursor(const cursor_t &cursor, img_t &img) {
   }
 }
 
-std::vector<std::uint8_t> make_cursor_image(std::vector<std::uint8_t> &&img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info)  {
+util::buffer_t<std::uint8_t> make_cursor_image(util::buffer_t<std::uint8_t> &&img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info)  {
   switch(shape_info.Type) {
     case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
     case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
@@ -307,9 +307,69 @@ std::vector<std::uint8_t> make_cursor_image(std::vector<std::uint8_t> &&img_data
 
   shape_info.Height /= 2;
 
-  std::vector<std::uint8_t> cursor_img;
-  cursor_img.resize(shape_info.Width * shape_info.Height * 4);
-  std::fill_n((std::uint32_t*)cursor_img.data(), cursor_img.size() / sizeof(std::uint32_t), 0x99888888);
+  util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };
+
+  auto bytes = shape_info.Pitch * shape_info.Height;
+  auto pixel_begin = (std::uint32_t*)std::begin(cursor_img);
+  auto pixel_data = pixel_begin;
+  auto and_mask = std::begin(img_data);
+  auto xor_mask = std::begin(img_data) + bytes;
+
+  for(auto x = 0; x < bytes; ++x)  {
+    for(auto c = 7; c >= 0; --c) {
+      auto bit = 1 << c;
+      auto color_type = ((*and_mask & bit) ? 1 : 0) + ((*xor_mask & bit) ? 2 : 0);
+
+      constexpr std::uint32_t black = 0xFF000000;
+      constexpr std::uint32_t white = 0xFFFFFFFF;
+      constexpr std::uint32_t transparent = 0;
+      switch(color_type) {
+        case 0: //black
+          *pixel_data = black;
+          break;
+        case 2: //white
+          *pixel_data = white;
+          break;
+        case 1: //transparent
+        {
+          *pixel_data = transparent;
+
+          break;
+        }
+        case 3: //inverse
+        {
+          auto top_p    = pixel_data - shape_info.Width;
+          auto left_p   = pixel_data - 1;
+          auto right_p  = pixel_data + 1;
+          auto bottom_p = pixel_data + shape_info.Width;
+
+          // Get the x coordinate of the pixel
+          auto column = (pixel_data - pixel_begin) % shape_info.Width != 0;
+
+          if(top_p >= pixel_begin && *top_p == transparent) {
+            *top_p = black;
+          }
+
+          if(column != 0 && left_p >= pixel_begin && *left_p == transparent) {
+            *left_p = black;
+          }
+
+          if(bottom_p < (std::uint32_t*)std::end(cursor_img)) {
+            *bottom_p = black;
+          }
+
+          if(column != shape_info.Width -1) {
+            *right_p = black;
+          }
+          *pixel_data = white;
+        }
+      }
+
+      ++pixel_data;
+    }
+    ++and_mask;
+    ++xor_mask;
+  }
 
   return cursor_img;
 }
@@ -896,11 +956,10 @@ public:
     if(frame_info.PointerShapeBufferSize > 0) {
       DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info {};
 
-      std::vector<std::uint8_t> img_data;
-      img_data.resize(frame_info.PointerShapeBufferSize);
+      util::buffer_t<std::uint8_t> img_data { frame_info.PointerShapeBufferSize };
 
       UINT dummy;
-      status = dup.dup->GetFramePointerShape(img_data.size(), img_data.data(), &dummy, &shape_info);
+      status = dup.dup->GetFramePointerShape(img_data.size(), std::begin(img_data), &dummy, &shape_info);
       if (FAILED(status)) {
         BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
 
@@ -910,7 +969,7 @@ public:
       auto cursor_img = make_cursor_image(std::move(img_data), shape_info);
 
       D3D11_SUBRESOURCE_DATA data {
-        cursor_img.data(),
+        std::begin(cursor_img),
         4 * shape_info.Width,
         0
       };

From 2e52402e2750ab85b793783e7efb02b1273c3a20 Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 23 Apr 2020 00:09:27 +0300
Subject: [PATCH 21/25] Correctly truncate cursor image

---
 sunshine/platform/windows_dxgi.cpp | 38 ++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index a2966831..39841074 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -380,17 +380,25 @@ public:
   hwdevice_t() = delete;
 
   void set_cursor_pos(LONG rel_x, LONG rel_y, bool visible) {
-    LONG x = ((float)rel_x) / in_width * out_width;
-    LONG y = ((float)rel_y) / in_height * out_height;
+    LONG x = ((double)rel_x) * out_width / (double)in_width;
+    LONG y = ((double)rel_y) * out_height / (double)in_height;
 
     // Ensure it's within bounds
-    auto left   = std::min<LONG>(out_width, std::max<LONG>(0, x));
-    auto top    = std::min<LONG>(out_height, std::max<LONG>(0, y));
-    auto right  = std::max<LONG>(0, std::min<LONG>(out_width, x + cursor_width));
-    auto bottom = std::max<LONG>(0, std::min<LONG>(out_height, y + cursor_height));
+    auto left_out   = std::min<LONG>(out_width, std::max<LONG>(0, x));
+    auto top_out    = std::min<LONG>(out_height, std::max<LONG>(0, y));
+    auto right_out  = std::max<LONG>(0, std::min<LONG>(out_width, x + cursor_scaled_width));
+    auto bottom_out = std::max<LONG>(0, std::min<LONG>(out_height, y + cursor_scaled_height));
 
-    RECT rect { left, top, right, bottom };
-    ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect);
+    auto left_in   = std::max<LONG>(0, -rel_x);
+    auto top_in    = std::max<LONG>(0, -rel_y);
+    auto right_in  = std::min<LONG>(in_width - rel_x, cursor_width);
+    auto bottom_in = std::min<LONG>(in_height - rel_y, cursor_height);
+
+    RECT rect_in { left_in, top_in, right_in, bottom_in };
+    RECT rect_out { left_out, top_out, right_out, bottom_out };
+
+    ctx->VideoProcessorSetStreamSourceRect(processor.get(), 1, TRUE, &rect_in);
+    ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect_out);
 
     cursor_visible = visible;
   }
@@ -407,8 +415,10 @@ public:
 
     cursor_in.reset(processor_in_p);
 
-    cursor_width = ((float)width) / in_width * out_width;
-    cursor_height = ((float)height) / in_height * out_height;
+    cursor_width  = width;
+    cursor_height = height;
+    cursor_scaled_width = ((double)width) / in_width * out_width;
+    cursor_scaled_height = ((double)height) / in_height * out_height;
 
     return 0;
   }
@@ -568,10 +578,12 @@ public:
   video::processor_in_t cursor_in;
 
   bool cursor_visible;
-  LONG cursor_width, cursor_height;
 
-  float out_width, out_height;
-  float in_width, in_height;
+  LONG cursor_width, cursor_height;
+  LONG cursor_scaled_width, cursor_scaled_height;
+
+  LONG in_width, in_height;
+  double out_width, out_height;
 
   std::vector<hwdevice_t*> *hwdevices_p;
 };

From fa489531b0f5fd67a745796451e71cf4cb00d4da Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 23 Apr 2020 00:23:40 +0300
Subject: [PATCH 22/25] Don't access video device ctx merely for setting cursor
 invisible

---
 sunshine/platform/windows_dxgi.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
index 39841074..cf414872 100644
--- a/sunshine/platform/windows_dxgi.cpp
+++ b/sunshine/platform/windows_dxgi.cpp
@@ -380,6 +380,12 @@ public:
   hwdevice_t() = delete;
 
   void set_cursor_pos(LONG rel_x, LONG rel_y, bool visible) {
+    cursor_visible = visible;
+
+    if(!visible) {
+      return;
+    }
+
     LONG x = ((double)rel_x) * out_width / (double)in_width;
     LONG y = ((double)rel_y) * out_height / (double)in_height;
 
@@ -399,8 +405,6 @@ public:
 
     ctx->VideoProcessorSetStreamSourceRect(processor.get(), 1, TRUE, &rect_in);
     ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect_out);
-
-    cursor_visible = visible;
   }
 
   int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) {

From 4bccec1c392e0f22fdc80279429b93e2803bc2dc Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 23 Apr 2020 15:41:40 +0200
Subject: [PATCH 23/25] Refactor platform Windows

---
 CMakeLists.txt                                |    9 +-
 .../{windows_wasapi.cpp => windows/audio.cpp} |    2 +-
 sunshine/platform/windows/display.h           |  116 ++
 sunshine/platform/windows/display_base.cpp    |  424 ++++++
 sunshine/platform/windows/display_ram.cpp     |  301 ++++
 sunshine/platform/windows/display_vram.cpp    |  503 +++++++
 .../{windows.cpp => windows/input.cpp}        |    2 +-
 sunshine/platform/windows_dxgi.cpp            | 1291 -----------------
 8 files changed, 1352 insertions(+), 1296 deletions(-)
 rename sunshine/platform/{windows_wasapi.cpp => windows/audio.cpp} (99%)
 create mode 100644 sunshine/platform/windows/display.h
 create mode 100644 sunshine/platform/windows/display_base.cpp
 create mode 100644 sunshine/platform/windows/display_ram.cpp
 create mode 100644 sunshine/platform/windows/display_vram.cpp
 rename sunshine/platform/{windows.cpp => windows/input.cpp} (99%)
 delete mode 100644 sunshine/platform/windows_dxgi.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 788c9438..ddc2dad2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,9 +57,12 @@ if(WIN32)
 	include_directories(
 		ViGEmClient/include)
 	set(PLATFORM_TARGET_FILES
-		sunshine/platform/windows.cpp
-		sunshine/platform/windows_dxgi.cpp
-		sunshine/platform/windows_wasapi.cpp
+		sunshine/platform/windows/input.cpp
+		sunshine/platform/windows/display.h
+		sunshine/platform/windows/display_base.cpp
+		sunshine/platform/windows/display_vram.cpp
+		sunshine/platform/windows/display_ram.cpp
+		sunshine/platform/windows/audio.cpp
 		ViGEmClient/src/ViGEmClient.cpp
 		ViGEmClient/include/ViGEm/Client.h
 		ViGEmClient/include/ViGEm/Common.h
diff --git a/sunshine/platform/windows_wasapi.cpp b/sunshine/platform/windows/audio.cpp
similarity index 99%
rename from sunshine/platform/windows_wasapi.cpp
rename to sunshine/platform/windows/audio.cpp
index 97412106..8d824b70 100644
--- a/sunshine/platform/windows_wasapi.cpp
+++ b/sunshine/platform/windows/audio.cpp
@@ -12,7 +12,7 @@
 
 #include "sunshine/config.h"
 #include "sunshine/main.h"
-#include "common.h"
+#include "sunshine/platform/common.h"
 
 const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
 const IID IID_IMMDeviceEnumerator    = __uuidof(IMMDeviceEnumerator);
diff --git a/sunshine/platform/windows/display.h b/sunshine/platform/windows/display.h
new file mode 100644
index 00000000..662115e8
--- /dev/null
+++ b/sunshine/platform/windows/display.h
@@ -0,0 +1,116 @@
+//
+// Created by loki on 4/23/20.
+//
+
+#ifndef SUNSHINE_DISPLAY_H
+#define SUNSHINE_DISPLAY_H
+
+#include <dxgi.h>
+#include <d3d11.h>
+#include <d3d11_4.h>
+#include <d3dcommon.h>
+#include <dxgi1_2.h>
+
+#include "sunshine/utility.h"
+#include "sunshine/platform/common.h"
+
+namespace platf::dxgi {
+extern const char *format_str[];
+
+template<class T>
+void Release(T *dxgi) { // deleter plugged into the util::safe_ptr aliases below
+  dxgi->Release();
+}
+
+using factory1_t    = util::safe_ptr<IDXGIFactory1, Release<IDXGIFactory1>>; // every alias in this group pairs an interface with Release<T> as its deleter
+using dxgi_t        = util::safe_ptr<IDXGIDevice, Release<IDXGIDevice>>;
+using dxgi1_t       = util::safe_ptr<IDXGIDevice1, Release<IDXGIDevice1>>;
+using device_t      = util::safe_ptr<ID3D11Device, Release<ID3D11Device>>;
+using device_ctx_t  = util::safe_ptr<ID3D11DeviceContext, Release<ID3D11DeviceContext>>;
+using adapter_t     = util::safe_ptr<IDXGIAdapter1, Release<IDXGIAdapter1>>;
+using output_t      = util::safe_ptr<IDXGIOutput, Release<IDXGIOutput>>;
+using output1_t     = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
+using dup_t         = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
+using texture2d_t   = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
+using resource_t    = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
+using multithread_t = util::safe_ptr<ID3D11Multithread, Release<ID3D11Multithread>>;
+
+namespace video { // the same safe_ptr/Release pairing, for the ID3D11Video* interfaces
+using device_t         = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
+using ctx_t            = util::safe_ptr<ID3D11VideoContext, Release<ID3D11VideoContext>>;
+using processor_t      = util::safe_ptr<ID3D11VideoProcessor, Release<ID3D11VideoProcessor>>;
+using processor_out_t  = util::safe_ptr<ID3D11VideoProcessorOutputView, Release<ID3D11VideoProcessorOutputView>>;
+using processor_in_t   = util::safe_ptr<ID3D11VideoProcessorInputView, Release<ID3D11VideoProcessorInputView>>;
+using processor_enum_t = util::safe_ptr<ID3D11VideoProcessorEnumerator, Release<ID3D11VideoProcessorEnumerator>>;
+}
+
+class hwdevice_t;
+struct cursor_t {
+  std::vector<std::uint8_t> img_data; // raw pointer-shape bytes; display_ram_t::snapshot fills it through GetFramePointerShape
+
+  DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info; // type, width, height and pitch describing img_data
+  int x, y; // last PointerPosition.Position taken from the frame info
+  bool visible; // last PointerPosition.Visible taken from the frame info
+};
+
+struct gpu_cursor_t {
+  texture2d_t texture; // NOTE(review): presumably the cursor image held as a D3D11 texture — confirm in display_vram.cpp
+
+  LONG width, height;
+};
+
+class duplication_t {
+public:
+  dup_t dup;
+  bool has_frame {}; // true between a successful AcquireNextFrame and the matching ReleaseFrame
+
+  capture_e next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, std::chrono::milliseconds timeout, resource_t::pointer *res_p); // releases the held frame, then acquires the next one
+  capture_e reset(dup_t::pointer dup_p = dup_t::pointer()); // releases the held frame, then replaces `dup`
+  capture_e release_frame(); // no-op returning capture_e::ok when no frame is held
+
+  ~duplication_t(); // releases a frame that is still held
+};
+
+class display_base_t : public display_t {
+public:
+  int init(); // returns 0 on success, -1 on failure
+
+  factory1_t factory;
+  adapter_t adapter;
+  output_t output;
+  device_t device;
+  device_ctx_t device_ctx;
+  duplication_t dup;
+
+  DXGI_FORMAT format; // set by init() from the duplication's ModeDesc.Format
+  D3D_FEATURE_LEVEL feature_level; // written by D3D11CreateDevice in init()
+};
+
+class display_ram_t : public display_base_t {
+public:
+  capture_e snapshot(img_t *img, std::chrono::milliseconds timeout, bool cursor_visible) override;
+  std::shared_ptr<img_t> alloc_img() override;
+  int dummy_img(img_t *img) override;
+
+  int init(); // runs display_base_t::init(), then creates and maps the staging texture
+
+  cursor_t cursor;
+  D3D11_MAPPED_SUBRESOURCE img_info; // CPU mapping of `texture`; written by init() and by snapshot()
+  texture2d_t texture; // staging texture with CPU read access that captured frames are copied into
+};
+
+class display_vram_t : public display_base_t, public std::enable_shared_from_this<display_vram_t> {
+public:
+  capture_e snapshot(img_t *img, std::chrono::milliseconds timeout, bool cursor_visible) override;
+
+  std::shared_ptr<img_t> alloc_img() override;
+  int dummy_img(img_t *img_base) override;
+
+  std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, pix_fmt_e pix_fmt) override;
+
+  gpu_cursor_t cursor;
+  std::vector<hwdevice_t*> hwdevices; // NOTE(review): raw pointers, presumably non-owning — confirm lifetime handling in display_vram.cpp
+};
+}
+
+#endif
\ No newline at end of file
diff --git a/sunshine/platform/windows/display_base.cpp b/sunshine/platform/windows/display_base.cpp
new file mode 100644
index 00000000..4b758007
--- /dev/null
+++ b/sunshine/platform/windows/display_base.cpp
@@ -0,0 +1,424 @@
+//
+// Created by loki on 1/12/20.
+//
+
+extern "C" {
+#include <libavcodec/avcodec.h>
+}
+
+#include <codecvt>
+
+#include "sunshine/config.h"
+#include "sunshine/main.h"
+#include "sunshine/platform/common.h"
+
+#include "display.h"
+
+namespace platf {
+using namespace std::literals;
+}
+namespace platf::dxgi {
+capture_e duplication_t::next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, std::chrono::milliseconds timeout, resource_t::pointer *res_p) {
+  auto capture_status = release_frame(); // a frame that is still held has to be given back first
+  if(capture_status != capture_e::ok) {
+    return capture_status;
+  }
+
+  auto status = dup->AcquireNextFrame(timeout.count(), &frame_info, res_p);
+  // Translate the result into capture_e; only S_OK leaves a frame held (has_frame)
+  switch(status) {
+    case S_OK:
+      has_frame = true;
+      return capture_e::ok;
+    case DXGI_ERROR_WAIT_TIMEOUT:
+      return capture_e::timeout;
+    case WAIT_ABANDONED:
+    case DXGI_ERROR_ACCESS_LOST:
+    case DXGI_ERROR_ACCESS_DENIED:
+      return capture_e::reinit;
+    default:
+      BOOST_LOG(error) << "Couldn't acquire next frame [0x"sv << util::hex(status).to_string_view() << ']';
+      return capture_e::error;
+  }
+}
+
+capture_e duplication_t::reset(dup_t::pointer dup_p) {
+  auto capture_status = release_frame();
+
+  dup.reset(dup_p);
+  has_frame = false; // a frame held on the old duplication cannot be released through the new (or null) one
+  return capture_status;
+}
+
+capture_e duplication_t::release_frame() {
+  if(!has_frame) {
+    return capture_e::ok;
+  }
+  // has_frame is cleared on success and when access was lost; timeout and unknown errors leave it set
+  auto status = dup->ReleaseFrame();
+  switch (status) {
+    case S_OK:
+      has_frame = false;
+      return capture_e::ok;
+    case DXGI_ERROR_WAIT_TIMEOUT:
+      return capture_e::timeout;
+    case WAIT_ABANDONED:
+    case DXGI_ERROR_ACCESS_LOST:
+    case DXGI_ERROR_ACCESS_DENIED:
+      has_frame = false;
+      return capture_e::reinit;
+    default:
+      BOOST_LOG(error) << "Couldn't release frame [0x"sv << util::hex(status).to_string_view() << ']';
+      return capture_e::error;
+  }
+}
+
+duplication_t::~duplication_t() {
+  release_frame(); // result ignored; only has an effect while has_frame is set
+}
+
+int display_base_t::init() { // returns 0 on success, -1 on failure
+/* Uncomment when use of IDXGIOutput5 is implemented
+  std::call_once(windows_cpp_once_flag, []() {
+    DECLARE_HANDLE(DPI_AWARENESS_CONTEXT);
+    const auto DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2 = ((DPI_AWARENESS_CONTEXT)-4);
+
+    typedef BOOL (*User32_SetProcessDpiAwarenessContext)(DPI_AWARENESS_CONTEXT value);
+
+    auto user32 = LoadLibraryA("user32.dll");
+    auto f = (User32_SetProcessDpiAwarenessContext)GetProcAddress(user32, "SetProcessDpiAwarenessContext");
+    if(f) {
+      f(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
+    }
+
+    FreeLibrary(user32);
+  });
+*/
+  dxgi::factory1_t::pointer   factory_p {};
+  dxgi::adapter_t::pointer    adapter_p {};
+  dxgi::output_t::pointer     output_p {};
+  dxgi::device_t::pointer     device_p {};
+  dxgi::device_ctx_t::pointer device_ctx_p {};
+
+  HRESULT status;
+
+  status = CreateDXGIFactory1(IID_IDXGIFactory1, (void**)&factory_p);
+  factory.reset(factory_p);
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']';
+    return -1;
+  }
+
+  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
+
+  auto adapter_name = converter.from_bytes(config::video.adapter_name);
+  auto output_name = converter.from_bytes(config::video.output_name);
+  // Walk every adapter and its outputs; a non-empty configured name acts as a filter
+  for(int x = 0; factory_p->EnumAdapters1(x, &adapter_p) != DXGI_ERROR_NOT_FOUND; ++x) {
+    dxgi::adapter_t adapter_tmp { adapter_p };
+
+    DXGI_ADAPTER_DESC1 adapter_desc;
+    adapter_tmp->GetDesc1(&adapter_desc);
+
+    if(!adapter_name.empty() && adapter_desc.Description != adapter_name) {
+      continue;
+    }
+
+    for(int y = 0; adapter_tmp->EnumOutputs(y, &output_p) != DXGI_ERROR_NOT_FOUND; ++y) {
+      dxgi::output_t output_tmp {output_p };
+
+      DXGI_OUTPUT_DESC desc;
+      output_tmp->GetDesc(&desc);
+
+      if(!output_name.empty() && desc.DeviceName != output_name) {
+        continue;
+      }
+      // No break here: the last matching output of the adapter that is attached to the desktop wins
+      if(desc.AttachedToDesktop) {
+        output = std::move(output_tmp);
+
+        width  = desc.DesktopCoordinates.right - desc.DesktopCoordinates.left;
+        height = desc.DesktopCoordinates.bottom - desc.DesktopCoordinates.top;
+      }
+    }
+
+    if(output) {
+      adapter = std::move(adapter_tmp);
+      break;
+    }
+  }
+
+  if(!output) {
+    BOOST_LOG(error) << "Failed to locate an output device"sv;
+    return -1;
+  }
+
+  D3D_FEATURE_LEVEL featureLevels[] {
+    D3D_FEATURE_LEVEL_12_1,
+    D3D_FEATURE_LEVEL_12_0,
+    D3D_FEATURE_LEVEL_11_1,
+    D3D_FEATURE_LEVEL_11_0,
+    D3D_FEATURE_LEVEL_10_1,
+    D3D_FEATURE_LEVEL_10_0,
+    D3D_FEATURE_LEVEL_9_3,
+    D3D_FEATURE_LEVEL_9_2,
+    D3D_FEATURE_LEVEL_9_1
+  };
+  // The extra reference obtained here is released right after D3D11CreateDevice
+  status = adapter->QueryInterface(IID_IDXGIAdapter, (void**)&adapter_p);
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to query IDXGIAdapter interface"sv;
+
+    return -1;
+  }
+
+  status = D3D11CreateDevice(
+    adapter_p,
+    D3D_DRIVER_TYPE_UNKNOWN,
+    nullptr,
+    D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
+    featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
+    D3D11_SDK_VERSION,
+    &device_p,
+    &feature_level,
+    &device_ctx_p);
+
+  adapter_p->Release();
+
+  device.reset(device_p);
+  device_ctx.reset(device_ctx_p);
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to create D3D11 device [0x"sv << util::hex(status).to_string_view() << ']';
+
+    return -1;
+  }
+
+  DXGI_ADAPTER_DESC adapter_desc;
+  adapter->GetDesc(&adapter_desc);
+
+  auto description = converter.to_bytes(adapter_desc.Description);
+  BOOST_LOG(info)
+    << std::endl
+    << "Device Description : " << description << std::endl
+    << "Device Vendor ID   : 0x"sv << util::hex(adapter_desc.VendorId).to_string_view() << std::endl
+    << "Device Device ID   : 0x"sv << util::hex(adapter_desc.DeviceId).to_string_view() << std::endl
+    << "Device Video Mem   : "sv << adapter_desc.DedicatedVideoMemory / 1048576 << " MiB"sv << std::endl
+    << "Device Sys Mem     : "sv << adapter_desc.DedicatedSystemMemory / 1048576 << " MiB"sv << std::endl
+    << "Share Sys Mem      : "sv << adapter_desc.SharedSystemMemory / 1048576 << " MiB"sv << std::endl
+    << "Feature Level      : 0x"sv << util::hex(feature_level).to_string_view() << std::endl
+    << "Capture size       : "sv << width << 'x'  << height;
+
+  // Bump up thread priority
+  {
+    dxgi::dxgi_t::pointer dxgi_p {};
+    status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_p);
+    dxgi::dxgi_t dxgi { dxgi_p };
+
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    dxgi->SetGPUThreadPriority(7);
+  }
+
+  // Try to reduce latency
+  {
+    dxgi::dxgi1_t::pointer dxgi_p {};
+    status = device->QueryInterface(IID_IDXGIDevice1, (void**)&dxgi_p); // SetMaximumFrameLatency belongs to IDXGIDevice1, so ask for that interface
+    dxgi::dxgi1_t dxgi { dxgi_p };
+
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    dxgi->SetMaximumFrameLatency(1);
+  }
+
+  //FIXME: Duplicate output on RX580 in combination with DOOM (2016) --> BSOD
+  //TODO: Use IDXGIOutput5 for improved performance
+  {
+    dxgi::output1_t::pointer output1_p {};
+    status = output->QueryInterface(IID_IDXGIOutput1, (void**)&output1_p);
+    dxgi::output1_t output1 {output1_p };
+
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query IDXGIOutput1 from the output"sv;
+      return -1;
+    }
+
+    // We try this twice, in case we still get an error on reinitialization
+    for(int x = 0; x < 2; ++x) {
+      dxgi::dup_t::pointer dup_p {};
+      status = output1->DuplicateOutput((IUnknown*)device.get(), &dup_p);
+      if(SUCCEEDED(status)) {
+        dup.reset(dup_p);
+        break;
+      }
+      std::this_thread::sleep_for(200ms);
+    }
+
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "DuplicateOutput Failed [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+  }
+
+  DXGI_OUTDUPL_DESC dup_desc;
+  dup.dup->GetDesc(&dup_desc);
+
+  format = dup_desc.ModeDesc.Format;
+  // format_str is indexed directly by the DXGI_FORMAT value
+  BOOST_LOG(debug) << "Source format ["sv << format_str[dup_desc.ModeDesc.Format] << ']';
+
+  return 0;
+}
+
+const char *format_str[] = { // names of DXGI_FORMAT values, indexed by the numeric value (see the debug log in display_base_t::init)
+  "DXGI_FORMAT_UNKNOWN",
+  "DXGI_FORMAT_R32G32B32A32_TYPELESS",
+  "DXGI_FORMAT_R32G32B32A32_FLOAT",
+  "DXGI_FORMAT_R32G32B32A32_UINT",
+  "DXGI_FORMAT_R32G32B32A32_SINT",
+  "DXGI_FORMAT_R32G32B32_TYPELESS",
+  "DXGI_FORMAT_R32G32B32_FLOAT",
+  "DXGI_FORMAT_R32G32B32_UINT",
+  "DXGI_FORMAT_R32G32B32_SINT",
+  "DXGI_FORMAT_R16G16B16A16_TYPELESS",
+  "DXGI_FORMAT_R16G16B16A16_FLOAT",
+  "DXGI_FORMAT_R16G16B16A16_UNORM",
+  "DXGI_FORMAT_R16G16B16A16_UINT",
+  "DXGI_FORMAT_R16G16B16A16_SNORM",
+  "DXGI_FORMAT_R16G16B16A16_SINT",
+  "DXGI_FORMAT_R32G32_TYPELESS",
+  "DXGI_FORMAT_R32G32_FLOAT",
+  "DXGI_FORMAT_R32G32_UINT",
+  "DXGI_FORMAT_R32G32_SINT",
+  "DXGI_FORMAT_R32G8X24_TYPELESS",
+  "DXGI_FORMAT_D32_FLOAT_S8X24_UINT",
+  "DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS",
+  "DXGI_FORMAT_X32_TYPELESS_G8X24_UINT",
+  "DXGI_FORMAT_R10G10B10A2_TYPELESS",
+  "DXGI_FORMAT_R10G10B10A2_UNORM",
+  "DXGI_FORMAT_R10G10B10A2_UINT",
+  "DXGI_FORMAT_R11G11B10_FLOAT",
+  "DXGI_FORMAT_R8G8B8A8_TYPELESS",
+  "DXGI_FORMAT_R8G8B8A8_UNORM",
+  "DXGI_FORMAT_R8G8B8A8_UNORM_SRGB",
+  "DXGI_FORMAT_R8G8B8A8_UINT",
+  "DXGI_FORMAT_R8G8B8A8_SNORM",
+  "DXGI_FORMAT_R8G8B8A8_SINT",
+  "DXGI_FORMAT_R16G16_TYPELESS",
+  "DXGI_FORMAT_R16G16_FLOAT",
+  "DXGI_FORMAT_R16G16_UNORM",
+  "DXGI_FORMAT_R16G16_UINT",
+  "DXGI_FORMAT_R16G16_SNORM",
+  "DXGI_FORMAT_R16G16_SINT",
+  "DXGI_FORMAT_R32_TYPELESS",
+  "DXGI_FORMAT_D32_FLOAT",
+  "DXGI_FORMAT_R32_FLOAT",
+  "DXGI_FORMAT_R32_UINT",
+  "DXGI_FORMAT_R32_SINT",
+  "DXGI_FORMAT_R24G8_TYPELESS",
+  "DXGI_FORMAT_D24_UNORM_S8_UINT",
+  "DXGI_FORMAT_R24_UNORM_X8_TYPELESS",
+  "DXGI_FORMAT_X24_TYPELESS_G8_UINT",
+  "DXGI_FORMAT_R8G8_TYPELESS",
+  "DXGI_FORMAT_R8G8_UNORM",
+  "DXGI_FORMAT_R8G8_UINT",
+  "DXGI_FORMAT_R8G8_SNORM",
+  "DXGI_FORMAT_R8G8_SINT",
+  "DXGI_FORMAT_R16_TYPELESS",
+  "DXGI_FORMAT_R16_FLOAT",
+  "DXGI_FORMAT_D16_UNORM",
+  "DXGI_FORMAT_R16_UNORM",
+  "DXGI_FORMAT_R16_UINT",
+  "DXGI_FORMAT_R16_SNORM",
+  "DXGI_FORMAT_R16_SINT",
+  "DXGI_FORMAT_R8_TYPELESS",
+  "DXGI_FORMAT_R8_UNORM",
+  "DXGI_FORMAT_R8_UINT",
+  "DXGI_FORMAT_R8_SNORM",
+  "DXGI_FORMAT_R8_SINT",
+  "DXGI_FORMAT_A8_UNORM",
+  "DXGI_FORMAT_R1_UNORM",
+  "DXGI_FORMAT_R9G9B9E5_SHAREDEXP",
+  "DXGI_FORMAT_R8G8_B8G8_UNORM",
+  "DXGI_FORMAT_G8R8_G8B8_UNORM",
+  "DXGI_FORMAT_BC1_TYPELESS",
+  "DXGI_FORMAT_BC1_UNORM",
+  "DXGI_FORMAT_BC1_UNORM_SRGB",
+  "DXGI_FORMAT_BC2_TYPELESS",
+  "DXGI_FORMAT_BC2_UNORM",
+  "DXGI_FORMAT_BC2_UNORM_SRGB",
+  "DXGI_FORMAT_BC3_TYPELESS",
+  "DXGI_FORMAT_BC3_UNORM",
+  "DXGI_FORMAT_BC3_UNORM_SRGB",
+  "DXGI_FORMAT_BC4_TYPELESS",
+  "DXGI_FORMAT_BC4_UNORM",
+  "DXGI_FORMAT_BC4_SNORM",
+  "DXGI_FORMAT_BC5_TYPELESS",
+  "DXGI_FORMAT_BC5_UNORM",
+  "DXGI_FORMAT_BC5_SNORM",
+  "DXGI_FORMAT_B5G6R5_UNORM",
+  "DXGI_FORMAT_B5G5R5A1_UNORM",
+  "DXGI_FORMAT_B8G8R8A8_UNORM",
+  "DXGI_FORMAT_B8G8R8X8_UNORM",
+  "DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM",
+  "DXGI_FORMAT_B8G8R8A8_TYPELESS",
+  "DXGI_FORMAT_B8G8R8A8_UNORM_SRGB",
+  "DXGI_FORMAT_B8G8R8X8_TYPELESS",
+  "DXGI_FORMAT_B8G8R8X8_UNORM_SRGB",
+  "DXGI_FORMAT_BC6H_TYPELESS",
+  "DXGI_FORMAT_BC6H_UF16",
+  "DXGI_FORMAT_BC6H_SF16",
+  "DXGI_FORMAT_BC7_TYPELESS",
+  "DXGI_FORMAT_BC7_UNORM",
+  "DXGI_FORMAT_BC7_UNORM_SRGB",
+  "DXGI_FORMAT_AYUV",
+  "DXGI_FORMAT_Y410",
+  "DXGI_FORMAT_Y416",
+  "DXGI_FORMAT_NV12",
+  "DXGI_FORMAT_P010",
+  "DXGI_FORMAT_P016",
+  "DXGI_FORMAT_420_OPAQUE",
+  "DXGI_FORMAT_YUY2",
+  "DXGI_FORMAT_Y210",
+  "DXGI_FORMAT_Y216",
+  "DXGI_FORMAT_NV11",
+  "DXGI_FORMAT_AI44",
+  "DXGI_FORMAT_IA44",
+  "DXGI_FORMAT_P8",
+  "DXGI_FORMAT_A8P8",
+  "DXGI_FORMAT_B4G4R4A4_UNORM",
+  // 14 placeholders for indices 116..129, which have no name in this table, so the entries after them start at index 130
+  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+  "DXGI_FORMAT_P208",
+  "DXGI_FORMAT_V208",
+  "DXGI_FORMAT_V408"
+};
+
+}
+
+namespace platf {
+std::shared_ptr<display_t> display(dev_type_e hwdevice_type) { // nullptr when init() fails or the device type is not handled here
+  if(hwdevice_type == dev_type_e::dxgi) {
+    auto disp = std::make_shared<dxgi::display_vram_t>();
+    // init() returns 0 on success
+    if(!disp->init()) {
+      return disp;
+    }
+  }
+  else if(hwdevice_type == dev_type_e::none) {
+    auto disp = std::make_shared<dxgi::display_ram_t>();
+    // init() returns 0 on success
+    if(!disp->init()) {
+      return disp;
+    }
+  }
+
+  return nullptr;
+}
+}
diff --git a/sunshine/platform/windows/display_ram.cpp b/sunshine/platform/windows/display_ram.cpp
new file mode 100644
index 00000000..2f26eca5
--- /dev/null
+++ b/sunshine/platform/windows/display_ram.cpp
@@ -0,0 +1,301 @@
+#include "sunshine/main.h"
+#include "display.h"
+
+namespace platf {
+using namespace std::literals;
+}
+
+namespace platf::dxgi {
+struct img_t : public ::platf::img_t  {
+  ~img_t() override { // frees the buffer display_ram_t::alloc_img() allocates with new[]
+    delete[] data;
+    data = nullptr;
+  }
+};
+
+void blend_cursor_monochrome(const cursor_t &cursor, img_t &img) {
+  int height = cursor.shape_info.Height / 2;
+  int width  = cursor.shape_info.Width;
+  int pitch  = cursor.shape_info.Pitch;
+
+  // img cursor.{x,y} < 0, skip parts of the cursor.img_data
+  auto cursor_skip_y = -std::min(0, cursor.y);
+  auto cursor_skip_x = -std::min(0, cursor.x);
+
+  // img cursor.{x,y} > img.{x,y}, truncate parts of the cursor.img_data
+  auto cursor_truncate_y = std::max(0, cursor.y - img.height);
+  auto cursor_truncate_x = std::max(0, cursor.x - img.width);
+
+  auto cursor_width = width - cursor_skip_x - cursor_truncate_x;
+  auto cursor_height = height - cursor_skip_y - cursor_truncate_y;
+  // Nothing of the cursor is inside the image: bail out before any mask address is computed
+  if(cursor_height <= 0 || cursor_width <= 0 || cursor_height > height || cursor_width > width) {
+    return;
+  }
+
+  auto img_skip_y    = std::max(0, cursor.y);
+  auto img_skip_x    = std::max(0, cursor.x);
+
+  auto cursor_img_data = cursor.img_data.data() + cursor_skip_y * pitch;
+
+  int delta_height = std::min(cursor_height - cursor_truncate_y, std::max(0, img.height - img_skip_y));
+  int delta_width = std::min(cursor_width - cursor_truncate_x, std::max(0, img.width - img_skip_x));
+
+  // The AND/XOR masks are 1 bit per pixel, most significant bit first, so a mask byte
+  // always covers 8 pixels no matter how far each row is padded out to `pitch`.
+  constexpr int pixels_per_byte = 8;
+
+  auto img_data = (int*)img.data;
+  for(int i = 0; i < delta_height; ++i) {
+    // The XOR mask is stored `height` rows below the AND mask
+    auto and_mask = &cursor_img_data[i * pitch];
+    auto xor_mask = &cursor_img_data[(i + height) * pitch];
+
+    auto img_pixel_p = &img_data[(i + img_skip_y) * (img.row_pitch / img.pixel_pitch) + img_skip_x];
+
+    // Walk the visible pixels one by one instead of whole mask bytes, so a cursor clipped at
+    // the left or right edge of the image keeps every pixel that is still inside it.
+    for(int x = 0; x < delta_width; ++x) {
+      // Column inside the cursor shape; the first cursor_skip_x columns are left of the image
+      auto cursor_x = x + cursor_skip_x;
+
+      auto byte = cursor_x / pixels_per_byte;
+      auto bit  = 1 << (pixels_per_byte - 1 - cursor_x % pixels_per_byte);
+
+      // Expand each mask bit to a whole-pixel mask
+      int and_ = and_mask[byte] & bit ? -1 : 0;
+      int xor_ = xor_mask[byte] & bit ? -1 : 0;
+
+      // AND first, then XOR
+      *img_pixel_p &= and_;
+      *img_pixel_p ^= xor_;
+
+      ++img_pixel_p;
+    }
+  }
+}
+
+void apply_color_alpha(int *img_pixel_p, int cursor_pixel) {
+  auto colors_out = (std::uint8_t*)&cursor_pixel; // bytes of the cursor pixel
+  auto colors_in  = (std::uint8_t*)img_pixel_p; // bytes of the image pixel, modified in place
+  // Byte 3 of the cursor pixel is used as its alpha
+  //TODO: When use of IDXGIOutput5 is implemented, support different color formats
+  auto alpha = colors_out[3];
+  if(alpha == 255) { // fully opaque: the cursor pixel replaces the image pixel
+    *img_pixel_p = cursor_pixel;
+  }
+  else { // image bytes 0..2 are scaled by (255 - alpha) / 255 with rounding, then the cursor byte is added unscaled
+    colors_in[0] = colors_out[0] + (colors_in[0] * (255 - alpha) + 255/2) / 255;
+    colors_in[1] = colors_out[1] + (colors_in[1] * (255 - alpha) + 255/2) / 255;
+    colors_in[2] = colors_out[2] + (colors_in[2] * (255 - alpha) + 255/2) / 255;
+  }
+}
+
+void apply_color_masked(int *img_pixel_p, int cursor_pixel) {
+  //TODO: When use of IDXGIOutput5 is implemented, support different color formats
+  auto alpha = ((std::uint8_t*)&cursor_pixel)[3]; // byte 3 of the cursor pixel selects the operation
+  if(alpha == 0xFF) { // 0xFF: XOR the cursor pixel onto the image pixel
+    *img_pixel_p ^= cursor_pixel;
+  }
+  else { // anything else: the cursor pixel replaces the image pixel
+    *img_pixel_p = cursor_pixel;
+  }
+}
+
+void blend_cursor_color(const cursor_t &cursor, img_t &img, const bool masked) {
+  int height = cursor.shape_info.Height;
+  int width  = cursor.shape_info.Width;
+  int pitch  = cursor.shape_info.Pitch;
+
+  // img cursor.y < 0, skip parts of the cursor.img_data
+  auto cursor_skip_y = -std::min(0, cursor.y);
+  auto cursor_skip_x = -std::min(0, cursor.x);
+
+  // img cursor.{x,y} > img.{x,y}, truncate parts of the cursor.img_data
+  auto cursor_truncate_y = std::max(0, cursor.y - img.height);
+  auto cursor_truncate_x = std::max(0, cursor.x - img.width);
+
+  auto img_skip_y    = std::max(0, cursor.y);
+  auto img_skip_x    = std::max(0, cursor.x);
+
+  auto cursor_width = width - cursor_skip_x - cursor_truncate_x;
+  auto cursor_height = height - cursor_skip_y - cursor_truncate_y;
+  // Nothing of the cursor is inside the image: bail out before indexing into cursor.img_data
+  if(cursor_height <= 0 || cursor_width <= 0 || cursor_height > height || cursor_width > width) {
+    return;
+  }
+
+  auto cursor_img_data = (int*)&cursor.img_data[cursor_skip_y * pitch];
+  // delta_width is clamped to zero so the [cursor_begin, cursor_end) range below can never be inverted
+  int delta_height = std::min(cursor_height - cursor_truncate_y, std::max(0, img.height - img_skip_y));
+  int delta_width = std::max(0, std::min(cursor_width - cursor_truncate_x, img.width - img_skip_x));
+
+  auto img_data = (int*)img.data;
+  auto cursor_stride = pitch / (int)sizeof(int); // row stride in pixels, taken from the pitch like the row skip above
+  for(int i = 0; i < delta_height; ++i) {
+    auto cursor_begin = &cursor_img_data[i * cursor_stride + cursor_skip_x];
+    auto cursor_end = &cursor_begin[delta_width];
+
+    auto img_pixel_p = &img_data[(i + img_skip_y) * (img.row_pitch / img.pixel_pitch) + img_skip_x];
+    std::for_each(cursor_begin, cursor_end, [&](int cursor_pixel) {
+      if(masked) {
+        apply_color_masked(img_pixel_p, cursor_pixel);
+      }
+      else {
+        apply_color_alpha(img_pixel_p, cursor_pixel);
+      }
+      ++img_pixel_p;
+    });
+  }
+}
+
+void blend_cursor(const cursor_t &cursor, img_t &img) { // picks the blend routine from cursor.shape_info.Type
+  switch(cursor.shape_info.Type) {
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
+      blend_cursor_color(cursor, img, false); // alpha blending (apply_color_alpha)
+      break;
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
+      blend_cursor_monochrome(cursor, img);
+      break;
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
+      blend_cursor_color(cursor, img, true); // masked operation (apply_color_masked)
+      break;
+    default:
+      BOOST_LOG(warning) << "Unsupported cursor format ["sv << cursor.shape_info.Type << ']';
+  }
+}
+
+capture_e display_ram_t::snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) {
+  auto img = (img_t*)img_base;
+  // Returns capture_e::ok once img holds a frame, capture_e::timeout when neither the frame nor a visible cursor changed
+  HRESULT status;
+
+  DXGI_OUTDUPL_FRAME_INFO frame_info;
+
+  resource_t::pointer res_p {};
+  auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
+  resource_t res{res_p};
+
+  if (capture_status != capture_e::ok) {
+    return capture_status;
+  }
+  // A non-zero PointerShapeBufferSize means this frame carries a new pointer shape
+  if(frame_info.PointerShapeBufferSize > 0) {
+    auto &img_data = cursor.img_data;
+
+    img_data.resize(frame_info.PointerShapeBufferSize);
+
+    UINT dummy;
+    status = dup.dup->GetFramePointerShape(img_data.size(), img_data.data(), &dummy, &cursor.shape_info);
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return capture_e::error;
+    }
+  }
+
+  if(frame_info.LastMouseUpdateTime.QuadPart) {
+    cursor.x = frame_info.PointerPosition.Position.x;
+    cursor.y = frame_info.PointerPosition.Position.y;
+    cursor.visible = frame_info.PointerPosition.Visible;
+  }
+
+  // If frame has been updated
+  if (frame_info.LastPresentTime.QuadPart != 0) {
+    {
+      texture2d_t::pointer src_p {};
+      status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
+      texture2d_t src{src_p};
+
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
+        return capture_e::error;
+      }
+      // CopyResource must not target a resource that is still mapped, so drop the previous mapping first
+      if(img_info.pData) {
+        device_ctx->Unmap(texture.get(), 0);
+        img_info.pData = nullptr;
+      }
+
+      //Copy from GPU to CPU
+      device_ctx->CopyResource(texture.get(), src.get());
+    }
+
+    status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Failed to map texture [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return capture_e::error;
+    }
+  }
+
+  const bool mouse_update =
+    (frame_info.LastMouseUpdateTime.QuadPart || frame_info.PointerShapeBufferSize > 0) &&
+    (cursor_visible && cursor.visible);
+
+  const bool update_flag = frame_info.LastPresentTime.QuadPart != 0 || mouse_update;
+
+  if(!update_flag) {
+    return capture_e::timeout;
+  }
+  for(int row = 0; row < height; ++row) { // row by row: the mapped RowPitch may be padded beyond img->row_pitch
+    std::copy_n((std::uint8_t*)img_info.pData + row * img_info.RowPitch, std::min<std::size_t>(img_info.RowPitch, img->row_pitch), (std::uint8_t*)img->data + row * img->row_pitch);
+  }
+  if(cursor_visible && cursor.visible) {
+    blend_cursor(cursor, *img);
+  }
+
+  return capture_e::ok;
+}
+
+std::shared_ptr<platf::img_t> display_ram_t::alloc_img() {
+  auto img = std::make_shared<img_t>();
+  // Use the pitch of the mapped staging texture (never less than 4 * width) so rows in img->data line up with the rows snapshot() reads
+  img->pixel_pitch  = 4;
+  img->row_pitch    = std::max<int>(img_info.RowPitch, img->pixel_pitch * width);
+  img->width        = width;
+  img->height       = height;
+  img->data         = new std::uint8_t[img->row_pitch * height];
+
+  return img;
+}
+
+int display_ram_t::dummy_img(platf::img_t *img) { // leaves img untouched
+  return 0;
+}
+
+int display_ram_t::init() { // returns 0 on success, -1 on failure
+  if(display_base_t::init()) {
+    return -1;
+  }
+  // Staging texture of the capture size and format with CPU read access; snapshot() copies each frame into it
+  D3D11_TEXTURE2D_DESC t {};
+  t.Width  = width;
+  t.Height = height;
+  t.MipLevels = 1;
+  t.ArraySize = 1;
+  t.SampleDesc.Count = 1;
+  t.Usage = D3D11_USAGE_STAGING;
+  t.Format = format;
+  t.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+
+  dxgi::texture2d_t::pointer tex_p {};
+  auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
+
+  texture.reset(tex_p);
+
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
+    return -1;
+  }
+
+  // map the texture simply to get the pitch and stride
+  status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info); // not unmapped here; snapshot() unmaps it
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
+    return -1;
+  }
+
+  return 0;
+}
+}
\ No newline at end of file
diff --git a/sunshine/platform/windows/display_vram.cpp b/sunshine/platform/windows/display_vram.cpp
new file mode 100644
index 00000000..d73e6584
--- /dev/null
+++ b/sunshine/platform/windows/display_vram.cpp
@@ -0,0 +1,503 @@
+#include "sunshine/main.h"
+#include "display.h"
+
+namespace platf {
+using namespace std::literals;
+}
+
+namespace platf::dxgi {
+struct img_d3d_t : public platf::img_t { // Image backed by a D3D11 texture instead of a CPU buffer
+  std::shared_ptr<platf::display_t> display; // display that produced the image; presumably held to keep the D3D device alive -- confirm
+  texture2d_t texture; // owning handle; the code in this file also stores the same raw pointer in img_t::data
+
+  ~img_d3d_t() override = default;
+};
+
+util::buffer_t<std::uint8_t> make_cursor_image(util::buffer_t<std::uint8_t> &&img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info)  { // Returns a 4-byte-per-pixel cursor image built from the pointer shape in img_data
+  switch(shape_info.Type) {
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
+    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
+      return std::move(img_data); // colour shapes are handed back unchanged
+    default:
+      break; // every other type is expanded from 1-bit AND/XOR masks below
+  }
+
+  shape_info.Height /= 2; // the buffer holds two stacked masks (AND first, then XOR), so the visible height is half
+
+  util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };
+
+  auto bytes = shape_info.Pitch * shape_info.Height; // size of one mask in bytes
+  auto pixel_begin = (std::uint32_t*)std::begin(cursor_img);
+  auto pixel_data = pixel_begin;
+  auto and_mask = std::begin(img_data);
+  auto xor_mask = std::begin(img_data) + bytes;
+
+  for(auto x = 0; x < bytes; ++x)  { // NOTE(review): emits 8 pixels per mask byte, which assumes Pitch * 8 == Width -- confirm
+    for(auto c = 7; c >= 0; --c) { // most significant bit is the left-most pixel
+      auto bit = 1 << c;
+      auto color_type = ((*and_mask & bit) ? 1 : 0) + ((*xor_mask & bit) ? 2 : 0); // AND bit -> 1, XOR bit -> 2
+
+      constexpr std::uint32_t black = 0xFF000000;
+      constexpr std::uint32_t white = 0xFFFFFFFF;
+      constexpr std::uint32_t transparent = 0;
+      switch(color_type) {
+        case 0: //black
+          *pixel_data = black;
+          break;
+        case 2: //white
+          *pixel_data = white;
+          break;
+        case 1: //transparent
+        {
+          *pixel_data = transparent;
+
+          break;
+        }
+        case 3: //inverse
+        {
+          auto top_p    = pixel_data - shape_info.Width; // inverse pixels are drawn white with a black outline on neighbouring pixels
+          auto left_p   = pixel_data - 1;
+          auto right_p  = pixel_data + 1;
+          auto bottom_p = pixel_data + shape_info.Width;
+
+          // Get the x coordinate of the pixel
+          auto column = (pixel_data - pixel_begin) % shape_info.Width; // must stay an index (not a bool) for the edge tests below
+
+          if(top_p >= pixel_begin && *top_p == transparent) {
+            *top_p = black;
+          }
+
+          if(column != 0 && left_p >= pixel_begin && *left_p == transparent) { // column 0 has no left neighbour in the same row
+            *left_p = black;
+          }
+
+          if(bottom_p < (std::uint32_t*)std::end(cursor_img)) { // NOTE(review): rewritten when the scan reaches that pixel
+            *bottom_p = black;
+          }
+
+          if(column != shape_info.Width -1) { // on the last column right_p is the next row, or one past the buffer on the final pixel
+            *right_p = black;
+          }
+          *pixel_data = white;
+        }
+      }
+
+      ++pixel_data;
+    }
+    ++and_mask;
+    ++xor_mask;
+  }
+
+  return cursor_img;
+}
+
+class hwdevice_t : public platf::hwdevice_t { // Scales/converts captured textures to NV12 or P010 with the D3D11 video processor and overlays the cursor
+public:
+  hwdevice_t(std::vector<hwdevice_t*> *hwdevices_p) : hwdevices_p { hwdevices_p } {} // hwdevices_p: list this object erases itself from on destruction
+  hwdevice_t() = delete;
+
+  void set_cursor_pos(LONG rel_x, LONG rel_y, bool visible) { // rel_x/rel_y are in input-frame pixels; updates the rectangles of the cursor stream (index 1)
+    cursor_visible = visible;
+
+    if(!visible) {
+      return;
+    }
+
+    LONG x = ((double)rel_x) * out_width / (double)in_width; // scale the position from input to output resolution
+    LONG y = ((double)rel_y) * out_height / (double)in_height;
+
+    // Ensure it's within bounds
+    auto left_out   = std::min<LONG>(out_width, std::max<LONG>(0, x));
+    auto top_out    = std::min<LONG>(out_height, std::max<LONG>(0, y));
+    auto right_out  = std::max<LONG>(0, std::min<LONG>(out_width, x + cursor_scaled_width));
+    auto bottom_out = std::max<LONG>(0, std::min<LONG>(out_height, y + cursor_scaled_height));
+
+    auto left_in   = std::max<LONG>(0, -rel_x); // part of the cursor image cut off by the left/top edge
+    auto top_in    = std::max<LONG>(0, -rel_y);
+    auto right_in  = std::min<LONG>(in_width - rel_x, cursor_width); // part cut off by the right/bottom edge
+    auto bottom_in = std::min<LONG>(in_height - rel_y, cursor_height);
+
+    RECT rect_in { left_in, top_in, right_in, bottom_in };
+    RECT rect_out { left_out, top_out, right_out, bottom_out };
+
+    ctx->VideoProcessorSetStreamSourceRect(processor.get(), 1, TRUE, &rect_in);
+    ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect_out);
+  }
+
+  int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) { // Creates the cursor input view and records the cursor size; 0 on success, -1 on failure
+    D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } };
+
+    video::processor_in_t::pointer processor_in_p;
+    auto status = device->CreateVideoProcessorInputView(texture, processor_e.get(), &input_desc, &processor_in_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create cursor VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    cursor_in.reset(processor_in_p);
+
+    cursor_width  = width;
+    cursor_height = height;
+    cursor_scaled_width = ((double)width) / in_width * out_width; // cursor size expressed in output pixels
+    cursor_scaled_height = ((double)height) / in_height * out_height;
+
+    return 0;
+  }
+
+  int convert(platf::img_t &img_base) override { // Blits img_base (cast to img_d3d_t, unchecked) into the output texture; 0 on success, -1 on failure
+    auto &img = (img_d3d_t&)img_base;
+
+    auto it = texture_to_processor_in.find(img.texture.get()); // input views are created once per source texture and reused
+    if(it == std::end(texture_to_processor_in)) {
+      D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } };
+
+      video::processor_in_t::pointer processor_in_p;
+      auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p);
+      if(FAILED(status)) {
+        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
+        return -1;
+      }
+      it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first;
+    }
+    auto &processor_in = it->second;
+
+    D3D11_VIDEO_PROCESSOR_STREAM stream[] {
+      { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr }, // stream 0: the captured frame
+      { TRUE, 0, 0, 0, 0, nullptr, cursor_in.get(), nullptr } // stream 1: the cursor overlay
+    };
+
+    auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, (cursor_visible && cursor_in) ? 2 : 1, stream); // only submit the cursor stream once a cursor view exists
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    return 0;
+  }
+
+  void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { // Sets the processor's output colour space from the two packed values
+    colorspace |= (color_range >> 4); // NOTE(review): assumes the combined bits match the D3D11_VIDEO_PROCESSOR_COLOR_SPACE bitfield layout -- confirm
+    ctx->VideoProcessorSetOutputColorSpace(processor.get(), (D3D11_VIDEO_PROCESSOR_COLOR_SPACE*)&colorspace);
+  }
+
+  int init( // Builds the video processor and the output texture for an in -> out conversion; 0 on success, -1 on failure
+    std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p,
+    int in_width, int in_height, int out_width, int out_height,
+    pix_fmt_e pix_fmt
+  ) {
+    HRESULT status;
+
+    cursor_visible = false;
+
+    platf::hwdevice_t::img = &img; // expose the output image through the base class
+
+    this->out_width  = out_width;
+    this->out_height = out_height;
+    this->in_width   = in_width;
+    this->in_height  = in_height;
+
+    video::device_t::pointer vdevice_p;
+    status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query ID3D11VideoDevice interface [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    device.reset(vdevice_p);
+
+    video::ctx_t::pointer ctx_p;
+    status = device_ctx_p->QueryInterface(IID_ID3D11VideoContext, (void**)&ctx_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to query ID3D11VideoContext interface [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    ctx.reset(ctx_p);
+
+    D3D11_VIDEO_PROCESSOR_CONTENT_DESC contentDesc {
+      D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE,
+      { 1, 1 }, (UINT)in_width, (UINT)in_height,
+      { 1, 1 }, (UINT)out_width, (UINT)out_height,
+      D3D11_VIDEO_USAGE_OPTIMAL_QUALITY
+    };
+
+    video::processor_enum_t::pointer vp_e_p;
+    status = device->CreateVideoProcessorEnumerator(&contentDesc, &vp_e_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create video processor enumerator [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    processor_e.reset(vp_e_p);
+
+    video::processor_t::pointer processor_p;
+    status = device->CreateVideoProcessor(processor_e.get(), 0, &processor_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create video processor [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    processor.reset(processor_p);
+
+    D3D11_TEXTURE2D_DESC t {};
+    t.Width  = out_width;
+    t.Height = out_height;
+    t.MipLevels = 1;
+    t.ArraySize = 1;
+    t.SampleDesc.Count = 1;
+    t.Usage = D3D11_USAGE_DEFAULT;
+    t.Format = pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010; // anything other than nv12 is treated as P010
+    t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
+
+    dxgi::texture2d_t::pointer tex_p {};
+    status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create video output texture [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+
+    img.texture.reset(tex_p);
+    img.display = std::move(display);
+    img.width = out_width;
+    img.height = out_height;
+    img.data = (std::uint8_t*)tex_p; // data carries the raw texture pointer, not pixel bytes
+    img.row_pitch = out_width;
+    img.pixel_pitch = 1;
+
+    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
+    video::processor_out_t::pointer processor_out_p;
+    status = device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
+      return -1;
+    }
+    processor_out.reset(processor_out_p);
+
+    // Tell video processor alpha values need to be enabled
+    ctx->VideoProcessorSetStreamAlpha(processor.get(), 1, TRUE, 1.0f);
+
+    device_p->AddRef(); // data keeps its own reference to the device; released in the destructor
+    data = device_p;
+    return 0;
+  }
+
+  ~hwdevice_t() override { // Drops the device reference taken in init() and unregisters from *hwdevices_p
+    if(data) {
+      ((ID3D11Device*)data)->Release();
+    }
+
+    auto it = std::find(std::begin(*hwdevices_p), std::end(*hwdevices_p), this);
+    if(it != std::end(*hwdevices_p)) {
+      hwdevices_p->erase(it);
+    }
+  }
+
+  img_d3d_t img; // output image wrapping the NV12/P010 texture
+  video::device_t device;
+  video::ctx_t ctx;
+  video::processor_enum_t processor_e;
+  video::processor_t processor;
+  video::processor_out_t processor_out;
+  std::unordered_map<texture2d_t::pointer, video::processor_in_t> texture_to_processor_in; // input view per source texture, keyed by raw pointer
+
+  video::processor_in_t cursor_in; // empty until set_cursor_texture() succeeds
+
+  bool cursor_visible {};
+
+  LONG cursor_width {}, cursor_height {}; // value-initialised: set_cursor_pos() can run before set_cursor_texture()
+  LONG cursor_scaled_width {}, cursor_scaled_height {};
+
+  LONG in_width {}, in_height {};
+  double out_width {}, out_height {};
+
+  std::vector<hwdevice_t*> *hwdevices_p; // not owned
+};
+
+capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) { // Acquires the next duplicated frame into img_base's texture and forwards cursor changes to the hwdevices
+  auto img = (img_d3d_t*)img_base; // unchecked cast: img_base is assumed to come from this display's alloc_img()
+
+  HRESULT status;
+
+  DXGI_OUTDUPL_FRAME_INFO frame_info;
+
+  resource_t::pointer res_p {};
+  auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
+  resource_t res{res_p}; // takes ownership of the acquired resource for the rest of the call
+
+  if (capture_status != capture_e::ok) {
+    return capture_status;
+  }
+
+  const bool update_flag =
+    frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0 ||
+    frame_info.LastMouseUpdateTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0;
+
+  if(!update_flag) { // nothing changed: reported to the caller as a timeout
+    return capture_e::timeout;
+  }
+
+  if(frame_info.PointerShapeBufferSize > 0) { // a new pointer shape is available
+    DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info {};
+
+    util::buffer_t<std::uint8_t> img_data { frame_info.PointerShapeBufferSize };
+
+    UINT dummy;
+    status = dup.dup->GetFramePointerShape(img_data.size(), std::begin(img_data), &dummy, &shape_info);
+    if (FAILED(status)) {
+      BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
+
+      return capture_e::error;
+    }
+
+    auto cursor_img = make_cursor_image(std::move(img_data), shape_info);
+
+    D3D11_SUBRESOURCE_DATA data {
+      std::begin(cursor_img),
+      4 * shape_info.Width,
+      0
+    };
+
+    // Create texture for cursor
+    D3D11_TEXTURE2D_DESC t {};
+    t.Width  = shape_info.Width;
+    t.Height = cursor_img.size() / data.SysMemPitch; // derived from the buffer size rather than shape_info.Height
+    t.MipLevels = 1;
+    t.ArraySize = 1;
+    t.SampleDesc.Count = 1;
+    t.Usage = D3D11_USAGE_DEFAULT;
+    t.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+    t.BindFlags = D3D11_BIND_RENDER_TARGET;
+
+    dxgi::texture2d_t::pointer tex_p {};
+    status = device->CreateTexture2D(&t, &data, &tex_p); // reuses the outer status instead of shadowing it
+    if(FAILED(status)) {
+      BOOST_LOG(error) << "Failed to create cursor texture [0x"sv << util::hex(status).to_string_view() << ']';
+      return capture_e::error;
+    }
+    texture2d_t texture { tex_p };
+
+    for(auto *hwdevice : hwdevices) { // every live hwdevice needs an input view for the new cursor texture
+      if(hwdevice->set_cursor_texture(tex_p, t.Width, t.Height)) {
+        return capture_e::error;
+      }
+    }
+
+    cursor.texture = std::move(texture); // kept so make_hwdevice() can hand it to hwdevices created later
+    cursor.width   = t.Width;
+    cursor.height  = t.Height;
+  }
+
+  if(frame_info.LastMouseUpdateTime.QuadPart) { // the pointer moved or changed visibility
+    for(auto *hwdevice : hwdevices) {
+      hwdevice->set_cursor_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, frame_info.PointerPosition.Visible && cursor_visible);
+    }
+  }
+
+  texture2d_t::pointer src_p {};
+  status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
+
+  if (FAILED(status)) {
+    BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
+    return capture_e::error;
+  }
+
+  texture2d_t src { src_p };
+  device_ctx->CopyResource(img->texture.get(), src.get()); // GPU-side copy of the duplicated frame into the caller's texture
+
+  return capture_e::ok;
+}
+
+std::shared_ptr<platf::img_t> display_vram_t::alloc_img() { // Creates a texture-backed image of display size; returns nullptr if the texture cannot be created
+  auto img = std::make_shared<img_d3d_t>();
+
+  D3D11_TEXTURE2D_DESC t {};
+  t.Width  = width;
+  t.Height = height;
+  t.MipLevels = 1;
+  t.ArraySize = 1;
+  t.SampleDesc.Count = 1;
+  t.Usage = D3D11_USAGE_DEFAULT;
+  t.Format = format;
+  t.BindFlags = D3D11_BIND_RENDER_TARGET;
+
+  dxgi::texture2d_t::pointer tex_p {};
+  auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']';
+    return nullptr;
+  }
+
+  img->data        = (std::uint8_t*)tex_p; // data carries the raw texture pointer, not pixel bytes
+  img->row_pitch   = 0;
+  img->pixel_pitch = 4;
+  img->width       = 0; // NOTE(review): width/height stay 0 here although the texture is width x height; dummy_img() sets them -- confirm intent
+  img->height      = 0;
+  img->texture.reset(tex_p); // the image owns the texture from here on
+  img->display     = shared_from_this();
+
+  return img;
+}
+
+int display_vram_t::dummy_img(platf::img_t *img_base) { // Replaces the image's texture with a zero-filled one of display size; 0 on success, -1 on failure
+  auto img = (img_d3d_t*)img_base; // unchecked cast: img_base is assumed to be an img_d3d_t
+
+  img->row_pitch = width * 4;
+  auto dummy_data = std::make_unique<int[]>(width * height); // value-initialised, i.e. all zeros; one int per pixel
+  D3D11_SUBRESOURCE_DATA data {
+    dummy_data.get(),
+    (UINT)img->row_pitch,
+    0
+  };
+
+  D3D11_TEXTURE2D_DESC t {};
+  t.Width  = width;
+  t.Height = height;
+  t.MipLevels = 1;
+  t.ArraySize = 1;
+  t.SampleDesc.Count = 1;
+  t.Usage = D3D11_USAGE_DEFAULT;
+  t.Format = format;
+  t.BindFlags = D3D11_BIND_RENDER_TARGET;
+
+  dxgi::texture2d_t::pointer tex_p {};
+  auto status = device->CreateTexture2D(&t, &data, &tex_p); // the zero buffer is the texture's initial content
+  if(FAILED(status)) {
+    BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
+    return -1;
+  }
+
+  img->data        = (std::uint8_t*)tex_p;
+  img->texture.reset(tex_p); // takes ownership of the new texture in place of the previous one
+  img->height      = height;
+  img->width       = width;
+  img->pixel_pitch = 4;
+
+  return 0;
+}
+
+std::shared_ptr<platf::hwdevice_t> display_vram_t::make_hwdevice(int width, int height, pix_fmt_e pix_fmt) { // Creates a converter from display size to width x height; returns nullptr on any failure
+  if(pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) { // only nv12 and p010 outputs are accepted
+    BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << (int)pix_fmt << ']';
+
+    return nullptr;
+  }
+
+  auto hwdevice = std::make_shared<hwdevice_t>(&hwdevices);
+
+  auto ret = hwdevice->init(
+    shared_from_this(),
+    device.get(),
+    device_ctx.get(),
+    this->width, this->height, // input size: the display's own dimensions
+    width, height, // output size: the requested dimensions
+    pix_fmt);
+
+  if(ret) {
+    return nullptr;
+  }
+
+  if(cursor.texture && hwdevice->set_cursor_texture(cursor.texture.get(), cursor.width, cursor.height)) { // hand over a cursor shape captured earlier, if any
+    return nullptr;
+  }
+
+  hwdevices.emplace_back(hwdevice.get()); // raw pointer only; the hwdevice_t destructor removes it again
+
+  return hwdevice;
+}
+}
\ No newline at end of file
diff --git a/sunshine/platform/windows.cpp b/sunshine/platform/windows/input.cpp
similarity index 99%
rename from sunshine/platform/windows.cpp
rename to sunshine/platform/windows/input.cpp
index 3a113df8..ba0c2338 100755
--- a/sunshine/platform/windows.cpp
+++ b/sunshine/platform/windows/input.cpp
@@ -10,7 +10,7 @@
 #include <ViGEm/Client.h>
 
 #include "sunshine/main.h"
-#include "common.h"
+#include "sunshine/platform/common.h"
 
 namespace platf {
 using namespace std::literals;
diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp
deleted file mode 100644
index cf414872..00000000
--- a/sunshine/platform/windows_dxgi.cpp
+++ /dev/null
@@ -1,1291 +0,0 @@
-//
-// Created by loki on 1/12/20.
-//
-
-extern "C" {
-#include <libavcodec/avcodec.h>
-}
-
-#include <dxgi.h>
-#include <d3d11.h>
-#include <d3d11_4.h>
-#include <d3dcommon.h>
-#include <dxgi1_2.h>
-
-#include <codecvt>
-
-#include "sunshine/config.h"
-#include "sunshine/main.h"
-#include "common.h"
-
-namespace platf {
-using namespace std::literals;
-}
-namespace platf::dxgi {
-template<class T>
-void Release(T *dxgi) {
-  dxgi->Release();
-}
-
-using factory1_t    = util::safe_ptr<IDXGIFactory1, Release<IDXGIFactory1>>;
-using dxgi_t        = util::safe_ptr<IDXGIDevice, Release<IDXGIDevice>>;
-using dxgi1_t       = util::safe_ptr<IDXGIDevice1, Release<IDXGIDevice1>>;
-using device_t      = util::safe_ptr<ID3D11Device, Release<ID3D11Device>>;
-using device_ctx_t  = util::safe_ptr<ID3D11DeviceContext, Release<ID3D11DeviceContext>>;
-using adapter_t     = util::safe_ptr<IDXGIAdapter1, Release<IDXGIAdapter1>>;
-using output_t      = util::safe_ptr<IDXGIOutput, Release<IDXGIOutput>>;
-using output1_t     = util::safe_ptr<IDXGIOutput1, Release<IDXGIOutput1>>;
-using dup_t         = util::safe_ptr<IDXGIOutputDuplication, Release<IDXGIOutputDuplication>>;
-using texture2d_t   = util::safe_ptr<ID3D11Texture2D, Release<ID3D11Texture2D>>;
-using resource_t    = util::safe_ptr<IDXGIResource, Release<IDXGIResource>>;
-using multithread_t = util::safe_ptr<ID3D11Multithread, Release<ID3D11Multithread>>;
-
-namespace video {
-using device_t         = util::safe_ptr<ID3D11VideoDevice, Release<ID3D11VideoDevice>>;
-using ctx_t            = util::safe_ptr<ID3D11VideoContext, Release<ID3D11VideoContext>>;
-using processor_t      = util::safe_ptr<ID3D11VideoProcessor, Release<ID3D11VideoProcessor>>;
-using processor_out_t  = util::safe_ptr<ID3D11VideoProcessorOutputView, Release<ID3D11VideoProcessorOutputView>>;
-using processor_in_t   = util::safe_ptr<ID3D11VideoProcessorInputView, Release<ID3D11VideoProcessorInputView>>;
-using processor_enum_t = util::safe_ptr<ID3D11VideoProcessorEnumerator, Release<ID3D11VideoProcessorEnumerator>>;
-}
-
-extern const char *format_str[];
-
-class duplication_t {
-public:
-  dup_t dup;
-  bool has_frame {};
-
-  capture_e next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, std::chrono::milliseconds timeout, resource_t::pointer *res_p) {
-    auto capture_status = release_frame();
-    if(capture_status != capture_e::ok) {
-      return capture_status;
-    }
-
-    auto status = dup->AcquireNextFrame(timeout.count(), &frame_info, res_p);
-
-    switch(status) {
-      case S_OK:
-        has_frame = true;
-        return capture_e::ok;
-      case DXGI_ERROR_WAIT_TIMEOUT:
-        return capture_e::timeout;
-      case WAIT_ABANDONED:
-      case DXGI_ERROR_ACCESS_LOST:
-      case DXGI_ERROR_ACCESS_DENIED:
-        return capture_e::reinit;
-      default:
-        BOOST_LOG(error) << "Couldn't acquire next frame [0x"sv << util::hex(status).to_string_view();
-        return capture_e::error;
-    }
-  }
-
-  capture_e reset(dup_t::pointer dup_p = dup_t::pointer()) {
-    auto capture_status = release_frame();
-
-    dup.reset(dup_p);
-
-    return capture_status;
-  }
-
-  capture_e release_frame() {
-    if(!has_frame) {
-      return capture_e::ok;
-    }
-
-    auto status = dup->ReleaseFrame();
-    switch (status) {
-      case S_OK:
-        has_frame = false;
-        return capture_e::ok;
-      case DXGI_ERROR_WAIT_TIMEOUT:
-        return capture_e::timeout;
-      case WAIT_ABANDONED:
-      case DXGI_ERROR_ACCESS_LOST:
-      case DXGI_ERROR_ACCESS_DENIED:
-        has_frame = false;
-        return capture_e::reinit;
-      default:
-        BOOST_LOG(error) << "Couldn't release frame [0x"sv << util::hex(status).to_string_view();
-        return capture_e::error;
-    }
-  }
-
-  ~duplication_t() {
-    release_frame();
-  }
-};
-
-struct img_t : public ::platf::img_t  {
-  ~img_t() override {
-    delete[] data;
-    data = nullptr;
-  }
-};
-
-struct img_d3d_t : public ::platf::img_t {
-  std::shared_ptr<platf::display_t> display;
-  texture2d_t texture;
-
-  ~img_d3d_t() override = default;
-};
-
-struct cursor_t {
-  std::vector<std::uint8_t> img_data;
-
-  DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info;
-  int x, y;
-  bool visible;
-};
-
-struct gpu_cursor_t {
-  texture2d_t texture;
-
-  LONG width, height;
-};
-
-void blend_cursor_monochrome(const cursor_t &cursor, img_t &img) {
-  int height = cursor.shape_info.Height / 2;
-  int width  = cursor.shape_info.Width;
-  int pitch  = cursor.shape_info.Pitch;
-
-  // img cursor.{x,y} < 0, skip parts of the cursor.img_data
-  auto cursor_skip_y = -std::min(0, cursor.y);
-  auto cursor_skip_x = -std::min(0, cursor.x);
-
-  // img cursor.{x,y} > img.{x,y}, truncate parts of the cursor.img_data 
-  auto cursor_truncate_y = std::max(0, cursor.y - img.height);
-  auto cursor_truncate_x = std::max(0, cursor.x - img.width);
-
-  auto cursor_width = width - cursor_skip_x - cursor_truncate_x;
-  auto cursor_height = height - cursor_skip_y - cursor_truncate_y;
-
-  if(cursor_height > height || cursor_width > width) {
-    return;
-  }
-
-  auto img_skip_y    = std::max(0, cursor.y);
-  auto img_skip_x    = std::max(0, cursor.x);
-
-  auto cursor_img_data = cursor.img_data.data() + cursor_skip_y * pitch;
-
-  int delta_height = std::min(cursor_height - cursor_truncate_y, std::max(0, img.height - img_skip_y));
-  int delta_width = std::min(cursor_width - cursor_truncate_x, std::max(0, img.width - img_skip_x));
-
-  auto pixels_per_byte = width / pitch;
-  auto bytes_per_row = delta_width / pixels_per_byte;
-
-  auto img_data = (int*)img.data;
-  for(int i = 0; i < delta_height; ++i) {
-    auto and_mask = &cursor_img_data[i * pitch];
-    auto xor_mask = &cursor_img_data[(i + height) * pitch];
-
-    auto img_pixel_p = &img_data[(i + img_skip_y) * (img.row_pitch / img.pixel_pitch) + img_skip_x];
-
-    auto skip_x = cursor_skip_x;
-    for(int x = 0; x < bytes_per_row; ++x) {
-      for(auto bit = 0u; bit < 8; ++bit) {
-        if(skip_x > 0) {
-          --skip_x;
-
-          continue;
-        }
-
-        int and_ = *and_mask & (1 << (7 - bit)) ? -1 : 0;
-        int xor_ = *xor_mask & (1 << (7 - bit)) ? -1 : 0;
-
-        *img_pixel_p &= and_;
-        *img_pixel_p ^= xor_;
-
-        ++img_pixel_p;
-      }
-
-      ++and_mask;
-      ++xor_mask;
-    }
-  }
-}
-
-void apply_color_alpha(int *img_pixel_p, int cursor_pixel) {
-  auto colors_out = (std::uint8_t*)&cursor_pixel;
-  auto colors_in  = (std::uint8_t*)img_pixel_p;
-
-  //TODO: When use of IDXGIOutput5 is implemented, support different color formats
-  auto alpha = colors_out[3];
-  if(alpha == 255) {
-    *img_pixel_p = cursor_pixel;
-  }
-  else {
-    colors_in[0] = colors_out[0] + (colors_in[0] * (255 - alpha) + 255/2) / 255;
-    colors_in[1] = colors_out[1] + (colors_in[1] * (255 - alpha) + 255/2) / 255;
-    colors_in[2] = colors_out[2] + (colors_in[2] * (255 - alpha) + 255/2) / 255;
-  }
-}
-
-void apply_color_masked(int *img_pixel_p, int cursor_pixel) {
-  //TODO: When use of IDXGIOutput5 is implemented, support different color formats
-  auto alpha = ((std::uint8_t*)&cursor_pixel)[3];
-  if(alpha == 0xFF) {
-    *img_pixel_p ^= cursor_pixel;
-  }
-  else {
-    *img_pixel_p = cursor_pixel;
-  }
-}
-
-void blend_cursor_color(const cursor_t &cursor, img_t &img, const bool masked) {
-  int height = cursor.shape_info.Height;
-  int width  = cursor.shape_info.Width;
-  int pitch  = cursor.shape_info.Pitch;
-
-  // img cursor.y < 0, skip parts of the cursor.img_data
-  auto cursor_skip_y = -std::min(0, cursor.y);
-  auto cursor_skip_x = -std::min(0, cursor.x);
-
-  // img cursor.{x,y} > img.{x,y}, truncate parts of the cursor.img_data 
-  auto cursor_truncate_y = std::max(0, cursor.y - img.height);
-  auto cursor_truncate_x = std::max(0, cursor.x - img.width);
-
-  auto img_skip_y    = std::max(0, cursor.y);
-  auto img_skip_x    = std::max(0, cursor.x);
-
-  auto cursor_width = width - cursor_skip_x - cursor_truncate_x;
-  auto cursor_height = height - cursor_skip_y - cursor_truncate_y;
-
-  if(cursor_height > height || cursor_width > width) {
-    return;
-  }
-
-  auto cursor_img_data = (int*)&cursor.img_data[cursor_skip_y * pitch];
-
-  int delta_height = std::min(cursor_height - cursor_truncate_y, std::max(0, img.height - img_skip_y));
-  int delta_width = std::min(cursor_width - cursor_truncate_x, std::max(0, img.width - img_skip_x));
-
-  auto img_data = (int*)img.data;
-
-  for(int i = 0; i < delta_height; ++i) {
-    auto cursor_begin = &cursor_img_data[i * cursor.shape_info.Width + cursor_skip_x];
-    auto cursor_end = &cursor_begin[delta_width];
-
-    auto img_pixel_p = &img_data[(i + img_skip_y) * (img.row_pitch / img.pixel_pitch) + img_skip_x];
-    std::for_each(cursor_begin, cursor_end, [&](int cursor_pixel) {
-      if(masked) {
-        apply_color_masked(img_pixel_p, cursor_pixel);
-      }
-      else {
-        apply_color_alpha(img_pixel_p, cursor_pixel);
-      }
-      ++img_pixel_p;
-    });
-  }
-}
-
-void blend_cursor(const cursor_t &cursor, img_t &img) {
-  switch(cursor.shape_info.Type) {
-    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
-      blend_cursor_color(cursor, img, false);
-      break;
-    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
-      blend_cursor_monochrome(cursor, img);
-      break;
-    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
-      blend_cursor_color(cursor, img, true);
-      break;
-    default:
-      BOOST_LOG(warning) << "Unsupported cursor format ["sv << cursor.shape_info.Type << ']';
-  }
-}
-
-util::buffer_t<std::uint8_t> make_cursor_image(util::buffer_t<std::uint8_t> &&img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info)  {
-  switch(shape_info.Type) {
-    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
-    case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
-      return std::move(img_data);
-    default:
-      break;
-  }
-
-  shape_info.Height /= 2;
-
-  util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };
-
-  auto bytes = shape_info.Pitch * shape_info.Height;
-  auto pixel_begin = (std::uint32_t*)std::begin(cursor_img);
-  auto pixel_data = pixel_begin;
-  auto and_mask = std::begin(img_data);
-  auto xor_mask = std::begin(img_data) + bytes;
-
-  for(auto x = 0; x < bytes; ++x)  {
-    for(auto c = 7; c >= 0; --c) {
-      auto bit = 1 << c;
-      auto color_type = ((*and_mask & bit) ? 1 : 0) + ((*xor_mask & bit) ? 2 : 0);
-
-      constexpr std::uint32_t black = 0xFF000000;
-      constexpr std::uint32_t white = 0xFFFFFFFF;
-      constexpr std::uint32_t transparent = 0;
-      switch(color_type) {
-        case 0: //black
-          *pixel_data = black;
-          break;
-        case 2: //white
-          *pixel_data = white;
-          break;
-        case 1: //transparent
-        {
-          *pixel_data = transparent;
-
-          break;
-        }
-        case 3: //inverse
-        {
-          auto top_p    = pixel_data - shape_info.Width;
-          auto left_p   = pixel_data - 1;
-          auto right_p  = pixel_data + 1;
-          auto bottom_p = pixel_data + shape_info.Width;
-
-          // Get the x coordinate of the pixel
-          auto column = (pixel_data - pixel_begin) % shape_info.Width != 0;
-
-          if(top_p >= pixel_begin && *top_p == transparent) {
-            *top_p = black;
-          }
-
-          if(column != 0 && left_p >= pixel_begin && *left_p == transparent) {
-            *left_p = black;
-          }
-
-          if(bottom_p < (std::uint32_t*)std::end(cursor_img)) {
-            *bottom_p = black;
-          }
-
-          if(column != shape_info.Width -1) {
-            *right_p = black;
-          }
-          *pixel_data = white;
-        }
-      }
-
-      ++pixel_data;
-    }
-    ++and_mask;
-    ++xor_mask;
-  }
-
-  return cursor_img;
-}
-
-class hwdevice_t : public platf::hwdevice_t {
-public:
-  hwdevice_t(std::vector<hwdevice_t*> *hwdevices_p) : hwdevices_p { hwdevices_p } {}
-  hwdevice_t() = delete;
-
-  void set_cursor_pos(LONG rel_x, LONG rel_y, bool visible) {
-    cursor_visible = visible;
-
-    if(!visible) {
-      return;
-    }
-
-    LONG x = ((double)rel_x) * out_width / (double)in_width;
-    LONG y = ((double)rel_y) * out_height / (double)in_height;
-
-    // Ensure it's within bounds
-    auto left_out   = std::min<LONG>(out_width, std::max<LONG>(0, x));
-    auto top_out    = std::min<LONG>(out_height, std::max<LONG>(0, y));
-    auto right_out  = std::max<LONG>(0, std::min<LONG>(out_width, x + cursor_scaled_width));
-    auto bottom_out = std::max<LONG>(0, std::min<LONG>(out_height, y + cursor_scaled_height));
-
-    auto left_in   = std::max<LONG>(0, -rel_x);
-    auto top_in    = std::max<LONG>(0, -rel_y);
-    auto right_in  = std::min<LONG>(in_width - rel_x, cursor_width);
-    auto bottom_in = std::min<LONG>(in_height - rel_y, cursor_height);
-
-    RECT rect_in { left_in, top_in, right_in, bottom_in };
-    RECT rect_out { left_out, top_out, right_out, bottom_out };
-
-    ctx->VideoProcessorSetStreamSourceRect(processor.get(), 1, TRUE, &rect_in);
-    ctx->VideoProcessorSetStreamDestRect(processor.get(), 1, TRUE, &rect_out);
-  }
-
-  int set_cursor_texture(texture2d_t::pointer texture, LONG width, LONG height) {
-    D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } };
-
-    video::processor_in_t::pointer processor_in_p;
-    auto status = device->CreateVideoProcessorInputView(texture, processor_e.get(), &input_desc, &processor_in_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create cursor VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    cursor_in.reset(processor_in_p);
-
-    cursor_width  = width;
-    cursor_height = height;
-    cursor_scaled_width = ((double)width) / in_width * out_width;
-    cursor_scaled_height = ((double)height) / in_height * out_height;
-
-    return 0;
-  }
-
-  int convert(platf::img_t &img_base) override {
-    auto &img = (img_d3d_t&)img_base;
-
-    auto it = texture_to_processor_in.find(img.texture.get());
-    if(it == std::end(texture_to_processor_in)) {
-      D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC input_desc = { 0, (D3D11_VPIV_DIMENSION)D3D11_VPIV_DIMENSION_TEXTURE2D, { 0, 0 } };
-
-      video::processor_in_t::pointer processor_in_p;
-      auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p);
-      if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
-      it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first;
-    }
-    auto &processor_in = it->second;
-
-    D3D11_VIDEO_PROCESSOR_STREAM stream[] {
-      { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr },
-      { TRUE, 0, 0, 0, 0, nullptr, cursor_in.get(), nullptr }
-    };
-
-    auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, cursor_visible ? 2 : 1, stream);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    return 0;
-  }
-
-  void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
-    colorspace |= (color_range >> 4);
-    ctx->VideoProcessorSetOutputColorSpace(processor.get(), (D3D11_VIDEO_PROCESSOR_COLOR_SPACE*)&colorspace);
-  }
-
-  int init(
-    std::shared_ptr<platf::display_t> display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p,
-    int in_width, int in_height, int out_width, int out_height,
-    pix_fmt_e pix_fmt
-  ) {
-    HRESULT status;
-
-    cursor_visible = false;
-
-    platf::hwdevice_t::img = &img;
-
-    this->out_width  = out_width;
-    this->out_height = out_height;
-    this->in_width   = in_width;
-    this->in_height  = in_height;
-
-    video::device_t::pointer vdevice_p;
-    status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to query ID3D11VideoDevice interface [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-    device.reset(vdevice_p);
-
-    video::ctx_t::pointer ctx_p;
-    status = device_ctx_p->QueryInterface(IID_ID3D11VideoContext, (void**)&ctx_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to query ID3D11VideoContext interface [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-    ctx.reset(ctx_p);
-
-    D3D11_VIDEO_PROCESSOR_CONTENT_DESC contentDesc {
-      D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE,
-      { 1, 1 }, (UINT)in_width, (UINT)in_height,
-      { 1, 1 }, (UINT)out_width, (UINT)out_height,
-      D3D11_VIDEO_USAGE_OPTIMAL_QUALITY
-    };
-
-    video::processor_enum_t::pointer vp_e_p;
-    status = device->CreateVideoProcessorEnumerator(&contentDesc, &vp_e_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create video processor enumerator [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-    processor_e.reset(vp_e_p);
-
-    video::processor_t::pointer processor_p;
-    status = device->CreateVideoProcessor(processor_e.get(), 0, &processor_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create video processor [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-    processor.reset(processor_p);
-
-    D3D11_TEXTURE2D_DESC t {};
-    t.Width  = out_width;
-    t.Height = out_height;
-    t.MipLevels = 1;
-    t.ArraySize = 1;
-    t.SampleDesc.Count = 1;
-    t.Usage = D3D11_USAGE_DEFAULT;
-    t.Format = pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010;
-    t.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;
-
-    dxgi::texture2d_t::pointer tex_p {};
-    status = device_p->CreateTexture2D(&t, nullptr, &tex_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create video output texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    img.texture.reset(tex_p);
-    img.display = std::move(display);
-    img.width = out_width;
-    img.height = out_height;
-    img.data = (std::uint8_t*)tex_p;
-    img.row_pitch = out_width;
-    img.pixel_pitch = 1;
-
-    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC output_desc { D3D11_VPOV_DIMENSION_TEXTURE2D, 0 };
-    video::processor_out_t::pointer processor_out_p;
-    status = device->CreateVideoProcessorOutputView(img.texture.get(), processor_e.get(), &output_desc, &processor_out_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create VideoProcessorOutputView [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-    processor_out.reset(processor_out_p);
-
-    // Tell video processor alpha values need to be enabled
-    ctx->VideoProcessorSetStreamAlpha(processor.get(), 1, TRUE, 1.0f);
-
-    device_p->AddRef();
-    data = device_p;
-    return 0;
-  }
-
-  ~hwdevice_t() override {
-    if(data) {
-      ((ID3D11Device*)data)->Release();
-    }
-
-    auto it = std::find(std::begin(*hwdevices_p), std::end(*hwdevices_p), this);
-    if(it != std::end(*hwdevices_p)) {
-      hwdevices_p->erase(it);
-    }
-  }
-
-  img_d3d_t img;
-  video::device_t device;
-  video::ctx_t ctx;
-  video::processor_enum_t processor_e;
-  video::processor_t processor;
-  video::processor_out_t processor_out;
-  std::unordered_map<texture2d_t::pointer, video::processor_in_t> texture_to_processor_in;
-
-  video::processor_in_t cursor_in;
-
-  bool cursor_visible;
-
-  LONG cursor_width, cursor_height;
-  LONG cursor_scaled_width, cursor_scaled_height;
-
-  LONG in_width, in_height;
-  double out_width, out_height;
-
-  std::vector<hwdevice_t*> *hwdevices_p;
-};
-
-class display_base_t : public ::platf::display_t {
-public:
-  int init() {
-/* Uncomment when use of IDXGIOutput5 is implemented
-  std::call_once(windows_cpp_once_flag, []() {
-    DECLARE_HANDLE(DPI_AWARENESS_CONTEXT);
-    const auto DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2 = ((DPI_AWARENESS_CONTEXT)-4);
-
-    typedef BOOL (*User32_SetProcessDpiAwarenessContext)(DPI_AWARENESS_CONTEXT value);
-
-    auto user32 = LoadLibraryA("user32.dll");
-    auto f = (User32_SetProcessDpiAwarenessContext)GetProcAddress(user32, "SetProcessDpiAwarenessContext");
-    if(f) {
-      f(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
-    }
-
-    FreeLibrary(user32);
-  });
-*/
-    dxgi::factory1_t::pointer   factory_p {};
-    dxgi::adapter_t::pointer    adapter_p {};
-    dxgi::output_t::pointer     output_p {};
-    dxgi::device_t::pointer     device_p {};
-    dxgi::device_ctx_t::pointer device_ctx_p {};
-
-    HRESULT status;
-
-    status = CreateDXGIFactory1(IID_IDXGIFactory1, (void**)&factory_p);
-    factory.reset(factory_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create DXGIFactory1 [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
-
-    auto adapter_name = converter.from_bytes(config::video.adapter_name);
-    auto output_name = converter.from_bytes(config::video.output_name);
-
-    for(int x = 0; factory_p->EnumAdapters1(x, &adapter_p) != DXGI_ERROR_NOT_FOUND; ++x) {
-      dxgi::adapter_t adapter_tmp { adapter_p };
-
-      DXGI_ADAPTER_DESC1 adapter_desc;
-      adapter_tmp->GetDesc1(&adapter_desc);
-
-      if(!adapter_name.empty() && adapter_desc.Description != adapter_name) {
-        continue;
-      }
-
-      for(int y = 0; adapter_tmp->EnumOutputs(y, &output_p) != DXGI_ERROR_NOT_FOUND; ++y) {
-        dxgi::output_t output_tmp {output_p };
-
-        DXGI_OUTPUT_DESC desc;
-        output_tmp->GetDesc(&desc);
-
-        if(!output_name.empty() && desc.DeviceName != output_name) {
-          continue;
-        }
-
-        if(desc.AttachedToDesktop) {
-          output = std::move(output_tmp);
-
-          width  = desc.DesktopCoordinates.right - desc.DesktopCoordinates.left;
-          height = desc.DesktopCoordinates.bottom - desc.DesktopCoordinates.top;
-        }
-      }
-
-      if(output) {
-        adapter = std::move(adapter_tmp);
-        break;
-      }
-    }
-
-    if(!output) {
-      BOOST_LOG(error) << "Failed to locate an output device"sv;
-      return -1;
-    }
-
-    D3D_FEATURE_LEVEL featureLevels[] {
-      D3D_FEATURE_LEVEL_12_1,
-      D3D_FEATURE_LEVEL_12_0,
-      D3D_FEATURE_LEVEL_11_1,
-      D3D_FEATURE_LEVEL_11_0,
-      D3D_FEATURE_LEVEL_10_1,
-      D3D_FEATURE_LEVEL_10_0,
-      D3D_FEATURE_LEVEL_9_3,
-      D3D_FEATURE_LEVEL_9_2,
-      D3D_FEATURE_LEVEL_9_1
-    };
-
-    status = adapter->QueryInterface(IID_IDXGIAdapter, (void**)&adapter_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to query IDXGIAdapter interface"sv;
-
-      return -1;
-    }
-
-    status = D3D11CreateDevice(
-      adapter_p,
-      D3D_DRIVER_TYPE_UNKNOWN,
-      nullptr,
-      D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
-      featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
-      D3D11_SDK_VERSION,
-      &device_p,
-      &feature_level,
-      &device_ctx_p);
-
-    adapter_p->Release();
-
-    device.reset(device_p);
-    device_ctx.reset(device_ctx_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create D3D11 device [0x"sv << util::hex(status).to_string_view() << ']';
-
-      return -1;
-    }
-
-    DXGI_ADAPTER_DESC adapter_desc;
-    adapter->GetDesc(&adapter_desc);
-
-    auto description = converter.to_bytes(adapter_desc.Description);
-    BOOST_LOG(info)
-      << std::endl
-      << "Device Description : " << description << std::endl
-      << "Device Vendor ID   : 0x"sv << util::hex(adapter_desc.VendorId).to_string_view() << std::endl
-      << "Device Device ID   : 0x"sv << util::hex(adapter_desc.DeviceId).to_string_view() << std::endl
-      << "Device Video Mem   : "sv << adapter_desc.DedicatedVideoMemory / 1048576 << " MiB"sv << std::endl
-      << "Device Sys Mem     : "sv << adapter_desc.DedicatedSystemMemory / 1048576 << " MiB"sv << std::endl
-      << "Share Sys Mem      : "sv << adapter_desc.SharedSystemMemory / 1048576 << " MiB"sv << std::endl
-      << "Feature Level      : 0x"sv << util::hex(feature_level).to_string_view() << std::endl
-      << "Capture size       : "sv << width << 'x'  << height;
-
-    // Bump up thread priority
-    {
-      dxgi::dxgi_t::pointer dxgi_p {};
-      status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_p);
-      dxgi::dxgi_t dxgi { dxgi_p };
-
-      if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
-
-      dxgi->SetGPUThreadPriority(7);
-    }
-
-    // Try to reduce latency
-    {
-      dxgi::dxgi1_t::pointer dxgi_p {};
-      status = device->QueryInterface(IID_IDXGIDevice, (void**)&dxgi_p);
-      dxgi::dxgi1_t dxgi { dxgi_p };
-
-      if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
-
-      dxgi->SetMaximumFrameLatency(1);
-    }
-
-    //FIXME: Duplicate output on RX580 in combination with DOOM (2016) --> BSOD
-    //TODO: Use IDXGIOutput5 for improved performance
-    {
-      dxgi::output1_t::pointer output1_p {};
-      status = output->QueryInterface(IID_IDXGIOutput1, (void**)&output1_p);
-      dxgi::output1_t output1 {output1_p };
-
-      if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to query IDXGIOutput1 from the output"sv;
-        return -1;
-      }
-
-      // We try this twice, in case we still get an error on reinitialization
-      for(int x = 0; x < 2; ++x) {
-        dxgi::dup_t::pointer dup_p {};
-        status = output1->DuplicateOutput((IUnknown*)device.get(), &dup_p);
-        if(SUCCEEDED(status)) {
-          dup.reset(dup_p);
-          break;
-        }
-        std::this_thread::sleep_for(200ms);
-      }
-
-      if(FAILED(status)) {
-        BOOST_LOG(error) << "DuplicateOutput Failed [0x"sv << util::hex(status).to_string_view() << ']';
-        return -1;
-      }
-    }
-
-    DXGI_OUTDUPL_DESC dup_desc;
-    dup.dup->GetDesc(&dup_desc);
-
-    format = dup_desc.ModeDesc.Format;
-
-    BOOST_LOG(debug) << "Source format ["sv << format_str[dup_desc.ModeDesc.Format] << ']';
-
-    return 0;
-  }
-
-  factory1_t factory;
-  adapter_t adapter;
-  output_t output;
-  device_t device;
-  device_ctx_t device_ctx;
-  duplication_t dup;
-
-  DXGI_FORMAT format;
-  D3D_FEATURE_LEVEL feature_level;
-};
-
-class display_cpu_t : public display_base_t {
-public:
-  capture_e snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) override {
-    auto img = (img_t*)img_base;
-
-    HRESULT status;
-
-    DXGI_OUTDUPL_FRAME_INFO frame_info;
-
-    resource_t::pointer res_p {};
-    auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
-    resource_t res{res_p};
-
-    if (capture_status != capture_e::ok) {
-      return capture_status;
-    }
-
-    if(frame_info.PointerShapeBufferSize > 0) {
-      auto &img_data = cursor.img_data;
-
-      img_data.resize(frame_info.PointerShapeBufferSize);
-
-      UINT dummy;
-      status = dup.dup->GetFramePointerShape(img_data.size(), img_data.data(), &dummy, &cursor.shape_info);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
-
-        return capture_e::error;
-      }
-    }
-
-    if(frame_info.LastMouseUpdateTime.QuadPart) {
-      cursor.x = frame_info.PointerPosition.Position.x;
-      cursor.y = frame_info.PointerPosition.Position.y;
-      cursor.visible = frame_info.PointerPosition.Visible;
-    }
-
-    // If frame has been updated
-    if (frame_info.LastPresentTime.QuadPart != 0) {
-      {
-        texture2d_t::pointer src_p {};
-        status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
-        texture2d_t src{src_p};
-
-        if (FAILED(status)) {
-          BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
-          return capture_e::error;
-        }
-
-        //Copy from GPU to CPU
-        device_ctx->CopyResource(texture.get(), src.get());
-      }
-
-      if(img_info.pData) {
-        device_ctx->Unmap(texture.get(), 0);
-        img_info.pData = nullptr;
-      }
-
-      status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to map texture [0x"sv << util::hex(status).to_string_view() << ']';
-
-        return capture_e::error;
-      }
-    }
-
-    const bool mouse_update = 
-      (frame_info.LastMouseUpdateTime.QuadPart || frame_info.PointerShapeBufferSize > 0) &&
-      (cursor_visible && cursor.visible);
-
-    const bool update_flag = frame_info.LastPresentTime.QuadPart != 0 || mouse_update;
-
-    if(!update_flag) {
-      return capture_e::timeout;
-    }
-
-    std::copy_n((std::uint8_t*)img_info.pData, height * img_info.RowPitch, (std::uint8_t*)img->data);
-
-    if(cursor_visible && cursor.visible) {
-      blend_cursor(cursor, *img);
-    }
-
-    return capture_e::ok;
-  }
-
-  std::shared_ptr<platf::img_t> alloc_img() override {
-    auto img = std::make_shared<img_t>();
-
-    img->pixel_pitch  = 4;
-    img->row_pitch    = img->pixel_pitch * width;
-    img->width        = width;
-    img->height       = height;
-    img->data         = new std::uint8_t[img->row_pitch * height];
-
-    return img;
-  }
-
-  int dummy_img(platf::img_t *img) override {
-    return 0;
-  }
-
-  int init() {
-    if(display_base_t::init()) {
-      return -1;
-    }
-
-    D3D11_TEXTURE2D_DESC t {};
-    t.Width  = width;
-    t.Height = height;
-    t.MipLevels = 1;
-    t.ArraySize = 1;
-    t.SampleDesc.Count = 1;
-    t.Usage = D3D11_USAGE_STAGING;
-    t.Format = format;
-    t.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
-
-    dxgi::texture2d_t::pointer tex_p {};
-    auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
-
-    texture.reset(tex_p);
-
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    // map the texture simply to get the pitch and stride
-    status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &img_info);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    return 0;
-  }
-
-  cursor_t cursor;
-  D3D11_MAPPED_SUBRESOURCE img_info;
-  texture2d_t texture;
-};
-
-class display_gpu_t : public display_base_t, public std::enable_shared_from_this<display_gpu_t> {
-public:
-  capture_e snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) override {
-    auto img = (img_d3d_t*)img_base;
-
-    HRESULT status;
-
-    DXGI_OUTDUPL_FRAME_INFO frame_info;
-
-    resource_t::pointer res_p {};
-    auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
-    resource_t res{res_p};
-
-    if (capture_status != capture_e::ok) {
-      return capture_status;
-    }
-
-    const bool update_flag =
-      frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0 ||
-      frame_info.LastMouseUpdateTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0;
-
-    if(!update_flag) {
-      return capture_e::timeout;
-    }
-
-    if(frame_info.PointerShapeBufferSize > 0) {
-      DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info {};
-
-      util::buffer_t<std::uint8_t> img_data { frame_info.PointerShapeBufferSize };
-
-      UINT dummy;
-      status = dup.dup->GetFramePointerShape(img_data.size(), std::begin(img_data), &dummy, &shape_info);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
-
-        return capture_e::error;
-      }
-
-      auto cursor_img = make_cursor_image(std::move(img_data), shape_info);
-
-      D3D11_SUBRESOURCE_DATA data {
-        std::begin(cursor_img),
-        4 * shape_info.Width,
-        0
-      };
-
-      // Create texture for cursor
-      D3D11_TEXTURE2D_DESC t {};
-      t.Width  = shape_info.Width;
-      t.Height = cursor_img.size() / data.SysMemPitch;
-      t.MipLevels = 1;
-      t.ArraySize = 1;
-      t.SampleDesc.Count = 1;
-      t.Usage = D3D11_USAGE_DEFAULT;
-      t.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
-      t.BindFlags = D3D11_BIND_RENDER_TARGET;
-
-      dxgi::texture2d_t::pointer tex_p {};
-      auto status = device->CreateTexture2D(&t, &data, &tex_p);
-      if(FAILED(status)) {
-        BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
-        return capture_e::error;
-      }
-      texture2d_t texture { tex_p };
-
-      for(auto *hwdevice : hwdevices) {
-        if(hwdevice->set_cursor_texture(tex_p, t.Width, t.Height)) {
-          return capture_e::error;
-        }
-      }
-
-      cursor.texture = std::move(texture);
-      cursor.width   = t.Width;
-      cursor.height  = t.Height;
-    }
-
-    if(frame_info.LastMouseUpdateTime.QuadPart) {
-      for(auto *hwdevice : hwdevices) {
-        hwdevice->set_cursor_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, frame_info.PointerPosition.Visible && cursor_visible);
-      }
-    }
-
-    texture2d_t::pointer src_p {};
-    status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p);
-
-    if (FAILED(status)) {
-      BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
-      return capture_e::error;
-    }
-
-    texture2d_t src { src_p };
-    device_ctx->CopyResource(img->texture.get(), src.get());
-
-    return capture_e::ok;
-  }
-
-  std::shared_ptr<platf::img_t> alloc_img() override {
-    auto img = std::make_shared<img_d3d_t>();
-
-    D3D11_TEXTURE2D_DESC t {};
-    t.Width  = width;
-    t.Height = height;
-    t.MipLevels = 1;
-    t.ArraySize = 1;
-    t.SampleDesc.Count = 1;
-    t.Usage = D3D11_USAGE_DEFAULT;
-    t.Format = format;
-    t.BindFlags = D3D11_BIND_RENDER_TARGET;
-
-    dxgi::texture2d_t::pointer tex_p {};
-    auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return nullptr;
-    }
-
-    img->data        = (std::uint8_t*)tex_p;
-    img->row_pitch   = 0;
-    img->pixel_pitch = 4;
-    img->width       = 0;
-    img->height      = 0;
-    img->texture.reset(tex_p);
-    img->display     = shared_from_this();
-
-    return img;
-  }
-
-  int dummy_img(platf::img_t *img_base) override {
-    auto img = (img_d3d_t*)img_base;
-
-    img->row_pitch = width * 4;
-    auto dummy_data = std::make_unique<int[]>(width * height);
-    D3D11_SUBRESOURCE_DATA data {
-      dummy_data.get(),
-      (UINT)img->row_pitch,
-      0
-    };
-
-    D3D11_TEXTURE2D_DESC t {};
-    t.Width  = width;
-    t.Height = height;
-    t.MipLevels = 1;
-    t.ArraySize = 1;
-    t.SampleDesc.Count = 1;
-    t.Usage = D3D11_USAGE_DEFAULT;
-    t.Format = format;
-    t.BindFlags = D3D11_BIND_RENDER_TARGET;
-
-    dxgi::texture2d_t::pointer tex_p {};
-    auto status = device->CreateTexture2D(&t, &data, &tex_p);
-    if(FAILED(status)) {
-      BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']';
-      return -1;
-    }
-
-    img->data        = (std::uint8_t*)tex_p;
-    img->texture.reset(tex_p);
-    img->height      = height;
-    img->width       = width;
-    img->pixel_pitch = 4;
-
-    return 0;
-  }
-
-  std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, pix_fmt_e pix_fmt) override {
-    if(pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
-      BOOST_LOG(error) << "display_gpu_t doesn't support pixel format ["sv << (int)pix_fmt << ']';
-
-      return nullptr;
-    }
-
-    auto hwdevice = std::make_shared<hwdevice_t>(&hwdevices);
-
-    auto ret = hwdevice->init(
-      shared_from_this(),
-      device.get(),
-      device_ctx.get(),
-      this->width, this->height,
-      width, height,
-      pix_fmt);
-
-    if(ret) {
-      return nullptr;
-    }
-
-    if(cursor.texture && hwdevice->set_cursor_texture(cursor.texture.get(), cursor.width, cursor.height)) {
-      return nullptr;
-    }
-
-    hwdevices.emplace_back(hwdevice.get());
-
-    return hwdevice;
-  }
-
-  gpu_cursor_t cursor;
-  std::vector<hwdevice_t*> hwdevices;
-};
-
-const char *format_str[] = {
-  "DXGI_FORMAT_UNKNOWN",
-  "DXGI_FORMAT_R32G32B32A32_TYPELESS",
-  "DXGI_FORMAT_R32G32B32A32_FLOAT",
-  "DXGI_FORMAT_R32G32B32A32_UINT",
-  "DXGI_FORMAT_R32G32B32A32_SINT",
-  "DXGI_FORMAT_R32G32B32_TYPELESS",
-  "DXGI_FORMAT_R32G32B32_FLOAT",
-  "DXGI_FORMAT_R32G32B32_UINT",
-  "DXGI_FORMAT_R32G32B32_SINT",
-  "DXGI_FORMAT_R16G16B16A16_TYPELESS",
-  "DXGI_FORMAT_R16G16B16A16_FLOAT",
-  "DXGI_FORMAT_R16G16B16A16_UNORM",
-  "DXGI_FORMAT_R16G16B16A16_UINT",
-  "DXGI_FORMAT_R16G16B16A16_SNORM",
-  "DXGI_FORMAT_R16G16B16A16_SINT",
-  "DXGI_FORMAT_R32G32_TYPELESS",
-  "DXGI_FORMAT_R32G32_FLOAT",
-  "DXGI_FORMAT_R32G32_UINT",
-  "DXGI_FORMAT_R32G32_SINT",
-  "DXGI_FORMAT_R32G8X24_TYPELESS",
-  "DXGI_FORMAT_D32_FLOAT_S8X24_UINT",
-  "DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS",
-  "DXGI_FORMAT_X32_TYPELESS_G8X24_UINT",
-  "DXGI_FORMAT_R10G10B10A2_TYPELESS",
-  "DXGI_FORMAT_R10G10B10A2_UNORM",
-  "DXGI_FORMAT_R10G10B10A2_UINT",
-  "DXGI_FORMAT_R11G11B10_FLOAT",
-  "DXGI_FORMAT_R8G8B8A8_TYPELESS",
-  "DXGI_FORMAT_R8G8B8A8_UNORM",
-  "DXGI_FORMAT_R8G8B8A8_UNORM_SRGB",
-  "DXGI_FORMAT_R8G8B8A8_UINT",
-  "DXGI_FORMAT_R8G8B8A8_SNORM",
-  "DXGI_FORMAT_R8G8B8A8_SINT",
-  "DXGI_FORMAT_R16G16_TYPELESS",
-  "DXGI_FORMAT_R16G16_FLOAT",
-  "DXGI_FORMAT_R16G16_UNORM",
-  "DXGI_FORMAT_R16G16_UINT",
-  "DXGI_FORMAT_R16G16_SNORM",
-  "DXGI_FORMAT_R16G16_SINT",
-  "DXGI_FORMAT_R32_TYPELESS",
-  "DXGI_FORMAT_D32_FLOAT",
-  "DXGI_FORMAT_R32_FLOAT",
-  "DXGI_FORMAT_R32_UINT",
-  "DXGI_FORMAT_R32_SINT",
-  "DXGI_FORMAT_R24G8_TYPELESS",
-  "DXGI_FORMAT_D24_UNORM_S8_UINT",
-  "DXGI_FORMAT_R24_UNORM_X8_TYPELESS",
-  "DXGI_FORMAT_X24_TYPELESS_G8_UINT",
-  "DXGI_FORMAT_R8G8_TYPELESS",
-  "DXGI_FORMAT_R8G8_UNORM",
-  "DXGI_FORMAT_R8G8_UINT",
-  "DXGI_FORMAT_R8G8_SNORM",
-  "DXGI_FORMAT_R8G8_SINT",
-  "DXGI_FORMAT_R16_TYPELESS",
-  "DXGI_FORMAT_R16_FLOAT",
-  "DXGI_FORMAT_D16_UNORM",
-  "DXGI_FORMAT_R16_UNORM",
-  "DXGI_FORMAT_R16_UINT",
-  "DXGI_FORMAT_R16_SNORM",
-  "DXGI_FORMAT_R16_SINT",
-  "DXGI_FORMAT_R8_TYPELESS",
-  "DXGI_FORMAT_R8_UNORM",
-  "DXGI_FORMAT_R8_UINT",
-  "DXGI_FORMAT_R8_SNORM",
-  "DXGI_FORMAT_R8_SINT",
-  "DXGI_FORMAT_A8_UNORM",
-  "DXGI_FORMAT_R1_UNORM",
-  "DXGI_FORMAT_R9G9B9E5_SHAREDEXP",
-  "DXGI_FORMAT_R8G8_B8G8_UNORM",
-  "DXGI_FORMAT_G8R8_G8B8_UNORM",
-  "DXGI_FORMAT_BC1_TYPELESS",
-  "DXGI_FORMAT_BC1_UNORM",
-  "DXGI_FORMAT_BC1_UNORM_SRGB",
-  "DXGI_FORMAT_BC2_TYPELESS",
-  "DXGI_FORMAT_BC2_UNORM",
-  "DXGI_FORMAT_BC2_UNORM_SRGB",
-  "DXGI_FORMAT_BC3_TYPELESS",
-  "DXGI_FORMAT_BC3_UNORM",
-  "DXGI_FORMAT_BC3_UNORM_SRGB",
-  "DXGI_FORMAT_BC4_TYPELESS",
-  "DXGI_FORMAT_BC4_UNORM",
-  "DXGI_FORMAT_BC4_SNORM",
-  "DXGI_FORMAT_BC5_TYPELESS",
-  "DXGI_FORMAT_BC5_UNORM",
-  "DXGI_FORMAT_BC5_SNORM",
-  "DXGI_FORMAT_B5G6R5_UNORM",
-  "DXGI_FORMAT_B5G5R5A1_UNORM",
-  "DXGI_FORMAT_B8G8R8A8_UNORM",
-  "DXGI_FORMAT_B8G8R8X8_UNORM",
-  "DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM",
-  "DXGI_FORMAT_B8G8R8A8_TYPELESS",
-  "DXGI_FORMAT_B8G8R8A8_UNORM_SRGB",
-  "DXGI_FORMAT_B8G8R8X8_TYPELESS",
-  "DXGI_FORMAT_B8G8R8X8_UNORM_SRGB",
-  "DXGI_FORMAT_BC6H_TYPELESS",
-  "DXGI_FORMAT_BC6H_UF16",
-  "DXGI_FORMAT_BC6H_SF16",
-  "DXGI_FORMAT_BC7_TYPELESS",
-  "DXGI_FORMAT_BC7_UNORM",
-  "DXGI_FORMAT_BC7_UNORM_SRGB",
-  "DXGI_FORMAT_AYUV",
-  "DXGI_FORMAT_Y410",
-  "DXGI_FORMAT_Y416",
-  "DXGI_FORMAT_NV12",
-  "DXGI_FORMAT_P010",
-  "DXGI_FORMAT_P016",
-  "DXGI_FORMAT_420_OPAQUE",
-  "DXGI_FORMAT_YUY2",
-  "DXGI_FORMAT_Y210",
-  "DXGI_FORMAT_Y216",
-  "DXGI_FORMAT_NV11",
-  "DXGI_FORMAT_AI44",
-  "DXGI_FORMAT_IA44",
-  "DXGI_FORMAT_P8",
-  "DXGI_FORMAT_A8P8",
-  "DXGI_FORMAT_B4G4R4A4_UNORM",
-
-  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-  "DXGI_FORMAT_P208",
-  "DXGI_FORMAT_V208",
-  "DXGI_FORMAT_V408"
-};
-
-}
-
-namespace platf {
-std::shared_ptr<display_t> display(platf::dev_type_e hwdevice_type) {
-  if(hwdevice_type == platf::dev_type_e::dxgi) {
-    auto disp = std::make_shared<dxgi::display_gpu_t>();
-
-    if(!disp->init()) {
-      return disp;
-    }
-  }
-  else if(hwdevice_type == platf::dev_type_e::none) {
-    auto disp = std::make_shared<dxgi::display_cpu_t>();
-
-    if(!disp->init()) {
-      return disp;
-    }
-  }
-
-  return nullptr;
-}
-}

From 22418cb613cff3de7bcdd1e2103fd84b969ee24e Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 23 Apr 2020 15:48:05 +0200
Subject: [PATCH 24/25] Moved linux specific files to folder platform/linux

---
 CMakeLists.txt                                         | 4 ++--
 sunshine/platform/{linux.cpp => linux/display.cpp}     | 2 +-
 sunshine/platform/{linux_evdev.cpp => linux/input.cpp} | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename sunshine/platform/{linux.cpp => linux/display.cpp} (99%)
 rename sunshine/platform/{linux_evdev.cpp => linux/input.cpp} (99%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddc2dad2..01e933ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,8 +85,8 @@ else()
 
 	find_package(X11 REQUIRED)
 	set(PLATFORM_TARGET_FILES
-		sunshine/platform/linux.cpp
-		sunshine/platform/linux_evdev.cpp)
+		sunshine/platform/linux/display.cpp
+		sunshine/platform/linux/input.cpp)
 	
 	set(PLATFORM_LIBRARIES
 		Xfixes
diff --git a/sunshine/platform/linux.cpp b/sunshine/platform/linux/display.cpp
similarity index 99%
rename from sunshine/platform/linux.cpp
rename to sunshine/platform/linux/display.cpp
index 137ae0e4..17ef98a1 100644
--- a/sunshine/platform/linux.cpp
+++ b/sunshine/platform/linux/display.cpp
@@ -2,7 +2,7 @@
 // Created by loki on 6/21/19.
 //
 
-#include "common.h"
+#include "sunshine/platform/common.h"
 
 #include <fstream>
 #include <bitset>
diff --git a/sunshine/platform/linux_evdev.cpp b/sunshine/platform/linux/input.cpp
similarity index 99%
rename from sunshine/platform/linux_evdev.cpp
rename to sunshine/platform/linux/input.cpp
index af9e39ed..9de79069 100644
--- a/sunshine/platform/linux_evdev.cpp
+++ b/sunshine/platform/linux/input.cpp
@@ -9,7 +9,7 @@
 #include <cstring>
 #include <filesystem>
 
-#include "common.h"
+#include "sunshine/platform/common.h"
 #include "sunshine/main.h"
 #include "sunshine/utility.h"
 

From 7ee59669daa1c9e0b6f694fe78b804ad6b922e7b Mon Sep 17 00:00:00 2001
From: loki <loki@fakeemail.com>
Date: Thu, 23 Apr 2020 15:49:47 +0200
Subject: [PATCH 25/25] Removed unnecessary header include

---
 sunshine/platform/windows/display_base.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sunshine/platform/windows/display_base.cpp b/sunshine/platform/windows/display_base.cpp
index 4b758007..4b42880c 100644
--- a/sunshine/platform/windows/display_base.cpp
+++ b/sunshine/platform/windows/display_base.cpp
@@ -2,10 +2,6 @@
 // Created by loki on 1/12/20.
 //
 
-extern "C" {
-#include <libavcodec/avcodec.h>
-}
-
 #include <codecvt>
 
 #include "sunshine/config.h"