From 94181fd047956ae0b65f206776c37ebf8460710f Mon Sep 17 00:00:00 2001 From: loki Date: Fri, 27 Mar 2020 21:57:29 +0100 Subject: [PATCH] Prevent unnecessary copies of entire frames on Windows --- CMakeLists.txt | 1 + sunshine/platform/common.h | 2 +- sunshine/platform/linux.cpp | 8 +- sunshine/platform/windows.cpp | 4 +- sunshine/platform/windows_dxgi.cpp | 141 +++++++++++----------- sunshine/round_robin.h | 141 ++++++++++++++++++++++ sunshine/stream.cpp | 2 +- sunshine/utility.h | 63 ---------- sunshine/video.cpp | 183 +++++++++++++++++------------ 9 files changed, 333 insertions(+), 212 deletions(-) create mode 100755 sunshine/round_robin.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 172d696d..788c9438 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,7 @@ set(SUNSHINE_TARGET_FILES sunshine/thread_pool.h sunshine/thread_safe.h sunshine/sync.h + sunshine/round_robin.h ${PLATFORM_TARGET_FILES}) include_directories( diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index 5a9165c1..0c0a5255 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -91,7 +91,7 @@ std::string from_sockaddr(const sockaddr *const); std::pair from_sockaddr_ex(const sockaddr *const); std::unique_ptr microphone(std::uint32_t sample_rate); -std::unique_ptr display(); +std::shared_ptr display(); input_t input(); void move_mouse(input_t &input, int deltaX, int deltaY); diff --git a/sunshine/platform/linux.cpp b/sunshine/platform/linux.cpp index 3520c84a..407cd6d3 100644 --- a/sunshine/platform/linux.cpp +++ b/sunshine/platform/linux.cpp @@ -315,8 +315,8 @@ struct mic_attr_t : public mic_t { } }; -std::unique_ptr shm_display() { - auto shm = std::make_unique(); +std::shared_ptr shm_display() { + auto shm = std::make_shared(); if(shm->init()) { return nullptr; @@ -325,11 +325,11 @@ std::unique_ptr shm_display() { return shm; } -std::unique_ptr display() { +std::shared_ptr display() { auto shm_disp = shm_display(); if(!shm_disp) { - return std::unique_ptr { new x11_attr_t {} }; + return std::make_shared(); } return shm_disp; diff --git a/sunshine/platform/windows.cpp b/sunshine/platform/windows.cpp index b9fd87da..98436ec7 100755 --- a/sunshine/platform/windows.cpp +++ b/sunshine/platform/windows.cpp @@ -249,9 +249,7 @@ void keyboard(input_t &input, uint16_t modcode, bool release) { auto key_state = GetAsyncKeyState(modcode); bool key_state_down = (key_state & KEY_STATE_DOWN) != 0; if(key_state_down != release) { - BOOST_LOG(warning) << "Key state of vkey ["sv << util::hex(modcode).to_string_view() << "] does not match the desired state ["sv << (release ? "on]"sv : "off]"sv); - - return; + BOOST_LOG(debug) << "Key state of vkey ["sv << util::hex(modcode).to_string_view() << "] does not match the desired state ["sv << (release ? "on]"sv : "off]"sv); } INPUT i {}; diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp index 87f52865..98d779a4 100644 --- a/sunshine/platform/windows_dxgi.cpp +++ b/sunshine/platform/windows_dxgi.cpp @@ -104,9 +104,57 @@ public: class display_t; struct img_t : public ::platf::img_t { ~img_t() override { - delete[] data; - data = nullptr; + unmap(); } + + void unmap() { + if(info.pData) { + device_ctx_p->Unmap(texture.get(), 0); + + info.pData = nullptr; + } + } + + int reset(int width, int height, DXGI_FORMAT format, device_t::pointer device, device_ctx_t::pointer device_ctx_p, const std::shared_ptr &display) { + unmap(); + + D3D11_TEXTURE2D_DESC t {}; + t.Width = width; + t.Height = height; + t.MipLevels = 1; + t.ArraySize = 1; + t.SampleDesc.Count = 1; + t.Usage = D3D11_USAGE_STAGING; + t.Format = format; + t.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + + dxgi::texture2d_t::pointer tex_p {}; + auto status = device->CreateTexture2D(&t, nullptr, &tex_p); + texture.reset(tex_p); + + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + + this->display = display; + this->device_ctx_p = device_ctx_p; + this->data = nullptr; + this->row_pitch = 0; + this->pixel_pitch = 4; + this->width = width; + this->height = height; + + return 0; + } + + std::shared_ptr display; + + texture2d_t texture; + D3D11_MAPPED_SUBRESOURCE info {}; + + + device_ctx_t::pointer device_ctx_p; }; struct cursor_t { @@ -247,10 +295,16 @@ void blend_cursor(const cursor_t &cursor, img_t &img) { } } -class display_t : public ::platf::display_t { +class display_t : public ::platf::display_t, public std::enable_shared_from_this { public: capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override { - auto img = (img_t *) img_base; + auto img = (img_t*)img_base; + if(img->display.get() != this) { + if(img->reset(width, height, format, device.get(), device_ctx.get(), shared_from_this())) { + return capture_e::error; + } + } + HRESULT status; DXGI_OUTDUPL_FRAME_INFO frame_info; @@ -296,15 +350,15 @@ public: } //Copy from GPU to CPU - device_ctx->CopyResource(texture.get(), src.get()); + device_ctx->CopyResource(img->texture.get(), src.get()); } - if(current_img.pData) { - device_ctx->Unmap(texture.get(), 0); - current_img.pData = nullptr; + if(img->info.pData) { + device_ctx->Unmap(img->texture.get(), 0); + img->info.pData = nullptr; } - status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, ¤t_img); + status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img->info); if (FAILED(status)) { BOOST_LOG(error) << "Failed to map texture [0x"sv << util::hex(status).to_string_view() << ']'; @@ -312,28 +366,23 @@ public: } } + /* const bool update_flag = frame_info.LastMouseUpdateTime.QuadPart || frame_info.LastPresentTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0; - + */ + const bool update_flag = frame_info.LastPresentTime.QuadPart != 0; if(!update_flag) { return capture_e::timeout; } - if(img->width != width || img->height != height) { - delete[] img->data; - img->data = new std::uint8_t[height * current_img.RowPitch]; + img->row_pitch = img->info.RowPitch; + img->data = (std::uint8_t*)img->info.pData; - img->width = width; - img->height = height; - img->row_pitch = current_img.RowPitch; - } - - std::copy_n((std::uint8_t*)current_img.pData, height * current_img.RowPitch, (std::uint8_t*)img->data); - if(cursor_visible && cursor.visible) { - blend_cursor(cursor, *img); + if(cursor_visible) { // && cursor.visible) { + //blend_cursor(cursor, *img); } return capture_e::ok; @@ -342,11 +391,9 @@ public: std::shared_ptr<::platf::img_t> alloc_img() override { auto img = std::make_shared(); - img->data = nullptr; - img->height = 0; - img->width = 0; - img->row_pitch = 0; - img->pixel_pitch = 4; + if(img->reset(width, height, format, device.get(), device_ctx.get(), shared_from_this())) { + return nullptr; + } return img; } @@ -368,8 +415,6 @@ public: FreeLibrary(user32); }); */ - current_img.pData = nullptr; // current_img is not yet mapped - dxgi::factory1_t::pointer factory_p {}; dxgi::adapter_t::pointer adapter_p {}; dxgi::output_t::pointer output_p {}; @@ -548,43 +593,9 @@ public: BOOST_LOG(debug) << "Source format ["sv << format_str[dup_desc.ModeDesc.Format] << ']'; - D3D11_TEXTURE2D_DESC t {}; - t.Width = width; - t.Height = height; - t.MipLevels = 1; - t.ArraySize = 1; - t.SampleDesc.Count = 1; - t.Usage = D3D11_USAGE_STAGING; - t.Format = format; - t.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - - dxgi::texture2d_t::pointer tex_p {}; - status = device->CreateTexture2D(&t, nullptr, &tex_p); - - texture.reset(tex_p); - - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - - // map the texture simply to get the pitch and stride - status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, ¤t_img); - if(FAILED(status)) { - BOOST_LOG(error) << "Error: Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - return 0; } - ~display_t() override { - if(current_img.pData) { - device_ctx->Unmap(texture.get(), 0); - current_img.pData = nullptr; - } - } - factory1_t factory; adapter_t adapter; output_t output; @@ -592,13 +603,11 @@ public: device_ctx_t device_ctx; duplication_t dup; cursor_t cursor; - texture2d_t texture; int width, height; DXGI_FORMAT format; D3D_FEATURE_LEVEL feature_level; - D3D11_MAPPED_SUBRESOURCE current_img; }; const char *format_str[] = { @@ -729,8 +738,8 @@ const char *format_str[] = { } namespace platf { -std::unique_ptr display() { - auto disp = std::make_unique(); +std::shared_ptr display() { + auto disp = std::make_shared(); if (disp->init()) { return nullptr; diff --git a/sunshine/round_robin.h b/sunshine/round_robin.h new file mode 100755 index 00000000..b3ae4475 --- /dev/null +++ b/sunshine/round_robin.h @@ -0,0 +1,141 @@ +#ifndef KITTY_UTIL_ITERATOR_H +#define KITTY_UTIL_ITERATOR_H + +#include + +namespace util { +template +class it_wrap_t : public std::iterator { +public: + typedef T iterator; + typedef typename std::iterator::value_type class_t; + + typedef class_t& reference; + typedef class_t* pointer; + + typedef std::ptrdiff_t diff_t; + + iterator operator += (diff_t step) { + while(step-- > 0) { + ++_this(); + } + + return _this(); + } + + iterator operator -= (diff_t step) { + while(step-- > 0) { + --_this(); + } + + return _this(); + } + + iterator operator +(diff_t step) { + iterator new_ = _this(); + + return new_ += step; + } + + iterator operator -(diff_t step) { + iterator new_ = _this(); + + return new_ -= step; + } + + diff_t operator -(iterator first) { + diff_t step = 0; + while(first != _this()) { + ++step; + ++first; + } + + return step; + } + + iterator operator++() { _this().inc(); return _this(); } + iterator operator--() { _this().dec(); return _this(); } + + iterator operator++(int) { + iterator new_ = _this(); + + ++_this(); + + return new_; + } + + iterator operator--(int) { + iterator new_ = _this(); + + --_this(); + + return new_; + } + + reference operator*() { return *_this().get(); } + const reference operator*() const { return *_this().get(); } + + pointer operator->() { return &*_this(); } + const pointer operator->() const { return &*_this(); } + + bool operator != (const iterator &other) const { + return !(_this() == other); + } + + bool operator < (const iterator &other) const { + return !(_this() >= other); + } + + bool operator >= (const iterator &other) const { + return _this() == other || _this() > other; + } + + bool operator <= (const iterator &other) const { + return _this() == other || _this() < other; + } + + bool operator == (const iterator &other) const { return _this().eq(other); }; + bool operator > (const iterator &other) const { return _this().gt(other); } +private: + + iterator &_this() { return *static_cast(this); } + const iterator &_this() const { return *static_cast(this); } +}; + +template +class round_robin_t : public it_wrap_t> { +public: + using iterator = It; + using pointer = V*; + + round_robin_t(iterator begin, iterator end) : _begin(begin), _end(end), _pos(begin) {} + + void inc() { + ++_pos; + + if(_pos == _end) { + _pos = _begin; + } + } + + bool eq(const round_robin_t &other) const { + return *_pos == *other._pos; + } + + pointer get() const { + return &*_pos; + } +private: + It _begin; + It _end; + + It _pos; +}; + +template +round_robin_t make_round_robin(It begin, It end) { + return round_robin_t(begin, end); +} +} + +#endif diff --git a/sunshine/stream.cpp b/sunshine/stream.cpp index 9f6092a4..c692d7c0 100644 --- a/sunshine/stream.cpp +++ b/sunshine/stream.cpp @@ -614,7 +614,7 @@ void videoBroadcastThread(safe::signal_t *shutdown_event, udp::socket &sock, vid frame_new = "\000\000\000\001("sv; } - assert(std::search(std::begin(payload), std::end(payload), std::begin(hevc_i_frame), std::end(hevc_i_frame)) == + assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) == std::end(payload)); payload_new = replace(payload, frame_old, frame_new); payload = {(char *) payload_new.data(), payload_new.size()}; diff --git a/sunshine/utility.h b/sunshine/utility.h index 0bebdd98..171b54b7 100644 --- a/sunshine/utility.h +++ b/sunshine/utility.h @@ -388,69 +388,6 @@ void c_free(T *p) { template using c_ptr = safe_ptr>; -template -class FakeContainer { - typedef T pointer; - - pointer _begin; - pointer _end; - -public: - FakeContainer(pointer begin, pointer end) : _begin(begin), _end(end) {} - - pointer begin() { return _begin; } - pointer end() { return _end; } - - const pointer begin() const { return _begin; } - const pointer end() const { return _end; } - - const pointer cbegin() const { return _begin; } - const pointer cend() const { return _end; } - - pointer data() { return begin(); } - const pointer data() const { return cbegin(); } - - std::size_t size() const { return std::distance(begin(), end()); } -}; - -template -FakeContainer toContainer(T begin, T end) { - return { begin, end }; -} - -template -FakeContainer toContainer(T begin, std::size_t end) { - return { begin, begin + end }; -} - -template -FakeContainer toContainer(T * const begin) { - T *end = begin; - - auto default_val = T(); - while(*end != default_val) { - ++end; - } - - return toContainer(begin, end); -} - -template -struct _init_helper; - -template class T, class H, class... Args> -struct _init_helper, H> { - using type = T; - - static type move(Args&&... args, H&&) { - return std::make_tuple(std::move(args)...); - } - - static type copy(const Args&... args, const H&) { - return std::make_tuple(args...); - } -}; - inline std::int64_t from_chars(const char *begin, const char *end) { std::int64_t res {}; std::int64_t mul = 1; diff --git a/sunshine/video.cpp b/sunshine/video.cpp index 1353b8e8..103eccc7 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -11,6 +11,7 @@ extern "C" { #include "platform/common.h" #include "thread_pool.h" +#include "round_robin.h" #include "config.h" #include "video.h" #include "main.h" @@ -138,40 +139,6 @@ struct capture_thread_ctx_t { return codec_t { ctx.get() }; } -int capture_display(platf::img_t *img, std::unique_ptr &disp) { - auto status = disp->snapshot(img, display_cursor); - switch (status) { - case platf::capture_e::reinit: { - // We try this twice, in case we still get an error on reinitialization - for(int x = 0; x < 2; ++x) { - disp.reset(); - disp = platf::display(); - - if(disp) { - break; - } - - std::this_thread::sleep_for(200ms); - } - - if(!disp) { - return -1; - } - - return 0; - } - case platf::capture_e::error: - return -1; - case platf::capture_e::timeout: - return 0; - case platf::capture_e::ok: - return 1; - default: - BOOST_LOG(error) << "Unrecognized capture status ["sv << (int)status << ']'; - return -1; - } -} - void captureThread(std::shared_ptr> capture_ctx_queue) { std::vector capture_ctxs; @@ -190,6 +157,22 @@ void captureThread(std::shared_ptr> capture_ctx_que std::chrono::nanoseconds delay = 1s; auto disp = platf::display(); + if(!disp) { + return; + } + + std::vector> imgs(12); + auto round_robin = util::make_round_robin>(std::begin(imgs), std::end(imgs)); + + for(auto &img : imgs) { + img = disp->alloc_img(); + if(!img) { + BOOST_LOG(error) << "Couldn't initialize an image"sv; + return; + } + } + + auto next_frame = std::chrono::steady_clock::now(); while(capture_ctx_queue->running()) { while(capture_ctx_queue->peek()) { capture_ctxs.emplace_back(std::move(*capture_ctx_queue->pop())); @@ -197,13 +180,62 @@ void captureThread(std::shared_ptr> capture_ctx_que delay = std::min(delay, capture_ctxs.back().delay); } - std::shared_ptr img = disp->alloc_img(); - auto result = capture_display(img.get(), disp); - if(result < 0) { - return; + auto now = std::chrono::steady_clock::now(); + if(next_frame > now) { + std::this_thread::sleep_until(next_frame); } - if(!result) { - continue; + next_frame += delay; + + auto &img = *round_robin++; + auto status = disp->snapshot(img.get(), display_cursor); + switch (status) { + case platf::capture_e::reinit: { + // Some classes of images contain references to the display --> display won't delete unless img is deleted + for(auto &img : imgs) { + img.reset(); + } + + while(disp.use_count() > 1) { + std::this_thread::sleep_for(100ms); + } + + // We try this twice, in case we still get an error on reinitialization + for(int x = 0; x < 2; ++x) { + // Some classes of display cannot have multiple instances at once + disp.reset(); + disp = platf::display(); + + if(disp) { + break; + } + + std::this_thread::sleep_for(200ms); + } + + if(!disp) { + return; + } + + // Re-allocate images + for(auto &img : imgs) { + img = disp->alloc_img(); + if(!img) { + BOOST_LOG(error) << "Couldn't initialize an image"sv; + return; + } + } + + continue; + } + case platf::capture_e::error: + return; + case platf::capture_e::timeout: + continue; + case platf::capture_e::ok: + break; + default: + BOOST_LOG(error) << "Unrecognized capture status ["sv << (int)status << ']'; + return; } KITTY_WHILE_LOOP(auto capture_ctx = std::begin(capture_ctxs), capture_ctx != std::end(capture_ctxs), { @@ -508,6 +540,20 @@ void capture( int framerate = config.framerate; auto images = std::make_shared(); + + // Temporary image to ensure something is send to Moonlight even if no frame has been captured yet. + int dummy_data = 0; + { + auto img = std::make_shared(); + img->row_pitch = 4; + img->height = 1; + img->width = 1; + img->pixel_pitch = 4; + img->data = (std::uint8_t*)&dummy_data; + + images->raise(std::move(img)); + } + // Keep a reference counter to ensure the capture thread only runs when other threads have a reference to the capture thread static auto capture_thread = safe::make_shared(start_capture, end_capture); auto ref = capture_thread.ref(); @@ -533,15 +579,6 @@ void capture( // Initiate scaling context with correct height and width sws_t sws; - // Temporary image to ensure something is send to Moonlight even if no frame has been captured yet. - int dummy_data = 0; - auto img = std::make_shared(); - img->row_pitch = 4; - img->height = 1; - img->width = 1; - img->pixel_pitch = 4; - img->data = (std::uint8_t*)&dummy_data; - auto next_frame = std::chrono::steady_clock::now(); while(true) { if(shutdown_event->peek() || !images->running()) { @@ -566,8 +603,29 @@ void capture( // When Moonlight request an IDR frame, send frames even if there is no new captured frame if(frame_nr > (key_frame_nr + config.framerate) || images->peek()) { - if(auto tmp_img = images->pop(delay)) { - img = std::move(tmp_img); + if(auto img = images->pop(delay)) { + if(software.system_memory) { + auto new_width = img->width; + auto new_height = img->height; + + if(img_width != new_width || img_height != new_height) { + img_width = new_width; + img_height = new_height; + + sws.reset( + sws_getContext( + img_width, img_height, AV_PIX_FMT_BGR0, + session->ctx->width, session->ctx->height, session->ctx->pix_fmt, + SWS_LANCZOS | SWS_ACCURATE_RND, + nullptr, nullptr, nullptr)); + + sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0, + sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1, + 0, 1 << 16, 1 << 16); + } + } + + software.img_to_frame(sws, *img, session->frame); } else if(images->running()) { continue; @@ -577,29 +635,6 @@ void capture( } } - if(software.system_memory) { - auto new_width = img->width; - auto new_height = img->height; - - if(img_width != new_width || img_height != new_height) { - img_width = new_width; - img_height = new_height; - - sws.reset( - sws_getContext( - img_width, img_height, AV_PIX_FMT_BGR0, - session->ctx->width, session->ctx->height, session->ctx->pix_fmt, - SWS_LANCZOS | SWS_ACCURATE_RND, - nullptr, nullptr, nullptr)); - - sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0, - sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1, - 0, 1 << 16, 1 << 16); - } - } - - software.img_to_frame(sws, *img, session->frame); - encode(frame_nr++, session->ctx, session->frame, packets, channel_data); session->frame->pict_type = AV_PICTURE_TYPE_NONE;