Ensure the background color is black
This commit is contained in:
@@ -325,7 +325,7 @@ public:
|
|||||||
cuda_ctx = ((AVCUDADeviceContext *)((AVHWFramesContext *)frame->hw_frames_ctx->data)->device_ctx->hwctx)->cuda_ctx;
|
cuda_ctx = ((AVCUDADeviceContext *)((AVHWFramesContext *)frame->hw_frames_ctx->data)->device_ctx->hwctx)->cuda_ctx;
|
||||||
|
|
||||||
ctx_t ctx { cuda_ctx };
|
ctx_t ctx { cuda_ctx };
|
||||||
sws = sws_t::make(width * 4, height, frame->width, frame->height);
|
sws = sws_t::make(width, height, frame->width, frame->height, width * 4);
|
||||||
|
|
||||||
if(!sws) {
|
if(!sws) {
|
||||||
return -1;
|
return -1;
|
||||||
@@ -343,6 +343,28 @@ public:
|
|||||||
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
|
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
|
||||||
ctx_t ctx { cuda_ctx };
|
ctx_t ctx { cuda_ctx };
|
||||||
sws->set_colorspace(colorspace, color_range);
|
sws->set_colorspace(colorspace, color_range);
|
||||||
|
|
||||||
|
|
||||||
|
// The default green color is ugly.
|
||||||
|
// Update the background color
|
||||||
|
platf::img_t img;
|
||||||
|
img.width = frame->width;
|
||||||
|
img.height = frame->height;
|
||||||
|
img.pixel_pitch = 4;
|
||||||
|
img.row_pitch = img.width * img.pixel_pitch;
|
||||||
|
|
||||||
|
std::vector<std::uint8_t> image_data;
|
||||||
|
image_data.resize(img.row_pitch * img.height);
|
||||||
|
|
||||||
|
img.data = image_data.data();
|
||||||
|
|
||||||
|
if(sws->load_ram(img)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
sws->convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], {
|
||||||
|
frame->width, frame->height, 0, 0
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
frame_t hwframe;
|
frame_t hwframe;
|
||||||
|
|||||||
@@ -133,22 +133,25 @@ inline __device__ float calcY(float3 pixel, const video::color_t *const color_ma
|
|||||||
__global__ void RGBA_to_NV12(
|
__global__ void RGBA_to_NV12(
|
||||||
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
|
cudaTextureObject_t srcImage, std::uint8_t *dstY, std::uint8_t *dstUV,
|
||||||
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
|
std::uint32_t dstPitchY, std::uint32_t dstPitchUV,
|
||||||
std::uint32_t width, std::uint32_t height, const video::color_t *const color_matrix) {
|
const viewport_t viewport, const video::color_t *const color_matrix) {
|
||||||
|
|
||||||
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
|
int idX = (threadIdx.x + blockDim.x * blockIdx.x) * 2;
|
||||||
int idY = (threadIdx.y + blockDim.y * blockIdx.y);
|
int idY = (threadIdx.y + blockDim.y * blockIdx.y);
|
||||||
|
|
||||||
if(idX >= width) return;
|
if(idX >= viewport.width) return;
|
||||||
if(idY >= height) return;
|
if(idY >= viewport.height) return;
|
||||||
|
|
||||||
|
float x = (float)idX / (float)viewport.width / 4;
|
||||||
|
float y = (float)idY / (float)viewport.height;
|
||||||
|
|
||||||
|
idX += viewport.offsetX;
|
||||||
|
idY += viewport.offsetY;
|
||||||
|
|
||||||
dstY = dstY + idX + idY * dstPitchY;
|
dstY = dstY + idX + idY * dstPitchY;
|
||||||
dstUV = dstUV + idX + (idY / 2 * dstPitchUV);
|
dstUV = dstUV + idX + (idY / 2 * dstPitchUV);
|
||||||
|
|
||||||
float x = (float)idX / (float)width / 4;
|
|
||||||
float y = (float)idY / (float)height;
|
|
||||||
|
|
||||||
float3 rgb_l = bgra_to_rgb(tex2D<uchar4>(srcImage, x, y));
|
float3 rgb_l = bgra_to_rgb(tex2D<uchar4>(srcImage, x, y));
|
||||||
float3 rgb_r = bgra_to_rgb(tex2D<uchar4>(srcImage, x + 0.25f / width, y + 1.0f / height));
|
float3 rgb_r = bgra_to_rgb(tex2D<uchar4>(srcImage, x + 0.25f / viewport.width, y + 1.0f / viewport.height));
|
||||||
|
|
||||||
float2 uv = calcUV((rgb_l + rgb_r) * 0.5f, color_matrix);
|
float2 uv = calcUV((rgb_l + rgb_r) * 0.5f, color_matrix);
|
||||||
|
|
||||||
@@ -158,11 +161,11 @@ __global__ void RGBA_to_NV12(
|
|||||||
dstY[1] = calcY(rgb_r, color_matrix);
|
dstY[1] = calcY(rgb_r, color_matrix);
|
||||||
}
|
}
|
||||||
|
|
||||||
sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int threadsPerBlock, ptr_t &&color_matrix)
|
sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix)
|
||||||
: array {}, texture { INVALID_TEXTURE }, width { out_width }, height { out_height }, threadsPerBlock { threadsPerBlock }, color_matrix { std::move(color_matrix) } {
|
: array {}, texture { INVALID_TEXTURE }, threadsPerBlock { threadsPerBlock }, color_matrix { std::move(color_matrix) } {
|
||||||
auto format = cudaCreateChannelDesc<uchar4>();
|
auto format = cudaCreateChannelDesc<uchar4>();
|
||||||
|
|
||||||
CU_CHECK_VOID(cudaMallocArray(&array, &format, in_width, in_height, cudaArrayDefault), "Couldn't allocate cuda array");
|
CU_CHECK_VOID(cudaMallocArray(&array, &format, pitch, in_height, cudaArrayDefault), "Couldn't allocate cuda array");
|
||||||
|
|
||||||
cudaResourceDesc res {};
|
cudaResourceDesc res {};
|
||||||
res.resType = cudaResourceTypeArray;
|
res.resType = cudaResourceTypeArray;
|
||||||
@@ -177,6 +180,22 @@ sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int thr
|
|||||||
std::fill_n(std::begin(desc.addressMode), 2, cudaAddressModeClamp);
|
std::fill_n(std::begin(desc.addressMode), 2, cudaAddressModeClamp);
|
||||||
|
|
||||||
CU_CHECK_VOID(cudaCreateTextureObject(&texture, &res, &desc, nullptr), "Couldn't create cuda texture");
|
CU_CHECK_VOID(cudaCreateTextureObject(&texture, &res, &desc, nullptr), "Couldn't create cuda texture");
|
||||||
|
|
||||||
|
|
||||||
|
// Ensure aspect ratio is maintained
|
||||||
|
auto scalar = std::fminf(out_width / (float)in_width, out_height / (float)in_height);
|
||||||
|
auto out_width_f = in_width * scalar;
|
||||||
|
auto out_height_f = in_height * scalar;
|
||||||
|
|
||||||
|
// result is always positive
|
||||||
|
auto offsetX_f = (out_width - out_width_f) / 2;
|
||||||
|
auto offsetY_f = (out_height - out_height_f) / 2;
|
||||||
|
|
||||||
|
viewport.width = out_width_f;
|
||||||
|
viewport.height = out_height_f;
|
||||||
|
|
||||||
|
viewport.offsetX = offsetX_f;
|
||||||
|
viewport.offsetY = offsetY_f;
|
||||||
}
|
}
|
||||||
|
|
||||||
sws_t::~sws_t() {
|
sws_t::~sws_t() {
|
||||||
@@ -193,7 +212,7 @@ sws_t::~sws_t() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<sws_t> sws_t::make(int in_width, int in_height, int out_width, int out_height) {
|
std::unique_ptr<sws_t> sws_t::make(int in_width, int in_height, int out_width, int out_height, int pitch) {
|
||||||
cudaDeviceProp props;
|
cudaDeviceProp props;
|
||||||
int device;
|
int device;
|
||||||
CU_CHECK_PTR(cudaGetDevice(&device), "Couldn't get cuda device");
|
CU_CHECK_PTR(cudaGetDevice(&device), "Couldn't get cuda device");
|
||||||
@@ -204,7 +223,7 @@ std::unique_ptr<sws_t> sws_t::make(int in_width, int in_height, int out_width, i
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto sws = std::make_unique<sws_t>(in_width, in_height, out_width, out_height, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor / 2, std::move(ptr));
|
auto sws = std::make_unique<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor / 2, std::move(ptr));
|
||||||
|
|
||||||
if(sws->texture == INVALID_TEXTURE) {
|
if(sws->texture == INVALID_TEXTURE) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@@ -214,13 +233,17 @@ std::unique_ptr<sws_t> sws_t::make(int in_width, int in_height, int out_width, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV) {
|
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV) {
|
||||||
int threadsX = width / 2;
|
return convert(Y, UV, pitchY, pitchUV, viewport);
|
||||||
int threadsY = height;
|
}
|
||||||
|
|
||||||
|
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, const viewport_t &viewport) {
|
||||||
|
int threadsX = viewport.width / 2;
|
||||||
|
int threadsY = viewport.height;
|
||||||
|
|
||||||
dim3 block(threadsPerBlock, threadsPerBlock);
|
dim3 block(threadsPerBlock, threadsPerBlock);
|
||||||
dim3 grid(div_align(threadsX, threadsPerBlock), div_align(threadsY, threadsPerBlock));
|
dim3 grid(div_align(threadsX, threadsPerBlock), div_align(threadsY, threadsPerBlock));
|
||||||
|
|
||||||
RGBA_to_NV12<<<block, grid>>>(texture, Y, UV, pitchY, pitchUV, width, height, (video::color_t*)color_matrix.get());
|
RGBA_to_NV12<<<block, grid>>>(texture, Y, UV, pitchY, pitchUV, viewport, (video::color_t*)color_matrix.get());
|
||||||
|
|
||||||
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
|
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,21 +36,27 @@ public:
|
|||||||
|
|
||||||
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
|
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
|
||||||
|
|
||||||
|
struct viewport_t {
|
||||||
|
int width, height;
|
||||||
|
int offsetX, offsetY;
|
||||||
|
};
|
||||||
|
|
||||||
class sws_t {
|
class sws_t {
|
||||||
public:
|
public:
|
||||||
~sws_t();
|
~sws_t();
|
||||||
sws_t(int in_width, int in_height, int out_width, int out_height, int threadsPerBlock, ptr_t &&color_matrix);
|
sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* in_width, out_width -- The width and height of the captured image in bytes
|
* in_width, in_height -- The width and height of the captured image in pixels
|
||||||
* out_width, out_height -- the width and height of the NV12 image in pixels
|
* out_width, out_height -- the width and height of the NV12 image in pixels
|
||||||
*
|
*
|
||||||
* cuda_device -- pointer to the cuda device
|
* pitch -- The size of a single row of pixels in bytes
|
||||||
*/
|
*/
|
||||||
static std::unique_ptr<sws_t> make(int in_width, int in_height, int out_width, int out_height);
|
static std::unique_ptr<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch);
|
||||||
|
|
||||||
// Converts loaded image into a CUDevicePtr
|
// Converts loaded image into a CUDevicePtr
|
||||||
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV);
|
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV);
|
||||||
|
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, const viewport_t &viewport);
|
||||||
|
|
||||||
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
|
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
|
||||||
|
|
||||||
@@ -60,9 +66,9 @@ public:
|
|||||||
cudaArray_t array;
|
cudaArray_t array;
|
||||||
cudaTextureObject_t texture;
|
cudaTextureObject_t texture;
|
||||||
|
|
||||||
int width, height;
|
|
||||||
|
|
||||||
int threadsPerBlock;
|
int threadsPerBlock;
|
||||||
|
|
||||||
|
viewport_t viewport;
|
||||||
};
|
};
|
||||||
} // namespace cuda
|
} // namespace cuda
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user