Add CUDA kernel to perform denoising
This commit is contained in:
@@ -3,6 +3,44 @@
|
|||||||
#include <device_launch_parameters.h>
|
#include <device_launch_parameters.h>
|
||||||
#include <opencv2/opencv.hpp>
|
#include <opencv2/opencv.hpp>
|
||||||
|
|
||||||
|
#define BLOCK_SIZE_X 16
|
||||||
|
#define BLOCK_SIZE_Y 16
|
||||||
|
|
||||||
|
/**
 * Iteratively smooths a single-channel float image in place.
 *
 * Each iteration evaluates, per pixel, the backward difference of the
 * central-difference gradient in x and y and adds it, scaled by `lambda`,
 * to the pixel value:
 *
 *   gX(c)  = I(c + 1, r) - I(c - 1, r)
 *   gY(r)  = I(c, r + 1) - I(c, r - 1)
 *   I'     = I + lambda * (gX(c) - gX(c - 1) + gY(r) - gY(r - 1))
 *
 * Neighbour coordinates that fall outside the image are clamped to the
 * nearest valid pixel (replicated border), so no access leaves the buffer.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel
 * (x -> column, y -> row). The grid may over-cover the image; surplus
 * threads perform no memory access. No shared memory is required.
 *
 * @param image          Device pointer to width * height floats, row-major,
 *                       densely packed (row stride == width). Updated in place.
 * @param width          Image width in pixels.
 * @param height         Image height in pixels.
 * @param lambda         Step size applied to the update term.
 * @param maxIterations  Number of update passes to run.
 *
 * NOTE(review): the barriers below only order reads and writes inside one
 * block. With more than one block, pixels on tile borders may observe a
 * neighbouring block's values from a different iteration. A fully
 * deterministic multi-block result needs either one launch per iteration
 * with ping-pong buffers or a cooperative launch with a grid-wide barrier;
 * both require changes on the host side.
 */
__global__ void tvDenoisingKernel(float* image, int width, int height, float lambda, int maxIterations)
{
    // Global pixel coordinates handled by this thread.
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the image must not touch memory, but they still have to
    // reach every __syncthreads(), so they are masked instead of returning.
    const bool inside = (col < width) && (row < height);

    // Flat offsets of the stencil taps, clamped to the image border.
    // size_t avoids int overflow of row * width on very large images.
    size_t center = 0, left = 0, left2 = 0, right = 0, up = 0, up2 = 0, down = 0;
    if (inside)
    {
        const size_t rowBase = static_cast<size_t>(row) * width;
        center = rowBase + col;
        left   = rowBase + max(col - 1, 0);
        left2  = rowBase + max(col - 2, 0);
        right  = rowBase + min(col + 1, width - 1);
        up     = static_cast<size_t>(max(row - 1, 0)) * width + col;
        up2    = static_cast<size_t>(max(row - 2, 0)) * width + col;
        down   = static_cast<size_t>(min(row + 1, height - 1)) * width + col;
    }

    for (int iteration = 0; iteration < maxIterations; ++iteration)
    {
        float updated = 0.0f;

        // Read phase: gather every input into registers before anything in
        // this block is overwritten.
        if (inside)
        {
            const float value = image[center];

            // Central-difference gradients at this pixel ...
            const float gradXHere = image[right] - image[left];
            const float gradYHere = image[down] - image[up];

            // ... and at the previous pixel along each axis.
            const float gradXPrev = value - image[left2];
            const float gradYPrev = value - image[up2];

            updated = value + lambda * (gradXHere - gradXPrev + gradYHere - gradYPrev);
        }

        // All reads of this block are done; writes may begin.
        __syncthreads();

        if (inside)
        {
            image[center] = updated;
        }

        // All writes of this block are visible before the next iteration reads.
        __syncthreads();
    }
}
|
||||||
|
|
||||||
extern "C" void TVDenoising(cv::Mat& image, float lambda, int maxIterations)
|
extern "C" void TVDenoising(cv::Mat& image, float lambda, int maxIterations)
|
||||||
{
|
{
|
||||||
@@ -25,7 +63,15 @@ extern "C" void TVDenoising(cv::Mat& image, float lambda, int maxIterations)
|
|||||||
// Copy the image data from host to device
|
// Copy the image data from host to device
|
||||||
cudaMemcpy(d_image, floatImage.ptr<float>(0), width * height * sizeof(float), cudaMemcpyHostToDevice);
|
cudaMemcpy(d_image, floatImage.ptr<float>(0), width * height * sizeof(float), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Invoke the TV denoising kernel
|
||||||
|
tvDenoisingKernel<<<gridSize, blockSize>>>(d_image, width, height, lambda, maxIterations);
|
||||||
|
|
||||||
|
// Copy the denoised image data back from device to host
|
||||||
|
cudaMemcpy(floatImage.ptr<float>(0), d_image, width * height * sizeof(float), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
// Convert the denoised image back to the original data type
|
||||||
|
floatImage.convertTo(image, image.type());
|
||||||
|
|
||||||
// Free the GPU memory
|
// Free the GPU memory
|
||||||
cudaFree(d_image);
|
cudaFree(d_image);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user