From 20d887293037106fc22b23fcdde87d755c54ac4a Mon Sep 17 00:00:00 2001
From: Vargha Csongor
Date: Sun, 25 Jun 2023 14:34:54 +0200
Subject: [PATCH] Add Cuda kernel, to perform denoising

---
 src/tv_denoising.cu | 78 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

diff --git a/src/tv_denoising.cu b/src/tv_denoising.cu
index b312b45..d5756c3 100644
--- a/src/tv_denoising.cu
+++ b/src/tv_denoising.cu
@@ -3,6 +3,71 @@
 #include
 #include

+#define BLOCK_SIZE_X 16
+#define BLOCK_SIZE_Y 16
+
+// Returns image[y * width + x] after clamping (x, y) into the image, so stencil
+// taps that fall outside the picture reuse the nearest border pixel.
+__device__ __forceinline__ float tvLoadClamped(const float* image, int width, int height, int x, int y)
+{
+    x = min(max(x, 0), width - 1);
+    y = min(max(y, 0), height - 1);
+    return image[(size_t)y * width + x];
+}
+
+// Iteratively smooths a row-major float image of width * height pixels in place.
+//
+// Each pass replaces every pixel I(x, y) with
+//     I + lambda * (gx(x, y) - gx(x - 1, y) + gy(x, y) - gy(x, y - 1))
+// where gx(x, y) = I(x + 1, y) - I(x - 1, y) and gy(x, y) = I(x, y + 1) - I(x, y - 1).
+// Taps outside the image are clamped to the border.
+//
+// Launch with a 2D grid of 2D blocks, one thread per pixel. The grid may overhang
+// the image: out-of-range threads touch no memory and only join the barriers.
+//
+// NOTE(review): __syncthreads() orders the threads of one block only. A pixel on a
+// tile edge reads neighbours owned by other blocks with no ordering, so in a
+// multi-block launch it may see values from the current or the previous pass.
+// A deterministic version needs a second buffer and one launch per pass.
+__global__ void tvDenoisingKernel(float* image, int width, int height, float lambda, int maxIterations)
+{
+    // Calculate the global pixel coordinates of this thread
+    const int col = blockIdx.x * blockDim.x + threadIdx.x;
+    const int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+    // No early return for out-of-range threads: every thread of the block must
+    // reach both __syncthreads() calls in the loop.
+    const bool inside = (col < width) && (row < height);
+    const size_t index = inside ? (size_t)row * width + col : 0;
+
+    // Perform TV denoising iteratively
+    for (int iteration = 0; iteration < maxIterations; ++iteration)
+    {
+        float updated = 0.0f;
+        if (inside)
+        {
+            // Central-difference gradients at this pixel and at its left / upper neighbour
+            const float gxHere = tvLoadClamped(image, width, height, col + 1, row) - tvLoadClamped(image, width, height, col - 1, row);
+            const float gxLeft = tvLoadClamped(image, width, height, col, row) - tvLoadClamped(image, width, height, col - 2, row);
+            const float gyHere = tvLoadClamped(image, width, height, col, row + 1) - tvLoadClamped(image, width, height, col, row - 1);
+            const float gyUp = tvLoadClamped(image, width, height, col, row) - tvLoadClamped(image, width, height, col, row - 2);
+
+            // Apply TV denoising update rule; the result stays in a register for now
+            updated = image[index] + lambda * (gxHere - gxLeft + gyHere - gyUp);
+        }
+
+        // Every read of this pass is done before any pixel of the block is overwritten
+        __syncthreads();
+
+        if (inside)
+        {
+            image[index] = updated;
+        }
+
+        // Every write of this pass is visible before the next pass starts reading
+        __syncthreads();
+    }
+}

 extern "C" void TVDenoising(cv::Mat& image, float lambda, int maxIterations)
 {
@@ -25,7 +90,18 @@ extern "C" void TVDenoising(cv::Mat& image, float lambda, int maxIterations)

     // Copy the image data from host to device
     cudaMemcpy(d_image, floatImage.ptr(0), width * height * sizeof(float), cudaMemcpyHostToDevice);
-
+    // Invoke the TV denoising kernel: one thread per pixel, grid rounded up so
+    // partial tiles at the right / bottom edge are covered
+    const dim3 tvBlock(BLOCK_SIZE_X, BLOCK_SIZE_Y);
+    const dim3 tvGrid((width + BLOCK_SIZE_X - 1) / BLOCK_SIZE_X, (height + BLOCK_SIZE_Y - 1) / BLOCK_SIZE_Y);
+    tvDenoisingKernel<<<tvGrid, tvBlock>>>(d_image, width, height, lambda, maxIterations);
+
+    // Copy the denoised image data back from device to host
+    cudaMemcpy(floatImage.ptr(0), d_image, width * height * sizeof(float), cudaMemcpyDeviceToHost);
+
+    // Convert the denoised image back to the original data type
+    floatImage.convertTo(image, image.type());
+
     // Free the GPU memory
     cudaFree(d_image);
 }