From 4d8dd20b06de1c969b01a3641592f68eea1f37df Mon Sep 17 00:00:00 2001 From: Vargha Csongor Date: Tue, 27 Jun 2023 20:38:59 +0200 Subject: [PATCH] check if threadIdx.x > 0 && threadIdx.y > 0 --- src/tv_denoising.cu | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/tv_denoising.cu b/src/tv_denoising.cu index d5756c3..53fa204 100644 --- a/src/tv_denoising.cu +++ b/src/tv_denoising.cu @@ -21,7 +21,7 @@ __global__ void tvDenoisingKernel(float* image, int width, int height, float lam // Perform TV denoising iteratively for (int iteration = 0; iteration < maxIterations; ++iteration) { - // Calculate the gradients using central differences + // Calculate the gradients using central differences gradientX[threadIdx.x][threadIdx.y] = image[index + 1] - image[index - 1]; gradientY[threadIdx.x][threadIdx.y] = image[index + width] - image[index - width]; @@ -29,10 +29,12 @@ __global__ void tvDenoisingKernel(float* image, int width, int height, float lam __syncthreads(); // Apply TV denoising update rule - updatedImage[threadIdx.x][threadIdx.y] = image[index] + lambda * ( - gradientX[threadIdx.x][threadIdx.y] - gradientX[threadIdx.x - 1][threadIdx.y] + - gradientY[threadIdx.x][threadIdx.y] - gradientY[threadIdx.x][threadIdx.y - 1] - ); + if (threadIdx.x > 0 && threadIdx.y > 0) { + updatedImage[threadIdx.x][threadIdx.y] = image[index] + lambda * ( + gradientX[threadIdx.x][threadIdx.y] - gradientX[threadIdx.x - 1][threadIdx.y] + + gradientY[threadIdx.x][threadIdx.y] - gradientY[threadIdx.x][threadIdx.y - 1] + ); + } // Update the global image array with the updated pixel values image[index] = updatedImage[threadIdx.x][threadIdx.y]; @@ -65,6 +67,12 @@ extern "C" void TVDenoising(cv::Mat& image, float lambda, int maxIterations) // Invoke the TV denoising kernel tvDenoisingKernel<<>>(d_image, width, height, lambda, maxIterations); + // Check for errors during kernel launch + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("Error: %s\n", cudaGetErrorString(err)); + } // Copy the denoised image data back from device to host cudaMemcpy(floatImage.ptr(0), d_image, width * height * sizeof(float), cudaMemcpyDeviceToHost);