英文:
Cuda NPP Filter Median functions
问题
想请教一下，是否有更熟悉如何调用这个 NPP CUDA 函数的人能告诉我错误出在哪里？
// Wraps a CUDA runtime call so that a failure is reported with the call site's file and line.
#define gpuErrchk(ans) gpuAssert((ans), __FILE__, __LINE__)
// Returns 0 when code is cudaSuccess; otherwise prints the CUDA error string together with
// the given file and line to stderr and returns 1.
inline int gpuAssert(cudaError_t code, const char *file, int line)
{
    if (code == cudaSuccess)
        return 0;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    return 1;
}
// Median-filters a host float image on the GPU with NPP.
// pSrc and pDst are host buffers accessed as M rows of N tightly packed floats
// (row stride N * sizeof(float)); winSize is used for both mask dimensions.
// Returns 1 on success and 0 after any CUDA or NPP failure (a message is printed to stderr).
int gpuMedfilt2(const float* pSrc, float* pDst, int M, int N, int winSize)
{
NppStatus status;
Npp32f* d_in, *d_out;
// Byte stride of one row in the host images.
Npp32s nSrcStep = N * sizeof(float), nDstStep = N * sizeof(float);
// NOTE(review): the ROI is the whole N x M image while d_in is passed to the filter at its
// first pixel, so a mask centred on a border pixel reaches outside the allocation. This is
// the out-of-bounds global read that compute-sanitizer reports for this code.
NppiSize oSizeROI = {N, M};
NppiSize oMaskSize = {winSize, winSize};
// Anchor at the centre of the mask.
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
Npp8u* pBuffer;
Npp32u pBufferSize;
size_t d_in_pitch, d_out_pitch;
// Pitched device allocations; the device row strides come back in d_in_pitch / d_out_pitch.
if (gpuErrchk(cudaMallocPitch((void**)&d_in, &d_in_pitch, N * sizeof(float), M)))
return 0;
if (gpuErrchk(cudaMallocPitch((void**)&d_out, &d_out_pitch, N * sizeof(float), M)))
{
cudaFree((void*)d_in);
return 0;
}
// Upload: host rows use nSrcStep, device rows use d_in_pitch.
if (gpuErrchk(cudaMemcpy2D((void*)d_in, d_in_pitch, (const void*)pSrc, nSrcStep, N * sizeof(float), M, cudaMemcpyHostToDevice)))
{
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
// Ask NPP for the size of the scratch buffer the filter needs for this ROI and mask.
if ((status = nppiFilterMedianGetBufferSize_32f_C1R(oSizeROI, oMaskSize, &pBufferSize)) != NPP_SUCCESS)
{
fprintf(stderr, "NPP Error: Failed to calculate buffer space for median filter operation\n");
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
if (gpuErrchk(cudaMalloc((void**)&pBuffer, pBufferSize)))
{
fprintf(stderr, "NPP Error: Failed to allocate buffer space for median filter operation\n");
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
if ((status = nppiFilterMedian_32f_C1R(d_in, d_in_pitch, d_out, d_out_pitch, oSizeROI, oMaskSize, oAnchor, pBuffer)) != NPP_SUCCESS)
{
fprintf(stderr, "NPP Error: Failed to call nppiFilterMedian_32f_C1R function\n");
cudaFree((void*)pBuffer);
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
// Download the filtered image into the tightly packed host buffer.
if (gpuErrchk(cudaMemcpy2D((void*)pDst, nDstStep, (const void*)d_out, d_out_pitch, sizeof(float) * N, M, cudaMemcpyDeviceToHost)))
{
cudaFree((void*)pBuffer);
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
// NOTE(review): pBuffer, d_in and d_out are not freed on this success path.
return 1;
}
Please note that this translation preserves the C/C++ code structure and does not include code comments or error messages.
英文:
I was wondering if anyone more versed in how to call this NPP CUDA function could tell me where the mistake is occurring?
// Wraps a CUDA runtime call so that a failure is reported with the call site's file and line.
#define gpuErrchk(ans) gpuAssert((ans), __FILE__, __LINE__)
// Returns 0 when code is cudaSuccess; otherwise prints the CUDA error string together with
// the given file and line to stderr and returns 1.
inline int gpuAssert(cudaError_t code, const char *file, int line)
{
    if (code == cudaSuccess)
        return 0;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    return 1;
}
// Median-filters a host float image on the GPU with NPP.
// pSrc and pDst are host buffers accessed as M rows of N tightly packed floats
// (row stride N * sizeof(float)); winSize is used for both mask dimensions.
// Returns 1 on success and 0 after any CUDA or NPP failure (a message is printed to stderr).
int gpuMedfilt2(const float* pSrc, float* pDst, int M, int N, int winSize)
{
NppStatus status;
Npp32f* d_in, *d_out;
// Byte stride of one row in the host images.
Npp32s nSrcStep = N * sizeof(float), nDstStep = N * sizeof(float);
// NOTE(review): the ROI is the whole N x M image while d_in is passed to the filter at its
// first pixel, so a mask centred on a border pixel reaches outside the allocation. This is
// the out-of-bounds global read that compute-sanitizer reports for this code.
NppiSize oSizeROI = {N, M};
NppiSize oMaskSize = {winSize, winSize};
// Anchor at the centre of the mask.
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
Npp8u* pBuffer;
Npp32u pBufferSize;
size_t d_in_pitch, d_out_pitch;
// Pitched device allocations; the device row strides come back in d_in_pitch / d_out_pitch.
if (gpuErrchk(cudaMallocPitch((void**)&d_in, &d_in_pitch, N * sizeof(float), M)))
return 0;
if (gpuErrchk(cudaMallocPitch((void**)&d_out, &d_out_pitch, N * sizeof(float), M)))
{
cudaFree((void*)d_in);
return 0;
}
// Upload: host rows use nSrcStep, device rows use d_in_pitch.
if (gpuErrchk(cudaMemcpy2D((void*)d_in, d_in_pitch, (const void*)pSrc, nSrcStep, N * sizeof(float), M, cudaMemcpyHostToDevice)))
{
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
// Ask NPP for the size of the scratch buffer the filter needs for this ROI and mask.
if ((status = nppiFilterMedianGetBufferSize_32f_C1R(oSizeROI, oMaskSize, &pBufferSize)) != NPP_SUCCESS)
{
fprintf(stderr, "NPP Error: Failed to calculate buffer space for median filter operation\n");
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
if (gpuErrchk(cudaMalloc((void**)&pBuffer, pBufferSize)))
{
fprintf(stderr, "NPP Error: Failed to allocate buffer space for median filter operation\n");
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
if ((status = nppiFilterMedian_32f_C1R(d_in, d_in_pitch, d_out, d_out_pitch, oSizeROI, oMaskSize, oAnchor, pBuffer)) != NPP_SUCCESS)
{
fprintf(stderr, "NPP Error: Failed to call nppiFilterMedian_32f_C1R function\n");
cudaFree((void*)pBuffer);
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
// Download the filtered image into the tightly packed host buffer.
if (gpuErrchk(cudaMemcpy2D((void*)pDst, nDstStep, (const void*)d_out, d_out_pitch, sizeof(float) * N, M, cudaMemcpyDeviceToHost)))
{
cudaFree((void*)pBuffer);
cudaFree((void*)d_in);
cudaFree((void*)d_out);
return 0;
}
// NOTE(review): pBuffer, d_in and d_out are not freed on this success path.
return 1;
}
compute-sanitizer shows many errors.
My main looks like this,
float* in = malloc(sizeof(float) * M * N);
float* out = malloc(sizeof(float) * M * N);
gpuMedfilt2(in, out, M, N, 5);
Thank you for any insight.
I feel like something is wrong with the cudaMallocPitch and cudaMemcpy2D calls. Am I not getting the memory sizes right?
Here is output from compute-sanitizer, this occurs around ~100+ times.
========= Invalid __global__ read of size 4 bytes
========= at 0x2b8 in void FilterMedianKernelSortingNetworkShared::RunKernel5x5<float, (int)1, (int)1, (int)25>(Pixel<T1, T2> *, int, NppiSize, NppiSize, const Pixel<T1, T2> *, int, int)
========= by thread (1,0,0) in block (0,2,0)
========= Address 0x701d1fffc is out of bounds
========= and is 4 bytes before the nearest allocation at 0x701d20000 of size 25,600 bytes
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x30b442]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x393adb]
========= in /home/rctodd/cuda11.7/lib/libnppif.so.11
========= Host Frame: [0x3ef278]
========= in /home/rctodd/cuda11.7/lib/libnppif.so.11
========= Host Frame: [0x140046]
========= in /home/rctodd/cuda11.7/lib/libnppif.so.11
========= Host Frame: [0x1401cb]
========= in /home/rctodd/cuda11.7/lib/libnppif.so.11
========= Host Frame:nppiFilterMedian_32f_C1R [0x12266f]
========= in /home/rctodd/cuda11.7/lib/libnppif.so.11
========= Host Frame:gpuMedfilt2 [0x1601]
========= in /home/rctodd/code/cuda/cuMedfilt2/libgpuMedfilt2.so
========= Host Frame:main [0x127e]
========= in /home/rctodd/code/cuda/cuMedfilt2/./app
========= Host Frame:../csu/libc-start.c:342:__libc_start_main [0x24083]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0x110e]
========= in /home/rctodd/code/cuda/cuMedfilt2/./app
=========
ldd of my shared library I compile against
ldd libgpuMedfilt2.so
linux-vdso.so.1 (0x00007fff6533d000)
libcudart.so.11.0 => /home/rctodd/cuda11.7/lib/libcudart.so.11.0 (0x00007f1bb2911000)
libnppif.so.11 => /home/rctodd/cuda11.7/lib/libnppif.so.11 (0x00007f1badfbc000)
libstdc++.so.6 => /lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007f1baddc8000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f1badbd6000)
/lib64/ld-linux-x86-64.so.2 (0x00007f1bb2bbd000)
libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007f1badbd0000)
libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007f1badbad000)
librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007f1badba1000)
libnppc.so.11 => /home/rctodd/cuda11.7/lib/libnppc.so.11 (0x00007f1bad813000)
libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f1bad6c4000)
libgcc_s.so.1 => /home/rctodd/cuda11.7/lib/libgcc_s.so.1 (0x00007f1bad6ab000)
Here are my compile commands
# Shared library
/./home/rctodd/cuda11.7/bin/nvcc -o libgpuMedfilt2.so -shared gpuMedfilt2.cu --compiler-options '-fPIC' -Xlinker -L/home/rctodd/cuda11.7/lib -Xlinker -rpath=/home/rctodd/cuda11.7/lib -lcudart -lnppif -arch=sm_50
# Application
gcc -o app main.c -L$(pwd) -Wl,-rpath=$(pwd) -lgpuMedfilt2
答案1
得分: 1
以下是您提供的代码部分的翻译:
"NPP"提供的“普通”滤波功能期望掩码/滤波核的任何放置都会落在图像中正确定义的像素上。这意味着您不能以这种方式对输入图像进行边缘到边缘的滤波。您必须留下一个未经滤波的边界,其大小将取决于您的掩码/滤波核维度。([某些](https://stackoverflow.com/questions/76323245/nppifiltergaussborder-8u-c1r-osrcsize-and-osrcoffset-parameters) NPP滤波函数提供了一个`Boundary`变体,它将对边界像素进行“自动”处理,即计算所需但位于定义图像之外的像素,但[中值滤波不是其中之一](https://docs.nvidia.com/cuda/npp/group__image__rank__filters.html)。)
您的代码违反了这一期望,因此`compute-sanitizer`报告了非法的越界访问。
解决这一期望的一种典型方法是将滤波器限制在“适合”原始图像的区域,留下足够的已定义像素边界区域,以使滤波器在滤波器区域内始终选择定义像素(来自原始图像)。
滤波核锚定像素的选择将影响这一点,但您选择了滤波核中心的“典型”锚点。
因此,在您的情况下,我们可以滤波一个“中心区域”,在原始图像的顶部、底部、左侧和右侧留下2个像素未经滤波,从而使水平和垂直的滤波图像比原始尺寸少4个像素。
这里有一个实际示例。中值滤波有几个有趣之处。其中之一是在仍然为具有某些特性的“噪音”提供“低通”滤波效果的同时,保持图像边缘“不变”。以下示例演示了这一点:
$ cat t19.cu
#include <npp.h>
#include <nppi.h>
#include <cstdio>
#include <iostream>
// Wraps a CUDA runtime call so that a failure is reported with the call site's file and line.
#define gpuErrchk(ans) gpuAssert((ans), __FILE__, __LINE__)
// Returns 0 when code is cudaSuccess; otherwise prints the CUDA error string together with
// the given file and line to stderr and returns 1.
inline int gpuAssert(cudaError_t code, const char *file, int line)
{
    if (code == cudaSuccess)
        return 0;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    return 1;
}
// Median-filters a host float image on the GPU with NPP (nppiFilterMedian_32f_C1R).
//
// pSrc / pDst : host images of ih rows by iw columns, stored as tightly packed floats.
// winSize     : width and height of the square filter window.
//
// NPP's median filter reads every pixel under the mask, so only the interior region whose
// window fits inside the image is filtered; the border of pDst (winSize / 2 pixels on each
// side) is set to 0.
//
// Returns 1 on success and 0 on any failure (invalid arguments, image smaller than the
// window, CUDA error or NPP error). Device memory is released on every path.
int gpuMedfilt2(const float* pSrc, float* pDst, int ih, int iw, int winSize)
{
    NppStatus status;
    Npp32f* d_in = NULL;
    Npp32f* d_out = NULL;
    Npp8u* pBuffer = NULL;
    Npp32u pBufferSize = 0;
    size_t d_in_pitch = 0, d_out_pitch = 0;
    int ok = 0;

    if (pSrc == NULL || pDst == NULL || ih <= 0 || iw <= 0 || winSize <= 0)
    {
        fprintf(stderr, "gpuMedfilt2: invalid arguments\n");
        return 0;
    }

    const size_t rowBytes = (size_t)iw * sizeof(float); // host row stride and copy width
    NppiSize oMaskSize = {winSize, winSize};
    NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
    // Shrink the ROI so that every mask placement stays inside the image.
    NppiSize oSizeROI = {iw - 2 * oAnchor.x, ih - 2 * oAnchor.y};

    if (oSizeROI.width <= 0 || oSizeROI.height <= 0)
    {
        fprintf(stderr, "gpuMedfilt2: image is too small for the requested window\n");
        return 0;
    }

    // Single-pass block: any failure breaks out to the shared cleanup below.
    do
    {
        if (gpuErrchk(cudaMallocPitch((void**)&d_in, &d_in_pitch, rowBytes, ih)))
            break;
        if (gpuErrchk(cudaMallocPitch((void**)&d_out, &d_out_pitch, rowBytes, ih)))
            break;
        if (gpuErrchk(cudaMemcpy2D((void*)d_in, d_in_pitch, (const void*)pSrc, rowBytes, rowBytes, ih, cudaMemcpyHostToDevice)))
            break;

        status = nppiFilterMedianGetBufferSize_32f_C1R(oSizeROI, oMaskSize, &pBufferSize);
        if (status != NPP_SUCCESS)
        {
            fprintf(stderr, "NPP Error: Failed to calculate buffer space for median filter operation\n");
            break;
        }
        if (gpuErrchk(cudaMalloc((void**)&pBuffer, pBufferSize)))
        {
            fprintf(stderr, "NPP Error: Failed to allocate buffer space for median filter operation\n");
            break;
        }

        // The filter will not touch every output pixel, so clear the whole output first.
        if (gpuErrchk(cudaMemset(d_out, 0, (size_t)ih * d_out_pitch)))
            break;

        // First pixel of the ROI: skip oAnchor.y rows (pitch is in bytes) and oAnchor.x pixels.
        const Npp32f* srcROI = (const Npp32f*)((const unsigned char*)d_in + oAnchor.y * d_in_pitch) + oAnchor.x;
        Npp32f* dstROI = (Npp32f*)((unsigned char*)d_out + oAnchor.y * d_out_pitch) + oAnchor.x;

        status = nppiFilterMedian_32f_C1R(srcROI, (Npp32s)d_in_pitch, dstROI, (Npp32s)d_out_pitch, oSizeROI, oMaskSize, oAnchor, pBuffer);
        if (status != NPP_SUCCESS)
        {
            fprintf(stderr, "NPP Error: Failed to call nppiFilterMedian_32f_C1R function\n");
            break;
        }

        if (gpuErrchk(cudaMemcpy2D((void*)pDst, rowBytes, (const void*)d_out, d_out_pitch, rowBytes, ih, cudaMemcpyDeviceToHost)))
            break;

        ok = 1;
    } while (0);

    // cudaFree on a null pointer performs no operation, so this is safe on every exit path.
    cudaFree((void*)pBuffer);
    cudaFree((void*)d_out);
    cudaFree((void*)d_in);
    return ok;
}
int main(){
const int sz = 36;
const int iw = sz;
const int ih = sz;
const int ms = 5;
float* in = (float *)malloc(sizeof(float) * ih * iw);
float* out = (float *)malloc(sizeof(float) * ih * iw);
for (int i = 0; i < sz; i++)
for (int j = 0; j < sz; j++) {
<details>
<summary>英文:</summary>
The "ordinary" filtering functions provided by NPP expect that any placement of the mask/filter kernel will land on properly defined pixels in the image. The ramification of this is that you cannot filter an input image edge-to-edge this way. You must leave an unfiltered boundary, the size of which will depend on your mask/filter kernel dimensions. ([Some](https://stackoverflow.com/questions/76323245/nppifiltergaussborder-8u-c1r-osrcsize-and-osrcoffset-parameters) NPP filter functions provide a `Boundary` variant which will have "automatic" handling of boundary pixels, i.e. pixels needed for calculation but which fall outside of the defined image, but [median filter is not one of those](https://docs.nvidia.com/cuda/npp/group__image__rank__filters.html). )
Your code violates this expectation, so it's not surprising that `compute-sanitizer` reports illegal, out-of-bounds accesses.
A typical method to address this expectation is to restrict the filter to a region that "fits within" the original image, leaving enough border area of defined pixels so that the placement of the filter within the filter region always selects defined pixels (from the original image) within the filter kernel area.
The choice of filter kernel anchor pixel will affect this, but you have chosen a "typical" anchor at the center of the filter kernel.
Therefore, in your case we can filter a "central region", leaving a boundary of 2 pixels unfiltered at the top, bottom, left, and right of the original image, resulting in a filtered image that is 4 pixels smaller than the original in both the horizontal and vertical dimensions.
Here is a worked example. The median filter is interesting for several reasons. One of its capabilities is to leave image edges "intact" while still offering something like a "low-pass" filter effect for "noise" having certain properties. The following example demonstrates that:
$ cat t19.cu
#include <npp.h>
#include <nppi.h>
#include <cstdio>
#include <cstdlib>
#include <iostream>
// Wraps a CUDA runtime call so that a failure is reported with the call site's file and line.
#define gpuErrchk(ans) gpuAssert((ans), __FILE__, __LINE__)
// Returns 0 when code is cudaSuccess; otherwise prints the CUDA error string together with
// the given file and line to stderr and returns 1.
inline int gpuAssert(cudaError_t code, const char *file, int line)
{
    if (code == cudaSuccess)
        return 0;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    return 1;
}
// Median-filters a host float image on the GPU with NPP (nppiFilterMedian_32f_C1R).
//
// pSrc / pDst : host images of ih rows by iw columns, stored as tightly packed floats.
// winSize     : width and height of the square filter window.
//
// NPP's median filter reads every pixel under the mask, so only the interior region whose
// window fits inside the image is filtered; the border of pDst (winSize / 2 pixels on each
// side) is set to 0.
//
// Returns 1 on success and 0 on any failure (invalid arguments, image smaller than the
// window, CUDA error or NPP error). Device memory is released on every path.
int gpuMedfilt2(const float* pSrc, float* pDst, int ih, int iw, int winSize)
{
    NppStatus status;
    Npp32f* d_in = NULL;
    Npp32f* d_out = NULL;
    Npp8u* pBuffer = NULL;
    Npp32u pBufferSize = 0;
    size_t d_in_pitch = 0, d_out_pitch = 0;
    int ok = 0;

    if (pSrc == NULL || pDst == NULL || ih <= 0 || iw <= 0 || winSize <= 0)
    {
        fprintf(stderr, "gpuMedfilt2: invalid arguments\n");
        return 0;
    }

    const size_t rowBytes = (size_t)iw * sizeof(float); // host row stride and copy width
    NppiSize oMaskSize = {winSize, winSize};
    NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
    // Shrink the ROI so that every mask placement stays inside the image.
    NppiSize oSizeROI = {iw - 2 * oAnchor.x, ih - 2 * oAnchor.y};

    if (oSizeROI.width <= 0 || oSizeROI.height <= 0)
    {
        fprintf(stderr, "gpuMedfilt2: image is too small for the requested window\n");
        return 0;
    }

    // Single-pass block: any failure breaks out to the shared cleanup below.
    do
    {
        if (gpuErrchk(cudaMallocPitch((void**)&d_in, &d_in_pitch, rowBytes, ih)))
            break;
        if (gpuErrchk(cudaMallocPitch((void**)&d_out, &d_out_pitch, rowBytes, ih)))
            break;
        if (gpuErrchk(cudaMemcpy2D((void*)d_in, d_in_pitch, (const void*)pSrc, rowBytes, rowBytes, ih, cudaMemcpyHostToDevice)))
            break;

        status = nppiFilterMedianGetBufferSize_32f_C1R(oSizeROI, oMaskSize, &pBufferSize);
        if (status != NPP_SUCCESS)
        {
            fprintf(stderr, "NPP Error: Failed to calculate buffer space for median filter operation\n");
            break;
        }
        if (gpuErrchk(cudaMalloc((void**)&pBuffer, pBufferSize)))
        {
            fprintf(stderr, "NPP Error: Failed to allocate buffer space for median filter operation\n");
            break;
        }

        // The filter will not touch every output pixel, so clear the whole output first.
        if (gpuErrchk(cudaMemset(d_out, 0, (size_t)ih * d_out_pitch)))
            break;

        // First pixel of the ROI: skip oAnchor.y rows (pitch is in bytes) and oAnchor.x pixels.
        const Npp32f* srcROI = (const Npp32f*)((const unsigned char*)d_in + oAnchor.y * d_in_pitch) + oAnchor.x;
        Npp32f* dstROI = (Npp32f*)((unsigned char*)d_out + oAnchor.y * d_out_pitch) + oAnchor.x;

        status = nppiFilterMedian_32f_C1R(srcROI, (Npp32s)d_in_pitch, dstROI, (Npp32s)d_out_pitch, oSizeROI, oMaskSize, oAnchor, pBuffer);
        if (status != NPP_SUCCESS)
        {
            fprintf(stderr, "NPP Error: Failed to call nppiFilterMedian_32f_C1R function\n");
            break;
        }

        if (gpuErrchk(cudaMemcpy2D((void*)pDst, rowBytes, (const void*)d_out, d_out_pitch, rowBytes, ih, cudaMemcpyDeviceToHost)))
            break;

        ok = 1;
    } while (0);

    // cudaFree on a null pointer performs no operation, so this is safe on every exit path.
    cudaFree((void*)pBuffer);
    cudaFree((void*)d_out);
    cudaFree((void*)d_in);
    return ok;
}
// Demo driver: builds a 36x36 image with a vertical step edge plus sparse additive noise,
// runs it through gpuMedfilt2 with a 5x5 window, and prints the result row by row.
// Returns 1 if the host buffers cannot be allocated, 0 otherwise.
int main(){
    const int sz = 36;
    const int iw = sz;
    const int ih = sz;
    const int ms = 5;
    float* in = (float *)malloc(sizeof(float) * ih * iw);
    float* out = (float *)malloc(sizeof(float) * ih * iw);
    if (in == NULL || out == NULL) {
        fprintf(stderr, "Failed to allocate host image buffers\n");
        free(in);   // free(NULL) is a no-op
        free(out);
        return 1;
    }
    for (int i = 0; i < sz; i++)
        for (int j = 0; j < sz; j++) {
            float pix_val = (j>(sz/2))?1.0f:0.0f; // create image with edge
            if ((j%ms==0)&&(i%ms==0)) pix_val += 0.1f; // additive noise
            in[i*sz+j] = pix_val;
            out[i*sz+j] = 0.0f; // keeps the printout defined even if the filter call fails
        }
    gpuMedfilt2(in, out, ih, iw, ms);
    for (int i = 0; i < sz; i++) {
        for (int j = 0; j < sz; j++)
            std::cout << out[i*sz+j] << " ";
        std::cout << std::endl;
    }
    free(out);
    free(in);
    return 0;
}
$ nvcc -o t19 t19.cu -lnpps -lnppif
$ compute-sanitizer ./t19
========= COMPUTE-SANITIZER
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
========= ERROR SUMMARY: 0 errors
$
We note that the additive noise is gone, and the vertical edge is "intact" (i.e. unchanged) at the center of the image. We also note that there appears to be a boundary of 2 pixels all the way around, which are unfiltered (and set to 0 by the `cudaMemset` operation). Another possible method to handle the output border region (rather than setting it to zero) would be to copy the input image to the output image, before the filtering operation, in place of the `cudaMemset` operation, effectively setting output pixels equal to input pixels, in the border region.
</details>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论