问题

你尝试运行一个从互联网上找到的 OpenCL 示例，但遇到了一个错误。错误信息显示在代码中的 kernelSource 变量未声明，导致编译错误。

要解决这个错误，你需要在程序中定义 kernelSource 变量，并将 OpenCL 内核代码存储在这个变量中。可以按照以下步骤来完成：

在你的 C 代码中添加 kernelSource 变量的定义，将 OpenCL 内核代码存储在其中。

/* OpenCL C source text of the vecAdd kernel, stored as a single string
 * (the adjacent literals below are concatenated by the compiler).
 * This is the variable the host code passes to clCreateProgramWithSource. */
const char *kernelSource = 
"// OpenCL kernel. Each work item takes care of one element of c\n"
"__kernel void vecAdd(  __global double *a,                    \n"
"                       __global double *b,                    \n"
"                       __global double *c,                    \n"
"                       const unsigned int n)                  \n"
"{                                                            \n"
"    // Get our global thread ID                               \n"
"    int id = get_global_id(0);                                 \n"
"                                                              \n"
"    // Make sure we do not go out of bounds                   \n"
"    if (id < n)                                               \n"
"        c[id] = a[id] + b[id];                                \n"
"}                                                            \n";

确保 kernelSource 包含 OpenCL 内核代码。

这样，你的代码应该能够成功编译和运行了。如果还有其他问题或疑问，请随时提出。

英文:

I'm trying to run an OpenCL sample from the internet. It looks like this:

VecAdd.c

#define PROGRAM_FILE &quot;vecAdd.cl&quot;
#define KERNEL_FUNC &quot;vecAdd&quot;

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;math.h&gt;

#ifdef MAC
#include &lt;OpenCL/cl.h&gt;
#else
#include &lt;CL/cl.h&gt;
#endif


int main( int argc, char* argv[] )
{
    // Length of vectors
    unsigned int n = 100000;

    // Host input vectors
    double *h_a;
    double *h_b;
    // Host output vector
    double *h_c;

    // Device input buffers
    cl_mem d_a;
    cl_mem d_b;
    // Device output buffer
    cl_mem d_c;

    cl_platform_id cpPlatform;        // OpenCL platform
    cl_device_id device_id;           // device ID
    cl_context context;               // context
    cl_command_queue queue;           // command queue
    cl_program program;               // program
    cl_kernel kernel;                 // kernel

    // Size, in bytes, of each vector
    size_t bytes = n*sizeof(double);

    // Allocate memory for each vector on host
    h_a = (double*)malloc(bytes);
    h_b = (double*)malloc(bytes);
    h_c = (double*)malloc(bytes);

    // Initialize vectors on host
    int i;
    for( i = 0; i &lt; n; i++ )
    {
        h_a[i] = sinf(i)*sinf(i);
        h_b[i] = cosf(i)*cosf(i);
    }

    size_t globalSize, localSize;
    cl_int err;

    // Number of work items in each local work group
    localSize = 64;

    // Number of total work items - localSize must be devisor
    globalSize = ceil(n/(float)localSize)*localSize;

    // Bind to platform
    err = clGetPlatformIDs(1, &amp;cpPlatform, NULL);

    // Get ID for the device
    err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &amp;device_id, NULL);

    // Create a context
    context = clCreateContext(0, 1, &amp;device_id, NULL, NULL, &amp;err);

    // Create a command queue
    queue = clCreateCommandQueue(context, device_id, 0, &amp;err);


    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1,
                            (const char **) &amp; kernelSource, NULL, &amp;err);

    // Build the program executable
    clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

    // Create the compute kernel in the program we wish to run
    kernel = clCreateKernel(program, &quot;vecAdd&quot;, &amp;err);

    // Create the input and output arrays in device memory for our calculation
    d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);

    // Write our data set into the input array in device memory
    err = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0,
                                   bytes, h_a, 0, NULL, NULL);
    err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0,
                                   bytes, h_b, 0, NULL, NULL);

    // Set the arguments to our compute kernel
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &amp;d_a);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &amp;d_b);
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &amp;d_c);
    err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &amp;n);

    // Execute the kernel over the entire range of the data set
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &amp;globalSize, &amp;localSize,
                                                              0, NULL, NULL);

    // Wait for the command queue to get serviced before reading back results
    clFinish(queue);

    // Read the results from the device
    clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0,
                                bytes, h_c, 0, NULL, NULL );

    //Sum up vector c and print result divided by n, this should equal 1 within error
    double sum = 0;
    for(i=0; i&lt;n; i++)
        sum += h_c[i];
    printf(&quot;final result: %f\n&quot;, sum/n);

    // release OpenCL resources
    clReleaseMemObject(d_a);
    clReleaseMemObject(d_b);
    clReleaseMemObject(d_c);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    //release host memory
    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}

VecAdd.cl

// OpenCL kernel. Each work item takes care of one element of c.
//   a, b : input vectors
//   c    : output vector, c[id] = a[id] + b[id]
//   n    : bound on the indices that are processed
__kernel void vecAdd(  __global double *a,
                       __global double *b,
                       __global double *c,
                       const unsigned int n)
{
    //Get our global thread ID
    int id = get_global_id(0);

    //Make sure we do not go out of bounds
    //(the global size may be larger than n, so surplus work items do nothing)
    if (id < n)
        c[id] = a[id] + b[id];
}

When I try to run VecAdd.c with CodeBlocks I get an error on this line:
program = clCreateProgramWithSource(context, 1, (const char **) & kernelSource, NULL, &err);

The error looks like this:
vecAdd.c|79|error: 'kernelSource' undeclared (first use in this function)

I expected no error since the print_info.cpp sample worked fine and printed:

OpenCL Device Info:
Name: Intel(R) UHD Graphics 620
Vendor: Intel(R) Corporation
Version: OpenCL 3.0 NEO
Max size of work-items: (256,256,256)
Max size of work-groups: 256
Number of compute units: 24
Global memory size (bytes): 6762340352
Local memory size per compute unit (bytes): 2730

答案1

得分: 0

以下是翻译好的代码部分：

#include <iostream> // write to console
#include <fstream> // read/write files
#include <iterator> // std::istreambuf_iterator
#include <string> // std::string

// ...

int main( int argc, char* argv[] )
{

	// ...

	// Read the entire kernel file into a string.
	std::string kernelSource = "";
	{
		std::ifstream file("./VecAdd.cl", std::ios::in); // path might be different for you
		if(file.fail()) std::cout << "Error: File does not exist!\n";
		kernelSource = std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
		file.close();
	}

	// Create the compute program from the source buffer.
	// clCreateProgramWithSource expects an array of C-string pointers. The address of a
	// std::string object is not such an array, so take a pointer to the string's
	// character data and pass the address of that pointer.
	const char* kernelSourcePtr = kernelSource.c_str();
	program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, NULL, &err);

	// ...

}

请注意，这只是代码的一部分，而不包括完整的上下文。如果您需要完整的翻译或有其他问题，请随时提出。

英文:

The sample code is incomplete. It's missing the part where it reads the VecAdd.cl file to the string kernelSource. You may add:

#include &lt;iostream&gt; // write to console
#include &lt;fstream&gt; // read/write files

// ...

int main( int argc, char* argv[] )
{

	// ...

	std::string kernelSource = &quot;&quot;;
	{
		std::ifstream file(&quot;./VecAdd.cl&quot;, std::ios::in); // path might be different for you
		if(file.fail()) stc::cout &lt;&lt; &quot;Error: File does not exist!\n&quot;;
		kernelSource = std::string((std::istreambuf_iterator&lt;char&gt;(file)), std::istreambuf_iterator&lt;char&gt;());
		file.close();
	}

	// Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char**)&amp;kernelSource, NULL, &amp;err);

	// ...

}

For a much easier start with OpenCL, have a look at this OpenCL-Wrapper. This simplifies using the API a lot, without giving up any functionality or performance. By default it comes with a vector addition example. Notice how much shorter and less complicated the code is compared to the regular OpenCL bloat.

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

C++ OpenCL构建错误: kernelSource未声明

问题

答案1

如何将z3::expr(bv_val)翻译成数字的位表示？

Catch2: 使用相对误差检查结果是否为0.0失败

为什么在C++中无效选择后’break’不起作用？

Rust调试构建链接失败，而发布构建通过，并且在macOS上都通过。

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论