英文:
C++ OpenCL Build Error: kernelSource undeclared
问题
你尝试运行一个从互联网上找到的 OpenCL 示例,但遇到了一个错误。错误信息显示代码中的 kernelSource 变量未声明,导致编译错误。
要解决这个错误,你需要在程序中定义 kernelSource 变量,并将 OpenCL 内核代码存储在这个变量中。可以按照以下步骤来完成:
- 在你的 C 代码中添加 kernelSource 变量的定义,将 OpenCL 内核代码存储在其中。
// OpenCL C source of the vecAdd kernel, embedded as one string constant so the host
// code can hand it to clCreateProgramWithSource through &kernelSource.
// It has to be declared before the function that references it.
const char *kernelSource =
"// OpenCL kernel. Each work item takes care of one element of c\n"
"__kernel void vecAdd( __global double *a, \n"
" __global double *b, \n"
" __global double *c, \n"
" const unsigned int n) \n"
"{ \n"
" // Get our global thread ID \n"
" int id = get_global_id(0); \n"
" \n"
" // Make sure we do not go out of bounds \n"
" if (id < n) \n"
" c[id] = a[id] + b[id]; \n"
"} \n";
- 确保 kernelSource 包含 OpenCL 内核代码。
这样,你的代码应该能够成功编译和运行了。如果还有其他问题或疑问,请随时提出。
英文:
I'm trying to run an OpenCL sample from the internet. It looks like this:
VecAdd.c
#define PROGRAM_FILE "vecAdd.cl"
#define KERNEL_FUNC "vecAdd"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Host program of the vecAdd sample: fills two input vectors on the host, runs the
// "vecAdd" kernel on an OpenCL GPU device and prints the mean of the result vector.
// argc and argv are not used.
int main( int argc, char* argv[] )
{
// Length of vectors
unsigned int n = 100000;
// Host input vectors
double *h_a;
double *h_b;
// Host output vector
double *h_c;
// Device input buffers
cl_mem d_a;
cl_mem d_b;
// Device output buffer
cl_mem d_c;
cl_platform_id cpPlatform; // OpenCL platform
cl_device_id device_id; // device ID
cl_context context; // context
cl_command_queue queue; // command queue
cl_program program; // program
cl_kernel kernel; // kernel
// Size, in bytes, of each vector
size_t bytes = n*sizeof(double);
// Allocate memory for each vector on host (the malloc results are not checked)
h_a = (double*)malloc(bytes);
h_b = (double*)malloc(bytes);
h_c = (double*)malloc(bytes);
// Initialize vectors on host.
// a[i] = sin^2(i) and b[i] = cos^2(i), so each a[i] + b[i] should be close to 1.
// Note: i is a signed int while n is unsigned, so the loop comparison mixes signedness.
int i;
for( i = 0; i < n; i++ )
{
h_a[i] = sinf(i)*sinf(i);
h_b[i] = cosf(i)*cosf(i);
}
size_t globalSize, localSize;
// Status code written by the OpenCL calls below; it is assigned repeatedly
// but never inspected anywhere in this function.
cl_int err;
// Number of work items in each local work group
localSize = 64;
// Number of total work items - localSize must be a divisor.
// n is rounded up to the next multiple of localSize.
globalSize = ceil(n/(float)localSize)*localSize;
// Bind to platform
err = clGetPlatformIDs(1, &cpPlatform, NULL);
// Get ID for the device (only GPU devices are requested)
err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
// Create a context
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
// Create a command queue
queue = clCreateCommandQueue(context, device_id, 0, &err);
// Create the compute program from the source buffer.
// NOTE: kernelSource is not declared anywhere in this function; this is the
// identifier the compiler reports as undeclared.
program = clCreateProgramWithSource(context, 1,
(const char **) & kernelSource, NULL, &err);
// Build the program executable (the return value is ignored)
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
// Create the compute kernel in the program we wish to run.
// The kernel name is passed as a string literal here.
kernel = clCreateKernel(program, "vecAdd", &err);
// Create the input and output arrays in device memory for our calculation.
// The last argument is NULL, so no error code is requested for these calls.
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);
// Write our data set into the input array in device memory
err = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0,
bytes, h_a, 0, NULL, NULL);
err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0,
bytes, h_b, 0, NULL, NULL);
// Set the arguments to our compute kernel.
// NOTE: the plain assignment below discards the status accumulated from the
// two buffer writes above.
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);
// Execute the kernel over the entire range of the data set
// (err is overwritten once more and not examined afterwards)
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
0, NULL, NULL);
// Wait for the command queue to get serviced before reading back results
clFinish(queue);
// Read the results from the device
clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0,
bytes, h_c, 0, NULL, NULL );
// Sum up vector c and print result divided by n, this should equal 1 within error
double sum = 0;
for(i=0; i<n; i++)
sum += h_c[i];
printf("final result: %f\n", sum/n);
// Release OpenCL resources
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
// Release host memory
free(h_a);
free(h_b);
free(h_c);
return 0;
}
VecAdd.cl
// Element-wise vector addition kernel: each work item produces one entry of c
// as the sum of the matching entries of a and b.
__kernel void vecAdd( __global double *a,
                      __global double *b,
                      __global double *c,
                      const unsigned int n)
{
    // Index of this work item along dimension 0
    int gid = get_global_id(0);
    // Indices that are not below n are skipped so nothing is accessed out of range
    if (!(gid < n))
        return;
    c[gid] = a[gid] + b[gid];
}
When I try to run VecAdd.c with CodeBlocks I get an error on this line:
program = clCreateProgramWithSource(context, 1, (const char **) & kernelSource, NULL, &err);
The error looks like this:
vecAdd.c|79|error: 'kernelSource' undeclared (first use in this function)
I expected no error since the print_info.cpp sample worked fine and printed:
OpenCL Device Info:
Name: Intel(R) UHD Graphics 620
Vendor: Intel(R) Corporation
Version: OpenCL 3.0 NEO
Max size of work-items: (256,256,256)
Max size of work-groups: 256
Number of compute units: 24
Global memory size (bytes): 6762340352
Local memory size per compute unit (bytes): 2730
答案1
得分: 0
以下是翻译好的代码部分:
#include <iostream> // write to console
#include <fstream> // read/write files
// ...
// Excerpt of the host program: loads the OpenCL kernel text from VecAdd.cl into
// kernelSource and creates the program object from it. The "// ..." markers stand
// for the parts of the sample that are unchanged and omitted here.
int main( int argc, char* argv[] )
{
    // ...
    // Read the complete kernel file into a string.
    std::string kernelSource = "";
    {
        std::ifstream file("./VecAdd.cl", std::ios::in); // path might be different for you
        if(file.fail()) std::cout << "Error: File does not exist!\n";
        kernelSource = std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        file.close();
    }
    // clCreateProgramWithSource expects an array of C strings. Take a pointer to the
    // string's character data; casting &kernelSource (a std::string*) to const char**
    // would reinterpret the string object itself and is undefined behavior.
    const char* kernelSourcePtr = kernelSource.c_str();
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, NULL, &err);
    // ...
}
请注意,这只是代码的一部分,而不包括完整的上下文。如果您需要完整的翻译或有其他问题,请随时提出。
英文:
The sample code is incomplete. It's missing the part where it reads the VecAdd.cl file into the string kernelSource. You may add:
#include <iostream> // write to console
#include <fstream> // read/write files
// ...
// Excerpt of the host program: loads the OpenCL kernel text from VecAdd.cl into
// kernelSource and creates the program object from it. The "// ..." markers stand
// for the parts of the sample that are unchanged and omitted here.
int main( int argc, char* argv[] )
{
    // ...
    // Read the complete kernel file into a string.
    std::string kernelSource = "";
    {
        std::ifstream file("./VecAdd.cl", std::ios::in); // path might be different for you
        if(file.fail()) std::cout << "Error: File does not exist!\n";
        kernelSource = std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        file.close();
    }
    // clCreateProgramWithSource expects an array of C strings. Take a pointer to the
    // string's character data; casting &kernelSource (a std::string*) to const char**
    // would reinterpret the string object itself and is undefined behavior.
    const char* kernelSourcePtr = kernelSource.c_str();
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, NULL, &err);
    // ...
}
For a much easier start with OpenCL, have a look at this OpenCL-Wrapper. This simplifies using the API a lot, without giving up any functionality or performance. By default it comes with a vector addition example. Notice how much shorter and less complicated the code is compared to the regular OpenCL bloat.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论