如何解决初学者 OpenCL 代码中的内存分配问题
我正在尝试在 Intel CPU 和集成的 Iris 显卡上运行一个入门级的 OpenCL 测试程序。我使用标准 g++,并以 -framework OpenCL 作为编译开关来编译代码。
我已经尝试用 gdb 调试,并参考了一些在线指南来清理代码。但是,我仍然遇到一个错误,我认为它与内存分配有关。我在下面粘贴了完整的代码;如果您发现任何明显的错误,请指出。
代码中的注释比较冗长,对此深表歉意。如果我有一些错误的假设,也请告诉我 :)
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <OpenCL/opencl.h>
// OpenCL C source of the device kernel, passed to clCreateProgramWithSource.
// The kernel is named "add", but what it does is double each element in
// place: every work item multiplies the array slot at its global id by 2.
const char* kernelAdd =
    "__kernel void add (global int* data)\n"
    "{\n"
    " int work_item_id = get_global_id(0);\n"
    " data[work_item_id] *= 2;\n"
    "}\n";
int main (int argc,char* argv[])
{
cl_int ret_val;
// getting the platform ID that can used - here we are getting only one
cl_platform_id platformID;
cl_uint numPlatforms;
if((clGetPlatformIDs(1,&platformID,&numPlatforms)))
std::cout << "clGetPlatformIDs failed!" << std::endl;
// getting OpenCL device ID for our GPU - here too,we are getting only one
cl_device_id deviceID;
cl_uint numDevices;
if((clGetDeviceIDs(platformID,CL_DEVICE_TYPE_GPU,1,&deviceID,&numDevices)))
std::cout << "clGetDeviceIDs failed!" << std::endl;
// printing out some device info. here we have chosen CL_DEVICE_NAME.
// you can choose any others by referring
// https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clGetDeviceInfo.html
typedef char typeInfo;
size_t sizeInfo = 16*sizeof(typeInfo);
typeInfo* deviceInfo = new typeInfo(sizeInfo);
if((clGetDeviceInfo(deviceID,CL_DEVICE_NAME,sizeInfo,(void*) deviceInfo,NULL)))
std::cout << "clGetDeviceInfo failed!" << std::endl;
std::cout << "CL_DEVICE_NAME = " << deviceInfo << ",platform ID = ";
std::cout << platformID << ",deviceID = " << deviceID << std::endl;
// set up a context for our device
cl_context_properties contextProp[3] = {CL_CONTEXT_PLATFORM,(cl_context_properties) platformID,0};
cl_context context = clCreateContext(contextProp,NULL,&ret_val);
if (ret_val)
std::cout << "clCreateContext failed!" << std::endl;
// set up a queue for our device
cl_command_queue queue = clCreateCommandQueue(context,deviceID,(cl_command_queue_properties) NULL,&ret_val);
if (ret_val)
std::cout << "clCreateCommandQueue failed!" << std::endl;
// creating our data set that we want to compute on
int N = 1 << 4;
size_t data_size = sizeof(int) * N;
int* input_data = new int(N);
int* output_data = new int(N);
for (int i = 0; i < data_size; i++)
{
input_data[i] = rand() % 1000;
}
// create a buffer to where you will eventually enqueue the program for the device
cl_mem buffer = clCreateBuffer(context,CL_MEM_READ_WRITE,data_size,&ret_val);
if (ret_val)
std::cout << "clCreateBuffer failed!" << std::endl;
// copying our data set to the buffer
if((clEnqueueWriteBuffer(queue,buffer,CL_TRUE,input_data,NULL)))
std::cout << "clEnqueueWriteBuffer failed!" << std::endl;
// we compile the device program with our source above and create a kernel for it.
// also,we are allowed to create a device program with a binary that we can point to.
cl_program program = clCreateProgramWithSource(context,(const char**) &kernelAdd,&ret_val);
if (ret_val)
std::cout << "clCreateProgramWithSource failed!" << std::endl;
if((clBuildProgram(program,NULL)))
std::cout << "clBuildProgram failed!" << std::endl;
cl_kernel kernel = clCreateKernel(program,"add",&ret_val);
if (ret_val)
std::cout << "clCreateKernel failed! ret_val = " << ret_val << std::endl;
// configure options to find the arguments to the kernel
if((clSetKernelArg(kernel,sizeof(buffer),&buffer)))
std::cout << "clSetKernelArg failed!" << std::endl;
// the total number of work items that we want to use
const size_t global_dimensions[3] = {data_size,0};
if((clEnqueueNDRangeKernel(queue,kernel,global_dimensions,NULL)))
std::cout << "clEnqueueNDRangeKernel failed!" << std::endl;
// read back output into another buffer
ret_val = clEnqueueReadBuffer(queue,output_data,NULL);
if(ret_val)
std::cout << "clEnqueueReadBuffer failed! ret_val = " << ret_val << std::endl;
std::cout << "Kernel completed" << std::endl;
// Release kernel,program,and memory objects
if(clReleaseMemObject(buffer))
std::cout << "clReleaseMemObject failed!" << std::endl;
if(clReleaseKernel(kernel))
std::cout << "clReleaseKernel failed!" << std::endl;
if(clReleaseProgram(program))
std::cout << "clReleaseProgram failed!" << std::endl;
if(clReleaseCommandQueue(queue))
std::cout << "clReleaseCommandQueue failed!" << std::endl;
if(clReleaseContext(context))
std::cout << "clReleaseContext failed!" << std::endl;
for (int i = 0; i < data_size; i++)
{
assert(output_data[i] == input_data[i]/2);
}
return 0;
}
输出如下:
CL_DEVICE_NAME = Iris,platform ID = 0x7fff0000,deviceID = 0x1024500
objc[1034]: Method cache corrupted. This may be a message to an invalid object,or a memory error somewhere else.
objc[1034]: receiver 0x7fefb8712a90,SEL 0x7fff7ce87c58,isa 0x7fff99268208,cache 0x7fff99268218,buckets 0x7fefb87043c0,mask 0x3,occupied 0x1
objc[1034]: receiver 48 bytes,buckets 64 bytes
objc[1034]: selector 'dealloc'
objc[1034]: isa 'OS_xpc_array'
objc[1034]: Method cache corrupted. This may be a message to an invalid object,or a memory error somewhere else.
make: *** [all] Abort trap: 6
解决方法
这是一个很常见的错误:
int* input_data = new int(N);
应该写成
int* input_data = new int[N];
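您的版本只分配了一个 int,并将其初始化为 N。要分配 N 个整数,需要使用方括号。
同样的问题也出现在 output_data 和 deviceInfo 的分配上(new int(N) 和 new typeInfo(sizeInfo))。此外,填充和校验数据的循环以 data_size(字节数)而不是 N(元素个数)作为上界,也会造成越界写入。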
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。