如何解决错误:内核参数具有非平凡可复制构造的类/结构体类型(SYCL + TBB)
我尝试提供一种“map”骨架(skeleton),它通过一些指定目标类型(CPU 或 GPU/加速器)的参数来包装 oneAPI 调用,以隐藏硬件定位问题。我的 map 骨架接收一个函数、该函数的导数以及牛顿法的初始点。但我遇到了一个错误:
kernel parameter has non-trivially copy constructible class/struct type 'std::function<double (double)>'
我的代码是:
#include <CL/sycl.hpp>
#include <iostream>
#include <tbb/tbb.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>
#include <vector>
#include <string>
#include <queue>
#include<tbb/blocked_range.h>
#include <tbb/global_control.h>
#include <chrono>
#include "uTimer.cpp"
#include <cmath>
#include <random>
#include <ctime>
#include <numeric>
#include <cstdlib>
//#include <dos.h> //for delay
//#include <conio.h> //for getch()
//#include <complex>
#define EPSILON 0.000001 // Convergence tolerance: the Newton loops below keep iterating while the tested magnitude is >= EPSILON
// Makes every name in namespace tbb visible unqualified for the rest of the file.
using namespace tbb;
/**
 * Minimal stopwatch.
 *
 * The stopwatch starts when the object is constructed and can be restarted
 * with start(). stop() does not halt anything; it only reports the time
 * elapsed since the most recent start.
 *
 * A monotonic clock (std::chrono::steady_clock) is used so that a measured
 * interval can never be negative, even if the wall clock is adjusted while
 * the stopwatch is running.
 */
class Clock {
private:
    using clock_type = std::chrono::steady_clock;
    clock_type::time_point t;  // instant of the most recent start()

public:
    /** Constructs the stopwatch and starts it immediately. */
    Clock() {
        start();
    }

    /** Restarts the stopwatch from the current instant. */
    void start() {
        t = clock_type::now();
    }

    /**
     * @return seconds elapsed since the last start(), as a double.
     */
    double stop() const {
        return std::chrono::duration_cast<std::chrono::duration<double>>(
                   clock_type::now() - t).count();
    }
};
//std::complex<double> mycomplex(10.0,2.0);
template<class Tin,class Tout>
class Map {
private:
std::function<Tout(Tin)> fun;
std::function<Tout(Tin)> dfun;
public:
Map() {};
Map(std::function<Tout(Tin)> f,std::function<Tout(Tin)> df) {
fun = f;
dfun = df;
};
void operator()(bool use_tbb,Tin &x1) {
int iter=100;
Tout x;
if (use_tbb) {
uTimer *timer = new uTimer("Executing Code On cpu");
tbb::parallel_for(tbb::blocked_range < int > (0,iter),[&](tbb::blocked_range<int> t) {
for (int index = t.begin(); index < t.end(); ++index) {
do
{
x = x1;
x1 = x - (fun(x) / dfun(x));
}while (std::abs(x1 - x) >= EPSILON);
}
});
timer->~uTimer();
}else {
sycl::buffer<Tin,1> x1_buffer(&x1,iter);
sycl::buffer<Tout,1> x_buffer(&x,iter);
//Profiling GPU
// Initialize property list with profiling information
sycl::property_list propList {
sycl::property::queue::enable_profiling() };
// Build the command queue (constructed to handle event profling)
sycl::queue gpuQueue = cl::sycl::queue(sycl::gpu_selector(),propList);
// print out the device information used for the kernel code
std::cout << "Device: "
<< gpuQueue.get_device().get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "Compute Units: "
<< gpuQueue.get_device().get_info<
sycl::info::device::max_compute_units>()
<< std::endl;
auto start_overall = std::chrono::system_clock::Now();
auto event = gpuQueue.submit([&](sycl::handler &h) {
//local copy of fun
auto f = fun;
auto df = dfun;
sycl::accessor x1_accessor(x1_buffer,h,sycl::read_write);
sycl::accessor x_accessor(x_buffer,sycl::read_write);
h.parallel_for(iter,[=](sycl::id<1> index) {
do
{
x_accessor[index] = x1_accessor[index];
x1_accessor[index] = x_accessor[index] - (f(x_accessor[index]) / df(x_accessor[index]));
}while (sycl::fabs(f(x1_accessor[index]))>= EPSILON);
});
});
event.wait();
auto end_overall = std::chrono::system_clock::Now();
cl_ulong submit_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_submit>();
cl_ulong start_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_start>();
cl_ulong end_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_end>();
auto submission_time = (start_time - submit_time) / 1000000.0f;
std::cout << "Submit Time: " << submission_time << " ms"
<< std::endl;
auto execution_time = (end_time - start_time) / 1000000.0f;
std::cout << "Execution Time: " << execution_time << " ms"
<< std::endl;
auto execution_overall = std::chrono::duration_cast
< std::chrono::milliseconds > (end_overall - start_overall);
std::cout << "Overall Execution Time: " << execution_overall.count()
<< " ms" << std::endl;
};
};
};
int main(int argc,char *argv[]) {
//Define a function
auto f = [](double x) {return pow(x,3);};
//Define the derivative of function
auto df = [](double x) {return pow(x,2) *3;};
//Define an instance of Map class
auto m1 = Map<double,double>(f,df);
double x1 = 3;
m1(true,x1);
//print the result
//for (auto &e : r) {
//std::cout << e << " ";
//}
return 0;
}
此外,即使不考虑这个错误,我觉得我的代码中还有某些地方不正确,但我不清楚具体是什么。
解决方法
你想做的事情无法实现。即使你去掉 std::function
并改用函数指针,也仍然行不通(尽管函数指针可以平凡复制)。在 SYCL 以及其他同类语言(CUDA、HIP、OpenCL 等)中,设备编译器必须能够编译内核执行/调用的所有函数。所以,你不能把一个函数“传入”内核。这归结为你之前提出的一个问题中已经回答过的内容(见 here)。
你可以尝试把这些 lambda 改为在别处定义的普通函数,然后在内核中调用它们。如果你希望能够在运行时在多个函数之间进行选择,可以编写一个模板化内核(例如以一个枚举作为模板参数),并通过 if constexpr
(在内核中)分派调用,以避免运行时开销(以及代码重复)。最终会实例化 n 个 SYCL 内核,每个内核调用你的其中一个函数,它们都能被设备编译器正确编译。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。