微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

如何在推力库cuda 11.x中调试临时缓冲区::分配获取临时缓冲区失败?

如何解决如何在推力库cuda 11.x中调试临时缓冲区::分配获取临时缓冲区失败?

我有一个程序可以像我一样运行,但当我使用 cuda 10.2 时它应该运行,但是当我尝试将相同的程序迁移到 cuda 11.1/2 时,我遇到了来自代码推力部分的临时缓冲区::分配错误,在网上进一步查找时,我看到了使用 cudaDeviceSetLimit(cudalimitMallocHeapSize,custom_size) 增加堆 malloc 大小的建议,这样做时我的程序运行速度非常慢并且经常崩溃,我附上了导致以下错误代码部分。>

__global__ 
void max_kernel(float *begin,float *dst,const int n_classes,const int size){
    int i = blockDim.x*blockIdx.x + threadIdx.x;
    if (i > size)
        return;

    thrust::device_ptr<float> beg ( &begin[i*n_classes] ) ;
    thrust::device_ptr<float> end = beg + n_classes;
    thrust::device_ptr<float> result = thrust::max_element(thrust::device,beg,end);

    dst[i] = result - beg;
}

void maxElem(float *begin,const int c,const int h,const int w){
    int blocks = (h*w)/32+1;
    int threads = 32;
    max_kernel<<<blocks,threads,0>>>(begin,dst,c,h*w);   
}


以及来自 cuda-gdb 的回溯

Thread 1 "demo" received signal CUDA_EXCEPTION_14,Warp Illegal Address.
[Switching focus to CUDA kernel 0,grid 272,block (304,0),thread (0,device 0,sm 12,warp 37,lane 0]

#0  0x000055557cdfedd8 in thrust::cuda_cub::(anonymous namespace)::get_value_msvc2005_war<thrust::cuda_cub::par_t,thrust::tuple<float,long,thrust::null_type,thrust::null_type>*>(thrust::cuda_cub::execution_policy<thrust::cuda_cub::par_t>&,thrust::null_type>*)::war_nvbugs_881631::device_path(thrust::cuda_cub::execution_policy<thrust::cuda_cub::par_t>&,thrust::null_type>*) (ptr=0x7ffec97fb800) at /usr/local/cuda/include/thrust/system/cuda/detail/get_value.h:61
#1  0x000055557cdfe1c0 in thrust::cuda_cub::(anonymous namespace)::get_value_msvc2005_war<thrust::cuda_cub::par_t,thrust::null_type>*> (
    exec=<optimized out>,ptr=0x7fff7dadc000) at /usr/local/cuda/include/thrust/system/cuda/detail/get_value.h:79
#2  0x000055557cde91e0 in thrust::cuda_cub::get_value<thrust::cuda_cub::par_t,thrust::null_type>*> (exec=<optimized out>,ptr=0x7fff7dadc000)
    at /usr/local/cuda/include/thrust/system/cuda/detail/get_value.h:91
#3  0x000055557cdd4090 in thrust::cuda_cub::__extrema::extrema<thrust::cuda_cub::par_t,thrust::zip_iterator<thrust::tuple<thrust::device_ptr<float>,thrust::cuda_cub::counting_iterator_t<long>,thrust::null_type> >,thrust::cuda_cub::__extrema::arg_max_f<float,thrust::less<float> >,thrust::null_type> > (
    policy=0x7fff7dadc000,first=...,num_items=32767,binary_op=...) at /usr/local/cuda/include/thrust/system/cuda/detail/extrema.h:363
#4  thrust::cuda_cub::__extrema::element<thrust::cuda_cub::__extrema::arg_max_f,thrust::cuda_cub::par_t,thrust::device_ptr<float>,thrust::less<float> > (
    policy=0x7fff7dadc000,last=...,binary_pred=...) at /usr/local/cuda/include/thrust/system/cuda/detail/extrema.h:397
#5  thrust::cuda_cub::max_element<thrust::cuda_cub::par_t,thrust::less<float> > (policy=<optimized out>,binary_pred=...) at /usr/local/cuda/include/thrust/system/cuda/detail/extrema.h:466
#6  0x000055557d058520 in thrust::cuda_cub::max_element<thrust::cuda_cub::par_t,thrust::device_ptr<float> > (policy=<optimized out>,last=...)
    at /usr/local/cuda/include/thrust/system/cuda/detail/extrema.h:491
#7  0x000055557ccc3ac0 in thrust::max_element<thrust::cuda_cub::par_t,thrust::device_ptr<float> > (exec=<optimized out>,last=...)
    at /usr/local/cuda/include/thrust/detail/extrema.inl:55
#8  0x000055557d109e90 in max_kernel<<<(32769,1,1),(32,1)>>> (begin=0x7fff1e000000,dst=0x7fff22c00000,n_classes=19,size=1048576)
    at /home/dev/DNNLIB/kernels/postprocessing.cu:44

postprocessing.cu 中的第 44 行是 thrust::device_ptr<float> result = thrust::max_element(thrust::device,end);

任何关于我哪里出错的帮助都会很好。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。