如何解决在 WSL2 上使用 CUDA 时出现的“no kernel image is available for execution on the device”(没有可在设备上执行的内核映像)错误
我正在尝试在 WSL2 上的 C++ 程序中使用 CUDA 和 Thrust。我按照这里的说明在 WSL2 上启用了 CUDA。下面是一个小的示例程序:
首先,我设置了以下环境变量:
# Locations of the CUDA 11.0 toolkit. CMakeLists.txt reads the first two
# variables through $ENV{...} for its library and include search paths.
export CUDA_LIBRARY_DIRECTORY=/usr/local/cuda-11.0/lib64
export CUDA_INCLUDE_DIRECTORY=/usr/local/cuda-11.0/include
# Path to nvcc. CMake consults CUDACXX when selecting the CUDA compiler
# (the configure log reports this same path as the working CUDA compiler).
export CUDACXX=/usr/local/cuda-11.0/bin/nvcc
CMakeLists.txt
# Builds the `proj` executable from one CUDA source and one C++ source.
# NOTE(review): CUDA as a first-class language (LANGUAGES ... CUDA below) is
# documented as requiring CMake 3.8+; the minimum stated here is lower — confirm.
cmake_minimum_required(VERSION 2.8)
# Enables both the C++ and the CUDA language for this project.
project(proj LANGUAGES CXX CUDA)
set (CMAKE_CXX_STANDARD 14)
#### use cuda ####
# NOTE(review): CUDA_NVCC_FLAGS is the variable read by the legacy FindCUDA
# module. With CUDA enabled through project(... LANGUAGES CUDA) it appears to
# have no effect: the nvcc command line in the build log carries no -gencode
# option. CMAKE_CUDA_FLAGS is presumably the variable that is needed — confirm.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=sm_50;-lineinfo; -cudart=static; -Xptxas; -v)
# Header and library search paths come from the environment variables exported
# before running cmake.
include_directories($ENV{CUDA_INCLUDE_DIRECTORY})
link_directories($ENV{CUDA_LIBRARY_DIRECTORY})
# Single target made of the CUDA translation unit and the host-only main.cpp.
ADD_EXECUTABLE(
proj
src/cudafile.cu
src/main.cpp)
main.cpp
#include<thrust/host_vector.h>
#include<thrust/device_vector.h>
#include<thrust/device_ptr.h>
void func(int size,int* a1,int* a2,int* a3);
void FillWithValue(int* arr,int size,int val);
int main()
{
int size=1000;
int *arr1,*arr2,*arr3;
cudamalloc((void**)&arr1,size * sizeof(int));
FillWithValue(arr1,size,1);
cudamalloc((void**)&arr2,size * sizeof(int));
FillWithValue(arr2,2);
cudamalloc((void**)&arr3,size * sizeof(int));
int* harr = new int [size];
cudamemcpy(harr,arr1,size*sizeof(int),cudamemcpyDevicetoHost);
fprintf(stdout,"%d\n",harr[0]);
func(size,arr2,arr3);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
fprintf(stderr,"Cuda error: %s.\n",cudaGetErrorString(err));
return 1;
}
cudafile.cu
#include<thrust/host_vector.h>
#include<thrust/device_vector.h>
#include<thrust/device_ptr.h>
#define blocksize 512
#define maxblocks 65535
// Element-wise integer addition: a3[i] = a1[i] + a2[i] for every i in [0, size).
//
// Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
// index and advances by the total number of launched threads, so any grid
// size (including one smaller than size / blockDim.x) covers the whole range.
// No shared memory is used.
//
// Parameters:
//   size - number of elements to process
//   a1   - first input array (device pointer, at least `size` elements)
//   a2   - second input array (device pointer, at least `size` elements)
//   a3   - output array (device pointer, at least `size` elements)
__global__ void funcKernel(int size, int* a1, int* a2, int* a3)
{
    const int stride = blockDim.x * gridDim.x;
    // The index must advance on every iteration; without the increment the
    // loop would never terminate for threads with i < size.
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += stride)
    {
        a3[i] = a1[i] + a2[i];
    }
}
// Host wrapper that launches funcKernel to compute a3 = a1 + a2 element-wise.
//
// Parameters:
//   size - number of elements; nothing is launched when size <= 0
//   a1   - first input array (device pointer)
//   a2   - second input array (device pointer)
//   a3   - output array (device pointer)
//
// The launch is asynchronous and this function does not check for errors;
// callers query cudaGetLastError() afterwards.
void func(int size, int* a1, int* a2, int* a3)
{
    // A non-positive size would give an empty or invalid grid.
    if (size <= 0) return;

    // Ceiling division: one block per `blocksize` elements, capped at
    // `maxblocks`. The kernel loops over the remainder when the cap applies.
    int gridsize = (size + blocksize - 1) / blocksize;
    if (gridsize > maxblocks) gridsize = maxblocks;

    funcKernel<<<gridsize, blocksize>>>(size, a1, a2, a3);
}
// Sets the first `size` elements of the device array `arr` to `val` using
// thrust::fill.
//
// Parameters:
//   arr  - raw device pointer to the array to fill
//   size - number of elements to fill; nothing is done when size <= 0
//   val  - value written to every element
void FillWithValue(int* arr, int size, int val)
{
    if (arr == nullptr || size <= 0) return;

    // Wrap the raw pointer so Thrust dispatches the fill to the device.
    thrust::device_ptr<int> d = thrust::device_pointer_cast(arr);
    thrust::fill(d, d + size, val);
}
0
Cuda error: no kernel image is available for execution on the device.
第一个 fprintf 输出的是 0 而不是预期的 1,说明 Thrust 的 fill 函数没有成功填充数组;cudaGetLastError() 捕获到的错误则说明内核也执行失败了。
这是详细的 cmake 构建:
cmake ..
-- The CXX compiler identification is GNU 9.3.0
-- The CUDA compiler identification is NVIDIA 11.0.221
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Check for working CUDA compiler: /usr/local/cuda-11.0/bin/nvcc
-- Check for working CUDA compiler: /usr/local/cuda-11.0/bin/nvcc -- works
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Configuring done
-- Generating done
-- Build files have been written to: /mnt/d/work/wsl2-projects/tests/kernels/build
make
/usr/bin/cmake -S/mnt/d/work/wsl2-projects/tests/kernels -B/mnt/d/work/wsl2-projects/tests/kernels/build --check-build-system CMakeFiles/Makefile.cmake 0
/usr/bin/cmake -E cmake_progress_start /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles/progress.marks
make -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
make -f CMakeFiles/proj.dir/build.make CMakeFiles/proj.dir/depend
make[2]: Entering directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
cd /mnt/d/work/wsl2-projects/tests/kernels/build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /mnt/d/work/wsl2-projects/tests/kernels /mnt/d/work/wsl2-projects/tests/kernels /mnt/d/work/wsl2-projects/tests/kernels/build /mnt/d/work/wsl2-projects/tests/kernels/build /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles/proj.dir/DependInfo.cmake --color=
Scanning dependencies of target proj
make[2]: Leaving directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
make -f CMakeFiles/proj.dir/build.make CMakeFiles/proj.dir/build
make[2]: Entering directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
[ 33%] Building CUDA object CMakeFiles/proj.dir/src/cudafile.cu.o
/usr/local/cuda-11.0/bin/nvcc -x cu -c /mnt/d/work/wsl2-projects/tests/kernels/src/cudafile.cu -o CMakeFiles/proj.dir/src/cudafile.cu.o
[ 66%] Building CXX object CMakeFiles/proj.dir/src/main.cpp.o
/usr/bin/c++ -I/usr/local/cuda-11.0/include -std=gnu++14 -o CMakeFiles/proj.dir/src/main.cpp.o -c /mnt/d/work/wsl2-projects/tests/kernels/src/main.cpp
[100%] Linking CXX executable proj
/usr/bin/cmake -E cmake_link_script CMakeFiles/proj.dir/link.txt --verbose=1
/usr/bin/c++ -rdynamic CMakeFiles/proj.dir/src/cudafile.cu.o CMakeFiles/proj.dir/src/main.cpp.o -o proj -L/usr/local/cuda-11.0/lib64 -L/usr/local/cuda-11.0/targets/x86_64-linux/lib/stubs -L/usr/local/cuda-11.0/targets/x86_64-linux/lib -lcudadevrt -lcudart_static -lrt -lpthread -ldl
make[2]: Leaving directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
[100%] Built target proj
make[1]: Leaving directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
/usr/bin/cmake -E cmake_progress_start /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles 0
这是否与我的 GPU 和 CUDA 版本不匹配有关?我想降级到 CUDA 10 或 9,但不知道如何按照这里的方式安装它,才不会让安装程序用另一个 Nvidia 驱动程序替换现有的驱动程序。
附加信息:
- GeForce GTX 950M
- Windows 11 家庭版,内部版本(Build)22000.51。
- WSL2:Ubuntu-20.04
- Cuda 编译工具,9.1 版,V9.1.85
解决方法
根据 Robert Crovella 的评论,我设法让程序以正确的输出正常运行并且没有错误。
在 CMakeLists.txt 中,我使用了
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_50,code=sm_50 -lineinfo -cudart=static -Xptxas -v")
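代替原来的写法(CUDA_NVCC_FLAGS 只对旧的 FindCUDA 模块生效;通过 project(... LANGUAGES CUDA) 启用 CUDA 时,nvcc 的编译选项取自 CMAKE_CUDA_FLAGS。从上面的构建日志也可以看到,原来的 nvcc 命令行中并没有 -gencode 选项,因此没有为该 GPU 的 sm_50 架构生成内核代码):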
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=sm_50;-lineinfo; -cudart=static; -Xptxas; -v)
现在输出是
1
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。