如何解决如何使用 CuSOLVER 计算线性方程组的解
我正在尝试使用 CuSOLVER 来计算双精度线性方程组的解。我正在关注这里的方法声明:
https://docs.nvidia.com/cuda/cusolver/index.html#cusolverDN-lt-t-gt-gesv
通常,首先必须使用方法 cusolverDnDDgesv_bufferSize
找到工作区的最佳大小。可以用 cusolverDnDDgesv
求解线性系统。第一部分似乎有效,所以我可以无误地计算缓冲区大小。但是,当运行 cusolverDnDDgesv
时,我得到 Segmentation fault (core dumped)
。我可以使用 CuSOLVER 的其他功能,但在使用 cusolverDnDDgesv
时我无法弄清楚问题是什么。你能帮忙解决这个问题吗?
这是一个最小的例子。我用 nvcc cusolverDnZZgesv.cu -o prog.out -lcusolver
运行它。
#include <assert.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cusolverDn.h>
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc,char *argv[]) {
cusolverDnHandle_t handle = NULL;
cusolverStatus_t cusolver_status;
cudaError_t cudaStat1;
cudaError_t cudaStat2;
cudaError_t cudaStat3;
// Set dimensions of A*x=b.
const int n = 3;
const int ldda = n;
const int lddb = n;
const int lddx = n;
const int nrhs = 1; // number of right hand side vectors
/* | 1 2 3 |
* A = | 4 5 6 |
* | 2 1 1 |
*
* x = (1 1 1)'
* b = (6 15 4)'
*/
// Matrix A.
double A[ldda * n] = {1.0,4.0,2.0,5.0,1.0,3.0,6.0,1.0};
// Vector b,right hand side of equation.
double B[lddb * nrhs] = {6.0,15.0,4.0};
// Solution of A*x=b.
double X[lddx * nrhs] = {1.0,1.0};
/* device memory */
double *d_A = NULL;
double *d_X = NULL;
double *d_B = NULL;
// Allocate memory on GPU.
cudaStat1 = cudamalloc((void **)&d_A,sizeof(double) * ldda * n);
cudaStat2 = cudamalloc((void **)&d_X,sizeof(double) * lddx * nrhs);
cudaStat3 = cudamalloc((void **)&d_B,sizeof(double) * lddb * nrhs);
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
assert(cudaSuccess == cudaStat3);
// copy to GPU.
cudaStat1 =
cudamemcpy(d_A,A,sizeof(double) * ldda * n,cudamemcpyHostToDevice);
cudaStat2 =
cudamemcpy(d_B,B,sizeof(double) * lddb * nrhs,cudamemcpyHostToDevice);
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
// int lwork = 0;
//
// cusolver_status =
// cusolverDnDgeqrf_bufferSize(handle,n,d_A,ldda,&lwork);
// ###########################################
// cusolverDnDDgesv_bufferSize
// ###########################################
int *dipiv = NULL;
int *dwork = NULL;
size_t lwork_bytes = 0;
cusolver_status = cusolverDnCreate(&handle);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
cusolver_status =
cusolverDnDDgesv_bufferSize(handle,nrhs,dipiv,d_B,lddb,d_X,lddx,dwork,&lwork_bytes);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
// ###########################################
// cusolverDnDDgesv
// ###########################################
void *dWorkspace = NULL;
int *dinfo = NULL;
int niter;
cudaStat1 = cudamalloc((void **)&dWorkspace,sizeof(double) * lwork_bytes);
cudaStat2 = cudamalloc((void **)&dinfo,sizeof(int));
cudaStat3 = cudamalloc((void **)&dipiv,sizeof(int) * n);
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
assert(cudaSuccess == cudaStat3);
cusolver_status =
cusolverDnDDgesv(handle,dWorkspace,lwork_bytes,&niter,dinfo);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
// Write exact result to terminal.
std::cout << "Result:" << std::endl;
for (double x : X)
std::cout << x << std::endl;
// copy result from GPU to cpu.
cudaStat1 =
cudamemcpy(X,sizeof(double) * lddx * n,cudamemcpyDevicetoHost);
// Write result from GPU to terminal.
std::cout << "Result from GPU:" << std::endl;
for (double x : X)
std::cout << x << std::endl;
}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。