如何输出导致逻辑错误的函数调用所在的行号

如何解决：输出导致逻辑错误的函数调用所在的行号

我正在使用 CUDA 创建一个矩阵数学库，以提高我的 CNN 性能（并更好地理解 C++）。

我希望能够添加错误处理并告诉用户（我）在使用矩阵类时出了什么问题。

这一点可以在我的主文件中看到：在这个例子里，我试图把一个 10 * 10 的矩阵与一个 15 * 15 的矩阵相加。这是一个不可能完成的操作，需要一些输出来告诉用户。例如

Error in file "Main.cu" on line: 9 (Dimensions inconsistent)

如果在函数内部进行检查，得到的行号只是检查代码本身所在的行号，而不是调用处的行号。我已经了解过用宏来检查的做法，但我想知道是否有另一种方法，不必在每次将两个矩阵相加时都调用宏。

Main.cu

#include "Matrix.cuh"


// Demo driver: builds two matrices whose shapes differ (10x10 and 15x15),
// adds them, and prints all three matrices.
int main() {
    // Small managed allocation made before any Matrix is constructed
    // (presumably to initialise the CUDA runtime up front — TODO confirm).
    // The runtime API is spelled cudaMallocManaged; the lower-case variant
    // does not exist.
    double* init = nullptr;
    cudaMallocManaged(&init, sizeof(double));

    Matrix A(10, 10, 2);
    Matrix B(15, 15, 3);
    // The operand shapes do not match; this is the case the error reporting
    // is meant to catch.
    Matrix C = A + B;

    A.printM("A");
    B.printM("B");
    C.printM("C");

    // Release the allocation made at the top so it is not leaked.
    cudaFree(init);
    return 0;
}

Matrix.cu

#include "Matrix.cuh"

// Element-wise addition kernel: writes A + B into C.
// Each thread handles one (column, row) position derived from its block and
// thread indices with a block edge of BLOCK_SIZE. A's dimensions are used for
// both the bounds test and the flat index into all three value arrays.
__global__
void sumMatrix(Matrix* A,Matrix* B,Matrix* C)
{
    const int col = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    const int row = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    // Threads that land outside A's extent have nothing to do.
    if (!(col < A->ColumnCount && row < A->RowCount))
    {
        return;
    }

    // Row-major flat offset, computed once and shared by the three accesses.
    const auto offset = row * A->ColumnCount + col;
    C->VALUES[offset] = A->VALUES[offset] + B->VALUES[offset];
}

// Initialisation kernel for an R x C matrix stored row-major in VALUES.
//   rows   - array of R row descriptors to fill in
//   R, C   - row and column counts
//   VALUES - flat element storage of R * C doubles
//   val    - value written to every element
// Each thread fills one element; the thread in column 0 of a row also sets
// that row's descriptor (element count and pointer to the row's first value).
__global__
void matrixInit(Row* rows,int R,int C,double* VALUES,double val) {
    const int col = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    const int row = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    // Skip threads beyond the matrix bounds.
    if (col >= C || row >= R)
    {
        return;
    }

    if (col == 0)
    {
        Row& descriptor = rows[row];
        descriptor.Count = C;
        descriptor.values = VALUES + C * row;
    }

    VALUES[row * C + col] = val;
}

// Constructs an R x C matrix in CUDA managed memory with every element set
// to `val`.
//   R   - number of rows
//   C   - number of columns
//   val - initial value for every element
//
// The column count is an explicit parameter: the body needs it, and the call
// sites in Main.cu pass three arguments (e.g. Matrix A(10,10,2)).
// NOTE(review): the declaration in Matrix.cuh is not visible here; it must
// read Matrix(int R, int C, double val) to match.
Matrix::Matrix(int R,int C,double val)
{
    // Widen before multiplying so R * C cannot overflow int for large shapes.
    cudaMallocManaged(&VALUES,static_cast<size_t>(R) * C * sizeof(double));
    cudaMallocManaged(&rows,R * sizeof(Row));
    RowCount = R;
    ColumnCount = C;

    // One thread per element, BLOCK_SIZE x BLOCK_SIZE threads per block; the
    // grid is rounded up so partial tiles at the edges are still covered.
    // Host locals are named grid/block to avoid shadowing the device
    // built-ins gridDim/blockDim.
    dim3 grid((C + BLOCK_SIZE - 1) / BLOCK_SIZE,(R + BLOCK_SIZE - 1) / BLOCK_SIZE,1);
    dim3 block(BLOCK_SIZE,BLOCK_SIZE,1);
    matrixInit<<<grid,block>>>(rows,R,C,VALUES,val);
    cudaDeviceSynchronize();
    cudaCheckerrors("MATRIX INIT VAL");
}

// Constructs an R x C matrix in CUDA managed memory with every element set
// to zero.
//   R - number of rows
//   C - number of columns
//
// Previously this allocated zero bytes and left rows, RowCount and
// ColumnCount unset, so a matrix built this way (e.g. the result buffer in
// sum) had no usable storage.
Matrix::Matrix(int R,int C)
{
    // Widen before multiplying so R * C cannot overflow int for large shapes.
    cudaMallocManaged(&VALUES,static_cast<size_t>(R) * C * sizeof(double));
    cudaMallocManaged(&rows,R * sizeof(Row));
    RowCount = R;
    ColumnCount = C;

    // matrixInit fills the row descriptors as well as the values, so run it
    // with 0.0 to get a fully usable, zeroed matrix.
    dim3 grid((C + BLOCK_SIZE - 1) / BLOCK_SIZE,(R + BLOCK_SIZE - 1) / BLOCK_SIZE,1);
    dim3 block(BLOCK_SIZE,BLOCK_SIZE,1);
    matrixInit<<<grid,block>>>(rows,R,C,VALUES,0.0);
    cudaDeviceSynchronize();
    cudaCheckerrors("MATRIX INIT VAL");
}

// Re-points every row descriptor at the start of its row inside VALUES
// (row r begins r * ColumnCount elements into the flat array).
void Matrix::updatePointers()
{
    size_t r = 0;
    while (r < RowCount)
    {
        const auto rowOffset = r * ColumnCount;
        rows[r].values = VALUES + rowOffset;
        ++r;
    }
}

// Nulls both buffer pointers without freeing them. The destructor only frees
// when both pointers are non-null, so after this call it releases nothing.
void Matrix::removePointers()
{
    rows = nullptr;
    VALUES = nullptr;
}

// Prints the matrix to std::cout: a header line "Matrix <msg>: <rows>*<cols>"
// followed by one line per row, each element followed by a single space.
//   msg - label inserted into the header line
void Matrix::printM(const char* msg)
{
    std::cout << "Matrix " << msg << ": ";
    std::cout << RowCount << "*" << ColumnCount << std::endl;

    for (size_t r = 0; r < RowCount; ++r)
    {
        Row& current = rows[r];
        for (size_t c = 0; c < ColumnCount; ++c)
        {
            std::cout << current[c] << " ";
        }
        std::cout << std::endl;
    }
}

// Returns the element-wise sum of this matrix and B.
//   B - right-hand operand, taken by value; its pointers are cleared before
//       returning so destroying the copy does not free the caller's buffers.
// If the shapes differ, a message is printed and the process exits, in the
// same way operator[] handles an out-of-range index.
//
// The kernel takes Matrix* arguments, so byte copies of the three matrices
// are staged in managed memory for the launch and released on return.
Matrix Matrix::sum(Matrix B)
{
    // Element-wise addition is only defined for equal shapes.
    if (B.RowCount != RowCount || B.ColumnCount != ColumnCount)
    {
        std::cout << "Dimensions inconsistent: "
                  << RowCount << "*" << ColumnCount << " + "
                  << B.RowCount << "*" << B.ColumnCount << std::endl;
        std::exit(1);
    }

    // Releases one managed scratch struct at scope exit. Locals are destroyed
    // only after the return value has been copied out, so reading *C_p in the
    // return statement is still valid. cudaFree releases raw memory and runs
    // no Matrix destructor, so the staged copies never free VALUES/rows.
    struct ScratchFree
    {
        Matrix* ptr;
        ~ScratchFree() { cudaFree(ptr); }
    };

    Matrix C(RowCount,ColumnCount);

    Matrix* A_p = nullptr;
    Matrix* B_p = nullptr;
    Matrix* C_p = nullptr;
    cudaMallocManaged(&A_p,sizeof(Matrix));
    cudaMallocManaged(&B_p,sizeof(Matrix));
    cudaMallocManaged(&C_p,sizeof(Matrix));
    ScratchFree releaseA{A_p};
    ScratchFree releaseB{B_p};
    ScratchFree releaseC{C_p};

    memcpy(A_p,this,sizeof(Matrix));
    memcpy(B_p,&B,sizeof(Matrix));
    memcpy(C_p,&C,sizeof(Matrix));

    // One thread per element; the grid is rounded up to cover partial tiles.
    // The block dimensions were previously used without being declared.
    dim3 grid((ColumnCount + BLOCK_SIZE - 1) / BLOCK_SIZE,(RowCount + BLOCK_SIZE - 1) / BLOCK_SIZE,1);
    dim3 block(BLOCK_SIZE,BLOCK_SIZE,1);

    sumMatrix<<<grid,block>>>(A_p,B_p,C_p);
    cudaDeviceSynchronize();
    cudaCheckerrors("SUM");

    // Detach the local copies so their destructors leave the buffers alone:
    // B's belong to the caller, C's are handed over through the return value.
    B.removePointers();
    C.removePointers();

    return *C_p;
}

// Bounds-checked row access.
//   i - zero-based row index
// Returns a reference to row i. If i is not below RowCount, prints
// "OUT OF BOUNDS" to std::cout and terminates the process with status 1.
Row& Matrix::operator[](size_t i)
{
    if (i < RowCount)
    {
        return rows[i];
    }

    std::cout << "OUT OF BOUNDS";
    std::exit(1);
}


// Adds B to this matrix and returns a reference to the result.
//   B - right-hand operand, taken by value; its pointers are cleared before
//       returning so destroying the copy does not free the caller's buffers.
// The result lives in a managed allocation made here, and the returned
// reference points at it; this function never frees that allocation.
Matrix& Matrix::operator+(Matrix B)
{
    Matrix C = sum(B);

    Matrix* C_p = nullptr;
    cudaMallocManaged(&C_p,sizeof(Matrix));
    // Copy the result into the returned storage. Without this copy the
    // reference would point at uninitialised memory.
    memcpy(C_p,&C,sizeof(Matrix));

    // Detach the locals so their destructors do not free buffers that are
    // still referenced: B's by the caller, C's by *C_p.
    B.removePointers();
    C.removePointers();
    return *C_p;
}

// Frees the value buffer and the row-descriptor array with cudaFree, but only
// when both pointers are non-null; if either is null, nothing is freed.
Matrix::~Matrix()
{
    const bool bothSet = (VALUES != nullptr) && (rows != nullptr);
    if (!bothSet)
    {
        return;
    }

    cudaFree(VALUES);
    cudaFree(rows);
}