How to deal with an adaptive stepper producing different time steps per rank when using boost::odeint in an MPI context without mpi_state
I am trying to upgrade existing code that uses odeint so that the right-hand side (RHS) can be evaluated more efficiently with MPI. According to the documentation, boost::odeint can be combined with MPI, but this requires changing the solver's state_type to mpi_state, which I would like to avoid in order to stay backward compatible.
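For context, the documented mpi_state route looks roughly like the sketch below. I reconstructed it from the odeint MPI example, so the exact typedefs, the x() accessor for the inner container, and the split/unsplit calls are what I remember from the docs and should be double-checked against your Boost version:

    #include <vector>
    #include <boost/mpi.hpp>
    #include <boost/numeric/odeint.hpp>
    #include <boost/numeric/odeint/external/mpi/mpi.hpp>

    using namespace boost::numeric::odeint;

    // the distributed state wraps the ordinary container; x() exposes the local chunk
    typedef std::vector<double> inner_type;
    typedef mpi_state<inner_type> state_type;

    // same toy RHS as calculation_function below, acting on the local chunk only
    struct rhs {
        void operator()(const state_type &x, state_type &dxdt, double t) const {
            for (std::size_t m = 0; m < x().size(); ++m)
                dxdt()[m] = 3. / (2 * t * t) + x()[m] / (2 * t);
        }
    };

    int main(int argc, char **argv) {
        boost::mpi::environment env(argc, argv);
        boost::mpi::communicator world;

        std::vector<double> x_global(64, 1.0); // global state; in the docs it is only filled on the master rank
        state_type x_local(world);
        split(x_global, x_local);              // scatter into per-rank chunks

        // the stepper learns about MPI only through the algebra parameter
        typedef runge_kutta4<state_type, double, state_type, double,
                             mpi_nested_algebra<range_algebra> > stepper_type;
        integrate_const(stepper_type(), rhs(), x_local, 0.1, 1.0, 0.00025);

        unsplit(x_local, x_global);            // gather the result back on the master rank
    }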
Therefore my plan is to split the vectors into chunks before handing them to the odeint routines (without telling odeint that it is running in an MPI context) and to recombine them inside the RHS function. A short demo functor (I use PETSc for the vector communication) looks like this:
struct Petsc_RHS_state {
    Petsc_RHS_state(MPI_Comm &communicator, const int global_size) : communicator(communicator) {
        VecCreate(communicator, &local_vec);
        VecSetSizes(local_vec, PETSC_DECIDE, global_size);
        VecSetFromOptions(local_vec);
        VecZeroEntries(local_vec);
        VecAssemblyBegin(local_vec);
        VecAssemblyEnd(local_vec);
        VecCreate(communicator, &scaling_vec);
        VecSetSizes(scaling_vec, PETSC_DECIDE, global_size);
        VecSetFromOptions(scaling_vec);
        PetscScalar set_val = 1;
        for (int i = 0; i < global_size; ++i)
            VecSetValues(scaling_vec, 1, &i, &set_val, INSERT_VALUES);
        VecAssemblyBegin(scaling_vec);
        VecAssemblyEnd(scaling_vec);
        MatCreateDense(communicator, PETSC_DECIDE, PETSC_DECIDE, global_size, global_size, NULL, &diagonal_mat);
        MatSetFromOptions(diagonal_mat);
        MatZeroEntries(diagonal_mat);
        MatAssemblyBegin(diagonal_mat, MAT_FINAL_ASSEMBLY);
        MatAssemblyEnd(diagonal_mat, MAT_FINAL_ASSEMBLY);
    }
    ~Petsc_RHS_state() {
        VecDestroy(&local_vec);
        VecDestroy(&scaling_vec);
        MatDestroy(&diagonal_mat);
    }
    void operator()(const state_type &x, state_type &dxdt, double t) const {
        std::cout << t << '\n';
        const size_t M = x.size();
        // copy the local chunk of the odeint state into the PETSc vector
        PetscScalar *local_ptr;
        VecGetArray(local_vec, &local_ptr);
        for (size_t m = 0; m < M; ++m)
            *(local_ptr + m) = x[m];
        VecRestoreArray(local_vec, &local_ptr);
        VecPointwiseMult(local_vec, scaling_vec, local_vec);
        MatDiagonalScale(diagonal_mat, local_vec, NULL);
        // evaluate the RHS elementwise and write the result into dxdt
        VecGetArray(local_vec, &local_ptr);
        for (size_t m = 0; m < M; ++m)
            *(local_ptr + m) = calculation_function(*(local_ptr + m), t);
        for (size_t m = 0; m < M; ++m)
            dxdt[m] = *(local_ptr + m);
        VecRestoreArray(local_vec, &local_ptr);
    }
    template <typename T>
    T calculation_function(const T &x, const double t) const {
        return 3. / (2 * t * t) + x / (2 * t);
    }
    MPI_Comm communicator;
    Vec local_vec, scaling_vec;
    Mat diagonal_mat;
};
void test_ts_arma_with_pure_petsc_preconfigured(const size_t matrix_size_rows, const size_t matrix_size_cols, const arma::cx_colvec &initial_condition_vec, arma::cx_colvec &solution_vec, const double dt, const double t_min, const double t_max) {
    PetscMPIInt rank, size;
    MPI_Comm_size(PETSC_COMM_WORLD, &size);
    MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
    boost::mpi::communicator boost_comm(PETSC_COMM_WORLD, boost::mpi::comm_attach);
    state_type x_initial = arma::conv_to<state_type>::from(initial_condition_vec);
    Mat input_mat;
    MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, matrix_size_rows, matrix_size_cols, NULL, &input_mat);
    MatZeroEntries(input_mat);
    MatAssemblyBegin(input_mat, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(input_mat, MAT_FINAL_ASSEMBLY);
    PetscInt local_cols, global_cols, local_rows, global_rows;
    PetscInt first_row, last_row;
    MatGetLocalSize(input_mat, &local_rows, &local_cols);
    MatGetSize(input_mat, &global_rows, &global_cols);
    MatGetOwnershipRange(input_mat, &first_row, &last_row);
    state_type x_split((last_row - first_row) * global_cols);
    std::cout << "Rank " << rank << " has a total size of " << matrix_size_rows << "," << matrix_size_cols << '\n';
    std::cout << "Rank " << rank << " has an initial size of " << initial_condition_vec.n_elem << '\n';
    std::cout << "Rank " << rank << " has a local matrix size of " << local_rows << "," << local_cols << '\n';
    std::cout << "Rank " << rank << " has a global matrix size of " << global_rows << "," << global_cols << '\n';
    std::cout << "Rank " << rank << " has rows from " << first_row << " to " << last_row << '\n';
    std::cout << "Rank " << rank << " has a vector size of " << x_split.size() << '\n';
    // copy the locally owned part of the matrix into the split state vector
    const PetscScalar *read_ptr;
    MatDenseGetArrayRead(input_mat, &read_ptr);
    for (int j = 0; j < global_cols; ++j) {
        for (int i = 0; i < last_row - first_row; ++i) {
            const int cur_pos = i + j * (last_row - first_row);
            x_split[cur_pos] = *(read_ptr + cur_pos);
        }
    }
    MatDenseRestoreArrayRead(input_mat, &read_ptr);
    runge_kutta4<state_type> stepper;
    // every rank integrates only its own chunk of the state
    Petsc_RHS_state local_calculator(PETSC_COMM_WORLD, matrix_size_cols * matrix_size_rows);
    integrate_adaptive(bulirsch_stoer<state_type>(1e-8, 1e-6), local_calculator, x_split, t_min, t_max, dt);
    // write the integrated chunk back into the distributed matrix
    PetscScalar *write_ptr;
    MatDenseGetArrayWrite(input_mat, &write_ptr);
    for (int j = 0; j < global_cols; ++j) {
        for (int i = 0; i < last_row - first_row; ++i) {
            const int cur_pos = i + j * (last_row - first_row);
            *(write_ptr + cur_pos) = x_split[cur_pos];
        }
    }
    MatDenseRestoreArrayWrite(input_mat, &write_ptr);
    MatView(input_mat, PETSC_VIEWER_STDOUT_WORLD);
    // gather the full matrix redundantly on every rank and copy it back into the arma vector
    Mat local_mat;
    MatCreateRedundantMatrix(input_mat, size, MPI_COMM_NULL, MAT_INITIAL_MATRIX, &local_mat);
    MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY);
    //MatGetValues(local_mat,Nradius,vector_indices_radius.memptr(),Nomega,vector_indices_omega.memptr(),in_mat.memptr());
    const PetscScalar *local_mat_read_ptr;
    MatDenseGetArrayRead(local_mat, &local_mat_read_ptr);
    for (int i = 0; i < (int)matrix_size_cols; ++i) {
        for (int j = 0; j < (int)matrix_size_rows; ++j) {
            const PetscInt cur_pos = j + i * matrix_size_cols;
            x_initial[cur_pos] = *(local_mat_read_ptr + cur_pos);
        }
    }
    MatDenseRestoreArrayRead(local_mat, &local_mat_read_ptr);
    MatDestroy(&local_mat);
    MPI_Bcast(x_initial.data(), 2 * x_initial.size(), MPI_DOUBLE, 0, PETSC_COMM_WORLD);
    solution_vec = arma::conv_to<arma::cx_colvec>::from(x_initial);
    MatDestroy(&input_mat);
}
This works for small test programs, but now I am running into the problem that the time step can differ slightly between the individual ranks. For example:
Step number   Rank   Time step
n             1      0.00025
n             0      0.00025      // ok
n + 1         1      0.00051152
n + 1         0      0.000511523  // not ok, differs from rank 1
Is there a communication problem between the different ranks in odeint, or is something else going wrong here? Is this approach feasible at all, or should I rely on a different approach?
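For completeness, this is the kind of manual step-size synchronization I could imagine as a fallback if odeint does not communicate between the ranks on its own. The MPI_Allreduce of the accept flag and of dt (and the helper name integrate_synced) are my own idea, not something odeint provides, so treat it purely as a sketch:

    #include <mpi.h>
    #include <boost/numeric/odeint.hpp>

    using namespace boost::numeric::odeint;

    // Drive the controlled stepper by hand and force all ranks to agree on
    // acceptance and on the next dt. No guard against overshooting t_max, and
    // the stepper's internal order/step-size bookkeeping is ignored here.
    template <typename System, typename State>
    void integrate_synced(System rhs, State &x, double t, double t_max,
                          double dt, MPI_Comm comm) {
        bulirsch_stoer<State> stepper(1e-8, 1e-6);
        while (t < t_max) {
            State x_trial = x;                  // work on a copy so a globally
            double t_trial = t, dt_trial = dt;  // rejected step can be discarded
            int ok = (stepper.try_step(rhs, x_trial, t_trial, dt_trial) == success);

            // the step counts only if every rank accepted it ...
            MPI_Allreduce(MPI_IN_PLACE, &ok, 1, MPI_INT, MPI_MIN, comm);
            // ... and everyone continues with the most conservative proposed dt
            MPI_Allreduce(MPI_IN_PLACE, &dt_trial, 1, MPI_DOUBLE, MPI_MIN, comm);

            if (ok) { x = x_trial; t = t_trial; }
            dt = dt_trial;
        }
    }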