rozkład cholesky Błąd ScaLapack
Dostaję następujący błąd i nie jestem pewien dlaczego.
{ 1, 1}: On entry to PDPOTRF parameter number 2 had an illegal value
{ 1, 0}: On entry to PDPOTRF parameter number 2 had an illegal value
{ 0, 1}: On entry to PDPOTRF parameter number 2 had an illegal value
{ 0, 0}: On entry to PDPOTRF parameter number 2 had an illegal value
info < 0: If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i.
Wiem, co oznaczają komunikaty o błędach, ale postępowałem zgodnie z datowaną dokumentacją dostępną w sieci, najlepiej jak to możliwe, i próbowałem połączyć równoległą faktoryzację cholesky z roboczych przykładowych kodów w sieci. Nie wiem, gdzie poszedłem źle.
Czy ktoś mógłby wyjaśnić, gdzie się pomyliłem w poniższym kodzie? Oto przegląd tego, co robi kod, testuję z 4 procesorami i dzielę macierz 8x8 na siatkę bloków procesora 2 x 2 ładuje macierz z pliku, heres przykładowy plik macierzy 8 x 8,
182 147 140 125 132 76 126 157
147 213 185 150 209 114 166 188
140 185 232 129 194 142 199 205
125 150 129 143 148 81 104 150
132 209 194 148 214 122 172 189
76 114 142 81 122 102 129 117
126 166 199 104 172 129 187 181
157 188 205 150 189 117 181 259
Podążyłem za przykładami, aby rozpowszechnić Matrycę na 4 oddzielne lokalne tablice 4x4 po jednej na każdym z 4 węzłów. Później uciekamdescinit_
i zadzwoń do stowarzyszonegopdpotrf_
procedura, która daje powyższy błąd. Nie mam pojęcia, gdzie się pomyliłem i starałem się śledzić dokumentację najlepiej, jak potrafiłem. Pomógłby również roboczy przykład równoległego rozkładu choleskiego w fortran
Odwołania do wywołań funkcji
Uruchom parametry
code name - Meaning = Value
N - Global Rows = 8
M - Global Cols = 8
Nb - Local Block Rows = 2
Mb - Local Block Cols = 2
nrows - Local Rows = 4
ncols Local Cols= 4
lda - Leading dimension of local array = 4 (i've tried 2,4,8)
ord - Order of Matrix = 4 (i've also tried many different things here as well)
I Wydrukowałem powyższe parametry na każdym węźle i są one takie same
#include <mpi.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sstream>
using namespace std;
/*
To compile:
mpic++ test.cpp -o test -L/home/admin/libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
To run:
mpirun -np 4 ./test matrixfile 8 8 2 2
*/
extern "C" {
/* Cblacs declarations */
void Cblacs_pinfo(int*, int*);
void Cblacs_get(int, int, int*);
void Cblacs_gridinit(int*, const char*, int, int);
void Cblacs_gridinfo(int, int*, int*, int*,int*);
void Cblacs_pcoord(int, int, int*, int*);
void Cblacs_gridexit(int);
void Cblacs_barrier(int, const char*);
void Cdgerv2d(int, int, int, double*, int, int, int);
void Cdgesd2d(int, int, int, double*, int, int, int);
int numroc_(int*, int*, int*, int*, int*);
void pdpotrf_(char*, int*, double*,
int*, int*, int*, int*);
void descinit_( int *, int *, int *, int *, int *, int *, int *,
int *, int *, int *);
}
int main(int argc, char **argv){
/* MPI */
int mpirank,nprocs;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
double MPIelapsed;
double MPIt2;
double MPIt1;
/* Helping vars */
int iZERO = 0;
int verbose = 1;
bool mpiroot = (mpirank == 0);
if (argc < 6) {
if (mpiroot)
cerr << "Usage: matrixTest matrixfile N M Nb Mb"
<< endl
<< " N = Rows , M = Cols , Nb = Row Blocks , Mb = Col Blocks "
<< endl;
MPI_Finalize();
return 1;
}
/* Scalapack / Blacs Vars */
int N, M, Nb, Mb;
int descA[9];
int info = 0;
// int mla = 4;
int ord = 8;
double *A_glob = NULL, *A_glob2 = NULL, *A_loc = NULL;
/* Parse command line arguments */
if (mpiroot) {
/* Read command line arguments */
stringstream stream;
stream << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5];
stream >> N >> M >> Nb >> Mb;
/* Reserve space and read matrix (with transposition!) */
A_glob = new double[N*M];
A_glob2 = new double[N*M];
string fname(argv[1]);
ifstream file(fname.c_str());
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
file >> *(A_glob + N*c + r);
}
}
/* Print matrix */
if(verbose == 1) {
cout << "Matrix A:\n";
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
cout << setw(3) << *(A_glob + N*c + r) << " ";
}
cout << "\n";
}
cout << endl;
}
}
/* Begin Cblas context */
int ctxt, myid, myrow, mycol, numproc;
//<TODO> make dynamic
int procrows = 2, proccols = 2;
Cblacs_pinfo(&myid, &numproc);
Cblacs_get(0, 0, &ctxt);
Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
Cblacs_gridinfo( ctxt, &procrows, &proccols, &myrow, &mycol );
/* process coordinates for the process grid */
// Cblacs_pcoord(ctxt, myid, &myrow, &mycol);
/* Broadcast of the matrix dimensions */
int dimensions[4];
if (mpiroot) {
dimensions[0] = N;//Global Rows
dimensions[1] = M;//Global Cols
dimensions[2] = Nb;//Local Rows
dimensions[3] = Mb;//Local Cols
}
MPI_Bcast(dimensions, 4, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&ord, 1, MPI_INT, 0, MPI_COMM_WORLD);
N = dimensions[0];
M = dimensions[1];
Nb = dimensions[2];
Mb = dimensions[3];
int nrows = numroc_(&N, &Nb, &myrow, &iZERO, &procrows);
int ncols = numroc_(&M, &Mb, &mycol, &iZERO, &proccols);
int lda = max(1,nrows);
MPI_Bcast(&lda, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* Print grid pattern */
if (myid == 0)
cout << "Processes grid pattern:" << endl;
for (int r = 0; r < procrows; ++r) {
for (int c = 0; c < proccols; ++c) {
Cblacs_barrier(ctxt, "All");
if (myrow == r && mycol == c) {
cout << myid << " " << flush;
}
}
Cblacs_barrier(ctxt, "All");
if (myid == 0)
cout << endl;
}
if(myid == 0){
cout <<"Run Parameters"<<endl;
cout <<"Global Rows = " << M <<endl;
cout <<"Global Cols = " << N <<endl;
cout <<"Local Block Rows = " << Mb <<endl;
cout <<"Local Block Cols = " << Nb <<endl;
cout << "nrows = "<<nrows<<endl;
cout << "ncols = "<<ncols<<endl;
cout << "lda = "<<lda<<endl;
cout <<"Order = "<<ord<<endl;
}
for (int id = 0; id < numproc; ++id) {
Cblacs_barrier(ctxt, "All");
}
A_loc = new double[nrows*ncols];
for (int i = 0; i < nrows*ncols; ++i) *(A_loc+i)=0.;
/* Scatter matrix */
int sendr = 0, sendc = 0, recvr = 0, recvc = 0;
for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
sendc = 0;
int nr = Nb;
if (N-r < Nb)
nr = N-r;
for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
int nc = Mb;
if (M-c < Mb)
nc = M-c;
if (mpiroot) {
Cdgesd2d(ctxt, nr, nc, A_glob+N*c+r, N, sendr, sendc);
}
if (myrow == sendr && mycol == sendc) {
Cdgerv2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
recvc = (recvc+nc)%ncols;
}
}
if (myrow == sendr)
recvr = (recvr+nr)%nrows;
}
/* Print local matrices */
if(verbose == 1) {
for (int id = 0; id < numproc; ++id) {
if (id == myid) {
cout << "A_loc on node " << myid << endl;
for (int r = 0; r < nrows; ++r) {
for (int c = 0; c < ncols; ++c)
cout << setw(3) << *(A_loc+nrows*c+r) << " ";
cout << endl;
}
cout << endl;
}
Cblacs_barrier(ctxt, "All");
}
}
for (int id = 0; id < numproc; ++id) {
Cblacs_barrier(ctxt, "All");
}
/* DescInit */
info=0;
descinit_(descA, &N, &M, &Nb, &Mb,&iZERO,&iZERO,&ctxt, &lda, &info);
if(mpiroot){
if(verbose == 1){
if (info == 0){
cout<<"Description init sucesss!"<<endl;
}
if(info < 0){
cout <<"Error Info < 0: if INFO = -i, the i-th argument had an illegal value"<< endl
<<"Info = " << info<<endl;
}
}
// Cblacs_barrier(ctxt, "All");
}
//psgesv_(n, 1, al, 1,1,idescal, ipiv, b, 1,1,idescb, info) */
// psgesv_(&n, &one, al, &one,&one,idescal, ipiv, b, &one,&one,idescb, &info);
//pXelset http://www.netlib.org/scalapack/tools/pdelset.f
/* CHOLESKY HERE */
info = 0;
MPIt1=MPI_Wtime();
pdpotrf_("L",&ord,A_loc,&Nb,&Mb,descA,&info);
for (int id = 0; id < numproc; ++id) {
Cblacs_barrier(ctxt, "All");
}
MPIt2 = MPI_Wtime();
MPIelapsed=MPIt2-MPIt1;
if(mpiroot){
std::cout<<"Cholesky MPI Run Time" << MPIelapsed<<std::endl;
if(info == 0){
std::cout<<"SUCCESS"<<std::endl;
}
if(info < 0){
cout << "info < 0: If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. " << endl;
cout<<"info = " << info << endl;
}
if(info > 0){
std::cout<<"matrix is not positve definte"<<std::endl;
}
}
//sanity check set global matrix to zero before it's recieved by nodes
if(mpiroot){
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
A_glob2[c *N + r] = 0;
}
}
}
/* Gather matrix */
sendr = 0;
for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
sendc = 0;
// Number of rows to be sent
// Is this the last row block?
int nr = Nb;
if (N-r < Nb)
nr = N-r;
for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
// Number of cols to be sent
// Is this the last col block?
int nc = Mb;
if (M-c < Mb)
nc = M-c;
if (myrow == sendr && mycol == sendc) {
// Send a nr-by-nc submatrix to process (sendr, sendc)
Cdgesd2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
recvc = (recvc+nc)%ncols;
}
if (mpiroot) {
// Receive the same data
// The leading dimension of the local matrix is nrows!
Cdgerv2d(ctxt, nr, nc, A_glob2+N*c+r, N, sendr, sendc);
}
}
if (myrow == sendr)
recvr = (recvr+nr)%nrows;
}
/* Print test matrix */
if (mpiroot) {
if(verbose == 1){
cout << "Matrix A test:\n";
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
cout << setw(3) << *(A_glob2+N*c+r) << " ";
}
cout << endl;
}
}
}
/* Release resources */
delete[] A_glob;
delete[] A_glob2;
delete[] A_loc;
Cblacs_gridexit(ctxt);
MPI_Finalize();
}