error de descomposición de Cholesky ScaLapack

Recibo el siguiente error y no estoy seguro de por qué.

{    1,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    1,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value


 info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. 

Sé lo que significan los mensajes de error, pero seguí la documentación fechada disponible en la web lo mejor posible y traté de reconstruir una factorización cholesky paralela a partir de códigos de ejemplo en la web. No estoy seguro de dónde me equivoqué.

¿Podría alguien explicar dónde me equivoqué en el código de abajo? Aquí hay una descripción general de lo que hace el código, estoy probando con 4 procesadores y divido la matriz 8x8 en 2 x 2. La cuadrícula de bloques del procesador carga una matriz del archivo, aquí está un ejemplo de 8 x 8 matrixfile,

182   147   140   125   132    76   126   157
147   213   185   150   209   114   166   188
140   185   232   129   194   142   199   205
125   150   129   143   148    81   104   150
132   209   194   148   214   122   172   189
76   114   142    81   122   102   129   117
126   166   199   104   172   129   187   181
157   188   205   150   189   117   181   259

Seguí los ejemplos para distribuir Matrix a 4 matrices locales 4x4 separadas, una en cada uno de los 4 nodos. Entonces corrodescinit_ y llamar a los asociadospdpotrf_ rutina que produce el error anterior. No tengo idea de dónde me equivoqué e intenté seguir la documentación lo mejor que pude. Un ejemplo práctico de una descomposición paralela de Cholesky en Fortran también ayudaría enormemente.

Referencias para llamadas a funciones

pdpotrf_

descinit_

Parámetros de ejecución

code name - Meaning = Value
N - Global Rows = 8
M - Global Cols = 8
Nb - Local Block Rows = 2
Mb - Local Block Cols = 2
nrows - Local Rows = 4
ncols Local Cols= 4
lda - Leading dimension of local array = 4 (i've tried 2,4,8)
ord - Order of Matrix = 4   (i've also tried many different things here as well)

Imprimí los parámetros anteriores en cada nodo y son los mismos

#include <mpi.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sstream>
using namespace std;


/*
  To compile:
  mpic++ test.cpp -o test -L/home/admin/libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
  To run:
  mpirun -np 4 ./test matrixfile 8 8 2 2

*/

extern "C" {
  /* Cblacs declarations */
  void Cblacs_pinfo(int*, int*);
  void Cblacs_get(int, int, int*);
  void Cblacs_gridinit(int*, const char*, int, int);
  void Cblacs_gridinfo(int, int*, int*, int*,int*);
  void Cblacs_pcoord(int, int, int*, int*);
  void Cblacs_gridexit(int);
  void Cblacs_barrier(int, const char*);
  void Cdgerv2d(int, int, int, double*, int, int, int);
  void Cdgesd2d(int, int, int, double*, int, int, int);

  int numroc_(int*, int*, int*, int*, int*);

  void pdpotrf_(char*, int*, double*,
        int*, int*, int*, int*);

  void descinit_( int *, int *, int *, int *, int *, int *, int *,
          int *, int *, int *);

}

int main(int argc, char **argv){
  /* MPI */
  int mpirank,nprocs;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  double MPIelapsed;
  double MPIt2;
  double MPIt1;

  /* Helping vars */
  int iZERO = 0;
  int verbose = 1;
  bool mpiroot = (mpirank == 0);

  if (argc < 6) {
    if (mpiroot)
      cerr << "Usage: matrixTest matrixfile N M Nb Mb"
       << endl
       << " N = Rows , M = Cols , Nb = Row Blocks , Mb = Col Blocks "
       << endl;

    MPI_Finalize();
    return 1;
  }
  /* Scalapack / Blacs Vars */
  int N, M, Nb, Mb;
  int descA[9];
  int info = 0;
  //  int mla = 4;
  int ord = 8;



  double *A_glob = NULL, *A_glob2 = NULL, *A_loc = NULL;

  /* Parse command line arguments */
  if (mpiroot) {
    /* Read command line arguments */
    stringstream stream;
    stream << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5];
    stream >> N >> M >> Nb >> Mb;


    /* Reserve space and read matrix (with transposition!) */
    A_glob  = new double[N*M];
    A_glob2 = new double[N*M];
    string fname(argv[1]);
    ifstream file(fname.c_str());
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    file >> *(A_glob + N*c + r);
      }
    }

    /* Print matrix */

      if(verbose == 1) {
    cout << "Matrix A:\n";
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
        cout << setw(3) << *(A_glob + N*c + r) << " ";
      }
      cout << "\n";
    }
    cout << endl;
      }


  }

  /* Begin Cblas context */
  int ctxt, myid, myrow, mycol, numproc;
  //<TODO> make dynamic
  int procrows = 2, proccols = 2;
  Cblacs_pinfo(&myid, &numproc);
  Cblacs_get(0, 0, &ctxt);
  Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
  Cblacs_gridinfo( ctxt, &procrows, &proccols, &myrow, &mycol );
  /* process coordinates for the process grid */
  // Cblacs_pcoord(ctxt, myid, &myrow, &mycol);


  /* Broadcast of the matrix dimensions */
  int dimensions[4];
  if (mpiroot) {
    dimensions[0] = N;//Global Rows
    dimensions[1] = M;//Global Cols
    dimensions[2] = Nb;//Local Rows
    dimensions[3] = Mb;//Local Cols
  }
  MPI_Bcast(dimensions, 4, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&ord, 1, MPI_INT, 0, MPI_COMM_WORLD);
  N = dimensions[0];
  M = dimensions[1];
  Nb = dimensions[2];
  Mb = dimensions[3];

  int nrows = numroc_(&N, &Nb, &myrow, &iZERO, &procrows);
  int ncols = numroc_(&M, &Mb, &mycol, &iZERO, &proccols);

  int lda = max(1,nrows);

  MPI_Bcast(&lda, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* Print grid pattern */
  if (myid == 0)
    cout << "Processes grid pattern:" << endl;
  for (int r = 0; r < procrows; ++r) {
    for (int c = 0; c < proccols; ++c) {
      Cblacs_barrier(ctxt, "All");
      if (myrow == r && mycol == c) {
    cout << myid << " " << flush;
      }
    }
    Cblacs_barrier(ctxt, "All");
    if (myid == 0)
      cout << endl;
  }

  if(myid == 0){
    cout <<"Run Parameters"<<endl;
    cout <<"Global Rows = " << M <<endl;
    cout <<"Global Cols = " << N <<endl;
    cout <<"Local Block Rows = " << Mb <<endl;
    cout <<"Local Block Cols = " << Nb <<endl;
    cout << "nrows = "<<nrows<<endl;
    cout << "ncols = "<<ncols<<endl;
    cout << "lda = "<<lda<<endl;
    cout <<"Order = "<<ord<<endl;
  }


  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }
  A_loc = new double[nrows*ncols];
  for (int i = 0; i < nrows*ncols; ++i) *(A_loc+i)=0.;

  /* Scatter matrix */
  int sendr = 0, sendc = 0, recvr = 0, recvc = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (mpiroot) {
    Cdgesd2d(ctxt, nr, nc, A_glob+N*c+r, N, sendr, sendc);
      }

      if (myrow == sendr && mycol == sendc) {
    Cdgerv2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

    }

    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }

  /* Print local matrices */
  if(verbose == 1) {
  for (int id = 0; id < numproc; ++id) {
    if (id == myid) {
    cout << "A_loc on node " << myid << endl;
    for (int r = 0; r < nrows; ++r) {
      for (int c = 0; c < ncols; ++c)
        cout << setw(3) << *(A_loc+nrows*c+r) << " ";
      cout << endl;
    }
    cout << endl;
      }
      Cblacs_barrier(ctxt, "All");
    }
  }

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  /* DescInit */
  info=0;
  descinit_(descA, &N, &M, &Nb, &Mb,&iZERO,&iZERO,&ctxt, &lda, &info);

  if(mpiroot){
    if(verbose == 1){
      if (info == 0){
    cout<<"Description init sucesss!"<<endl;
      }
      if(info < 0){
    cout <<"Error Info < 0: if INFO = -i, the i-th argument had an illegal value"<< endl
         <<"Info = " << info<<endl;
      }
    }
    // Cblacs_barrier(ctxt, "All");
  }

  //psgesv_(n, 1, al, 1,1,idescal, ipiv, b, 1,1,idescb,  info) */
  //    psgesv_(&n, &one, al, &one,&one,idescal, ipiv, b, &one,&one,idescb,  &info);
  //pXelset http://www.netlib.org/scalapack/tools/pdelset.f


  /* CHOLESKY HERE */
  info = 0;
  MPIt1=MPI_Wtime();

  pdpotrf_("L",&ord,A_loc,&Nb,&Mb,descA,&info);

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  MPIt2 = MPI_Wtime();
  MPIelapsed=MPIt2-MPIt1;
  if(mpiroot){
    std::cout<<"Cholesky MPI Run Time" << MPIelapsed<<std::endl;

    if(info == 0){
      std::cout<<"SUCCESS"<<std::endl;
    }
    if(info < 0){

      cout << "info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. " << endl;
      cout<<"info = " << info << endl;
    }
    if(info > 0){
      std::cout<<"matrix is not positve definte"<<std::endl;
    }
  }

  //sanity check set global matrix to zero before it's recieved by nodes
  if(mpiroot){
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    A_glob2[c *N + r]  = 0; 
      }
    }
  }

  /* Gather matrix */
  sendr = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    // Number of rows to be sent
    // Is this the last row block?
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      // Number of cols to be sent
      // Is this the last col block?
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (myrow == sendr && mycol == sendc) {
    // Send a nr-by-nc submatrix to process (sendr, sendc)
    Cdgesd2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

      if (mpiroot) {
    // Receive the same data
    // The leading dimension of the local matrix is nrows!
    Cdgerv2d(ctxt, nr, nc, A_glob2+N*c+r, N, sendr, sendc);
      }
    }
    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }
  /* Print test matrix */
  if (mpiroot) {
    if(verbose == 1){
      cout << "Matrix A test:\n";
      for (int r = 0; r < N; ++r) {
    for (int c = 0; c < M; ++c) {
      cout << setw(3) << *(A_glob2+N*c+r) << " ";
    }
    cout << endl;
      }
    }
  }

  /* Release resources */
  delete[] A_glob;
  delete[] A_glob2;
  delete[] A_loc;
  Cblacs_gridexit(ctxt);
  MPI_Finalize();
}

Respuestas a la pregunta(1)

Su respuesta a la pregunta