From 1a4d83ebab1535ed1f0b11565e2974d80093c1b4 Mon Sep 17 00:00:00 2001 From: Daniel Wortmann Date: Tue, 3 Jul 2018 12:25:37 +0200 Subject: [PATCH] Changes to chase-MPI interface. Still not functional... --- diagonalization/chase_diag.F90 | 206 +++++++++++++++++++---- external/chase/src/FLEUR/fleur_chase.cpp | 48 ++++-- types/types_mpimat.F90 | 104 +++++++++++- 3 files changed, 310 insertions(+), 48 deletions(-) diff --git a/diagonalization/chase_diag.F90 b/diagonalization/chase_diag.F90 index c670c446..444f0f0b 100644 --- a/diagonalization/chase_diag.F90 +++ b/diagonalization/chase_diag.F90 @@ -8,7 +8,7 @@ !-------------------------------------------------------------------------------- MODULE m_chase_diag #ifdef CPP_CHASE - + USE m_judft IMPLICIT NONE interface @@ -31,23 +31,23 @@ IMPLICIT NONE end subroutine chase_r end interface - !MPI + !MPI INTERFACE - SUBROUTINE mpi_dchase_init( mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen) BIND( c, name = 'dchase_init' ) + SUBROUTINE mpi_dchase_init( mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen,npr,npc,myrow,mycol) BIND( c, name = 'dchase_init' ) USE, INTRINSIC :: iso_c_binding - INTEGER(c_int) :: mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen + INTEGER(c_int) :: mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen,myrow,mycol,npr,npc END SUBROUTINE mpi_dchase_init END INTERFACE INTERFACE - SUBROUTINE mpi_zchase_init( mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen) BIND( c, name = 'zchase_init' ) + SUBROUTINE mpi_zchase_init( mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen,npr,npc,myrow,mycol) BIND( c, name = 'zchase_init' ) USE, INTRINSIC :: iso_c_binding - INTEGER(c_int) :: mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen + INTEGER(c_int) :: mpi_comm, n, nev, nex, xoff,yoff,xlen,ylen,myrow,mycol,npr,npc END SUBROUTINE mpi_zchase_init END INTERFACE INTERFACE - SUBROUTINE mpi_chase_r(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'dchase_solve_' ) + SUBROUTINE mpi_chase_r(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'dchase_solve' ) USE, INTRINSIC :: iso_c_binding REAL(c_double_complex) :: h(*), v(*) INTEGER(c_int) :: deg @@ -58,7 +58,7 @@ IMPLICIT NONE INTERFACE - SUBROUTINE mpi_chase_c(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'zchase_solve_' ) + SUBROUTINE mpi_chase_c(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'zchase_solve' ) USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(*), v(*) INTEGER(c_int) :: deg @@ -306,21 +306,28 @@ IMPLICIT NONE CLASS(t_mat), ALLOCATABLE, INTENT(OUT) :: zmat REAL, INTENT(OUT) :: eig(:) - INTEGER :: i, j, nev, nex, nbands,xoff,yoff,xlen,ylen,ierr - INTEGER :: info + INTEGER :: i, j, nev, nex, nbands,xoff,yoff,xlen,ylen,ierr,nb_x,nb_y + INTEGER :: info,irank + REAL :: scale !scaling of eigenvalues from scalapack - CLASS(t_mat), ALLOCATABLE :: zMatTemp,chase_mat - REAL, ALLOCATABLE :: eigenvalues(:) + CLASS(t_mat), ALLOCATABLE :: zMatTemp + TYPE(t_mpimat) :: chase_mat + REAL, ALLOCATABLE :: eigenvalues(:) include 'mpif.h' + CALL MPI_COMM_RANK(MPI_COMM_WORLD,irank,info) + PRINT *,"Chase 1" + !Transform to standard problem using SCALAPACK IF (hmat%l_real) THEN CALL pdpotrf('U',smat%global_size1,smat%data_r,1,1,smat%blacs_desc,info) + PRINT *,"pdpotrf",info IF (info.NE.0) THEN WRITE (*,*) 'Error in pdpotrf: info =',info CALL juDFT_error("Diagonalization failed",calledby="chase_diag") ENDIF - CALL pdsygst(1,'U',smat%global_size1,hmat%data_r,1,1,hmat%blacs_desc,smat%data_r,1,1,smat%blacs_desc,info) + CALL pdsygst(1,'U',smat%global_size1,hmat%data_r,1,1,smat%blacs_desc,smat%data_r,1,1,smat%blacs_desc,scale,info) + IF (ABS(scale-1)>1E-10) call judft_error("Scale parameter not implemented in chase_diag") IF (info.NE.0) THEN WRITE (6,*) 'Error in pdsygst: info =',info CALL juDFT_error("Diagonalization failed",calledby="chase_diag") @@ -331,55 +338,58 @@ IMPLICIT NONE WRITE (*,*) 'Error in pzpotrf: info =',info CALL juDFT_error("Diagonalization failed",calledby="chase_diag") ENDIF - CALL pzhegst(1,'U',smat%global_size1,hmat%data_c,1,1,hmat%blacs_desc,smat%data_c,1,1,hmat%blacs_desc,info) + CALL pzhegst(1,'U',smat%global_size1,hmat%data_c,1,1,smat%blacs_desc,smat%data_c,1,1,smat%blacs_desc,scale,info) + IF (ABS(scale-1)>1E-10) call judft_error("Scale parameter not implemented in chase_diag") IF (info.NE.0) THEN WRITE (6,*) 'Error in pzhegst: info =',info CALL juDFT_error("Diagonalization failed",calledby="chase_diag") ENDIF END IF + + ! H is only set in the upper half, solve_evp_real needs a full matrix + ! Set lower half from upper half + + call hmat%generate_full_matrix() + + !call hmat%print_matrix(444) ! Now we are ready to set up chase + PRINT *,"Chase 2" nev = min(ne,hmat%global_size1) nex = min(max(nev/4, 45), hmat%global_size1-nev) !dimensioning for workspace - IF (smat%l_real) THEN - CALL mpi_dchase_init( hmat%mpi_com,hmat%global_size1, nev, nex, xoff,yoff,xlen,ylen) - ELSE - CALL mpi_zchase_init( hmat%mpi_com,hmat%global_size1, nev, nex, xoff,yoff,xlen,ylen) - ENDIF - CALL MPI_ALLREDUCE(MPI_IN_PLACE,xlen,1,MPI_INTEGER,MPI_MAX,smat%mpi_com,ierr) - CALL MPI_ALLREDUCE(MPI_IN_PLACE,ylen,1,MPI_INTEGER,MPI_MAX,smat%mpi_com,ierr) - - ALLOCATE(t_mpimat::chase_mat) - CALL chase_mat%init(smat%l_real,smat%global_size1,smat%global_size2,smat%mpi_com,.TRUE.,xlen,ylen) - - CALL chase_mat%copy(hmat,1,1) !redistribute matrix + + CALL priv_init_chasempimat(hmat,chase_mat,nev,nex) ALLOCATE(eigenvalues(nev+nex)) eigenvalues = 0.0 ALLOCATE(t_mpimat::zmatTemp) - CALL zMatTemp%init(hmat%l_real,hmat%global_size1,nev+nex,MPI_COMM_SELF,.TRUE.,hmat%global_size1,nev+nex) !Generate a pseudo-distributed matrix + CALL zMatTemp%init(hmat%l_real,hmat%global_size1,nev+nex,MPI_COMM_SELF,.TRUE.) !Generate a pseudo-distributed matrix + PRINT *,"Chase 3" IF (hmat%l_real) THEN IF(iter.EQ.1) THEN - CALL chase_r(hmat%data_r, hmat%global_size1, zMatTemp%data_r, eigenvalues, nev, nex, 25, 1e-6, 'R', 'S' ) + CALL mpi_chase_r(chase_mat%data_r, zMatTemp%data_r, eigenvalues, 25, 1e-10, 'R', 'S' ) ELSE CALL read_eig(chase_eig_id,ikpt,jsp,neig=nbands,eig=eigenvalues,zmat=zMatTemp) - CALL chase_r(hmat%data_r, hmat%global_size1, zMatTemp%data_r, eigenvalues, nev, nex, 25, 1e-6, 'A', 'S' ) + CALL mpi_chase_r(chase_mat%data_r, zMatTemp%data_r, eigenvalues, 25, 1e-10, 'A', 'S' ) END IF ELSE IF(iter.EQ.1) THEN - CALL chase_c(hmat%data_c, hmat%global_size1, zMatTemp%data_c, eigenvalues, nev, nex, 25, 1e-6, 'R', 'S' ) + CALL mpi_chase_c(chase_mat%data_c, zMatTemp%data_c, eigenvalues, 25, 1e-10, 'R', 'S' ) ELSE CALL read_eig(chase_eig_id,ikpt,jsp,neig=nbands,eig=eigenvalues,zmat=zMatTemp) - CALL chase_c(hmat%data_c, hmat%global_size1, zMatTemp%data_c, eigenvalues, nev, nex, 25, 1e-6, 'A', 'S' ) + CALL mpi_chase_c(chase_mat%data_c, zMatTemp%data_c, eigenvalues, 25, 1e-10, 'A', 'S' ) END IF ENDIF + + if (irank==0) PRINT *,eigenvalues + stop "DEBUG" ne = nev CALL write_eig(chase_eig_id,ikpt,jsp,nev+nex,nev+nex,& eigenvalues(:(nev+nex)),zmat=zMatTemp) - CALL zmatTemp%copy(hmat,1,1) !Copy matrix into distributed form + CALL hmat%copy(zmatTemp,1,1) !Copy matrix into distributed form call zmatTemp%free() ! --> recover the generalized eigenvectors z by solving z' = l^t * z @@ -406,8 +416,138 @@ IMPLICIT NONE DO i = 1, ne eig(i) = eigenvalues(i) END DO +#endif END SUBROUTINE chase_diag_MPI +#ifdef CPP_MPI + SUBROUTINE priv_init_chasempimat(hmat,mat,nev,nex) + USE m_types_mpimat + IMPLICIT NONE + TYPE(t_mpimat),INTENT(INOUT)::hmat,mat + INTEGER,INTENT(IN) :: nev,nex + INTEGER::nbc,nbr + + INCLUDE 'mpif.h' + INTEGER :: myrow,mycol + INTEGER :: npblacs,np,myid + INTEGER :: rowlen,collen,rowoff,coloff + INTEGER :: k,i,j + INTEGER :: ierr + + INTEGER,ALLOCATABLE :: iblacsnums(:),ihelp(:),iusermap(:,:) + + EXTERNAL descinit, blacs_get + EXTERNAL blacs_pinfo, blacs_gridinit + INTEGER,EXTERNAL::numroc,indxl2g + + mat%mpi_com=hmat%mpi_com + mat%global_size1=hmat%global_size1 + mat%global_size2=hmat%global_size1 + mat%l_real=hmat%l_real + + !Determine rank and no of processors + CALL MPI_COMM_RANK(hmat%mpi_com,myid,ierr) + CALL MPI_COMM_SIZE(hmat%mpi_com,np,ierr) + + !Init ChASE + IF (mat%l_real) THEN + CALL mpi_dchase_init(hmat%mpi_com,mat%global_size1, nev, nex, rowoff,coloff,rowlen,collen,& + mat%nprow,mat%npcol,myrow,mycol) + ELSE + CALL mpi_zchase_init(hmat%mpi_com,mat%global_size1, nev, nex, rowoff,coloff,rowlen,collen,& + mat%nprow,mat%npcol,myrow,mycol) + ENDIF + + !Determine block-sizes + CALL MPI_ALLREDUCE(rowlen,nbr,1,MPI_INTEGER,MPI_MAX,mat%mpi_com,ierr) + CALL MPI_ALLREDUCE(collen,nbc,1,MPI_INTEGER,MPI_MAX,mat%mpi_com,ierr) + + !Determine scalapack grid + !PRINT *,"G1:",myid,myrow,mycol,mat%nprow,mat%npcol + !CALL priv_processor_grid(mat%global_size1,rowoff,coloff,nbr,nbc,myrow,mycol,mat%nprow,mat%npcol) + !PRINT *,"G2:",myid,myrow,mycol,mat%nprow,mat%npcol + !Now included in init-call to chase above + + ALLOCATE(iusermap(mat%nprow,mat%npcol)) + iusermap=-2 + !Get BLACS ranks for all MPI ranks + CALL BLACS_PINFO(iusermap(myrow+1,mycol+1),npblacs) ! iamblacs = local process rank (e.g. myid) + CALL MPI_ALLREDUCE(MPI_IN_PLACE, iusermap, np,MPI_INTEGER,MPI_MAX,mat%mpi_com,ierr) + !Get the Blacs default context + CALL BLACS_GET(0,0,mat%blacs_ctext) + ! Create the Grid + CALL BLACS_GRIDMAP(mat%blacs_ctext,iusermap,mat%nprow,mat%nprow,mat%npcol) + + !Now create the matrix + mat%matsize1=numroc(mat%global_size1,nbr,myrow,0,mat%nprow) + mat%matsize2=numroc(mat%global_size1,nbc,mycol,0,mat%npcol) + IF (mat%l_real) THEN + ALLOCATE(mat%data_r(mat%matsize1,mat%matsize2)) + ELSE + ALLOCATE(mat%data_c(mat%matsize1,mat%matsize2)) + END IF + !Check for consistency + IF (mat%matsize1.NE.rowlen.OR.mat%matsize2.NE.collen) THEN + PRINT *,myid,"R:",mat%matsize1,rowlen,nbr + PRINT *,myid,"C:",mat%matsize2,collen,nbc + CALL judft_error("Distribution failed for chase") + ENDIF + + !Create blacs descriptor for chase matrix + CALL descinit(mat%blacs_desc,mat%global_size1,mat%global_size2,nbr,nbc,0,0,mat%blacs_ctext,mat%matsize1,ierr) + IF (ierr /=0 ) CALL judft_error('Creation of BLACS descriptor failed') + + !Copy data from hmat + CALL mat%copy(hmat,1,1) + + !Test if indices match + PRINT *,"I",coloff+3,indxl2g(3,mat%blacs_desc(6), mycol, 0, mat%npcol) + PRINT *,"I",rowoff+4,indxl2g(4,mat%blacs_desc(5), myrow, 0, mat%nprow) + + call hmat%print_matrix(445) + CALL mat%print_matrix(444) + + DO i=1,mat%matsize1 + DO j=1,mat%matsize2 + PRINT *,"MAT",i+rowoff,j+coloff,mat%data_r(i,j) + ENDDO + ENDDO + + + END SUBROUTINE priv_init_chasempimat + + + +! SUBROUTINE priv_processor_grid(myid,np,myrow,mycol,nprow,npcol) +! IMPLICIT NONE +! INTEGER,INTENT(IN) :: myid,np +! INTEGER,INTENT(OUT):: myrow,mycol,nprow,npcol +! +! INTEGER:: j + +! distloop: DO j=INT(SQRT(REAL(np))),1,-1 +! IF ( (np/j) * j == np) THEN +! nprow = np/j +! npcol = j +! EXIT distloop +! ENDIF +! ENDDO distloop +! mycol=myid/nprow +! myrow=myid-(myid/nprow)*nprow +! END SUBROUTINE priv_processor_grid + + SUBROUTINE priv_processor_grid(matsize,rowoff,coloff,nbr,nbc,myrow,mycol,nprow,npcol) + IMPLICIT NONE + INTEGER,INTENT(IN) :: matsize,rowoff,coloff,nbr,nbc + INTEGER,INTENT(OUT):: myrow,mycol,nprow,npcol + + npcol=(matsize-1)/nbc+1 + nprow=(matsize-1)/nbr+1 + + myrow=rowoff/nbr + mycol=coloff/nbc + + END SUBROUTINE priv_processor_grid #endif -END MODULE m_chase_diag + END MODULE m_chase_diag diff --git a/external/chase/src/FLEUR/fleur_chase.cpp b/external/chase/src/FLEUR/fleur_chase.cpp index dcfd8d80..9037fb85 100644 --- a/external/chase/src/FLEUR/fleur_chase.cpp +++ b/external/chase/src/FLEUR/fleur_chase.cpp @@ -23,8 +23,10 @@ using namespace chase::mpi; class ChASE_State { public: template - static ChaseMpiProperties* constructProperties(std::size_t N, std::size_t nev, - std::size_t nex, MPI_Comm comm); + static ChaseMpiProperties* constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + MPI_Comm comm); template static ChaseMpiProperties* getProperties(); @@ -37,7 +39,7 @@ ChaseMpiProperties>* ChASE_State::complex_double_prec = nullptr; template <> - ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, std::size_t nev, std::size_t nex, MPI_Comm comm) { @@ -88,10 +90,21 @@ void call_chase(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, template void chase_setup(MPI_Fint* fcomm, int* N, int* nev, int* nex, std::size_t* xoff, - std::size_t* yoff, std::size_t* xlen, std::size_t* ylen) { + std::size_t* yoff, std::size_t* xlen, std::size_t* ylen, + std::size_t* dimx, std::size_t* dimy, std::size_t* myx, + std::size_t* myy) { + std::size_t xoff_, yoff_, xlen_, ylen_; MPI_Comm comm = MPI_Comm_f2c(*fcomm); auto props = ChASE_State::constructProperties(*N, *nev, *nex, comm); props->get_off(xoff, yoff, xlen, ylen); + + auto dims = props->get_dims(); + *dimx = dims[0]; + *dimy = dims[1]; + + auto coord = props->get_coord(); + *myx = coord[0]; + *myy = coord[1]; } template @@ -143,28 +156,39 @@ void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex, } void zchase_init(MPI_Fint* fcomm, int* N, int* nev, int* nex, int* xoff, - int* yoff, int* xlen, int* ylen) { - std::size_t xoff_, yoff_, xlen_, ylen_; + int* yoff, int* xlen, int* ylen, int* dimx, int* dimy, + int* myx, int* myy) { + std::size_t xoff_, yoff_, xlen_, ylen_, dimx_, dimy_, myx_, myy_; chase_setup>(fcomm, N, nev, nex, &xoff_, &yoff_, &xlen_, - &ylen_); + &ylen_, &dimx_, &dimy_, &myx_, &myy_); *xoff = static_cast(xoff_); *yoff = static_cast(yoff_); *xlen = static_cast(xlen_); - *ylen = static_cast(xlen_); + *ylen = static_cast(ylen_); + *dimx = static_cast(dimx_); + *dimy = static_cast(dimy_); + *myx = static_cast(myx_); + *myy = static_cast(myy_); } void dchase_init(MPI_Fint* fcomm, int* N, int* nev, int* nex, int* xoff, - int* yoff, int* xlen, int* ylen) { - std::size_t xoff_, yoff_, xlen_, ylen_; + int* yoff, int* xlen, int* ylen, int* dimx, int* dimy, + int* myx, int* myy) { + std::size_t xoff_, yoff_, xlen_, ylen_, dimx_, dimy_, myx_, myy_; - chase_setup(fcomm, N, nev, nex, &xoff_, &yoff_, &xlen_, &ylen_); + chase_setup(fcomm, N, nev, nex, &xoff_, &yoff_, &xlen_, &ylen_, + &dimx_, &dimy_, &myx_, &myy_); *xoff = static_cast(xoff_); *yoff = static_cast(yoff_); *xlen = static_cast(xlen_); - *ylen = static_cast(xlen_); + *ylen = static_cast(ylen_); + *dimx = static_cast(dimx_); + *dimy = static_cast(dimy_); + *myx = static_cast(myx_); + *myy = static_cast(myy_); } void zchase_solve(std::complex* H, std::complex* V, diff --git a/types/types_mpimat.F90 b/types/types_mpimat.F90 index ea0fb4e3..742eedc3 100644 --- a/types/types_mpimat.F90 +++ b/types/types_mpimat.F90 @@ -7,6 +7,7 @@ MODULE m_types_mpimat USE m_judft USE m_types_rcmat + IMPLICIT NONE PRIVATE INTEGER,PARAMETER :: DEFAULT_BLOCKSIZE=64 INTEGER, PARAMETER :: dlen_=9 @@ -29,12 +30,102 @@ MODULE m_types_mpimat PROCEDURE,PASS :: free => mpimat_free ! mpimat_init ! mpimat_add_transpose !0) CALL MPI_RECV(tmp,1,MPI_INTEGER,irank-1,0,mat%mpi_com,status,err) !lock + WRITE(filename,"(a,i0)") "out.",fileno + OPEN(fileno,file=filename,access='append') + + CALL blacs_gridinfo(mat%blacs_desc(2),npr,npc,r,c) + DO i=1,mat%matsize1 + DO j=1,mat%matsize2 + IF (mat%l_real) THEN + WRITE(fileno,"(5(i0,1x),2(f10.5,1x))") irank,i,j,indxl2g(i,mat%blacs_desc(5),r,0,npr),& + indxl2g(j,mat%blacs_desc(6),c,0,npc),mat%data_r(i,j) + ELSE + WRITE(fileno,"(5(i0,1x),2(f10.5,1x))") irank,i,j,indxl2g(i,mat%blacs_desc(5),r,0,npr),& + indxl2g(j,mat%blacs_desc(6),c,0,npc),mat%data_c(i,j) + END IF + ENDDO + ENDDO + CLOSE(fileno) + IF (irank+10) mat%blacs_desc(2)=-1 + END IF END SUBROUTINE mpimat_init - SUBROUTINE priv_create_blacsgrid(mpi_subcom,l_2d,m1,m2,nbc,nbr,ictextblacs,sc_desc,local_size1,local_size2,npcol,nprow) + SUBROUTINE priv_create_blacsgrid(mpi_subcom,l_2d,m1,m2,nbc,nbr,ictextblacs,sc_desc,local_size1,local_size2,nprow,npcol) IMPLICIT NONE INTEGER,INTENT(IN) :: mpi_subcom INTEGER,INTENT(IN) :: m1,m2 @@ -165,7 +262,7 @@ CONTAINS EXTERNAL descinit, blacs_get EXTERNAL blacs_pinfo, blacs_gridinit - + !Determine rank and no of processors CALL MPI_COMM_RANK(mpi_subcom,myid,ierr) CALL MPI_COMM_SIZE(mpi_subcom,np,ierr) @@ -204,6 +301,7 @@ CONTAINS myrowssca=(m1-1)/(nbr*nprow)*nbr+ MIN(MAX(m1-(m1-1)/(nbr*nprow)*nbr*nprow-nbr*myrow,0),nbr) ! Number of rows the local process gets in ScaLAPACK distribution mycolssca=(m2-1)/(nbc*npcol)*nbc+ MIN(MAX(m2-(m2-1)/(nbc*npcol)*nbc*npcol-nbc*mycol,0),nbc) + !Get BLACS ranks for all MPI ranks CALL BLACS_PINFO(iamblacs,npblacs) ! iamblacs = local process rank (e.g. myid) -- GitLab