!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE m_elpa
  PRIVATE
#ifdef CPP_ELPA  
  interface elpa_diag
     module procedure elpa_r,elpa_c
  end interface elpa_diag
  
  ! Module to call the ELPA library for parallel diagonalization.
  ! Uses ssubredist1/2 for the redistribution of the matrices.

  PUBLIC elpa_diag
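
  ! Illustrative usage (a sketch, not part of the original source): the generic
  ! interface resolves to elpa_r or elpa_c depending on whether the matrix
  ! arguments a, b and z are REAL or COMPLEX:
  !    CALL elpa_diag(m, sub_comm, a, b, z, eig, num)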

CONTAINS
  ! First the real version of the code
#define CPP_CHOLESKY cholesky_real
#define CPP_invert_trm invert_trm_real
#define CPP_solve_evp_1stage solve_evp_real_1stage
#define CPP_solve_evp solve_evp_real
#define CPP_solve_evp_2stage solve_evp_real_2stage
#define CPP_REALCOMPLEX real
#define CPP_transpose pdtran
#define CPP_ONE 1.0
#define CPP_ZERO 0.0
#ifdef CPP_ELPA_201605004
#define CPP_mult elpa_mult_at_b_real
#else
#define CPP_mult mult_at_b_real
#endif
#define CPP_REAL
  SUBROUTINE elpa_r(m, SUB_COMM, a,b, z,eig,num)
    !
    !----------------------------------------------------
    !- Parallel eigensystem solver - driver routine based on chani; dw'12
    !
    ! m ........ actual (=leading) dimension of the full a & b matrices;
    !            must equal the problem size, since on input a and b are
    !            one-dimensional and are redistributed to two-dimensional
    !            matrices; also the actual (=leading) dimension of the
    !            eigenvector array z(:,:)
    ! n ........ number of columns of the full (sub)matrix (about n/np)
    ! SUB_COMM.. MPI communicator
    ! a,b   .... packed (sub)matrices, here expanded to non-packed form
    ! z,eig .... eigenvectors and eigenvalues, output
    ! num ...... number of eigenvalues searched (and found) on this node;
    !            on input, the overall number of eigenvalues searched,
    !            on output, the local number of eigenvalues found
    !
    !----------------------------------------------------
    !
  
#include "elpa_cpp.F90"

  END SUBROUTINE elpa_r


  ! Now the complex version
#define CPP_CHOLESKY cholesky_complex
#define CPP_invert_trm invert_trm_complex
#define CPP_solve_evp_1stage solve_evp_complex_1stage
#define CPP_solve_evp solve_evp_complex
#define CPP_solve_evp_2stage solve_evp_complex_2stage
#define CPP_REALCOMPLEX complex
#define CPP_transpose pztranc
#define CPP_ONE cmplx(1.,0.)
#define CPP_ZERO cmplx(0.,0.)
#ifdef CPP_ELPA_201605004
#define CPP_mult elpa_mult_ah_b_complex
#else
#define CPP_mult mult_ah_b_complex
#endif
#undef CPP_REAL
  SUBROUTINE elpa_c(m, SUB_COMM, a,b, z,eig,num)
    !
    !----------------------------------------------------
    !- Parallel eigensystem solver - driver routine based on chani; dw'12
    !
    ! m ........ actual (=leading) dimension of the full a & b matrices;
    !            must equal the problem size, since on input a and b are
    !            one-dimensional and are redistributed to two-dimensional
    !            matrices; also the actual (=leading) dimension of the
    !            eigenvector array z(:,:)
    ! n ........ number of columns of the full (sub)matrix (about n/np)
    ! SUB_COMM.. MPI communicator
    ! a,b   .... packed (sub)matrices, here expanded to non-packed form
    ! z,eig .... eigenvectors and eigenvalues, output
    ! num ...... number of eigenvalues searched (and found) on this node;
    !            on input, the overall number of eigenvalues searched,
    !            on output, the local number of eigenvalues found
    !
    !----------------------------------------------------
    !
  
#include "elpa_cpp.F90"

  END SUBROUTINE elpa_c

  SUBROUTINE priv_create_blacsgrid(mpi_subcom,m, np,nb,myid,npcol,nprow,mycol,&
       myrow,myrowssca,mycolssca, ictextblacs,sc_desc,mpi_comm_rows,mpi_comm_cols)
    USE m_juDFT
    USE elpa1
    IMPLICIT NONE
    INCLUDE 'mpif.h'
    INTEGER,INTENT(IN) :: mpi_subcom
    INTEGER,INTENT(IN) :: m
    INTEGER,INTENT(OUT):: np,nb, npcol,nprow,myrowssca,mycolssca
    INTEGER,INTENT(OUT):: myrow,mycol,myid
    INTEGER,INTENT(OUT):: ictextblacs,sc_desc(:)
    INTEGER,INTENT(OUT):: mpi_comm_rows,mpi_comm_cols


    INTEGER     :: iamblacs,npblacs
    INTEGER     :: nprow2,npcol2,myrowblacs,mycolblacs
    INTEGER     :: k,i,j
    INTEGER     :: ierr

    INTEGER,ALLOCATABLE :: iblacsnums(:),ihelp(:),iusermap(:,:)

    EXTERNAL descinit, blacs_get
    EXTERNAL blacs_pinfo, blacs_gridinit

    ! Determine rank and number of processors
    CALL MPI_COMM_RANK(mpi_subcom,myid,ierr)
    CALL MPI_COMM_SIZE(mpi_subcom,np,ierr)

    !print *,"priv_create_blacsgrid"
    ! determine block size
    !
    nb = 20
    IF (m.GT.2048)   nb = 30 !2
    IF (m.GT.8*2048) nb = 60 !4
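    ! Presumably a performance heuristic: larger matrices get larger blocks so
    ! that the block-cyclic distribution spends relatively less time on
    ! communication and more in local BLAS kernels.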

    ! Compute the processor grid, as square as possible.
    ! If it cannot be square, use more columns than rows.

    distloop: DO j=INT(SQRT(REAL(np))),1,-1
       IF ( (np/j) * j == np) THEN
          npcol = np/j
          nprow = j
          EXIT distloop
       ENDIF
    ENDDO distloop
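    ! Example (illustrative): np=6 yields nprow=2, npcol=3; np=4 yields the
    ! square grid nprow=npcol=2; a prime np degenerates to nprow=1, npcol=np.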
    ALLOCATE(iblacsnums(np),ihelp(np),iusermap(nprow,npcol))

    !   An nprow*npcol processor grid will be created
    !   Row and column index myrow, mycol of this processor in the grid
    !   and distribution of A and B in ScaLAPACK
    !   The local processor will get myrowssca rows and mycolssca columns
    !   of A and B
    !

    myrow = myid/npcol  ! my row number in the BLACS nprow*npcol grid
    mycol = myid -(myid/npcol)*npcol  ! my column number in the BLACS nprow*npcol grid
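    ! Example (illustrative): with npcol=3 the MPI ranks are laid out row-major
    ! on the grid, e.g. myid=4 sits at (myrow,mycol)=(1,1) and myid=5 at (1,2).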
    !
    !  Compute the local dimensions of Asca, which will hold the elements of
    !  Achi or the receive buffer in the block-cyclic (ScaLAPACK) distribution
    !
    ! Number of rows the local process gets in the ScaLAPACK distribution
    myrowssca=(m-1)/(nb*nprow)*nb+ MIN(MAX(m-(m-1)/(nb*nprow)*nb*nprow-nb*myrow,0),nb)
    ! Number of columns the local process gets
    mycolssca=(m-1)/(nb*npcol)*nb+ MIN(MAX(m-(m-1)/(nb*npcol)*nb*npcol-nb*mycol,0),nb)
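    ! Worked example (illustrative): m=10, nb=3, nprow=2 splits the rows into
    ! blocks of size 3,3,3,1; dealt out cyclically, myrow=0 owns 3+3=6 rows and
    ! myrow=1 owns 3+1=4. This matches what ScaLAPACK's NUMROC would give for
    ! source process 0.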



    !Get BLACS ranks for all MPI ranks
    CALL BLACS_PINFO(iamblacs,npblacs)  ! iamblacs = local process rank (e.g. myid)
    ! npblacs  = number of available processes
    iblacsnums=-2
    ihelp=-2
    ihelp(myid+1)=iamblacs ! Get the Blacs id corresponding to the MPI id
    !print *,"ALLREDUCE:",mpi_subcom
    CALL MPI_ALLREDUCE(ihelp, iblacsnums, np,MPI_INTEGER,MPI_MAX,mpi_subcom,ierr)
    IF (ierr.NE.0) CALL juDFT_error('Error in allreduce for BLACS nums',calledby='elpa')
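    ! Each rank contributes its own BLACS id at position myid+1 while all other
    ! entries stay at -2, so the MPI_MAX reduction assembles the complete
    ! MPI-rank -> BLACS-id map without point-to-point communication.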

    !     iblacsnums(i) is the BLACS-process number of MPI-process i-1
    k = 1
    DO i = 1, nprow
       DO j = 1, npcol
          iusermap(i,j) = iblacsnums(k)
          k = k + 1
       ENDDO
    ENDDO
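    ! iusermap(i,j) now holds the BLACS id of the MPI process placed at grid
    ! position (i-1,j-1); BLACS_GRIDMAP below builds the context from this map,
    ! so the BLACS grid reproduces the row-major MPI layout chosen above.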
    !Get the Blacs default context
    CALL BLACS_GET(0,0,ictextblacs)
    ! Create the Grid
    CALL BLACS_GRIDMAP(ictextblacs,iusermap,nprow,nprow,npcol)
    !     Now check whether the BLACS grid is the one we wanted
    CALL BLACS_GRIDINFO(ictextblacs, nprow2,npcol2,myrowblacs,mycolblacs)
    IF (nprow2 /= nprow) THEN
       WRITE(6,*) 'Wrong number of rows in BLACS grid'
       WRITE(6,*) 'nprow=',nprow,' nprow2=',nprow2
       CALL juDFT_error('Wrong number of rows in BLACS grid',calledby= 'elpa')
    ENDIF
    IF (npcol2 /= npcol) THEN
       WRITE(6,*) 'Wrong number of columns in BLACS grid'
       WRITE(6,*) 'npcol=',npcol,' npcol2=',npcol2
       CALL juDFT_error('Wrong number of columns in BLACS grid', calledby ='elpa')

    ENDIF

    !Create communicators for ELPA
    !print *,"creating ELPA comms"
#if defined (CPP_ELPA_201605004) || defined (CPP_ELPA_201605003)||defined(CPP_ELPA_NEW)
    ierr=get_elpa_row_col_comms(mpi_subcom, myrowblacs, mycolblacs,mpi_comm_rows, mpi_comm_cols)
#else
    CALL get_elpa_row_col_comms(mpi_subcom, myrowblacs, mycolblacs,mpi_comm_rows, mpi_comm_cols)
#endif
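    ! mpi_comm_rows and mpi_comm_cols split mpi_subcom according to the process
    ! grid coordinates; ELPA's solve routines use them for their internal
    ! communication.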
    !print *,"creating ELPA comms  --  done"

    ! Create the ScaLAPACK descriptor sc_desc
    CALL descinit(sc_desc,m,m,nb,nb,0,0,ictextblacs,myrowssca,ierr)
    IF (ierr /=0 ) CALL juDFT_error('descinit1 failed',calledby='elpa')
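    ! sc_desc now describes an m x m matrix distributed in nb x nb blocks over
    ! the grid, with myrowssca as the local leading dimension (LLD).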

  END SUBROUTINE priv_create_blacsgrid
#else
  LOGICAL :: elpa_used=.false.
#endif
END MODULE m_elpa