elpa_cpp.F90 10.9 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
    USE m_juDFT
7 8
    USE m_subredist1
    USE m_subredist2
9
    USE elpa1
10 11 12
#ifdef CPP_ELPA2
    USE elpa2
#endif
13 14 15 16
#ifdef CPP_ELPA_201705003    
    USE elpa
#endif
IMPLICIT NONE
17 18
    INCLUDE 'mpif.h'

19
    INTEGER, INTENT (IN)                  :: m
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
    INTEGER, INTENT (IN)                  :: SUB_COMM
    INTEGER, INTENT (INOUT)               :: num
    REAL,    INTENT   (OUT)               :: eig(:)
    CPP_REALCOMPLEX, ALLOCATABLE, INTENT (INOUT)  :: a(:),b(:)
    CPP_REALCOMPLEX, INTENT   (OUT)               :: z(:,:)
    !

    !...  Local variables
    !
    INTEGER           :: sc_desc(9) !SCALAPACK DESCRIPTOR
    INTEGER           :: np,nb,myid,npcol,nprow,mycol,myrow,myrowssca
    INTEGER           :: mycolssca,ictextblacs,mpi_comm_rows
    INTEGER           :: mpi_comm_cols
    INTEGER           :: num2

    LOGICAL           :: ok
    INTEGER           :: err,ierr
    INTEGER           :: i,k,n_col,n_row
    ! large distributed Matrices
    REAL,ALLOCATABLE     :: eig2(:)
    CPP_REALCOMPLEX, ALLOCATABLE :: asca(:,:), bsca(:,:),tmp2(:,:)
    CPP_REALCOMPLEX, ALLOCATABLE :: eigvec(:,:)
    INTEGER, EXTERNAL :: numroc, indxl2g  !SCALAPACK functions
43
#ifdef CPP_ELPA_201705003    
44
    INTEGER :: kernel
45
    CLASS(elpa_t),pointer :: elpa_obj
46

47 48 49
    err = elpa_init(20170403)
    elpa_obj => elpa_allocate()
#endif
50

51
    num2=num
52 53
    CALL priv_create_blacsgrid(sub_comm,m, np,nb,myid,npcol,nprow,mycol,myrow,myrowssca,mycolssca,&
         ictextblacs,sc_desc,mpi_comm_rows,mpi_comm_cols)
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
#ifdef CPP_ELPA_201705003    
    CALL elpa_obj%set("na", m, err)
    CALL elpa_obj%set("nev", num2, err)
    CALL elpa_obj%set("local_nrows", myrowssca, err)
    CALL elpa_obj%set("local_ncols", mycolssca, err)
    CALL elpa_obj%set("nblk", nb, err)
    CALL elpa_obj%set("mpi_comm_parent", sub_comm, err)
    CALL elpa_obj%set("process_row", myrow, err)
    CALL elpa_obj%set("process_col", mycol, err)
#ifdef CPP_ELPA2
    CALL elpa_obj%set("solver", ELPA_SOLVER_2STAGE)
#else
    CALL elpa_obj%set("solver", ELPA_SOLVER_1STAGE)
#endif
    err = elpa_obj%setup()
69 70 71 72
    if (myid == 0) then
       call elpa_obj%get("complex_kernel", kernel)
       print *, "elpa uses " // elpa_int_value_to_string("complex_kernel", kernel) // " kernel"
    endif
73 74
#endif

75 76 77 78 79 80 81 82 83 84 85 86 87 88

    !     Number of columns the local process gets in ScaLAPACK distribution
    ALLOCATE ( asca(myrowssca,mycolssca), stat=err )
    IF (err/=0) CALL juDFT_error("allocate asca", calledby="chani_elpa")
    asca=0.0

    ALLOCATE ( bsca(myrowssca,mycolssca), stat=err  )
    IF (err/=0) CALL juDFT_error("allocate bsca", calledby="chani_elpa")
    bsca=0.0
    !
    ! transfer to ScaLAPACK block cyclic 2d distribution with
    ! block-size nb
    !
    !print *,"Before subredist"
89 90 91 92 93 94 95
#ifdef CPP_REAL
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_r=a, asca_r=asca )
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_r=b, asca_r=bsca)
#else
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_c=a, asca_c=asca )
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_c=b, asca_c=bsca)
#endif
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
    ! for saving memory one might deallocate(a,b)
    !print *,"Before Allocate"
    ALLOCATE ( eig2(m), stat=err ) ! The eigenvalue array for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "eig2"', calledby ='elpa')


    ALLOCATE ( eigvec(myrowssca,mycolssca), stat=err ) ! Eigenvectors for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "eigvec"',calledby ='elpa')

    ALLOCATE ( tmp2(myrowssca,mycolssca), stat=err ) ! tmp_array
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "tmp2"', calledby ='elpa')
    !print *,"Before elpa"

    !ELPA -start here
    ! Solve generalized problem
    !
    ! 1. Calculate Cholesky factorization of Matrix B = U**T * U
    !    and invert triangular matrix U
    !
    ! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed.
    ! The only reason having them is that the Scalapack counterpart
    ! PDPOTRF very often fails on higher processor numbers for unknown reasons!
118 119 120 121 122

#if defined(CPP_ELPA_201705003)
    CALL elpa_obj%cholesky(bsca, err)
    CALL elpa_obj%invert_triangular(bsca, err)
#elif defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
123 124 125
    ok=CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.)
    ok=CPP_invert_trm(m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.)
#elif defined CPP_ELPA_NEW
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
    CALL CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.,ok)
    CALL CPP_invert_trm(m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.,ok)
#else
    CALL CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb, mpi_comm_rows,mpi_comm_cols)
    CALL CPP_invert_trm(m, bsca, SIZE(bsca,1), nb, mpi_comm_rows, mpi_comm_cols)
#endif

    ! 2. Calculate U**-T * A * U**-1

    ! 2a. eigvec = U**-T * A

    ! A is only set in the upper half, solve_evp_real needs a full matrix
    ! Set lower half from upper half
    CALL CPP_transpose (m,m,CPP_ONE,asca,1,1,sc_desc, CPP_ZERO,eigvec,1,1,sc_desc)
    DO i=1,SIZE(asca,1)
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i,     nb, mycol, 0, npcol)
       n_row = numroc (n_col, nb, myrow, 0, nprow)
       asca(n_row+1:myrowssca,i) = eigvec(n_row+1:myrowssca,i)
    ENDDO
147 148 149 150

#if defined (CPP_ELPA_201705003)
    CALL elpa_obj%hermitian_multiply('U','L', m, bsca, asca, myrowssca, mycolssca, eigvec, myrowssca, mycolssca, err)
#elif defined (CPP_ELPA_201605004)
Daniel Wortmann's avatar
Daniel Wortmann committed
151 152
    ok=CPP_mult ('U', 'L',m, m,bsca,myrowssca,mycolssca,asca,SIZE(asca,1),SIZE(asca,2),nb,&
         mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca,mycolssca)
153
#elif defined (CPP_ELPA_201605003)
154 155
    ok=CPP_mult ('U', 'L',m, m,bsca,myrowssca,asca,SIZE(asca,1),nb, mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca)
#else
156
    CALL CPP_mult ('U', 'L',m, m,bsca,myrowssca,asca,SIZE(asca,1),nb, mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca)
157
#endif
158

159 160 161 162
    ! 2b. tmp2 = eigvec**T
    CALL CPP_transpose(m,m,CPP_ONE,eigvec,1,1,sc_desc,CPP_ZERO,tmp2,1,1,sc_desc)

    ! 2c. A =  U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
163 164 165
#if defined (CPP_ELPA_201705003)
    CALL elpa_obj%hermitian_multiply('U','U', m, bsca, tmp2, myrowssca, mycolssca, asca, myrowssca, mycolssca, err)
#elif defined (CPP_ELPA_201605004)
Daniel Wortmann's avatar
Daniel Wortmann committed
166 167
    ok=CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1),SIZE(bsca,2), tmp2,&
         SIZE(tmp2,1),SIZE(tmp2,2),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1),SIZE(asca,2))
168
#elif defined (CPP_ELPA_201605003)
169 170 171
    ok=CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1), tmp2,&
         SIZE(tmp2,1),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
#else
172 173
    CALL CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1), tmp2,&
         SIZE(tmp2,1),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
174 175
#endif

176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
    ! A is only set in the upper half, solve_evp_real needs a full matrix
    ! Set lower half from upper half

    CALL CPP_transpose(m,m,CPP_ONE,asca,1,1,sc_desc, CPP_ZERO,eigvec,1,1,sc_desc)


    DO i=1,SIZE(asca,1)
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i,     nb, mycol, 0, npcol)
       n_row = numroc (n_col, nb, myrow, 0, nprow)
       asca(n_row+1:myrowssca,i) = eigvec(n_row+1:myrowssca,i)
    ENDDO

    ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
    !    Eigenvectors go to eigvec
192 193 194
#if defined (CPP_ELPA_201705003)
    CALL elpa_obj%eigenvectors(asca, eig2, eigvec, err)
#elif defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
195 196 197 198 199 200 201 202
#ifdef CPP_ELPA2
    ok=CPP_solve_evp_2stage(m,num2,asca,SIZE(asca,1),&
         eig2,eigvec,SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols,sub_comm)
#else
    ok=CPP_solve_evp_1stage(m, num2,asca,SIZE(asca,1),&
         eig2,eigvec, SIZE(asca,1), nb,mycolssca,mpi_comm_rows, mpi_comm_cols)
#endif
#elif defined CPP_ELPA_NEW
203 204
#ifdef CPP_ELPA2
    err=CPP_solve_evp_2stage(m,num2,asca,SIZE(asca,1),&
Daniel Wortmann's avatar
Daniel Wortmann committed
205
         eig2,eigvec, SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols,sub_comm)
206 207
#else
    err=CPP_solve_evp(m, num2,asca,SIZE(asca,1),&
Daniel Wortmann's avatar
Daniel Wortmann committed
208
         eig2,eigvec, SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols)
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
#endif
#else
#ifdef CPP_ELPA2
    CALL CPP_solve_evp_2stage(m,&
         num2,asca,SIZE(asca,1),eig2,eigvec,SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols,sub_comm)
#else
    CALL CPP_solve_evp(m, num2,&
         asca,SIZE(asca,1),eig2,eigvec, SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols)
#endif
#endif

    ! 4. Backtransform eigenvectors: Z = U**-1 * eigvec

    ! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
    CALL CPP_transpose(m,m,CPP_ONE,bsca,1,1,sc_desc,CPP_ZERO,tmp2,1,1,sc_desc)

225 226 227
#if defined (CPP_ELPA_201705003)
    CALL elpa_obj%hermitian_multiply('L','N', num2, tmp2, eigvec, myrowssca, mycolssca, asca, myrowssca, mycolssca, err)
#elif defined (CPP_ELPA_201605004)
Daniel Wortmann's avatar
Daniel Wortmann committed
228 229 230
    ok= CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),SIZE(asca,2),&
         eigvec, SIZE(asca,1),SIZE(asca,2),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1),SIZE(asca,2))
#elif CPP_ELPA_201605003
231 232 233
    ok= CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),&
         eigvec, SIZE(asca,1),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
#else
234 235
    CALL CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),&
         eigvec, SIZE(asca,1),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
236
#endif
237 238 239 240 241

#if defined (CPP_ELPA_201705003)
    CALL elpa_deallocate(elpa_obj)
    CALL elpa_uninit()
#endif
242 243
    ! END of ELPA stuff
    CALL BLACS_GRIDEXIT(ictextblacs,ierr)
244
#if ( !defined (CPP_ELPA_201705003))
245 246
    CALL MPI_COMM_FREE(mpi_comm_rows,ierr)
    CALL MPI_COMM_FREE(mpi_comm_cols,ierr)
247
#endif
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
    !print *,"elpa done"

    !
    !     Put those eigenvalues expected by chani to eig, i.e. for
    !     process i these are eigenvalues i+1, np+i+1, 2*np+i+1...
    !     Only num=num2/np eigenvalues per process
    !
    num=FLOOR(REAL(num2)/np)
    IF (myid.LT.num2-(num2/np)*np) num=num+1
    k=1
    DO i=myid+1,num2,np
       eig(k)=eig2(i)
       k=k+1
    ENDDO

    !write(*,"(a,i5,99f10.4)") "ELPA:",myid,eig

    !
    !     Redistribute eigvec from ScaLAPACK distribution to each process
    !     having all eigenvectors corresponding to his eigenvalues as above
    !
269 270 271 272 273
#ifdef CPP_REAL
    CALL subredist2(m,num2,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb,z,asca)
#else
    CALL subredist2(m,num2,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb,achi_c=z,asca_c=asca)
#endif
274 275 276 277 278
    !

    DEALLOCATE ( asca )
    DEALLOCATE ( bsca )
    DEALLOCATE (eigvec, tmp2, eig2)
279