elpa_cpp.F90 9.13 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
    USE m_juDFT
7 8
    USE m_subredist1
    USE m_subredist2
9
    USE elpa1
10 11 12 13 14 15
#ifdef CPP_ELPA2
    USE elpa2
#endif
    IMPLICIT NONE
    INCLUDE 'mpif.h'

16
    INTEGER, INTENT (IN)                  :: m
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
    INTEGER, INTENT (IN)                  :: SUB_COMM
    INTEGER, INTENT (INOUT)               :: num
    REAL,    INTENT   (OUT)               :: eig(:)
    CPP_REALCOMPLEX, ALLOCATABLE, INTENT (INOUT)  :: a(:),b(:)
    CPP_REALCOMPLEX, INTENT   (OUT)               :: z(:,:)
    !

    !...  Local variables
    !
    INTEGER           :: sc_desc(9) !SCALAPACK DESCRIPTOR
    INTEGER           :: np,nb,myid,npcol,nprow,mycol,myrow,myrowssca
    INTEGER           :: mycolssca,ictextblacs,mpi_comm_rows
    INTEGER           :: mpi_comm_cols
    INTEGER           :: num2

    LOGICAL           :: ok
    INTEGER           :: err,ierr
    INTEGER           :: i,k,n_col,n_row
    ! large distributed Matrices
    REAL,ALLOCATABLE     :: eig2(:)
    CPP_REALCOMPLEX, ALLOCATABLE :: asca(:,:), bsca(:,:),tmp2(:,:)
    CPP_REALCOMPLEX, ALLOCATABLE :: eigvec(:,:)
    INTEGER, EXTERNAL :: numroc, indxl2g  !SCALAPACK functions


    CALL priv_create_blacsgrid(sub_comm,m, np,nb,myid,npcol,nprow,mycol,myrow,myrowssca,mycolssca,&
         ictextblacs,sc_desc,mpi_comm_rows,mpi_comm_cols)

    !     Number of columns the local process gets in ScaLAPACK distribution
    ALLOCATE ( asca(myrowssca,mycolssca), stat=err )
    IF (err/=0) CALL juDFT_error("allocate asca", calledby="chani_elpa")
    asca=0.0

    ALLOCATE ( bsca(myrowssca,mycolssca), stat=err  )
    IF (err/=0) CALL juDFT_error("allocate bsca", calledby="chani_elpa")
    bsca=0.0
    !
    ! transfer to ScaLAPACK block cyclic 2d distribution with
    ! block-size nb
    !
    !print *,"Before subredist"
58 59 60 61 62 63 64
#ifdef CPP_REAL
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_r=a, asca_r=asca )
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_r=b, asca_r=bsca)
#else
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_c=a, asca_c=asca )
    CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb, achi_c=b, asca_c=bsca)
#endif
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
    ! for saving memory one might deallocate(a,b)
    !print *,"Before Allocate"
    ALLOCATE ( eig2(m), stat=err ) ! The eigenvalue array for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "eig2"', calledby ='elpa')


    ALLOCATE ( eigvec(myrowssca,mycolssca), stat=err ) ! Eigenvectors for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "eigvec"',calledby ='elpa')

    ALLOCATE ( tmp2(myrowssca,mycolssca), stat=err ) ! tmp_array
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "tmp2"', calledby ='elpa')
    !print *,"Before elpa"

    !ELPA -start here
    ! Solve generalized problem
    !
    ! 1. Calculate Cholesky factorization of Matrix B = U**T * U
    !    and invert triangular matrix U
    !
    ! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed.
    ! The only reason having them is that the Scalapack counterpart
    ! PDPOTRF very often fails on higher processor numbers for unknown reasons!
Daniel Wortmann's avatar
Daniel Wortmann committed
87
#if defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
88 89 90
    ok=CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.)
    ok=CPP_invert_trm(m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.)
#elif defined CPP_ELPA_NEW
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
    CALL CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.,ok)
    CALL CPP_invert_trm(m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.,ok)
#else
    CALL CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb, mpi_comm_rows,mpi_comm_cols)
    CALL CPP_invert_trm(m, bsca, SIZE(bsca,1), nb, mpi_comm_rows, mpi_comm_cols)
#endif

    ! 2. Calculate U**-T * A * U**-1

    ! 2a. eigvec = U**-T * A

    ! A is only set in the upper half, solve_evp_real needs a full matrix
    ! Set lower half from upper half
    CALL CPP_transpose (m,m,CPP_ONE,asca,1,1,sc_desc, CPP_ZERO,eigvec,1,1,sc_desc)
    DO i=1,SIZE(asca,1)
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i,     nb, mycol, 0, npcol)
       n_row = numroc (n_col, nb, myrow, 0, nprow)
       asca(n_row+1:myrowssca,i) = eigvec(n_row+1:myrowssca,i)
    ENDDO
Daniel Wortmann's avatar
Daniel Wortmann committed
112 113 114 115
#ifdef CPP_ELPA_201605004
    ok=CPP_mult ('U', 'L',m, m,bsca,myrowssca,mycolssca,asca,SIZE(asca,1),SIZE(asca,2),nb,&
         mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca,mycolssca)
#elif CPP_ELPA_201605003
116 117
    ok=CPP_mult ('U', 'L',m, m,bsca,myrowssca,asca,SIZE(asca,1),nb, mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca)
#else
118
    CALL CPP_mult ('U', 'L',m, m,bsca,myrowssca,asca,SIZE(asca,1),nb, mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca)
119
#endif
120 121 122 123
    ! 2b. tmp2 = eigvec**T
    CALL CPP_transpose(m,m,CPP_ONE,eigvec,1,1,sc_desc,CPP_ZERO,tmp2,1,1,sc_desc)

    ! 2c. A =  U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
Daniel Wortmann's avatar
Daniel Wortmann committed
124 125 126 127
#ifdef CPP_ELPA_201605004
    ok=CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1),SIZE(bsca,2), tmp2,&
         SIZE(tmp2,1),SIZE(tmp2,2),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1),SIZE(asca,2))
#elif CPP_ELPA_201605003
128 129 130
    ok=CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1), tmp2,&
         SIZE(tmp2,1),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
#else
131 132
    CALL CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1), tmp2,&
         SIZE(tmp2,1),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
133 134
#endif

135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152

    ! A is only set in the upper half, solve_evp_real needs a full matrix
    ! Set lower half from upper half

    CALL CPP_transpose(m,m,CPP_ONE,asca,1,1,sc_desc, CPP_ZERO,eigvec,1,1,sc_desc)


    DO i=1,SIZE(asca,1)
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i,     nb, mycol, 0, npcol)
       n_row = numroc (n_col, nb, myrow, 0, nprow)
       asca(n_row+1:myrowssca,i) = eigvec(n_row+1:myrowssca,i)
    ENDDO

    ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
    !    Eigenvectors go to eigvec
    num2=num
Daniel Wortmann's avatar
Daniel Wortmann committed
153
#if defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
154 155 156 157 158 159 160 161
#ifdef CPP_ELPA2
    ok=CPP_solve_evp_2stage(m,num2,asca,SIZE(asca,1),&
         eig2,eigvec,SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols,sub_comm)
#else
    ok=CPP_solve_evp_1stage(m, num2,asca,SIZE(asca,1),&
         eig2,eigvec, SIZE(asca,1), nb,mycolssca,mpi_comm_rows, mpi_comm_cols)
#endif
#elif defined CPP_ELPA_NEW
162 163
#ifdef CPP_ELPA2
    err=CPP_solve_evp_2stage(m,num2,asca,SIZE(asca,1),&
Daniel Wortmann's avatar
Daniel Wortmann committed
164
         eig2,eigvec, SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols,sub_comm)
165 166
#else
    err=CPP_solve_evp(m, num2,asca,SIZE(asca,1),&
Daniel Wortmann's avatar
Daniel Wortmann committed
167
         eig2,eigvec, SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols)
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
#endif
#else
#ifdef CPP_ELPA2
    CALL CPP_solve_evp_2stage(m,&
         num2,asca,SIZE(asca,1),eig2,eigvec,SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols,sub_comm)
#else
    CALL CPP_solve_evp(m, num2,&
         asca,SIZE(asca,1),eig2,eigvec, SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols)
#endif
#endif

    ! 4. Backtransform eigenvectors: Z = U**-1 * eigvec

    ! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
    CALL CPP_transpose(m,m,CPP_ONE,bsca,1,1,sc_desc,CPP_ZERO,tmp2,1,1,sc_desc)

Daniel Wortmann's avatar
Daniel Wortmann committed
184 185 186 187
#ifdef CPP_ELPA_201605004
    ok= CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),SIZE(asca,2),&
         eigvec, SIZE(asca,1),SIZE(asca,2),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1),SIZE(asca,2))
#elif CPP_ELPA_201605003
188 189 190
    ok= CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),&
         eigvec, SIZE(asca,1),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
#else
191 192
    CALL CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),&
         eigvec, SIZE(asca,1),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
193
#endif
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
    ! END of ELPA stuff
    CALL BLACS_GRIDEXIT(ictextblacs,ierr)
    CALL MPI_COMM_FREE(mpi_comm_rows,ierr)
    CALL MPI_COMM_FREE(mpi_comm_cols,ierr)
    !print *,"elpa done"

    !
    !     Put those eigenvalues expected by chani to eig, i.e. for
    !     process i these are eigenvalues i+1, np+i+1, 2*np+i+1...
    !     Only num=num2/np eigenvalues per process
    !
    num=FLOOR(REAL(num2)/np)
    IF (myid.LT.num2-(num2/np)*np) num=num+1
    k=1
    DO i=myid+1,num2,np
       eig(k)=eig2(i)
       k=k+1
    ENDDO

    !write(*,"(a,i5,99f10.4)") "ELPA:",myid,eig

    !
    !     Redistribute eigvec from ScaLAPACK distribution to each process
    !     having all eigenvectors corresponding to his eigenvalues as above
    !
219 220 221 222 223
#ifdef CPP_REAL
    CALL subredist2(m,num2,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb,z,asca)
#else
    CALL subredist2(m,num2,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb,achi_c=z,asca_c=asca)
#endif
224 225 226 227 228 229
    !

    DEALLOCATE ( asca )
    DEALLOCATE ( bsca )
    DEALLOCATE (eigvec, tmp2, eig2)