!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
  ! NOTE: the wrapper header below is reconstructed so the file reads as one
  ! compilable unit; the exact argument order in FLEUR's elpa.F90 may differ.
  SUBROUTINE elpa_r(m,n,SUB_COMM,a,b,z,eig,num)
    USE m_juDFT
    USE elpa1
#ifdef CPP_ELPA2
    USE elpa2
#endif
    IMPLICIT NONE
    INCLUDE 'mpif.h'

    INTEGER, INTENT (IN)                  :: m,n       ! global matrix dimension m (n is not referenced in this body)
    INTEGER, INTENT (IN)                  :: SUB_COMM  ! MPI communicator spanning the participating processes
    INTEGER, INTENT (INOUT)               :: num       ! in: number of eigenpairs wanted; out: number kept by this process
    REAL,    INTENT   (OUT)               :: eig(:)    ! eigenvalues assigned to this process
    CPP_REALCOMPLEX, ALLOCATABLE, INTENT (INOUT)  :: a(:),b(:) ! packed matrices of the generalized problem A*z = lambda*B*z
    CPP_REALCOMPLEX, INTENT   (OUT)               :: z(:,:)    ! eigenvectors assigned to this process
    !

    ! In FLEUR the statement  #include "elpa_cpp.F90"  sits at this point and
    ! everything below is the content of that include file, shown inline here.
    !...  Local variables
    !
    INTEGER           :: sc_desc(9) !SCALAPACK DESCRIPTOR
    INTEGER           :: np,nb          ! number of processes, ScaLAPACK block size
    INTEGER           :: myid           ! rank within SUB_COMM
    INTEGER           :: npcol,nprow,mycol,myrow ! BLACS grid dimensions and this process' coordinates
    INTEGER           :: myrowssca,mycolssca ! local rows/columns in the block-cyclic layout
    INTEGER           :: ictextblacs    ! BLACS context handle
    INTEGER           :: mpi_comm_rows
    INTEGER           :: mpi_comm_cols  ! row/column communicators used by ELPA
    INTEGER           :: num2           ! number of eigenpairs actually computed

    LOGICAL           :: ok
    INTEGER           :: err,ierr
    INTEGER           :: i,k,n_col,n_row
    ! large distributed Matrices
    REAL,ALLOCATABLE     :: eig2(:)
    CPP_REALCOMPLEX, ALLOCATABLE :: asca(:,:), bsca(:,:),tmp2(:,:)
    CPP_REALCOMPLEX, ALLOCATABLE :: eigvec(:,:)
    INTEGER, EXTERNAL :: numroc, indxl2g  !SCALAPACK functions


    CALL priv_create_blacsgrid(sub_comm,m, np,nb,myid,npcol,nprow,mycol,myrow,myrowssca,mycolssca,&
         ictextblacs,sc_desc,mpi_comm_rows,mpi_comm_cols)
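    ! priv_create_blacsgrid builds an nprow x npcol BLACS process grid on
    ! SUB_COMM, fills the ScaLAPACK descriptor sc_desc and creates the
    ! row/column communicators ELPA needs.  As a rough sketch (standard
    ! ScaLAPACK conventions, not necessarily FLEUR's exact call sequence),
    ! the descriptor corresponds to
    !
    !    CALL descinit(sc_desc, m, m, nb, nb, 0, 0, ictextblacs, MAX(1,myrowssca), err)
    !
    ! i.e. an m x m matrix in nb x nb blocks, first block owned by process
    ! (0,0), with local leading dimension myrowssca.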

    !     Allocate the local pieces of A and B that this process holds in the
    !     ScaLAPACK distribution (myrowssca rows by mycolssca columns each)
    ALLOCATE ( asca(myrowssca,mycolssca), stat=err )
    IF (err/=0) CALL juDFT_error("allocate asca", calledby="chani_elpa")
    asca=0.0

    ALLOCATE ( bsca(myrowssca,mycolssca), stat=err  )
    IF (err/=0) CALL juDFT_error("allocate bsca", calledby="chani_elpa")
    bsca=0.0
    !
    ! transfer to ScaLAPACK block cyclic 2d distribution with
    ! block-size nb
    !
    !print *,"Before subredist"
    CALL subredist1(a,m,asca,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb )
    CALL subredist1(b,m,bsca,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb )
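    ! subredist1 is a FLEUR helper that scatters the 1d packed arrays a and b
    ! (upper triangles, as noted below) into the 2d block-cyclic layout
    ! asca/bsca on which ScaLAPACK and ELPA operate; only the upper halves of
    ! asca/bsca are meaningful afterwards.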

    ! for saving memory one might deallocate(a,b)
    !print *,"Before Allocate"
    ALLOCATE ( eig2(m), stat=err ) ! The eigenvalue array for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocate "eig2"', calledby ='elpa')


    ALLOCATE ( eigvec(myrowssca,mycolssca), stat=err ) ! Eigenvectors for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocate "eigvec"',calledby ='elpa')

    ALLOCATE ( tmp2(myrowssca,mycolssca), stat=err ) ! tmp_array
    IF (err.NE.0) CALL juDFT_error('Failed to allocate "tmp2"', calledby ='elpa')
    !print *,"Before elpa"

    !ELPA -start here
    ! Solve generalized problem
    !
    ! 1. Calculate Cholesky factorization of Matrix B = U**T * U
    !    and invert triangular matrix U
    !
    ! Please note: cholesky_complex/invert_trm_complex are not tuned for speed.
    ! The only reason for keeping them is that their ScaLAPACK counterpart
    ! PDPOTRF very often fails at higher processor counts for unknown reasons!
#ifdef CPP_ELPA_NEW
    CALL CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.,ok)
    CALL CPP_invert_trm(m,bsca,SIZE(bsca,1),nb,mycolssca,mpi_comm_rows,mpi_comm_cols,.false.,ok)
#else
    CALL CPP_CHOLESKY (m,bsca,SIZE(bsca,1),nb, mpi_comm_rows,mpi_comm_cols)
    CALL CPP_invert_trm(m, bsca, SIZE(bsca,1), nb, mpi_comm_rows, mpi_comm_cols)
#endif
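    ! At this point bsca holds U**-1 (in its upper triangle): with the
    ! factorization B = U**T * U (U**H * U in the complex case), the
    ! generalized problem A*z = lambda*B*z reduces to the standard problem
    !    (U**-T * A * U**-1) * (U*z) = lambda * (U*z),
    ! which steps 2 and 3 below set up and solve; step 4 recovers z.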

    ! 2. Calculate U**-T * A * U**-1

    ! 2a. eigvec = U**-T * A

    ! A is set only in its upper half, but the multiplications below (and
    ! later solve_evp) need the full matrix: fill the lower half from the upper half
    CALL CPP_transpose (m,m,CPP_ONE,asca,1,1,sc_desc, CPP_ZERO,eigvec,1,1,sc_desc)
    DO i=1,SIZE(asca,1)
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i,     nb, mycol, 0, npcol)
       n_row = numroc (n_col, nb, myrow, 0, nprow)
       asca(n_row+1:myrowssca,i) = eigvec(n_row+1:myrowssca,i)
    ENDDO
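    ! In the loop above, indxl2g maps local column i to its global index n_col,
    ! and numroc counts how many of the global rows 1..n_col live on this
    ! process; those local rows sit on or above the diagonal and keep their
    ! values from A, while the rows below are taken from the transpose.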

    CALL CPP_mult ('U', 'L',m, m,bsca,myrowssca,asca,SIZE(asca,1),nb, mpi_comm_rows, mpi_comm_cols,eigvec,myrowssca)
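    ! CPP_mult expands to ELPA's mult_at_b_real/mult_ah_b_complex: the call
    ! above computes eigvec = bsca**T * asca = U**-T * A, with 'U' stating
    ! that only the upper triangle of bsca is set and 'L' that only the lower
    ! part of the result is required.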

    ! 2b. tmp2 = eigvec**T
    CALL CPP_transpose(m,m,CPP_ONE,eigvec,1,1,sc_desc,CPP_ZERO,tmp2,1,1,sc_desc)

    ! 2c. A =  U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
    CALL CPP_mult ('U', 'U', m, m, bsca, SIZE(bsca,1), tmp2,&
         SIZE(tmp2,1),nb, mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
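    ! After this 'U','U' multiply only the upper triangle of asca is valid,
    ! which is why the transpose-and-fill step is repeated below.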

    ! A is only set in the upper half, solve_evp_real needs a full matrix
    ! Set lower half from upper half

    CALL CPP_transpose(m,m,CPP_ONE,asca,1,1,sc_desc, CPP_ZERO,eigvec,1,1,sc_desc)


    DO i=1,SIZE(asca,1)
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i,     nb, mycol, 0, npcol)
       n_row = numroc (n_col, nb, myrow, 0, nprow)
       asca(n_row+1:myrowssca,i) = eigvec(n_row+1:myrowssca,i)
    ENDDO

    ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
    !    Eigenvectors go to eigvec
    num2=num
#ifdef CPP_ELPA_NEW
#ifdef CPP_ELPA2
    err=CPP_solve_evp_2stage(m,num2,asca,SIZE(asca,1),&
         eig2,eigvec,SIZE(asca,1), nb,mycolssca, mpi_comm_rows, mpi_comm_cols,sub_comm)
#else
    err=CPP_solve_evp(m, num2,asca,SIZE(asca,1),&
         eig2,eigvec, SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols)
#endif
#else
#ifdef CPP_ELPA2
    CALL CPP_solve_evp_2stage(m,&
         num2,asca,SIZE(asca,1),eig2,eigvec,SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols,sub_comm)
#else
    CALL CPP_solve_evp(m, num2,&
         asca,SIZE(asca,1),eig2,eigvec, SIZE(asca,1), nb,mpi_comm_rows, mpi_comm_cols)
#endif
#endif
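    ! CPP_solve_evp / CPP_solve_evp_2stage are ELPA's 1-stage and 2-stage
    ! standard eigensolvers for the reduced problem; the 2-stage variant
    ! (CPP_ELPA2) goes via an intermediate banded form and is typically the
    ! faster choice for large matrices.  In the newer API (CPP_ELPA_NEW) the
    ! solvers are functions whose return status is captured in err.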

    ! 4. Backtransform eigenvectors: Z = U**-1 * eigvec

    ! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
    CALL CPP_transpose(m,m,CPP_ONE,bsca,1,1,sc_desc,CPP_ZERO,tmp2,1,1,sc_desc)

    CALL CPP_mult ('L', 'N',m, num2, tmp2, SIZE(asca,1),&
         eigvec, SIZE(asca,1),nb,mpi_comm_rows, mpi_comm_cols, asca, SIZE(asca,1))
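    ! Note that the back-transformed eigenvectors are written into asca,
    ! which is re-used here as the output buffer and redistributed by
    ! subredist2 further down.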

    ! End of the ELPA part: release the BLACS grid and the row/column communicators
    CALL BLACS_GRIDEXIT(ictextblacs,ierr)
    CALL MPI_COMM_FREE(mpi_comm_rows,ierr)
    CALL MPI_COMM_FREE(mpi_comm_cols,ierr)
    !print *,"elpa done"

    !
    !     Copy the eigenvalues expected by chani into eig: in round-robin
    !     fashion process i receives eigenvalues i+1, np+i+1, 2*np+i+1, ...,
    !     i.e. roughly num = num2/np eigenvalues per process.
    !
    num=FLOOR(REAL(num2)/np)
    IF (myid.LT.num2-(num2/np)*np) num=num+1 ! the first MOD(num2,np) processes keep one extra value
    k=1
    DO i=myid+1,num2,np
       eig(k)=eig2(i)
       k=k+1
    ENDDO
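    ! Worked example (hypothetical numbers): with np = 4 and num2 = 10,
    ! processes 0 and 1 keep num = 3 eigenvalues and processes 2 and 3 keep
    ! num = 2; process 1 ends up with eig(1:3) = eig2(2), eig2(6), eig2(10).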

    !write(*,"(a,i5,99f10.4)") "ELPA:",myid,eig

    !
    !     Redistribute eigvec from ScaLAPACK distribution to each process
    !     having all eigenvectors corresponding to his eigenvalues as above
    !
    CALL subredist2(z,m,num2,asca,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb)
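    ! subredist2 is the inverse FLEUR helper to subredist1: it gathers, for
    ! each process, those columns of the block-cyclic asca that belong to its
    ! round-robin share of eigenvalues into the plain local array z.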
    !

    DEALLOCATE ( asca )
    DEALLOCATE ( bsca )
    DEALLOCATE (eigvec, tmp2, eig2)

  END SUBROUTINE elpa_r