hsmt_nonsph.F90 8.95 KB
Newer Older
Daniel Wortmann's avatar
Daniel Wortmann committed
1 2 3 4 5
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
6
MODULE m_hsmt_nonsph
Daniel Wortmann's avatar
Daniel Wortmann committed
7
  USE m_juDFT
8
  IMPLICIT NONE
Daniel Wortmann's avatar
Daniel Wortmann committed
9 10
  PRIVATE
  PUBLIC hsmt_nonsph
11
CONTAINS
Daniel Wortmann's avatar
Daniel Wortmann committed
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
  SUBROUTINE hsmt_nonsph(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
    USE m_types
    IMPLICIT NONE
    TYPE(t_mpi),INTENT(IN)        :: mpi
    TYPE(t_sym),INTENT(IN)        :: sym
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_cell),INTENT(IN)       :: cell
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_tlmplm),INTENT(IN)     :: td
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN)          :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(IN)            :: chi
    !     .. Array Arguments ..
    REAL,INTENT(IN)               :: fj(:,0:,:),gj(:,0:,:)
    CLASS(t_mat),INTENT(INOUT)     ::hmat
    CALL timestart("non-spherical setup")
    IF (mpi%n_size==1) THEN
       CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
    ELSE
       CALL priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
    ENDIF
    CALL timestop("non-spherical setup")
  END SUBROUTINE hsmt_nonsph
36

Daniel Wortmann's avatar
Daniel Wortmann committed
37 38 39 40
  SUBROUTINE priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
!Calculate overlap matrix
    USE m_hsmt_ab
    USE m_constants, ONLY : fpi_const,tpi_const
41
    USE m_types
Daniel Wortmann's avatar
Daniel Wortmann committed
42
    USE m_ylm
43 44 45 46 47 48 49 50
#if defined (_CUDA)
  !   cublas: required to use generic BLAS interface
  !   cudafor: required to use CUDA runtime API routines (e.g.
  !   cudaDeviceSynchronize)
    USE cublas
    USE cudafor
    USE nvtx
#endif
51
    IMPLICIT NONE
Daniel Wortmann's avatar
Daniel Wortmann committed
52
    TYPE(t_mpi),INTENT(IN)      :: mpi
53
    TYPE(t_sym),INTENT(IN)      :: sym
Daniel Wortmann's avatar
Daniel Wortmann committed
54
    TYPE(t_noco),INTENT(IN)     :: noco
55 56
    TYPE(t_cell),INTENT(IN)     :: cell
    TYPE(t_atoms),INTENT(IN)    :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
57 58
    TYPE(t_lapw),INTENT(IN)     :: lapw
    TYPE(t_tlmplm),INTENT(IN)   :: td
59 60
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
61 62
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(in)   :: chi
63 64
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
65 66
    REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
    CLASS(t_mat),INTENT(INOUT)::hmat
67 68

    
Daniel Wortmann's avatar
Daniel Wortmann committed
69 70 71
    INTEGER:: nn,na,ab_size,l,ll,m
    COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab2(:,:)
    real :: rchi
72 73 74 75 76 77 78
#ifdef _CUDA
    COMPLEX,ALLOCATABLE,DEVICE :: c_dev(:,:), ab1_dev(:,:), ab_dev(:,:)
    COMPLEX,ALLOCATABLE,DEVICE :: h_loc_dev(:,:)
    integer :: i, j, istat
    call nvtxStartRange("hsmt_nonsph",1)    
    print*, "running CUDA version"
#endif
79

80 81 82 83 84 85 86 87 88
    ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2),ab1(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
#ifdef _CUDA
    ALLOCATE(h_loc_dev(size(td%h_loc,1),size(td%h_loc,2)))
    ALLOCATE(ab1_dev(size(ab1,1),size(ab1,2)))
    ALLOCATE(ab_dev(size(ab,1),size(ab,2)))
    h_loc_dev(1:,1:) = CONJG(td%h_loc(0:,0:,n,isp)) !WORKAROUND, var_dev=CONJG(var_dev) does not work (pgi18.4)
    !note that basically all matrices in the GPU version are conjugates of their
    !cpu counterparts
#endif
89

90
    IF (iintsp.NE.jintsp) ALLOCATE(ab2(lapw%nv(jintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
91

Daniel Wortmann's avatar
Daniel Wortmann committed
92 93 94 95
    IF (hmat%l_real) THEN
       IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
          DEALLOCATE(hmat%data_c)
          ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
96
       ENDIF
Daniel Wortmann's avatar
Daniel Wortmann committed
97 98
       hmat%data_c=0.0
    ENDIF
99 100 101 102
#ifdef _CUDA
    ALLOCATE(c_dev(SIZE(hmat%data_c,1),SIZE(hmat%data_c,2)))
    c_dev = hmat%data_c
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
103 104 105 106 107 108
    
    DO nn = 1,atoms%neq(n)
       na = SUM(atoms%neq(:n-1))+nn
       IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
          rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
          
109 110 111 112 113 114
!#ifdef _CUDA
          !CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab_dev,ab_size,.TRUE.)
          !   istat = cudaDeviceSynchronize() 
!#else
          CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
!#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
115
          !Calculate Hamiltonian
116 117 118 119 120 121
#ifdef _CUDA
          ab_dev = CONJG(ab)
          CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab_dev,SIZE(ab_dev,1),h_loc_dev,SIZE(h_loc_dev,1),CMPLX(0.,0.),ab1_dev,SIZE(ab1_dev,1))
#else
          CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
122 123
          !ab1=MATMUL(ab(:lapw%nv(iintsp),:ab_size),td%h_loc(:ab_size,:ab_size,n,isp))
          IF (iintsp==jintsp) THEN
124 125 126 127 128 129
#ifdef _CUDA
    call nvtxStartRange("zherk",3)    
             CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,ab1_dev,SIZE(ab1_dev,1),1.0,c_dev,SIZE(c_dev,1))
             istat = cudaDeviceSynchronize() 
    call nvtxEndRange()    
#else
Daniel Wortmann's avatar
Daniel Wortmann committed
130
             CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,CONJG(ab1),SIZE(ab1,1),1.0,hmat%data_c,SIZE(hmat%data_c,1))
131
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
132 133
          ELSE  !here the l_ss off-diagonal part starts
             !Second set of ab is needed
134 135
             CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
             CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(:,:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab2,SIZE(ab2,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
136
             !Multiply for Hamiltonian
137
             CALL zgemm("N","T",lapw%nv(jintsp),lapw%nv(iintsp),ab_size,chi,conjg(ab2),SIZE(ab2,1),ab1,SIZE(ab1,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
138 139 140
          ENDIF
       ENDIF
    END DO
141 142 143
#ifdef _CUDA
    hmat%data_c = c_dev
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
144 145 146 147
    
    IF (hmat%l_real) THEN
       hmat%data_r=hmat%data_r+REAL(hmat%data_c)
    ENDIF
148 149 150 151 152

#ifdef _CUDA
    call nvtxEndRange
#endif
 END SUBROUTINE priv_noMPI
153 154


Daniel Wortmann's avatar
Daniel Wortmann committed
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
  SUBROUTINE priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
!Calculate overlap matrix
    USE m_hsmt_ab
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE m_ylm
    IMPLICIT NONE
    TYPE(t_mpi),INTENT(IN)      :: mpi
    TYPE(t_sym),INTENT(IN)      :: sym
    TYPE(t_noco),INTENT(IN)     :: noco
    TYPE(t_cell),INTENT(IN)     :: cell
    TYPE(t_atoms),INTENT(IN)    :: atoms
    TYPE(t_lapw),INTENT(IN)     :: lapw
    TYPE(t_tlmplm),INTENT(IN)   :: td
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(in)   :: chi
    !     ..
    !     .. Array Arguments ..
    REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
    CLASS(t_mat),INTENT(INOUT)::hmat
177

Daniel Wortmann's avatar
Daniel Wortmann committed
178 179
    
    INTEGER:: nn,na,ab_size,l,ll,m,i,ii
180
    COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab_select(:,:)
Daniel Wortmann's avatar
Daniel Wortmann committed
181
    real :: rchi
182

183
    ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2),ab1(lapw%nv(iintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2),ab_select(lapw%num_local_cols(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2))
184

185
    !IF (iintsp.NE.jintsp) ALLOCATE(ab_select1(lapw%num_local_cols(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2))
186

Daniel Wortmann's avatar
Daniel Wortmann committed
187 188 189 190 191 192 193 194 195 196 197 198 199
    IF (hmat%l_real) THEN
       IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
          DEALLOCATE(hmat%data_c)
          ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
       ENDIF
       hmat%data_c=0.0
    ENDIF
    
    DO nn = 1,atoms%neq(n)
       na = SUM(atoms%neq(:n-1))+nn
       IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
          rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
          
200
          CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
Daniel Wortmann's avatar
Daniel Wortmann committed
201 202
          !Calculate Hamiltonian
        
203
          CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
204
          !Cut out of ab1 only the needed elements here
205
          ab_select=ab1(mpi%n_rank+1:lapw%nv(iintsp):mpi%n_size,:)
Daniel Wortmann's avatar
Daniel Wortmann committed
206
          IF (iintsp==jintsp) THEN
207
             CALL zgemm("N","T",lapw%nv(iintsp),lapw%num_local_cols(iintsp),ab_size,CMPLX(rchi,0.0),CONJG(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(iintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
208 209
          ELSE
             !Second set of ab is needed
210
             CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
211
             CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(:,:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
212
             !Multiply for Hamiltonian
213
             CALL zgemm("N","t",lapw%nv(iintsp),lapw%num_local_cols(iintsp),ab_size,chi,conjg(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(iintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))   
Daniel Wortmann's avatar
Daniel Wortmann committed
214 215 216 217 218 219 220 221 222 223
          ENDIF
       ENDIF
    END DO
    !delete lower part of matrix
    !i=0
    !DO ii=mpi%n_rank+1,lapw%nv(iintsp),mpi%n_size
    !   i=i+1
    !   hmat%data_c(ii+1:,i)=0.0
    !ENDDO
    IF (hmat%l_real) THEN
224
       hmat%data_r=hmat%data_r+hmat%data_c
Daniel Wortmann's avatar
Daniel Wortmann committed
225 226 227
    ENDIF
    
  END SUBROUTINE priv_MPI
228

Daniel Wortmann's avatar
Daniel Wortmann committed
229
  
230
END MODULE m_hsmt_nonsph