hsmt_nonsph.F90 11.9 KB
Newer Older
Daniel Wortmann's avatar
Daniel Wortmann committed
1 2 3 4 5
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
6
MODULE m_hsmt_nonsph
Daniel Wortmann's avatar
Daniel Wortmann committed
7
  USE m_juDFT
8
  IMPLICIT NONE
Daniel Wortmann's avatar
Daniel Wortmann committed
9 10
  PRIVATE
  PUBLIC hsmt_nonsph
Uliana Alekseeva's avatar
Uliana Alekseeva committed
11 12 13 14 15 16
  INTERFACE priv_noMPI
    module procedure priv_noMPI_cpu
#ifdef CPP_GPU
    module procedure priv_noMPI_gpu
#endif
  END INTERFACE
17
CONTAINS
Daniel Wortmann's avatar
Daniel Wortmann committed
18 19 20 21 22 23 24 25 26 27 28 29 30 31
  SUBROUTINE hsmt_nonsph(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
    USE m_types
    IMPLICIT NONE
    TYPE(t_mpi),INTENT(IN)        :: mpi
    TYPE(t_sym),INTENT(IN)        :: sym
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_cell),INTENT(IN)       :: cell
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_tlmplm),INTENT(IN)     :: td
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN)          :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(IN)            :: chi
    !     .. Array Arguments ..
32 33 34
#if defined CPP_GPU
    REAL,MANAGED,INTENT(IN)    :: fj(:,:,:),gj(:,:,:)
#else
35
    REAL,INTENT(IN)            :: fj(:,0:,:),gj(:,0:,:)
36
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
37
    CLASS(t_mat),INTENT(INOUT)     ::hmat
Uliana Alekseeva's avatar
Uliana Alekseeva committed
38 39
#if defined CPP_GPU
    COMPLEX,ALLOCATABLE,DEVICE :: h_loc_dev(:,:)
40
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
41 42
    CALL timestart("non-spherical setup")
    IF (mpi%n_size==1) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
43 44 45 46
#if defined CPP_GPU
    ALLOCATE(h_loc_dev(size(td%h_loc,1),size(td%h_loc,2)))
    h_loc_dev(1:,1:) = CONJG(td%h_loc(0:,0:,n,isp)) 

47
       CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj,gj,hmat)
48
#else
Daniel Wortmann's avatar
Daniel Wortmann committed
49
       CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
50
#endif
Daniel Wortmann's avatar
Daniel Wortmann committed
51 52 53 54 55
    ELSE
       CALL priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
    ENDIF
    CALL timestop("non-spherical setup")
  END SUBROUTINE hsmt_nonsph
56

Uliana Alekseeva's avatar
Uliana Alekseeva committed
57
#if defined CPP_GPU
58
  SUBROUTINE priv_noMPI_gpu(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj_dev,gj_dev,hmat)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
59 60
!Calculate overlap matrix, GPU version
!note that basically all matrices in the GPU version are conjugates of their cpu counterparts
Daniel Wortmann's avatar
Daniel Wortmann committed
61 62
    USE m_hsmt_ab
    USE m_constants, ONLY : fpi_const,tpi_const
63
    USE m_types
Daniel Wortmann's avatar
Daniel Wortmann committed
64
    USE m_ylm
65
  !   cublas: required to use generic BLAS interface
66 67 68
  !   cudafor: required to use CUDA runtime API routines 
  !   nvtx: profiling
    USE cublas   
69 70
    USE cudafor
    USE nvtx
71

72
    IMPLICIT NONE
Daniel Wortmann's avatar
Daniel Wortmann committed
73
    TYPE(t_mpi),INTENT(IN)      :: mpi
74
    TYPE(t_sym),INTENT(IN)      :: sym
Daniel Wortmann's avatar
Daniel Wortmann committed
75
    TYPE(t_noco),INTENT(IN)     :: noco
76 77
    TYPE(t_cell),INTENT(IN)     :: cell
    TYPE(t_atoms),INTENT(IN)    :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
78
    TYPE(t_lapw),INTENT(IN)     :: lapw
Uliana Alekseeva's avatar
Uliana Alekseeva committed
79
    COMPLEX, INTENT(IN),DEVICE  :: h_loc_dev(:,:)
80 81
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
82 83
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(in)   :: chi
84 85
    !     ..
    !     .. Array Arguments ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
86
    REAL,   INTENT(IN),   DEVICE :: fj_dev(:,:,:), gj_dev(:,:,:)
87
    CLASS(t_mat),INTENT(INOUT)     ::hmat
88
    
Daniel Wortmann's avatar
Daniel Wortmann committed
89 90
    INTEGER:: nn,na,ab_size,l,ll,m
    real :: rchi
Uliana Alekseeva's avatar
Uliana Alekseeva committed
91
    COMPLEX,ALLOCATABLE,DEVICE :: ab1_dev(:,:), ab_dev(:,:), ab2_dev(:,:)
92 93
    integer :: i, j, istat
    call nvtxStartRange("hsmt_nonsph",1)    
94

95 96
    ALLOCATE(ab1_dev(lapw%nv(jintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
    ALLOCATE(ab_dev(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
97
    IF (iintsp.NE.jintsp) ALLOCATE(ab2_dev(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
98

99 100 101 102 103 104 105 106
    IF (hmat%l_real) THEN
       IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
          DEALLOCATE(hmat%data_c)
          ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
       ENDIF
       hmat%data_c=0.0
    ENDIF

Daniel Wortmann's avatar
Daniel Wortmann committed
107 108 109 110
    DO nn = 1,atoms%neq(n)
       na = SUM(atoms%neq(:n-1))+nn
       IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
          rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
111 112

          CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj_dev,gj_dev,ab_dev,ab_size,.TRUE.)
113

Daniel Wortmann's avatar
Daniel Wortmann committed
114
          !Calculate Hamiltonian
115 116
          CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab_dev,SIZE(ab_dev,1),&
                     h_loc_dev,SIZE(h_loc_dev,1),CMPLX(0.,0.),ab1_dev,SIZE(ab1_dev,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
117
          IF (iintsp==jintsp) THEN
118
             call nvtxStartRange("zherk",3)
119
             CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,ab1_dev,SIZE(ab1_dev,1),1.0,hmat%data_c,SIZE(hmat%data_c,1))
120
             istat = cudaDeviceSynchronize() 
121
             call nvtxEndRange()    
Daniel Wortmann's avatar
Daniel Wortmann committed
122 123
          ELSE  !here the l_ss off-diagonal part starts
             !Second set of ab is needed
124
             CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj_dev,gj_dev,ab_dev,ab_size,.TRUE.)
125
             CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab_dev,SIZE(ab_dev,1),&
126
                        h_loc_dev,SIZE(h_loc_dev,1),CMPLX(0.,0.),ab2_dev,SIZE(ab2_dev,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
127
             !Multiply for Hamiltonian
Uliana Alekseeva's avatar
Uliana Alekseeva committed
128

129 130 131 132 133 134 135
             !$cuf kernel do<<<*,256>>>
             do i = 1,size(ab1_dev,2)
               do j = 1,size(ab1_dev,1)
                  ab1_dev(j,i) = conjg(ab1_dev(j,i))
               enddo
             enddo
             CALL zgemm("N","T",lapw%nv(iintsp),lapw%nv(jintsp),ab_size,chi,ab2_dev,SIZE(ab2_dev,1),&
136
                        ab1_dev,SIZE(ab1_dev,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
137 138 139
          ENDIF
       ENDIF
    END DO
140

141 142 143
    IF (hmat%l_real) THEN
       hmat%data_r=hmat%data_r+REAL(hmat%data_c)
    ENDIF
144
    call nvtxEndRange
145
 END SUBROUTINE priv_noMPI_gpu
146
#endif
147

Uliana Alekseeva's avatar
Uliana Alekseeva committed
148
  SUBROUTINE priv_noMPI_cpu(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
!Calculate overlap matrix
    USE m_hsmt_ab
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE m_ylm

    IMPLICIT NONE
    TYPE(t_mpi),INTENT(IN)      :: mpi
    TYPE(t_sym),INTENT(IN)      :: sym
    TYPE(t_noco),INTENT(IN)     :: noco
    TYPE(t_cell),INTENT(IN)     :: cell
    TYPE(t_atoms),INTENT(IN)    :: atoms
    TYPE(t_lapw),INTENT(IN)     :: lapw
    TYPE(t_tlmplm),INTENT(IN)   :: td
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(in)   :: chi
    !     ..
    !     .. Array Arguments ..
    REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
    CLASS(t_mat),INTENT(INOUT)::hmat

    
    INTEGER:: nn,na,ab_size,l,ll,m
    COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab2(:,:)
    real :: rchi

    ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2),ab1(lapw%nv(jintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))

    IF (iintsp.NE.jintsp) ALLOCATE(ab2(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))

    IF (hmat%l_real) THEN
       IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
          DEALLOCATE(hmat%data_c)
          ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
       ENDIF
       hmat%data_c=0.0
    ENDIF
    
    DO nn = 1,atoms%neq(n)
       na = SUM(atoms%neq(:n-1))+nn
       IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
          rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))

          CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
          !Calculate Hamiltonian
Uliana Alekseeva's avatar
Uliana Alekseeva committed
196 197
          CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),&
                     td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
198 199
          !ab1=MATMUL(ab(:lapw%nv(iintsp),:ab_size),td%h_loc(:ab_size,:ab_size,n,isp))
          IF (iintsp==jintsp) THEN
200 201 202 203
             IF (isp<3) THEN
                CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,CONJG(ab1),SIZE(ab1,1),1.0,hmat%data_c,SIZE(hmat%data_c,1))
             ELSE !This is the case of a local off-diagonal contribution.
                  !It is not Hermitian, so we need to USE zgemm CALL
204
                CALL hsmt_ab(sym,atoms,noco,isp,2,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
205 206 207
                CALL zgemm("N","T",lapw%nv(iintsp),lapw%nv(jintsp),ab_size,chi,CONJG(ab),SIZE(ab,1),&
                     ab1,SIZE(ab1,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
             ENDIF
208 209 210
          ELSE  !here the l_ss off-diagonal part starts
             !Second set of ab is needed
             CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
211 212
             CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),&
                        td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab2,SIZE(ab2,1))
213
             !Multiply for Hamiltonian
Uliana Alekseeva's avatar
Uliana Alekseeva committed
214 215
             CALL zgemm("N","T",lapw%nv(iintsp),lapw%nv(jintsp),ab_size,chi,conjg(ab2),SIZE(ab2,1),&
                        ab1,SIZE(ab1,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
216 217 218 219 220 221 222 223
          ENDIF
       ENDIF
    END DO
    
    IF (hmat%l_real) THEN
       hmat%data_r=hmat%data_r+REAL(hmat%data_c)
    ENDIF

Uliana Alekseeva's avatar
Uliana Alekseeva committed
224
 END SUBROUTINE priv_noMPI_cpu
225 226


Daniel Wortmann's avatar
Daniel Wortmann committed
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
  SUBROUTINE priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
!Calculate overlap matrix
    USE m_hsmt_ab
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE m_ylm
    IMPLICIT NONE
    TYPE(t_mpi),INTENT(IN)      :: mpi
    TYPE(t_sym),INTENT(IN)      :: sym
    TYPE(t_noco),INTENT(IN)     :: noco
    TYPE(t_cell),INTENT(IN)     :: cell
    TYPE(t_atoms),INTENT(IN)    :: atoms
    TYPE(t_lapw),INTENT(IN)     :: lapw
    TYPE(t_tlmplm),INTENT(IN)   :: td
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX,INTENT(in)   :: chi
    !     ..
    !     .. Array Arguments ..
    REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
    CLASS(t_mat),INTENT(INOUT)::hmat
249

Daniel Wortmann's avatar
Daniel Wortmann committed
250 251
    
    INTEGER:: nn,na,ab_size,l,ll,m,i,ii
252
    COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab_select(:,:)
Daniel Wortmann's avatar
Daniel Wortmann committed
253
    real :: rchi
254

255
    ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2),ab1(lapw%nv(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2),ab_select(lapw%num_local_cols(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2))
256

257
    !IF (iintsp.NE.jintsp) ALLOCATE(ab_select1(lapw%num_local_cols(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2))
258

Daniel Wortmann's avatar
Daniel Wortmann committed
259 260 261 262 263 264 265 266 267 268 269 270 271
    IF (hmat%l_real) THEN
       IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
          DEALLOCATE(hmat%data_c)
          ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
       ENDIF
       hmat%data_c=0.0
    ENDIF
    
    DO nn = 1,atoms%neq(n)
       na = SUM(atoms%neq(:n-1))+nn
       IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
          rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
          
272
          CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
Daniel Wortmann's avatar
Daniel Wortmann committed
273 274
          !Calculate Hamiltonian
        
275
          CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
276
          !Cut out of ab1 only the needed elements here
277
          ab_select=ab1(mpi%n_rank+1:lapw%nv(jintsp):mpi%n_size,:)
Daniel Wortmann's avatar
Daniel Wortmann committed
278
          IF (iintsp==jintsp) THEN
279
             CALL zgemm("N","T",lapw%nv(iintsp),lapw%num_local_cols(iintsp),ab_size,CMPLX(rchi,0.0),CONJG(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(iintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
280 281
          ELSE
             !Second set of ab is needed
282
             CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
283
             CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(:,:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
Daniel Wortmann's avatar
Daniel Wortmann committed
284
             !Multiply for Hamiltonian
285
             CALL zgemm("N","t",lapw%nv(iintsp),lapw%num_local_cols(jintsp),ab_size,chi,conjg(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(jintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))   
Daniel Wortmann's avatar
Daniel Wortmann committed
286 287 288 289 290 291 292 293 294 295
          ENDIF
       ENDIF
    END DO
    !delete lower part of matrix
    !i=0
    !DO ii=mpi%n_rank+1,lapw%nv(iintsp),mpi%n_size
    !   i=i+1
    !   hmat%data_c(ii+1:,i)=0.0
    !ENDDO
    IF (hmat%l_real) THEN
296
       hmat%data_r=hmat%data_r+hmat%data_c
Daniel Wortmann's avatar
Daniel Wortmann committed
297 298 299
    ENDIF
    
  END SUBROUTINE priv_MPI
300

Daniel Wortmann's avatar
Daniel Wortmann committed
301
  
302
END MODULE m_hsmt_nonsph