hsmt_sph.F90 12.8 KB
Newer Older
1
! -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
2 3 4 5 6
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
Matthias Redies's avatar
Matthias Redies committed
8 9
   USE m_juDFT
   IMPLICIT NONE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
10

Matthias Redies's avatar
Matthias Redies committed
11
   INTERFACE hsmt_sph
Daniel Wortmann's avatar
Daniel Wortmann committed
12 13 14
#ifdef _OPENACC
      module procedure hsmt_sph_acc
#else
Matthias Redies's avatar
Matthias Redies committed
15
      module procedure hsmt_sph_cpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
16
#endif
Matthias Redies's avatar
Matthias Redies committed
17
   END INTERFACE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
18

19
CONTAINS
Uliana Alekseeva's avatar
Uliana Alekseeva committed
20

21
SUBROUTINE hsmt_sph_acc(n,atoms,fmpi,isp,input,nococonv,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fjgj,smat,hmat)
22 23
   USE m_constants, ONLY : fpi_const,tpi_const
   USE m_types
24
   USE m_hsmt_fjgj
Daniel Wortmann's avatar
Daniel Wortmann committed
25
#ifdef CPP_GPU
Daniel Wortmann's avatar
Daniel Wortmann committed
26
   USE nvtx
Daniel Wortmann's avatar
Daniel Wortmann committed
27
#endif
28 29
   IMPLICIT NONE
   TYPE(t_input),INTENT(IN)      :: input
30
   TYPE(t_mpi),INTENT(IN)        :: fmpi
31
   TYPE(t_nococonv),INTENT(IN)   :: nococonv
32 33 34
   TYPE(t_atoms),INTENT(IN)      :: atoms
   TYPE(t_lapw),INTENT(IN)       :: lapw
   TYPE(t_usdus),INTENT(IN)      :: usdus
35
   TYPE(t_fjgj),INTENT(IN)       :: fjgj
36
   CLASS(t_mat),INTENT(INOUT)    :: smat,hmat
Daniel Wortmann's avatar
Daniel Wortmann committed
37 38 39 40 41 42 43 44 45
    !     ..
   !     .. Scalar Arguments ..
   INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
   COMPLEX, INTENT(IN)  :: chi
   !     ..
   !     .. Array Arguments ..
   REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
   REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)

46
   !     ..
Daniel Wortmann's avatar
Daniel Wortmann committed
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
   !     .. Local Scalars ..
   REAL tnn(3), elall,fjkiln,gjkiln,ddnln,ski(3)
   REAL apw_lo1,apw_lo2,w1

   INTEGER kii,ki,kj,l,nn,kj_end,l3,jv,kj_off,kj_vec,l31

   !     ..
   !     .. Local Arrays ..
   REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
   REAL qssbti(3),qssbtj(3)
   REAL :: plegend(0:2)
   REAL :: xlegend
   REAL :: VecHelpS,VecHelpH
   REAL :: cph_re, cph_im
   REAL :: dot, fct, fct2

   CALL timestart("spherical setup")
   !call nvtxStartRange("hsmt_sph",1)
   DO l = 0,atoms%lmaxd
      fleg1(l) = REAL(l+l+1)/REAL(l+1)
      fleg2(l) = REAL(l)/REAL(l+1)
      fl2p1(l) = REAL(l+l+1)/fpi_const
   END DO ! l
   qssbti=MERGE(- nococonv%qss/2,+ nococonv%qss/2,jintsp.EQ.1)
   qssbtj=MERGE(- nococonv%qss/2,+ nococonv%qss/2,iintsp.EQ.1)
   !$acc  data &
Daniel Wortmann's avatar
Daniel Wortmann committed
73
   !$acc&   copyin(jintsp,iintsp,n,fleg1,fleg2,isp,fl2p1,el,e_shift,chi,qssbti,qssbtj)&
74
   !$acc&   copyin(lapw,atoms,fmpi,input,usdus)&
Daniel Wortmann's avatar
Daniel Wortmann committed
75 76
   !$acc&   copyin(lapw%nv,lapw%gvec,lapw%gk)&
   !$acc&   copyin(atoms%lmax,atoms%rmt,atoms%lnonsph,atoms%neq,atoms%taual)&
77
   !$acc&   copyin(fmpi%n_size,fmpi%n_rank)&
Daniel Wortmann's avatar
Daniel Wortmann committed
78 79
   !$acc&   copyin(input%l_useapw)&
   !$acc&   copyin(usdus%dus,usdus%uds,usdus%us,usdus%ddn,usdus%duds)&
Daniel Wortmann's avatar
Daniel Wortmann committed
80
   !$acc&   present(fjgj,fjgj%fj,fjgj%gj)&
Daniel Wortmann's avatar
Daniel Wortmann committed
81 82
   !$acc&   present(hmat,smat,hmat%data_c,hmat%data_r,smat%data_r,smat%data_c)

Daniel Wortmann's avatar
Daniel Wortmann committed
83 84
   !$acc parallel
   !$acc loop gang
85
   DO  ki =  fmpi%n_rank+1, lapw%nv(jintsp), fmpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
86 87 88
      !$acc loop  vector independent&
      !$acc &    PRIVATE(ski,plegend,tnn,vechelps,vechelph,xlegend,fjkiln,gjkiln,ddnln,elall,l3,l,fct,fct2,cph_re,cph_im,dot)
      DO  kj = 1, min(ki,lapw%nv(iintsp))
89
         kii=(ki-1)/fmpi%n_size+1
Daniel Wortmann's avatar
Daniel Wortmann committed
90 91
         ski = lapw%gvec(:,ki,jintsp) + qssbti(:)

Daniel Wortmann's avatar
Daniel Wortmann committed
92 93 94
         !--->          update overlap and l-diagonal hamiltonian matrix
         VecHelpS = 0.0
         VecHelpH = 0.0
Daniel Wortmann's avatar
Daniel Wortmann committed
95

Daniel Wortmann's avatar
Daniel Wortmann committed
96 97 98 99
         !--->       x for legendre polynomials
         xlegend =dot_product(lapw%gk(1:3,kj,iintsp),lapw%gk(1:3,ki,jintsp))
         !$acc loop seq
         DO  l = 0,atoms%lmax(n)
100 101
            fjkiln = fjgj%fj(ki,l,isp,jintsp)
            gjkiln = fjgj%gj(ki,l,isp,jintsp)
Daniel Wortmann's avatar
Daniel Wortmann committed
102 103 104 105 106 107 108 109
            IF (input%l_useapw) THEN
               w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
               apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 + fjkiln * usdus%us(l,n,isp)  * usdus%dus(l,n,isp) )
               apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 + gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
            ENDIF ! useapw
            ddnln = usdus%ddn(l,n,isp)
            elall = el(l,n,isp)
            IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
Daniel Wortmann's avatar
Daniel Wortmann committed
110

Daniel Wortmann's avatar
Daniel Wortmann committed
111 112 113 114 115 116 117 118 119 120
            !--->       legendre polynomials
            l3 = modulo(l, 3)
            IF (l == 0) THEN
               plegend(0) = 1.0
            ELSE IF (l == 1) THEN
               plegend(1) = xlegend
            ELSE
               plegend(l3) = fleg1(l-1)*xlegend*plegend(modulo(l-1,3)) - fleg2(l-1)*plegend(modulo(l-2,3))
            END IF ! l

121 122
            fct  = plegend(l3)*fl2p1(l)       * ( fjkiln*fjgj%fj(kj,l,isp,iintsp) + gjkiln*fjgj%gj(l,kj,isp,iintsp)*ddnln )
            fct2 = plegend(l3)*fl2p1(l) * 0.5 * ( gjkiln*fjgj%fj(kj,l,isp,iintsp) + fjkiln*fjgj%gj(l,kj,isp,iintsp) )
Daniel Wortmann's avatar
Daniel Wortmann committed
123 124 125 126 127

            VecHelpS = VecHelpS + fct
            VecHelpH = VecHelpH + fct*elall + fct2

            IF (input%l_useapw) THEN
128
               VecHelpH = VecHelpH + plegend(l3) * ( apw_lo1*fjgj%fj(kj,l,isp,iintsp) + apw_lo2*fjgj%gj(l,kj,isp,iintsp) )
Daniel Wortmann's avatar
Daniel Wortmann committed
129 130 131 132 133 134 135 136 137 138
            ENDIF ! useapw

            !--->          end loop over l
         ENDDO ! l
         !$end acc
         !--->             set up phase factors
         cph_re = 0.0
         cph_im = 0.0
         DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
            tnn(1:3) = tpi_const*atoms%taual(1:3,nn)
Daniel Wortmann's avatar
Daniel Wortmann committed
139

Daniel Wortmann's avatar
Daniel Wortmann committed
140
            dot = DOT_PRODUCT(ski(1:3) - lapw%gvec(1:3,kj,iintsp) - qssbtj(1:3), tnn(1:3))
Daniel Wortmann's avatar
Daniel Wortmann committed
141

Daniel Wortmann's avatar
Daniel Wortmann committed
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
            cph_re = cph_re + COS(dot)
            cph_im = cph_im - SIN(dot)
            ! IF (iintsp.NE.jintsp) cph_im=-cph_im
         END DO ! nn

         IF (smat%l_real) THEN
            smat%data_r(kj,kii) = &
            smat%data_r(kj,kii) + cph_re * VecHelpS
            hmat%data_r(kj,kii) = &
            hmat%data_r(kj,kii) + cph_re * VecHelpH
         ELSE  ! real
            smat%data_c(kj,kii) = &
            smat%data_c(kj,kii) + chi*cmplx(cph_re,cph_im) * VecHelpS
            hmat%data_c(kj,kii) = &
            hmat%data_c(kj,kii) + chi*cmplx(cph_re,cph_im) * VecHelpH
         ENDIF ! real
Daniel Wortmann's avatar
Daniel Wortmann committed
158

Daniel Wortmann's avatar
Daniel Wortmann committed
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173

      END DO ! kj_off
      !$acc end loop
      !--->    end loop over ki
   ENDDO

   !$acc end loop
   !$acc end parallel
   !$acc end data
   !$acc wait
   CALL timestop("spherical setup")
   !call nvtxEndRange()
   RETURN
END SUBROUTINE hsmt_sph_acc

174
SUBROUTINE hsmt_sph_cpu(n,atoms,fmpi,isp,input,nococonv,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fjgj,smat,hmat)
Daniel Wortmann's avatar
Daniel Wortmann committed
175 176 177 178
   USE m_constants, ONLY : fpi_const,tpi_const
   USE m_types
   USE m_hsmt_fjgj
#ifdef CPP_GPU
Daniel Wortmann's avatar
Daniel Wortmann committed
179
   USE nvtx
Daniel Wortmann's avatar
Daniel Wortmann committed
180 181 182
#endif
   IMPLICIT NONE
   TYPE(t_input),INTENT(IN)      :: input
183
   TYPE(t_mpi),INTENT(IN)        :: fmpi
Daniel Wortmann's avatar
Daniel Wortmann committed
184 185 186 187 188 189 190
   TYPE(t_nococonv),INTENT(IN)   :: nococonv
   TYPE(t_atoms),INTENT(IN)      :: atoms
   TYPE(t_lapw),INTENT(IN)       :: lapw
   TYPE(t_usdus),INTENT(IN)      :: usdus
   TYPE(t_fjgj),INTENT(IN)       :: fjgj
   CLASS(t_mat),INTENT(INOUT)    :: smat,hmat
    !     ..
191 192 193 194 195 196 197
   !     .. Scalar Arguments ..
   INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
   COMPLEX, INTENT(IN)  :: chi
   !     ..
   !     .. Array Arguments ..
   REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
   REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
198

199 200 201 202 203
   !     ..
   !     .. Local Scalars ..
   REAL tnn(3), elall,fjkiln,gjkiln,ddnln,ski(3)
   REAL apw_lo1,apw_lo2,w1

Daniel Wortmann's avatar
Daniel Wortmann committed
204
   INTEGER kii,ki,kj,l,nn,kj_end,l3,jv,kj_off,kj_vec,l31
205 206 207 208 209 210 211 212 213 214

   !     ..
   !     .. Local Arrays ..
   REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
   REAL qssbti(3),qssbtj(3)
   REAL, ALLOCATABLE :: plegend(:,:)
   REAL, ALLOCATABLE :: xlegend(:)
   REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
   REAL, ALLOCATABLE :: cph_re(:), cph_im(:)
   REAL, ALLOCATABLE :: dot(:), fct(:), fct2(:)
215
   INTEGER, PARAMETER :: NVEC = 128
216 217 218
   INTEGER :: NVEC_rem  !remainder

   CALL timestart("spherical setup")
Daniel Wortmann's avatar
Daniel Wortmann committed
219
   !call nvtxStartRange("hsmt_sph",1)
220 221 222 223 224
   DO l = 0,atoms%lmaxd
      fleg1(l) = REAL(l+l+1)/REAL(l+1)
      fleg2(l) = REAL(l)/REAL(l+1)
      fl2p1(l) = REAL(l+l+1)/fpi_const
   END DO ! l
225
  !$OMP     PARALLEL DEFAULT(NONE)&
226
  !$OMP     SHARED(lapw,atoms,nococonv,fmpi,input,usdus,smat,hmat)&
227 228 229 230 231
  !$OMP     SHARED(jintsp,iintsp,n,fleg1,fleg2,fjgj,isp,fl2p1,el,e_shift,chi)&
  !$OMP     PRIVATE(kii,ki,ski,kj,kj_off,kj_vec,plegend,xlegend,l,l3,kj_end,qssbti,qssbtj,fct2)&
  !$OMP     PRIVATE(cph_re,cph_im,dot,nn,tnn,fjkiln,gjkiln)&
  !$OMP     PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct)&
  !$OMP     PRIVATE(VecHelpS,VecHelpH,NVEC_rem)
232 233 234 235 236
   ALLOCATE(cph_re(NVEC),cph_im(NVEC))
   ALLOCATE(dot(NVEC),fct(NVEC),fct2(NVEC))
   ALLOCATE(plegend(NVEC,0:2))
   ALLOCATE(xlegend(NVEC))
   ALLOCATE(VecHelpS(NVEC),VecHelpH(NVEC))
237 238
   qssbti=MERGE(- nococonv%qss/2,+ nococonv%qss/2,jintsp.EQ.1)
   qssbtj=MERGE(- nococonv%qss/2,+ nococonv%qss/2,iintsp.EQ.1)
Daniel Wortmann's avatar
Daniel Wortmann committed
239
   !$OMP      DO SCHEDULE(DYNAMIC,1)
240
   DO  ki =  fmpi%n_rank+1, lapw%nv(jintsp), fmpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
241
      kj_end=min(ki,lapw%nv(iintsp))
242
      kii=(ki-1)/fmpi%n_size+1
Daniel Wortmann's avatar
Daniel Wortmann committed
243
      ski = lapw%gvec(:,ki,jintsp) + qssbti(:)
Daniel Wortmann's avatar
Daniel Wortmann committed
244
      DO  kj_off = 1, lapw%nv(iintsp), NVEC
Daniel Wortmann's avatar
Daniel Wortmann committed
245
         NVEC_rem = NVEC
246 247
         kj_vec = kj_off - 1 + NVEC
         IF (kj_vec > kj_end) THEN
Daniel Wortmann's avatar
Daniel Wortmann committed
248 249
            kj_vec = kj_end
            NVEC_rem = kj_end - kj_off + 1
250
         ENDIF
Daniel Wortmann's avatar
Daniel Wortmann committed
251
         if (NVEC_rem<0 ) exit
252
         !--->          update overlap and l-diagonal hamiltonian matrix
253 254
         VecHelpS = 0.0
         VecHelpH = 0.0
255

256 257 258
         !--->       x for legendre polynomials
         DO jv = 0, NVEC_rem-1
            kj = jv + kj_off
Daniel Wortmann's avatar
Daniel Wortmann committed
259
            xlegend(jv+1) =dot_product(lapw%gk(1:3,kj,iintsp),lapw%gk(1:3,ki,jintsp))
260 261
         END DO ! kj
         DO  l = 0,atoms%lmax(n)
262

263 264
            fjkiln = fjgj%fj(ki,l,isp,jintsp)
            gjkiln = fjgj%gj(ki,l,isp,jintsp)
265 266 267 268 269 270 271 272 273 274

            IF (input%l_useapw) THEN
               w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
               apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 + fjkiln * usdus%us(l,n,isp)  * usdus%dus(l,n,isp) )
               apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 + gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
            ENDIF ! useapw

            ddnln = usdus%ddn(l,n,isp)
            elall = el(l,n,isp)
            IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
Daniel Wortmann's avatar
Daniel Wortmann committed
275

276 277 278
            !--->       legendre polynomials
            l3 = modulo(l, 3)
            IF (l == 0) THEN
Daniel Wortmann's avatar
Daniel Wortmann committed
279
               plegend(:NVEC_REM,0) = 1.0
280
            ELSE IF (l == 1) THEN
281
               plegend(:NVEC_REM,1) = xlegend(:NVEC_REM)
282
            ELSE
283
               plegend(:NVEC_REM,l3) = fleg1(l-1)*xlegend(:NVEC_REM)*plegend(:NVEC_REM,modulo(l-1,3)) - fleg2(l-1)*plegend(:NVEC_REM,modulo(l-2,3))
284 285
            END IF ! l

286 287
            fct(:NVEC_REM)  = plegend(:NVEC_REM,l3)*fl2p1(l)       * ( fjkiln*fjgj%fj(kj_off:kj_vec,l,isp,iintsp) + gjkiln*fjgj%gj(kj_off:kj_vec,l,isp,iintsp)*ddnln )
            fct2(:NVEC_REM) = plegend(:NVEC_REM,l3)*fl2p1(l) * 0.5 * ( gjkiln*fjgj%fj(kj_off:kj_vec,l,isp,iintsp) + fjkiln*fjgj%gj(kj_off:kj_vec,l,isp,iintsp) )
288 289 290

            VecHelpS(:NVEC_REM) = VecHelpS(:NVEC_REM) + fct(:NVEC_REM)
            VecHelpH(:NVEC_REM) = VecHelpH(:NVEC_REM) + fct(:NVEC_REM)*elall + fct2(:NVEC_REM)
291 292

            IF (input%l_useapw) THEN
293
               VecHelpH(:NVEC_REM) = VecHelpH(:NVEC_REM) + plegend(:NVEC_REM,l3) * ( apw_lo1*fjgj%fj(kj_off:kj_vec,l,isp,iintsp) + apw_lo2*fjgj%gj(kj_off:kj_vec,l,isp,iintsp) )
294 295 296 297 298 299 300 301 302 303 304 305 306
            ENDIF ! useapw

            !--->          end loop over l
         ENDDO ! l
         !--->             set up phase factors
         cph_re = 0.0
         cph_im = 0.0
         DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
            tnn(1:3) = tpi_const*atoms%taual(1:3,nn)
            DO jv = 0, NVEC_rem-1
               kj = jv + kj_off
               dot(jv+1) = DOT_PRODUCT(ski(1:3) - lapw%gvec(1:3,kj,iintsp) - qssbtj(1:3), tnn(1:3))
            END DO ! kj
307 308
            cph_re(:NVEC_REM) = cph_re(:NVEC_REM) + COS(dot(:NVEC_REM))
            cph_im(:NVEC_REM) = cph_im(:NVEC_REM) - SIN(dot(:NVEC_REM))
309 310
            ! IF (iintsp.NE.jintsp) cph_im=-cph_im
         END DO ! nn
311

312 313
         IF (smat%l_real) THEN
            smat%data_r(kj_off:kj_vec,kii) = &
314
            smat%data_r(kj_off:kj_vec,kii) + cph_re(:NVEC_REM) * VecHelpS(:NVEC_REM)
315
            hmat%data_r(kj_off:kj_vec,kii) = &
316
            hmat%data_r(kj_off:kj_vec,kii) + cph_re(:NVEC_REM) * VecHelpH(:NVEC_REM)
317 318
         ELSE  ! real
            smat%data_c(kj_off:kj_vec,kii) = &
319
            smat%data_c(kj_off:kj_vec,kii) + chi*cmplx(cph_re(:NVEC_REM),cph_im(:NVEC_REM)) * VecHelpS(:NVEC_REM)
320
            hmat%data_c(kj_off:kj_vec,kii) = &
321
            hmat%data_c(kj_off:kj_vec,kii) + chi*cmplx(cph_re(:NVEC_REM),cph_im(:NVEC_REM)) * VecHelpH(:NVEC_REM)
322
         ENDIF ! real
Daniel Wortmann's avatar
Daniel Wortmann committed
323

324 325 326 327

      END DO ! kj_off
      !--->    end loop over ki
   ENDDO
328
   !$OMP     END DO
329 330
   DEALLOCATE(plegend)
   DEALLOCATE(VecHelpS,VecHelpH)
331
   !$OMP     END PARALLEL
332 333 334 335 336
   CALL timestop("spherical setup")
   RETURN
END SUBROUTINE hsmt_sph_cpu


Uliana Alekseeva's avatar
Uliana Alekseeva committed
337

338
END MODULE m_hsmt_sph