hsmt_sph.F90 19.6 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
Matthias Redies's avatar
Matthias Redies committed
8 9
   USE m_juDFT
   IMPLICIT NONE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
10

Matthias Redies's avatar
Matthias Redies committed
11 12
   INTERFACE hsmt_sph
      module procedure hsmt_sph_cpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
13
#ifdef CPP_GPU
Matthias Redies's avatar
Matthias Redies committed
14
      module procedure hsmt_sph_gpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
15
#endif
Matthias Redies's avatar
Matthias Redies committed
16
   END INTERFACE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
17

18
CONTAINS
Uliana Alekseeva's avatar
Uliana Alekseeva committed
19 20

#ifdef CPP_GPU
Matthias Redies's avatar
Matthias Redies committed
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
   ATTRIBUTES(global)&
      SUBROUTINE HsmtSphGpuKernel_real(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
                                       lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                                       smat_data,hmat_data,&
                                       uds,dus,us,duds,rmt)
   INTEGER, VALUE, INTENT(IN) :: loop_size
   INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
   REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
   INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
   REAL,INTENT(IN)    :: gk(:,:,:)
   REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)
   REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
   REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
   REAL,INTENT(IN)    :: taual(:,:)
   REAL,INTENT(IN)    :: ddn(0:lmaxd)
   REAL,INTENT(IN)    :: el(0:lmaxd)
   REAL,VALUE,INTENT(IN)    :: e_shift
   REAL,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
   !+APW
   REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd)
   REAL,INTENT(IN),OPTIONAL   :: rmt
   !-APW
Uliana Alekseeva's avatar
Uliana Alekseeva committed
43

Matthias Redies's avatar
Matthias Redies committed
44 45 46 47 48 49 50
   REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
   REAL, ALLOCATABLE :: plegend(:)
   REAL cph
   REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
   REAL apw_lo1,apw_lo2,apw1,w1
   INTEGER kii,ki,kj,l,nn,k
   INTEGER :: loop_start, loop_end, i
Uliana Alekseeva's avatar
Uliana Alekseeva committed
51

Matthias Redies's avatar
Matthias Redies committed
52 53 54
   ALLOCATE(plegend(0:lmaxd))
   plegend=0.0
   plegend(0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
55

Matthias Redies's avatar
Matthias Redies committed
56
   k = (blockidx%x-1)*blockdim%x + threadidx%x
Uliana Alekseeva's avatar
Uliana Alekseeva committed
57

Matthias Redies's avatar
Matthias Redies committed
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
   !TODO!!!
   !for seq, i.e. ki_start = 1, ki_step = 1
   loop_start = (k-1) * loop_size + 1
   loop_end = loop_start + loop_size - 1
   if (loop_end > ki_end ) loop_end = ki_end

   DO ki = loop_start,loop_end,ki_step
      !DO  ki =  ki_start,ki_end,ki_step
      DO kj = 1,ki
         kii=(ki-1)/ki_step+1
         ski = gvec(:,ki,jintsp) + qssbti
         !--->             set up phase factors
         cph = 0.0
         DO nn = nn_start,nn_end
            tnn = tpi_const*taual(:,nn)
            cph = cph + COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
         ENDDO
         !--->       legendre polynomials
         plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
         DO l = 1,lmax - 1
            plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
         END DO
         DO  l = 0,lmax
            fjkiln = fj(ki,l,jintsp)
            gjkiln = gj(ki,l,jintsp)
            !+APW
            IF (PRESENT(uds)) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
86 87
               w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
               apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
Matthias Redies's avatar
Matthias Redies committed
88
                                                    fjkiln * us(l) * dus(l) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
89
               apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
Matthias Redies's avatar
Matthias Redies committed
90 91 92 93 94 95 96 97 98 99
                                                    gjkiln * uds(l) * duds(l) )
            ENDIF
            !-APW
            ddnln =  ddn(l)
            elall = el(l)
            IF (l<=lnonsph) elall=elall-e_shift!(isp)
            !DO kj = 1,ki
            fct  = plegend(l)*fl2p1(l)*&
                   ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
            fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
100

Matthias Redies's avatar
Matthias Redies committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
            smat_data(kj,kii)=smat_data(kj,kii)+cph*fct
            hmat_data(kj,kii)=hmat_data(kj,kii) + cph * ( fct * elall + fct2)
            !+APW
            IF (PRESENT(uds)) THEN
               apw1 = cph * plegend(l)  * &
                      ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
               hmat_data(kj,kii)=hmat_data(kj,kii) + apw1
            ENDIF
            !-APW
            !ENDDO
            !--->          end loop over l
         ENDDO
      ENDDO
      !--->    end loop over ki
   ENDDO
   DEALLOCATE(plegend)
END SUBROUTINE HsmtSphGpuKernel_real
Uliana Alekseeva's avatar
Uliana Alekseeva committed
118

Matthias Redies's avatar
Matthias Redies committed
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
ATTRIBUTES(global)&
   SUBROUTINE HsmtSphGpuKernel_cmplx(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
                                     lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                                     smat_data,hmat_data,&
                                     uds,dus,us,duds,rmt)
INTEGER, VALUE, INTENT(IN) :: loop_size
INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
COMPLEX, VALUE, INTENT(IN)  :: chi
REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
REAL,INTENT(IN)    :: gk(:,:,:)
REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)
REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
REAL,INTENT(IN)    :: taual(:,:)
REAL,INTENT(IN)    :: ddn(0:lmaxd)
REAL,INTENT(IN)    :: el(0:lmaxd)
REAL, VALUE,INTENT(IN)    :: e_shift
COMPLEX,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
!+APW
REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd)
REAL,INTENT(IN),OPTIONAL   :: rmt
!-APW
Uliana Alekseeva's avatar
Uliana Alekseeva committed
142

Matthias Redies's avatar
Matthias Redies committed
143 144 145 146 147 148 149 150
REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
REAL, ALLOCATABLE :: plegend(:)
COMPLEX :: cph
REAL apw_lo1,apw_lo2,w1
COMPLEX capw1
REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
INTEGER kii,ki,kj,kjj,l,nn,kj_end,k
INTEGER :: loop_start, loop_end, i
Uliana Alekseeva's avatar
Uliana Alekseeva committed
151

Matthias Redies's avatar
Matthias Redies committed
152 153 154
ALLOCATE(plegend(0:lmaxd))
plegend=0.0
plegend(0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
155

Matthias Redies's avatar
Matthias Redies committed
156
k = (blockidx%x-1)*blockdim%x + threadidx%x
Uliana Alekseeva's avatar
Uliana Alekseeva committed
157

Matthias Redies's avatar
Matthias Redies committed
158 159 160 161 162
!TODO!!!
!for seq, i.e. ki_start = 1, ki_step = 1
loop_start = (k-1) * loop_size + 1
loop_end = loop_start + loop_size - 1
if (loop_end > ki_end ) loop_end = ki_end
Uliana Alekseeva's avatar
Uliana Alekseeva committed
163

Matthias Redies's avatar
Matthias Redies committed
164 165 166 167 168 169
DO ki = loop_start,loop_end,ki_step
   !DO  ki =  ki_start,ki_end,ki_step
   kj_end = MIN(ki,nv(iintsp))
   DO kj = 1,kj_end
      kii=(ki-1)/ki_step+1
      ski = gvec(:,ki,jintsp) + qssbti
Uliana Alekseeva's avatar
Uliana Alekseeva committed
170

Matthias Redies's avatar
Matthias Redies committed
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
      !--->             set up phase factors
      cph = 0.0
      DO nn = nn_start,nn_end
         tnn = tpi_const*taual(:,nn)
         cph = cph +&
               CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                     SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
         ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
      ENDDO
      !--->       legendre polynomials
      plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
      DO l = 1,lmax - 1
         plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
      END DO
      DO  l = 0,lmax
         fjkiln = fj(ki,l,jintsp)
         gjkiln = gj(ki,l,jintsp)
         !+APW
         IF (PRESENT(uds)) THEN
            w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
            apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                                                 fjkiln * us(l) * dus(l) )
            apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                                                 gjkiln * uds(l) * duds(l) )
         ENDIF
         !-APW
         ddnln =  ddn(l)
         elall = el(l)
         IF (l<=lnonsph) elall=elall-e_shift!(isp)
         fct  = plegend(l)*fl2p1(l)*&
                ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
         fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
203

Matthias Redies's avatar
Matthias Redies committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
         smat_data(kj,kii)=smat_data(kj,kii) + chi * cph * fct
         hmat_data(kj,kii)=hmat_data(kj,kii) + chi * cph * ( fct * elall + fct2)
         !+APW
         IF (PRESENT(uds)) THEN
            capw1 = cph*plegend(l)&
                    * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
            hmat_data(kj,kii)=hmat_data(kj,kii) + capw1
         ENDIF
         !-APW
         !--->          end loop over l
      ENDDO
   ENDDO
   !--->    end loop over ki
ENDDO
DEALLOCATE(plegend)
END SUBROUTINE HsmtSphGpuKernel_cmplx
Uliana Alekseeva's avatar
Uliana Alekseeva committed
220

Matthias Redies's avatar
Matthias Redies committed
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
SUBROUTINE hsmt_sph_gpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
   USE m_constants, ONLY : fpi_const,tpi_const
   USE m_types
   USE nvtx
   IMPLICIT NONE
   TYPE(t_input),INTENT(IN)      :: input
   TYPE(t_mpi),INTENT(IN)        :: mpi
   TYPE(t_noco),INTENT(IN)       :: noco
   TYPE(t_atoms),INTENT(IN)      :: atoms
   TYPE(t_lapw),INTENT(IN)       :: lapw
   TYPE(t_usdus),INTENT(IN)      :: usdus
   CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
   !     ..
   !     .. Scalar Arguments ..
   INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
   COMPLEX, INTENT(IN)  :: chi
   !     ..
   !     .. Array Arguments ..
   REAL,MANAGED,INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
   REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
   REAL,MANAGED,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
   !     ..
   !     .. Local Scalars ..
   INTEGER l
   INTEGER :: grid, block, loop_size

   !     ..
   !     .. Local Arrays ..
   REAL,MANAGED :: fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
   REAL,MANAGED :: fl2p1bt(0:atoms%lmaxd)
   REAL,MANAGED :: qssbti(3),qssbtj(3)
   INTEGER, DEVICE :: nv_dev(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
253

Matthias Redies's avatar
Matthias Redies committed
254 255 256
   call nvtxStartRange("hsmt_sph",2)
   CALL timestart("spherical setup")
   print*, "HsmtSph_GPU"
Uliana Alekseeva's avatar
Uliana Alekseeva committed
257

Matthias Redies's avatar
Matthias Redies committed
258 259 260 261 262 263 264 265
   DO l = 0,atoms%lmaxd
      fleg1(l) = REAL(l+l+1)/REAL(l+1)
      fleg2(l) = REAL(l)/REAL(l+1)
      fl2p1(l) = REAL(l+l+1)/fpi_const
      fl2p1bt(l) = fl2p1(l)*0.5
   END DO
   qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
   qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
266

Matthias Redies's avatar
Matthias Redies committed
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
   ! pretty ugly solution
   nv_dev = lapw%nv
   loop_size = 1
   block = 32   ! number of threads in a block
   grid = ceiling(real(lapw%nv(jintsp))/(loop_SIZE*block))
   !loop_size = max(lapw%nv(jintsp)/(grid*block),1)   !number of iterations performed by each thread
   !if (loop_size * grid*block < lapw%nv(jintsp)) loop_size = loop_size + 1
   IF (input%l_useapw) THEN
      !TODO!!!!
      ! APW case is not testet
      IF (smat%l_real) THEN
         CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                                                    smat%data_r,hmat%data_r,&
                                           usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
      ELSE
         CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                     atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                   chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                     usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                                                     smat%data_c,hmat%data_c,&
                                           usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
      ENDIF
   ELSE
      IF (smat%l_real) THEN
         CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
      ELSE
         CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                     atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                   chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                     usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
      ENDIF
   ENDIF
   CALL timestop("spherical setup")
Uliana Alekseeva's avatar
Uliana Alekseeva committed
310

Matthias Redies's avatar
Matthias Redies committed
311 312 313
   call nvtxEndRange
   RETURN
END SUBROUTINE hsmt_sph_gpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
314 315 316
#endif

  SUBROUTINE hsmt_sph_cpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
317 318
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
hpclab07's avatar
hpclab07 committed
319
    USE m_LegendrePoly
320
    IMPLICIT NONE
321
    TYPE(t_input),INTENT(IN)      :: input
Daniel Wortmann's avatar
Daniel Wortmann committed
322
    TYPE(t_mpi),INTENT(IN)        :: mpi
323 324
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
325 326 327
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
328 329
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
330 331
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
332 333
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
334
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
335
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
Daniel Wortmann's avatar
Daniel Wortmann committed
336
    REAL,    INTENT (IN) :: fj(:,0:,:),gj(:,0:,:)
337 338
    !     ..
    !     .. Local Scalars ..
339
    REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
hpclab07's avatar
hpclab07 committed
340
    REAL apw_lo1,apw_lo2,apw1,w1,plegend
Daniel Wortmann's avatar
Daniel Wortmann committed
341
    COMPLEX capw1
hpclab08's avatar
hpclab08 committed
342 343
    INTEGER kii,ki,kj,l,nn
    INTEGER kj_BlockSize, kj_BlockNum, kjb, kj_start, kj_end
344 345
    !     ..
    !     .. Local Arrays ..
Daniel Wortmann's avatar
Daniel Wortmann committed
346 347
    REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd)
348
    REAL qssbti(3),qssbtj(3)
349
    REAL, ALLOCATABLE :: gdot(:)
350
    REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
351

352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
   COMPLEX, ALLOCATABLE :: cph(:),cp_h(:,:)
   LOGICAL apw(0:atoms%lmaxd)

   CALL timestart("spherical setup")

   DO l = 0,atoms%lmaxd
      fleg1(l) = REAL(l+l+1)/REAL(l+1)
      fleg2(l) = REAL(l)/REAL(l+1)
      fl2p1(l) = REAL(l+l+1)/fpi_const
      fl2p1bt(l) = fl2p1(l)*0.5
   END DO
   ALLOCATE(cp_h(MAXVAL(lapw%nv),SUM(atoms%neq(:n-1))+1:SUM(atoms%neq(:n))))
   DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
      CALL lapw%phase_factors(1,atoms%taual(:,nn),noco%qss,cp_h(:,nn))
   ENDDO
hpclab08's avatar
hpclab08 committed
367

368
    kj_BlockSize = 1000
369

Gregor Michalicek's avatar
Gregor Michalicek committed
370
    !$OMP PARALLEL DEFAULT(NONE)&
371
    !$OMP SHARED(cp_h,kj_BlockSize,lapw,atoms,noco,mpi,input,usdus,smat,hmat)&
Gregor Michalicek's avatar
Gregor Michalicek committed
372
    !$OMP SHARED(jintsp,iintsp,n,fleg1,fleg2,fj,gj,isp,fl2p1,el,e_shift,fl2p1bt,chi)&
373
    !$OMP PRIVATE(kii,ki,ski,kj,gdot,l,qssbti,qssbtj,fct2,plegend)&
Daniel Wortmann's avatar
Daniel Wortmann committed
374 375
    !$OMP PRIVATE(cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,apw1)&
hpclab08's avatar
hpclab08 committed
376 377
    !$OMP PRIVATE(capw1,VecHelpS,VecHelpH)&
    !$OMP PRIVATE(kj_start,kj_end,kj_BlockNum, kjb) 
Daniel Wortmann's avatar
Daniel Wortmann committed
378
    ALLOCATE(cph(MAXVAL(lapw%nv)))
379
    ALLOCATE(gdot(0:atoms%lmaxd))
380
    ALLOCATE(VecHelpS(MAXVAL(lapw%nv)),VecHelpH(MAXVAL(lapw%nv)))
381
    gdot=0.0
382 383
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Daniel Wortmann's avatar
Daniel Wortmann committed
384
    !$OMP  DO SCHEDULE(DYNAMIC,1)
385
    DO  ki =  mpi%n_rank+1, lapw%nv(jintsp), mpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
386
       kii=(ki-1)/mpi%n_size+1
387
       ski = lapw%gvec(:,ki,jintsp) + qssbti
hpclab08's avatar
hpclab08 committed
388 389 390 391 392 393 394
       kj_BlockNum = ki/kj_BlockSize + 1
       DO kjb = 1, kj_BlockNum 
          kj_start = (kjb-1) * kj_BlockSize + 1
          kj_end = MIN(ki,kjb * kj_BlockSize,lapw%nv(iintsp))

          !--->       legendre polynomials
          DO kj = kj_start,kj_end
hpclab07's avatar
hpclab07 committed
395
             gdot(kj) = DOT_PRODUCT(lapw%gk(:,kj,iintsp),lapw%gk(:,ki,jintsp))
hpclab08's avatar
hpclab08 committed
396 397 398 399 400 401
          END DO
          !--->             set up phase factors
          cph(kj_start:kj_end) = 0.0
          DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
             tnn = tpi_const*atoms%taual(:,nn)
             DO kj = kj_start,kj_end
402 403 404
                cph(kj) = cph(kj) +cp_h(ki,nn)/cp_h(kj,nn)
!                   CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,iintsp)-qssbtj,tnn)),&
!                   SIN(DOT_PRODUCT(lapw%gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
hpclab08's avatar
hpclab08 committed
405 406
                ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
             END DO
Daniel Wortmann's avatar
Daniel Wortmann committed
407
          END DO
408

hpclab08's avatar
hpclab08 committed
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
          !--->          update overlap and l-diagonal hamiltonian matrix
          VecHelpS = 0.d0
          VecHelpH = 0.d0
          DO  l = 0,atoms%lmax(n)
             fjkiln = fj(ki,l,jintsp)
             gjkiln = gj(ki,l,jintsp)
          
             IF (input%l_useapw) THEN
                w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                   usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
                apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                   fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
                apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                   gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
             ENDIF
             ddnln =  usdus%ddn(l,n,isp)
             elall = el(l,n,isp)
             IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
             IF (smat%l_real) THEN
                DO kj = kj_start,kj_end
hpclab07's avatar
hpclab07 committed
429
                   plegend = LegendrePoly_scalar(l,gdot(kj))
430
                   fct  = plegend*fl2p1(l)*&
hpclab08's avatar
hpclab08 committed
431
                        ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
432
                   fct2 = plegend*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
hpclab08's avatar
hpclab08 committed
433 434 435 436 437

                   smat%data_r(kj,kii)=smat%data_r(kj,kii)+REAL(cph(kj))*fct
                   hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
                   !+APW
                   IF (input%l_useapw) THEN
438
                      apw1 = REAL(cph(kj)) * plegend  * &
hpclab08's avatar
hpclab08 committed
439 440 441 442 443 444
                         ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                      hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + apw1
                   ENDIF
                   !-APW
                ENDDO
             ELSE
hpclab07's avatar
hpclab07 committed
445
                plegend = LegendrePoly_scalar(l,gdot(kj))
hpclab08's avatar
hpclab08 committed
446
                DO kj = kj_start,kj_end
447
                   fct  = plegend*fl2p1(l)*&
hpclab08's avatar
hpclab08 committed
448
                      ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
449
                   fct2 = plegend*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
450
 
hpclab08's avatar
hpclab08 committed
451 452
                   VecHelpS(kj) = VecHelpS(kj) + fct
                   VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Daniel Wortmann's avatar
Daniel Wortmann committed
453

hpclab08's avatar
hpclab08 committed
454
                   IF (input%l_useapw) THEN
455
                      capw1 = cph(kj)*plegend *&
hpclab08's avatar
hpclab08 committed
456 457 458 459
                          ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                      hmat%data_c(kj,kii)=hmat%data_c(kj,kii) + capw1
                   ENDIF
                END DO
460

hpclab08's avatar
hpclab08 committed
461 462 463 464 465 466
             ENDIF
          !--->          end loop over l
          ENDDO
          IF (.not.smat%l_real) THEN
              smat%data_c(kj_start:kj_end,kii)=smat%data_c(kj_start:kj_end,kii) + chi*cph(kj_start:kj_end) * VecHelpS(kj_start:kj_end)
              hmat%data_c(kj_start:kj_end,kii)=hmat%data_c(kj_start:kj_end,kii) + chi*cph(kj_start:kj_end) * VecHelpH(kj_start:kj_end)
Daniel Wortmann's avatar
Daniel Wortmann committed
467
          ENDIF
hpclab08's avatar
hpclab08 committed
468 469 470

       !--->    end loop over kj-blocks 
       ENDDO 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
471
    !--->    end loop over ki
Daniel Wortmann's avatar
Daniel Wortmann committed
472 473
    ENDDO
    !$OMP END DO
474
    DEALLOCATE(gdot)
Daniel Wortmann's avatar
Daniel Wortmann committed
475
    DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
476
    DEALLOCATE(VecHelpS,VecHelpH)
477
    !$OMP END PARALLEL
Daniel Wortmann's avatar
Daniel Wortmann committed
478
    CALL timestop("spherical setup")
479 480

    RETURN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
481
  END SUBROUTINE hsmt_sph_cpu
482
END MODULE m_hsmt_sph