hsmt_sph.F90 19.3 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
Matthias Redies's avatar
Matthias Redies committed
8 9
   USE m_juDFT
   IMPLICIT NONE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
10

Matthias Redies's avatar
Matthias Redies committed
11 12
   INTERFACE hsmt_sph
      module procedure hsmt_sph_cpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
13
#ifdef CPP_GPU
Matthias Redies's avatar
Matthias Redies committed
14
      module procedure hsmt_sph_gpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
15
#endif
Matthias Redies's avatar
Matthias Redies committed
16
   END INTERFACE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
17

18
CONTAINS
Uliana Alekseeva's avatar
Uliana Alekseeva committed
19 20

#ifdef CPP_GPU
Matthias Redies's avatar
Matthias Redies committed
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
   ATTRIBUTES(global)&
      SUBROUTINE HsmtSphGpuKernel_real(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
                                       lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                                       smat_data,hmat_data,&
                                       uds,dus,us,duds,rmt)
   INTEGER, VALUE, INTENT(IN) :: loop_size
   INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
   REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
   INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
   REAL,INTENT(IN)    :: gk(:,:,:)
   REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)
   REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
   REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
   REAL,INTENT(IN)    :: taual(:,:)
   REAL,INTENT(IN)    :: ddn(0:lmaxd)
   REAL,INTENT(IN)    :: el(0:lmaxd)
   REAL,VALUE,INTENT(IN)    :: e_shift
   REAL,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
   !+APW
   REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd)
   REAL,INTENT(IN),OPTIONAL   :: rmt
   !-APW
Uliana Alekseeva's avatar
Uliana Alekseeva committed
43

Matthias Redies's avatar
Matthias Redies committed
44 45 46 47 48 49 50
   REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
   REAL, ALLOCATABLE :: plegend(:)
   REAL cph
   REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
   REAL apw_lo1,apw_lo2,apw1,w1
   INTEGER kii,ki,kj,l,nn,k
   INTEGER :: loop_start, loop_end, i
Uliana Alekseeva's avatar
Uliana Alekseeva committed
51

Matthias Redies's avatar
Matthias Redies committed
52 53 54
   ALLOCATE(plegend(0:lmaxd))
   plegend=0.0
   plegend(0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
55

Matthias Redies's avatar
Matthias Redies committed
56
   k = (blockidx%x-1)*blockdim%x + threadidx%x
Uliana Alekseeva's avatar
Uliana Alekseeva committed
57

Matthias Redies's avatar
Matthias Redies committed
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
   !TODO!!!
   !for seq, i.e. ki_start = 1, ki_step = 1
   loop_start = (k-1) * loop_size + 1
   loop_end = loop_start + loop_size - 1
   if (loop_end > ki_end ) loop_end = ki_end

   DO ki = loop_start,loop_end,ki_step
      !DO  ki =  ki_start,ki_end,ki_step
      DO kj = 1,ki
         kii=(ki-1)/ki_step+1
         ski = gvec(:,ki,jintsp) + qssbti
         !--->             set up phase factors
         cph = 0.0
         DO nn = nn_start,nn_end
            tnn = tpi_const*taual(:,nn)
            cph = cph + COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
         ENDDO
         !--->       legendre polynomials
         plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
         DO l = 1,lmax - 1
            plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
         END DO
         DO  l = 0,lmax
            fjkiln = fj(ki,l,jintsp)
            gjkiln = gj(ki,l,jintsp)
            !+APW
            IF (PRESENT(uds)) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
86 87
               w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
               apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
Matthias Redies's avatar
Matthias Redies committed
88
                                                    fjkiln * us(l) * dus(l) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
89
               apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
Matthias Redies's avatar
Matthias Redies committed
90 91 92 93 94 95 96 97 98 99
                                                    gjkiln * uds(l) * duds(l) )
            ENDIF
            !-APW
            ddnln =  ddn(l)
            elall = el(l)
            IF (l<=lnonsph) elall=elall-e_shift!(isp)
            !DO kj = 1,ki
            fct  = plegend(l)*fl2p1(l)*&
                   ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
            fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
100

Matthias Redies's avatar
Matthias Redies committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
            smat_data(kj,kii)=smat_data(kj,kii)+cph*fct
            hmat_data(kj,kii)=hmat_data(kj,kii) + cph * ( fct * elall + fct2)
            !+APW
            IF (PRESENT(uds)) THEN
               apw1 = cph * plegend(l)  * &
                      ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
               hmat_data(kj,kii)=hmat_data(kj,kii) + apw1
            ENDIF
            !-APW
            !ENDDO
            !--->          end loop over l
         ENDDO
      ENDDO
      !--->    end loop over ki
   ENDDO
   DEALLOCATE(plegend)
END SUBROUTINE HsmtSphGpuKernel_real
Uliana Alekseeva's avatar
Uliana Alekseeva committed
118

Matthias Redies's avatar
Matthias Redies committed
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
ATTRIBUTES(global)&
   SUBROUTINE HsmtSphGpuKernel_cmplx(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
                                     lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                                     smat_data,hmat_data,&
                                     uds,dus,us,duds,rmt)
INTEGER, VALUE, INTENT(IN) :: loop_size
INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
COMPLEX, VALUE, INTENT(IN)  :: chi
REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
REAL,INTENT(IN)    :: gk(:,:,:)
REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)
REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
REAL,INTENT(IN)    :: taual(:,:)
REAL,INTENT(IN)    :: ddn(0:lmaxd)
REAL,INTENT(IN)    :: el(0:lmaxd)
REAL, VALUE,INTENT(IN)    :: e_shift
COMPLEX,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
!+APW
REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd)
REAL,INTENT(IN),OPTIONAL   :: rmt
!-APW
Uliana Alekseeva's avatar
Uliana Alekseeva committed
142

Matthias Redies's avatar
Matthias Redies committed
143 144 145 146 147 148 149 150
REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
REAL, ALLOCATABLE :: plegend(:)
COMPLEX :: cph
REAL apw_lo1,apw_lo2,w1
COMPLEX capw1
REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
INTEGER kii,ki,kj,kjj,l,nn,kj_end,k
INTEGER :: loop_start, loop_end, i
Uliana Alekseeva's avatar
Uliana Alekseeva committed
151

Matthias Redies's avatar
Matthias Redies committed
152 153 154
ALLOCATE(plegend(0:lmaxd))
plegend=0.0
plegend(0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
155

Matthias Redies's avatar
Matthias Redies committed
156
k = (blockidx%x-1)*blockdim%x + threadidx%x
Uliana Alekseeva's avatar
Uliana Alekseeva committed
157

Matthias Redies's avatar
Matthias Redies committed
158 159 160 161 162
!TODO!!!
!for seq, i.e. ki_start = 1, ki_step = 1
loop_start = (k-1) * loop_size + 1
loop_end = loop_start + loop_size - 1
if (loop_end > ki_end ) loop_end = ki_end
Uliana Alekseeva's avatar
Uliana Alekseeva committed
163

Matthias Redies's avatar
Matthias Redies committed
164 165 166 167 168 169
DO ki = loop_start,loop_end,ki_step
   !DO  ki =  ki_start,ki_end,ki_step
   kj_end = MIN(ki,nv(iintsp))
   DO kj = 1,kj_end
      kii=(ki-1)/ki_step+1
      ski = gvec(:,ki,jintsp) + qssbti
Uliana Alekseeva's avatar
Uliana Alekseeva committed
170

Matthias Redies's avatar
Matthias Redies committed
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
      !--->             set up phase factors
      cph = 0.0
      DO nn = nn_start,nn_end
         tnn = tpi_const*taual(:,nn)
         cph = cph +&
               CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                     SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
         ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
      ENDDO
      !--->       legendre polynomials
      plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
      DO l = 1,lmax - 1
         plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
      END DO
      DO  l = 0,lmax
         fjkiln = fj(ki,l,jintsp)
         gjkiln = gj(ki,l,jintsp)
         !+APW
         IF (PRESENT(uds)) THEN
            w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
            apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                                                 fjkiln * us(l) * dus(l) )
            apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                                                 gjkiln * uds(l) * duds(l) )
         ENDIF
         !-APW
         ddnln =  ddn(l)
         elall = el(l)
         IF (l<=lnonsph) elall=elall-e_shift!(isp)
         fct  = plegend(l)*fl2p1(l)*&
                ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
         fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
203

Matthias Redies's avatar
Matthias Redies committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
         smat_data(kj,kii)=smat_data(kj,kii) + chi * cph * fct
         hmat_data(kj,kii)=hmat_data(kj,kii) + chi * cph * ( fct * elall + fct2)
         !+APW
         IF (PRESENT(uds)) THEN
            capw1 = cph*plegend(l)&
                    * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
            hmat_data(kj,kii)=hmat_data(kj,kii) + capw1
         ENDIF
         !-APW
         !--->          end loop over l
      ENDDO
   ENDDO
   !--->    end loop over ki
ENDDO
DEALLOCATE(plegend)
END SUBROUTINE HsmtSphGpuKernel_cmplx
Uliana Alekseeva's avatar
Uliana Alekseeva committed
220

Matthias Redies's avatar
Matthias Redies committed
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
SUBROUTINE hsmt_sph_gpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
   USE m_constants, ONLY : fpi_const,tpi_const
   USE m_types
   USE nvtx
   IMPLICIT NONE
   TYPE(t_input),INTENT(IN)      :: input
   TYPE(t_mpi),INTENT(IN)        :: mpi
   TYPE(t_noco),INTENT(IN)       :: noco
   TYPE(t_atoms),INTENT(IN)      :: atoms
   TYPE(t_lapw),INTENT(IN)       :: lapw
   TYPE(t_usdus),INTENT(IN)      :: usdus
   CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
   !     ..
   !     .. Scalar Arguments ..
   INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
   COMPLEX, INTENT(IN)  :: chi
   !     ..
   !     .. Array Arguments ..
   REAL,MANAGED,INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
   REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
   REAL,MANAGED,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
   !     ..
   !     .. Local Scalars ..
   INTEGER l
   INTEGER :: grid, block, loop_size

   !     ..
   !     .. Local Arrays ..
   REAL,MANAGED :: fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
   REAL,MANAGED :: fl2p1bt(0:atoms%lmaxd)
   REAL,MANAGED :: qssbti(3),qssbtj(3)
   INTEGER, DEVICE :: nv_dev(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
253

Matthias Redies's avatar
Matthias Redies committed
254 255 256
   call nvtxStartRange("hsmt_sph",2)
   CALL timestart("spherical setup")
   print*, "HsmtSph_GPU"
Uliana Alekseeva's avatar
Uliana Alekseeva committed
257

Matthias Redies's avatar
Matthias Redies committed
258 259 260 261 262 263 264 265
   DO l = 0,atoms%lmaxd
      fleg1(l) = REAL(l+l+1)/REAL(l+1)
      fleg2(l) = REAL(l)/REAL(l+1)
      fl2p1(l) = REAL(l+l+1)/fpi_const
      fl2p1bt(l) = fl2p1(l)*0.5
   END DO
   qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
   qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
266

Matthias Redies's avatar
Matthias Redies committed
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
   ! pretty ugly solution
   nv_dev = lapw%nv
   loop_size = 1
   block = 32   ! number of threads in a block
   grid = ceiling(real(lapw%nv(jintsp))/(loop_SIZE*block))
   !loop_size = max(lapw%nv(jintsp)/(grid*block),1)   !number of iterations performed by each thread
   !if (loop_size * grid*block < lapw%nv(jintsp)) loop_size = loop_size + 1
   IF (input%l_useapw) THEN
      !TODO!!!!
      ! APW case is not testet
      IF (smat%l_real) THEN
         CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                                                    smat%data_r,hmat%data_r,&
                                           usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
      ELSE
         CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                     atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                   chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                     usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                                                     smat%data_c,hmat%data_c,&
                                           usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
      ENDIF
   ELSE
      IF (smat%l_real) THEN
         CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
      ELSE
         CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                                                     atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
                                            lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                                                   chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                                                     usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
      ENDIF
   ENDIF
   CALL timestop("spherical setup")
Uliana Alekseeva's avatar
Uliana Alekseeva committed
310

Matthias Redies's avatar
Matthias Redies committed
311 312 313
   call nvtxEndRange
   RETURN
END SUBROUTINE hsmt_sph_gpu
Uliana Alekseeva's avatar
Uliana Alekseeva committed
314 315 316
#endif

  SUBROUTINE hsmt_sph_cpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
317 318 319
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    IMPLICIT NONE
320
    TYPE(t_input),INTENT(IN)      :: input
Daniel Wortmann's avatar
Daniel Wortmann committed
321
    TYPE(t_mpi),INTENT(IN)        :: mpi
322 323
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
324 325 326
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
327 328
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
329 330
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
331 332
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
333
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
334
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
Daniel Wortmann's avatar
Daniel Wortmann committed
335
    REAL,    INTENT (IN) :: fj(:,0:,:),gj(:,0:,:)
336 337
    !     ..
    !     .. Local Scalars ..
338
    REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Daniel Wortmann's avatar
Daniel Wortmann committed
339 340
    REAL apw_lo1,apw_lo2,apw1,w1
    COMPLEX capw1
hpclab08's avatar
hpclab08 committed
341 342
    INTEGER kii,ki,kj,l,nn
    INTEGER kj_BlockSize, kj_BlockNum, kjb, kj_start, kj_end
343 344
    !     ..
    !     .. Local Arrays ..
Daniel Wortmann's avatar
Daniel Wortmann committed
345 346
    REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd)
347
    REAL qssbti(3),qssbtj(3)
348
    REAL, ALLOCATABLE :: gdot(:)
349
    REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
Daniel Wortmann's avatar
Daniel Wortmann committed
350
    COMPLEX, ALLOCATABLE :: cph(:)
351 352
    LOGICAL apw(0:atoms%lmaxd)

hpclab08's avatar
hpclab08 committed
353 354
    kj_BlockSize = 1000

Daniel Wortmann's avatar
Daniel Wortmann committed
355
    CALL timestart("spherical setup")
356 357

    DO l = 0,atoms%lmaxd
358 359 360
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
361 362
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
Gregor Michalicek's avatar
Gregor Michalicek committed
363
    !$OMP PARALLEL DEFAULT(NONE)&
hpclab08's avatar
hpclab08 committed
364
    !$OMP SHARED(kj_BlockSize,lapw,atoms,noco,mpi,input,usdus,smat,hmat)&
Gregor Michalicek's avatar
Gregor Michalicek committed
365
    !$OMP SHARED(jintsp,iintsp,n,fleg1,fleg2,fj,gj,isp,fl2p1,el,e_shift,fl2p1bt,chi)&
366
    !$OMP PRIVATE(kii,ki,ski,kj,gdot,l,qssbti,qssbtj,fct2,plegend)&
Daniel Wortmann's avatar
Daniel Wortmann committed
367 368
    !$OMP PRIVATE(cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,apw1)&
hpclab08's avatar
hpclab08 committed
369 370
    !$OMP PRIVATE(capw1,VecHelpS,VecHelpH)&
    !$OMP PRIVATE(kj_start,kj_end,kj_BlockNum, kjb) 
Daniel Wortmann's avatar
Daniel Wortmann committed
371
    ALLOCATE(cph(MAXVAL(lapw%nv)))
372
    ALLOCATE(gdot(0:atoms%lmaxd))
373
    ALLOCATE(VecHelpS(MAXVAL(lapw%nv)),VecHelpH(MAXVAL(lapw%nv)))
374
    gdot=0.0
375 376
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Daniel Wortmann's avatar
Daniel Wortmann committed
377
    !$OMP  DO SCHEDULE(DYNAMIC,1)
378
    DO  ki =  mpi%n_rank+1, lapw%nv(jintsp), mpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
379
       kii=(ki-1)/mpi%n_size+1
380
       ski = lapw%gvec(:,ki,jintsp) + qssbti
hpclab08's avatar
hpclab08 committed
381 382 383 384 385 386 387
       kj_BlockNum = ki/kj_BlockSize + 1
       DO kjb = 1, kj_BlockNum 
          kj_start = (kjb-1) * kj_BlockSize + 1
          kj_end = MIN(ki,kjb * kj_BlockSize,lapw%nv(iintsp))

          !--->       legendre polynomials
          DO kj = kj_start,kj_end
388
             gdot(kj,1) = DOT_PRODUCT(lapw%gk(:,kj,iintsp),lapw%gk(:,ki,jintsp))
hpclab08's avatar
hpclab08 committed
389 390 391 392 393 394 395 396 397 398 399
          END DO
          !--->             set up phase factors
          cph(kj_start:kj_end) = 0.0
          DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
             tnn = tpi_const*atoms%taual(:,nn)
             DO kj = kj_start,kj_end
                cph(kj) = cph(kj) +&
                   CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,iintsp)-qssbtj,tnn)),&
                   SIN(DOT_PRODUCT(lapw%gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
                ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
             END DO
Daniel Wortmann's avatar
Daniel Wortmann committed
400
          END DO
401

hpclab08's avatar
hpclab08 committed
402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
          !--->          update overlap and l-diagonal hamiltonian matrix
          VecHelpS = 0.d0
          VecHelpH = 0.d0
          DO  l = 0,atoms%lmax(n)
             fjkiln = fj(ki,l,jintsp)
             gjkiln = gj(ki,l,jintsp)
          
             IF (input%l_useapw) THEN
                w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                   usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
                apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                   fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
                apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                   gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
             ENDIF
             ddnln =  usdus%ddn(l,n,isp)
             elall = el(l,n,isp)
             IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
             IF (smat%l_real) THEN
                DO kj = kj_start,kj_end
422 423
                   plegend = LegendrePoly(l,gdot(kj))
                   fct  = plegend*fl2p1(l)*&
hpclab08's avatar
hpclab08 committed
424
                        ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
425
                   fct2 = plegend*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
hpclab08's avatar
hpclab08 committed
426 427 428 429 430

                   smat%data_r(kj,kii)=smat%data_r(kj,kii)+REAL(cph(kj))*fct
                   hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
                   !+APW
                   IF (input%l_useapw) THEN
431
                      apw1 = REAL(cph(kj)) * plegend  * &
hpclab08's avatar
hpclab08 committed
432 433 434 435 436 437
                         ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                      hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + apw1
                   ENDIF
                   !-APW
                ENDDO
             ELSE
438
                plegend = LegendrePoly(l,gdot(kj))
hpclab08's avatar
hpclab08 committed
439
                DO kj = kj_start,kj_end
440
                   fct  = plegend*fl2p1(l)*&
hpclab08's avatar
hpclab08 committed
441
                      ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
442
                   fct2 = plegend*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
443
 
hpclab08's avatar
hpclab08 committed
444 445
                   VecHelpS(kj) = VecHelpS(kj) + fct
                   VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Daniel Wortmann's avatar
Daniel Wortmann committed
446

hpclab08's avatar
hpclab08 committed
447
                   IF (input%l_useapw) THEN
448
                      capw1 = cph(kj)*plegend *&
hpclab08's avatar
hpclab08 committed
449 450 451 452
                          ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                      hmat%data_c(kj,kii)=hmat%data_c(kj,kii) + capw1
                   ENDIF
                END DO
453

hpclab08's avatar
hpclab08 committed
454 455 456 457 458 459
             ENDIF
          !--->          end loop over l
          ENDDO
          IF (.not.smat%l_real) THEN
              smat%data_c(kj_start:kj_end,kii)=smat%data_c(kj_start:kj_end,kii) + chi*cph(kj_start:kj_end) * VecHelpS(kj_start:kj_end)
              hmat%data_c(kj_start:kj_end,kii)=hmat%data_c(kj_start:kj_end,kii) + chi*cph(kj_start:kj_end) * VecHelpH(kj_start:kj_end)
Daniel Wortmann's avatar
Daniel Wortmann committed
460
          ENDIF
hpclab08's avatar
hpclab08 committed
461 462 463

       !--->    end loop over kj-blocks 
       ENDDO 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
464
    !--->    end loop over ki
Daniel Wortmann's avatar
Daniel Wortmann committed
465 466
    ENDDO
    !$OMP END DO
467
    DEALLOCATE(gdot)
Daniel Wortmann's avatar
Daniel Wortmann committed
468
    DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
469
    DEALLOCATE(VecHelpS,VecHelpH)
470
    !$OMP END PARALLEL
Daniel Wortmann's avatar
Daniel Wortmann committed
471
    CALL timestop("spherical setup")
472 473

    RETURN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
474
  END SUBROUTINE hsmt_sph_cpu
475
END MODULE m_hsmt_sph