hsmt_sph.F90 18.6 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
8 9
  USE m_juDFT
  IMPLICIT NONE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
10 11 12 13 14 15 16 17

  INTERFACE hsmt_sph
    module procedure hsmt_sph_cpu
#ifdef CPP_GPU
    module procedure hsmt_sph_gpu
#endif
  END INTERFACE

18
CONTAINS
Uliana Alekseeva's avatar
Uliana Alekseeva committed
19 20

#ifdef CPP_GPU
Uliana Alekseeva's avatar
Uliana Alekseeva committed
21 22
  ATTRIBUTES(global)&
  SUBROUTINE HsmtSphGpuKernel_real(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
23
                       lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
24 25
                       smat_data,hmat_data,&
                       uds,dus,us,duds,rmt)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
26
     INTEGER, VALUE, INTENT(IN) :: loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
27
     INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
Uliana Alekseeva's avatar
Uliana Alekseeva committed
28
     REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
29
     INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2) 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
30 31 32
     REAL,INTENT(IN)    :: gk(:,:,:)
     REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)     
     REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
33
     REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
34 35 36
     REAL,INTENT(IN)    :: taual(:,:)  
     REAL,INTENT(IN)    :: ddn(0:lmaxd) 
     REAL,INTENT(IN)    :: el(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
37
     REAL,VALUE,INTENT(IN)    :: e_shift
Uliana Alekseeva's avatar
Uliana Alekseeva committed
38 39 40 41 42 43 44
     REAL,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
     !+APW
     REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd) 
     REAL,INTENT(IN),OPTIONAL   :: rmt
     !-APW

     REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
Uliana Alekseeva's avatar
Uliana Alekseeva committed
45 46
     REAL, ALLOCATABLE :: plegend(:)
     REAL cph
Uliana Alekseeva's avatar
Uliana Alekseeva committed
47
     REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
48
     REAL apw_lo1,apw_lo2,apw1,w1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
49
     INTEGER kii,ki,kj,l,nn,k
Uliana Alekseeva's avatar
Uliana Alekseeva committed
50
     INTEGER :: loop_start, loop_end, i
Uliana Alekseeva's avatar
Uliana Alekseeva committed
51

Uliana Alekseeva's avatar
Uliana Alekseeva committed
52
     ALLOCATE(plegend(0:lmaxd))
Uliana Alekseeva's avatar
Uliana Alekseeva committed
53
     plegend=0.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
54
     plegend(0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
55

Uliana Alekseeva's avatar
Uliana Alekseeva committed
56 57 58 59 60 61 62 63 64 65
     k = (blockidx%x-1)*blockdim%x + threadidx%x
 
     !TODO!!!     
     !for seq, i.e. ki_start = 1, ki_step = 1
     loop_start = (k-1) * loop_size + 1
     loop_end = loop_start + loop_size - 1
     if (loop_end > ki_end ) loop_end = ki_end

     DO ki = loop_start,loop_end,ki_step
     !DO  ki =  ki_start,ki_end,ki_step 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
66
       DO kj = 1,ki
Uliana Alekseeva's avatar
Uliana Alekseeva committed
67 68 69 70 71 72 73 74 75 76 77 78 79
          kii=(ki-1)/ki_step+1
          ski = gvec(:,ki,jintsp) + qssbti
          !--->             set up phase factors
          cph = 0.0
          DO nn = nn_start,nn_end
             tnn = tpi_const*taual(:,nn)
             cph = cph + COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn))
             ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          ENDDO
          !--->       legendre polynomials
          plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
          DO l = 1,lmax - 1
             plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
80
          END DO
Uliana Alekseeva's avatar
Uliana Alekseeva committed
81 82 83 84 85 86 87
          DO  l = 0,lmax
             fjkiln = fj(ki,l,jintsp)
             gjkiln = gj(ki,l,jintsp)
             !+APW
             IF (PRESENT(uds)) THEN
               w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
               apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
88
                  fjkiln * us(l) * dus(l) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
89
               apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
90
                  gjkiln * uds(l) * duds(l) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
91 92 93 94 95 96 97
             ENDIF
             !-APW
             ddnln =  ddn(l)
             elall = el(l)
             IF (l<=lnonsph) elall=elall-e_shift!(isp)
             !DO kj = 1,ki
             fct  = plegend(l)*fl2p1(l)*&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
98
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
99
             fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
100

Uliana Alekseeva's avatar
Uliana Alekseeva committed
101 102 103 104 105
             smat_data(kj,kii)=smat_data(kj,kii)+cph*fct
             hmat_data(kj,kii)=hmat_data(kj,kii) + cph * ( fct * elall + fct2)
             !+APW
             IF (PRESENT(uds)) THEN
                 apw1 = cph * plegend(l)  * &
Uliana Alekseeva's avatar
Uliana Alekseeva committed
106 107
                        ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                 hmat_data(kj,kii)=hmat_data(kj,kii) + apw1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
108 109 110 111
             ENDIF
             !-APW
             !ENDDO
          !--->          end loop over l
Uliana Alekseeva's avatar
Uliana Alekseeva committed
112 113 114 115 116
          ENDDO
       ENDDO
     !--->    end loop over ki
     ENDDO
     DEALLOCATE(plegend)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
117
  END SUBROUTINE HsmtSphGpuKernel_real
Uliana Alekseeva's avatar
Uliana Alekseeva committed
118

Uliana Alekseeva's avatar
Uliana Alekseeva committed
119 120
  ATTRIBUTES(global)&
  SUBROUTINE HsmtSphGpuKernel_cmplx(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
121 122 123
                       lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                       smat_data,hmat_data,&
                       uds,dus,us,duds,rmt)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
124
     INTEGER, VALUE, INTENT(IN) :: loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
125 126
     INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
     COMPLEX, VALUE, INTENT(IN)  :: chi
Uliana Alekseeva's avatar
Uliana Alekseeva committed
127
     REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
128
     INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
129 130 131
     REAL,INTENT(IN)    :: gk(:,:,:)
     REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)     
     REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
132
     REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
133 134 135
     REAL,INTENT(IN)    :: taual(:,:)  
     REAL,INTENT(IN)    :: ddn(0:lmaxd) 
     REAL,INTENT(IN)    :: el(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
136
     REAL, VALUE,INTENT(IN)    :: e_shift
Uliana Alekseeva's avatar
Uliana Alekseeva committed
137 138 139 140 141 142 143
     COMPLEX,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
     !+APW
     REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd) 
     REAL,INTENT(IN),OPTIONAL   :: rmt
     !-APW

     REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
Uliana Alekseeva's avatar
Uliana Alekseeva committed
144 145
     REAL, ALLOCATABLE :: plegend(:)
     COMPLEX :: cph
Uliana Alekseeva's avatar
Uliana Alekseeva committed
146 147
     REAL apw_lo1,apw_lo2,w1
     COMPLEX capw1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
148
     REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
149 150
     INTEGER kii,ki,kj,kjj,l,nn,kj_end,k
     INTEGER :: loop_start, loop_end, i
Uliana Alekseeva's avatar
Uliana Alekseeva committed
151

Uliana Alekseeva's avatar
Uliana Alekseeva committed
152
     ALLOCATE(plegend(0:lmaxd))
Uliana Alekseeva's avatar
Uliana Alekseeva committed
153
     plegend=0.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
154
     plegend(0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
155

Uliana Alekseeva's avatar
Uliana Alekseeva committed
156 157 158 159 160 161 162 163 164 165
     k = (blockidx%x-1)*blockdim%x + threadidx%x
 
     !TODO!!!     
     !for seq, i.e. ki_start = 1, ki_step = 1
     loop_start = (k-1) * loop_size + 1
     loop_end = loop_start + loop_size - 1
     if (loop_end > ki_end ) loop_end = ki_end

     DO ki = loop_start,loop_end,ki_step
     !DO  ki =  ki_start,ki_end,ki_step 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
166 167 168 169 170 171 172 173 174 175
       kj_end = MIN(ki,nv(iintsp)) 
       DO kj = 1,kj_end
          kii=(ki-1)/ki_step+1
          ski = gvec(:,ki,jintsp) + qssbti

          !--->             set up phase factors
          cph = 0.0
          DO nn = nn_start,nn_end
             tnn = tpi_const*taual(:,nn)
             cph = cph +&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
176 177
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
Uliana Alekseeva's avatar
Uliana Alekseeva committed
178 179 180 181 182 183
             ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          ENDDO
          !--->       legendre polynomials
          plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
          DO l = 1,lmax - 1
             plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
184
          END DO
Uliana Alekseeva's avatar
Uliana Alekseeva committed
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
          DO  l = 0,lmax
             fjkiln = fj(ki,l,jintsp)
             gjkiln = gj(ki,l,jintsp)
             !+APW
             IF (PRESENT(uds)) THEN
                w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
                apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                    fjkiln * us(l) * dus(l) )
                apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                    gjkiln * uds(l) * duds(l) )
             ENDIF
             !-APW
             ddnln =  ddn(l)
             elall = el(l)
             IF (l<=lnonsph) elall=elall-e_shift!(isp)
             fct  = plegend(l)*fl2p1(l)*&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
201
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
202
             fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
203
 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
204 205 206 207 208
             smat_data(kj,kii)=smat_data(kj,kii) + chi * cph * fct
             hmat_data(kj,kii)=hmat_data(kj,kii) + chi * cph * ( fct * elall + fct2)
             !+APW
             IF (PRESENT(uds)) THEN
                 capw1 = cph*plegend(l)&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
209 210
                        * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                 hmat_data(kj,kii)=hmat_data(kj,kii) + capw1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
211 212 213 214
             ENDIF
             !-APW
          !--->          end loop over l
          ENDDO
Uliana Alekseeva's avatar
Uliana Alekseeva committed
215 216 217 218
       ENDDO
     !--->    end loop over ki
     ENDDO
     DEALLOCATE(plegend)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
219
  END SUBROUTINE HsmtSphGpuKernel_cmplx
Uliana Alekseeva's avatar
Uliana Alekseeva committed
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239


  SUBROUTINE hsmt_sph_gpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE nvtx
    IMPLICIT NONE
    TYPE(t_input),INTENT(IN)      :: input
    TYPE(t_mpi),INTENT(IN)        :: mpi
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
    !     ..
    !     .. Array Arguments ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
240
    REAL,MANAGED,INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
241 242 243 244
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
    REAL,MANAGED,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
    !     ..
    !     .. Local Scalars ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
245
    INTEGER l
Uliana Alekseeva's avatar
Uliana Alekseeva committed
246
    INTEGER :: grid, block, loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
247 248 249

    !     ..
    !     .. Local Arrays ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
250 251 252 253
    REAL,MANAGED :: fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL,MANAGED :: fl2p1bt(0:atoms%lmaxd)
    REAL,MANAGED :: qssbti(3),qssbtj(3)
    INTEGER, DEVICE :: nv_dev(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
254 255 256

    call nvtxStartRange("hsmt_sph",2)    
    CALL timestart("spherical setup")
Uliana Alekseeva's avatar
Uliana Alekseeva committed
257
    print*, "HsmtSph_GPU"
Uliana Alekseeva's avatar
Uliana Alekseeva committed
258 259 260 261 262 263 264 265 266 267

    DO l = 0,atoms%lmaxd
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)

Uliana Alekseeva's avatar
Uliana Alekseeva committed
268 269
    ! pretty ugly solution
    nv_dev = lapw%nv
Uliana Alekseeva's avatar
Uliana Alekseeva committed
270 271 272 273 274
    loop_size = 1
    block = 32   ! number of threads in a block
    grid = ceiling(real(lapw%nv(jintsp))/(loop_SIZE*block)) 
    !loop_size = max(lapw%nv(jintsp)/(grid*block),1)   !number of iterations performed by each thread
    !if (loop_size * grid*block < lapw%nv(jintsp)) loop_size = loop_size + 1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
275
    IF (input%l_useapw) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
276 277
       !TODO!!!!
       ! APW case is not testet
Uliana Alekseeva's avatar
Uliana Alekseeva committed
278
       IF (smat%l_real) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
279 280
          CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
281
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
282
                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
283 284 285 286
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                    smat%data_r,hmat%data_r,&
                    usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
       ELSE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
287 288
          CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
289 290 291 292 293 294
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                    chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                    smat%data_c,hmat%data_c,&
                    usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
       ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
295 296
    ELSE
       IF (smat%l_real) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
297 298
          CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
299
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
300
                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
301
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
302
       ELSE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
303 304
          CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
                    atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
305 306 307
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                    chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
308 309 310 311 312 313 314 315 316 317
       ENDIF
    ENDIF
    CALL timestop("spherical setup")

    call nvtxEndRange
    RETURN
  END SUBROUTINE hsmt_sph_gpu
#endif

  SUBROUTINE hsmt_sph_cpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
318 319 320
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    IMPLICIT NONE
321
    TYPE(t_input),INTENT(IN)      :: input
Daniel Wortmann's avatar
Daniel Wortmann committed
322
    TYPE(t_mpi),INTENT(IN)        :: mpi
323 324
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
325 326 327
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
328 329
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
330 331
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
332 333
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
334
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
335
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
Daniel Wortmann's avatar
Daniel Wortmann committed
336
    REAL,    INTENT (IN) :: fj(:,0:,:),gj(:,0:,:)
337 338
    !     ..
    !     .. Local Scalars ..
339
    REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Daniel Wortmann's avatar
Daniel Wortmann committed
340
    REAL apw_lo1,apw_lo2,apw1,w1
341

Daniel Wortmann's avatar
Daniel Wortmann committed
342
    COMPLEX capw1
343
    INTEGER kii,ki,kj,l,nn,kj_end
344 345 346

    !     ..
    !     .. Local Arrays ..
Daniel Wortmann's avatar
Daniel Wortmann committed
347 348
    REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd)
349 350
    REAL qssbti(3),qssbtj(3)
    REAL, ALLOCATABLE :: plegend(:,:)
351
    REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
Daniel Wortmann's avatar
Daniel Wortmann committed
352
    COMPLEX, ALLOCATABLE :: cph(:)
353 354
    LOGICAL apw(0:atoms%lmaxd)

Daniel Wortmann's avatar
Daniel Wortmann committed
355
    CALL timestart("spherical setup")
356 357

    DO l = 0,atoms%lmaxd
358 359 360
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
361 362
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
Gregor Michalicek's avatar
Gregor Michalicek committed
363 364 365 366
    !$OMP PARALLEL DEFAULT(NONE)&
    !$OMP SHARED(lapw,atoms,noco,mpi,input,usdus,smat,hmat)&
    !$OMP SHARED(jintsp,iintsp,n,fleg1,fleg2,fj,gj,isp,fl2p1,el,e_shift,fl2p1bt,chi)&
    !$OMP PRIVATE(kii,ki,ski,kj,plegend,l,kj_end,qssbti,qssbtj,fct2)&
Daniel Wortmann's avatar
Daniel Wortmann committed
367 368
    !$OMP PRIVATE(cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,apw1)&
369
    !$OMP PRIVATE(capw1,VecHelpS,VecHelpH) 
Daniel Wortmann's avatar
Daniel Wortmann committed
370 371
    ALLOCATE(cph(MAXVAL(lapw%nv)))
    ALLOCATE(plegend(MAXVAL(lapw%nv),0:atoms%lmaxd))
372
    ALLOCATE(VecHelpS(MAXVAL(lapw%nv)),VecHelpH(MAXVAL(lapw%nv)))
Daniel Wortmann's avatar
Daniel Wortmann committed
373 374
    plegend=0.0
    plegend(:,0)=1.0
375 376
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Daniel Wortmann's avatar
Daniel Wortmann committed
377
    !$OMP  DO SCHEDULE(DYNAMIC,1)
378
    DO  ki =  mpi%n_rank+1, lapw%nv(jintsp), mpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
379
       kii=(ki-1)/mpi%n_size+1
380
       ski = lapw%gvec(:,ki,jintsp) + qssbti
Daniel Wortmann's avatar
Daniel Wortmann committed
381 382
       !--->       legendre polynomials
       DO kj = 1,ki
383
          plegend(kj,1) = DOT_PRODUCT(lapw%gk(:,kj,iintsp),lapw%gk(:,ki,jintsp))
Daniel Wortmann's avatar
Daniel Wortmann committed
384 385 386 387 388 389 390 391 392 393
       END DO
       DO l = 1,atoms%lmax(n) - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
          tnn = tpi_const*atoms%taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
394 395 396
                  CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(lapw%gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
Daniel Wortmann's avatar
Daniel Wortmann committed
397 398 399 400
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
401 402 403
       kj_end = MIN(ki,lapw%nv(iintsp)) 
       VecHelpS = 0.d0
       VecHelpH = 0.d0
Daniel Wortmann's avatar
Daniel Wortmann committed
404
       DO  l = 0,atoms%lmax(n)
405 406
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
407
          !
Daniel Wortmann's avatar
Daniel Wortmann committed
408 409 410 411 412 413 414 415 416 417 418
          IF (input%l_useapw) THEN
             w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                  usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
             apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                  fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
             apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                  gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
             !
          ENDIF
          ddnln =  usdus%ddn(l,n,isp)
          elall = el(l,n,isp)
419
          IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
Daniel Wortmann's avatar
Daniel Wortmann committed
420 421 422
          IF (smat%l_real) THEN
             DO kj = 1,ki
                fct  = plegend(kj,l)*fl2p1(l)*&
423
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
424 425
                fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )

Daniel Wortmann's avatar
Daniel Wortmann committed
426
                smat%data_r(kj,kii)=smat%data_r(kj,kii)+REAL(cph(kj))*fct
427
                hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
Daniel Wortmann's avatar
Daniel Wortmann committed
428 429 430 431 432
                !+APW
                IF (input%l_useapw) THEN
                   apw1 = REAL(cph(kj)) * plegend(kj,l)  * &
                        ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                   hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + apw1
433
                ENDIF
Daniel Wortmann's avatar
Daniel Wortmann committed
434 435 436
                !-APW
             ENDDO
          ELSE
437 438
             DO kj = 1,kj_end
                fct  = plegend(kj,l)*fl2p1(l)*&
439
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
440 441 442 443
                fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
 
                VecHelpS(kj) = VecHelpS(kj) + fct
                VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Daniel Wortmann's avatar
Daniel Wortmann committed
444 445 446 447 448

                IF (input%l_useapw) THEN
                   capw1 = cph(kj)*plegend(kj,l)&
                        * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                   hmat%data_c(kj,kii)=hmat%data_c(kj,kii) + capw1
449 450 451
                ENDIF
             END DO

Daniel Wortmann's avatar
Daniel Wortmann committed
452
          ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
453
       !--->          end loop over l
Daniel Wortmann's avatar
Daniel Wortmann committed
454
       ENDDO
455 456 457 458
       IF (.not.smat%l_real) THEN
           smat%data_c(:kj_end,kii)=smat%data_c(:kj_end,kii) + chi*cph(:kj_end) * VecHelpS(:kj_end)
           hmat%data_c(:kj_end,kii)=hmat%data_c(:kj_end,kii) + chi*cph(:kj_end) * VecHelpH(:kj_end)
       ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
459
    !--->    end loop over ki
Daniel Wortmann's avatar
Daniel Wortmann committed
460 461
    ENDDO
    !$OMP END DO
462
    DEALLOCATE(plegend)
Daniel Wortmann's avatar
Daniel Wortmann committed
463
    DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
464
    DEALLOCATE(VecHelpS,VecHelpH)
465
    !$OMP END PARALLEL
Daniel Wortmann's avatar
Daniel Wortmann committed
466
    CALL timestop("spherical setup")
467 468

    RETURN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
469
  END SUBROUTINE hsmt_sph_cpu
470
END MODULE m_hsmt_sph