hsmt_sph.F90 19.1 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
8 9
  USE m_juDFT
  IMPLICIT NONE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
10 11 12 13 14 15 16 17

  INTERFACE hsmt_sph
    module procedure hsmt_sph_cpu
#ifdef CPP_GPU
    module procedure hsmt_sph_gpu
#endif
  END INTERFACE

18
CONTAINS
Uliana Alekseeva's avatar
Uliana Alekseeva committed
19 20

#ifdef CPP_GPU
Uliana Alekseeva's avatar
Uliana Alekseeva committed
21 22
  ATTRIBUTES(global) SUBROUTINE HsmtSphGpuKernel_real(grid,block,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
                       lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
23 24
                       smat_data,hmat_data,&
                       uds,dus,us,duds,rmt)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
25 26
     INTEGER, VALUE, INTENT(IN) :: grid, block
     INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
Uliana Alekseeva's avatar
Uliana Alekseeva committed
27
     REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
28
     INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2) 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
29 30 31
     REAL,INTENT(IN)    :: gk(:,:,:)
     REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)     
     REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
32
     REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
33 34 35
     REAL,INTENT(IN)    :: taual(:,:)  
     REAL,INTENT(IN)    :: ddn(0:lmaxd) 
     REAL,INTENT(IN)    :: el(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
36
     REAL,VALUE,INTENT(IN)    :: e_shift
Uliana Alekseeva's avatar
Uliana Alekseeva committed
37 38 39 40 41 42 43 44 45
     REAL,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
     !+APW
     REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd) 
     REAL,INTENT(IN),OPTIONAL   :: rmt
     !-APW

     REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
     REAL, ALLOCATABLE :: plegend(:,:)
     COMPLEX, ALLOCATABLE :: cph(:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
46
     REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
47
     REAL apw_lo1,apw_lo2,apw1,w1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
48 49
     INTEGER kii,ki,kj,l,nn,k
     INTEGER :: loop_start, loop_end, i, loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
50 51 52 53 54 55

     ALLOCATE(cph(MAXVAL(nv)))
     ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
     plegend=0.0
     plegend(:,0)=1.0

Uliana Alekseeva's avatar
Uliana Alekseeva committed
56 57 58 59 60 61 62 63 64 65 66 67
     k = (blockidx%x-1)*blockdim%x + threadidx%x
 
     !TODO!!!     
     !for seq, i.e. ki_start = 1, ki_step = 1
     loop_size = max(ki_end/(grid*block),1)
     if (loop_size * grid*block < ki_end) loop_size = loop_size + 1
     loop_start = (k-1) * loop_size + 1
     loop_end = loop_start + loop_size - 1
     if (loop_end > ki_end ) loop_end = ki_end

     DO ki = loop_start,loop_end,ki_step
     !DO  ki =  ki_start,ki_end,ki_step 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
       kii=(ki-1)/ki_step+1
       ski = gvec(:,ki,jintsp) + qssbti
       !--->       legendre polynomials
       DO kj = 1,ki
          plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
       END DO
       DO l = 1,lmax - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = nn_start,nn_end
          tnn = tpi_const*taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
       DO  l = 0,lmax
          !+APW
          IF (PRESENT(uds)) THEN
             w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
             apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                  fjkiln * us(l) * dus(l) )
             apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                  gjkiln * uds(l) * duds(l) )
          ENDIF
          !-APW
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
          ddnln =  ddn(l)
          elall = el(l)
          IF (l<=lnonsph) elall=elall-e_shift!(isp)
          DO kj = 1,ki
              fct  = plegend(kj,l)*fl2p1(l)*&
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
108 109
              fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )

Uliana Alekseeva's avatar
Uliana Alekseeva committed
110
              smat_data(kj,kii)=smat_data(kj,kii)+REAL(cph(kj))*fct
Uliana Alekseeva's avatar
Uliana Alekseeva committed
111
              hmat_data(kj,kii)=hmat_data(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
112 113 114 115 116 117 118 119 120 121 122 123 124 125
              !+APW
              IF (PRESENT(uds)) THEN
                 apw1 = REAL(cph(kj)) * plegend(kj,l)  * &
                        ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                 hmat_data(kj,kii)=hmat_data(kj,kii) + apw1
              ENDIF
              !-APW
          ENDDO
       !--->          end loop over l
       ENDDO
     !--->    end loop over ki
     ENDDO
     DEALLOCATE(plegend)
     DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
126
  END SUBROUTINE HsmtSphGpuKernel_real
Uliana Alekseeva's avatar
Uliana Alekseeva committed
127

Uliana Alekseeva's avatar
Uliana Alekseeva committed
128
  ATTRIBUTES(global) SUBROUTINE HsmtSphGpuKernel_cmplx(grid,block,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
129 130 131
                       lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                       smat_data,hmat_data,&
                       uds,dus,us,duds,rmt)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
132 133 134
     INTEGER, VALUE, INTENT(IN) :: grid, block
     INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
     COMPLEX, VALUE, INTENT(IN)  :: chi
Uliana Alekseeva's avatar
Uliana Alekseeva committed
135
     REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
136
     INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
137 138 139
     REAL,INTENT(IN)    :: gk(:,:,:)
     REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)     
     REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
140
     REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
141 142 143
     REAL,INTENT(IN)    :: taual(:,:)  
     REAL,INTENT(IN)    :: ddn(0:lmaxd) 
     REAL,INTENT(IN)    :: el(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
144
     REAL, VALUE,INTENT(IN)    :: e_shift
Uliana Alekseeva's avatar
Uliana Alekseeva committed
145 146 147 148 149 150 151 152
     COMPLEX,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
     !+APW
     REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd) 
     REAL,INTENT(IN),OPTIONAL   :: rmt
     !-APW

     REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
     REAL, ALLOCATABLE :: plegend(:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
153
     REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
154 155 156
     COMPLEX, ALLOCATABLE :: cph(:)
     REAL apw_lo1,apw_lo2,w1
     COMPLEX capw1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
157 158 159
     REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
     INTEGER kii,ki,kj,l,nn,kj_end,k
     INTEGER :: loop_start, loop_end, i, loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
160 161 162 163 164

     ALLOCATE(cph(MAXVAL(nv)))
     ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
     plegend=0.0
     plegend(:,0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
165
     ALLOCATE(VecHelpS(MAXVAL(nv)),VecHelpH(MAXVAL(nv)))
Uliana Alekseeva's avatar
Uliana Alekseeva committed
166

Uliana Alekseeva's avatar
Uliana Alekseeva committed
167 168 169 170 171 172 173 174 175 176 177 178
     k = (blockidx%x-1)*blockdim%x + threadidx%x
 
     !TODO!!!     
     !for seq, i.e. ki_start = 1, ki_step = 1
     loop_size = max(ki_end/(grid*block),1)
     if (loop_size * grid*block < ki_end) loop_size = loop_size + 1
     loop_start = (k-1) * loop_size + 1
     loop_end = loop_start + loop_size - 1
     if (loop_end > ki_end ) loop_end = ki_end

     DO ki = loop_start,loop_end,ki_step
     !DO  ki =  ki_start,ki_end,ki_step 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
       kii=(ki-1)/ki_step+1
       ski = gvec(:,ki,jintsp) + qssbti
       !--->       legendre polynomials
       DO kj = 1,ki
          plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
       END DO
       DO l = 1,lmax - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = nn_start,nn_end
          tnn = tpi_const*taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
Uliana Alekseeva's avatar
Uliana Alekseeva committed
201 202 203
       kj_end = MIN(ki,nv(iintsp)) 
       VecHelpS = 0.d0
       VecHelpH = 0.d0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
       DO  l = 0,lmax
          !+APW
          IF (PRESENT(uds)) THEN
             w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
             apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                  fjkiln * us(l) * dus(l) )
             apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                  gjkiln * uds(l) * duds(l) )
          ENDIF
          !-APW
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
          ddnln =  ddn(l)
          elall = el(l)
          IF (l<=lnonsph) elall=elall-e_shift!(isp)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
219 220 221 222 223 224 225
          DO kj = 1,kj_end
              fct  = plegend(kj,l)*fl2p1(l)*&
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
              fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
 
              VecHelpS(kj) = VecHelpS(kj) + fct
              VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
226 227 228 229 230 231 232 233 234 235
              !+APW
              IF (PRESENT(uds)) THEN
                 capw1 = cph(kj)*plegend(kj,l)&
                        * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                 hmat_data(kj,kii)=hmat_data(kj,kii) + capw1
              ENDIF
              !-APW
          END DO
       !--->          end loop over l
       ENDDO
Uliana Alekseeva's avatar
Uliana Alekseeva committed
236 237
       smat_data(:kj_end,kii)=smat_data(:kj_end,kii) + chi*cph(:kj_end) * VecHelpS(:kj_end)
       hmat_data(:kj_end,kii)=hmat_data(:kj_end,kii) + chi*cph(:kj_end) * VecHelpH(:kj_end)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
238 239 240 241
     !--->    end loop over ki
     ENDDO
     DEALLOCATE(plegend)
     DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
242 243
     DEALLOCATE(VecHelpS,VecHelpH)
  END SUBROUTINE HsmtSphGpuKernel_cmplx
Uliana Alekseeva's avatar
Uliana Alekseeva committed
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263


  SUBROUTINE hsmt_sph_gpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE nvtx
    IMPLICIT NONE
    TYPE(t_input),INTENT(IN)      :: input
    TYPE(t_mpi),INTENT(IN)        :: mpi
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
    !     ..
    !     .. Array Arguments ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
264
    REAL,MANAGED,INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
265 266 267 268
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
    REAL,MANAGED,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
    !     ..
    !     .. Local Scalars ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
269
    INTEGER l
Uliana Alekseeva's avatar
Uliana Alekseeva committed
270
    INTEGER :: grid, block
Uliana Alekseeva's avatar
Uliana Alekseeva committed
271 272 273

    !     ..
    !     .. Local Arrays ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
274 275 276 277
    REAL,MANAGED :: fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL,MANAGED :: fl2p1bt(0:atoms%lmaxd)
    REAL,MANAGED :: qssbti(3),qssbtj(3)
    INTEGER, DEVICE :: nv_dev(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
278 279 280 281 282 283 284 285 286 287 288 289 290

    call nvtxStartRange("hsmt_sph",2)    
    CALL timestart("spherical setup")

    DO l = 0,atoms%lmaxd
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)

Uliana Alekseeva's avatar
Uliana Alekseeva committed
291 292 293 294
    ! pretty ugly solution
    nv_dev = lapw%nv
    block = 256
    grid = lapw%nv(jintsp)/(block*4) + 1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
295
    IF (input%l_useapw) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
296 297
       !TODO!!!!
       ! APW case is not testet
Uliana Alekseeva's avatar
Uliana Alekseeva committed
298
       IF (smat%l_real) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
299
          CALL HsmtSphGpuKernel_real<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
300
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
301
                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
302 303 304 305
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                    smat%data_r,hmat%data_r,&
                    usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
       ELSE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
306
          CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
307 308 309 310 311 312
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                    chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                    smat%data_c,hmat%data_c,&
                    usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
       ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
313 314
    ELSE
       IF (smat%l_real) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
315
          CALL HsmtSphGpuKernel_real<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
316
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
317
                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
318
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
319
       ELSE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
320
          CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
321 322 323
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                    chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
324 325 326 327 328 329 330 331 332 333
       ENDIF
    ENDIF
    CALL timestop("spherical setup")

    call nvtxEndRange
    RETURN
  END SUBROUTINE hsmt_sph_gpu
#endif

  SUBROUTINE hsmt_sph_cpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
334 335 336
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    IMPLICIT NONE
337
    TYPE(t_input),INTENT(IN)      :: input
Daniel Wortmann's avatar
Daniel Wortmann committed
338
    TYPE(t_mpi),INTENT(IN)        :: mpi
339 340
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
341 342 343
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
344 345
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
346 347
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
348 349
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
350
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
351
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
Daniel Wortmann's avatar
Daniel Wortmann committed
352
    REAL,    INTENT (IN) :: fj(:,0:,:),gj(:,0:,:)
353 354
    !     ..
    !     .. Local Scalars ..
355
    REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Daniel Wortmann's avatar
Daniel Wortmann committed
356
    REAL apw_lo1,apw_lo2,apw1,w1
357

Daniel Wortmann's avatar
Daniel Wortmann committed
358
    COMPLEX capw1
359
    INTEGER kii,ki,kj,l,nn,kj_end
360 361 362

    !     ..
    !     .. Local Arrays ..
Daniel Wortmann's avatar
Daniel Wortmann committed
363 364
    REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd)
365 366
    REAL qssbti(3),qssbtj(3)
    REAL, ALLOCATABLE :: plegend(:,:)
367
    REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
Daniel Wortmann's avatar
Daniel Wortmann committed
368
    COMPLEX, ALLOCATABLE :: cph(:)
369 370
    LOGICAL apw(0:atoms%lmaxd)

Daniel Wortmann's avatar
Daniel Wortmann committed
371
    CALL timestart("spherical setup")
372 373

    DO l = 0,atoms%lmaxd
374 375 376
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
377 378
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
Daniel Wortmann's avatar
Daniel Wortmann committed
379
    !$OMP PARALLEL DEFAULT(SHARED)&
380
    !$OMP PRIVATE(kii,ki,ski,kj,plegend,l,kj_end)&
Daniel Wortmann's avatar
Daniel Wortmann committed
381 382
    !$OMP PRIVATE(cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,apw1)&
383
    !$OMP PRIVATE(capw1,VecHelpS,VecHelpH) 
Daniel Wortmann's avatar
Daniel Wortmann committed
384 385
    ALLOCATE(cph(MAXVAL(lapw%nv)))
    ALLOCATE(plegend(MAXVAL(lapw%nv),0:atoms%lmaxd))
386
    ALLOCATE(VecHelpS(MAXVAL(lapw%nv)),VecHelpH(MAXVAL(lapw%nv)))
Daniel Wortmann's avatar
Daniel Wortmann committed
387 388
    plegend=0.0
    plegend(:,0)=1.0
389 390
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Daniel Wortmann's avatar
Daniel Wortmann committed
391
    !$OMP  DO SCHEDULE(DYNAMIC,1)
392
    DO  ki =  mpi%n_rank+1, lapw%nv(jintsp), mpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
393
       kii=(ki-1)/mpi%n_size+1
394
       ski = lapw%gvec(:,ki,jintsp) + qssbti
Daniel Wortmann's avatar
Daniel Wortmann committed
395 396
       !--->       legendre polynomials
       DO kj = 1,ki
397
          plegend(kj,1) = DOT_PRODUCT(lapw%gk(:,kj,iintsp),lapw%gk(:,ki,jintsp))
Daniel Wortmann's avatar
Daniel Wortmann committed
398 399 400 401 402 403 404 405 406 407
       END DO
       DO l = 1,atoms%lmax(n) - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
          tnn = tpi_const*atoms%taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
408 409 410
                  CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(lapw%gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
Daniel Wortmann's avatar
Daniel Wortmann committed
411 412 413 414
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
415 416 417
       kj_end = MIN(ki,lapw%nv(iintsp)) 
       VecHelpS = 0.d0
       VecHelpH = 0.d0
Daniel Wortmann's avatar
Daniel Wortmann committed
418
       DO  l = 0,atoms%lmax(n)
419 420
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
421
          !
Daniel Wortmann's avatar
Daniel Wortmann committed
422 423 424 425 426 427 428 429 430 431 432
          IF (input%l_useapw) THEN
             w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                  usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
             apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                  fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
             apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                  gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
             !
          ENDIF
          ddnln =  usdus%ddn(l,n,isp)
          elall = el(l,n,isp)
433
          IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
Daniel Wortmann's avatar
Daniel Wortmann committed
434 435 436
          IF (smat%l_real) THEN
             DO kj = 1,ki
                fct  = plegend(kj,l)*fl2p1(l)*&
437
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
438 439
                fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )

Daniel Wortmann's avatar
Daniel Wortmann committed
440
                smat%data_r(kj,kii)=smat%data_r(kj,kii)+REAL(cph(kj))*fct
441
                hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
Daniel Wortmann's avatar
Daniel Wortmann committed
442 443 444 445 446
                !+APW
                IF (input%l_useapw) THEN
                   apw1 = REAL(cph(kj)) * plegend(kj,l)  * &
                        ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                   hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + apw1
447
                ENDIF
Daniel Wortmann's avatar
Daniel Wortmann committed
448 449 450
                !-APW
             ENDDO
          ELSE
451 452
             DO kj = 1,kj_end
                fct  = plegend(kj,l)*fl2p1(l)*&
453
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
454 455 456 457
                fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
 
                VecHelpS(kj) = VecHelpS(kj) + fct
                VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Daniel Wortmann's avatar
Daniel Wortmann committed
458 459 460 461 462

                IF (input%l_useapw) THEN
                   capw1 = cph(kj)*plegend(kj,l)&
                        * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                   hmat%data_c(kj,kii)=hmat%data_c(kj,kii) + capw1
463 464 465
                ENDIF
             END DO

Daniel Wortmann's avatar
Daniel Wortmann committed
466
          ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
467
       !--->          end loop over l
Daniel Wortmann's avatar
Daniel Wortmann committed
468
       ENDDO
469 470 471 472
       IF (.not.smat%l_real) THEN
           smat%data_c(:kj_end,kii)=smat%data_c(:kj_end,kii) + chi*cph(:kj_end) * VecHelpS(:kj_end)
           hmat%data_c(:kj_end,kii)=hmat%data_c(:kj_end,kii) + chi*cph(:kj_end) * VecHelpH(:kj_end)
       ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
473
    !--->    end loop over ki
Daniel Wortmann's avatar
Daniel Wortmann committed
474 475
    ENDDO
    !$OMP END DO
476
    DEALLOCATE(plegend)
Daniel Wortmann's avatar
Daniel Wortmann committed
477
    DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
478
    DEALLOCATE(VecHelpS,VecHelpH)
479
    !$OMP END PARALLEL
Daniel Wortmann's avatar
Daniel Wortmann committed
480
    CALL timestop("spherical setup")
481 482

    RETURN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
483
  END SUBROUTINE hsmt_sph_cpu
484
END MODULE m_hsmt_sph