hsmt_sph.F90 19.3 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
8 9
  USE m_juDFT
  IMPLICIT NONE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
10 11 12 13 14 15 16 17

  INTERFACE hsmt_sph
    module procedure hsmt_sph_cpu
#ifdef CPP_GPU
    module procedure hsmt_sph_gpu
#endif
  END INTERFACE

18
CONTAINS
Uliana Alekseeva's avatar
Uliana Alekseeva committed
19 20

#ifdef CPP_GPU
Uliana Alekseeva's avatar
Uliana Alekseeva committed
21 22
  ATTRIBUTES(global) SUBROUTINE HsmtSphGpuKernel_real(grid,block,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
                       lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
23 24
                       smat_data,hmat_data,&
                       uds,dus,us,duds,rmt)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
25 26
     INTEGER, VALUE, INTENT(IN) :: grid, block
     INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
Uliana Alekseeva's avatar
Uliana Alekseeva committed
27
     REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
28
     INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2) 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
29 30 31
     REAL,INTENT(IN)    :: gk(:,:,:)
     REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)     
     REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
32
     REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
33 34 35
     REAL,INTENT(IN)    :: taual(:,:)  
     REAL,INTENT(IN)    :: ddn(0:lmaxd) 
     REAL,INTENT(IN)    :: el(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
36
     REAL,VALUE,INTENT(IN)    :: e_shift
Uliana Alekseeva's avatar
Uliana Alekseeva committed
37 38 39 40 41 42 43 44 45
     REAL,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
     !+APW
     REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd) 
     REAL,INTENT(IN),OPTIONAL   :: rmt
     !-APW

     REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
     REAL, ALLOCATABLE :: plegend(:,:)
     COMPLEX, ALLOCATABLE :: cph(:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
46
     REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
47
     REAL apw_lo1,apw_lo2,apw1,w1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
48 49
     INTEGER kii,ki,kj,l,nn,k
     INTEGER :: loop_start, loop_end, i, loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
50 51 52 53 54 55

     ALLOCATE(cph(MAXVAL(nv)))
     ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
     plegend=0.0
     plegend(:,0)=1.0

Uliana Alekseeva's avatar
Uliana Alekseeva committed
56 57 58 59 60 61 62 63 64 65 66 67
     k = (blockidx%x-1)*blockdim%x + threadidx%x
 
     !TODO!!!     
     !for seq, i.e. ki_start = 1, ki_step = 1
     loop_size = max(ki_end/(grid*block),1)
     if (loop_size * grid*block < ki_end) loop_size = loop_size + 1
     loop_start = (k-1) * loop_size + 1
     loop_end = loop_start + loop_size - 1
     if (loop_end > ki_end ) loop_end = ki_end

     DO ki = loop_start,loop_end,ki_step
     !DO  ki =  ki_start,ki_end,ki_step 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
       kii=(ki-1)/ki_step+1
       ski = gvec(:,ki,jintsp) + qssbti
       !--->       legendre polynomials
       DO kj = 1,ki
          plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
       END DO
       DO l = 1,lmax - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = nn_start,nn_end
          tnn = tpi_const*taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
       DO  l = 0,lmax
          !+APW
          IF (PRESENT(uds)) THEN
             w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
             apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                  fjkiln * us(l) * dus(l) )
             apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                  gjkiln * uds(l) * duds(l) )
          ENDIF
          !-APW
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
          ddnln =  ddn(l)
          elall = el(l)
          IF (l<=lnonsph) elall=elall-e_shift!(isp)
          DO kj = 1,ki
              fct  = plegend(kj,l)*fl2p1(l)*&
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
Uliana Alekseeva's avatar
Uliana Alekseeva committed
108 109
              fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )

Uliana Alekseeva's avatar
Uliana Alekseeva committed
110
              smat_data(kj,kii)=smat_data(kj,kii)+REAL(cph(kj))*fct
Uliana Alekseeva's avatar
Uliana Alekseeva committed
111
              hmat_data(kj,kii)=hmat_data(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
112 113 114 115 116 117 118 119 120 121 122 123 124 125
              !+APW
              IF (PRESENT(uds)) THEN
                 apw1 = REAL(cph(kj)) * plegend(kj,l)  * &
                        ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                 hmat_data(kj,kii)=hmat_data(kj,kii) + apw1
              ENDIF
              !-APW
          ENDDO
       !--->          end loop over l
       ENDDO
     !--->    end loop over ki
     ENDDO
     DEALLOCATE(plegend)
     DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
126
  END SUBROUTINE HsmtSphGpuKernel_real
Uliana Alekseeva's avatar
Uliana Alekseeva committed
127

Uliana Alekseeva's avatar
Uliana Alekseeva committed
128
  ATTRIBUTES(global) SUBROUTINE HsmtSphGpuKernel_cmplx(grid,block,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
129 130 131
                       lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
                       smat_data,hmat_data,&
                       uds,dus,us,duds,rmt)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
132 133 134
     INTEGER, VALUE, INTENT(IN) :: grid, block
     INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
     COMPLEX, VALUE, INTENT(IN)  :: chi
Uliana Alekseeva's avatar
Uliana Alekseeva committed
135
     REAL,INTENT(IN)    :: qssbti(3),qssbtj(3)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
136
     INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
137 138 139
     REAL,INTENT(IN)    :: gk(:,:,:)
     REAL,INTENT(IN)    :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)     
     REAL,INTENT(IN)    :: fl2p1bt(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
140
     REAL,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
141 142 143
     REAL,INTENT(IN)    :: taual(:,:)  
     REAL,INTENT(IN)    :: ddn(0:lmaxd) 
     REAL,INTENT(IN)    :: el(0:lmaxd)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
144
     REAL, VALUE,INTENT(IN)    :: e_shift
Uliana Alekseeva's avatar
Uliana Alekseeva committed
145 146 147 148 149 150 151 152
     COMPLEX,INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)
     !+APW
     REAL,INTENT(IN),OPTIONAL   :: uds(0:lmaxd),dus(0:lmaxd),us(0:lmaxd),duds(0:lmaxd) 
     REAL,INTENT(IN),OPTIONAL   :: rmt
     !-APW

     REAL,   PARAMETER :: tpi_const=2.*3.1415926535897932
     REAL, ALLOCATABLE :: plegend(:,:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
153
     REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
154 155 156
     COMPLEX, ALLOCATABLE :: cph(:)
     REAL apw_lo1,apw_lo2,w1
     COMPLEX capw1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
157 158 159
     REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
     INTEGER kii,ki,kj,l,nn,kj_end,k
     INTEGER :: loop_start, loop_end, i, loop_size
Uliana Alekseeva's avatar
Uliana Alekseeva committed
160 161 162 163 164

     ALLOCATE(cph(MAXVAL(nv)))
     ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
     plegend=0.0
     plegend(:,0)=1.0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
165
     ALLOCATE(VecHelpS(MAXVAL(nv)),VecHelpH(MAXVAL(nv)))
Uliana Alekseeva's avatar
Uliana Alekseeva committed
166

Uliana Alekseeva's avatar
Uliana Alekseeva committed
167 168 169 170 171 172 173 174 175 176 177 178
     k = (blockidx%x-1)*blockdim%x + threadidx%x
 
     !TODO!!!     
     !for seq, i.e. ki_start = 1, ki_step = 1
     loop_size = max(ki_end/(grid*block),1)
     if (loop_size * grid*block < ki_end) loop_size = loop_size + 1
     loop_start = (k-1) * loop_size + 1
     loop_end = loop_start + loop_size - 1
     if (loop_end > ki_end ) loop_end = ki_end

     DO ki = loop_start,loop_end,ki_step
     !DO  ki =  ki_start,ki_end,ki_step 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
       kii=(ki-1)/ki_step+1
       ski = gvec(:,ki,jintsp) + qssbti
       !--->       legendre polynomials
       DO kj = 1,ki
          plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
       END DO
       DO l = 1,lmax - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = nn_start,nn_end
          tnn = tpi_const*taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
Uliana Alekseeva's avatar
Uliana Alekseeva committed
201 202 203
       kj_end = MIN(ki,nv(iintsp)) 
       VecHelpS = 0.d0
       VecHelpH = 0.d0
Uliana Alekseeva's avatar
Uliana Alekseeva committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
       DO  l = 0,lmax
          !+APW
          IF (PRESENT(uds)) THEN
             w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
             apw_lo1 = fl2p1(l) * 0.5 * rmt**2 * ( gjkiln * w1 +&
                  fjkiln * us(l) * dus(l) )
             apw_lo2 = fl2p1(l) * 0.5 * rmt**2 * ( fjkiln * w1 +&
                  gjkiln * uds(l) * duds(l) )
          ENDIF
          !-APW
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
          ddnln =  ddn(l)
          elall = el(l)
          IF (l<=lnonsph) elall=elall-e_shift!(isp)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
219 220 221 222 223 224 225
          DO kj = 1,kj_end
              fct  = plegend(kj,l)*fl2p1(l)*&
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
              fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
 
              VecHelpS(kj) = VecHelpS(kj) + fct
              VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Uliana Alekseeva's avatar
Uliana Alekseeva committed
226 227 228 229 230 231 232 233 234 235
              !+APW
              IF (PRESENT(uds)) THEN
                 capw1 = cph(kj)*plegend(kj,l)&
                        * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                 hmat_data(kj,kii)=hmat_data(kj,kii) + capw1
              ENDIF
              !-APW
          END DO
       !--->          end loop over l
       ENDDO
Uliana Alekseeva's avatar
Uliana Alekseeva committed
236 237
       smat_data(:kj_end,kii)=smat_data(:kj_end,kii) + chi*cph(:kj_end) * VecHelpS(:kj_end)
       hmat_data(:kj_end,kii)=hmat_data(:kj_end,kii) + chi*cph(:kj_end) * VecHelpH(:kj_end)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
238 239 240 241
     !--->    end loop over ki
     ENDDO
     DEALLOCATE(plegend)
     DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
242 243
     DEALLOCATE(VecHelpS,VecHelpH)
  END SUBROUTINE HsmtSphGpuKernel_cmplx
Uliana Alekseeva's avatar
Uliana Alekseeva committed
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263


  SUBROUTINE hsmt_sph_gpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE nvtx
    IMPLICIT NONE
    TYPE(t_input),INTENT(IN)      :: input
    TYPE(t_mpi),INTENT(IN)        :: mpi
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
    !     ..
    !     .. Array Arguments ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
264
    REAL,MANAGED,INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
265 266 267 268
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
    REAL,MANAGED,INTENT(IN)    :: fj(:,0:,:),gj(:,0:,:)
    !     ..
    !     .. Local Scalars ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
269
    INTEGER l
Uliana Alekseeva's avatar
Uliana Alekseeva committed
270
    INTEGER :: grid, block
Uliana Alekseeva's avatar
Uliana Alekseeva committed
271 272 273

    !     ..
    !     .. Local Arrays ..
Uliana Alekseeva's avatar
Uliana Alekseeva committed
274 275 276 277
    REAL,MANAGED :: fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL,MANAGED :: fl2p1bt(0:atoms%lmaxd)
    REAL,MANAGED :: qssbti(3),qssbtj(3)
    INTEGER, DEVICE :: nv_dev(2)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
278 279 280 281 282 283 284 285 286 287 288 289 290

    call nvtxStartRange("hsmt_sph",2)    
    CALL timestart("spherical setup")

    DO l = 0,atoms%lmaxd
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)

Uliana Alekseeva's avatar
Uliana Alekseeva committed
291 292 293 294
    ! pretty ugly solution
    nv_dev = lapw%nv
    block = 256
    grid = lapw%nv(jintsp)/(block*4) + 1
Uliana Alekseeva's avatar
Uliana Alekseeva committed
295
    IF (input%l_useapw) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
296 297
       !TODO!!!!
       ! APW case is not testet
Uliana Alekseeva's avatar
Uliana Alekseeva committed
298
       IF (smat%l_real) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
299
          CALL HsmtSphGpuKernel_real<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
300
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
301
                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
302 303 304 305
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                    smat%data_r,hmat%data_r,&
                    usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
       ELSE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
306
          CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
307 308 309 310 311 312
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                    chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
                    smat%data_c,hmat%data_c,&
                    usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
       ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
313 314
    ELSE
       IF (smat%l_real) THEN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
315
          CALL HsmtSphGpuKernel_real<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
316
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
317
                    qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
318
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
319
       ELSE
Uliana Alekseeva's avatar
Uliana Alekseeva committed
320
          CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
Uliana Alekseeva's avatar
Uliana Alekseeva committed
321 322 323
                    lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
                    chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
                    usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
324 325 326 327 328 329 330 331 332 333
       ENDIF
    ENDIF
    CALL timestop("spherical setup")

    call nvtxEndRange
    RETURN
  END SUBROUTINE hsmt_sph_gpu
#endif

  SUBROUTINE hsmt_sph_cpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
334 335 336
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    IMPLICIT NONE
337
    TYPE(t_input),INTENT(IN)      :: input
Daniel Wortmann's avatar
Daniel Wortmann committed
338
    TYPE(t_mpi),INTENT(IN)        :: mpi
339 340
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_atoms),INTENT(IN)      :: atoms
Daniel Wortmann's avatar
Daniel Wortmann committed
341 342 343
    TYPE(t_lapw),INTENT(IN)       :: lapw
    TYPE(t_usdus),INTENT(IN)      :: usdus
    CLASS(t_mat),INTENT(INOUT)     :: smat,hmat
344 345
    !     ..
    !     .. Scalar Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
346 347
    INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN)  :: chi
348 349
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
350
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
351
    REAL,    INTENT (IN) :: e_shift!(atoms%ntype,input%jspins)
Daniel Wortmann's avatar
Daniel Wortmann committed
352
    REAL,    INTENT (IN) :: fj(:,0:,:),gj(:,0:,:)
353 354
    !     ..
    !     .. Local Scalars ..
355
    REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
Daniel Wortmann's avatar
Daniel Wortmann committed
356
    REAL apw_lo1,apw_lo2,apw1,w1
357

Daniel Wortmann's avatar
Daniel Wortmann committed
358
    COMPLEX capw1
359
    INTEGER kii,ki,kj,l,nn,kj_end
360 361 362

    !     ..
    !     .. Local Arrays ..
Daniel Wortmann's avatar
Daniel Wortmann committed
363 364
    REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd)
365 366
    REAL qssbti(3),qssbtj(3)
    REAL, ALLOCATABLE :: plegend(:,:)
367
    REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:)
Daniel Wortmann's avatar
Daniel Wortmann committed
368
    COMPLEX, ALLOCATABLE :: cph(:)
369 370
    LOGICAL apw(0:atoms%lmaxd)

Daniel Wortmann's avatar
Daniel Wortmann committed
371
    CALL timestart("spherical setup")
372 373

    DO l = 0,atoms%lmaxd
374 375 376
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
377 378
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
Gregor Michalicek's avatar
Gregor Michalicek committed
379 380 381 382
    !$OMP PARALLEL DEFAULT(NONE)&
    !$OMP SHARED(lapw,atoms,noco,mpi,input,usdus,smat,hmat)&
    !$OMP SHARED(jintsp,iintsp,n,fleg1,fleg2,fj,gj,isp,fl2p1,el,e_shift,fl2p1bt,chi)&
    !$OMP PRIVATE(kii,ki,ski,kj,plegend,l,kj_end,qssbti,qssbtj,fct2)&
Daniel Wortmann's avatar
Daniel Wortmann committed
383 384
    !$OMP PRIVATE(cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,apw1)&
385
    !$OMP PRIVATE(capw1,VecHelpS,VecHelpH) 
Daniel Wortmann's avatar
Daniel Wortmann committed
386 387
    ALLOCATE(cph(MAXVAL(lapw%nv)))
    ALLOCATE(plegend(MAXVAL(lapw%nv),0:atoms%lmaxd))
388
    ALLOCATE(VecHelpS(MAXVAL(lapw%nv)),VecHelpH(MAXVAL(lapw%nv)))
Daniel Wortmann's avatar
Daniel Wortmann committed
389 390
    plegend=0.0
    plegend(:,0)=1.0
391 392
    qssbti=MERGE(- noco%qss/2,+ noco%qss/2,jintsp.EQ.1)
    qssbtj=MERGE(- noco%qss/2,+ noco%qss/2,iintsp.EQ.1)
Daniel Wortmann's avatar
Daniel Wortmann committed
393
    !$OMP  DO SCHEDULE(DYNAMIC,1)
394
    DO  ki =  mpi%n_rank+1, lapw%nv(jintsp), mpi%n_size
Daniel Wortmann's avatar
Daniel Wortmann committed
395
       kii=(ki-1)/mpi%n_size+1
396
       ski = lapw%gvec(:,ki,jintsp) + qssbti
Daniel Wortmann's avatar
Daniel Wortmann committed
397 398
       !--->       legendre polynomials
       DO kj = 1,ki
399
          plegend(kj,1) = DOT_PRODUCT(lapw%gk(:,kj,iintsp),lapw%gk(:,ki,jintsp))
Daniel Wortmann's avatar
Daniel Wortmann committed
400 401 402 403 404 405 406 407 408 409
       END DO
       DO l = 1,atoms%lmax(n) - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !--->             set up phase factors
       cph = 0.0
       DO nn = SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n))
          tnn = tpi_const*atoms%taual(:,nn)
          DO kj = 1,ki
             cph(kj) = cph(kj) +&
410 411 412
                  CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,iintsp)-qssbtj,tnn)),&
                  SIN(DOT_PRODUCT(lapw%gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
            ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
Daniel Wortmann's avatar
Daniel Wortmann committed
413 414 415 416
          END DO
       END DO

       !--->          update overlap and l-diagonal hamiltonian matrix
417 418 419
       kj_end = MIN(ki,lapw%nv(iintsp)) 
       VecHelpS = 0.d0
       VecHelpH = 0.d0
Daniel Wortmann's avatar
Daniel Wortmann committed
420
       DO  l = 0,atoms%lmax(n)
421 422
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
423
          !
Daniel Wortmann's avatar
Daniel Wortmann committed
424 425 426 427 428 429 430 431 432 433 434
          IF (input%l_useapw) THEN
             w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                  usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
             apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                  fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
             apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                  gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
             !
          ENDIF
          ddnln =  usdus%ddn(l,n,isp)
          elall = el(l,n,isp)
435
          IF (l<=atoms%lnonsph(n)) elall=elall-e_shift!(isp)
Daniel Wortmann's avatar
Daniel Wortmann committed
436 437 438
          IF (smat%l_real) THEN
             DO kj = 1,ki
                fct  = plegend(kj,l)*fl2p1(l)*&
439
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
440 441
                fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )

Daniel Wortmann's avatar
Daniel Wortmann committed
442
                smat%data_r(kj,kii)=smat%data_r(kj,kii)+REAL(cph(kj))*fct
443
                hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2)
Daniel Wortmann's avatar
Daniel Wortmann committed
444 445 446 447 448
                !+APW
                IF (input%l_useapw) THEN
                   apw1 = REAL(cph(kj)) * plegend(kj,l)  * &
                        ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                   hmat%data_r(kj,kii)=hmat%data_r(kj,kii) + apw1
449
                ENDIF
Daniel Wortmann's avatar
Daniel Wortmann committed
450 451 452
                !-APW
             ENDDO
          ELSE
453 454
             DO kj = 1,kj_end
                fct  = plegend(kj,l)*fl2p1(l)*&
455
                     ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
456 457 458 459
                fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
 
                VecHelpS(kj) = VecHelpS(kj) + fct
                VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 
Daniel Wortmann's avatar
Daniel Wortmann committed
460 461 462 463 464

                IF (input%l_useapw) THEN
                   capw1 = cph(kj)*plegend(kj,l)&
                        * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
                   hmat%data_c(kj,kii)=hmat%data_c(kj,kii) + capw1
465 466 467
                ENDIF
             END DO

Daniel Wortmann's avatar
Daniel Wortmann committed
468
          ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
469
       !--->          end loop over l
Daniel Wortmann's avatar
Daniel Wortmann committed
470
       ENDDO
471 472 473 474
       IF (.not.smat%l_real) THEN
           smat%data_c(:kj_end,kii)=smat%data_c(:kj_end,kii) + chi*cph(:kj_end) * VecHelpS(:kj_end)
           hmat%data_c(:kj_end,kii)=hmat%data_c(:kj_end,kii) + chi*cph(:kj_end) * VecHelpH(:kj_end)
       ENDIF
Uliana Alekseeva's avatar
Uliana Alekseeva committed
475
    !--->    end loop over ki
Daniel Wortmann's avatar
Daniel Wortmann committed
476 477
    ENDDO
    !$OMP END DO
478
    DEALLOCATE(plegend)
Daniel Wortmann's avatar
Daniel Wortmann committed
479
    DEALLOCATE(cph)
Uliana Alekseeva's avatar
Uliana Alekseeva committed
480
    DEALLOCATE(VecHelpS,VecHelpH)
481
    !$OMP END PARALLEL
Daniel Wortmann's avatar
Daniel Wortmann committed
482
    CALL timestop("spherical setup")
483 484

    RETURN
Uliana Alekseeva's avatar
Uliana Alekseeva committed
485
  END SUBROUTINE hsmt_sph_cpu
486
END MODULE m_hsmt_sph