Commit f7713d61 authored by Daniel Wortmann's avatar Daniel Wortmann

Merge branch 'develop' of iffgit.fz-juelich.de:fleur/fleur into develop

parents 7a9c2342 ef8fa644
...@@ -18,11 +18,12 @@ MODULE m_hsmt_sph ...@@ -18,11 +18,12 @@ MODULE m_hsmt_sph
CONTAINS CONTAINS
#ifdef CPP_GPU #ifdef CPP_GPU
ATTRIBUTES(global) SUBROUTINE HsmtSphGpuKernel_real(grid,block,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,& ATTRIBUTES(global)&
SUBROUTINE HsmtSphGpuKernel_real(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,& lnonsph,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
smat_data,hmat_data,& smat_data,hmat_data,&
uds,dus,us,duds,rmt) uds,dus,us,duds,rmt)
INTEGER, VALUE, INTENT(IN) :: grid, block INTEGER, VALUE, INTENT(IN) :: loop_size
INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph INTEGER,VALUE,INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
REAL,INTENT(IN) :: qssbti(3),qssbtj(3) REAL,INTENT(IN) :: qssbti(3),qssbtj(3)
INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2) INTEGER,INTENT(IN) :: gvec(:,:,:),nv(2)
...@@ -41,53 +42,45 @@ CONTAINS ...@@ -41,53 +42,45 @@ CONTAINS
!-APW !-APW
REAL, PARAMETER :: tpi_const=2.*3.1415926535897932 REAL, PARAMETER :: tpi_const=2.*3.1415926535897932
REAL, ALLOCATABLE :: plegend(:,:) REAL, ALLOCATABLE :: plegend(:)
COMPLEX, ALLOCATABLE :: cph(:) REAL cph
REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3) REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
REAL apw_lo1,apw_lo2,apw1,w1 REAL apw_lo1,apw_lo2,apw1,w1
INTEGER kii,ki,kj,l,nn,k INTEGER kii,ki,kj,l,nn,k
INTEGER :: loop_start, loop_end, i, loop_size INTEGER :: loop_start, loop_end, i
ALLOCATE(cph(MAXVAL(nv))) ALLOCATE(plegend(0:lmaxd))
ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
plegend=0.0 plegend=0.0
plegend(:,0)=1.0 plegend(0)=1.0
k = (blockidx%x-1)*blockdim%x + threadidx%x k = (blockidx%x-1)*blockdim%x + threadidx%x
!TODO!!! !TODO!!!
!for seq, i.e. ki_start = 1, ki_step = 1 !for seq, i.e. ki_start = 1, ki_step = 1
loop_size = max(ki_end/(grid*block),1)
if (loop_size * grid*block < ki_end) loop_size = loop_size + 1
loop_start = (k-1) * loop_size + 1 loop_start = (k-1) * loop_size + 1
loop_end = loop_start + loop_size - 1 loop_end = loop_start + loop_size - 1
if (loop_end > ki_end ) loop_end = ki_end if (loop_end > ki_end ) loop_end = ki_end
DO ki = loop_start,loop_end,ki_step DO ki = loop_start,loop_end,ki_step
!DO ki = ki_start,ki_end,ki_step !DO ki = ki_start,ki_end,ki_step
DO kj = 1,ki
kii=(ki-1)/ki_step+1 kii=(ki-1)/ki_step+1
ski = gvec(:,ki,jintsp) + qssbti ski = gvec(:,ki,jintsp) + qssbti
!---> legendre polynomials
DO kj = 1,ki
plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
END DO
DO l = 1,lmax - 1
plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
END DO
!---> set up phase factors !---> set up phase factors
cph = 0.0 cph = 0.0
DO nn = nn_start,nn_end DO nn = nn_start,nn_end
tnn = tpi_const*taual(:,nn) tnn = tpi_const*taual(:,nn)
DO kj = 1,ki cph = cph + COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn))
cph(kj) = cph(kj) +&
CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj)) ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
ENDDO
!---> legendre polynomials
plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
DO l = 1,lmax - 1
plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
END DO END DO
END DO
!---> update overlap and l-diagonal hamiltonian matrix
DO l = 0,lmax DO l = 0,lmax
fjkiln = fj(ki,l,jintsp)
gjkiln = gj(ki,l,jintsp)
!+APW !+APW
IF (PRESENT(uds)) THEN IF (PRESENT(uds)) THEN
w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) ) w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
...@@ -97,39 +90,38 @@ CONTAINS ...@@ -97,39 +90,38 @@ CONTAINS
gjkiln * uds(l) * duds(l) ) gjkiln * uds(l) * duds(l) )
ENDIF ENDIF
!-APW !-APW
fjkiln = fj(ki,l,jintsp)
gjkiln = gj(ki,l,jintsp)
ddnln = ddn(l) ddnln = ddn(l)
elall = el(l) elall = el(l)
IF (l<=lnonsph) elall=elall-e_shift!(isp) IF (l<=lnonsph) elall=elall-e_shift!(isp)
DO kj = 1,ki !DO kj = 1,ki
fct = plegend(kj,l)*fl2p1(l)*& fct = plegend(l)*fl2p1(l)*&
( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln ) ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) ) fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
smat_data(kj,kii)=smat_data(kj,kii)+REAL(cph(kj))*fct smat_data(kj,kii)=smat_data(kj,kii)+cph*fct
hmat_data(kj,kii)=hmat_data(kj,kii) + REAL(cph(kj)) * ( fct * elall + fct2) hmat_data(kj,kii)=hmat_data(kj,kii) + cph * ( fct * elall + fct2)
!+APW !+APW
IF (PRESENT(uds)) THEN IF (PRESENT(uds)) THEN
apw1 = REAL(cph(kj)) * plegend(kj,l) * & apw1 = cph * plegend(l) * &
( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) ) ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
hmat_data(kj,kii)=hmat_data(kj,kii) + apw1 hmat_data(kj,kii)=hmat_data(kj,kii) + apw1
ENDIF ENDIF
!-APW !-APW
ENDDO !ENDDO
!---> end loop over l !---> end loop over l
ENDDO ENDDO
ENDDO
!---> end loop over ki !---> end loop over ki
ENDDO ENDDO
DEALLOCATE(plegend) DEALLOCATE(plegend)
DEALLOCATE(cph)
END SUBROUTINE HsmtSphGpuKernel_real END SUBROUTINE HsmtSphGpuKernel_real
ATTRIBUTES(global) SUBROUTINE HsmtSphGpuKernel_cmplx(grid,block,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,& ATTRIBUTES(global)&
SUBROUTINE HsmtSphGpuKernel_cmplx(loop_size,iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,& lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,&
smat_data,hmat_data,& smat_data,hmat_data,&
uds,dus,us,duds,rmt) uds,dus,us,duds,rmt)
INTEGER, VALUE, INTENT(IN) :: grid, block INTEGER, VALUE, INTENT(IN) :: loop_size
INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph INTEGER, VALUE, INTENT(IN) :: iintsp,jintsp,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
COMPLEX, VALUE, INTENT(IN) :: chi COMPLEX, VALUE, INTENT(IN) :: chi
REAL,INTENT(IN) :: qssbti(3),qssbtj(3) REAL,INTENT(IN) :: qssbti(3),qssbtj(3)
...@@ -149,59 +141,50 @@ CONTAINS ...@@ -149,59 +141,50 @@ CONTAINS
!-APW !-APW
REAL, PARAMETER :: tpi_const=2.*3.1415926535897932 REAL, PARAMETER :: tpi_const=2.*3.1415926535897932
REAL, ALLOCATABLE :: plegend(:,:) REAL, ALLOCATABLE :: plegend(:)
REAL, ALLOCATABLE :: VecHelpS(:),VecHelpH(:) COMPLEX :: cph
COMPLEX, ALLOCATABLE :: cph(:)
REAL apw_lo1,apw_lo2,w1 REAL apw_lo1,apw_lo2,w1
COMPLEX capw1 COMPLEX capw1
REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3) REAL tnn(3), elall,fct,fct2,fjkiln,gjkiln,ddnln,ski(3)
INTEGER kii,ki,kj,l,nn,kj_end,k INTEGER kii,ki,kj,kjj,l,nn,kj_end,k
INTEGER :: loop_start, loop_end, i, loop_size INTEGER :: loop_start, loop_end, i
ALLOCATE(cph(MAXVAL(nv))) ALLOCATE(plegend(0:lmaxd))
ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
plegend=0.0 plegend=0.0
plegend(:,0)=1.0 plegend(0)=1.0
ALLOCATE(VecHelpS(MAXVAL(nv)),VecHelpH(MAXVAL(nv)))
k = (blockidx%x-1)*blockdim%x + threadidx%x k = (blockidx%x-1)*blockdim%x + threadidx%x
!TODO!!! !TODO!!!
!for seq, i.e. ki_start = 1, ki_step = 1 !for seq, i.e. ki_start = 1, ki_step = 1
loop_size = max(ki_end/(grid*block),1)
if (loop_size * grid*block < ki_end) loop_size = loop_size + 1
loop_start = (k-1) * loop_size + 1 loop_start = (k-1) * loop_size + 1
loop_end = loop_start + loop_size - 1 loop_end = loop_start + loop_size - 1
if (loop_end > ki_end ) loop_end = ki_end if (loop_end > ki_end ) loop_end = ki_end
DO ki = loop_start,loop_end,ki_step DO ki = loop_start,loop_end,ki_step
!DO ki = ki_start,ki_end,ki_step !DO ki = ki_start,ki_end,ki_step
kj_end = MIN(ki,nv(iintsp))
DO kj = 1,kj_end
kii=(ki-1)/ki_step+1 kii=(ki-1)/ki_step+1
ski = gvec(:,ki,jintsp) + qssbti ski = gvec(:,ki,jintsp) + qssbti
!---> legendre polynomials
DO kj = 1,ki
plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
END DO
DO l = 1,lmax - 1
plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
END DO
!---> set up phase factors !---> set up phase factors
cph = 0.0 cph = 0.0
DO nn = nn_start,nn_end DO nn = nn_start,nn_end
tnn = tpi_const*taual(:,nn) tnn = tpi_const*taual(:,nn)
DO kj = 1,ki cph = cph +&
cph(kj) = cph(kj) +&
CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),& CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn))) SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj)) ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
ENDDO
!---> legendre polynomials
plegend(1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
DO l = 1,lmax - 1
plegend(l+1) = fleg1(l)*plegend(1)*plegend(l) - fleg2(l)*plegend(l-1)
END DO END DO
END DO
!---> update overlap and l-diagonal hamiltonian matrix
kj_end = MIN(ki,nv(iintsp))
VecHelpS = 0.d0
VecHelpH = 0.d0
DO l = 0,lmax DO l = 0,lmax
fjkiln = fj(ki,l,jintsp)
gjkiln = gj(ki,l,jintsp)
!+APW !+APW
IF (PRESENT(uds)) THEN IF (PRESENT(uds)) THEN
w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) ) w1 = 0.5 * ( uds(l)*dus(l) + us(l)*duds(l) )
...@@ -211,35 +194,28 @@ CONTAINS ...@@ -211,35 +194,28 @@ CONTAINS
gjkiln * uds(l) * duds(l) ) gjkiln * uds(l) * duds(l) )
ENDIF ENDIF
!-APW !-APW
fjkiln = fj(ki,l,jintsp)
gjkiln = gj(ki,l,jintsp)
ddnln = ddn(l) ddnln = ddn(l)
elall = el(l) elall = el(l)
IF (l<=lnonsph) elall=elall-e_shift!(isp) IF (l<=lnonsph) elall=elall-e_shift!(isp)
DO kj = 1,kj_end fct = plegend(l)*fl2p1(l)*&
fct = plegend(kj,l)*fl2p1(l)*&
( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln ) ( fjkiln*fj(kj,l,iintsp) + gjkiln*gj(kj,l,iintsp)*ddnln )
fct2 = plegend(kj,l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) ) fct2 = plegend(l)*fl2p1bt(l) * ( fjkiln*gj(kj,l,iintsp) + gjkiln*fj(kj,l,iintsp) )
VecHelpS(kj) = VecHelpS(kj) + fct smat_data(kj,kii)=smat_data(kj,kii) + chi * cph * fct
VecHelpH(kj) = VecHelpH(kj) + fct*elall + fct2 hmat_data(kj,kii)=hmat_data(kj,kii) + chi * cph * ( fct * elall + fct2)
!+APW !+APW
IF (PRESENT(uds)) THEN IF (PRESENT(uds)) THEN
capw1 = cph(kj)*plegend(kj,l)& capw1 = cph*plegend(l)&
* ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) ) * ( apw_lo1 * fj(kj,l,iintsp) + apw_lo2 * gj(kj,l,iintsp) )
hmat_data(kj,kii)=hmat_data(kj,kii) + capw1 hmat_data(kj,kii)=hmat_data(kj,kii) + capw1
ENDIF ENDIF
!-APW !-APW
END DO
!---> end loop over l !---> end loop over l
ENDDO ENDDO
smat_data(:kj_end,kii)=smat_data(:kj_end,kii) + chi*cph(:kj_end) * VecHelpS(:kj_end) ENDDO
hmat_data(:kj_end,kii)=hmat_data(:kj_end,kii) + chi*cph(:kj_end) * VecHelpH(:kj_end)
!---> end loop over ki !---> end loop over ki
ENDDO ENDDO
DEALLOCATE(plegend) DEALLOCATE(plegend)
DEALLOCATE(cph)
DEALLOCATE(VecHelpS,VecHelpH)
END SUBROUTINE HsmtSphGpuKernel_cmplx END SUBROUTINE HsmtSphGpuKernel_cmplx
...@@ -267,7 +243,7 @@ CONTAINS ...@@ -267,7 +243,7 @@ CONTAINS
! .. ! ..
! .. Local Scalars .. ! .. Local Scalars ..
INTEGER l INTEGER l
INTEGER :: grid, block INTEGER :: grid, block, loop_size
! .. ! ..
! .. Local Arrays .. ! .. Local Arrays ..
...@@ -278,6 +254,7 @@ CONTAINS ...@@ -278,6 +254,7 @@ CONTAINS
call nvtxStartRange("hsmt_sph",2) call nvtxStartRange("hsmt_sph",2)
CALL timestart("spherical setup") CALL timestart("spherical setup")
print*, "HsmtSph_GPU"
DO l = 0,atoms%lmaxd DO l = 0,atoms%lmaxd
fleg1(l) = REAL(l+l+1)/REAL(l+1) fleg1(l) = REAL(l+l+1)/REAL(l+1)
...@@ -290,20 +267,25 @@ CONTAINS ...@@ -290,20 +267,25 @@ CONTAINS
! pretty ugly solution ! pretty ugly solution
nv_dev = lapw%nv nv_dev = lapw%nv
block = 256 loop_size = 1
grid = lapw%nv(jintsp)/(block*4) + 1 block = 32 ! number of threads in a block
grid = ceiling(real(lapw%nv(jintsp))/(loop_SIZE*block))
!loop_size = max(lapw%nv(jintsp)/(grid*block),1) !number of iterations performed by each thread
!if (loop_size * grid*block < lapw%nv(jintsp)) loop_size = loop_size + 1
IF (input%l_useapw) THEN IF (input%l_useapw) THEN
!TODO!!!! !TODO!!!!
! APW case is not testet ! APW case is not testet
IF (smat%l_real) THEN IF (smat%l_real) THEN
CALL HsmtSphGpuKernel_real<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,& CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),& lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,& qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
usdus%ddn(:,n,isp),el(:,n,isp),e_shift,& usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
smat%data_r,hmat%data_r,& smat%data_r,hmat%data_r,&
usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n)) usdus%uds(:,n,isp),usdus%dus(:,n,isp),usdus%us(:,n,isp),usdus%duds(:,n,isp),atoms%rmt(n))
ELSE ELSE
CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,& CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),& lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,& chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
usdus%ddn(:,n,isp),el(:,n,isp),e_shift,& usdus%ddn(:,n,isp),el(:,n,isp),e_shift,&
...@@ -312,12 +294,14 @@ CONTAINS ...@@ -312,12 +294,14 @@ CONTAINS
ENDIF ENDIF
ELSE ELSE
IF (smat%l_real) THEN IF (smat%l_real) THEN
CALL HsmtSphGpuKernel_real<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,& CALL HsmtSphGpuKernel_real<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),& lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,& qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r) usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
ELSE ELSE
CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(grid,block,iintsp,jintsp,nv_dev,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,& CALL HsmtSphGpuKernel_cmplx<<<grid,block>>>(loop_size,iintsp,jintsp,nv_dev,&
atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,&
lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),& lapw%nv(jintsp), mpi%n_size,SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),&
chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,& chi,qssbti,qssbtj,lapw%gvec,lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,&
usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c) usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
......
...@@ -68,7 +68,6 @@ MODULE m_banddos_io ...@@ -68,7 +68,6 @@ MODULE m_banddos_io
INTEGER :: hdfError, dimsInt(7) INTEGER :: hdfError, dimsInt(7)
INTEGER :: version INTEGER :: version
INTEGER :: fakeLogical
REAL :: eFermiPrev REAL :: eFermiPrev
LOGICAL :: l_error LOGICAL :: l_error
...@@ -100,9 +99,7 @@ MODULE m_banddos_io ...@@ -100,9 +99,7 @@ MODULE m_banddos_io
CALL h5gcreate_f(fileID, '/general', generalGroupID, hdfError) CALL h5gcreate_f(fileID, '/general', generalGroupID, hdfError)
CALL io_write_attint0(generalGroupID,'spins',input%jspins) CALL io_write_attint0(generalGroupID,'spins',input%jspins)
CALL io_write_attreal0(generalGroupID,'lastFermiEnergy',eFermiPrev) CALL io_write_attreal0(generalGroupID,'lastFermiEnergy',eFermiPrev)
fakeLogical = 0 CALL io_write_attlog0(generalGroupID,'bandUnfolding',banddos%unfoldband)
IF (banddos%unfoldband) fakeLogical = 1
CALL io_write_attint0(generalGroupID,'bandUnfolding',fakeLogical)
CALL h5gclose_f(generalGroupID, hdfError) CALL h5gclose_f(generalGroupID, hdfError)
CALL h5gcreate_f(fileID, '/cell', cellGroupID, hdfError) CALL h5gcreate_f(fileID, '/cell', cellGroupID, hdfError)
...@@ -249,6 +246,7 @@ MODULE m_banddos_io ...@@ -249,6 +246,7 @@ MODULE m_banddos_io
INTEGER(HID_T) :: jsymSpaceID, jsymSetID INTEGER(HID_T) :: jsymSpaceID, jsymSetID
INTEGER(HID_T) :: ksymSpaceID, ksymSetID INTEGER(HID_T) :: ksymSpaceID, ksymSetID
INTEGER(HID_T) :: bUWeightsSpaceID, bUWeightsSetID INTEGER(HID_T) :: bUWeightsSpaceID, bUWeightsSetID
INTEGER(HID_T) :: supercellSpaceID, supercellSetID
INTEGER :: hdfError, dimsInt(7) INTEGER :: hdfError, dimsInt(7)
...@@ -306,6 +304,14 @@ MODULE m_banddos_io ...@@ -306,6 +304,14 @@ MODULE m_banddos_io
IF (banddos%unfoldband) THEN IF (banddos%unfoldband) THEN
CALL h5gcreate_f(fileID, '/bandUnfolding', bandUnfoldingGroupID, hdfError) CALL h5gcreate_f(fileID, '/bandUnfolding', bandUnfoldingGroupID, hdfError)
dims(:1)=(/3/)
dimsInt = dims
CALL h5screate_simple_f(1,dims(:1),supercellSpaceID,hdfError)
CALL h5dcreate_f(bandUnfoldingGroupID, "supercell", H5T_NATIVE_INTEGER, supercellSpaceID, supercellSetID, hdfError)
CALL h5sclose_f(supercellSpaceID,hdfError)
CALL io_write_integer1(supercellSetID,(/1/),dimsInt(:1),(/banddos%s_cell_x,banddos%s_cell_y,banddos%s_cell_z/))
CALL h5dclose_f(supercellSetID, hdfError)
dims(:3)=(/neigd,kpts%nkpt,input%jspins/) dims(:3)=(/neigd,kpts%nkpt,input%jspins/)
dimsInt = dims dimsInt = dims
CALL h5screate_simple_f(3,dims(:3),bUWeightsSpaceID,hdfError) CALL h5screate_simple_f(3,dims(:3),bUWeightsSpaceID,hdfError)
......
...@@ -8,7 +8,7 @@ MODULE m_writeBasis ...@@ -8,7 +8,7 @@ MODULE m_writeBasis
CONTAINS CONTAINS
SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,results,eig_id,oneD) SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,results,eig_id,oneD,sphhar,stars,vacuum)
USE m_types USE m_types
USE m_juDFT USE m_juDFT
...@@ -17,7 +17,7 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r ...@@ -17,7 +17,7 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r
USE m_hdf_tools USE m_hdf_tools
#endif #endif
USE m_genmtbasis USE m_genmtbasis
! USE m_cdn_io USE m_pot_io
USE m_abcof USE m_abcof
USE m_eig66_io, ONLY : read_eig USE m_eig66_io, ONLY : read_eig
...@@ -26,8 +26,10 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r ...@@ -26,8 +26,10 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r
TYPE(t_dimension),INTENT(IN) :: DIMENSION TYPE(t_dimension),INTENT(IN) :: DIMENSION
TYPE(t_enpara),INTENT(IN) :: enpara TYPE(t_enpara),INTENT(IN) :: enpara
! TYPE(t_banddos),INTENT(IN) :: banddos ! TYPE(t_banddos),INTENT(IN) :: banddos
! TYPE(t_sphhar),INTENT(IN) :: sphhar
! TYPE(t_stars),INTENT(IN) :: stars TYPE(t_sphhar),INTENT(IN) :: sphhar
TYPE(t_stars),INTENT(IN) :: stars
TYPE(t_vacuum),INTENT(IN) :: vacuum
TYPE(t_input),INTENT(IN) :: input TYPE(t_input),INTENT(IN) :: input
TYPE(t_noco),INTENT(IN) :: noco TYPE(t_noco),INTENT(IN) :: noco
...@@ -51,7 +53,7 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r ...@@ -51,7 +53,7 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r
LOGICAL :: l_exist LOGICAL :: l_exist
CHARACTER(LEN=30) :: filename CHARACTER(LEN=30) :: filename
CHARACTER(LEN=30) :: kpt_name CHARACTER(LEN=50) :: kpt_name
CHARACTER(LEN=30) :: jsp_name CHARACTER(LEN=30) :: jsp_name
CHARACTER(LEN=30) :: itype_name CHARACTER(LEN=30) :: itype_name
! CHARACTER(LEN=30) :: l_name ! CHARACTER(LEN=30) :: l_name
...@@ -264,6 +266,14 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r ...@@ -264,6 +266,14 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r
CALL io_write_integer1(atomicNumbersSetID,(/1/),dimsInt(:1),atomicNumbers) CALL io_write_integer1(atomicNumbersSetID,(/1/),dimsInt(:1),atomicNumbers)
CALL h5dclose_f(atomicNumbersSetID, hdfError) CALL h5dclose_f(atomicNumbersSetID, hdfError)
dims(:1)=(/atoms%ntype/)
dimsInt=dims
CALL h5screate_simple_f(1,dims(:1),atomicNumbersSpaceID,hdfError)
CALL h5dcreate_f(atomsGroupID, "ztype", H5T_NATIVE_INTEGER, atomicNumbersSpaceID, atomicNumbersSetID, hdfError)
CALL h5sclose_f(atomicNumbersSpaceID,hdfError)
CALL io_write_integer1(atomicNumbersSetID,(/1/),dimsInt(:1),atoms%nz)
CALL h5dclose_f(atomicNumbersSetID, hdfError)
dims(:1)=(/atoms%nat/) dims(:1)=(/atoms%nat/)
dimsInt=dims dimsInt=dims
CALL h5screate_simple_f(1,dims(:1),equivAtomsClassSpaceID,hdfError) CALL h5screate_simple_f(1,dims(:1),equivAtomsClassSpaceID,hdfError)
...@@ -304,8 +314,8 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r ...@@ -304,8 +314,8 @@ SUBROUTINE writeBasis(input,noco,kpts,atoms,sym,cell,enpara,vTot,mpi,DIMENSION,r
! DO nk = mpi%n_start,kpts%nkpt,mpi%n_stride ! DO nk = mpi%n_start,kpts%nkpt,mpi%n_stride
DO nk = 1,kpts%nkpt DO nk = 1,kpts%nkpt
CALL lapw%init(input,noco,kpts,atoms,sym,nk,cell,l_zref) CALL lapw%init(input,noco,kpts,atoms,sym,nk,cell,l_zref)
write(kpt_name , '(2a,i0)') TRIM(ADJUSTL(jsp_name)),'/kpt_',nk !write(kpt_name , '(2a,i0)') TRIM(ADJUSTL(jsp_name)),'/kpt_',nk
!write(kpt_name , '(2a,f12.10,a,f12.10,a,f12.10)') TRIM(ADJUSTL(jsp_name)),'_',kpts%bk(1,nk),',',kpts%bk(2,nk),',',kpts%bk(3,nk)