Commit 29c56395 authored by Uliana Alekseeva's avatar Uliana Alekseeva

hsmt_sph_GPU added

parent a5c5606e
......@@ -71,7 +71,7 @@ CONTAINS
IF (.NOT.noco%l_noco) THEN
!This is for collinear calculations: the (1,1) element of the matrices is all
!that is needed and allocated
CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,cell,1,1,chi_one,lapw,enpara%el0,&
CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,1,1,chi_one,lapw,enpara%el0,&
td%e_shift(n,ispin),usdus,fj(:,0:,ispin,:),gj(:,0:,ispin,:),smat(1,1),hmat(1,1))
CALL hsmt_nonsph(n,mpi,sym,atoms,ispin,1,1,chi_one,noco,cell,lapw,td,&
fj(:,0:,ispin,:),gj(:,0:,ispin,:),hmat(1,1))
......@@ -83,7 +83,7 @@ CONTAINS
!stored in tmp-variables. Then these are distributed (rotated) into the 2x2
!global spin-matrices.
CALL hmat_tmp%clear();CALL smat_tmp%clear()
CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,cell,1,1,chi_one,lapw,enpara%el0,td%e_shift(n,ispin),&
CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,1,1,chi_one,lapw,enpara%el0,td%e_shift(n,ispin),&
usdus,fj(:,0:,ispin,:),gj(:,0:,ispin,:),smat_tmp,hmat_tmp)
CALL hsmt_nonsph(n,mpi,sym,atoms,ispin,1,1,chi_one,noco,cell,lapw,td,&
fj(:,0:,ispin,:),gj(:,0:,ispin,:),hmat_tmp)
......@@ -103,7 +103,7 @@ CONTAINS
CALL hsmt_spinor(ispin,n,noco,chi)
DO iintsp=1,2
DO jintsp=1,2
CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,cell,iintsp,jintsp,chi(iintsp,jintsp),&
CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,iintsp,jintsp,chi(iintsp,jintsp),&
lapw,enpara%el0,td%e_shift(n,ispin),usdus,fj(:,0:,ispin,:),gj(:,0:,ispin,:),&
smat(iintsp,jintsp),hmat(iintsp,jintsp))
CALL hsmt_nonsph(n,mpi,sym,atoms,ispin,iintsp,jintsp,chi(iintsp,jintsp),noco,cell,&
......
......@@ -62,7 +62,7 @@ CONTAINS
USE m_ylm
USE m_apws
USE cudafor
! USE nvtx
USE nvtx
IMPLICIT NONE
TYPE(t_sym),INTENT(IN) :: sym
TYPE(t_cell),INTENT(IN) :: cell
......@@ -92,8 +92,9 @@ CONTAINS
COMPLEX,ALLOCATABLE,DEVICE :: c_ph_dev(:,:)
REAL, ALLOCATABLE,DEVICE :: gkrot_dev(:,:)
INTEGER :: grid, block
!INTEGER :: istat
INTEGER :: istat
call nvtxStartRange("hsmt_ab",3)
lmax=MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
ALLOCATE(c_ph_dev(lapw%nv(1),MERGE(2,1,noco%l_ss)))
......@@ -129,17 +130,12 @@ CONTAINS
!--> synthesize the complex conjugates of a and b
!call nvtxStartRange("hsmt_synthAB",5)
!istat = cudaDeviceSynchronize()
! pretty ugly solution
block = 256
grid = lapw%nv(1)/(block*4) + 1
CALL synth_ab<<<grid,block>>>(grid,block,lapw%nv(1),lmax,ab_size,gkrot_dev,&
fj(:,:,iintsp),gj(:,:,iintsp),c_ph_dev(:,iintsp),ab)
!istat = cudaDeviceSynchronize()
!call nvtxEndRange
IF (PRESENT(abclo)) THEN
print*, "Ooooops, TODO in hsmt_ab"
......@@ -169,6 +165,8 @@ CONTAINS
DEALLOCATE(c_ph_dev)
DEALLOCATE(gkrot_dev)
istat = cudaDeviceSynchronize()
call nvtxEndRange
END SUBROUTINE hsmt_ab_gpu
#endif
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment