Commit d883d3c5 authored by Uliana Alekseeva

Reverted part of the previous commit

parent ede0053b
...@@ -34,7 +34,6 @@ CONTAINS ...@@ -34,7 +34,6 @@ CONTAINS
#if defined CPP_GPU #if defined CPP_GPU
REAL, ALLOCATABLE,DEVICE :: fj_dev(:,:,:), gj_dev(:,:,:) REAL, ALLOCATABLE,DEVICE :: fj_dev(:,:,:), gj_dev(:,:,:)
COMPLEX,ALLOCATABLE,DEVICE :: h_loc_dev(:,:) COMPLEX,ALLOCATABLE,DEVICE :: h_loc_dev(:,:)
COMPLEX,ALLOCATABLE,DEVICE :: c_dev(:,:)
#endif #endif
CALL timestart("non-spherical setup") CALL timestart("non-spherical setup")
IF (mpi%n_size==1) THEN IF (mpi%n_size==1) THEN
...@@ -46,22 +45,7 @@ CONTAINS ...@@ -46,22 +45,7 @@ CONTAINS
ALLOCATE(h_loc_dev(size(td%h_loc,1),size(td%h_loc,2))) ALLOCATE(h_loc_dev(size(td%h_loc,1),size(td%h_loc,2)))
h_loc_dev(1:,1:) = CONJG(td%h_loc(0:,0:,n,isp)) h_loc_dev(1:,1:) = CONJG(td%h_loc(0:,0:,n,isp))
IF (hmat%l_real) THEN CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj_dev,gj_dev,hmat)
IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
DEALLOCATE(hmat%data_c)
ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
ENDIF
hmat%data_c=0.0
ENDIF
ALLOCATE(c_dev(SIZE(hmat%data_c,1),SIZE(hmat%data_c,2)))
c_dev = hmat%data_c
CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj_dev,gj_dev,c_dev)
hmat%data_c = c_dev
IF (hmat%l_real) THEN
hmat%data_r=hmat%data_r+REAL(hmat%data_c)
ENDIF
#else #else
CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat) CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
#endif #endif
...@@ -72,7 +56,7 @@ CONTAINS ...@@ -72,7 +56,7 @@ CONTAINS
END SUBROUTINE hsmt_nonsph END SUBROUTINE hsmt_nonsph
#if defined CPP_GPU #if defined CPP_GPU
SUBROUTINE priv_noMPI_gpu(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj_dev,gj_dev,c_dev) SUBROUTINE priv_noMPI_gpu(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj_dev,gj_dev,hmat)
!Calculate overlap matrix, GPU version !Calculate overlap matrix, GPU version
!note that basically all matrices in the GPU version are conjugates of their cpu counterparts !note that basically all matrices in the GPU version are conjugates of their cpu counterparts
USE m_hsmt_ab USE m_hsmt_ab
...@@ -101,12 +85,12 @@ CONTAINS ...@@ -101,12 +85,12 @@ CONTAINS
! .. ! ..
! .. Array Arguments .. ! .. Array Arguments ..
REAL, INTENT(IN), DEVICE :: fj_dev(:,:,:), gj_dev(:,:,:) REAL, INTENT(IN), DEVICE :: fj_dev(:,:,:), gj_dev(:,:,:)
COMPLEX,INTENT(INOUT),DEVICE :: c_dev(:,:) CLASS(t_mat),INTENT(INOUT) ::hmat
INTEGER:: nn,na,ab_size,l,ll,m INTEGER:: nn,na,ab_size,l,ll,m
real :: rchi real :: rchi
COMPLEX,ALLOCATABLE,DEVICE :: ab1_dev(:,:), ab_dev(:,:), ab2_dev(:,:) COMPLEX,ALLOCATABLE,DEVICE :: ab1_dev(:,:), ab_dev(:,:), ab2_dev(:,:)
COMPLEX,ALLOCATABLE,DEVICE :: c_dev(:,:)
integer :: i, j, istat integer :: i, j, istat
call nvtxStartRange("hsmt_nonsph",1) call nvtxStartRange("hsmt_nonsph",1)
...@@ -114,6 +98,16 @@ CONTAINS ...@@ -114,6 +98,16 @@ CONTAINS
ALLOCATE(ab_dev(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2)) ALLOCATE(ab_dev(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
IF (iintsp.NE.jintsp) ALLOCATE(ab2_dev(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2)) IF (iintsp.NE.jintsp) ALLOCATE(ab2_dev(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
IF (hmat%l_real) THEN
IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
DEALLOCATE(hmat%data_c)
ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
ENDIF
hmat%data_c=0.0
ENDIF
ALLOCATE(c_dev(SIZE(hmat%data_c,1),SIZE(hmat%data_c,2)))
c_dev = hmat%data_c
DO nn = 1,atoms%neq(n) DO nn = 1,atoms%neq(n)
na = SUM(atoms%neq(:n-1))+nn na = SUM(atoms%neq(:n-1))+nn
IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
...@@ -149,6 +143,11 @@ CONTAINS ...@@ -149,6 +143,11 @@ CONTAINS
ENDIF ENDIF
END DO END DO
hmat%data_c = c_dev
IF (hmat%l_real) THEN
hmat%data_r=hmat%data_r+REAL(hmat%data_c)
ENDIF
call nvtxEndRange call nvtxEndRange
END SUBROUTINE priv_noMPI_gpu END SUBROUTINE priv_noMPI_gpu
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment