Commit 389afdb1 authored by Gustav Bihlmayer's avatar Gustav Bihlmayer

Further changes in hsoham.F90 to speed up parallel version.

parent 66a39e30
......@@ -155,9 +155,9 @@ CONTAINS
!
! set up A and B coefficients
!
ALLOCATE ( ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd) )
ALLOCATE ( bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd) )
ALLOCATE ( chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,DIMENSION%jspd) )
ALLOCATE ( ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) )
ALLOCATE ( bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) )
ALLOCATE ( chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,input%jspins) )
CALL timestart("alineso SOC: -help")
write(*,*) nat_start,nat_stop,nat_l
CALL hsohelp(&
......@@ -177,9 +177,10 @@ CONTAINS
CALL MPI_BARRIER(mpi%MPI_COMM,ierr)
#endif
ALLOCATE ( hsomtx(DIMENSION%neigd,DIMENSION%neigd,2,2) )
CALL hsoham(atoms,noco,input,nsz,chelp,rsoc,ahelp,bhelp,&
CALL hsoham(atoms,noco,input,nsz,dimension%neigd,chelp,rsoc,ahelp,bhelp,&
nat_start,nat_stop,mpi%n_rank,mpi%n_size,mpi%SUB_COMM,&
hsomtx)
write(*,*) 'after hsoham'
DEALLOCATE ( ahelp,bhelp,chelp )
CALL timestop("alineso SOC: -ham")
IF (mpi%n_rank==0) THEN
......
......@@ -130,7 +130,7 @@ CONTAINS
n_stride = 1
#endif
n_end = kpts%nkpt
write(*,'(4i12)') mpi%irank, mpi%n_groups, n_stride, mpi%n_start
!write(*,'(4i12)') mpi%irank, mpi%n_groups, n_stride, mpi%n_start
!
!---> start loop k-pts
!
......
......@@ -6,7 +6,7 @@ MODULE m_hsoham
!
CONTAINS
SUBROUTINE hsoham(&
atoms,noco,input,nsz,chelp,rsoc,ahelp,bhelp,&
atoms,noco,input,nsz,neigd,chelp,rsoc,ahelp,bhelp,&
nat_start,nat_stop,n_rank,n_size,SUB_COMM,&
hsomtx)
......@@ -25,20 +25,20 @@ CONTAINS
! ..
! .. Scalar Arguments ..
! ..
INTEGER, INTENT (IN) :: nat_start,nat_stop,n_rank,n_size,SUB_COMM
INTEGER, INTENT (IN) :: nat_start,nat_stop,n_rank,n_size,SUB_COMM,neigd
! .. Array Arguments ..
INTEGER, INTENT (IN) :: nsz(:)!(dimension%jspd)
COMPLEX, INTENT (IN) :: ahelp(:,:,:,:)!(lmd,nat_l,dimension%neigd,dimension%jspd)
COMPLEX, INTENT (IN) :: bhelp(:,:,:,:)!(lmd,nat_l,dimension%neigd,dimension%jspd)
COMPLEX, INTENT (IN) :: chelp(-atoms%llod :,:,:,:,:)!(-llod:llod ,dimension%neigd,atoms%nlod,nat_l,dimension%jspd)
COMPLEX, INTENT (OUT):: hsomtx(:,:,:,:)!(dimension%neigd,neigd,2,2)
COMPLEX, INTENT (IN) :: ahelp((atoms%lmaxd+2)*atoms%lmaxd,nat_stop-nat_start+1,neigd,input%jspins)
COMPLEX, INTENT (IN) :: bhelp((atoms%lmaxd+2)*atoms%lmaxd,nat_stop-nat_start+1,neigd,input%jspins)
COMPLEX, INTENT (IN) :: chelp(-atoms%llod:atoms%llod,neigd,atoms%nlod,nat_stop-nat_start+1,input%jspins)
COMPLEX, INTENT (OUT):: hsomtx(neigd,neigd,2,2)
! ..
! .. Local Scalars ..
COMPLEX c_1,c_2,c_3,c_4,c_5
INTEGER i,j,jsp,jsp1,l,lwn ,m1,n,na,nn,i1,j1,ilo,ilop,m,nat_l,na_g,lm,ll1
INTEGER i,j,jsp,jsp1,l,lwn,m1,n,na,nn,i1,j1,ilo,ilop,m,nat_l,na_g,lm,ll1,lm1
! ..
! .. Local Arrays ..
COMPLEX, ALLOCATABLE :: c_b(:,:,:),c_a(:,:,:),c_c(:,:,:),c_buf(:)
COMPLEX, ALLOCATABLE :: c_b(:,:),c_a(:,:),c_c(:,:,:),c_buf(:)
! ..
!
!---------------------------------------------------------------------
......@@ -54,6 +54,10 @@ CONTAINS
!
!---> update hamiltonian matrices: upper triangle
!
ALLOCATE ( c_b((atoms%lmaxd+2)*atoms%lmaxd,nat_l),&
c_a((atoms%lmaxd+2)*atoms%lmaxd,nat_l),&
c_c(-atoms%llod:atoms%llod, atoms%nlod, nat_l) )
DO i1 = 1,2
jsp = i1
IF (input%jspins.EQ.1) jsp = 1
......@@ -66,11 +70,6 @@ CONTAINS
!!$OMP SHARED(hsomtx,i1,jsp,j1,jsp1,nsz,atoms)&
!!$OMP SHARED(ahelp,bhelp,chelp,noco,nat_start,nat_stop,nat_l)&
!!$OMP SHARED(rsoc)
ALLOCATE ( c_b(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l),&
c_a(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l),&
c_c(-atoms%llod :atoms%llod ,atoms%nlod ,nat_l) )
!!$OMP DO
DO j = 1,nsz(jsp1)
......@@ -87,14 +86,15 @@ CONTAINS
DO l = 1,atoms%lmax(n)
ll1 = l*(l+1)
DO m = -l,l
c_a(m,l,na) = CMPLX(0.,0.)
c_b(m,l,na) = CMPLX(0.,0.)
lm = ll1 + m
c_a(lm,na) = CMPLX(0.,0.)
c_b(lm,na) = CMPLX(0.,0.)
DO m1 = -l,l
lm = ll1 + m1
c_a(m,l,na) = c_a(m,l,na) + rsoc%soangl(l,m,i1,l,m1,j1)&
*CONJG(ahelp(lm,na,j,jsp1))
c_b(m,l,na) = c_b(m,l,na) + rsoc%soangl(l,m,i1,l,m1,j1)&
*CONJG(bhelp(lm,na,j,jsp1))
lm1 = ll1 + m1
c_a(lm,na) = c_a(lm,na) + rsoc%soangl(l,m,i1,l,m1,j1)&
* CONJG(ahelp(lm1,na,j,jsp1))
c_b(lm,na) = c_b(lm,na) + rsoc%soangl(l,m,i1,l,m1,j1)&
* CONJG(bhelp(lm1,na,j,jsp1))
ENDDO
ENDDO
ENDDO
......@@ -142,7 +142,7 @@ CONTAINS
c_2 = rsoc%rsoppd(n,l,i1,j1) * ahelp(lm,na,i,jsp) +&
rsoc%rsopdpd(n,l,i1,j1) * bhelp(lm,na,i,jsp)
hsomtx(i,j,i1,j1) = hsomtx(i,j,i1,j1) +&
c_1*c_a(m,l,na) + c_2*c_b(m,l,na)
c_1*c_a(lm,na) + c_2*c_b(lm,na)
ENDDO
!
ENDDO
......@@ -158,7 +158,7 @@ CONTAINS
c_4 = rsoc%rsoplop(n,ilo,i1,j1) *chelp(m,i,ilo,na,jsp)
c_5 =rsoc%rsoplopd(n,ilo,i1,j1) *chelp(m,i,ilo,na,jsp)
hsomtx(i,j,i1,j1) = hsomtx(i,j,i1,j1) + &
c_4*c_a(m,l,na) + c_5*c_b(m,l,na) +&
c_4*c_a(lm,na) + c_5*c_b(lm,na) +&
c_3*c_c(m,ilo,na)
ENDDO
DO ilop = 1,atoms%nlo(n)
......@@ -181,12 +181,12 @@ CONTAINS
ENDDO
!!j
!!$OMP END DO
DEALLOCATE (c_a,c_b,c_c)
!!$OMP END PARALLEL
ENDDO
!!jsp1
ENDDO
!!jsp
DEALLOCATE (c_a,c_b,c_c)
!
!---> update hamiltonian matrices: lower triangle
!
......@@ -200,9 +200,9 @@ CONTAINS
CALL MPI_BARRIER(SUB_COMM,ierr)
n = 4*nsz(1)*nsz(input%jspins)
ALLOCATE(c_buf(n))
CALL MPI_REDUCE(hsomtx(1,1,1,1),c_buf,n,CPP_MPI_COMPLEX,MPI_SUM,0,SUB_COMM,ierr)
CALL MPI_REDUCE(hsomtx,c_buf,n,CPP_MPI_COMPLEX,MPI_SUM,0,SUB_COMM,ierr)
IF (n_rank.EQ.0) THEN
CALL CPP_BLAS_ccopy(n, c_buf, 1, hsomtx(1,1,1,1), 1)
CALL CPP_BLAS_ccopy(n,c_buf,1,hsomtx,1)
ENDIF
DEALLOCATE(c_buf)
#endif
......
......@@ -43,11 +43,9 @@ CONTAINS
! .. Array Arguments ..
INTEGER, INTENT (IN) :: nsz(DIMENSION%jspd)
COMPLEX, INTENT (INOUT) :: zso(:,:,:)!DIMENSION%nbasfcn,2*DIMENSION%neigd,DIMENSION%jspd)
! COMPLEX, INTENT (OUT):: ahelp(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l,DIMENSION%neigd,DIMENSION%jspd)
! COMPLEX, INTENT (OUT):: bhelp(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l,DIMENSION%neigd,DIMENSION%jspd)
COMPLEX, INTENT (OUT):: ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd)
COMPLEX, INTENT (OUT):: bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd)
COMPLEX, INTENT (OUT):: chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,DIMENSION%jspd)
COMPLEX, INTENT (OUT):: ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins)
COMPLEX, INTENT (OUT):: bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins)
COMPLEX, INTENT (OUT):: chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,input%jspins)
TYPE(t_mat),INTENT(IN) :: zmat(:) ! (DIMENSION%nbasfcn,DIMENSION%neigd,DIMENSION%jspd)
!-odim
!+odim
......
......@@ -70,7 +70,7 @@ c***********************************************************************
complex, intent(in) :: bcof(:,0:,:,:) !bcof(noccbd,0:lmd,natd,jspd)
complex, intent(in) :: chelp(-llod:,:,:,:,:) !chelp(-llod:llod,neigd,nlod,natd,jspd)
complex, intent(out):: hsomtx(:,:,:,:) !(2,2,neigd,neigd)
complex, allocatable, intent(out):: hsomtx(:,:,:,:) !(neigd,neigd,2,2)
integer :: n,l,nwdd,nw,ispin,ie,na,ll1,m,lm,i,nsz(2)
real :: s(3),r2
......@@ -110,10 +110,10 @@ c***********************************************************************
! is equivalent to the def in the noco-routines
ENDIF
ALLOCATE ( ahelp(lmaxd*(lmaxd+2),natd,neigd,jspd) )
ALLOCATE ( bhelp(lmaxd*(lmaxd+2),natd,neigd,jspd) )
ALLOCATE ( ahelp(lmaxd*(lmaxd+2),natd,neigd,input%jspins) )
ALLOCATE ( bhelp(lmaxd*(lmaxd+2),natd,neigd,input%jspins) )
do ispin=1,jspd
do ispin=1,input%jspins
DO ie = 1, neigd
DO na = 1, natd
DO l = 1, lmaxd
......@@ -150,8 +150,8 @@ c***********************************************************************
rsoc%soangl= conjg(rsoc%soangl)
CALL hsoham(atoms,noco,input,nsz,chelp,rsoc,ahelp,bhelp,
> 1,natd,mpi%n_rank,mpi%n_size,mpi%SUB_COMM,
CALL hsoham(atoms,noco,input,nsz,neigd,chelp,rsoc,ahelp,
> bhelp,1,natd,mpi%n_rank,mpi%n_size,mpi%SUB_COMM,
< hsomtx)
end subroutine wann_socmat
......
......@@ -167,7 +167,7 @@ cccccccccccccccccc local variables cccccccccccccccccccc
real :: epar(0:lmaxd,ntypd,max(2,jspd)),evdu(2,max(jspd,2))
!!! the Mmn matrices
complex, allocatable :: surfcurr(:,:,:,:)
complex, allocatable :: hsomtx(:,:,:,:,:)
complex, allocatable :: hsomtx(:,:,:,:,:),hsomtx_l(:,:,:,:)
complex, allocatable :: mmnk(:,:,:,:),mmn(:,:,:)
complex, allocatable :: perpmag(:,:,:)
complex, allocatable :: amn(:,:,:),nablamat(:,:,:,:)
......@@ -496,6 +496,7 @@ c##################################################################
if(wann%l_socmat)then
allocate ( hsomtx(2,2,nbnd,nbnd,fullnkpts) )
allocate ( hsomtx_l(nbnd,nbnd,2,2) )
endif
write (*,*) 'nwfs=',nwfs
......@@ -871,7 +872,16 @@ c...for the lapws and local orbitals, summed by the basis functions
> jri,lmax,dx,rmsh,epar,ello,nlo,llo,
> l_dulo,ulo_der,vTot%mt,
> acof,bcof,ccof,
< hsomtx(:,:,:,:,ikpt))
< hsomtx_l)
do i1 = 1,2
do i2 = 1,2
do i = 1,nbnd
do j = 1,nbnd
hsomtx(i1,i2,i,j,ikpt) = hsomtx_l(i,j,i1,i2)
enddo
enddo
enddo
enddo
endif
#ifdef CPP_TOPO
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment