From 389afdb1fcc6ddb0d03bec6ce5eb1819f7320527 Mon Sep 17 00:00:00 2001 From: "G. Bihlmayer" Date: Fri, 21 Sep 2018 15:00:10 +0200 Subject: [PATCH] Further changes in hsoham.F90 to speed up parallel version. --- eigen_soc/alineso.F90 | 9 ++++---- eigen_soc/eigenso.F90 | 2 +- eigen_soc/hsoham.F90 | 50 +++++++++++++++++++++---------------------- eigen_soc/hsohelp.F90 | 8 +++---- wannier/wann_socmat.F | 12 +++++------ wannier/wann_updown.F | 14 ++++++++++-- 6 files changed, 52 insertions(+), 43 deletions(-) diff --git a/eigen_soc/alineso.F90 b/eigen_soc/alineso.F90 index 159f39a8..77115736 100644 --- a/eigen_soc/alineso.F90 +++ b/eigen_soc/alineso.F90 @@ -155,9 +155,9 @@ CONTAINS ! ! set up A and B coefficients ! - ALLOCATE ( ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd) ) - ALLOCATE ( bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd) ) - ALLOCATE ( chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,DIMENSION%jspd) ) + ALLOCATE ( ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) ) + ALLOCATE ( bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) ) + ALLOCATE ( chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,input%jspins) ) CALL timestart("alineso SOC: -help") write(*,*) nat_start,nat_stop,nat_l CALL hsohelp(& @@ -177,9 +177,10 @@ CONTAINS CALL MPI_BARRIER(mpi%MPI_COMM,ierr) #endif ALLOCATE ( hsomtx(DIMENSION%neigd,DIMENSION%neigd,2,2) ) - CALL hsoham(atoms,noco,input,nsz,chelp,rsoc,ahelp,bhelp,& + CALL hsoham(atoms,noco,input,nsz,dimension%neigd,chelp,rsoc,ahelp,bhelp,& nat_start,nat_stop,mpi%n_rank,mpi%n_size,mpi%SUB_COMM,& hsomtx) + write(*,*) 'after hsoham' DEALLOCATE ( ahelp,bhelp,chelp ) CALL timestop("alineso SOC: -ham") IF (mpi%n_rank==0) THEN diff --git a/eigen_soc/eigenso.F90 b/eigen_soc/eigenso.F90 index b1fb5ffa..2412b87b 100644 --- a/eigen_soc/eigenso.F90 +++ b/eigen_soc/eigenso.F90 @@ -130,7 +130,7 @@ CONTAINS n_stride = 1 #endif n_end = kpts%nkpt - write(*,'(4i12)') mpi%irank, mpi%n_groups, n_stride, mpi%n_start + !write(*,'(4i12)') mpi%irank, mpi%n_groups, n_stride, mpi%n_start ! !---> start loop k-pts ! diff --git a/eigen_soc/hsoham.F90 b/eigen_soc/hsoham.F90 index f80c42e7..1225a4dd 100644 --- a/eigen_soc/hsoham.F90 +++ b/eigen_soc/hsoham.F90 @@ -6,7 +6,7 @@ MODULE m_hsoham ! CONTAINS SUBROUTINE hsoham(& - atoms,noco,input,nsz,chelp,rsoc,ahelp,bhelp,& + atoms,noco,input,nsz,neigd,chelp,rsoc,ahelp,bhelp,& nat_start,nat_stop,n_rank,n_size,SUB_COMM,& hsomtx) @@ -25,20 +25,20 @@ CONTAINS ! .. ! .. Scalar Arguments .. ! .. - INTEGER, INTENT (IN) :: nat_start,nat_stop,n_rank,n_size,SUB_COMM + INTEGER, INTENT (IN) :: nat_start,nat_stop,n_rank,n_size,SUB_COMM,neigd ! .. Array Arguments .. INTEGER, INTENT (IN) :: nsz(:)!(dimension%jspd) - COMPLEX, INTENT (IN) :: ahelp(:,:,:,:)!(lmd,nat_l,dimension%neigd,dimension%jspd) - COMPLEX, INTENT (IN) :: bhelp(:,:,:,:)!(lmd,nat_l,dimension%neigd,dimension%jspd) - COMPLEX, INTENT (IN) :: chelp(-atoms%llod :,:,:,:,:)!(-llod:llod ,dimension%neigd,atoms%nlod,nat_l,dimension%jspd) - COMPLEX, INTENT (OUT):: hsomtx(:,:,:,:)!(dimension%neigd,neigd,2,2) + COMPLEX, INTENT (IN) :: ahelp((atoms%lmaxd+2)*atoms%lmaxd,nat_stop-nat_start+1,neigd,input%jspins) + COMPLEX, INTENT (IN) :: bhelp((atoms%lmaxd+2)*atoms%lmaxd,nat_stop-nat_start+1,neigd,input%jspins) + COMPLEX, INTENT (IN) :: chelp(-atoms%llod:atoms%llod,neigd,atoms%nlod,nat_stop-nat_start+1,input%jspins) + COMPLEX, INTENT (OUT):: hsomtx(neigd,neigd,2,2) ! .. ! .. Local Scalars .. COMPLEX c_1,c_2,c_3,c_4,c_5 - INTEGER i,j,jsp,jsp1,l,lwn ,m1,n,na,nn,i1,j1,ilo,ilop,m,nat_l,na_g,lm,ll1 + INTEGER i,j,jsp,jsp1,l,lwn,m1,n,na,nn,i1,j1,ilo,ilop,m,nat_l,na_g,lm,ll1,lm1 ! .. ! .. Local Arrays .. - COMPLEX, ALLOCATABLE :: c_b(:,:,:),c_a(:,:,:),c_c(:,:,:),c_buf(:) + COMPLEX, ALLOCATABLE :: c_b(:,:),c_a(:,:),c_c(:,:,:),c_buf(:) ! .. ! !--------------------------------------------------------------------- @@ -54,6 +54,10 @@ CONTAINS ! !---> update hamiltonian matrices: upper triangle ! + + ALLOCATE ( c_b((atoms%lmaxd+2)*atoms%lmaxd,nat_l),& + c_a((atoms%lmaxd+2)*atoms%lmaxd,nat_l),& + c_c(-atoms%llod:atoms%llod, atoms%nlod, nat_l) ) DO i1 = 1,2 jsp = i1 IF (input%jspins.EQ.1) jsp = 1 @@ -66,11 +70,6 @@ CONTAINS !!$OMP SHARED(hsomtx,i1,jsp,j1,jsp1,nsz,atoms)& !!$OMP SHARED(ahelp,bhelp,chelp,noco,nat_start,nat_stop,nat_l)& !!$OMP SHARED(rsoc) - - ALLOCATE ( c_b(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l),& - c_a(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l),& - c_c(-atoms%llod :atoms%llod ,atoms%nlod ,nat_l) ) - !!$OMP DO DO j = 1,nsz(jsp1) @@ -87,14 +86,15 @@ CONTAINS DO l = 1,atoms%lmax(n) ll1 = l*(l+1) DO m = -l,l - c_a(m,l,na) = CMPLX(0.,0.) - c_b(m,l,na) = CMPLX(0.,0.) + lm = ll1 + m + c_a(lm,na) = CMPLX(0.,0.) + c_b(lm,na) = CMPLX(0.,0.) DO m1 = -l,l - lm = ll1 + m1 - c_a(m,l,na) = c_a(m,l,na) + rsoc%soangl(l,m,i1,l,m1,j1)& - *CONJG(ahelp(lm,na,j,jsp1)) - c_b(m,l,na) = c_b(m,l,na) + rsoc%soangl(l,m,i1,l,m1,j1)& - *CONJG(bhelp(lm,na,j,jsp1)) + lm1 = ll1 + m1 + c_a(lm,na) = c_a(lm,na) + rsoc%soangl(l,m,i1,l,m1,j1)& + * CONJG(ahelp(lm1,na,j,jsp1)) + c_b(lm,na) = c_b(lm,na) + rsoc%soangl(l,m,i1,l,m1,j1)& + * CONJG(bhelp(lm1,na,j,jsp1)) ENDDO ENDDO ENDDO @@ -142,7 +142,7 @@ CONTAINS c_2 = rsoc%rsoppd(n,l,i1,j1) * ahelp(lm,na,i,jsp) +& rsoc%rsopdpd(n,l,i1,j1) * bhelp(lm,na,i,jsp) hsomtx(i,j,i1,j1) = hsomtx(i,j,i1,j1) +& - c_1*c_a(m,l,na) + c_2*c_b(m,l,na) + c_1*c_a(lm,na) + c_2*c_b(lm,na) ENDDO ! ENDDO @@ -158,7 +158,7 @@ CONTAINS c_4 = rsoc%rsoplop(n,ilo,i1,j1) *chelp(m,i,ilo,na,jsp) c_5 =rsoc%rsoplopd(n,ilo,i1,j1) *chelp(m,i,ilo,na,jsp) hsomtx(i,j,i1,j1) = hsomtx(i,j,i1,j1) + & - c_4*c_a(m,l,na) + c_5*c_b(m,l,na) +& + c_4*c_a(lm,na) + c_5*c_b(lm,na) +& c_3*c_c(m,ilo,na) ENDDO DO ilop = 1,atoms%nlo(n) @@ -181,12 +181,12 @@ CONTAINS ENDDO !!j !!$OMP END DO - DEALLOCATE (c_a,c_b,c_c) !!$OMP END PARALLEL ENDDO !!jsp1 ENDDO !!jsp + DEALLOCATE (c_a,c_b,c_c) ! !---> update hamiltonian matrices: lower triangle ! @@ -200,9 +200,9 @@ CONTAINS CALL MPI_BARRIER(SUB_COMM,ierr) n = 4*nsz(1)*nsz(input%jspins) ALLOCATE(c_buf(n)) - CALL MPI_REDUCE(hsomtx(1,1,1,1),c_buf,n,CPP_MPI_COMPLEX,MPI_SUM,0,SUB_COMM,ierr) + CALL MPI_REDUCE(hsomtx,c_buf,n,CPP_MPI_COMPLEX,MPI_SUM,0,SUB_COMM,ierr) IF (n_rank.EQ.0) THEN - CALL CPP_BLAS_ccopy(n, c_buf, 1, hsomtx(1,1,1,1), 1) + CALL CPP_BLAS_ccopy(n,c_buf,1,hsomtx,1) ENDIF DEALLOCATE(c_buf) #endif diff --git a/eigen_soc/hsohelp.F90 b/eigen_soc/hsohelp.F90 index f4e2cb8f..f28941eb 100644 --- a/eigen_soc/hsohelp.F90 +++ b/eigen_soc/hsohelp.F90 @@ -43,11 +43,9 @@ CONTAINS ! .. Array Arguments .. INTEGER, INTENT (IN) :: nsz(DIMENSION%jspd) COMPLEX, INTENT (INOUT) :: zso(:,:,:)!DIMENSION%nbasfcn,2*DIMENSION%neigd,DIMENSION%jspd) -! COMPLEX, INTENT (OUT):: ahelp(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l,DIMENSION%neigd,DIMENSION%jspd) -! COMPLEX, INTENT (OUT):: bhelp(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,nat_l,DIMENSION%neigd,DIMENSION%jspd) - COMPLEX, INTENT (OUT):: ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd) - COMPLEX, INTENT (OUT):: bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,DIMENSION%jspd) - COMPLEX, INTENT (OUT):: chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,DIMENSION%jspd) + COMPLEX, INTENT (OUT):: ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) + COMPLEX, INTENT (OUT):: bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) + COMPLEX, INTENT (OUT):: chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,input%jspins) TYPE(t_mat),INTENT(IN) :: zmat(:) ! (DIMENSION%nbasfcn,DIMENSION%neigd,DIMENSION%jspd) !-odim !+odim diff --git a/wannier/wann_socmat.F b/wannier/wann_socmat.F index 50fb43a9..f93d2f83 100644 --- a/wannier/wann_socmat.F +++ b/wannier/wann_socmat.F @@ -70,7 +70,7 @@ c*********************************************************************** complex, intent(in) :: bcof(:,0:,:,:) !bcof(noccbd,0:lmd,natd,jspd) complex, intent(in) :: chelp(-llod:,:,:,:,:) !chelp(-llod:llod,neigd,nlod,natd,jspd) - complex, intent(out):: hsomtx(:,:,:,:) !(2,2,neigd,neigd) + complex, allocatable, intent(out):: hsomtx(:,:,:,:) !(neigd,neigd,2,2) integer :: n,l,nwdd,nw,ispin,ie,na,ll1,m,lm,i,nsz(2) real :: s(3),r2 @@ -110,10 +110,10 @@ c*********************************************************************** ! is equivalent to the def in the noco-routines ENDIF - ALLOCATE ( ahelp(lmaxd*(lmaxd+2),natd,neigd,jspd) ) - ALLOCATE ( bhelp(lmaxd*(lmaxd+2),natd,neigd,jspd) ) + ALLOCATE ( ahelp(lmaxd*(lmaxd+2),natd,neigd,input%jspins) ) + ALLOCATE ( bhelp(lmaxd*(lmaxd+2),natd,neigd,input%jspins) ) - do ispin=1,jspd + do ispin=1,input%jspins DO ie = 1, neigd DO na = 1, natd DO l = 1, lmaxd @@ -150,8 +150,8 @@ c*********************************************************************** rsoc%soangl= conjg(rsoc%soangl) - CALL hsoham(atoms,noco,input,nsz,chelp,rsoc,ahelp,bhelp, - > 1,natd,mpi%n_rank,mpi%n_size,mpi%SUB_COMM, + CALL hsoham(atoms,noco,input,nsz,neigd,chelp,rsoc,ahelp, + > bhelp,1,natd,mpi%n_rank,mpi%n_size,mpi%SUB_COMM, < hsomtx) end subroutine wann_socmat diff --git a/wannier/wann_updown.F b/wannier/wann_updown.F index 693daee1..3d25536d 100644 --- a/wannier/wann_updown.F +++ b/wannier/wann_updown.F @@ -167,7 +167,7 @@ cccccccccccccccccc local variables cccccccccccccccccccc real :: epar(0:lmaxd,ntypd,max(2,jspd)),evdu(2,max(jspd,2)) !!! the Mmn matrices complex, allocatable :: surfcurr(:,:,:,:) - complex, allocatable :: hsomtx(:,:,:,:,:) + complex, allocatable :: hsomtx(:,:,:,:,:),hsomtx_l(:,:,:,:) complex, allocatable :: mmnk(:,:,:,:),mmn(:,:,:) complex, allocatable :: perpmag(:,:,:) complex, allocatable :: amn(:,:,:),nablamat(:,:,:,:) @@ -496,6 +496,7 @@ c################################################################## if(wann%l_socmat)then allocate ( hsomtx(2,2,nbnd,nbnd,fullnkpts) ) + allocate ( hsomtx_l(nbnd,nbnd,2,2) ) endif write (*,*) 'nwfs=',nwfs @@ -871,7 +872,16 @@ c...for the lapws and local orbitals, summed by the basis functions > jri,lmax,dx,rmsh,epar,ello,nlo,llo, > l_dulo,ulo_der,vTot%mt, > acof,bcof,ccof, - < hsomtx(:,:,:,:,ikpt)) + < hsomtx_l) + do i1 = 1,2 + do i2 = 1,2 + do i = 1,nbnd + do j = 1,nbnd + hsomtx(i1,i2,i,j,ikpt) = hsomtx_l(i,j,i1,i2) + enddo + enddo + enddo + enddo endif #ifdef CPP_TOPO -- GitLab