Commit 894fb095 authored by Daniel Wortmann

Several changes:

- made l_zref a local variable, no longer stored in type sym
- changed the setup of the parallelization (mpi/mpi_make_groups is obsolete)
- added new command-line switches, yet to be documented :-)
parent b90c36e8
......@@ -30,9 +30,8 @@ include(vgen/CMakeLists.txt)
include(inpgen/CMakeLists.txt)
include(docs/CMakeLists.txt)
include(tests/CMakeLists.txt)
if (${FLEUR_USE_MPI})
include(mpi/CMakeLists.txt)
endif()
include(mpi/CMakeLists.txt)
#include(wannier/CMakeLists.txt)
......
......@@ -17,6 +17,7 @@ MODULE m_eigen_diag
USE m_elpa
#endif
IMPLICIT NONE
PRIVATE
#ifdef CPP_ELPA
INTEGER,PARAMETER:: diag_elpa=1
#else
......@@ -39,9 +40,15 @@ MODULE m_eigen_diag
#endif
INTEGER,PARAMETER:: diag_lapack=4
INTEGER,PARAMETER:: diag_lapack2=5
PUBLIC eigen_diag,parallel_solver_available
CONTAINS
LOGICAL FUNCTION parallel_solver_available()
parallel_solver_available=any((/diag_elpa,diag_elemental,diag_scalapack/)>0)
END FUNCTION parallel_solver_available
SUBROUTINE eigen_diag(jsp,eig_id,it,atoms,dimension,matsize,mpi, n_rank,n_size,ne,nk,lapw,input,nred,sub_comm,&
sym,matind,kveclo, noco,cell,bkpt,el,jij,l_wu,oneD,td,ud, eig,ne_found,hamOvlp,zMat,realdata)
sym,l_zref,matind,kveclo, noco,cell,bkpt,el,jij,l_wu,oneD,td,ud, eig,ne_found,hamOvlp,zMat,realdata)
USE m_zsymsecloc
USE m_aline
USE m_alinemuff
......@@ -81,7 +88,7 @@ CONTAINS
INTEGER,INTENT(IN) :: ne
INTEGER,INTENT(OUT) :: ne_found
REAL,INTENT(IN) :: el(:,:,:)
LOGICAL, INTENT(IN) :: l_wu
LOGICAL, INTENT(IN) :: l_wu,l_zref
REAL,INTENT(INOUT) :: bkpt(3)
TYPE(t_tlmplm),INTENT(IN) :: td
TYPE(t_usdus),INTENT(IN) :: ud
......@@ -189,14 +196,14 @@ CONTAINS
if (noco%l_ss) call juDFT_error("zsymsecloc not tested with noco%l_ss")
if (input%gw>1) call juDFT_error("zsymsecloc not tested with input%gw>1")
IF (l_real) THEN
CALL zsymsecloc(jsp,input,lapw,bkpt,atoms,kveclo, sym,cell, dimension,matsize,ndim,&
CALL zsymsecloc(jsp,input,lapw,bkpt,atoms,kveclo, sym,l_zref,cell, dimension,matsize,ndim,&
jij,matind,nred,eig,ne_found,hamOvlp%a_r,hamOvlp%b_r,zMat%z_r)
else
CALL zsymsecloc(jsp,input,lapw,bkpt,atoms,kveclo, sym,cell, dimension,matsize,ndim,&
CALL zsymsecloc(jsp,input,lapw,bkpt,atoms,kveclo, sym,l_zref,cell, dimension,matsize,ndim,&
jij,matind,nred,eig,ne_found,hamOvlp%a_c,hamOvlp%b_c,zMat%z_c)
endif
CASE (diag_lapack)
CALL franza(dimension%nbasfcn,ndim, lapw%nmat,(sym%l_zref.AND.(atoms%nlotot.EQ.0)),&
CALL franza(dimension%nbasfcn,ndim, lapw%nmat,(l_zref.AND.(atoms%nlotot.EQ.0)),&
jij%l_j,matind,nred,input%gw,eig,ne_found,hamOvlp,zMat)
CASE DEFAULT
!This should only happen if you select a solver by hand which was not compiled against
......@@ -256,6 +263,7 @@ CONTAINS
END FUNCTION priv_select_solver
SUBROUTINE priv_solver_error(diag_solver,parallel)
IMPLICIT NONE
INTEGER,INTENT(IN):: diag_solver
......
......@@ -26,13 +26,13 @@ MODULE m_zsymsecloc
MODULE procedure zsymsecloc_r,zsymsecloc_c
END INTERFACE zsymsecloc
CONTAINS
SUBROUTINE zsymsecloc_r(jsp,input,lapw,bkpt,atoms, kveclo, sym,cell, dimension,matsize, nsize, jij,matind,nred,eig,ne, a,b, z)
SUBROUTINE zsymsecloc_r(jsp,input,lapw,bkpt,atoms, kveclo, sym,l_zref,cell, dimension,matsize, nsize, jij,matind,nred,eig,ne, a,b, z)
#define CPP_REALDATA
#include "zsymsecloc_cpp.F90"
END SUBROUTINE zsymsecloc_r
SUBROUTINE zsymsecloc_c(jsp,input,lapw,bkpt,atoms, kveclo, sym,cell, dimension,matsize, nsize, jij,matind,nred,eig,ne, a,b, z)
SUBROUTINE zsymsecloc_c(jsp,input,lapw,bkpt,atoms, kveclo, sym,l_zref,cell, dimension,matsize, nsize, jij,matind,nred,eig,ne, a,b, z)
#undef CPP_REALDATA
#include "zsymsecloc_cpp.F90"
......
......@@ -23,6 +23,7 @@
TYPE(t_lapw),INTENT(IN) :: lapw
real,intent(in) ::bkpt(3)
integer,intent(in)::kveclo(atoms%nlotot)
logical,intent(in) :: l_zref
! ..
! .. Scalar Arguments ..
......@@ -82,7 +83,7 @@
!******************************************
! l_zref=.false. => simply call eigensolver
!******************************************
if(.not.sym%l_zref)then
if(.not.l_zref)then
#ifdef CPP_REALDATA
call geneigprobl(dimension%nbasfcn, nsize,dimension%neigd,jij%l_j,eig,ne,a,b,z)
#else
......@@ -512,6 +513,6 @@
allocate(a(dimension%nbasfcn*(dimension%nbasfcn+1)/2))
allocate(b(dimension%nbasfcn*(dimension%nbasfcn+1)/2))
endif !sym%l_zref
endif !l_zref
deallocate ( z1,z2,etemp1,etemp2,evensort )
......@@ -13,7 +13,7 @@ MODULE m_apws
!*********************************************************************
CONTAINS
SUBROUTINE apws(dimension,input,noco,kpts,&
nk,cell,sym,n_size,jspin, bkpt,lapw,matind,nred)
nk,cell,l_zref,n_size,jspin, bkpt,lapw,matind,nred)
USE m_types
USE m_sort
......@@ -23,7 +23,6 @@ CONTAINS
TYPE(t_dimension),INTENT(IN) :: dimension
TYPE(t_input),INTENT(IN) :: input
TYPE(t_noco),INTENT(IN) :: noco
TYPE(t_sym),INTENT(IN) :: sym
TYPE(t_cell),INTENT(IN) :: cell
TYPE(t_kpts),INTENT(IN) :: kpts
TYPE(t_lapw),INTENT(INOUT) :: lapw
......@@ -31,6 +30,7 @@ CONTAINS
! .. Scalar Arguments ..
INTEGER, INTENT (IN) :: nk,n_size,jspin
INTEGER, INTENT (OUT) :: nred
LOGICAL, INTENT (IN) :: l_zref
! ..
! .. Array Arguments ..
INTEGER, INTENT (OUT) :: matind(dimension%nbasfcn,2)
......@@ -153,7 +153,7 @@ CONTAINS
!+gu
!---> determine pairs of K-vectors, where K_z = K'_-z to use
!---> z-reflection
IF (sym%l_zref) THEN
IF (l_zref) THEN
n=0
DO i=1,lapw%nv(ispin)
DO j=1,i
......
......@@ -76,14 +76,14 @@ MODULE m_tlmplm
!---> generate the wavefunctions for each l
!
l_write=mpi%irank==0
!$ l_write=.false.
!$ call gaunt2(atoms%lmaxd)
!$OMP PARALLEL DO DEFAULT(NONE)&
!$OMP PRIVATE(indt,dvd,dvu,uvd,uvu,f,g,x,flo,uuilon,duilon,ulouilopn)&
!$OMP PRIVATE(cil,temp,wronk,i,l,l2,lamda,lh,lm,lmin,lmin0,lmp,lmpl)&
!$OMP PRIVATE(lmplm,lmx,lmxx,lp,lp1,lpl,m,mem,mems,mp,mu,n,nh,noded)&
!$OMP PRIVATE(nodeu,nsym,na)&
!$OMP SHARED(dimension,atoms,gwc,lh0,jspin,jsp,sphhar,enpara,td,ud,l_write,ci,vr,mpi,input)
!!$ l_write=.false.
!!$ call gaunt2(atoms%lmaxd)
!!$OMP PARALLEL DO DEFAULT(NONE)&
!!$OMP PRIVATE(indt,dvd,dvu,uvd,uvu,f,g,x,flo,uuilon,duilon,ulouilopn)&
!!$OMP PRIVATE(cil,temp,wronk,i,l,l2,lamda,lh,lm,lmin,lmin0,lmp,lmpl)&
!!$OMP PRIVATE(lmplm,lmx,lmxx,lp,lp1,lpl,m,mem,mems,mp,mu,n,nh,noded)&
!!$OMP PRIVATE(nodeu,nsym,na)&
!!$OMP SHARED(dimension,atoms,gwc,lh0,jspin,jsp,sphhar,enpara,td,ud,l_write,ci,vr,mpi,input)
DO n = 1,atoms%ntype
na=sum(atoms%neq(:n-1))+1
......@@ -250,7 +250,7 @@ MODULE m_tlmplm
ENDIF
ENDDO
!$OMP END PARALLEL DO
!!$OMP END PARALLEL DO
END SUBROUTINE tlmplm
......
......@@ -710,7 +710,6 @@
LOGICAL ::invs
!Z-refls. sym
LOGICAL ::zrfs
LOGICAL :: l_zref
!No of sym ops
INTEGER ::nop
!No of 2D-sym ops
......@@ -756,6 +755,12 @@
INTEGER :: mpi_comm !< replaces MPI_COMM_WORLD
INTEGER :: irank !< rank of task in mpi_comm
INTEGER :: isize !< no of tasks in mpi_comm
INTEGER :: n_start !< no of first k-point to calculate on this PE
INTEGER :: n_stride !< stride for k-loops
INTEGER :: n_size !< PE per kpoint, i.e. "isize" for eigenvalue parallelization
INTEGER :: n_groups !< No of k-loops per PE
INTEGER :: sub_comm !< Sub-Communicator for eigenvalue parallelization (all PE working on same k-point)
INTEGER :: n_rank !< rank in sub_comm
END TYPE
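The new t_mpi members describe both levels of parallelism: n_start/n_stride/n_groups steer the k-point loop of each PE, while n_size/n_rank/sub_comm describe the eigenvalue parallelization within one k-point. A minimal sketch of how a k-point loop would presumably consume these fields (the real loop is elsewhere in FLEUR and not part of the hunks shown here; eigen_for_kpt is a hypothetical routine name):
    ! every PE starts at its own offset and strides over the k-points;
    ! the mpi%n_size PEs that share one k-point communicate via mpi%sub_comm
    DO nk = mpi%n_start, nkpt, mpi%n_stride
       CALL eigen_for_kpt(nk, mpi%sub_comm, mpi%n_rank, mpi%n_size)
    END DO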
TYPE t_zMat
......
set(fleur_F77 ${fleur_F77}
)
set(fleur_F90 ${fleur_F90}
mpi/mingeselle.F90
mpi/mpi_bc_all.F90
mpi/mpi_bc_st.F90
mpi/mpi_col_den.F90
mpi/mpi_make_groups.F90
mpi/setupMPI.F90)
if (${FLEUR_USE_MPI})
set(fleur_F90 ${fleur_F90}
mpi/mingeselle.F90
mpi/mpi_bc_all.F90
mpi/mpi_bc_st.F90
mpi/mpi_col_den.F90
mpi/mpi_make_groups.F90
)
endif()
......@@ -65,7 +65,7 @@ CONTAINS
r(27)=aMix_VHSE() ; r(28)=omega_VHSE()
l(1)=input%eonly ; l(2)=obsolete%form66 ; l(3)=input%secvar ; l(4)=sym%zrfs ; l(5)=input%film
l(6)=sym%invs ; l(7)=sym%invs2 ; l(8)=input%l_bmt ; l(9)=input%l_f ; l(10)=input%cdinf
l(11)=banddos%dos ;l(12)=sym%l_zref ; l(13)=banddos%vacdos ; l(14)=input%integ ; l(15)=sliceplot%iplot
l(11)=banddos%dos ; l(13)=banddos%vacdos ; l(14)=input%integ ; l(15)=sliceplot%iplot
l(16)=input%strho ; l(17)=input%swsp ; l(18)=input%lflip ; l(19)=obsolete%l_f2u ; l(20)=obsolete%l_u2f
l(21)=input%pallst ; l(22)=sliceplot%slice ; l(23)=noco%l_soc ; l(24)=vacuum%starcoeff
l(25)=noco%l_noco ; l(26)=noco%l_ss; l(27)=noco%l_mperp; l(28)=noco%l_constr
......@@ -104,7 +104,7 @@ CONTAINS
noco%l_noco=l(25) ; noco%l_ss=l(26) ; noco%l_mperp=l(27) ; noco%l_constr=l(28)
input%pallst=l(21) ; sliceplot%slice=l(22) ; noco%l_soc=l(23) ; vacuum%starcoeff=l(24)
input%strho=l(16) ; input%swsp=l(17) ; input%lflip=l(18) ; obsolete%l_f2u=l(19) ; obsolete%l_u2f=l(20)
banddos%dos=l(11) ;sym%l_zref=l(12) ; banddos%vacdos=l(13) ; input%integ=l(14) ; sliceplot%iplot=l(15)
banddos%dos=l(11) ; banddos%vacdos=l(13) ; input%integ=l(14) ; sliceplot%iplot=l(15)
sym%invs=l(6) ; sym%invs2=l(7) ; input%l_bmt=l(8) ; input%l_f=l(9) ; input%cdinf=l(10)
input%eonly=l(1) ; obsolete%form66=l(2) ; input%secvar=l(3) ; sym%zrfs=l(4) ; input%film=l(5)
input%efield%l_segmented = l(38) ; sym%symor=l(39); input%efield%dirichlet = l(40)
......
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE m_setupMPI
use m_juDFT
IMPLICIT NONE
CONTAINS
SUBROUTINE setupMPI(nkpt,mpi)
USE m_types
USE m_eigen_diag,ONLY:parallel_solver_available
INTEGER,INTENT(in) :: nkpt
TYPE(t_mpi),INTENT(inout) :: mpi
IF (mpi%isize==1) THEN
!give some info on available parallelisation
CALL priv_dist_info(nkpt)
mpi%n_start=1
mpi%n_stride=1
mpi%n_rank=0
mpi%n_size=1
mpi%n_groups=1
mpi%sub_comm=mpi%mpi_comm
END IF
#ifdef CPP_MPI
!Distribute the work
CALL priv_distribute_k(nkpt,mpi)
!Now check if parallelization is possible
IF (mpi%n_size>1.AND..NOT.parallel_solver_available()) &
CALL juDFT_error("MPI parallelization failed",hint="You have to either compile FLEUR with a parallel diagonalization library (ELPA,SCALAPACK...) or you have to run such that the No of kpoints can be distributed on the PEs")
#endif
!generate the MPI communicators
CALL priv_create_comm(nkpt,mpi)
END SUBROUTINE setupMPI
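A hypothetical call site for setupMPI (not part of this diff): mpi%mpi_comm, mpi%irank and mpi%isize are assumed to be filled from the global communicator first, then the k-point count is handed over:
    ! assumed initialization; kpts%nkpt is the total number of k-points
    CALL MPI_COMM_RANK(mpi%mpi_comm, mpi%irank, ierr)
    CALL MPI_COMM_SIZE(mpi%mpi_comm, mpi%isize, ierr)
    CALL setupMPI(kpts%nkpt, mpi)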
SUBROUTINE priv_distribute_k(nkpt,mpi)
use m_types
implicit none
INTEGER,INTENT(in) :: nkpt
TYPE(t_mpi),INTENT(inout) :: mpi
!------------------------------------------------------------------------
!
! Distribute the k-point / eigenvector parallelisation so that
! all PEs have approximately equal load. Maximize the k-point
! parallelisation. The naming conventions are as follows:
!
! groups            1               2            (n_groups = 2)
!                  /   \           /   \
! k-points:     1       2       3       4        (nkpts = 4)
!              /|\     /|\     /|\     /|\
! irank       0 1 2   3 4 5   0 1 2   3 4 5      (isize = 6)
!
! n_rank      0 1 2   0 1 2   0 1 2   0 1 2      (n_size = 3)
!
! nrec        1 2 3   4 5 6   7 8 9  10 11 12    ...rec. no. on eig-file
!             * *     * *     * *     * *
!
! In the above example, 6 PEs work on 4 k-points and distribute
! their load such that 3 PEs work on each k-point, so 2 k-points
! are done in parallel (n_members=2) and there are 2 groups of k-points.
! n_rank and n_size are the equivalents of irank and isize. The former
! belong to the communicator SUB_COMM, the latter to MPI_COMM.
!
! G.B. `99
!
!------------------------------------------------------------------------
INTEGER:: n_members,n_size_min
CHARACTER(len=20)::txt
n_members = MIN(nkpt,mpi%isize)
IF (judft_was_argument("-n_size_min")) THEN
txt=judft_string_for_argument("-n_size_min")
READ(txt,*) n_size_min
WRITE(*,*) "Trying to use ",n_size_min," PE per kpt"
n_members = MIN(n_members , CEILING(REAL(mpi%isize)/n_size_min) )
ENDIF
DO
IF ((MOD(mpi%isize,n_members) == 0).AND.(MOD(nkpt,n_members) == 0) ) EXIT
n_members = n_members - 1
ENDDO
mpi%n_groups = nkpt/n_members
mpi%n_size = mpi%isize/n_members
mpi%n_stride = n_members
IF (mpi%irank == 0) THEN
WRITE(*,*) 'k-points in parallel: ',n_members
WRITE(*,*) "pe's per k-point: ",mpi%n_size
WRITE(*,*) '# of k-point loops: ',mpi%n_groups
ENDIF
END SUBROUTINE priv_distribute_k
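To make the factorization concrete, here is a small standalone sketch of the same splitting rule, written for illustration only (it is not code from this commit): n_members starts at MIN(nkpt, isize) and is lowered until it divides both the PE count and the k-point count.
    PROGRAM show_split
       IMPLICIT NONE
       INTEGER :: nkpt, isize, n_members
       nkpt  = 4    ! k-points, as in the diagram above
       isize = 6    ! total number of PEs
       n_members = MIN(nkpt, isize)
       ! shrink until n_members divides both isize and nkpt
       DO WHILE (MOD(isize,n_members)/=0 .OR. MOD(nkpt,n_members)/=0)
          n_members = n_members - 1
       END DO
       WRITE(*,*) 'k-points in parallel (n_members):', n_members        ! prints 2
       WRITE(*,*) 'PEs per k-point (n_size):        ', isize/n_members  ! prints 3
       WRITE(*,*) 'k-point loops per PE (n_groups): ', nkpt/n_members   ! prints 2
    END PROGRAM show_split
The new -n_size_min switch caps n_members from the start, i.e. it trades k-point parallelism for more PEs per k-point before this search loop runs.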
SUBROUTINE priv_create_comm(nkpt,mpi)
use m_types
implicit none
INTEGER,INTENT(in) :: nkpt
TYPE(t_mpi),INTENT(inout) :: mpi
#ifdef CPP_MPI
INTEGER:: n_members,n,i,ierr,sub_group,world_group
INTEGER:: i_mygroup(mpi%n_size)
n_members = nkpt/mpi%n_groups
!
! now, we make the groups
!
mpi%n_start = MOD(mpi%irank,n_members) + 1
!! n_start = INT(irank/n_size) * n_size
n = 0
DO i = mpi%n_start,mpi%isize,n_members
!! DO i = n_start+1,n_start+n_size
n = n+1
i_mygroup(n) = i-1
ENDDO
! write (*,*) irank,n_groups,n_start,i_mygroup
CALL MPI_COMM_GROUP (mpi%MPI_COMM,WORLD_GROUP,ierr)
CALL MPI_GROUP_INCL (WORLD_GROUP,mpi%n_size,i_mygroup,SUB_GROUP,ierr)
CALL MPI_COMM_CREATE (mpi%MPI_COMM,SUB_GROUP,mpi%SUB_COMM,ierr)
CALL MPI_COMM_RANK (mpi%SUB_COMM,mpi%n_rank,ierr)
#endif
END SUBROUTINE priv_create_comm
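The explicit group construction above pins down exactly which global ranks end up in each sub-communicator. Since all ranks sharing the same MOD(irank, n_members) land in one group, the same result could presumably be obtained with MPI_COMM_SPLIT; a sketch under that assumption (not the route taken in this commit), with color and ierr as local integers:
    ! color selects the group, the key (irank) keeps the original rank order
    color = MOD(mpi%irank, n_members)
    CALL MPI_COMM_SPLIT(mpi%mpi_comm, color, mpi%irank, mpi%sub_comm, ierr)
    CALL MPI_COMM_RANK(mpi%sub_comm, mpi%n_rank, ierr)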
SUBROUTINE priv_dist_info(nkpt)
USE m_eigen_diag,ONLY:parallel_solver_available
IMPLICIT NONE
INTEGER,INTENT(in) :: nkpt
INTEGER:: n,k_only,pe_k_only(nkpt)
!Create a list of PE counts that lead to k-point parallelization only
k_only=0
DO n=1,nkpt
IF (MOD(nkpt,n)==0) THEN
k_only=k_only+1
pe_k_only(k_only)=n
ENDIF
END DO
WRITE(*,*) "Most efficient parallelization for:"
WRITE(*,*) pe_k_only(:k_only)
!check if eigenvalue parallelization is possible
IF (parallel_solver_available()) WRITE(*,*) "Additional eigenvalue parallelization possible"
END SUBROUTINE priv_dist_info
END MODULE m_setupMPI