Commit bc50dedf authored by Matthias Redies

more fixes

parents 5aec3d1f f3324cf7
@@ -16,7 +16,8 @@ if (${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
else()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mkl -qopenmp -assume byterecl")
endif()
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -xHost -O2 -g")
#set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -xHost -O2 -g")
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -xMIC-AVX512 -O2")
if (${CMAKE_Fortran_COMPILER_VERSION} VERSION_LESS "19.0.0.0")
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -ftrapuv -check uninit -check pointers -DCPP_DEBUG -warn all")
else()
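For context: -xMIC-AVX512 targets the AVX-512 instruction set of the Intel Xeon Phi (Knights Landing) processors, whereas the previously used -xHost optimizes for whatever machine the code is compiled on; the old -xHost line is kept as a comment and the -g debug flag is dropped from the release build.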
@@ -27,7 +28,7 @@ elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
message("PGI Fortran detected")
set(CMAKE_SHARED_LIBRARY_LINK_Fortran_FLAGS "") #fix problem in cmake
#CPU
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp")
#GPU
#set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mcuda=cuda9.0,cc60 -Mcudalib=cublas")
#set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mcuda:kepler+ -ta:tesla:cuda7.5 -DUSE_STREAMS -DNUM_STREAMS=${N_STREAMS} -Minfo=accel -acc")
......
@@ -75,7 +75,7 @@ CONTAINS
! Local Scalars
INTEGER jsp,nk,nred,ne_all,ne_found
INTEGER ne
INTEGER ne, nk_i
INTEGER isp,i,j,err
LOGICAL l_wu,l_file,l_real,l_zref
INTEGER :: solver=0
@@ -131,8 +131,8 @@ CONTAINS
unfoldingBuffer = CMPLX(0.0,0.0)
DO jsp = 1,MERGE(1,input%jspins,noco%l_noco)
k_loop:DO nk = mpi%n_start,kpts%nkpt,mpi%n_stride
k_loop:DO nk_i = 1,size(mpi%k_list)
nk=mpi%k_list(nk_i)
! Set up lapw list
CALL lapw%init(input,noco, kpts,atoms,sym,nk,cell,l_zref, mpi)
call timestart("Setup of H&S matrices")
@@ -246,6 +246,8 @@ CONTAINS
CALL timestop("EV output")
IF (banddos%unfoldband) THEN
IF(MODULO(kpts%nkpt,mpi%n_size).NE.0) CALL juDFT_error("number of k-points must be a multiple of the number of MPI processes",&
hint=errmsg, calledby="eigen.F90")
CALL calculate_plot_w_n(banddos,cell,kpts,smat_unfold,zMat,lapw,nk,jsp,eig,results,input,atoms,unfoldingBuffer,mpi)
CALL smat_unfold%free()
DEALLOCATE(smat_unfold, stat=dealloc_stat, errmsg=errmsg)
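The refactored drivers iterate an explicit list of k-point indices (mpi%k_list, added to t_mpi in this commit) instead of repeating start/stride arithmetic in every loop. A minimal standalone sketch of the pattern, with hypothetical demo values (irank and the list contents are not taken from FLEUR):

PROGRAM demo_k_list
   IMPLICIT NONE
   INTEGER              :: nk_i, nk, irank
   INTEGER, ALLOCATABLE :: k_list(:)
   irank  = 1                           ! pretend MPI rank (0-based)
   k_list = [(nk, nk=irank+1, 8, 2)]    ! e.g. rank 1 of 2 would own k-points 2,4,6,8
   DO nk_i = 1, SIZE(k_list)
      nk = k_list(nk_i)                 ! global k-point index for this iteration
      ! ... set up the LAPW basis and the H/S matrices for k-point nk ...
   END DO
END PROGRAM demo_k_list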
......
@@ -60,9 +60,9 @@ CONTAINS
! ..
! ..
! .. Local Scalars ..
INTEGER i,j,nk,jspin,n ,l
INTEGER i,j,nk,nk_i,jspin,n ,l
! INTEGER n_loc,n_plus,i_plus,
INTEGER n_end,nsz,nmat,n_stride
INTEGER nsz,nmat,n_stride
LOGICAL l_socvec !,l_all
INTEGER wannierspin
TYPE(t_usdus) :: usdus
@@ -129,25 +129,9 @@ CONTAINS
CALL timestop("eigenso: spnorb")
!
!---> loop over k-points: each can be a separate task
!
!n_loc = INT(kpts%nkpt/mpi%isize)
!n_plus = kpts%nkpt - mpi%isize*n_loc
!i_plus = -1
!IF (mpi%irank.LT.n_plus) i_plus = 0
!n_end = (mpi%irank+1)+(n_loc+i_plus)*mpi%isize
!
#if defined(CPP_MPI)
n_stride = kpts%nkpt/mpi%n_groups
#else
n_stride = 1
#endif
n_end = kpts%nkpt
!write(*,'(4i12)') mpi%irank, mpi%n_groups, n_stride, mpi%n_start
!
!---> start loop k-pts
!
! DO nk = mpi%irank+1,n_end,mpi%isize
DO nk = mpi%n_start,n_end,n_stride
DO nk_i=1,SIZE(mpi%k_list)
nk=mpi%k_list(nk_i)
!DO nk = mpi%n_start,n_end,n_stride
CALL lapw%init(input,noco, kpts,atoms,sym,nk,cell,.FALSE., mpi)
ALLOCATE( zso(lapw%nv(1)+atoms%nlotot,2*DIMENSION%neigd,wannierspin))
zso(:,:,:) = CMPLX(0.0,0.0)
......
@@ -127,8 +127,8 @@ CONTAINS
call timestop("HF_setup")
DO nk = mpi%n_start,kpts%nkpt,mpi%n_stride
DO nk = 1,kpts%nkpt
!DO nk = mpi%n_start,kpts%nkpt,mpi%n_stride
CALL lapw%init(input,noco, kpts,atoms,sym,nk,cell,l_zref)
CALL hsfock(nk,atoms,hybrid,lapw,DIMENSION,kpts,jsp,input,hybdat,eig_irr,sym,cell,&
noco,results,iterHF,MAXVAL(hybrid%nobd),xcpot,mpi)
......
@@ -22,7 +22,7 @@ MODULE m_fleur_arguments
t_fleur_param(0,"-genEnpara","generate an 'enpara' file",""),&
t_fleur_param(0,"-electronConfig","explicitely write the electron configuration into inp.xml",""),&
t_fleur_param(0,"-fast_defaults","generate more aggressive (and less stable) input parameters for faster calculations",""),&
t_fleur_param(0,"-kpts_gw","add alternative k point set for GW",""),&
t_fleur_param(0,"-gw","Set GW mode 1 and add alternative k point set for GW",""),&
t_fleur_param(0,"-h","print this help message","")&
/)
......
@@ -19,7 +19,7 @@ CONTAINS
CALL new_argument(0,"-genEnpara","Generate an 'enpara' file for the energy parameters","")
CALL new_argument(0,"-explicit","Write out k-point list, symmetry operations, and optional input to inp.xml","")
CALL new_argument(0,"-kpts_gw","add alternative k point set for GW in all outputs for the XML input file","")
CALL new_argument(0,"-gw","Set GW mode 1 and add alternative k point set for GW in all outputs for the XML input file","")
CALL new_argument(0,"-noco","write out noco parameters into inp.xml","")
CALL new_argument(0,"-electronConfig","explicitely write the electron configuration into inp.xml","")
CALL new_argument(0,"-fast_defaults","generate more aggressive (and less stable) input parameters for faster calculations","")
@@ -41,7 +41,7 @@ CONTAINS
CALL print_argument("-noco")
CALL print_argument("-electronConfig")
CALL print_argument("-fast_defaults")
CALL print_argument("-kpts_gw")
CALL print_argument("-gw")
CALL print_argument("-h")
WRITE(*,'(a)')""
WRITE(*,'(a)')"Please check the documentation on www.flapw.de for more details"
......
@@ -355,6 +355,7 @@
nu = 8
input%gw = 0
IF(juDFT_was_argument("-gw")) input%gw = 1
IF (kpts%nkpt == 0) THEN ! set some defaults for the k-points
IF (input%film) THEN
......
@@ -280,7 +280,7 @@ SUBROUTINE w_inpXML(&
WRITE (fileNum,212) kpts%kPointDensity(1),kpts%kPointDensity(2),kpts%kPointDensity(3),kptGamma
END IF
IF(juDFT_was_argument("-kpts_gw")) THEN
IF(juDFT_was_argument("-gw")) THEN
WRITE(fileNum,'(a)') ' <altKPointSet purpose="GW">'
WRITE(fileNum,'(a)') ' <kPointListFile filename="kpts_gw"/>'
WRITE(fileNum,'(a)') ' </altKPointSet>'
......
@@ -345,11 +345,13 @@ CONTAINS
! writes all times to file
SUBROUTINE writetimes(stdout)
USE m_judft_usage
USE m_judft_args
IMPLICIT NONE
LOGICAL, INTENT(IN), OPTIONAL::stdout
INTEGER :: irank = 0
CHARACTER(len=:), allocatable :: json_str
CHARACTER(len=30)::filename
#ifdef CPP_MPI
INCLUDE "mpif.h"
INTEGER::err,isize
@@ -372,14 +374,21 @@ CONTAINS
CALL priv_writetimes(globaltimer, 1, 6)
#ifdef CPP_MPI
IF (l_mpi) THEN
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, isize, err)
WRITE (6, *) "Program used ", isize, " PE"
ENDIF
#endif
END IF
IF (irank==0.OR.judft_was_argument("-all_times")) THEN
json_str = ""
call priv_genjson(globaltimer, 1, json_str)
open(32, file="juDFT_times.json")
CALL priv_genjson(globaltimer, 1, json_str)
IF (irank==0) THEN
OPEN(32, file="juDFT_times.json")
ELSE
WRITE(filename,"(a,i0,a)") "juDFT_times.",irank,".json"
OPEN(32, file=trim(filename))
END IF
write (32,"(A)") json_str
close(32)
ENDIF
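When -all_times is given, each PE now writes its own timing file: rank 0 keeps the plain name and every other rank gets a rank-tagged one. A self-contained sketch of the naming scheme (demo program with an assumed rank, not part of the commit):

PROGRAM demo_times_filename
   IMPLICIT NONE
   CHARACTER(len=30) :: filename
   INTEGER           :: irank
   irank = 3                                         ! pretend MPI rank
   IF (irank == 0) THEN
      filename = "juDFT_times.json"
   ELSE
      WRITE (filename, "(a,i0,a)") "juDFT_times.", irank, ".json"
   END IF
   PRINT *, TRIM(filename)                           ! prints: juDFT_times.3.json
END PROGRAM demo_times_filename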
......
@@ -59,6 +59,7 @@ CONTAINS
CALL print_argument("-warn_only")
CALL print_argument("-trace")
CALL print_argument("-debugtime")
CALL print_argument("-all_times")
#ifdef CPP_HDF
WRITE(*,'(a)')""
WRITE(*,'(a)')"HDF density file relevant options:"
@@ -120,12 +121,13 @@ CONTAINS
!Debugging
CALL new_argument(0,"-warn_only","Continue execution after a warning message","")
CALL new_argument(0,"-trace","Try to generate a stacktrace in case of an error","")
CALL new_argument(0,"-debugtime","Write the start/stop of all timers to the console","")
CALL new_argument(0,"-debugtime","Write the start/stop of all timers to the console","")
CALL new_argument(0,"-all_times","Write json files of timing for all PE, not only for PE=0","")
!Output
CALL new_argument(0,"-mix_io","Do not store mixing history in memory but do IO in each iteration","")
CALL new_argument(0,"-no_out","Do not open the 'out' file but write to stdout","")
CALL new_argument(0,"-genEnpara","Generate an 'enpara' file for the energy parameters","")
CALL new_argument(0,"-kpts_gw","add alternative k point set for GW in all outputs for the XML input file","")
CALL new_argument(0,"-gw","Add alternative k point set for GW in all outputs for the XML input file","")
CALL new_argument(0,"-noco","write out noco parameters in all outputs for inp.xml","")
CALL new_argument(0,"-h","Print this message","")
CALL new_argument(0,"-no_send","Do not send usage data","")
......
@@ -504,7 +504,7 @@
END IF
!Finalize the MPI setup
CALL setupMPI(kpts%nkpt,mpi)
CALL setupMPI(kpts%nkpt,DIMENSION%neigd,mpi)
!Collect some usage info
CALL add_usage_data("A-Types",atoms%ntype)
......
@@ -9,14 +9,14 @@ MODULE m_setupMPI
IMPLICIT NONE
CONTAINS
SUBROUTINE setupMPI(nkpt,mpi)
SUBROUTINE setupMPI(nkpt,neigd,mpi)
!$ use omp_lib
USE m_types
USE m_available_solvers,ONLY:parallel_solver_available
INTEGER,INTENT(in) :: nkpt
INTEGER,INTENT(in) :: nkpt,neigd
TYPE(t_mpi),INTENT(inout) :: mpi
integer :: omp=-1
INTEGER :: omp=-1,i
!$ omp=omp_get_max_threads()
if (mpi%irank==0) THEN
@@ -39,12 +39,14 @@ CONTAINS
IF (mpi%isize==1) THEN
!give some info on available parallelisation
CALL priv_dist_info(nkpt)
mpi%n_start=1
mpi%n_stride=1
mpi%n_rank=0
mpi%n_size=1
mpi%n_groups=1
mpi%sub_comm=mpi%mpi_comm
IF (ALLOCATED(mpi%k_list)) DEALLOCATE(mpi%k_list,mpi%ev_list)
ALLOCATE(mpi%k_list(nkpt))
mpi%k_list=[(i,i=1,nkpt)]
ALLOCATE(mpi%ev_list(neigd))
mpi%ev_list=[(i,i=1,neigd)]
END IF
#ifdef CPP_MPI
!Distribute the work
@@ -56,7 +58,7 @@ CONTAINS
#endif
!generate the MPI communicators
CALL priv_create_comm(nkpt,mpi)
CALL priv_create_comm(nkpt,neigd,mpi)
if (mpi%irank==0) WRITE(*,*) "--------------------------------------------------------"
END SUBROUTINE setupMPI
@@ -93,7 +95,7 @@ CONTAINS
! G.B. `99
!
!-------------------------------------------------------------------------------------------
INTEGER:: n_members,n_size_min
INTEGER:: n_members,n_size_min,nk
CHARACTER(len=1000)::txt
n_members = MIN(nkpt,mpi%isize)
@@ -107,28 +109,31 @@ CONTAINS
IF ((MOD(mpi%isize,n_members) == 0).AND.(MOD(nkpt,n_members) == 0) ) EXIT
n_members = n_members - 1
ENDDO
mpi%n_groups = nkpt/n_members
ALLOCATE(mpi%k_list(nkpt/n_members))
mpi%k_list=[(nk, nk=nkpt/n_members,nkpt,n_members )]
!mpi%n_groups = nkpt/n_members
mpi%n_size = mpi%isize/n_members
mpi%n_stride = n_members
!mpi%n_stride = n_members
IF (mpi%irank == 0) THEN
WRITE(*,*) 'k-points in parallel: ',n_members
WRITE(*,*) "pe's per k-point: ",mpi%n_size
WRITE(*,*) '# of k-point loops: ',mpi%n_groups
WRITE(*,*) '# of k-point loops: ',nkpt/n_members
ENDIF
END SUBROUTINE priv_distribute_k
SUBROUTINE priv_create_comm(nkpt,mpi)
SUBROUTINE priv_create_comm(nkpt,neigd,mpi)
use m_types
implicit none
INTEGER,INTENT(in) :: nkpt
INTEGER,INTENT(in) :: nkpt,neigd
TYPE(t_mpi),INTENT(inout) :: mpi
#ifdef CPP_MPI
INTEGER :: n_members,n,i,ierr,sub_group,world_group
INTEGER :: n_members,n,i,ierr,sub_group,world_group,n_start
INTEGER :: i_mygroup(mpi%n_size)
LOGICAL :: compact ! Decide how to distribute k-points
compact = .true.
n_members = nkpt/mpi%n_groups
n_members = mpi%isize/mpi%n_size
! now, we make the groups
@@ -149,8 +154,8 @@ CONTAINS
! | 7 | 8 |
! -----------------------------------
mpi%n_start = INT(mpi%irank/mpi%n_size) + 1
i_mygroup(1) = (mpi%n_start-1) * mpi%n_size
n_start = INT(mpi%irank/mpi%n_size) + 1
i_mygroup(1) = (n_start-1) * mpi%n_size
do i = 2, mpi%n_size
i_mygroup(i) = i_mygroup(i-1) + 1
enddo
@@ -171,10 +176,10 @@ CONTAINS
! | 7 | 8 | 7 | 8 | 7 | 8 |
! -----------------------------------
mpi%n_start = MOD(mpi%irank,n_members) + 1
n_start = MOD(mpi%irank,n_members) + 1
!! n_start = INT(irank/n_size) * n_size
n = 0
DO i = mpi%n_start,mpi%isize,n_members
DO i = n_start,mpi%isize,n_members
!! DO i = n_start+1,n_start+n_size
n = n+1
i_mygroup(n) = i-1
@@ -188,6 +193,9 @@ CONTAINS
!write (*,"(a,i0,100i4)") "MPI:",mpi%sub_comm,mpi%irank,mpi%n_groups,mpi%n_size,n,i_mygroup
CALL MPI_COMM_RANK (mpi%SUB_COMM,mpi%n_rank,ierr)
ALLOCATE(mpi%ev_list(neigd/mpi%n_size+1))
mpi%ev_list=[(i,i=mpi%n_rank+1,neigd,mpi%n_size)] ! n_rank = rank within this k-point group
#endif
END SUBROUTINE priv_create_comm
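The new ev_list deals the eigenvalue indices out round-robin among the n_size PEs that share one k-point. A standalone sketch with demo values (n_size and neigd here are assumptions, not FLEUR defaults):

PROGRAM demo_ev_list
   IMPLICIT NONE
   INTEGER              :: n_rank, n_size, neigd, i
   INTEGER, ALLOCATABLE :: ev_list(:)
   n_size = 4                                       ! PEs working on the same k-point
   neigd  = 10                                      ! eigenvalues to distribute
   DO n_rank = 0, n_size-1                          ! rank within the sub-communicator
      ev_list = [(i, i=n_rank+1, neigd, n_size)]    ! same constructor as in priv_create_comm
      PRINT "(a,i0,a,10i4)", "rank ", n_rank, " handles eigenvalues", ev_list
   END DO
END PROGRAM demo_ev_list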
......
@@ -8,7 +8,7 @@ jt::testrun($executable,$workdir);
#now test output
$result=jt::test_grepexists("$workdir/relax.xml","4204.7823");
$result+=jt::test_grepexists("$workdir/relax.xml","0.1300000000 -0.00942");
$result+=jt::test_grepexists("$workdir/relax.xml","1.3806000000 -0.00942");
$result+=jt::test_grepexists("$workdir/relax.xml","0.00942");
jt::stageresult($workdir,$result,"3");
@@ -5,14 +5,15 @@
!--------------------------------------------------------------------------------
MODULE m_types_mpi
TYPE t_mpi
!k-point parallelism
INTEGER :: mpi_comm !< replaces MPI_COMM_WORLD
INTEGER :: irank !< rank of task in mpi_comm
INTEGER :: isize !< no of tasks in mpi_comm
INTEGER :: n_start !< no of first k-point to calculate on this PE
INTEGER :: n_stride !< stride for k-loops
INTEGER :: n_size !< PE per kpoint, i.e. "isize" for eigenvalue parallelization
INTEGER :: n_groups !< No of k-loops per PE
INTEGER,ALLOCATABLE :: k_list(:)
!Eigenvalue parallelism
INTEGER :: sub_comm !< Sub-Communicator for eigenvalue parallelization (all PE working on same k-point)
INTEGER :: n_rank !< rank in sub_comm
INTEGER :: n_size !< PE per kpoint, i.e. "isize" for eigenvalue parallelization
INTEGER,ALLOCATABLE :: ev_list(:)
END TYPE t_mpi
END MODULE m_types_mpi