Commit 59dbed39 authored by Andrea Hanke

Merge branch 'develop' of iffgit.fz-juelich.de:fleur/fleur into develop

parents dc8b472e 4e17db7e
init/compileinfo.h init/compileinfo.h
io/xml/inputSchema.h io/xml/inputSchema.h
Testing/*
*~ *~
\#* \#*
build build
......
...@@ -16,7 +16,7 @@ build-gfortran-hdf5: ...@@ -16,7 +16,7 @@ build-gfortran-hdf5:
paths: paths:
- build - build
script: script:
- cd /builds/fleur/fleur; ./configure.sh GITLAB; cd build; make - cd /builds/fleur/fleur; ./configure.sh GITLAB; cd build; make -j 4
# only: # only:
# - schedules # - schedules
# - triggers # - triggers
...@@ -100,7 +100,7 @@ build-intel: ...@@ -100,7 +100,7 @@ build-intel:
- build.intel - build.intel
script: script:
- set +e && source compilervars.sh intel64 && set -e ; ulimit -s unlimited - set +e && source compilervars.sh intel64 && set -e ; ulimit -s unlimited
- cd /builds/fleur/fleur; FC=mpiifort FLEUR_LIBRARIES="-lmkl_scalapack_lp64;-lmkl_blacs_intelmpi_lp64" ./configure.sh -t -l intel INTEL_MPI ; cd build.intel; make - cd /builds/fleur/fleur; FC=mpiifort FLEUR_LIBRARIES="-lmkl_scalapack_lp64;-lmkl_blacs_intelmpi_lp64" ./configure.sh -t -l intel INTEL_MPI ; cd build.intel; make -j 4
only: only:
- schedules - schedules
- triggers - triggers
...@@ -133,7 +133,7 @@ gfortran-coverage: ...@@ -133,7 +133,7 @@ gfortran-coverage:
paths: paths:
- build - build
script: script:
- cd /builds/fleur/fleur; ./configure.sh -l coverage -flags --coverage GITLAB; cd build.coverage; make - cd /builds/fleur/fleur; ./configure.sh -l coverage -flags --coverage GITLAB; cd build.coverage; make -j 4
- lcov --capture --initial -d CMakeFiles -o baseline.info - lcov --capture --initial -d CMakeFiles -o baseline.info
- ulimit -s unlimited ;export juDFT_MPI="mpirun -n 2 --allow-run-as-root ";ctest - ulimit -s unlimited ;export juDFT_MPI="mpirun -n 2 --allow-run-as-root ";ctest
- lcov --capture -d CMakeFiles -o after.info - lcov --capture -d CMakeFiles -o after.info
......
...@@ -134,7 +134,7 @@ SUBROUTINE cdnval(eig_id, mpi,kpts,jspin,noco,input,banddos,cell,atoms,enpara,st ...@@ -134,7 +134,7 @@ SUBROUTINE cdnval(eig_id, mpi,kpts,jspin,noco,input,banddos,cell,atoms,enpara,st
CALL denCoeffs%init(atoms,sphhar,jsp_start,jsp_end) CALL denCoeffs%init(atoms,sphhar,jsp_start,jsp_end)
! The last entry in denCoeffsOffdiag%init is l_fmpl. It is meant as a switch to a plot of the full magnet. ! The last entry in denCoeffsOffdiag%init is l_fmpl. It is meant as a switch to a plot of the full magnet.
! density without the atomic sphere approximation for the magnet. density. It is not completely implemented (lo's missing). ! density without the atomic sphere approximation for the magnet. density. It is not completely implemented (lo's missing).
CALL denCoeffsOffdiag%init(atoms,noco,sphhar,.FALSE.) CALL denCoeffsOffdiag%init(atoms,noco,sphhar,noco%l_mtnocopot)
CALL force%init1(input,atoms) CALL force%init1(input,atoms)
CALL orb%init(atoms,noco,jsp_start,jsp_end) CALL orb%init(atoms,noco,jsp_start,jsp_end)
......
...@@ -34,7 +34,7 @@ SUBROUTINE genNewNocoInp(input,atoms,noco,noco_new) ...@@ -34,7 +34,7 @@ SUBROUTINE genNewNocoInp(input,atoms,noco,noco_new)
alphdiff = 2.0*pi_const*(noco%qss(1)*atoms%taual(1,iAtom) + & alphdiff = 2.0*pi_const*(noco%qss(1)*atoms%taual(1,iAtom) + &
noco%qss(2)*atoms%taual(2,iAtom) + & noco%qss(2)*atoms%taual(2,iAtom) + &
noco%qss(3)*atoms%taual(3,iAtom) ) noco%qss(3)*atoms%taual(3,iAtom) )
noco_new%alph(iType) = noco%alph(iType) - alphdiff noco_new%alph(iType) = noco_new%alph(iType) - alphdiff
DO WHILE (noco_new%alph(iType) > +pi_const) DO WHILE (noco_new%alph(iType) > +pi_const)
noco_new%alph(iType)= noco_new%alph(iType) - 2.0*pi_const noco_new%alph(iType)= noco_new%alph(iType) - 2.0*pi_const
END DO END DO
...@@ -42,12 +42,12 @@ SUBROUTINE genNewNocoInp(input,atoms,noco,noco_new) ...@@ -42,12 +42,12 @@ SUBROUTINE genNewNocoInp(input,atoms,noco,noco_new)
noco_new%alph(iType)= noco_new%alph(iType) + 2.0*pi_const noco_new%alph(iType)= noco_new%alph(iType) + 2.0*pi_const
END DO END DO
ELSE ELSE
noco_new%alph(iType) = noco%alph(iType) noco_new%alph(iType) = noco_new%alph(iType)
END IF END IF
iatom= iatom + atoms%neq(iType) iatom= iatom + atoms%neq(iType)
END DO END DO
OPEN (24,file='nocoinp',form='formatted', status='old') OPEN (24,file='nocoinp',form='formatted', status='unknown')
REWIND (24) REWIND (24)
CALL rw_noco_write(atoms,noco_new, input) CALL rw_noco_write(atoms,noco_new, input)
CLOSE (24) CLOSE (24)
......
...@@ -41,7 +41,7 @@ CONTAINS ...@@ -41,7 +41,7 @@ CONTAINS
! .. Intrinsic Functions .. ! .. Intrinsic Functions ..
INTRINSIC conjg INTRINSIC conjg
qal21=0.0
!---> l-decomposed density for each occupied state !---> l-decomposed density for each occupied state
states : DO i = 1, noccbd states : DO i = 1, noccbd
nt1 = 1 nt1 = 1
......
...@@ -62,7 +62,6 @@ CONTAINS ...@@ -62,7 +62,6 @@ CONTAINS
ENDIF ENDIF
!$OMP PARALLEL DEFAULT(none) & !$OMP PARALLEL DEFAULT(none) &
!$OMP SHARED(usdus,rho,moments,qmtl) & !$OMP SHARED(usdus,rho,moments,qmtl) &
!$OMP SHARED(atoms,jsp_start,jsp_end,enpara,vr,denCoeffs,sphhar)& !$OMP SHARED(atoms,jsp_start,jsp_end,enpara,vr,denCoeffs,sphhar)&
!$OMP SHARED(orb,noco,denCoeffsOffdiag,jspd)& !$OMP SHARED(orb,noco,denCoeffsOffdiag,jspd)&
......
...@@ -22,10 +22,10 @@ CONTAINS ...@@ -22,10 +22,10 @@ CONTAINS
! .. Array Arguments .. ! .. Array Arguments ..
REAL, INTENT(IN) :: we(:)!(nobd) REAL, INTENT(IN) :: we(:)!(nobd)
COMPLEX, INTENT(INOUT) :: uu21(atoms%lmaxd,atoms%ntype) COMPLEX, INTENT(INOUT) :: uu21(0:atoms%lmaxd,atoms%ntype)
COMPLEX, INTENT(INOUT) :: ud21(atoms%lmaxd,atoms%ntype) COMPLEX, INTENT(INOUT) :: ud21(0:atoms%lmaxd,atoms%ntype)
COMPLEX, INTENT(INOUT) :: du21(atoms%lmaxd,atoms%ntype) COMPLEX, INTENT(INOUT) :: du21(0:atoms%lmaxd,atoms%ntype)
COMPLEX, INTENT(INOUT) :: dd21(atoms%lmaxd,atoms%ntype) COMPLEX, INTENT(INOUT) :: dd21(0:atoms%lmaxd,atoms%ntype)
COMPLEX, INTENT(INOUT) :: uulo21(atoms%nlod,atoms%ntype) COMPLEX, INTENT(INOUT) :: uulo21(atoms%nlod,atoms%ntype)
COMPLEX, INTENT(INOUT) :: dulo21(atoms%nlod,atoms%ntype) COMPLEX, INTENT(INOUT) :: dulo21(atoms%nlod,atoms%ntype)
COMPLEX, INTENT(INOUT) :: ulou21(atoms%nlod,atoms%ntype) COMPLEX, INTENT(INOUT) :: ulou21(atoms%nlod,atoms%ntype)
......
...@@ -55,17 +55,17 @@ io/calculator.f global/ss_sym.f global/soc_sym.f math/inv3.f io/rw_symfile.f ...@@ -55,17 +55,17 @@ io/calculator.f global/ss_sym.f global/soc_sym.f math/inv3.f io/rw_symfile.f
kpoints/kptgen_hybrid.f kpoints/od_kptsgen.f kpoints/bravais.f kpoints/divi.f kpoints/brzone.f kpoints/kptgen_hybrid.f kpoints/od_kptsgen.f kpoints/bravais.f kpoints/divi.f kpoints/brzone.f
kpoints/kptmop.f kpoints/kpttet.f init/bandstr1.F kpoints/ordstar.f kpoints/fulstar.f kpoints/kprep.f kpoints/kptmop.f kpoints/kpttet.f init/bandstr1.F kpoints/ordstar.f kpoints/fulstar.f kpoints/kprep.f
kpoints/tetcon.f kpoints/kvecon.f init/boxdim.f global/radsra.f math/intgr.F global/differ.f math/inwint.f kpoints/tetcon.f kpoints/kvecon.f init/boxdim.f global/radsra.f math/intgr.F global/differ.f math/inwint.f
math/outint.f xc-pot/gaunt.f math/grule.f math/outint.f math/grule.f
) )
set(inpgen_F90 ${inpgen_F90} global/constants.f90 io/xsf_io.f90 set(inpgen_F90 ${inpgen_F90} global/constants.f90 io/xsf_io.f90
eigen/orthoglo.F90 juDFT/usage_data.F90 math/ylm4.F90 eigen/orthoglo.F90 juDFT/usage_data.F90 math/ylm4.F90
global/sort.f90 global/chkmt.f90 inpgen/inpgen.f90 inpgen/set_inp.f90 inpgen/inpgen_help.f90 io/rw_inp.f90 juDFT/juDFT.F90 global/find_enpara.f90 global/sort.f90 global/chkmt.f90 inpgen/inpgen.f90 inpgen/set_inp.f90 inpgen/inpgen_help.f90 io/rw_inp.f90 juDFT/juDFT.F90 global/find_enpara.f90
inpgen/closure.f90 inpgen/inpgen_arguments.F90 inpgen/closure.f90 inpgen/inpgen_arguments.F90
juDFT/info.F90 juDFT/stop.F90 juDFT/args.F90 juDFT/time.F90 juDFT/init.F90 juDFT/sysinfo.F90 io/w_inpXML.f90 kpoints/julia.f90 global/utility.F90 juDFT/info.F90 juDFT/stop.F90 juDFT/args.F90 juDFT/time.F90 juDFT/init.F90 juDFT/sysinfo.F90 juDFT/string.f90 io/w_inpXML.f90 kpoints/julia.f90 global/utility.F90
init/compile_descr.F90 kpoints/kpoints.f90 io/xmlOutput.F90 kpoints/brzone2.f90 cdn/slab_dim.f90 cdn/slabgeom.f90 dos/nstm3.f90 cdn/int_21.f90 init/compile_descr.F90 kpoints/kpoints.f90 io/xmlOutput.F90 kpoints/brzone2.f90 cdn/slab_dim.f90 cdn/slabgeom.f90 dos/nstm3.f90 cdn/int_21.f90
cdn/int_21lo.f90 cdn_mt/rhomt21.f90 cdn_mt/rhonmt21.f90 force/force_a21.F90 force/force_a21_lo.f90 force/force_a21_U.f90 force/force_a12.f90 cdn/int_21lo.f90 cdn_mt/rhomt21.f90 cdn_mt/rhonmt21.f90 force/force_a21.F90 force/force_a21_lo.f90 force/force_a21_U.f90 force/force_a12.f90
eigen/tlmplm_store.F90 kpoints/unfoldBandKPTS.f90) eigen/tlmplm_store.F90 xc-pot/gaunt.f90 kpoints/unfoldBandKPTS.f90)
set(fleur_SRC ${fleur_F90} ${fleur_F77}) set(fleur_SRC ${fleur_F90} ${fleur_F77})
......
...@@ -12,7 +12,7 @@ if (${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") ...@@ -12,7 +12,7 @@ if (${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mkl -r8 -qopenmp -assume byterecl") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mkl -r8 -qopenmp -assume byterecl")
endif() endif()
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -xHost -O2 -g") set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -xHost -O2 -g")
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -ftrapuv -check uninit -check pointers -CB ") set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -ftrapuv -check uninit -check pointers -CB -DCPP_DEBUG")
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI") elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
message("PGI Fortran detected") message("PGI Fortran detected")
set(CMAKE_SHARED_LIBRARY_LINK_Fortran_FLAGS "") #fix problem in cmake set(CMAKE_SHARED_LIBRARY_LINK_Fortran_FLAGS "") #fix problem in cmake
...@@ -24,12 +24,12 @@ elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI") ...@@ -24,12 +24,12 @@ elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
#set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics -Mcuda:cuda9.0,cc70 -DUSE_STREAMS -DNUM_STREAMS=${N_STREAMS} -Minfo=accel -acc") #set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics -Mcuda:cuda9.0,cc70 -DUSE_STREAMS -DNUM_STREAMS=${N_STREAMS} -Minfo=accel -acc")
#set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fast -O3") #set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fast -O3")
set(CMAKE_Fortran_FLAGS_RELEASE "-O1 ") # to prevent cmake from putting -fast which causes problems with PGI18.4 set(CMAKE_Fortran_FLAGS_RELEASE "-O1 ") # to prevent cmake from putting -fast which causes problems with PGI18.4
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -Mchkstk -Mchkptr -Ktrap=fp") set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -Mchkstk -Mchkptr -Ktrap=fp -DCPP_DEBUG")
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "XL") elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "XL")
message("IBM/BG Fortran detected") message("IBM/BG Fortran detected")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qsmp=omp -qnosave -qarch=qp -qtune=qp -qrealsize=8 -qfixed -qsuppress=1520-022 -qessl") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qsmp=omp -qnosave -qarch=qp -qtune=qp -qrealsize=8 -qfixed -qsuppress=1520-022 -qessl")
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -O4 -qsuppress=1500-036") set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -O4 -qsuppress=1500-036")
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -g") set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -O0 -g -DCPP_DEBUG")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I/bgsys/local/libxml2/include/libxml2") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I/bgsys/local/libxml2/include/libxml2")
set(FLEUR_DEFINITIONS ${FLEUR_DEFINITIONS} "CPP_AIX") set(FLEUR_DEFINITIONS ${FLEUR_DEFINITIONS} "CPP_AIX")
set(FLEUR_MPI_DEFINITIONS ${FLEUR_MPI_DEFINITIONS} "CPP_AIX") set(FLEUR_MPI_DEFINITIONS ${FLEUR_MPI_DEFINITIONS} "CPP_AIX")
...@@ -39,6 +39,6 @@ elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") ...@@ -39,6 +39,6 @@ elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
message(FATAL_ERROR "Only modern versions of gfortran >6.3 will be able to compile FLEUR\nYou need to specify a different compiler.\nSee the docs at www.flapw.de.\n") message(FATAL_ERROR "Only modern versions of gfortran >6.3 will be able to compile FLEUR\nYou need to specify a different compiler.\nSee the docs at www.flapw.de.\n")
endif() endif()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffree-line-length-none -fopenmp -fdefault-real-8 ") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffree-line-length-none -fopenmp -fdefault-real-8 ")
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -O1") set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -O2")
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fdump-core -Wall -Wextra -Warray-temporaries -fbacktrace -fcheck=all -finit-real=nan -O0 -g") set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fdump-core -Wall -Wextra -Warray-temporaries -fbacktrace -fcheck=all -finit-real=nan -O0 -g -DCPP_DEBUG")
endif() endif()
...@@ -3,11 +3,11 @@ ...@@ -3,11 +3,11 @@
echo "set(compile_user $USER)" >config.cmake echo "set(compile_user $USER)" >config.cmake
#Some frequently used Environment variables #Some frequently used Environment variables
if [ -n "$HDF5_ROOT" ] ; then FLEUR_LIBDIR="$FLEUR_LIBDIR $HDF5_ROOT/lib" ; FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_ROOT/include" ; fi if [ ! -z ${HDF5_ROOT+x} ] ; then FLEUR_LIBDIR="$FLEUR_LIBDIR $HDF5_ROOT/lib" ; FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_ROOT/include" ; fi
if [ -n "$HDF5_DIR"} ] ; then FLEUR_LIBDIR="$FLEUR_LIBDIR $HDF5_DIR/lib" ; FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_DIR/include" ; fi if [ ! -z ${HDF5_DIR+x} ] ; then FLEUR_LIBDIR="$FLEUR_LIBDIR $HDF5_DIR/lib" ; FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_DIR/include" ; fi
if [ -n "$HDF5_LIB" ] ; then FLEUR_LIBDIR="$FLEUR_LIBDIR $HDF5_LIB" ; fi if [ ! -z ${HDF5_LIB+x} ] ; then FLEUR_LIBDIR="$FLEUR_LIBDIR $HDF5_LIB" ; fi
if [ -n "$HDF5_INCLUDE" ] ; then FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_INCLUDE" ; fi if [ ! -z ${HDF5_INCLUDE+x} ] ; then FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_INCLUDE" ; fi
if [ -n "$HDF5_MODULES" ] ; then FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_MODULES" ; fi if [ ! -z ${HDF5_MODULES+x} ] ; then FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $HDF5_MODULES" ; fi
......
This diff is collapsed.
...@@ -10,13 +10,13 @@ diagonalization/symmetrize_matrix.f90 ...@@ -10,13 +10,13 @@ diagonalization/symmetrize_matrix.f90
diagonalization/cusolver_diag.F90 diagonalization/cusolver_diag.F90
diagonalization/elemental.F90 diagonalization/elemental.F90
diagonalization/elpa_20180525_onenode.F90) diagonalization/elpa_20180525_onenode.F90)
if (FLEUR_USE_ELPA_20180525) #if (FLEUR_USE_ELPA_20180525)
set(fleur_F90 ${fleur_F90} # set(fleur_F90 ${fleur_F90}
diagonalization/elpa_20180525.F90 # diagonalization/elpa_20180525.F90
) #)
else() #else()
set(fleur_F90 ${fleur_F90} set(fleur_F90 ${fleur_F90}
diagonalization/elpa.F90 diagonalization/elpa.F90
) )
endif() #endif()
...@@ -88,7 +88,7 @@ CONTAINS ...@@ -88,7 +88,7 @@ CONTAINS
END SUBROUTINE chase_distance END SUBROUTINE chase_distance
#ifdef CPP_CHASE #ifdef CPP_CHASE
SUBROUTINE init_chase(mpi,DIMENSION,atoms,kpts,noco,l_real) SUBROUTINE init_chase(mpi,DIMENSION,input,atoms,kpts,noco,l_real)
USE m_types_mpimat USE m_types_mpimat
USE m_types USE m_types
USE m_types_mpi USE m_types_mpi
...@@ -99,6 +99,7 @@ CONTAINS ...@@ -99,6 +99,7 @@ CONTAINS
TYPE(t_mpi), INTENT(IN) :: mpi TYPE(t_mpi), INTENT(IN) :: mpi
TYPE(t_dimension), INTENT(IN) :: dimension TYPE(t_dimension), INTENT(IN) :: dimension
TYPE(t_input), INTENT(IN) :: input
TYPE(t_atoms), INTENT(IN) :: atoms TYPE(t_atoms), INTENT(IN) :: atoms
TYPE(t_kpts), INTENT(IN) :: kpts TYPE(t_kpts), INTENT(IN) :: kpts
TYPE(t_noco), INTENT(IN) :: noco TYPE(t_noco), INTENT(IN) :: noco
...@@ -472,6 +473,7 @@ CONTAINS ...@@ -472,6 +473,7 @@ CONTAINS
EXTERNAL blacs_pinfo, blacs_gridinit EXTERNAL blacs_pinfo, blacs_gridinit
INTEGER,EXTERNAL::numroc,indxl2g INTEGER,EXTERNAL::numroc,indxl2g
ALLOCATE(mat%blacsdata)
mat%blacsdata%mpi_com=hmat%blacsdata%mpi_com mat%blacsdata%mpi_com=hmat%blacsdata%mpi_com
mat%global_size1=hmat%global_size1 mat%global_size1=hmat%global_size1
mat%global_size2=hmat%global_size1 mat%global_size2=hmat%global_size1
......
...@@ -66,6 +66,7 @@ CONTAINS ...@@ -66,6 +66,7 @@ CONTAINS
USE m_types_gpumat USE m_types_gpumat
USE m_matrix_copy USE m_matrix_copy
USE m_cusolver_diag USE m_cusolver_diag
USE m_judft_usage
IMPLICIT NONE IMPLICIT NONE
#ifdef CPP_MPI #ifdef CPP_MPI
include 'mpif.h' include 'mpif.h'
...@@ -108,6 +109,7 @@ CONTAINS ...@@ -108,6 +109,7 @@ CONTAINS
CALL timestart("Diagonalization") CALL timestart("Diagonalization")
!Select the solver !Select the solver
CALL add_usage_data("diag-solver", priv_select_solver(parallel))
SELECT CASE (priv_select_solver(parallel)) SELECT CASE (priv_select_solver(parallel))
CASE (diag_elpa) CASE (diag_elpa)
CALL elpa_diag(hmat,smat,ne,eig,ev) CALL elpa_diag(hmat,smat,ne,eig,ev)
......
...@@ -88,7 +88,6 @@ CONTAINS ...@@ -88,7 +88,6 @@ CONTAINS
#else #else
CALL get_elpa_row_col_comms(hmat%blacsdata%mpi_com, hmat%blacsdata%myrow, hmat%blacsdata%mycol,mpi_comm_rows, mpi_comm_cols) CALL get_elpa_row_col_comms(hmat%blacsdata%mpi_com, hmat%blacsdata%myrow, hmat%blacsdata%mycol,mpi_comm_rows, mpi_comm_cols)
#endif #endif
!print *,"creating ELPA comms -- done"
num2=ne !no of states solved for num2=ne !no of states solved for
...@@ -130,13 +129,16 @@ CONTAINS ...@@ -130,13 +129,16 @@ CONTAINS
print *, "elpa uses " // elpa_int_value_to_string("complex_kernel", kernel) // " kernel" print *, "elpa uses " // elpa_int_value_to_string("complex_kernel", kernel) // " kernel"
endif endif
#endif #endif
!print *,"Before elpa"
!ELPA -start here ! Solve generalized problem
! Solive generalized preblem
! !
! 1. Calculate Cholesky factorization of Matrix S = U**T * U ! 1. Calculate Cholesky factorization of Matrix S = U**T * U
! and invert triangular matrix U ! and invert triangular matrix U.
! Cholesky factorization:
! Only upper triangle needs to be set. On return, the upper triangle contains
! the Cholesky factor and the lower triangle is set to 0.
! invert_triangular:
! Inverts an upper triangular real or complex matrix.
! !
! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed. ! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed.
! The only reason having them is that the Scalapack counterpart ! The only reason having them is that the Scalapack counterpart
...@@ -183,6 +185,7 @@ CONTAINS ...@@ -183,6 +185,7 @@ CONTAINS
! H is only set in the upper half, solve_evp_real needs a full matrix ! H is only set in the upper half, solve_evp_real needs a full matrix
! Set lower half from upper half ! Set lower half from upper half
! Set the lower half of the H matrix to zeros.
DO i=1,hmat%matsize2 DO i=1,hmat%matsize2
! Get global column corresponding to i and number of local rows up to ! Get global column corresponding to i and number of local rows up to
! and including the diagonal, these are unchanged in H ! and including the diagonal, these are unchanged in H
...@@ -195,7 +198,7 @@ CONTAINS ...@@ -195,7 +198,7 @@ CONTAINS
ENDIF ENDIF
ENDDO ENDDO
! Use the ev_dist array to store the calculated values for the lower part.
IF (hmat%l_real) THEN IF (hmat%l_real) THEN
CALL pdtran(hmat%global_size1,hmat%global_size1,1.d0,hmat%data_r,1,1,& CALL pdtran(hmat%global_size1,hmat%global_size1,1.d0,hmat%data_r,1,1,&
hmat%blacsdata%blacs_desc,0.d0,ev_dist%data_r,1,1,ev_dist%blacsdata%blacs_desc) hmat%blacsdata%blacs_desc,0.d0,ev_dist%data_r,1,1,ev_dist%blacsdata%blacs_desc)
...@@ -204,7 +207,7 @@ CONTAINS ...@@ -204,7 +207,7 @@ CONTAINS
hmat%blacsdata%blacs_desc,cmplx(0.d0,0.d0),ev_dist%data_c,1,1,ev_dist%blacsdata%blacs_desc) hmat%blacsdata%blacs_desc,cmplx(0.d0,0.d0),ev_dist%data_c,1,1,ev_dist%blacsdata%blacs_desc)
ENDIF ENDIF
! Copy the calculated values to the lower part of the H matrix
DO i=1,hmat%matsize2 DO i=1,hmat%matsize2
! Get global column corresponding to i and number of local rows up to ! Get global column corresponding to i and number of local rows up to
! and including the diagonal, these are unchanged in H ! and including the diagonal, these are unchanged in H
...@@ -254,7 +257,7 @@ CONTAINS ...@@ -254,7 +257,7 @@ CONTAINS
ENDIF ENDIF
#endif #endif
! 2b. tmp2 = eigvec**T ! 2b. tmp2 = ev_dist**T
IF (hmat%l_real) THEN IF (hmat%l_real) THEN
CALL pdtran(ev_dist%global_size1,ev_dist%global_size1,1.d0,ev_dist%data_r,1,1,& CALL pdtran(ev_dist%global_size1,ev_dist%global_size1,1.d0,ev_dist%data_r,1,1,&
ev_dist%blacsdata%blacs_desc,0.d0,tmp2_r,1,1,ev_dist%blacsdata%blacs_desc) ev_dist%blacsdata%blacs_desc,0.d0,tmp2_r,1,1,ev_dist%blacsdata%blacs_desc)
...@@ -325,7 +328,7 @@ CONTAINS ...@@ -325,7 +328,7 @@ CONTAINS
ENDDO ENDDO
! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1 ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
! Eigenvectors go to eigvec ! Eigenvectors go to ev_dist
#if defined (CPP_ELPA_201705003) #if defined (CPP_ELPA_201705003)
IF (hmat%l_real) THEN IF (hmat%l_real) THEN
CALL elpa_obj%eigenvectors(hmat%data_r, eig2, ev_dist%data_r, err) CALL elpa_obj%eigenvectors(hmat%data_r, eig2, ev_dist%data_r, err)
...@@ -389,7 +392,7 @@ CONTAINS ...@@ -389,7 +392,7 @@ CONTAINS
#endif #endif
! 4. Backtransform eigenvectors: Z = U**-1 * eigvec ! 4. Backtransform eigenvectors: Z = U**-1 * ev_dist
! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T ! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
IF (hmat%l_real) THEN IF (hmat%l_real) THEN
......
...@@ -71,6 +71,7 @@ CONTAINS ...@@ -71,6 +71,7 @@ CONTAINS
CALL elpa_obj%set("process_row", hmat%blacsdata%myrow, err) CALL elpa_obj%set("process_row", hmat%blacsdata%myrow, err)
CALL elpa_obj%set("process_col", hmat%blacsdata%mycol, err) CALL elpa_obj%set("process_col", hmat%blacsdata%mycol, err)
CALL elpa_obj%set("blacs_context", hmat%blacsdata%blacs_desc(2), err) CALL elpa_obj%set("blacs_context", hmat%blacsdata%blacs_desc(2), err)
CALL elpa_obj%set("solver", ELPA_SOLVER_2STAGE)
err = elpa_obj%setup() err = elpa_obj%setup()
CALL hmat%generate_full_matrix() CALL hmat%generate_full_matrix()
......
...@@ -28,6 +28,9 @@ CONTAINS ...@@ -28,6 +28,9 @@ CONTAINS
USE m_types USE m_types
#ifdef CPP_ELPA_ONENODE #ifdef CPP_ELPA_ONENODE
USE elpa USE elpa
#endif
#ifdef CPP_GPU
USE nvtx
#endif #endif
IMPLICIT NONE IMPLICIT NONE
...@@ -45,7 +48,9 @@ CONTAINS ...@@ -45,7 +48,9 @@ CONTAINS
INTEGER :: kernel INTEGER :: kernel
CLASS(elpa_t),pointer :: elpa_obj CLASS(elpa_t),pointer :: elpa_obj
print*, "ELPA 20180525 started" #ifdef CPP_GPU
call nvtxStartRange("ELPA",5)
#endif
err = elpa_init(20180525) err = elpa_init(20180525)
elpa_obj => elpa_allocate() elpa_obj => elpa_allocate()
...@@ -65,15 +70,23 @@ CONTAINS ...@@ -65,15 +70,23 @@ CONTAINS
CALL elpa_obj%set("gpu",1,err) CALL elpa_obj%set("gpu",1,err)
#endif #endif
err = elpa_obj%setup() err = elpa_obj%setup()
call elpa_obj%get("solver", kernel)
print *, "elpa uses " // elpa_int_value_to_string("solver", kernel) // " solver"
CALL hmat%add_transpose(hmat) CALL hmat%add_transpose(hmat)
CALL smat%add_transpose(smat) CALL smat%add_transpose(smat)
#ifdef CPP_GPU
call nvtxStartRange("EigVec",7)
#endif
IF (hmat%l_real) THEN IF (hmat%l_real) THEN
CALL elpa_obj%generalized_eigenvectors(hmat%data_r,smat%data_r,eig2, ev_dist%data_r, .FALSE.,err) CALL elpa_obj%generalized_eigenvectors(hmat%data_r,smat%data_r,eig2, ev_dist%data_r, .FALSE.,err)
ELSE ELSE
CALL elpa_obj%generalized_eigenvectors(hmat%data_c,smat%data_c,eig2, ev_dist%data_c, .FALSE., err) CALL elpa_obj%generalized_eigenvectors(hmat%data_c,smat%data_c,eig2, ev_dist%data_c, .FALSE., err)
ENDIF ENDIF
#ifdef CPP_GPU
call nvtxEndRange!("EigVec",8)
#endif
CALL elpa_deallocate(elpa_obj) CALL elpa_deallocate(elpa_obj)
CALL elpa_uninit() CALL elpa_uninit()
...@@ -86,6 +99,9 @@ CONTAINS ...@@ -86,6 +99,9 @@ CONTAINS
CALL ev%alloc(hmat%l_real,hmat%matsize1,ne) CALL ev%alloc(hmat%l_real,hmat%matsize1,ne)
CALL ev%copy(ev_dist,1,1) CALL ev%copy(ev_dist,1,1)
#ifdef CPP_GPU
call nvtxEndRange!("ELPA",7)
#endif
#endif #endif
END SUBROUTINE elpa_diag_onenode END SUBROUTINE elpa_diag_onenode
......
...@@ -95,8 +95,10 @@ CONTAINS ...@@ -95,8 +95,10 @@ CONTAINS
! In the parallel case also a redistribution happens ! In the parallel case also a redistribution happens
ALLOCATE(smat_final,mold=smat(1,1)) ALLOCATE(smat_final,mold=smat(1,1))
ALLOCATE(hmat_final,mold=smat(1,1)) ALLOCATE(hmat_final,mold=smat(1,1))
CALL timestart("Matrix redistribution")
CALL eigen_redist_matrix(mpi,lapw,atoms,smat,smat_final) CALL eigen_redist_matrix(mpi,lapw,atoms,smat,smat_final)
CALL eigen_redist_matrix(mpi,lapw,atoms,hmat,hmat_final,smat_final) CALL eigen_redist_matrix(mpi,lapw,atoms,hmat,hmat_final,smat_final)
CALL timestop("Matrix redistribution")
END SUBROUTINE eigen_hssetup END SUBROUTINE eigen_hssetup
END MODULE m_eigen_hssetup END MODULE m_eigen_hssetup
......
...@@ -92,7 +92,7 @@ CONTAINS ...@@ -92,7 +92,7 @@ CONTAINS
jspin=jsp jspin=jsp
CALL vacfun(& CALL vacfun(&
vacuum,DIMENSION,stars,& vacuum,DIMENSION,stars,&
jsp,input,noco,ipot,& jsp,input,noco,jspin1,jspin2,&
sym, cell,ivac,evac(1,1),lapw%bkpt,v%vacxy(:,:,ivac,ipot),v%vacz(:,:,:),kvac1,kvac2,nv2,& sym, cell,ivac,evac(1,1),lapw%bkpt,v%vacxy(:,:,ivac,ipot),v%vacz(:,:,:),kvac1,kvac2,nv2,&
tuuv,tddv,tudv,tduv,uz,duz,udz,dudz,ddnv,wronk) tuuv,tddv,tudv,tduv,uz,duz,udz,dudz,ddnv,wronk)
! !
......
...@@ -31,7 +31,7 @@ CONTAINS ...@@ -31,7 +31,7 @@ CONTAINS
REAL dvu(0:atoms%lmaxd*(atoms%lmaxd+3)/2,0:sphhar%nlhd ) REAL dvu(0:atoms%lmaxd*(atoms%lmaxd+3)/2,0:sphhar%nlhd )
REAL uvd(0:atoms%lmaxd*(atoms%lmaxd+3)/2,0:sphhar%nlhd ) REAL uvd(0:atoms%lmaxd*(atoms%lmaxd+3)/2,0:sphhar%nlhd )
REAL uvu(0:atoms%lmaxd*(atoms%lmaxd+3)/2,0:sphhar%nlhd ) REAL uvu(0:atoms%lmaxd*(atoms%lmaxd+3)/2,0:sphhar%nlhd )
REAL f(atoms%jmtd,2,0:atoms%lmaxd),g(atoms%jmtd,2,0:atoms%lmaxd),x(atoms%jmtd) REAL f(atoms%jmtd,2,0:atoms%lmaxd,2),g(atoms%jmtd,2,0:atoms%lmaxd,2),x(atoms%jmtd)
REAL flo(atoms%jmtd,2,atoms%nlod) REAL flo(atoms%jmtd,2,atoms%nlod)
INTEGER:: indt(0:SIZE(td%tuu,1)-1) INTEGER:: indt(0:SIZE(td%tuu,1)-1)
...@@ -40,14 +40,21 @@ CONTAINS ...@@ -40,14 +40,21 @@ CONTAINS
COMPLEX :: cil COMPLEX :: cil
REAL :: temp REAL :: temp
INTEGER i,l,l2,lamda,lh,lm,lmin,lmin0,lmp,lmpl,lmplm,lmx,lmxx,lp,info,in INTEGER i,l,l2,lamda,lh,lm,lmin,lmin0,lmp,lmpl,lmplm,lmx,lmxx,lp,info,in
INTEGER lp1,lpl ,mem,mems,mp,mu,nh,na,m,nsym,s,i_u INTEGER lp1,lpl ,mem,mems,mp,mu,nh,na,m,nsym,s,i_u,jspin1,jspin2
vr0=v%mt(:,:,n,jsp) vr0=v%mt(:,:,n,jsp)
IF (jsp<3) vr0(:,0)=0.0 IF (jsp<3) vr0(:,0)=0.0
DO i=MERGE(1,jspin,jspin>2),MERGE(2,jspin,jspin>2)
CALL genMTBasis(atoms,enpara,v,mpi,n,jspin,ud,f,g,flo) CALL genMTBasis(atoms,enpara,v,mpi,n,i,ud,f(:,:,:,i),g(:,:,:,i),flo)
ENDDO
IF (jspin>2) THEN
jspin1=1
jspin2=2
ELSE
jspin1=jspin;jspin2=jspin
END IF
na=SUM(atoms%neq(:n-1))+1 na=SUM(atoms%neq(:n-1))+1
nsym = atoms%ntypsy(na) nsym = atoms%ntypsy(na)
nh = sphhar%nlh(nsym) nh = sphhar%nlh(nsym)
...@@ -73,22 +80,22 @@ CONTAINS ...@@ -73,22 +80,22 @@ CONTAINS
dvu(lpl,lh) = 0.0 dvu(lpl,lh) = 0.0
ELSE ELSE
DO i = 1,atoms%jri(n) DO i = 1,atoms%jri(n)
x(i) = (f(i,1,lp)*f(i,1,l)+f(i,2,lp)*f(i,2,l))* vr0(i,lh) x(i) = (f(i,1,lp,jspin1)*f(i,1,l,jspin2)+f(i,2,lp,jspin1)*f(i,2,l,jspin2))* vr0(i,lh)
END DO END DO
CALL intgr3(x,atoms%rmsh(1,n),atoms%dx(n),atoms%jri(n),temp) CALL intgr3(x,atoms%rmsh(1,n),atoms%dx(n),atoms%jri(n),temp)
uvu(lpl,lh) = temp uvu(lpl,lh) = temp
DO i = 1,atoms%jri(n) DO i = 1,atoms%jri(n)
x(i) = (g(i,1,lp)*f(i,1,l)+g(i,2,lp)*f(i,2,l))* vr0(i,lh) x(i) = (g(i,1,lp,jspin1)*f(i,1,l,jspin2)+g(i,2,lp,jspin1)*f(i,2,l,jspin2))* vr0(i,lh)
END DO END DO