Commit 90443a2b authored by Matthias Redies's avatar Matthias Redies

merge develop

parents 8b9847cc 6339d468
......@@ -31,7 +31,9 @@ test-gfortran:
script:
- ulimit -s unlimited ;export juDFT_MPI="mpirun -n 2 --allow-run-as-root ";cd /builds/fleur/fleur/build;ctest
artifacts:
when: on_failure
paths:
- build/Testing/failed
- build/Testing/test.oldlogs
# only:
# - schedules
......@@ -98,7 +100,7 @@ build-intel:
- build.intel
script:
- set +e && source compilervars.sh intel64 && set -e ; ulimit -s unlimited
- cd /builds/fleur/fleur; FC=mpiifort FLEUR_LIBRARIES="-lmkl_scalapack_lp64;-lmkl_blacs_intelmpi_lp64" ./configure.sh -l intel AUTO ; cd build.intel; make
- cd /builds/fleur/fleur; FC=mpiifort FLEUR_LIBRARIES="-lmkl_scalapack_lp64;-lmkl_blacs_intelmpi_lp64" ./configure.sh -t -l intel AUTO ; cd build.intel; make
only:
- schedules
- triggers
......@@ -114,6 +116,11 @@ test-intel:
script:
- set +e && source compilervars.sh intel64 && set -e; ulimit -s unlimited
- cd /builds/fleur/fleur/build.intel;ctest
artifacts:
when: on_failure
paths:
- build/Testing/failed
- build/Testing/test.oldlogs
only:
- schedules
- web
......
......@@ -82,7 +82,7 @@ CONTAINS
ENDDO
END IF
! -----is region
IF (.not.judft_was_Argument("-oldfix")) THEN
IF (.FALSE.) THEN !Change this for old way of determination of int-charge
CALL convol(stars,x,den%pw(:,jspin),stars%ufft)
qis = x(1)*cell%omtil
ELSE
......
......@@ -221,7 +221,7 @@ SUBROUTINE cdnval(eig_id, mpi,kpts,jspin,noco,input,banddos,cell,atoms,enpara,st
! valence density in the vacuum region
IF (input%film) THEN
CALL vacden(vacuum,dimension,stars,oneD, kpts,input,sym,cell,atoms,noco,banddos,&
gVacMap,we,ikpt,jspin,vTot%vacz(:,:,jspin),noccbd,lapw,enpara%evac0,eig,den,zMat,dos)
gVacMap,we,ikpt,jspin,vTot%vacz(:,:,jspin),noccbd,lapw,enpara%evac,eig,den,zMat,dos)
END IF
END IF
IF (input%film) CALL regCharges%sumBandsVac(vacuum,dos,noccbd,ikpt,jsp_start,jsp_end,eig,we)
......
......@@ -37,6 +37,7 @@ include(wannier/CMakeLists.txt)
include(wannier/uhu/CMakeLists.txt)
include(forcetheorem/CMakeLists.txt)
include(rdmft/CMakeLists.txt)
include(kpoints/CMakeLists.txt)
include(tests/tests_old.cmake)
......@@ -48,19 +49,20 @@ inpgen/atom_sym.f inpgen/generator.f inpgen/read_record.f inpgen/soc_or_ssdw.f i
inpgen/bravais_symm.f inpgen/set_atom_core.f inpgen/spg_gen.f global/triang.f
inpgen/lapw_input.f inpgen/struct_input.f inpgen/write_struct.f
io/calculator.f global/ss_sym.f global/soc_sym.f math/inv3.f io/rw_symfile.f
global/sort.f init/kptgen_hybrid.f init/od_kptsgen.f init/bravais.f init/divi.f init/brzone.f
init/kptmop.f init/kpttet.f init/bandstr1.F init/ordstar.f init/fulstar.f init/kprep.f
init/tetcon.f init/kvecon.f init/boxdim.f math/ylm4.f global/radsra.f math/intgr.F global/differ.f math/inwint.f
global/sort.f kpoints/kptgen_hybrid.f kpoints/od_kptsgen.f kpoints/bravais.f kpoints/divi.f kpoints/brzone.f
kpoints/kptmop.f kpoints/kpttet.f init/bandstr1.F kpoints/ordstar.f kpoints/fulstar.f kpoints/kprep.f
kpoints/tetcon.f kpoints/kvecon.f init/boxdim.f math/ylm4.f global/radsra.f math/intgr.F global/differ.f math/inwint.f
math/outint.f xc-pot/gaunt.f math/grule.f
)
set(inpgen_F90 ${inpgen_F90} global/constants.f90 io/xsf_io.f90
eigen/vec_for_lo.f90 eigen/orthoglo.F90 juDFT/usage_data.F90
global/enpara.f90 global/chkmt.f90 inpgen/inpgen.f90 inpgen/set_inp.f90 inpgen/inpgen_help.f90 io/rw_inp.f90 juDFT/juDFT.F90 global/find_enpara.f90 inpgen/closure.f90
juDFT/info.F90 juDFT/stop.F90 juDFT/args.F90 juDFT/time.F90 juDFT/init.F90 juDFT/sysinfo.F90 io/w_inpXML.f90 init/julia.f90 global/utility.F90
init/compile_descr.F90 init/kpoints.f90 io/xmlOutput.F90 init/brzone2.f90 cdn/slab_dim.f90 cdn/slabgeom.f90 dos/nstm3.f90 cdn/int_21.f90
global/enpara.f90 global/chkmt.f90 inpgen/inpgen.f90 inpgen/set_inp.f90 inpgen/inpgen_help.f90 io/rw_inp.f90 juDFT/juDFT.F90 global/find_enpara.f90
inpgen/closure.f90 inpgen/inpgen_arguments.F90
juDFT/info.F90 juDFT/stop.F90 juDFT/args.F90 juDFT/time.F90 juDFT/init.F90 juDFT/sysinfo.F90 io/w_inpXML.f90 kpoints/julia.f90 global/utility.F90
init/compile_descr.F90 kpoints/kpoints.f90 io/xmlOutput.F90 kpoints/brzone2.f90 cdn/slab_dim.f90 cdn/slabgeom.f90 dos/nstm3.f90 cdn/int_21.f90
cdn/int_21lo.f90 cdn_mt/rhomt21.f90 cdn_mt/rhonmt21.f90 force/force_a21.F90 force/force_a21_lo.f90 force/force_a21_U.f90 force/force_a12.f90
eigen/tlmplm_store.F90 init/unfoldBandKPTS.f90)
eigen/tlmplm_store.F90 kpoints/unfoldBandKPTS.f90)
set(fleur_SRC ${fleur_F90} ${fleur_F77})
......
......@@ -16,9 +16,15 @@ if (${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
message("PGI Fortran detected")
set(CMAKE_SHARED_LIBRARY_LINK_Fortran_FLAGS "") #fix problem in cmake
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics -Mcuda:kepler+ -ta:tesla:cuda7.5 -DUSE_STREAMS -DNUM_STREAMS=${N_STREAMS} -Minfo=accel -acc")
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fast -O3")
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -Mchkstk -Mchkptr")
#CPU
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics")
#GPU
#set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics -Mcuda=cuda9.0,cc60 -Mcudalib=cublas")
#set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics -Mcuda:kepler+ -ta:tesla:cuda7.5 -DUSE_STREAMS -DNUM_STREAMS=${N_STREAMS} -Minfo=accel -acc")
#set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -mp -Mr8 -Mr8intrinsics -Mcuda:cuda9.0,cc70 -DUSE_STREAMS -DNUM_STREAMS=${N_STREAMS} -Minfo=accel -acc")
#set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fast -O3")
set(CMAKE_Fortran_FLAGS_RELEASE "-O1 ") # to prevent cmake from putting -fast which auses problems with PGI18.4
set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C -traceback -O0 -g -Mchkstk -Mchkptr -Ktrap=fp")
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "XL")
message("IBM/BG Fortran detected")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qsmp=omp -qnosave -qarch=qp -qtune=qp -qrealsize=8 -qfixed -qsuppress=1520-022 -qessl")
......
#Configuration for CLAIX@RWTH; do a 'module load intelmpi' before
if ! module list 2>&1| grep -q intelmpi
if module list 2>&1 | grep -q intel
then
if module list 2>&1| grep -q openmpi
then
echo "Please use intelmpi, e.g. do a module switch openmpi intelmpi"
exit
fi
export FC=$MPIFC
export CC=$MPICC
if module list 2>&1| grep -q intelmpi
then
export FC=$MPIFC
export CC=$MPICC
fi
#ELPA
if [ $ELPA_MODULES ]
then
......@@ -16,5 +19,7 @@
FLEUR_LIBDIR="$FLEUR_LIBDIR $ELPA_LIB"
FLEUR_INCLUDEDIR="$FLEUR_INCLUDEDIR $ELPA_MODULES"
fi
elif module list 2>&1 | grep -q pgi
then
echo "found PGI compiler"
fi
......@@ -109,12 +109,12 @@ CONTAINS
scale_distance=1E-3
IF (judft_was_argument("-chase_scale")) THEN
arg=juDFT_string_for_argument("-chase_scale")
READ(arg,*) scale_distance
ENDIF
!IF (judft_was_argument("-chase_tol_scale")) THEN
! arg=juDFT_string_for_argument("-chase_tol_scale")
! READ(arg,*) scale_distance
!ENDIF
IF (juDFT_was_argument("-diag:chase")) THEN
IF (TRIM(juDFT_string_for_argument("-diag"))=="chase") THEN
nevd = min(dimension%neigd,dimension%nvd+atoms%nlotot)
nexd = min(max(nevd/4, 45),dimension%nvd+atoms%nlotot-nevd) !dimensioning for workspace
chase_eig_id=open_eig(mpi%mpi_comm,DIMENSION%nbasfcn,nevd+nexd,kpts%nkpt,DIMENSION%jspd,&
......
......@@ -65,7 +65,7 @@ CONTAINS
LOGICAL :: parallel
!For check-mode
TYPE(t_mat) :: s_store,h_store
SELECT TYPE(smat)
CLASS IS (t_mpimat)
......@@ -75,7 +75,7 @@ CONTAINS
END SELECT
!Create a copy of the matrix if in test mode
IF (judft_was_argument("-diag:test")) THEN
IF (TRIM(judft_string_for_argument("-diag"))=="test") THEN
SELECT TYPE(hmat)
CLASS IS (t_mpimat)
CALL s_store%init(hmat%l_real,hmat%global_size1,hmat%global_size2)
......@@ -112,7 +112,7 @@ CONTAINS
END SELECT
!Create test the solution
IF (judft_was_argument("-diag:test")) THEN
IF (TRIM(judft_string_for_argument("-diag"))=="test") THEN
CALL priv_test_solution(mpi,ne,s_store,h_store,eig,ev)
CALL judft_error("Diagonalization tested")
END IF
......@@ -223,13 +223,13 @@ CONTAINS
ENDIF
!check if a special solver was requested
IF (juDFT_was_argument("-diag:elpa")) diag_solver=diag_elpa
IF (juDFT_was_argument("-diag:scalapack")) diag_solver=diag_scalapack
IF (juDFT_was_argument("-diag:elemental")) diag_solver=diag_elemental
IF (juDFT_was_argument("-diag:lapack")) diag_solver=diag_lapack
IF (juDFT_was_argument("-diag:magma")) diag_solver=diag_magma
IF (juDFT_was_argument("-diag:chase")) diag_solver=diag_chase
IF (juDFT_was_argument("-diag:debugout")) diag_solver=diag_debugout
IF (TRIM(juDFT_string_for_argument("-diag"))=="elpa") diag_solver=diag_elpa
IF (trim(juDFT_string_for_argument("-diag"))=="scalapack") diag_solver=diag_scalapack
IF (trim(juDFT_string_for_argument("-diag"))=="elemental") diag_solver=diag_elemental
IF (trim(juDFT_string_for_argument("-diag"))=="lapack") diag_solver=diag_lapack
IF (trim(juDFT_string_for_argument("-diag"))=="magma") diag_solver=diag_magma
IF (trim(juDFT_string_for_argument("-diag"))=="chase") diag_solver=diag_chase
IF (trim(juDFT_string_for_argument("-diag"))=="debugout") diag_solver=diag_debugout
!Check if solver is possible
if (diag_solver<0) call priv_solver_error(diag_solver,parallel)
......
......@@ -80,7 +80,7 @@ CONTAINS
!Vacuum contributions
IF (input%film) THEN
CALL timestart("Vacuum part")
CALL hsvac(vacuum,stars,DIMENSION, atoms,mpi,isp,input,v,enpara%evac0,cell,&
CALL hsvac(vacuum,stars,DIMENSION, atoms,mpi,isp,input,v,enpara%evac,cell,&
lapw,sym, noco,hmat,smat)
CALL timestop("Vacuum part")
ENDIF
......
......@@ -40,6 +40,14 @@ CONTAINS
USE m_constants, ONLY : fpi_const,tpi_const
USE m_types
USE m_ylm
#if defined (_CUDA)
! cublas: required to use generic BLAS interface
! cudafor: required to use CUDA runtime API routines (e.g.
! cudaDeviceSynchronize)
USE cublas
USE cudafor
USE nvtx
#endif
IMPLICIT NONE
TYPE(t_mpi),INTENT(IN) :: mpi
TYPE(t_sym),INTENT(IN) :: sym
......@@ -61,8 +69,23 @@ CONTAINS
INTEGER:: nn,na,ab_size,l,ll,m
COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab2(:,:)
real :: rchi
#ifdef _CUDA
COMPLEX,ALLOCATABLE,DEVICE :: c_dev(:,:), ab1_dev(:,:), ab_dev(:,:)
COMPLEX,ALLOCATABLE,DEVICE :: h_loc_dev(:,:)
integer :: i, j, istat
call nvtxStartRange("hsmt_nonsph",1)
print*, "running CUDA version"
#endif
ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2),ab1(lapw%nv(jintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
#ifdef _CUDA
ALLOCATE(h_loc_dev(size(td%h_loc,1),size(td%h_loc,2)))
ALLOCATE(ab1_dev(size(ab1,1),size(ab1,2)))
ALLOCATE(ab_dev(size(ab,1),size(ab,2)))
h_loc_dev(1:,1:) = CONJG(td%h_loc(0:,0:,n,isp)) !WORKAROUND, var_dev=CONJG(var_dev) does not work (pgi18.4)
!note that basically all matrices in the GPU version are conjugates of their
!cpu counterparts
#endif
IF (iintsp.NE.jintsp) ALLOCATE(ab2(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
......@@ -73,18 +96,39 @@ CONTAINS
ENDIF
hmat%data_c=0.0
ENDIF
#ifdef _CUDA
ALLOCATE(c_dev(SIZE(hmat%data_c,1),SIZE(hmat%data_c,2)))
c_dev = hmat%data_c
#endif
DO nn = 1,atoms%neq(n)
na = SUM(atoms%neq(:n-1))+nn
IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
!#ifdef _CUDA
!CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab_dev,ab_size,.TRUE.)
! istat = cudaDeviceSynchronize()
!#else
CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
!#endif
!Calculate Hamiltonian
#ifdef _CUDA
ab_dev = CONJG(ab)
CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab_dev,SIZE(ab_dev,1),h_loc_dev,SIZE(h_loc_dev,1),CMPLX(0.,0.),ab1_dev,SIZE(ab1_dev,1))
#else
CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
#endif
!ab1=MATMUL(ab(:lapw%nv(iintsp),:ab_size),td%h_loc(:ab_size,:ab_size,n,isp))
IF (iintsp==jintsp) THEN
#ifdef _CUDA
call nvtxStartRange("zherk",3)
CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,ab1_dev,SIZE(ab1_dev,1),1.0,c_dev,SIZE(c_dev,1))
istat = cudaDeviceSynchronize()
call nvtxEndRange()
#else
CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,CONJG(ab1),SIZE(ab1,1),1.0,hmat%data_c,SIZE(hmat%data_c,1))
#endif
ELSE !here the l_ss off-diagonal part starts
!Second set of ab is needed
CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
......@@ -94,12 +138,18 @@ CONTAINS
ENDIF
ENDIF
END DO
#ifdef _CUDA
hmat%data_c = c_dev
#endif
IF (hmat%l_real) THEN
hmat%data_r=hmat%data_r+REAL(hmat%data_c)
ENDIF
END SUBROUTINE priv_noMPI
#ifdef _CUDA
call nvtxEndRange
#endif
END SUBROUTINE priv_noMPI
SUBROUTINE priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
......@@ -160,7 +210,7 @@ CONTAINS
CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(:,:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
!Multiply for Hamiltonian
CALL zgemm("N","T",lapw%nv(iintsp),lapw%num_local_cols(jintsp),ab_size,chi,conjg(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(jintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))
CALL zgemm("N","t",lapw%nv(iintsp),lapw%num_local_cols(jintsp),ab_size,chi,conjg(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(jintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))
ENDIF
ENDIF
END DO
......
set(fleur_F77 ${fleur_F77}
)
set(fleur_F90 ${fleur_F90}
eigen_soc/abclocdn_soc.F90
eigen_soc/abcof_soc.F90
eigen_soc/alineso.F90
eigen_soc/anglso.f90
eigen_soc/eigenso.F90
eigen_soc/hsoham.f90
eigen_soc/hsoham.F90
eigen_soc/hsohelp.F90
eigen_soc/sgml.f90
eigen_soc/sointg.f90
......
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE m_abclocdn_soc
USE m_juDFT
!*********************************************************************
! Calculates the (upper case) A, B and C coefficients for the local
! orbitals. The difference to abccoflo is, that a summation over the
! Gs ist performed. The A, B and C coeff. are set up for each eigen-
! state.
! Philipp Kurz 99/04
!*********************************************************************
!*************** ABBREVIATIONS ***************************************
! nkvec : stores the number of G-vectors that have been found and
! accepted during the construction of the local orbitals.
! kvec : k-vector used in hssphn to attach the local orbital 'lo'
! of atom 'na' to it.
!*********************************************************************
CONTAINS
SUBROUTINE abclocdn_soc(atoms,sym,noco,lapw,cell,ccchi,iintsp,phase,ylm,&
ntyp,na,na_l,k,nkvec,lo,ne,alo1,blo1,clo1,acof,bcof,ccof,zMat,l_force,fgp,force)
USE m_types
USE m_constants
IMPLICIT NONE
TYPE(t_noco), INTENT(IN) :: noco
TYPE(t_sym), INTENT(IN) :: sym
TYPE(t_atoms), INTENT(IN) :: atoms
TYPE(t_lapw), INTENT(IN) :: lapw
TYPE(t_cell), INTENT(IN) :: cell
TYPE(t_mat), INTENT(IN) :: zMat
TYPE(t_force), OPTIONAL, INTENT(INOUT) :: force
! .. Scalar Arguments ..
INTEGER, INTENT (IN) :: iintsp
INTEGER, INTENT (IN) :: k,na,na_l,ne,ntyp,nkvec,lo
COMPLEX, INTENT (IN) :: phase
LOGICAL, INTENT (IN) :: l_force
! .. Array Arguments ..
REAL, INTENT (IN) :: alo1(:),blo1(:),clo1(:)
COMPLEX, INTENT (IN) :: ylm( (atoms%lmaxd+1)**2 )
COMPLEX, INTENT (IN) :: ccchi(2)
COMPLEX, INTENT (INOUT) :: acof(:,0:,:)!(nobd,0:dimension%lmd,atoms%nat_l)
COMPLEX, INTENT (INOUT) :: bcof(:,0:,:)!(nobd,0:dimension%lmd,atoms%nat_l)
COMPLEX, INTENT (INOUT) :: ccof(-atoms%llod:,:,:,:)!(-atoms%llod:atoms%llod,nobd,atoms%nlod,atoms%nat_l)
REAL, OPTIONAL, INTENT (IN) :: fgp(3)
! .. Local Scalars ..
COMPLEX ctmp,term1
INTEGER i,j,l,ll1,lm,nbasf,m,na2,lmp
! ..
! ..
term1 = 2 * tpi_const/SQRT(cell%omtil) * ((atoms%rmt(ntyp)**2)/2) * phase
!
!---> the whole program is in hartree units, therefore 1/wronskian is
!---> (rmt**2)/2. the factor i**l, which usually appears in the a, b
!---> and c coefficients, is included in the t-matrices. thus, it does
!---> not show up in the formula above.
IF ((atoms%invsat(na)==0).OR.(atoms%invsat(na)==1)) THEN
na2=na
ELSE
na2 = sym%invsatnr(na)
ENDIF
nbasf=lapw%nv(iintsp)+lapw%index_lo(lo,na2)+nkvec
l = atoms%llo(lo,ntyp)
ll1 = l* (l+1)
DO i = 1,ne
DO m = -l,l
lm = ll1 + m
!+gu_con
IF ((atoms%invsat(na)==0).OR.(atoms%invsat(na)==1)) THEN
IF (zMat%l_real) THEN
ctmp = zMat%data_r(nbasf,i)*term1*CONJG(ylm(ll1+m+1))
ELSE
ctmp = zMat%data_c(nbasf,i)*term1*CONJG(ylm(ll1+m+1))
ENDIF
acof(i,lm,na_l) = acof(i,lm,na_l) + ctmp*alo1(lo)
bcof(i,lm,na_l) = bcof(i,lm,na_l) + ctmp*blo1(lo)
ccof(m,i,lo,na_l) = ccof(m,i,lo,na_l) + ctmp*clo1(lo)
ELSE
ctmp = zMat%data_c(nbasf,i)*CONJG(term1)*ylm(ll1+m+1)*(-1)**(l-m)
lmp = ll1 - m
acof(i,lmp,na_l) = acof(i,lmp,na_l) +ctmp*alo1(lo)
bcof(i,lmp,na_l) = bcof(i,lmp,na_l) +ctmp*blo1(lo)
ccof(-m,i,lo,na_l) = ccof(-m,i,lo,na_l) +ctmp*clo1(lo)
ENDIF
END DO
END DO
END SUBROUTINE abclocdn_soc
END MODULE m_abclocdn_soc
This diff is collapsed.
......@@ -12,10 +12,10 @@ CONTAINS
nsize,nmat, eig_so,zso)
#include"cpp_double.h"
USE m_types
USE m_hsohelp
USE m_hsoham
USE m_eig66_io, ONLY : read_eig
USE m_types
IMPLICIT NONE
TYPE(t_mpi),INTENT(IN) :: mpi
TYPE(t_lapw),INTENT(IN) :: lapw
......@@ -44,10 +44,11 @@ CONTAINS
! .. Local Scalars ..
REAL r2
INTEGER i,i1 ,j,jsp,jsp1,k,ne,nn,nn1,nrec,info
INTEGER idim_c,idim_r,jsp2,nbas,j1
INTEGER idim_c,idim_r,jsp2,nbas,j1,ierr
CHARACTER vectors
LOGICAL l_socvec,l_qsgw,l_open,l_real
INTEGER irec,irecl_qsgw
INTEGER nat_l, extra, nat_start, nat_stop
COMPLEX cdum
! ..
! .. Local Arrays ..
......@@ -55,7 +56,7 @@ CONTAINS
REAL :: eig(DIMENSION%neigd,DIMENSION%jspd),s(3)
REAL, ALLOCATABLE :: rwork(:)
COMPLEX,ALLOCATABLE :: cwork(:),chelp(:,:,:,:,:)
COMPLEX,ALLOCATABLE :: ahelp(:,:,:,:,:),bhelp(:,:,:,:,:)
COMPLEX,ALLOCATABLE :: ahelp(:,:,:,:),bhelp(:,:,:,:)
COMPLEX,ALLOCATABLE :: zhelp1(:,:),zhelp2(:,:)
COMPLEX,ALLOCATABLE :: hso(:,:),hsomtx(:,:,:,:)
COMPLEX,ALLOCATABLE :: sigma_xc_apw(:,:),sigma_xc(:,:)
......@@ -104,8 +105,6 @@ CONTAINS
eig_id,nk,jsp,&
n_start=1,n_end=ne,&
zmat=zmat(jsp))
write(6,*) "jspin=",jsp,",nk=",nk
write(6,"(5f12.4)") eig(:ne,jsp)
! write(*,*) 'process',irank,' reads ',nk
......@@ -134,29 +133,57 @@ CONTAINS
ENDIF
ENDDO
!
! distribution of (abc)cof over atoms
!
!
! in case of ev-parallelization, now distribute the atoms:
!
IF (mpi%n_size > 1) THEN
nat_l = FLOOR(real(atoms%nat)/mpi%n_size)
extra = atoms%nat - nat_l*mpi%n_size
nat_start = mpi%n_rank*nat_l + 1 + extra
nat_stop = (mpi%n_rank+1)*nat_l + extra
IF (mpi%n_rank < extra) THEN
nat_start = nat_start - (extra - mpi%n_rank)
nat_stop = nat_stop - (extra - mpi%n_rank - 1)
ENDIF
ELSE
nat_start = 1
nat_stop = atoms%nat
ENDIF
nat_l = nat_stop - nat_start + 1
!
! set up A and B coefficients
!
ALLOCATE ( ahelp(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,atoms%nat,DIMENSION%neigd,DIMENSION%jspd) )
ALLOCATE ( bhelp(-atoms%lmaxd:atoms%lmaxd,atoms%lmaxd,atoms%nat,DIMENSION%neigd,DIMENSION%jspd) )
ALLOCATE ( chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,atoms%nat ,DIMENSION%jspd) )
ALLOCATE ( ahelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) )
ALLOCATE ( bhelp(atoms%lmaxd*(atoms%lmaxd+2),nat_l,DIMENSION%neigd,input%jspins) )
ALLOCATE ( chelp(-atoms%llod :atoms%llod, DIMENSION%neigd,atoms%nlod,nat_l,input%jspins) )
CALL timestart("alineso SOC: -help")
write(*,*) nat_start,nat_stop,nat_l
CALL hsohelp(&
& DIMENSION,atoms,sym,&
& input,lapw,nsz,&
& cell,&
& zmat,usdus,&
& zso,noco,oneD,&
& nat_start,nat_stop,nat_l,&
& ahelp,bhelp,chelp)
CALL timestop("alineso SOC: -help")
!
! set up hamilton matrix
!
CALL timestart("alineso SOC: -ham")
ALLOCATE ( hsomtx(2,2,DIMENSION%neigd,DIMENSION%neigd) )
CALL hsoham(atoms,noco,input,nsz,chelp, rsoc,ahelp,bhelp, hsomtx)
#ifdef CPP_MPI
CALL MPI_BARRIER(mpi%MPI_COMM,ierr)
#endif
ALLOCATE ( hsomtx(DIMENSION%neigd,DIMENSION%neigd,2,2) )
CALL hsoham(atoms,noco,input,nsz,dimension%neigd,chelp,rsoc,ahelp,bhelp,&
nat_start,nat_stop,mpi%n_rank,mpi%n_size,mpi%SUB_COMM,&
hsomtx)
write(*,*) 'after hsoham'
DEALLOCATE ( ahelp,bhelp,chelp )
CALL timestop("alineso SOC: -ham")
IF (mpi%n_rank==0) THEN
!
! add e.v. on diagonal
!
......@@ -164,10 +191,10 @@ CONTAINS
! hsomtx = 0 !!!!!!!!!!!!
DO jsp = 1,input%jspins
DO i = 1,nsz(jsp)
hsomtx(jsp,jsp,i,i) = hsomtx(jsp,jsp,i,i) +&
hsomtx(i,i,jsp,jsp) = hsomtx(i,i,jsp,jsp) +&
& CMPLX(eig(i,jsp),0.)
IF (input%jspins.EQ.1) THEN
hsomtx(2,2,i,i) = hsomtx(2,2,i,i) +&
hsomtx(i,i,2,2) = hsomtx(i,i,2,2) +&
& CMPLX(eig(i,jsp),0.)
ENDIF
ENDDO
......@@ -184,9 +211,10 @@ CONTAINS
IF (jsp.EQ.2) nn = nsz(1)
IF (jsp1.EQ.2) nn1 = nsz(1)
!
!write(3333,'(2i3,4e15.8)') jsp,jsp1,hsomtx(jsp,jsp1,8,8),hsomtx(jsp,jsp1,32,109)
DO i = 1,nsz(jsp)
DO j = 1,nsz(jsp1)
hso(i+nn,j+nn1) = hsomtx(jsp,jsp1,i,j)
hso(i+nn,j+nn1) = hsomtx(i,j,jsp,jsp1)
ENDDO
ENDDO
!
......@@ -265,7 +293,6 @@ else
& eig_so,&
& cwork, idim_c, rwork, &
& info)
IF (info.NE.0) WRITE (6,FMT=8000) info
8000 FORMAT (' AFTER CPP_LAPACK_cheev: info=',i4)
CALL timestop("alineso SOC: -diag")
......@@ -331,6 +358,7 @@ else
ENDIF ! (.NOT.input%eonly)
DEALLOCATE ( hso )
ENDIF ! (n_rank==0)
!
nmat=lapw%nmat
RETURN
......
......@@ -56,7 +56,8 @@ CONTAINS
! ..
! .. Local Scalars ..
INTEGER i,j,nk,jspin,n ,l
INTEGER n_loc,n_plus,i_plus,n_end,nsz,nmat
! INTEGER n_loc,n_plus,i_plus,
INTEGER n_end,nsz,nmat,n_stride
LOGICAL l_socvec !,l_all
INTEGER wannierspin
TYPE(t_usdus):: usdus
......@@ -117,15 +118,24 @@ CONTAINS
!
!---> loop over k-points: each can be a separate task
!
n_loc = INT(kpts%nkpt/mpi%isize)
n_plus = kpts%nkpt - mpi%isize*n_loc
i_plus = -1
IF (mpi%irank.LT.n_plus) i_plus = 0
n_end = (mpi%irank+1)+(n_loc+i_plus)*mpi%isize
!n_loc = INT(kpts%nkpt/mpi%isize)
!n_plus = kpts%nkpt - mpi%isize*n_loc
!i_plus = -1
!IF (mpi%irank.LT.n_plus) i_plus = 0
!n_end = (mpi%irank+1)+(n_loc+i_plus)*mpi%isize
!
#if defined(CPP_MPI)
n_stride = kpts%nkpt/mpi%n_groups
#else
n_stride = 1
#endif
n_end = kpts%nkpt
!write(*,'(4i12)') mpi%irank, mpi%n_groups, n_stride, mpi%n_start
!
!---> start loop k-pts
!
DO nk = mpi%irank+1,n_end,mpi%isize
! DO nk = mpi%irank+1,n_end,mpi%isize
DO nk = mpi%n_start,n_end,n_stride
CALL lapw%init(input,noco, kpts,atoms,sym,nk,cell,.FALSE., mpi)
ALLOCATE( zso(lapw%nv(1)+atoms%nlotot,2*DIMENSION%neigd,wannierspin))
zso(:,:,:) = CMPLX(0.0,0.0)
......@@ -141,20 +151,21 @@ CONTAINS
' the',i4,' SOC eigenvalues are:')
8020 FORMAT (5x,5f12.6)
IF (input%eonly) THEN
CALL write_eig(eig_id, nk,jspin,neig=nsz,neig_total=nsz, eig=eig_so(:nsz))
ELSE
CALL zmat%alloc(.FALSE.,SIZE(zso,1),nsz)
DO jspin = 1,wannierspin
CALL timestart("eigenso: write_eig")
zmat%data_c=zso(:,:nsz,jspin)
CALL write_eig(eig_id, nk,jspin,neig=nsz,neig_total=nsz, eig=eig_so(:nsz),zmat=zmat)
CALL timestop("eigenso: write_eig")
ENDDO
ENDIF ! (input%eonly) ELSE
deallocate(zso)
IF (mpi%n_rank==0) THEN
IF (input%eonly) THEN
CALL write_eig(eig_id, nk,jspin,neig=nsz,neig_total=nsz, eig=eig_so(:nsz))
ELSE
CALL zmat%alloc(.FALSE.,SIZE(zso,1),nsz)
DO jspin = 1,wannierspin
CALL timestart("eigenso: write_eig")
zmat%data_c=zso(:,:nsz,jspin)
CALL write_eig(eig_id, nk,jspin,neig=nsz,neig_total=nsz, eig=eig_so(:nsz),zmat=zmat)
CALL timestop("eigenso: write_eig")
ENDDO
ENDIF ! (input%eonly) ELSE
ENDIF ! n_rank == 0
DEALLOCATE (zso)
ENDDO ! DO nk
! Sorry for the following strange workaround to fill the results%neig and results%eig arrays.
......