...
 
Commits (16)
......@@ -65,32 +65,32 @@ pages:
- triggers
- web
#build-pgi:
# image: iffregistry.fz-juelich.de/fleur/fleur:pgi
# stage: build-pgi
# cache:
# paths:
# - build.debug
# script:
# - cd /builds/fleur/fleur; ./configure.sh -d AUTO ; cd build.debug; make
# only:
# - schedules
# - triggers
# - web
build-pgi:
# CI job that configures and builds the code inside the fleur:pgi container image.
# NOTE(review): the -d flag together with the build.debug directory suggests a debug configuration - confirm.
image: iffregistry.fz-juelich.de/fleur/fleur:pgi
stage: build-pgi
# The build directory is cached; presumably so that the test-pgi job can reuse the binaries - confirm.
cache:
paths:
- build.debug
script:
- cd /builds/fleur/fleur; ./configure.sh -d AUTO ; cd build.debug; make
# Only run for scheduled pipelines, pipeline triggers and pipelines started from the web UI.
only:
- schedules
- triggers
- web
#test-pgi:
# image: iffregistry.fz-juelich.de/fleur/fleur:pgi
# stage: test-pgi
# cache:
# paths:
# - build.debug
# script:
# - cd /builds/fleur/fleur/build.debug;ctest
# only:
# - schedules
# - web
# - triggers
test-pgi:
# CI job that runs ctest inside the cached build.debug directory, using the fleur:pgi container image.
image: iffregistry.fz-juelich.de/fleur/fleur:pgi
stage: test-pgi
# Same cache path as the build-pgi job, so the build tree is expected to be present already.
cache:
paths:
- build.debug
script:
- cd /builds/fleur/fleur/build.debug;ctest
# Only run for scheduled pipelines, pipelines started from the web UI and pipeline triggers.
only:
- schedules
- web
- triggers
build-intel:
image: iffregistry.fz-juelich.de/docker-images/centos7-intel-compilers/extended
......
......@@ -12,7 +12,7 @@ MODULE m_hlomat
!***********************************************************************
CONTAINS
SUBROUTINE hlomat(input,atoms,mpi,lapw,ud,tlmplm,sym,cell,noco,isp,&
ntyp,na,fj,gj,alo1,blo1,clo1, iintsp,jintsp,chi,hmat)
ntyp,na,fj,gj,ab,abclo,ab_size, iintsp,jintsp,chi,hmat)
!
USE m_hsmt_ab
USE m_types
......@@ -34,9 +34,10 @@ CONTAINS
INTEGER, INTENT (IN) :: isp !spin for usdus and tlmplm
INTEGER, INTENT (IN) :: jintsp,iintsp
COMPLEX, INTENT (IN) :: chi
INTEGER, INTENT (IN) :: ab_size
! ..
! .. Array Arguments ..
REAL, INTENT (IN) :: alo1(:),blo1(:),clo1(:)
COMPLEX,INTENT(IN) :: ab(:,0:,:),abclo(:,-atoms%llod:,:,:,:)
REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
CLASS(t_mat),INTENT (INOUT) :: hmat
......@@ -45,22 +46,14 @@ CONTAINS
COMPLEX axx,bxx,cxx,dtd,dtu,dtulo,ulotd,ulotu,ulotulo,utd,utu, utulo
INTEGER im,in,invsfct,l,lm,lmp,lo,lolo,lolop,lop,lp,i
INTEGER mp,nkvec,nkvecp,lmplm,loplo,kp,m,mlo,mlolo
INTEGER locol,lorow,ii,ij,n,k,ab_size
INTEGER locol,lorow,ii,ij,n,k
! ..
! .. Local Arrays ..
COMPLEX, ALLOCATABLE :: ab(:,:,:),ax(:),bx(:),cx(:)
COMPLEX,ALLOCATABLE :: abclo(:,:,:,:,:)
COMPLEX, ALLOCATABLE :: ax(:),bx(:),cx(:)
! ..
!--> synthesize the complex conjugates of a and b
ALLOCATE(ab(MAXVAL(lapw%nv),0:2*atoms%lmaxd*(atoms%lmaxd+2)+1,MIN(jintsp,iintsp):MAX(jintsp,iintsp)))
ALLOCATE(ax(MAXVAL(lapw%nv)),bx(MAXVAL(lapw%nv)),cx(MAXVAL(lapw%nv)))
ALLOCATE(abclo(3,-atoms%llod:atoms%llod,2*(2*atoms%llod+1),atoms%nlod,2))
DO i=MIN(jintsp,iintsp),MAX(jintsp,iintsp)
CALL hsmt_ab(sym,atoms,noco,isp,i,ntyp,na,cell,lapw,fj,gj,ab(:,:,i),ab_size,.TRUE.,abclo(:,:,:,:,i),alo1,blo1,clo1)
ENDDO
mlo=0;mlolo=0
DO m=1,ntyp-1
......@@ -85,6 +78,7 @@ CONTAINS
l = atoms%llo(lo,ntyp)
!---> calculate the hamiltonian matrix elements with the regular
!---> flapw basis-functions
CALL timestart("loop1")
DO m = -l,l
lm = l* (l+1) + m
DO kp = 1,lapw%nv(iintsp)
......@@ -169,6 +163,7 @@ CONTAINS
ENDIF
END DO
END DO
CALL timestop("loop1")
!---> calculate the hamiltonian matrix elements with other
!---> local orbitals at the same atom and with itself
DO nkvec = 1,invsfct* (2*l+1)
......
......@@ -4,19 +4,141 @@
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE m_hsmt_ab
use m_juDFT
implicit none
USE m_juDFT
IMPLICIT NONE
INTERFACE hsmt_ab
module procedure hsmt_ab_cpu
MODULE PROCEDURE hsmt_ab_cpu
#ifdef CPP_GPU
module procedure hsmt_ab_gpu
MODULE PROCEDURE hsmt_ab_gpu
#endif
END INTERFACE
CONTAINS
SUBROUTINE hsmt_ab_cpu(mpi,sym,atoms,noco,ispin,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
!Fill the ab array (and optionally the local-orbital array abclo) for atom na of type n, CPU version.
!
! For every plane wave k and every (l,m) with l <= lmax the routine stores
!   ab(k, l*(l+1)+m+1)               = fj(k,l,iintsp) * c_ph(k,iintsp) * ylm(l*(l+1)+m+1)
!   ab(k, l*(l+1)+m+1 + (lmax+1)**2) = gj(k,l,iintsp) * c_ph(k,iintsp) * ylm(l*(l+1)+m+1)
! The k loop is strided over the processes (start 1+mpi%n_rank, step mpi%n_size). When the code is
! compiled with MPI support the partial results are summed over mpi%sub_comm afterwards, so that
! every process ends up with the complete arrays.
!
! Arguments:
!   mpi        n_rank and n_size select the plane waves handled here, sub_comm is used for the sum
!   sym        provides invtab and mrot (rotation belonging to atom na)
!   atoms      provides lnonsph, lmax, lmaxd, llod, ngopr, taual, invsat, rmt, nlo and llo
!   noco       provides l_ss (size of the phase-factor array) and qss
!   ispin      not referenced in the body of this routine
!   iintsp     index used for lapw%nv, lapw%gk, lapw%vk and the third dimension of fj and gj
!   n, na      atom type and atom index
!   cell       provides bmat and omtil
!   lapw       provides nv, gk, vk, kvec and the phase_factors procedure
!   fj, gj     real factors multiplied onto phase factor times spherical harmonic
!   ab         output array, zeroed on entry; needs at least 2*(lmax+1)**2 columns
!   ab_size    output: (lmax+1)**2 if all elements of gj are zero, otherwise 2*(lmax+1)**2
!   l_nonsph   if true lmax is atoms%lnonsph(n), otherwise atoms%lmax(n)
!   abclo      optional; if present it is zeroed and filled with the LO coefficients
!   alo1, blo1, clo1
!              optional; read whenever abclo is present (there is no separate PRESENT check)
#include"cpp_double.h"
USE m_constants, ONLY : fpi_const
USE m_types
USE m_ylm
IMPLICIT NONE
TYPE(t_mpi), INTENT(IN) :: mpi
TYPE(t_sym), INTENT(IN) :: sym
TYPE(t_cell), INTENT(IN) :: cell
TYPE(t_atoms), INTENT(IN) :: atoms
TYPE(t_lapw), INTENT(IN) :: lapw
TYPE(t_noco), INTENT(IN) :: noco
! ..
! .. Scalar Arguments ..
INTEGER, INTENT (IN) :: ispin,n,na,iintsp
LOGICAL, INTENT(IN) :: l_nonsph
INTEGER, INTENT(OUT) :: ab_size
! ..
! .. Array Arguments ..
REAL, INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
COMPLEX, INTENT (OUT) :: ab(:,:)
!Optional arguments if abc coef for LOs are needed
COMPLEX, INTENT(INOUT), OPTIONAL :: abclo(:,-atoms%llod:,:,:)
REAL, INTENT(IN), OPTIONAL :: alo1(:),blo1(:),clo1(:)
! .. Local variables (th is declared but not used in this routine) ..
LOGICAL :: l_apw
INTEGER :: np,k,l,ll1,m,lmax,nkvec,lo,lm,invsfct
COMPLEX :: term
REAL :: th,v(3),bmrot(3,3),vmult(3)
COMPLEX :: ylm((atoms%lmaxd+1)**2)
COMPLEX, ALLOCATABLE :: c_ph(:,:)
REAL, ALLOCATABLE :: gkrot(:,:)
#ifdef CPP_MPI
INCLUDE 'mpif.h'
! zbuf is the receive buffer of the reductions below
COMPLEX, ALLOCATABLE :: zbuf(:)
INTEGER zb_size,ierr
#endif
! Work arrays sized for the largest number of plane waves; c_ph has two columns only if noco%l_ss is set
ALLOCATE(c_ph(maxval(lapw%nv),MERGE(2,1,noco%l_ss)))
ALLOCATE(gkrot(3,maxval(lapw%nv)))
lmax=MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
! Number of (l,m) combinations up to lmax, i.e. (lmax+1)**2; used as column offset of the gj part
ab_size=lmax*(lmax+2)+1
! True only if every element of the whole gj array is exactly zero
l_apw=ALL(gj==0.0)
! Zero the outputs: each process fills only its own rows, the remaining rows must not
! contribute to the sum over processes further down
ab=0.0
IF (PRESENT(abclo)) abclo = 0.0
np = sym%invtab(atoms%ngopr(na))
!---> set up phase factors
CALL lapw%phase_factors(iintsp,atoms%taual(:,na),noco%qss,c_ph(:,iintsp))
! For np==1 the vectors lapw%gk are used directly, otherwise lapw%vk is transformed with the rotated bmat
IF (np==1) THEN
gkrot(:, 1:lapw%nv(iintsp)) = lapw%gk(:, 1:lapw%nv(iintsp),iintsp)
ELSE
bmrot=MATMUL(1.*sym%mrot(:,:,np),cell%bmat)
DO k = 1,lapw%nv(iintsp)
!--> apply the rotation that brings this atom into the
!--> representative (this is the definition of ngopr(na))
!--> and transform to cartesian coordinates
v(:) = lapw%vk(:,k,iintsp)
gkrot(:,k) = MATMUL(TRANSPOSE(bmrot),v)
END DO
END IF
! Loop over the plane waves assigned to this process; the iterations are shared among the OpenMP threads
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP& SHARED(mpi,lapw,gkrot,lmax,c_ph,iintsp,ab,fj,gj,abclo,cell,atoms) &
!$OMP& SHARED(alo1,blo1,clo1,ab_size,na,n) &
!$OMP& PRIVATE(k,vmult,ylm,l,ll1,m,lm,term,invsfct,lo,nkvec)
DO k = 1 + mpi%n_rank,lapw%nv(iintsp), mpi%n_size
!--> generate spherical harmonics
vmult(:) = gkrot(:,k)
CALL ylm4(lmax,vmult,ylm)
!--> synthesize the complex conjugates of a and b
DO l = 0,lmax
ll1 = l* (l+1)
DO m = -l,l
term = c_ph(k,iintsp)*ylm(ll1+m+1)
! fj part goes into the first ab_size columns, gj part into the following ab_size columns
ab(k,ll1+m+1) = fj(k,l,iintsp)*term
ab(k,ll1+m+1+ab_size) = gj(k,l,iintsp)*term
END DO
END DO
IF (PRESENT(abclo)) THEN
!determine also the abc coeffs for LOs
! invsfct is 1 if atoms%invsat(na) is zero and 2 otherwise
invsfct=MERGE(1,2,atoms%invsat(na).EQ.0)
term = fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)*c_ph(k,iintsp)
DO lo = 1,atoms%nlo(n)
l = atoms%llo(lo,n)
DO nkvec=1,invsfct*(2*l+1)
IF (lapw%kvec(nkvec,lo,na)==k) THEN !This k-vector is used in LO
! ll1 is here the ylm index of m=0 for this l (it includes the +1 offset)
ll1 = l*(l+1) + 1
DO m = -l,l
lm = ll1 + m
abclo(1,m,nkvec,lo) = term*ylm(lm)*alo1(lo)
abclo(2,m,nkvec,lo) = term*ylm(lm)*blo1(lo)
abclo(3,m,nkvec,lo) = term*ylm(lm)*clo1(lo)
END DO
END IF
ENDDO
ENDDO
ENDIF
ENDDO !k-loop
!$OMP END PARALLEL DO
#ifdef CPP_MPI
! Sum the partial ab arrays of all processes in mpi%sub_comm into zbuf and copy the result back into ab
zb_size = size(ab)
ALLOCATE(zbuf(zb_size))
CALL MPI_allreduce(ab(:,:),zbuf,zb_size,CPP_MPI_COMPLEX,MPI_SUM,mpi%sub_comm,ierr)
CALL CPP_BLAS_ccopy(zb_size,zbuf,1,ab(:,:),1)
DEALLOCATE(zbuf)
IF (PRESENT(abclo)) THEN
! Same reduction for the LO coefficients; ierr is not checked after either call
zb_size = size(abclo)
ALLOCATE(zbuf(zb_size))
CALL MPI_allreduce(abclo(:,-atoms%llod:,:,:),zbuf,zb_size,CPP_MPI_COMPLEX,MPI_SUM,mpi%sub_comm,ierr)
CALL CPP_BLAS_ccopy(zb_size,zbuf,1,abclo(:,-atoms%llod:,:,:),1)
DEALLOCATE(zbuf)
ENDIF
#endif
! Report the number of filled columns: the gj half only counts if gj is not identically zero
IF (.NOT.l_apw) ab_size=ab_size*2
END SUBROUTINE hsmt_ab_cpu
#ifdef CPP_GPU
ATTRIBUTES(global) SUBROUTINE synth_ab(loop_size,n,lmax,ab_size,gkrot_dev,fj,gj,c_ph,ab)
......@@ -52,7 +174,7 @@ CONTAINS
END SUBROUTINE synth_ab
SUBROUTINE hsmt_ab_gpu(sym,atoms,noco,ispin,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
SUBROUTINE hsmt_ab_gpu(mpi,sym,atoms,noco,ispin,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
!Calculate overlap matrix, GPU version
USE m_constants, ONLY : fpi_const,tpi_const
USE m_types
......@@ -60,6 +182,7 @@ CONTAINS
USE cudafor
USE nvtx
IMPLICIT NONE
TYPE(t_mpi), INTENT(IN) :: mpi
TYPE(t_sym),INTENT(IN) :: sym
TYPE(t_cell),INTENT(IN) :: cell
TYPE(t_atoms),INTENT(IN) :: atoms
......@@ -166,103 +289,5 @@ CONTAINS
END SUBROUTINE hsmt_ab_gpu
#endif
SUBROUTINE hsmt_ab_cpu(sym,atoms,noco,ispin,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
!Fill the ab array (and optionally the local-orbital array abclo) for atom na of type n, CPU version
!
! For every plane wave k = 1..lapw%nv(iintsp) and every (l,m) with l <= lmax the routine stores
!   ab(k, l*(l+1)+m+1)               = fj(k,l,iintsp) * c_ph(k,iintsp) * ylm(l*(l+1)+m+1)
!   ab(k, l*(l+1)+m+1 + (lmax+1)**2) = gj(k,l,iintsp) * c_ph(k,iintsp) * ylm(l*(l+1)+m+1)
!
! Arguments:
!   sym        provides invtab and mrot (rotation belonging to atom na)
!   atoms      provides lnonsph, lmax, lmaxd, llod, ngopr, taual, invsat, rmt, nlo and llo
!   noco       provides l_ss (size of the phase-factor array) and qss
!   ispin      not referenced in the body of this routine
!   iintsp     index used for lapw%nv, lapw%gk, lapw%vk and the third dimension of fj and gj
!   n, na      atom type and atom index
!   cell       provides bmat and omtil
!   lapw       provides nv, gk, vk, kvec and the phase_factors procedure
!   ab         output array, zeroed on entry; needs at least 2*(lmax+1)**2 columns
!   ab_size    output: (lmax+1)**2 if all elements of gj are zero, otherwise 2*(lmax+1)**2
!   l_nonsph   if true lmax is atoms%lnonsph(n), otherwise atoms%lmax(n)
!   abclo      optional; only the entries belonging to plane waves listed in lapw%kvec are
!              assigned, the array is not zeroed here
!   alo1, blo1, clo1
!              optional; read whenever abclo is present (there is no separate PRESENT check)
USE m_constants, ONLY : fpi_const,tpi_const
USE m_types
USE m_ylm
IMPLICIT NONE
TYPE(t_sym),INTENT(IN) :: sym
TYPE(t_cell),INTENT(IN) :: cell
TYPE(t_atoms),INTENT(IN) :: atoms
TYPE(t_lapw),INTENT(IN) :: lapw
TYPE(t_noco),INTENT(IN) :: noco
! ..
! .. Scalar Arguments ..
INTEGER, INTENT (IN) :: ispin,n,na,iintsp
LOGICAL,INTENT(IN) :: l_nonsph
INTEGER,INTENT(OUT) :: ab_size
! ..
! .. Array Arguments ..
REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
COMPLEX, INTENT (OUT) :: ab(:,:)
!Optional arguments if abc coef for LOs are needed
COMPLEX, INTENT(INOUT),OPTIONAL:: abclo(:,-atoms%llod:,:,:)
REAL,INTENT(IN),OPTIONAL:: alo1(:),blo1(:),clo1(:)
! .. Local variables (th is declared but not used in this routine) ..
INTEGER:: np,k,l,ll1,m,lmax,nkvec,lo,lm,invsfct
COMPLEX:: term
REAL :: th,v(3),bmrot(3,3),vmult(3)
COMPLEX :: ylm((atoms%lmaxd+1)**2)
COMPLEX,ALLOCATABLE:: c_ph(:,:)
REAL,ALLOCATABLE :: gkrot(:,:)
LOGICAL :: l_apw
! Work arrays sized for the largest number of plane waves; c_ph has two columns only if noco%l_ss is set
ALLOCATE(c_ph(maxval(lapw%nv),MERGE(2,1,noco%l_ss)))
ALLOCATE(gkrot(3,maxval(lapw%nv)))
lmax=MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
! Number of (l,m) combinations up to lmax, i.e. (lmax+1)**2; used as column offset of the gj part
ab_size=lmax*(lmax+2)+1
! True only if every element of the whole gj array is exactly zero
l_apw=ALL(gj==0.0)
ab=0.0
np = sym%invtab(atoms%ngopr(na))
!---> set up phase factors
CALL lapw%phase_factors(iintsp,atoms%taual(:,na),noco%qss,c_ph(:,iintsp))
! For np==1 the vectors lapw%gk are used directly, otherwise lapw%vk is transformed with the rotated bmat
IF (np==1) THEN
gkrot(:, 1:lapw%nv(iintsp)) = lapw%gk(:, 1:lapw%nv(iintsp),iintsp)
ELSE
bmrot=MATMUL(1.*sym%mrot(:,:,np),cell%bmat)
DO k = 1,lapw%nv(iintsp)
!--> apply the rotation that brings this atom into the
!--> representative (this is the definition of ngopr(na))
!--> and transform to cartesian coordinates
v(:) = lapw%vk(:,k,iintsp)
gkrot(:,k) = MATMUL(TRANSPOSE(bmrot),v)
END DO
END IF
! Loop over all plane waves; the iterations are shared among the OpenMP threads
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP& SHARED(lapw,gkrot,lmax,c_ph,iintsp,ab,fj,gj,abclo,cell,atoms) &
!$OMP& SHARED(alo1,blo1,clo1,ab_size,na,n) &
!$OMP& PRIVATE(k,vmult,ylm,l,ll1,m,lm,term,invsfct,lo,nkvec)
DO k = 1,lapw%nv(iintsp)
!--> generate spherical harmonics
vmult(:) = gkrot(:,k)
CALL ylm4(lmax,vmult,ylm)
!--> synthesize the complex conjugates of a and b
DO l = 0,lmax
ll1 = l* (l+1)
DO m = -l,l
term = c_ph(k,iintsp)*ylm(ll1+m+1)
! fj part goes into the first ab_size columns, gj part into the following ab_size columns
ab(k,ll1+m+1) = fj(k,l,iintsp)*term
ab(k,ll1+m+1+ab_size) = gj(k,l,iintsp)*term
END DO
END DO
IF (PRESENT(abclo)) THEN
!determine also the abc coeffs for LOs
! invsfct is 1 if atoms%invsat(na) is zero and 2 otherwise
invsfct=MERGE(1,2,atoms%invsat(na).EQ.0)
term = fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)*c_ph(k,iintsp)
DO lo = 1,atoms%nlo(n)
l = atoms%llo(lo,n)
DO nkvec=1,invsfct*(2*l+1)
IF (lapw%kvec(nkvec,lo,na)==k) THEN !This k-vector is used in LO
! ll1 is here the ylm index of m=0 for this l (it includes the +1 offset)
ll1 = l*(l+1) + 1
DO m = -l,l
lm = ll1 + m
abclo(1,m,nkvec,lo) = term*ylm(lm)*alo1(lo)
abclo(2,m,nkvec,lo) = term*ylm(lm)*blo1(lo)
abclo(3,m,nkvec,lo) = term*ylm(lm)*clo1(lo)
END DO
END IF
ENDDO
ENDDO
ENDIF
ENDDO !k-loop
!$OMP END PARALLEL DO
! Report the number of filled columns: the gj half only counts if gj is not identically zero
IF (.NOT.l_apw) ab_size=ab_size*2
END SUBROUTINE hsmt_ab_cpu
END MODULE m_hsmt_ab
......@@ -12,6 +12,7 @@ CONTAINS
USE m_hlomat
USE m_slomat
USE m_setabc1lo
USE m_hsmt_ab
USE m_types
IMPLICIT NONE
TYPE(t_mpi),INTENT(IN) :: mpi
......@@ -36,11 +37,17 @@ CONTAINS
REAL,INTENT(IN) :: fj(:,:,:),gj(:,:,:)
! ..
! .. Local Scalars ..
INTEGER na,nn
INTEGER na,nn,ab_size,i
! ..
! .. Local Arrays ..
REAL alo1(atoms%nlod),blo1(atoms%nlod),clo1(atoms%nlod)
COMPLEX, ALLOCATABLE :: ab(:,:,:)
COMPLEX, ALLOCATABLE :: abclo(:,:,:,:,:)
CALL timestart("LO setup")
ALLOCATE(ab(MAXVAL(lapw%nv),0:2*atoms%lmaxd*(atoms%lmaxd+2)+1,MIN(jintsp,iintsp):MAX(jintsp,iintsp)))
ALLOCATE(abclo(3,-atoms%llod:atoms%llod,2*(2*atoms%llod+1),atoms%nlod,2))
na = sum(atoms%neq(:n-1))
DO nn = 1,atoms%neq(n)
......@@ -49,20 +56,30 @@ CONTAINS
IF (atoms%nlo(n).GE.1) THEN
CALL timestart("hsmt_abLO")
!---> set up the a,b and c coefficients
!---> for the local orbitals, if necessary.
!---> actually, these are the fj,gj equivalents
CALL setabc1lo(atoms,n,ud,isp, alo1,blo1,clo1)
!---> add the local orbital contribution to the overlap and
!---> hamiltonian matrix, if they are used for this atom.
!---> synthesize the complex conjugates of a and b
DO i=MIN(jintsp,iintsp),MAX(jintsp,iintsp)
CALL hsmt_ab(mpi,sym,atoms,noco,isp,i,n,na,cell,lapw,fj,gj,ab(:,:,i),ab_size,.TRUE.,abclo(:,:,:,:,i),alo1,blo1,clo1)
ENDDO
CALL timestop("hsmt_abLO")
!---> add the local orbital contribution to the overlap and
!---> hamiltonian matrix, if they are used for this atom.
CALL slomat(&
input,atoms,mpi,lapw,cell,noco,n,na,&
isp,ud, alo1,blo1,clo1,fj,gj,&
iintsp,jintsp,chi,smat)
CALL timestart("hlomat")
CALL hlomat(input,atoms,mpi,lapw,ud,tlmplm,sym,cell,noco,isp,&
n,na,fj,gj,alo1,blo1,clo1,iintsp,jintsp,chi,hmat)
!n,na,fj,gj,alo1,blo1,clo1,iintsp,jintsp,chi,hmat)
n,na,fj,gj,ab,abclo,ab_size,iintsp,jintsp,chi,hmat)
CALL timestop("hlomat")
ENDIF
END IF
!---> end loop over equivalent atoms
......
......@@ -27,7 +27,11 @@ CONTAINS
TYPE(t_atoms),INTENT(IN) :: atoms
TYPE(t_lapw),INTENT(IN) :: lapw
TYPE(t_tlmplm),INTENT(IN) :: td
#if defined CPP_GPU
REAL,MANAGED,INTENT(IN) :: fj(:,:,:,:),gj(:,:,:,:)
#else
REAL,INTENT(IN) :: fj(:,0:,:,:),gj(:,0:,:,:)
#endif
! .. Scalar Arguments ..
INTEGER, INTENT (IN) :: n
COMPLEX :: chi_one,chi(2,2)
......
This diff is collapsed.