Commit 79fd0997 authored by Daniel Wortmann's avatar Daniel Wortmann
Browse files

Merge branch 'strgn' into 'develop'

Merge branch in which star generator and star type was refactored

See merge request fleur/fleur!205
parents b44aa77e d423a9e5
......@@ -123,7 +123,7 @@ CONTAINS
COMPLEX,INTENT(IN),OPTIONAL :: vpw(:,:)
REAL,INTENT(IN),OPTIONAL :: vr(:,0:,:,:)
COMPLEX,INTENT (INOUT) :: qpw(stars%ng3,input%jspins)
COMPLEX,INTENT (INOUT) :: rhtxy(vacuum%nmzxyd,oneD%odi%n2d-1,2,input%jspins)
COMPLEX,INTENT (INOUT) :: rhtxy(vacuum%nmzxyd,stars%ng2-1,2,input%jspins)
REAL, INTENT (INOUT) :: rho(atoms%jmtd,0:sphhar%nlhd,atoms%ntype,input%jspins)
REAL, INTENT (INOUT) :: rht(vacuum%nmzd,2,input%jspins)
REAL, INTENT (INOUT) :: rh(atoms%msh,atoms%ntype)
......
......@@ -60,13 +60,14 @@
x = (0.0,0.0)
END IF
ELSE
ig2d = stars%ig2(ig3d)
IF (ig2d.EQ.1) THEN
g = stars%kv3(3,ng)*cell%bmat(3,3)*cell%z1
x = cmplx(cell%vol*sin(g)/g,0.0)
ELSE
x = (0.0,0.0)
END IF
x = (0.0,0.0)
if (allocated(stars%ig2)) THEN !film
ig2d = stars%ig2(ig3d)
IF (ig2d.EQ.1) THEN
g = stars%kv3(3,ng)*cell%bmat(3,3)*cell%z1
x = cmplx(cell%vol*sin(g)/g,0.0)
ENDIF
END IF
END IF
END IF
......@@ -77,7 +78,7 @@
IF (.NOT.oneD%odi%d1) THEN
CALL spgrot(&
& sym%nop,sym%symor,sym%mrot,sym%tau,sym%invtab,&
& stars%kv3,&
& stars%kv3(:,ig3d),&
& kr,ph)
DO n = 1,atoms%ntype
srmt = s*atoms%rmt(n)
......@@ -99,7 +100,7 @@
srmt = s*atoms%rmt(n)
CALL spgrot(&
& sym%nop,sym%symor,sym%mrot,sym%tau,sym%invtab,&
& stars%kv3,&
& stars%kv3(:,ig3d),&
& kr,ph)
sfs = (0.0,0.0)
DO nn = 1,sym%nop
......@@ -167,8 +168,9 @@
x(ng) = cmplx(cell%volint,0.0)
cycle starloop
ELSE
IF (oneD%odi%d1) THEN
IF (allocated(stars%ig2)) THEN
!Film calculation
IF (oneD%odi%d1) THEN
IF (stars%kv3(3,ng).EQ.0) THEN
g = (stars%kv3(1,ng)*cell%bmat(1,1) + stars%kv3(2,ng)*cell%bmat(2,1))**2 + &
(stars%kv3(1,ng)*cell%bmat(1,2) + stars%kv3(2,ng)*cell%bmat(2,2))**2
......@@ -179,7 +181,7 @@
ELSE
x(ng) = (0.0,0.0)
END IF
ELSE
ELSE
ig2d = stars%ig2(ig3d)
IF (ig2d.EQ.1) THEN
g = stars%kv3(3,ng)*cell%bmat(3,3)*cell%z1
......@@ -187,8 +189,10 @@
ELSE
x(ng) = (0.0,0.0)
END IF
END IF
END IF
ELSE
x(ng)=0.0
ENDIF
END IF
! -----> sphere contributions
s = stars%sk3(ig3d)
......
......@@ -27,9 +27,7 @@ CONTAINS
USE m_types_mat
#ifdef CPP_ELPA_ONENODE
USE elpa
#ifdef CPP_GPU
USE nvtx
#endif
#endif
IMPLICIT NONE
......@@ -49,9 +47,7 @@ CONTAINS
call timestart("ELPA 2018 one-node")
#ifdef CPP_GPU
call nvtxStartRange("ELPA", 5)
#endif
err = elpa_init(20180525)
elpa_obj => elpa_allocate()
......@@ -77,17 +73,12 @@ CONTAINS
CALL hmat%add_transpose(hmat)
CALL smat%add_transpose(smat)
#ifdef CPP_GPU
call nvtxStartRange("EigVec", 7)
#endif
IF (hmat%l_real) THEN
CALL elpa_obj%generalized_eigenvectors(hmat%data_r, smat%data_r, eig2, ev_dist%data_r, .FALSE., err)
ELSE
CALL elpa_obj%generalized_eigenvectors(hmat%data_c, smat%data_c, eig2, ev_dist%data_c, .FALSE., err)
ENDIF
#ifdef CPP_GPU
call nvtxEndRange!("EigVec",8)
#endif
CALL elpa_deallocate(elpa_obj)
CALL elpa_uninit()
......@@ -100,9 +91,7 @@ CONTAINS
CALL ev%alloc(hmat%l_real, hmat%matsize1, ne)
CALL ev%copy(ev_dist, 1, 1)
#ifdef CPP_GPU
call nvtxEndRange!("ELPA",7)
#endif
#endif
call timestop("ELPA 2018 one-node")
......
......@@ -94,7 +94,7 @@ CONTAINS
END DO
DO i=-stars%mx1, stars%mx1
DO j=-stars%mx2,stars%mx2
WRITE (87,'(i2,1x,e12.4)') stars%ig2(stars%ig(i,j,0)),stars%rgphs(i,j,0)
!WRITE (87,'(i2,1x,e12.4)') stars%ig2(stars%ig(i,j,0)),stars%rgphs(i,j,0)
END DO
END DO
END IF
......
......@@ -4,8 +4,8 @@ CONTAINS
& stars,&
& afft2,bfft2,&
& fg,fgi,fgxy,&
& stride,isn,&
& gfxy )
& isn,&
& firstderiv,secondderiv,cell )
!*************************************************************
!* *
......@@ -20,71 +20,58 @@ CONTAINS
!* *
!*************************************************************
#include"cpp_double.h"
USE m_cfft
USE m_types_fftgrid
USE m_types
IMPLICIT NONE
TYPE(t_stars),INTENT(IN) :: stars
INTEGER, INTENT (IN) :: isn,stride
TYPE(t_cell),INTENT(IN),OPTIONAL:: cell
INTEGER, INTENT (IN) :: isn
REAL :: fg,fgi
REAL :: afft2(0:9*stars%mx1*stars%mx2-1),bfft2(0:9*stars%mx1*stars%mx2-1)
COMPLEX :: fgxy(stride,stars%ng2-1)
REAL,OPTIONAL,INTENT(IN) :: gfxy(0:) !factor to calculate the derivates, i.e. g_x
COMPLEX :: fgxy(:)
REAL,OPTIONAL,INTENT(IN):: firstderiv(3),secondderiv(3)
!... local variables
INTEGER i,ifftd2
REAL scale
COMPLEX fg2(stars%ng2)
TYPE(t_fftgrid) :: grid
INTEGER i
COMPLEX fg2(stars%ng2)
ifftd2=9*stars%mx1*stars%mx2
!
IF (isn.GT.0) THEN
!
call grid%init([3*stars%mx1,3*stars%mx2,1])
IF (isn>0) THEN
! ---> put stars onto the fft-grid
!
fg2(1) = CMPLX(fg,fgi)
CALL CPP_BLAS_ccopy(stars%ng2-1,fgxy,stride,fg2(2),1)
!fg2(2:)=fgxy(1,:)
afft2=0.0
bfft2=0.0
IF (PRESENT(gfxy)) THEN
DO i=0,(2*stars%mx1+1)* (2*stars%mx2+1)-1
if (stars%igfft2(i,1)==0) cycle
afft2(stars%igfft2(i,2))=REAL(fg2(stars%igfft2(i,1))*stars%pgfft2(i))*gfxy(i)
bfft2(stars%igfft2(i,2))=AIMAG(fg2(stars%igfft2(i,1))*stars%pgfft2(i))*gfxy(i)
ENDDO
ELSE
DO i=0,(2*stars%mx1+1)* (2*stars%mx2+1)-1
if (stars%igfft2(i,1)==0) cycle
afft2(stars%igfft2(i,2))=REAL(fg2(stars%igfft2(i,1))*stars%pgfft2(i))
bfft2(stars%igfft2(i,2))=AIMAG(fg2(stars%igfft2(i,1))*stars%pgfft2(i))
ENDDO
ENDIF
ENDIF
!---> now do the fft (isn=+1 : G -> r ; isn=-1 : r -> G)
fg2(2:)=fgxy(:)
CALL cfft(afft2,bfft2,ifftd2,3*stars%mx1,3*stars%mx1,isn)
CALL cfft(afft2,bfft2,ifftd2,3*stars%mx2,ifftd2,isn)
call grid%putFieldOnGrid(stars,fg2,cell,firstderiv=firstderiv,secondderiv=secondderiv,l_2d=.true.)
else
grid%grid=cmplx(afft2,bfft2)
endif
call grid%perform_fft(forward=(isn<0))
if (isn >0) THEN
afft2 = real(grid%grid)
bfft2 = aimag(grid%grid)
else
call grid%takeFieldFromGrid(stars,fg2,l_2d=.true.)
!Scaling by stars%nstr is already done in previous call
!IF (PRESENT(scaled)) THEN
! IF (.not.scaled) fg3 = fg3*stars%nstr
!ENDIF
ENDIF
IF (isn.LT.0) THEN
!
! ---> collect stars from the fft-grid
!
DO i=1,stars%ng2
fg2(i) = CMPLX(0.0,0.0)
ENDDO
scale=1.0/ifftd2
DO i=0,(2*stars%mx1+1)* (2*stars%mx2+1)-1
if (stars%igfft2(i,1)==0) cycle
fg2(stars%igfft2(i,1))=fg2(stars%igfft2(i,1))+ CONJG( stars%pgfft2(i) ) * &
& CMPLX(afft2(stars%igfft2(i,2)),bfft2(stars%igfft2(i,2)))
ENDDO
fg=scale*REAL(fg2(1))/stars%nstr2(1)
fgi=scale*AIMAG(fg2(1))/stars%nstr2(1)
call grid%takeFieldFromGrid(stars, fg2, l_2d=.true.)
fg=REAL(fg2(1))
fgi=AIMAG(fg2(1))
DO i=2,stars%ng2
fgxy(1,i-1)=scale*fg2(i)/stars%nstr2(i)
fgxy(i-1)=fg2(i)
ENDDO
ENDIF
......
......@@ -32,6 +32,6 @@ global/symMMPmat.f90
)
if (FLEUR_USE_GPU)
set(fleur_F90 ${fleur_F90}
# global/nvtx.F90
)
endif()
module nvtx
! See https://devblogs.nvidia.com/parallelforall/customize-cuda-fortran-profiling-nvtx/
use iso_c_binding
implicit none
integer,private :: col(7) = [ Z'0000ff00', Z'000000ff', Z'00ffff00', Z'00ff00ff', Z'0000ffff', Z'00ff0000', Z'00ffffff']
character(len=256),private :: tempName
type, bind(C):: nvtxEventAttributes
integer(C_INT16_T):: version=1
integer(C_INT16_T):: size=48 !
integer(C_INT):: category=0
integer(C_INT):: colorType=1 ! NVTX_COLOR_ARGB = 1
integer(C_INT):: color
integer(C_INT):: payloadType=0 ! NVTX_PAYLOAD_UNKNOWN = 0
integer(C_INT):: reserved0
integer(C_INT64_T):: payload ! union uint,int,double
integer(C_INT):: messageType=1 ! NVTX_MESSAGE_TYPE_ASCII = 1
type(C_PTR):: message ! ascii char
end type
interface nvtxRangePush
! push range with custom label and standard color
subroutine nvtxRangePushA(name) bind(C, name='nvtxRangePushA')
use iso_c_binding
character(kind=C_CHAR,len=*) :: name
end subroutine
! push range with custom label and custom color
subroutine nvtxRangePushEx(event) bind(C, name='nvtxRangePushEx')
use iso_c_binding
import:: nvtxEventAttributes
type(nvtxEventAttributes):: event
end subroutine
end interface
interface nvtxRangePop
subroutine nvtxRangePop() bind(C, name='nvtxRangePop')
end subroutine
end interface
contains
subroutine nvtxStartRange(name,id)
character(kind=c_char,len=*) :: name
integer, optional:: id
type(nvtxEventAttributes):: event
tempName=trim(name)//c_null_char
if ( .not. present(id)) then
call nvtxRangePush(tempName)
else
event%color=col(mod(id,7)+1)
event%message=c_loc(tempName)
call nvtxRangePushEx(event)
end if
end subroutine
subroutine nvtxEndRange
call nvtxRangePop
end subroutine
end module nvtx
......@@ -30,10 +30,10 @@ CONTAINS
! ..
!
n=SIZE(ind)
IF (n>SIZE(lv)) CALL judft_error("BUG: incosistent dimensions")
IF (n>SIZE(lv)) CALL judft_error("BUG in sort: inconsistent dimensions")
ALLOCATE(llv(n))
IF (PRESENT(lv1)) THEN
IF (n>SIZE(lv1)) CALL judft_error("BUG: incosistent dimensions")
IF (n>SIZE(lv1)) CALL judft_error("BUG in sort: inconsistent dimensions")
llv=lv1
ELSE
llv=(/(1.*i,i=1,n)/)
......
......@@ -77,7 +77,7 @@ CONTAINS
DO k = 1, ng3
CALL spgrot(nop, symor, mrot, tau, invtab, kv3(1, k), kr, ph)
CALL spgrot(nop, symor, mrot, tau, invtab, kv3(:, k), kr, ph)
sf(k) = 0.0
......
......@@ -8,7 +8,7 @@ init/local_sym.f
init/od_chisym.f
#init/prp_xcfft_box.f
init/ptsym.f
init/strgn_dim.F
#init/strgn_dim.F
init/ss_sym.f
)
set(fleur_F90 ${fleur_F90}
......@@ -27,12 +27,12 @@ init/boxdim.f90
#init/inpeig.f90
init/mapatom.F90
init/od_mapatom.F90
init/od_strgn1.f90
#init/od_strgn1.f90
#init/prp_qfft.f90
#init/prp_xcfft.f90
init/spgrot.f90
init/stepf.F90
init/strgn.f90
#init/strgn.f90
init/lapw_dim.F90
init/angles.f90
)
......@@ -785,7 +785,7 @@
& stars,&
& rhoRS, rhoRSimag,&
& fg, fgi,&
& efield%rhoEF(:,ivac), 1, -1)
& efield%rhoEF(:,ivac), -1)
! FFT gives the the average charge per grid point
! while sig_b stores the (total) charge per sheet
IF (efield%dirichlet .and. ABS (fg) > 1.0e-15) THEN
......
......@@ -142,81 +142,5 @@ CONTAINS
END SUBROUTINE lapw_dim
SUBROUTINE lapw_fft_dim(cell,input,noco,stars)
!
!*********************************************************************
! determines dimensions of the lapw basis set with |k+G|<rkmax.
! Generalization of the old apws_dim routine
!*********************************************************************
USE m_boxdim
USE m_ifft, ONLY : ifft235
USE m_types
IMPLICIT NONE
TYPE(t_cell),INTENT(IN) :: cell
TYPE(t_input),INTENT(IN) :: input
TYPE(t_noco),INTENT(IN) :: noco
TYPE(t_stars),INTENT(INOUT) :: stars
INTEGER j1,j2,j3,mk1,mk2,mk3,iofile,ksfft,q,nk,nv,nv2
INTEGER ispin,nvh(2),nv2h(2)
REAL arltv1,arltv2,arltv3,rkm,rk2,r2,s(3),gmaxp
REAL,ALLOCATABLE:: q_vectors(:,:)
REAL :: bkpt(3)
! ..
!
!-------> ABBREVIATIONS
!
! iofile : device number for in and output
! gmax : cut-off wavevector for charge density
! rkmax : cut-off for |g+k|
! gmaxp : gmaxp = gmax/rkmax, ideal: gmaxp=2
! arltv(i) : length of reciprical lattice vector along
! direction (i)
!
!---> Determine rkmax box of size mk1, mk2, mk3,
! for which |G(mk1,mk2,mk3) + (k1,k2,k3)| < rkmax
!---> Determine the dimensions kq1d, kq2d, kq3d
! of the dimension of the charge density fft-box
! needed for the fast calculation of pw density
! (add 1 due to integer rounding,
! factor 2 due to positive domain)
!
gmaxp = 2.0
CALL boxdim(cell%bmat,arltv1,arltv2,arltv3)
!
mk1 = int(gmaxp*input%rkmax/arltv1) + 1
mk2 = int(gmaxp*input%rkmax/arltv2) + 1
mk3 = int(gmaxp*input%rkmax/arltv3) + 1
!---> add + 1 in spin spiral calculation, to make sure that all G's are
!---> still within the FFT-box after being shifted by the spin spiral
!---> q-vector.
IF (noco%l_ss) THEN
mk1 = mk1 + 1
mk2 = mk2 + 1
mk3 = mk3 + 1
ENDIF
!
!stars%kq1_fft = 2*mk1
!stars%kq2_fft = 2*mk2
!stars%kq3_fft = 2*mk3
!
!---> fft's are usually fastest for low primes
! (restrict kqid to: kqid= (2**P) * (3**Q) * (5**R)
!
ksfft = 1
! ksfft=(0,1) : KEY OF SELECTING FFT-PRDOGRAM AND RADIX-TYPE
! 0 PROGRAM, RADIX-2 ONLY
! 1 PROGRAM, RADIX-2, RADIX-3,RADIX-5
!stars%kq1_fft = ifft235(ksfft,stars%kq1_fft,gmaxp)
!stars%kq2_fft = ifft235(ksfft,stars%kq2_fft,gmaxp)
!stars%kq3_fft = ifft235(ksfft,stars%kq3_fft,gmaxp)
END SUBROUTINE lapw_fft_dim
END MODULE m_lapwdim
......@@ -11,11 +11,7 @@ MODULE m_make_stars
PUBLIC :: make_stars
CONTAINS
SUBROUTINE make_stars(stars,sym,atoms,vacuum,sphhar,input,cell,xcpot,oneD,noco,fmpi)
USE m_od_strgn1
USE m_strgn
USE m_stepf
USE m_strgndim
USE m_lapwdim
USE m_types_sym
USE m_types_atoms
USE m_types_vacuum
......@@ -52,92 +48,20 @@ CONTAINS
! Dimensioning of stars
IF (fmpi%irank==0) THEN
IF (input%film) THEN
CALL strgn1_dim(fmpi%irank==0,input%gmax,cell%bmat,sym%invs,sym%zrfs,sym%mrot,&
sym%tau,sym%nop,sym%nop2,stars%mx1,stars%mx2,stars%mx3,&
stars%ng3,stars%ng2,oneD%odd)
ELSE
CALL strgn2_dim(fmpi%irank==0,input%gmax,cell%bmat,sym%invs,sym%zrfs,sym%mrot,&
sym%tau,sym%nop,stars%mx1,stars%mx2,stars%mx3,&
stars%ng3,stars%ng2)
oneD%odd%n2d = stars%ng2
oneD%odd%nq2 = stars%ng2
oneD%odd%nop = sym%nop
END IF
stars%gmax=input%gmax
kimax2= (2*stars%mx1+1)* (2*stars%mx2+1)-1
kimax = (2*stars%mx1+1)* (2*stars%mx2+1)* (2*stars%mx3+1)-1
IF (oneD%odd%d1) THEN
oneD%odd%k3 = stars%mx3
oneD%odd%nn2d = (2*(oneD%odd%k3)+1)*(2*(oneD%odd%M)+1)
ELSE
oneD%odd%k3 = 0
oneD%odd%M = 0
oneD%odd%nn2d = 1
oneD%odd%mb = 0
END IF
ALLOCATE (stars%ig(-stars%mx1:stars%mx1,-stars%mx2:stars%mx2,-stars%mx3:stars%mx3))
ALLOCATE (stars%ig2(stars%ng3))
ALLOCATE (stars%kv2(2,stars%ng2),stars%kv3(3,stars%ng3))
ALLOCATE (stars%nstr2(stars%ng2),stars%nstr(stars%ng3))
ALLOCATE (stars%sk2(stars%ng2),stars%sk3(stars%ng3),stars%phi2(stars%ng2))
ALLOCATE (stars%igfft(0:kimax,2),stars%igfft2(0:kimax2,2))
ALLOCATE (stars%rgphs(-stars%mx1:stars%mx1,-stars%mx2:stars%mx2,-stars%mx3:stars%mx3))
ALLOCATE (stars%pgfft(0:kimax),stars%pgfft2(0:kimax2))
ALLOCATE (stars%ufft(0:27*stars%mx1*stars%mx2*stars%mx3-1),stars%ustep(stars%ng3))
stars%kv3(:,:) = 0
stars%sk2(:) = 0.0
stars%sk3(:) = 0.0
stars%phi2(:) = 0.0
! Missing xc functionals initializations
IF (xcpot%needs_grad()) THEN
ALLOCATE (stars%ft2_gfx(0:kimax2),stars%ft2_gfy(0:kimax2))
ALLOCATE (oneD%pgft1x(0:oneD%odd%nn2d-1),oneD%pgft1xx(0:oneD%odd%nn2d-1),&
oneD%pgft1xy(0:oneD%odd%nn2d-1),&
oneD%pgft1y(0:oneD%odd%nn2d-1),oneD%pgft1yy(0:oneD%odd%nn2d-1))
ELSE
ALLOCATE (stars%ft2_gfx(0:1),stars%ft2_gfy(0:1))
ALLOCATE (oneD%pgft1x(0:1),oneD%pgft1xx(0:1),oneD%pgft1xy(0:1),&
oneD%pgft1y(0:1),oneD%pgft1yy(0:1))
END IF
oneD%odd%nq2 = oneD%odd%n2d
oneD%odi%nq2 = oneD%odd%nq2
CALL timestart("strgn")
IF (input%film) THEN
IF (oneD%odd%d1) THEN
CALL od_strgn1(xcpot,cell,sym,oneD)
END IF
CALL strgn1(fmpi%irank==0,stars,oneD,sym,atoms,vacuum,sphhar,input,cell,xcpot)
ELSE
CALL strgn2(fmpi%irank==0,stars,oneD,sym,atoms,vacuum,sphhar,input,cell,xcpot)
END IF
CALL lapw_fft_dim(cell,input,noco,stars)
!count number of stars in 2*rkmax (stars are ordered)
associate(i=>stars%ng3_fft)
DO i=stars%ng3,1,-1
IF ( stars%sk3(i).LE.2.0*input%rkmax ) EXIT
END DO
end associate
CALL timestop("strgn")
ENDIF
CALL stars%mpi_bc(fmpi%mpi_comm)
call timestart("star-setup")
stars%gmax=input%gmax
call stars%dim(sym,cell,input%film)
call stars%init(cell,sym,input%film,input%rkmax)
call timestop("star-setup")
ENDIF
CALL timestart("stepf")
! TODO: DFPT here to alternatively call stepf derivative.
ALLOCATE (stars%ufft(0:27*stars%mx1*stars%mx2*stars%mx3-1),stars%ustep(stars%ng3))
CALL stepf(sym,stars,atoms,oneD,input,cell,vacuum,fmpi)
CALL mpi_bc(stars%ustep,0,fmpi%mpi_comm)
CALL mpi_bc(stars%ufft,0,fmpi%mpi_comm)
CALL timestop("stepf")