Commit 6b4fc4ed authored by Uliana Alekseeva's avatar Uliana Alekseeva

optional MPI in init/stepf.F90

parent ebf77e75
MODULE m_setup
USE m_juDFT
CONTAINS
SUBROUTINE setup(atoms,kpts,DIMENSION,sphhar,&
SUBROUTINE setup(mpi,atoms,kpts,DIMENSION,sphhar,&
obsolete,sym,stars,oneD, input,noco,vacuum,cell,xcpot, sliceplot,enpara,l_opti)
!
!----------------------------------------
......@@ -55,6 +55,7 @@
IMPLICIT NONE
! ..
! .. Scalars Arguments ..
TYPE(t_mpi),INTENT(IN) :: mpi
TYPE(t_atoms),INTENT(INOUT) :: atoms
TYPE(t_kpts),INTENT(INOUT) :: kpts
TYPE(t_dimension),INTENT(INOUT):: DIMENSION
......@@ -77,6 +78,7 @@
INTEGER :: ntp1,ii,i,j,n1,n2,na,np1,n
INTEGER, ALLOCATABLE :: lmx1(:), nq1(:), nlhtp1(:)
!
IF ( mpi%irank == 0 ) THEN
IF (sym%namgrp.EQ.'any ') THEN
CALL rw_symfile('R',94,'sym.out',sym%nop,cell%bmat, sym%mrot,sym%tau,sym%nop,sym%nop2,sym%symor)
ELSE
......@@ -173,16 +175,18 @@
CALL prp_xcfft(stars,input, cell, xcpot)
!
ENDIF ! (mpi%irank == 0)
IF (.NOT.sliceplot%iplot) THEN
!
CALL stepf(sym,stars,atoms,oneD, input,cell, vacuum)
CALL stepf(sym,stars,atoms,oneD, input,cell, vacuum,mpi)
!
CALL convn(DIMENSION,atoms,stars)
!
!---> set up electric field parameters (if needed)
!
CALL efield(atoms, DIMENSION, stars, sym, vacuum, cell, input)
IF ( mpi%irank == 0 ) THEN
CALL convn(DIMENSION,atoms,stars)
!
!---> set up electric field parameters (if needed)
!
CALL efield(atoms, DIMENSION, stars, sym, vacuum, cell, input)
ENDIF
ENDIF
END SUBROUTINE setup
......
......@@ -2,7 +2,7 @@
USE m_juDFT
USE m_cdn_io
CONTAINS
SUBROUTINE stepf(sym,stars,atoms,oneD, input,cell, vacuum)
SUBROUTINE stepf(sym,stars,atoms,oneD, input,cell, vacuum,mpi)
!
!*********************************************************************
! calculates the fourier components of the interstitial step
......@@ -13,6 +13,7 @@
! also set up FFT of U(G) on a (-2G:+2G) grid for convolutions
!
!*********************************************************************
#include"cpp_double.h"
USE m_cfft
USE m_constants
USE m_od_cylbes
......@@ -21,11 +22,12 @@
! ..
TYPE(t_sym),INTENT(IN) :: sym
TYPE(t_stars),INTENT(INOUT) :: stars
TYPE(t_atoms),INTENT(IN) :: atoms
TYPE(t_atoms),INTENT(INOUT) :: atoms
TYPE(t_oneD),INTENT(IN) :: oneD
TYPE(t_input),INTENT(IN) :: input
TYPE(t_cell),INTENT(IN) :: cell
TYPE(t_cell),INTENT(INOUT) :: cell
TYPE(t_vacuum),INTENT(IN) :: vacuum
TYPE(t_mpi),INTENT(IN),optional :: mpi
! ..
! .. Local Scalars ..
COMPLEX c_c,c_phs
......@@ -40,16 +42,34 @@
REAL g(3),gm(3),fJ
REAL, ALLOCATABLE :: bfft(:)
INTEGER, ALLOCATABLE :: icm(:,:,:)
! ..
! ..
!---> if step function stored on disc, then just read it in
!
ifftd = 27*stars%mx1*stars%mx2*stars%mx3
INTEGER :: mpi_id, i3_start, i3_end
#ifdef CPP_MPI
INCLUDE 'mpif.h'
INTEGER ierr
INTEGER, ALLOCATABLE :: icm_local(:,:,:)
REAL, ALLOCATABLE :: ufft_local(:), bfft_local(:)
#endif
CALL readStepfunction(stars, atoms, cell, vacuum, l_error)
IF(.NOT.l_error) THEN
RETURN
END IF
ifftd = 27*stars%mx1*stars%mx2*stars%mx3
IF (PRESENT(mpi)) THEN
mpi_id = mpi%irank
ELSE
mpi_id = 0
ENDIF
! ..
! ..
!---> if step function stored on disc, then just read it in
!
l_error = .FALSE.
IF (mpi_id == 0) CALL readStepfunction(stars, atoms, cell, vacuum, l_error)
#ifdef CPP_MPI
IF (PRESENT(mpi)) CALL MPI_BCAST(l_error,1,MPI_LOGICAL,0,mpi%mpi_comm,ierr)
#endif
IF(.NOT.l_error) THEN
RETURN
END IF
IF (mpi_id == 0) THEN
IF (input%film) THEN
dd = vacuum%dvac*cell%area/cell%omtil
......@@ -116,65 +136,116 @@
stars%ustep(k) = stars%ustep(k) - (c* (SIN(gs)/gs-COS(gs))/ (gs*gs))* sf(k)
ENDDO
ENDDO
!
! --> set up stepfunction on fft-grid:
!
ALLOCATE ( bfft(0:27*stars%mx1*stars%mx2*stars%mx3-1) )
im1=CEILING(1.5*stars%mx1); im2=CEILING(1.5*stars%mx2); im3=CEILING(1.5*stars%mx3)
ALLOCATE ( icm(-im1:im1,-im2:im2,-im3:im3) )
icm = 0
ic=1
inv_omtil=1.0/cell%omtil
fp_omtil= -fpi_const*inv_omtil
!DO first vector before loop
stars%ufft(0)=0.0
bfft(0)=0.0
DO n=1,atoms%ntype
stars%ufft(0)=stars%ufft(0)+atoms%neq(n)*atoms%volmts(n)
ENDDO
stars%ufft(0)=1.0-stars%ufft(0)*inv_omtil
loopstart=1
DO i3=0,3*stars%mx3-1
ENDIF ! (mpi_id == 0)
!
! --> set up stepfunction on fft-grid:
!
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
CALL MPI_BCAST(cell%omtil,1,CPP_MPI_REAL,0,mpi%mpi_comm,ierr)
CALL MPI_BCAST(cell%bmat,9,CPP_MPI_REAL,0,mpi%mpi_comm,ierr)
CALL MPI_BCAST(atoms%neq,size(atoms%neq),MPI_INTEGER,0,mpi%mpi_comm,ierr)
CALL MPI_BCAST(atoms%volmts,size(atoms%volmts),CPP_MPI_REAL,0,mpi%mpi_comm,ierr)
CALL MPI_BCAST(atoms%taual,size(atoms%taual),CPP_MPI_REAL,0,mpi%mpi_comm,ierr)
CALL MPI_BCAST(atoms%rmt,size(atoms%rmt),CPP_MPI_REAL,0,mpi%mpi_comm,ierr)
ALLOCATE ( bfft_local(0:ifftd-1), ufft_local(0:ifftd-1) )
bfft_local = 0.0
ufft_local = 0.0
ENDIF
#endif
ALLOCATE ( bfft(0:ifftd-1) )
im1=CEILING(1.5*stars%mx1); im2=CEILING(1.5*stars%mx2); im3=CEILING(1.5*stars%mx3)
ALLOCATE ( icm(-im1:im1,-im2:im2,-im3:im3) )
icm = 0
#ifdef CPP_MPI
IF ( PRESENT(mpi) ) THEN
ALLOCATE ( icm_local(-im1:im1,-im2:im2,-im3:im3) )
icm_local = 0
ENDIF
#endif
inv_omtil=1.0/cell%omtil
fp_omtil= -fpi_const*inv_omtil
!DO first vector before loop
stars%ufft=0.0
bfft=0.0
#ifdef CPP_MPI
IF ( PRESENT(mpi) ) THEN
IF ( mpi%irank == 0 ) THEN
DO n=1,atoms%ntype
ufft_local(0)=ufft_local(0)+atoms%neq(n)*atoms%volmts(n)
ENDDO
ufft_local(0)=1.0-ufft_local(0)*inv_omtil
ENDIF
ELSE
DO n=1,atoms%ntype
stars%ufft(0)=stars%ufft(0)+atoms%neq(n)*atoms%volmts(n)
ENDDO
stars%ufft(0)=1.0-stars%ufft(0)*inv_omtil
ENDIF
#else
DO n=1,atoms%ntype
stars%ufft(0)=stars%ufft(0)+atoms%neq(n)*atoms%volmts(n)
ENDDO
stars%ufft(0)=1.0-stars%ufft(0)*inv_omtil
#endif
CALL timestart("stepf_loop")
i3_start = 0
i3_end = im3
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
i3_start = mpi%irank * (im3/mpi%isize)
i3_end = (mpi%irank + 1) * (im3/mpi%isize) - 1
IF (mpi%irank == mpi%isize - 1) i3_end = im3
ENDIF
#endif
DO i3=i3_start,i3_end
gm(3)=REAL(i3)
IF ( gm(3) > 1.5*stars%mx3 ) gm(3)=gm(3)-3.0*stars%mx3
DO i2=0,3*stars%mx2-1
gm(2)=REAL(i2)
IF ( gm(2) > 1.5*stars%mx2 ) gm(2)=gm(2)-3.0*stars%mx2
IF ( 2*i2 > 3*stars%mx2 ) gm(2)=gm(2)-3.0*stars%mx2
IF (i3 == 0 .AND. i2 == 0 ) THEN
loopstart=1
ELSE
loopstart=0
ENDIF
DO i1=loopstart,3*stars%mx1-1
loopstart=0 !all further loops start at i1=0
ic=i1 + 3*stars%mx1*i2 + 9*stars%mx1*stars%mx2*i3
gm(1)=REAL(i1)
IF ( gm(1) > 1.5*stars%mx1 ) gm(1)=gm(1)-3.0*stars%mx1
IF ( 2*i1 > 3*stars%mx1 ) gm(1)=gm(1)-3.0*stars%mx1
!
!-> use inversion <-> c.c.
!
ic1 = NINT(gm(1)) ; ic2 = NINT(gm(2)) ; ic3 = NINT(gm(3))
IF ( gm(3) < 0.0 ) THEN ! retreive from table icm()
icc = icm(-ic1,-ic2,-ic3)
!IF (icc.EQ.0) THEN
! write(*,*) ic1,ic2,ic3,icc
! CALL juDFT_error(" error in stepf! ",calledby="stepf")
!ENDIF
stars%ufft(ic) = stars%ufft(icc)
IF (.NOT.sym%invs) bfft(ic) = - bfft(icc)
ic=ic+1
CYCLE
ELSE ! store number in table icm()
icm(ic1,ic2,ic3) = ic
IF (ic1 == im1) icm(-ic1,ic2,ic3) = ic
IF (ic2 == im2) icm(ic1,-ic2,ic3) = ic
IF ((ic1 == im1).AND.(ic2 == im2)) icm(-ic1,-ic2,ic3) = ic
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
icm_local(ic1,ic2,ic3) = ic
IF (ic1 == im1) icm_local(-ic1,ic2,ic3) = ic
IF (ic2 == im2) icm_local(ic1,-ic2,ic3) = ic
IF ((ic1 == im1).AND.(ic2 == im2)) icm_local(-ic1,-ic2,ic3) = ic
ELSE
icm(ic1,ic2,ic3) = ic
IF (ic1 == im1) icm(-ic1,ic2,ic3) = ic
IF (ic2 == im2) icm(ic1,-ic2,ic3) = ic
IF ((ic1 == im1).AND.(ic2 == im2)) icm(-ic1,-ic2,ic3) = ic
ENDIF
#else
icm(ic1,ic2,ic3) = ic
IF (ic1 == im1) icm(-ic1,ic2,ic3) = ic
IF (ic2 == im2) icm(ic1,-ic2,ic3) = ic
IF ((ic1 == im1).AND.(ic2 == im2)) icm(-ic1,-ic2,ic3) = ic
#endif
g=MATMUL(TRANSPOSE(cell%bmat),gm)
g_sqr = DOT_PRODUCT(g,g)
g_abs = SQRT(g_sqr)
help = fp_omtil/g_sqr
IF (sym%invs) THEN
r_c = 0.0
! Better no OpenMP, huge overhead! Parallel region is located in a
! nested loop and is therefore created more then a billion times.
! U.Alekseeva 15.10.2015
!!$OMP PARALLEL DO PRIVATE(r_phs,nn,th,na,g_rmt,n) DEFAULT(SHARED) REDUCTION(+:r_c)
DO n=1,atoms%ntype
r_phs = 0.0
na=SUM(atoms%neq(:n-1))
......@@ -185,12 +256,17 @@
g_rmt = g_abs * atoms%rmt(n)
r_c=r_c+atoms%rmt(n)*(SIN(g_rmt)/g_rmt-COS(g_rmt))*r_phs
ENDDO
!!$OMP END PARALLEL DO
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
ufft_local(ic) = help * r_c
ELSE
stars%ufft(ic) = help * r_c
ENDIF
#else
stars%ufft(ic) = help * r_c
#endif
ELSE
c_c=CMPLX(0.0,0.0)
!!$OMP PARALLEL DO PRIVATE(c_phs,nn,th,na,g_rmt,n) DEFAULT(SHARED)REDUCTION(+:c_c)
DO n=1,atoms%ntype
c_phs = CMPLX(0.0,0.0)
na=SUM(atoms%neq(:n-1))
......@@ -201,27 +277,85 @@
g_rmt = g_abs * atoms%rmt(n)
c_c=c_c+atoms%rmt(n)*(SIN(g_rmt)/g_rmt-COS(g_rmt))*c_phs
ENDDO
!!$OMP END PARALLEL DO
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
ufft_local(ic) = help * REAL(c_c)
bfft_local(ic) = help * AIMAG(c_c)
ELSE
stars%ufft(ic) = help * REAL(c_c)
bfft(ic) = help * AIMAG(c_c)
ENDIF
#else
stars%ufft(ic) = help * REAL(c_c)
bfft(ic) = help * AIMAG(c_c)
#endif
ENDIF
IF (((i3.EQ.3*stars%mx3/2).OR. (i2.EQ.3*stars%mx2/2)).OR. (i1.EQ.3*stars%mx1/2)) THEN
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
ufft_local(ic)=0.0
bfft_local(ic)=0.0
ELSE
stars%ufft(ic)=0.0
bfft(ic)=0.0
ENDIF
#else
stars%ufft(ic)=0.0
bfft(ic)=0.0
#endif
ENDIF
!-odim
IF (oneD%odd%d1) THEN
IF (oneD%odd%d1) THEN !!!! MPI version is not tested yet !!!!
IF (ic.LT.9*stars%mx1*stars%mx2 .AND. ic.NE.0) THEN
gx = (cell%bmat(1,1)*gm(1) + cell%bmat(2,1)*gm(2))
gy = (cell%bmat(1,2)*gm(1) + cell%bmat(2,2)*gm(2))
gr = SQRT(gx**2 + gy**2)
CALL od_cylbes(1,gr*cell%z1,fJ)
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
ufft_local(ic) = ufft_local(ic) +2*cell%vol*fJ/(gr*cell%z1*cell%omtil)
ELSE
stars%ufft(ic) = stars%ufft(ic) +2*cell%vol*fJ/(gr*cell%z1*cell%omtil)
ENDIF
#else
stars%ufft(ic) = stars%ufft(ic) +2*cell%vol*fJ/(gr*cell%z1*cell%omtil)
#endif
END IF
END IF
!+odim
ENDDO
ENDDO
ENDDO
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
CALL MPI_REDUCE(ufft_local,stars%ufft,ifftd,CPP_MPI_REAL, MPI_SUM,0,mpi%mpi_comm,ierr)
CALL MPI_REDUCE(bfft_local,bfft,ifftd,CPP_MPI_REAL, MPI_SUM,0,mpi%mpi_comm,ierr)
CALL MPI_REDUCE(icm_local,icm,size(icm),MPI_INTEGER, MPI_SUM,0,mpi%mpi_comm,ierr)
ENDIF
#endif
CALL timestop("stepf_loop")
IF (mpi_id == 0) THEN
ic = 9*stars%mx1*stars%mx2*(im3+1)
DO i3=im3+1,3*stars%mx3-1
gm(3)=REAL(i3)
gm(3)=gm(3)-3.0*stars%mx3
DO i2=0,3*stars%mx2-1
gm(2)=REAL(i2)
IF ( 2*i2 > 3*stars%mx2 ) gm(2)=gm(2)-3.0*stars%mx2
DO i1=0,3*stars%mx1-1
gm(1)=REAL(i1)
IF ( 2*i1 > 3*stars%mx1 ) gm(1)=gm(1)-3.0*stars%mx1
!
!-> use inversion <-> c.c.
!
ic1 = NINT(gm(1)) ; ic2 = NINT(gm(2)) ; ic3 = NINT(gm(3))
icc = icm(-ic1,-ic2,-ic3)
stars%ufft(ic) = stars%ufft(icc)
IF (.NOT.sym%invs) bfft(ic) = - bfft(icc)
ic=ic+1
ENDDO
ENDDO
......@@ -256,8 +390,13 @@
CALL cfft(stars%ufft,bfft,ifftd,3*stars%mx3,ifftd,+1)
DEALLOCATE ( bfft , icm )
#ifdef CPP_MPI
IF (PRESENT(mpi)) THEN
DEALLOCATE ( bfft_local, ufft_local , icm_local )
ENDIF
#endif
CALL writeStepfunction(stars)
END SUBROUTINE stepf
END MODULE m_stepf
ENDIF ! (mpi_id == 0)
END SUBROUTINE stepf
END MODULE m_stepf
......@@ -315,11 +315,13 @@
IF ((sliceplot%iplot).OR.(input%strho).OR.(input%swsp).OR.&
& (input%lflip).OR.(obsolete%l_f2u).OR.(obsolete%l_u2f).OR.(input%l_bmt)) l_opti = .TRUE.
!
END IF ! mpi%irank.eq.0
CALL setup(&
& atoms,kpts,DIMENSION,sphhar,&
& mpi,atoms,kpts,DIMENSION,sphhar,&
& obsolete,sym,stars,oneD,input,noco,&
& vacuum,cell,xcpot,&
& sliceplot,enpara,l_opti)
IF (mpi%irank.EQ.0) THEN
!
stars%ng3=stars%ng3 ; stars%ng2=stars%ng2
!+t3e
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment