stepf.F90 9.35 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
      MODULE m_stepf
      use m_juDFT
      CONTAINS
      SUBROUTINE stepf(&
     &                 stars,atoms,oneD,&
     &                 input,cell,&
     &                 vacuum)
!
!*********************************************************************
!     calculates the fourier components of the interstitial step
!     function for the reciprocal vectors of the star list.
!           m. weinert  1986
!*********************************************************************
!
!     also set up FFT of U(G) on a (-2G:+2G) grid for convolutions
!
!*********************************************************************
      USE m_cfft
      USE m_constants
      USE m_od_cylbes
      USE m_types
      IMPLICIT NONE
!     ..
      TYPE(t_stars),INTENT(INOUT)   :: stars
      TYPE(t_atoms),INTENT(IN)      :: atoms
      TYPE(t_oneD),INTENT(IN)       :: oneD
      TYPE(t_input),INTENT(IN)      :: input
      TYPE(t_cell),INTENT(IN)       :: cell
      TYPE(t_vacuum),INTENT(IN)     :: vacuum
!     ..
!     .. Local Scalars ..
      COMPLEX c_c,c_phs
      REAL c,dd,gs,th,inv_omtil,r_phs
      REAL g_rmt,g_sqr,help,g_abs,fp_omtil,r_c,gr,gx,gy
      INTEGER i,k,n,n3,na,nn,i1,i2,i3,ic,ifft2d,ifftd,kk
      INTEGER ic1,ic2,ic3,icc,im1,im2,im3,loopstart
!     ..
!     .. Local Arrays ..
      COMPLEX sf(stars%n3d)
      REAL g(3),gm(3),fJ
      REAL,    ALLOCATABLE :: bfft(:)
      INTEGER, ALLOCATABLE :: icm(:,:,:)
!     ..
!     .. Intrinsic Functions ..
      INTRINSIC cmplx,cos,sin
!     ..
!     ..
!--->    if step function on unit14, then just read it in
!
      ifftd = 27*stars%k1d*stars%k2d*stars%k3d
!
      OPEN (14,file='wkf2',form='unformatted',status='unknown')
      REWIND 14
      READ (14,end=10,err=10) n3,n
      IF (n3.NE.stars%ng3) GO TO 10
      IF (n.NE.ifftd) GO TO 10
      READ (14) (stars%ustep(i),i=1,stars%ng3)
      READ (14) (stars%ufft(i),i=0,ifftd-1)
      CLOSE (14)
      RETURN

   10 CONTINUE
     
      IF (input%film) THEN
         dd = vacuum%dvac*cell%area/cell%omtil
         IF (oneD%odd%d1) dd = cell%vol/cell%omtil
      ELSE
         dd = 1.0
      END IF
!--->    G=0 star
      c = 0.0
      DO 20 n = 1,atoms%ntype
         c = c + atoms%neq(n)*atoms%volmts(n)/cell%omtil
   20 CONTINUE
      stars%ustep(1) = cmplx(dd-c,0.0)
!--->    G(parallel)=0  (for film)
      IF (input%film .AND. .NOT.oneD%odd%d1) THEN
         DO 30 k = 2,stars%ng3
            IF (stars%ig2(k).EQ.1) THEN
               th = cell%bmat(3,3)*stars%kv3(3,k)*cell%z1
               stars%ustep(k) = cmplx(cell%vol*sin(th)/th/cell%omtil,0.0)
            ELSE
               stars%ustep(k) = cmplx(0.0,0.0)
            END IF
   30    CONTINUE
!-odim
      ELSEIF (oneD%odd%d1) THEN
         DO 35 k = 2,stars%ng3
            gr = 0.0
            IF (stars%kv3(3,k).EQ.0) THEN
               kk = stars%ig2(k)
               gr = stars%sk2(kk)
               CALL od_cylbes(1,gr*cell%z1,fJ)
               stars%ustep(k) = cmplx(2.*dd*fJ/(gr*cell%z1),0.)
            ELSE
               stars%ustep(k) =cmplx(0.,0.)
            END IF
 35      CONTINUE
!+odim
      ELSE
         DO 40 k = 2,stars%ng3
            stars%ustep(k) = cmplx(0.0,0.0)
   40    CONTINUE
      END IF
!--->    sphere contributions
      na = 0
      DO 90 n = 1,atoms%ntype
         c = 3.*atoms%volmts(n)/cell%omtil
!-->     structure factors: loop over equivalent atoms
         na = na + 1
         DO 50 k = 2,stars%ng3
            th = -tpi_const* (stars%kv3(1,k)*atoms%taual(1,na)+stars%kv3(2,k)*atoms%taual(2,na)+&
     &           stars%kv3(3,k)*atoms%taual(3,na))
            sf(k) = cmplx(cos(th),sin(th))
   50    CONTINUE
         DO 70 nn = 2,atoms%neq(n)
            na = na + 1
            DO 60 k = 2,stars%ng3
               th = -tpi_const* (stars%kv3(1,k)*atoms%taual(1,na)+stars%kv3(2,k)*atoms%taual(2,na)+&
     &              stars%kv3(3,k)*atoms%taual(3,na))
               sf(k) = sf(k) + cmplx(cos(th),sin(th))
   60       CONTINUE
   70    CONTINUE
!--->    update step function
         DO 80 k = 2,stars%ng3
            gs = stars%sk3(k)*atoms%rmt(n)
            stars%ustep(k) = stars%ustep(k) - (c* (sin(gs)/gs-cos(gs))/ (gs*gs))*&
     &                 sf(k)
   80    CONTINUE
   90 CONTINUE
!
! --> set up stepfunction on fft-grid:
!
      ALLOCATE (  bfft(0:27*stars%k1d*stars%k2d*stars%k3d-1) )
      im1=CEILING(1.5*stars%k1d); im2=CEILING(1.5*stars%k2d); im3=CEILING(1.5*stars%k3d) 
      ALLOCATE ( icm(-im1:im1,-im2:im2,-im3:im3) )
      icm = 0
      ic=1
      inv_omtil=1.0/cell%omtil
      fp_omtil=  -fpi_const*inv_omtil
      !DO first vector before loop
      stars%ufft(0)=0.0
      bfft(0)=0.0
      DO n=1,atoms%ntype
         stars%ufft(0)=stars%ufft(0)+atoms%neq(n)*atoms%volmts(n)
      ENDDO
      stars%ufft(0)=1.0-stars%ufft(0)*inv_omtil
      loopstart=1
      DO i3=0,3*stars%k3d-1
        gm(3)=real(i3)
        IF ( gm(3) > 1.5*stars%k3d ) gm(3)=gm(3)-3.0*stars%k3d
        DO i2=0,3*stars%k2d-1
          gm(2)=real(i2)
          IF ( gm(2) > 1.5*stars%k2d ) gm(2)=gm(2)-3.0*stars%k2d
          DO i1=loopstart,3*stars%k1d-1
            loopstart=0 !all further loops start at i1=0
            gm(1)=real(i1)
            IF ( gm(1) > 1.5*stars%k1d ) gm(1)=gm(1)-3.0*stars%k1d
!
!-> use inversion <-> c.c.
!
            ic1 = NINT(gm(1)) ; ic2 = NINT(gm(2)) ; ic3 = NINT(gm(3))
            IF ( gm(3) < 0.0 ) THEN  ! retreive from table icm()
              icc = icm(-ic1,-ic2,-ic3)
              !IF (icc.EQ.0) THEN
              !  write(*,*) ic1,ic2,ic3,icc
              !   CALL juDFT_error(" error in stepf! ",calledby="stepf")
              !ENDIF
              stars%ufft(ic) = stars%ufft(icc)
#ifndef CPP_INVERSION
              bfft(ic) = - bfft(icc)
#endif
              ic=ic+1
              CYCLE 
            ELSE                         ! store number in table icm()
              icm(ic1,ic2,ic3) = ic
              IF (ic1 == im1) icm(-ic1,ic2,ic3) = ic
              IF (ic2 == im2) icm(ic1,-ic2,ic3) = ic
              IF ((ic1 == im1).AND.(ic2 == im2)) icm(-ic1,-ic2,ic3) = ic
            ENDIF
            g=matmul(transpose(cell%bmat),gm)
            g_sqr = dot_product(g,g)
            g_abs = sqrt(g_sqr)
            help = fp_omtil/g_sqr

#ifdef CPP_INVERSION
            r_c = 0.0
!       Better no OpenMP, huge overhead! Parallel region is located in a
!       nested loop and is therefore created more then a billion times. 
!           U.Alekseeva 15.10.2015 
!!$OMP  PARALLEL DO PRIVATE(r_phs,nn,th,na,g_rmt,n) DEFAULT(SHARED) REDUCTION(+:r_c)
            DO n=1,atoms%ntype
                r_phs = 0.0
                na=sum(atoms%neq(:n-1))
                DO nn=1,atoms%neq(n)
                  th=-tpi_const*dot_product(gm,atoms%taual(:,na+nn))
                  r_phs = r_phs + cos(th)
                ENDDO
                g_rmt = g_abs * atoms%rmt(n)
                r_c=r_c+atoms%rmt(n)*(sin(g_rmt)/g_rmt-cos(g_rmt))*r_phs
            ENDDO
!!$OMP END PARALLEL DO
            stars%ufft(ic) = help * r_c
#else
            c_c=cmplx(0.0,0.0)
!!$OMP  PARALLEL DO PRIVATE(c_phs,nn,th,na,g_rmt,n) DEFAULT(SHARED)REDUCTION(+:c_c)

            DO n=1,atoms%ntype
                c_phs = cmplx(0.0,0.0)
                na=sum(atoms%neq(:n-1))
                DO nn=1,atoms%neq(n)
                  th=-tpi_const*dot_product(gm,atoms%taual(:,na+nn))
                  c_phs = c_phs + exp(cmplx(0,th))
                ENDDO
                g_rmt = g_abs * atoms%rmt(n)
                c_c=c_c+atoms%rmt(n)*(sin(g_rmt)/g_rmt-cos(g_rmt))*c_phs
            ENDDO
!!$OMP END PARALLEL DO
            stars%ufft(ic) = help * real(c_c)
            bfft(ic) = help * aimag(c_c)
#endif


            IF (((i3.eq.3*stars%k3d/2).or.&
     &           (i2.eq.3*stars%k2d/2)).or.&
     &           (i1.eq.3*stars%k1d/2)) THEN
                   stars%ufft(ic)=0.0 
                   bfft(ic)=0.0 
            ENDIF
!-odim
            IF (oneD%odd%d1) THEN
               IF (ic.LT.9*stars%k1d*stars%k2d .AND. ic.NE.0) THEN
                  gx = (cell%bmat(1,1)*gm(1) + cell%bmat(2,1)*gm(2))
                  gy = (cell%bmat(1,2)*gm(1) + cell%bmat(2,2)*gm(2))
                  gr = sqrt(gx**2 + gy**2)
                  CALL od_cylbes(1,gr*cell%z1,fJ)
                  stars%ufft(ic) = stars%ufft(ic)&
     &                 +2*cell%vol*fJ/(gr*cell%z1*cell%omtil)
               END IF
            END IF
!+odim
            ic=ic+1
          ENDDO
        ENDDO
      ENDDO
!
! --> add film-contributions
!
      IF (input%film .AND. .NOT.oneD%odd%d1) THEN

        ifft2d=9*stars%k1d*stars%k2d
        stars%ufft(0)=stars%ufft(0)+cell%vol*inv_omtil-1.0

        DO i3=1,3*stars%k3d-1
          gm(3)=real(i3)
          IF ( gm(3) > 1.5*stars%k3d ) gm(3)=gm(3)-3.0*stars%k3d
          th=cell%bmat(3,3)*gm(3)*cell%z1
          stars%ufft(i3*ifft2d)=stars%ufft(i3*ifft2d)+cell%vol*inv_omtil*sin(th)/th
        ENDDO

      ELSEIF (oneD%odd%d1) THEN
!-odim
         stars%ufft(0) = stars%ufft(0)+cell%vol*inv_omtil-1.0
!+odim

      ENDIF
!
! --> make fft
!
#ifdef CPP_INVERSION
      bfft=0.0
#endif
      CALL cfft(stars%ufft,bfft,ifftd,3*stars%k1d,3*stars%k1d,+1)
      CALL cfft(stars%ufft,bfft,ifftd,3*stars%k2d,9*stars%k1d*stars%k2d,+1)
      CALL cfft(stars%ufft,bfft,ifftd,3*stars%k3d,ifftd,+1)

      DEALLOCATE ( bfft , icm )

!--->    store on unit14
      REWIND 14
      WRITE (14) stars%ng3,ifftd
      WRITE (14) (stars%ustep(i),i=1,stars%ng3)
      WRITE (14) (stars%ufft(i),i=0,ifftd-1)

      CLOSE (14)

      END SUBROUTINE stepf
      END MODULE m_stepf