mpi_col_den.F90 18.9 KB
Newer Older
1

2 3 4 5 6
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7 8 9 10
MODULE m_mpi_col_den
  !
  ! collect all data calculated in cdnval on different pe's on pe 0
  !
11 12
  ! for some data also spread them back onto all pe's (Jan. 2019  U.Alekseeva)
  !
13
CONTAINS
14 15
  SUBROUTINE mpi_col_den(mpi,sphhar,atoms,oneD,stars,vacuum,input,noco,jspin,regCharges,dos,&
                         results,denCoeffs,orb,denCoeffsOffdiag,den,n_mmp,mcd,slab,orbcomp)
16

17 18
#include"cpp_double.h"
    USE m_types
19
    USE m_constants
20
    USE m_juDFT
21 22
    IMPLICIT NONE

23 24
    TYPE(t_results),INTENT(INOUT):: results
    TYPE(t_mpi),INTENT(IN)       :: mpi
25 26 27 28 29 30
    TYPE(t_oneD),INTENT(IN)      :: oneD
    TYPE(t_input),INTENT(IN)     :: input
    TYPE(t_vacuum),INTENT(IN)    :: vacuum
    TYPE(t_noco),INTENT(IN)      :: noco
    TYPE(t_stars),INTENT(IN)     :: stars
    TYPE(t_sphhar),INTENT(IN)    :: sphhar
31
    TYPE(t_atoms),INTENT(IN)     :: atoms
32
    TYPE(t_potden),INTENT(INOUT) :: den
33 34 35
    INCLUDE 'mpif.h'
    ! ..
    ! ..  Scalar Arguments ..
36
    INTEGER, INTENT (IN) :: jspin
37 38
    ! ..
    ! ..  Array Arguments ..
39
    COMPLEX,INTENT(INOUT) :: n_mmp(-lmaxU_const:lmaxU_const,-lmaxU_const:lmaxU_const,atoms%n_u)
40 41 42 43 44 45 46 47
    TYPE (t_orb),               INTENT(INOUT) :: orb
    TYPE (t_denCoeffs),         INTENT(INOUT) :: denCoeffs
    TYPE (t_denCoeffsOffdiag),  INTENT(INOUT) :: denCoeffsOffdiag
    TYPE (t_regionCharges),     INTENT(INOUT) :: regCharges
    TYPE (t_dos),               INTENT(INOUT) :: dos
    TYPE (t_mcd),     OPTIONAL, INTENT(INOUT) :: mcd
    TYPE (t_slab),    OPTIONAL, INTENT(INOUT) :: slab
    TYPE (t_orbcomp), OPTIONAL, INTENT(INOUT) :: orbcomp
48 49
    ! ..
    ! ..  Local Scalars ..
50
    INTEGER :: n, i
51 52 53 54 55
    ! ..
    ! ..  Local Arrays ..
    INTEGER :: ierr(3)
    COMPLEX, ALLOCATABLE :: c_b(:)
    REAL,    ALLOCATABLE :: r_b(:)
56
    INTEGER, ALLOCATABLE :: i_b(:)
57 58 59 60
    ! ..
    ! ..  External Subroutines
    EXTERNAL CPP_BLAS_scopy,CPP_BLAS_ccopy,MPI_REDUCE

61
    CALL timestart("mpi_col_den")
62 63

    ! -> Collect den%pw(:,jspin)
64
    n = stars%ng3
65
    ALLOCATE(c_b(n))
66
    CALL MPI_REDUCE(den%pw(:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
67
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, den%pw(:,jspin), 1)
68
    DEALLOCATE (c_b)
69 70

    ! -> Collect den%vacxy(:,:,:,jspin)
71
    IF (input%film) THEN
72 73
       !n = vacuum%nmzxyd*(oneD%odi%n2d-1)*2
       n=size(den%vacxy(:,:,:,jspin))
74
       ALLOCATE(c_b(n))
75
       CALL MPI_REDUCE(den%vacxy(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
76
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, den%vacxy(:,:,:,jspin), 1)
77
       DEALLOCATE (c_b)
78 79

       ! -> Collect den%vacz(:,:,jspin)
80 81
       !n = vacuum%nmzd*2
       n=size(den%vacz(:,:,jspin))
82
       ALLOCATE(r_b(n))
83
       CALL MPI_REDUCE(den%vacz(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
84
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, den%vacz(:,:,jspin), 1)
85 86
       DEALLOCATE (r_b)
    ENDIF
87

88
    ! -> Collect uu(),ud() and dd()
89
    n = (atoms%lmaxd+1)*atoms%ntype
90
    ALLOCATE(r_b(n))
91 92 93 94 95 96
    CALL MPI_ALLREDUCE(denCoeffs%uu(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%uu(0:,:,jspin), 1)
    CALL MPI_ALLREDUCE(denCoeffs%du(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%du(0:,:,jspin), 1)
    CALL MPI_ALLREDUCE(denCoeffs%dd(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%dd(0:,:,jspin), 1)
97
    DEALLOCATE (r_b)
98

99
    !--> Collect uunmt,udnmt,dunmt,ddnmt
100
    n = (((atoms%lmaxd*(atoms%lmaxd+3))/2)+1)*sphhar%nlhd*atoms%ntype
101
    ALLOCATE(r_b(n))
102 103 104 105 106 107 108 109
    CALL MPI_ALLREDUCE(denCoeffs%uunmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%uunmt(0:,:,:,jspin), 1)
    CALL MPI_ALLREDUCE(denCoeffs%udnmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%udnmt(0:,:,:,jspin), 1)
    CALL MPI_ALLREDUCE(denCoeffs%dunmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%dunmt(0:,:,:,jspin), 1)
    CALL MPI_ALLREDUCE(denCoeffs%ddnmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%ddnmt(0:,:,:,jspin), 1)
110
    DEALLOCATE (r_b)
111

112
    !--> ener & sqal
113
    n=4*atoms%ntype
114
    ALLOCATE(r_b(n))
115 116 117 118
    CALL MPI_ALLREDUCE(regCharges%ener(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%ener(0:,:,jspin), 1)
    CALL MPI_ALLREDUCE(regCharges%sqal(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
    CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%sqal(0:,:,jspin), 1)
119
    DEALLOCATE (r_b)
120

121 122
    !--> svac & pvac
    IF ( input%film ) THEN
123
       n=SIZE(regCharges%svac,1)
124
       ALLOCATE(r_b(n))
125 126 127 128
       CALL MPI_ALLREDUCE(regCharges%svac(:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%svac(:,jspin), 1)
       CALL MPI_ALLREDUCE(regCharges%pvac(:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%pvac(:,jspin), 1)
129 130
       DEALLOCATE (r_b)
    ENDIF
131 132 133 134 135

    !collect DOS stuff
    n = SIZE(dos%jsym,1)*SIZE(dos%jsym,2)
    ALLOCATE(i_b(n))
    CALL MPI_REDUCE(dos%jsym(:,:,jspin),i_b,n,MPI_INTEGER,MPI_SUM,0, MPI_COMM_WORLD,ierr)
136 137 138 139 140
    IF (mpi%irank.EQ.0) THEN
       DO i = 1, SIZE(dos%jsym,2)
          dos%jsym(:,i,jspin) = i_b((i-1)*SIZE(dos%jsym,1)+1:i*SIZE(dos%jsym,1))
       END DO
    END IF
141 142 143 144 145
    DEALLOCATE (i_b)

    n = SIZE(dos%ksym,1)*SIZE(dos%ksym,2)
    ALLOCATE(i_b(n))
    CALL MPI_REDUCE(dos%ksym(:,:,jspin),i_b,n,MPI_INTEGER,MPI_SUM,0, MPI_COMM_WORLD,ierr)
146 147 148 149 150
    IF (mpi%irank.EQ.0) THEN
       DO i = 1, SIZE(dos%ksym,2)
          dos%ksym(:,i,jspin) = i_b((i-1)*SIZE(dos%ksym,1)+1:i*SIZE(dos%ksym,1))
       END DO
    END IF
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
    DEALLOCATE (i_b)

    n = SIZE(dos%qis,1)*SIZE(dos%qis,2)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qis(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qis(:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qal,1)*SIZE(dos%qal,2)*SIZE(dos%qal,3)*SIZE(dos%qal,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qal(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qal(0:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qvac,1)*SIZE(dos%qvac,2)*SIZE(dos%qvac,3)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qvac(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qvac(:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qvlay,1)*SIZE(dos%qvlay,2)*SIZE(dos%qvlay,3)*SIZE(dos%qvlay,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qvlay(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qvlay(:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qstars,1)*SIZE(dos%qstars,2)*SIZE(dos%qstars,3)*SIZE(dos%qstars,4)*SIZE(dos%qstars,5)
    ALLOCATE(c_b(n))
    CALL MPI_REDUCE(dos%qstars(:,:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
180
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, dos%qstars(:,:,:,:,:,jspin), 1)
181 182
    DEALLOCATE (c_b)

183
    ! Collect mcd%mcd
184 185 186 187 188 189 190
    IF (PRESENT(mcd)) THEN
       n = SIZE(mcd%mcd,1)*SIZE(mcd%mcd,2)*SIZE(mcd%mcd,3)*SIZE(mcd%mcd,4)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(mcd%mcd(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, mcd%mcd(:,:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
191

192
    ! Collect slab - qintsl and qmtsl
193 194 195 196 197 198
    IF (PRESENT(slab)) THEN
       n = SIZE(slab%qintsl,1)*SIZE(slab%qintsl,2)*SIZE(slab%qintsl,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(slab%qintsl(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, slab%qintsl(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
199

200 201 202 203 204 205
       n = SIZE(slab%qmtsl,1)*SIZE(slab%qmtsl,2)*SIZE(slab%qmtsl,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(slab%qmtsl(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, slab%qmtsl(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
206

207
    ! Collect orbcomp - comp and qmtp
208 209 210 211 212 213
    IF (PRESENT(orbcomp)) THEN
       n = SIZE(orbcomp%comp,1)*SIZE(orbcomp%comp,2)*SIZE(orbcomp%comp,3)*SIZE(orbcomp%comp,4)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(orbcomp%comp(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, orbcomp%comp(:,:,:,:,jspin), 1)
       DEALLOCATE (r_b)
214

215 216 217 218 219 220
       n = SIZE(orbcomp%qmtp,1)*SIZE(orbcomp%qmtp,2)*SIZE(orbcomp%qmtp,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(orbcomp%qmtp(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, orbcomp%qmtp(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
221

222 223
    ! -> Collect force
    IF (input%l_f) THEN
224
       n=3*atoms%ntype
225
       ALLOCATE(r_b(n))
226
       CALL MPI_REDUCE(results%force(1,1,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
227
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, results%force(1,1,jspin), 1)
228 229
       DEALLOCATE (r_b)
    ENDIF
230

231 232 233
    ! -> Optional the LO-coefficients: aclo,bclo,enerlo,cclo,acnmt,bcnmt,ccnmt
    IF (atoms%nlod.GE.1) THEN

234
       n=atoms%nlod*atoms%ntype 
235
       ALLOCATE (r_b(n))
236 237 238 239
       CALL MPI_ALLREDUCE(denCoeffs%aclo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM, MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%aclo(:,:,jspin), 1)
       CALL MPI_ALLREDUCE(denCoeffs%bclo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM, MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%bclo(:,:,jspin), 1)
240 241 242 243
       CALL MPI_ALLREDUCE(regCharges%enerlo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%enerlo(:,:,jspin), 1)
       CALL MPI_ALLREDUCE(regCharges%sqlo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%sqlo(:,:,jspin), 1)
244 245
       DEALLOCATE (r_b)

246
       n = atoms%nlod * atoms%nlod * atoms%ntype
247
       ALLOCATE (r_b(n))
248 249
       CALL MPI_ALLREDUCE(denCoeffs%cclo(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM, MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%cclo(:,:,:,jspin), 1)
250 251
       DEALLOCATE (r_b)

252
       n = (atoms%lmaxd+1) * atoms%ntype * atoms%nlod * sphhar%nlhd
253
       ALLOCATE (r_b(n))
254 255 256 257
       CALL MPI_ALLREDUCE(denCoeffs%acnmt(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM, MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%acnmt(0:,:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(denCoeffs%bcnmt(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM, MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%bcnmt(0:,:,:,:,jspin), 1)
258 259
       DEALLOCATE (r_b)

260
       n = atoms%ntype * sphhar%nlhd * atoms%nlod**2
261
       ALLOCATE (r_b(n))
262 263
       CALL MPI_ALLREDUCE(denCoeffs%ccnmt(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM, MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%ccnmt(:,:,:,:,jspin), 1)
264 265 266
       DEALLOCATE (r_b)

    ENDIF
267

268 269 270
    ! ->  Now the SOC - stuff: orb, orblo and orblo
    IF (noco%l_soc) THEN
       ! orb
271
       n=(atoms%lmaxd+1)*(2*atoms%lmaxd+1)*atoms%ntype
272
       ALLOCATE (r_b(n))
273 274 275 276
       CALL MPI_ALLREDUCE(orb%uu(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, orb%uu(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%dd(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, orb%dd(:,:,:,jspin), 1)
277 278 279
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
280 281 282 283 284 285 286 287
       CALL MPI_ALLREDUCE(orb%uup(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uup(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%ddp(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%ddp(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%uum(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uum(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%ddm(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%ddm(:,:,:,jspin), 1)
288
       DEALLOCATE (c_b)
289

290
       n = atoms%nlod * (2*atoms%llod+1) * atoms%ntype
291
       ALLOCATE (r_b(n))
292 293 294 295
       CALL MPI_ALLREDUCE(orb%uulo(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, orb%uulo(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%dulo(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, orb%dulo(:,:,:,jspin), 1)
296 297 298
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
299 300 301 302 303 304 305 306
       CALL MPI_ALLREDUCE(orb%uulop(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uulop(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%dulop(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%dulop(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%uulom(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uulom(:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%dulom(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%dulom(:,:,:,jspin), 1)
307
       DEALLOCATE (c_b)
308

309
       n = atoms%nlod * atoms%nlod * (2*atoms%llod+1) * atoms%ntype
310
       ALLOCATE (r_b(n))
311 312
       CALL MPI_ALLREDUCE(orb%z(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_scopy(n, r_b, 1, orb%z(:,:,:,:,jspin), 1)
313 314 315
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
316 317 318 319
       CALL MPI_ALLREDUCE(orb%p(:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%p(:,:,:,:,jspin), 1)
       CALL MPI_ALLREDUCE(orb%m(:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
       CALL CPP_BLAS_ccopy(n, c_b, 1, orb%m(:,:,:,:,jspin), 1)
320 321 322 323 324 325 326
       DEALLOCATE (c_b)

    ENDIF

    ! -> Collect the noco staff: 
    IF ( noco%l_noco .AND. jspin.EQ.1 ) THEN

327
       n = stars%ng3
328
       ALLOCATE(c_b(n))
329
       CALL MPI_REDUCE(den%pw(:,3),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
330
       IF (mpi%irank.EQ.0) THEN
331
          den%pw(:,3)=RESHAPE(c_b,(/n/))
332 333 334 335 336
       ENDIF
       DEALLOCATE (c_b)
       !
       IF (input%film) THEN

337 338
          !n = vacuum%nmzxyd*(oneD%odi%n2d-1)*2
          n=size(den%vacxy(:,:,:,3))
339
          ALLOCATE(c_b(n))
340
          CALL MPI_REDUCE(den%vacxy(:,:,:,3),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
341
          IF (mpi%irank.EQ.0) THEN
342
             CALL CPP_BLAS_ccopy(n, c_b, 1, den%vacxy(:,:,:,3), 1)
343 344 345
          ENDIF
          DEALLOCATE (c_b)
          !
346 347
          !n = vacuum%nmzd*2*2
          n=SIZE(den%vacz(:,:,3:4))
348 349
          ALLOCATE(r_b(n))
          CALL MPI_REDUCE(den%vacz(:,:,3:4),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
350
          IF (mpi%irank.EQ.0) THEN
351
             den%vacz(:,:,3:4)=RESHAPE(r_b,SHAPE(den%vacz(:,:,3:4)))
352
          ENDIF
353
          DEALLOCATE (r_b)
354 355 356 357 358

       ENDIF ! input%film


       IF (noco%l_mperp) THEN
359

360
          ! -->     for (spin)-off diagonal part of muffin-tin
361
          n = (atoms%lmaxd+1) * atoms%ntype
362
          ALLOCATE(c_b(n))
363 364 365 366 367 368 369 370
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%uu21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uu21(:,:), 1)
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%ud21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ud21(:,:), 1)
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%du21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%du21(:,:), 1)
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%dd21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dd21(:,:), 1)
371
          DEALLOCATE (c_b)
372

373
          ! -->     lo,u coeff's:
374
          n = atoms%nlod * atoms%ntype
375
          ALLOCATE(c_b(n))
376 377 378 379 380 381 382 383
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%uulo21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uulo21(:,:), 1)
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%ulou21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ulou21(:,:), 1)
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%dulo21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dulo21(:,:), 1)
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%ulod21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ulod21(:,:), 1)
384
          DEALLOCATE (c_b)
385

386
          ! -->     lo,lo' coeff's:
387
          n = atoms%nlod*atoms%nlod*atoms%ntype
388
          ALLOCATE(c_b(n))
389 390
          CALL MPI_ALLREDUCE(denCoeffsOffdiag%uloulop21,c_b,n,CPP_MPI_COMPLEX, MPI_SUM,MPI_COMM_WORLD,ierr)
          CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uloulop21, 1)
391 392
          DEALLOCATE (c_b)

393
          IF (denCoeffsOffdiag%l_fmpl) THEN
394

395
             !-->        Full magnetization plots: Collect uunmt21, etc.
396
             n = (atoms%lmaxd+1)**2 *sphhar%nlhd*atoms%ntype
397
             ALLOCATE(c_b(n))
398 399 400 401 402 403 404 405
             CALL MPI_ALLREDUCE(denCoeffsOffdiag%uunmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,MPI_COMM_WORLD,ierr)
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uunmt21, 1)
             CALL MPI_ALLREDUCE(denCoeffsOffdiag%udnmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,MPI_COMM_WORLD,ierr)
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%udnmt21, 1)
             CALL MPI_ALLREDUCE(denCoeffsOffdiag%dunmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,MPI_COMM_WORLD,ierr)
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dunmt21, 1)
             CALL MPI_ALLREDUCE(denCoeffsOffdiag%ddnmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,MPI_COMM_WORLD,ierr)
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ddnmt21, 1)
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
             DEALLOCATE (c_b)

          ENDIF ! fmpl
       ENDIF  ! mperp
    ENDIF   ! noco

    !+lda+U
    IF ( atoms%n_u.GT.0 ) THEN
       n = 49*atoms%n_u 
       ALLOCATE(c_b(n))
       CALL MPI_REDUCE(n_mmp,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) THEN
          CALL CPP_BLAS_ccopy(n, c_b, 1, n_mmp, 1)
       ENDIF
       DEALLOCATE (c_b)
    ENDIF
    !-lda+U

424 425
    CALL timestop("mpi_col_den")

426 427
  END SUBROUTINE mpi_col_den
END MODULE m_mpi_col_den