mpi_col_den.F90 20.6 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7 8 9 10 11
MODULE m_mpi_col_den
  !
  ! collect all data calculated in cdnval on different pe's on pe 0
  !
CONTAINS
12 13
  SUBROUTINE mpi_col_den(mpi,sphhar,atoms,oneD,stars,vacuum,input,noco,jspin,regCharges,dos,orbcomp,&
                         results,denCoeffs,orb,denCoeffsOffdiag,den,n_mmp,mcd,slab)
14

15 16
#include"cpp_double.h"
    USE m_types
17
    USE m_constants
18
    USE m_juDFT
19 20
    IMPLICIT NONE

21 22
    TYPE(t_results),INTENT(INOUT):: results
    TYPE(t_mpi),INTENT(IN)       :: mpi
23 24 25 26 27 28
    TYPE(t_oneD),INTENT(IN)      :: oneD
    TYPE(t_input),INTENT(IN)     :: input
    TYPE(t_vacuum),INTENT(IN)    :: vacuum
    TYPE(t_noco),INTENT(IN)      :: noco
    TYPE(t_stars),INTENT(IN)     :: stars
    TYPE(t_sphhar),INTENT(IN)    :: sphhar
29
    TYPE(t_atoms),INTENT(IN)     :: atoms
30
    TYPE(t_potden),INTENT(INOUT) :: den
31 32 33
    INCLUDE 'mpif.h'
    ! ..
    ! ..  Scalar Arguments ..
34
    INTEGER, INTENT (IN) :: jspin
35 36
    ! ..
    ! ..  Array Arguments ..
37
    COMPLEX,INTENT(INOUT) :: n_mmp(-lmaxU_const:lmaxU_const,-lmaxU_const:lmaxU_const,atoms%n_u)
38 39 40
    TYPE (t_orb),              INTENT(INOUT) :: orb
    TYPE (t_denCoeffs),        INTENT(INOUT) :: denCoeffs
    TYPE (t_denCoeffsOffdiag), INTENT(INOUT) :: denCoeffsOffdiag
41
    TYPE (t_regionCharges),    INTENT(INOUT) :: regCharges
42
    TYPE (t_dos),              INTENT(INOUT) :: dos
43
    TYPE (t_orbcomp),          INTENT(INOUT) :: orbcomp
44 45
    TYPE (t_mcd),  OPTIONAL,   INTENT(INOUT) :: mcd
    TYPE (t_slab), OPTIONAL,   INTENT(INOUT) :: slab
46 47
    ! ..
    ! ..  Local Scalars ..
48
    INTEGER :: n, i
49 50 51 52 53
    ! ..
    ! ..  Local Arrays ..
    INTEGER :: ierr(3)
    COMPLEX, ALLOCATABLE :: c_b(:)
    REAL,    ALLOCATABLE :: r_b(:)
54
    INTEGER, ALLOCATABLE :: i_b(:)
55 56 57 58
    ! ..
    ! ..  External Subroutines
    EXTERNAL CPP_BLAS_scopy,CPP_BLAS_ccopy,MPI_REDUCE

59
    CALL timestart("mpi_col_den")
60 61

    ! -> Collect den%pw(:,jspin)
62
    n = stars%ng3
63
    ALLOCATE(c_b(n))
64
    CALL MPI_REDUCE(den%pw(:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
65
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, den%pw(:,jspin), 1)
66
    DEALLOCATE (c_b)
67 68

    ! -> Collect den%vacxy(:,:,:,jspin)
69 70 71
    IF (input%film) THEN
       n = vacuum%nmzxyd*(oneD%odi%n2d-1)*2
       ALLOCATE(c_b(n))
72
       CALL MPI_REDUCE(den%vacxy(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
73
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, den%vacxy(:,:,:,jspin), 1)
74
       DEALLOCATE (c_b)
75 76

       ! -> Collect den%vacz(:,:,jspin)
77 78
       n = vacuum%nmzd*2
       ALLOCATE(r_b(n))
79
       CALL MPI_REDUCE(den%vacz(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
80
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, den%vacz(:,:,jspin), 1)
81 82
       DEALLOCATE (r_b)
    ENDIF
83

84
    ! -> Collect uu(),ud() and dd()
85
    n = (atoms%lmaxd+1)*atoms%ntype
86
    ALLOCATE(r_b(n))
87
    CALL MPI_REDUCE(denCoeffs%uu(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
88
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%uu(0:,:,jspin), 1)
89
    CALL MPI_REDUCE(denCoeffs%du(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
90
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%du(0:,:,jspin), 1)
91
    CALL MPI_REDUCE(denCoeffs%dd(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
92
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%dd(0:,:,jspin), 1)
93
    DEALLOCATE (r_b)
94

95
    !--> Collect uunmt,udnmt,dunmt,ddnmt
96
    n = (((atoms%lmaxd*(atoms%lmaxd+3))/2)+1)*sphhar%nlhd*atoms%ntype
97
    ALLOCATE(r_b(n))
98
    CALL MPI_REDUCE(denCoeffs%uunmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
99
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%uunmt(0:,:,:,jspin), 1)
100
    CALL MPI_REDUCE(denCoeffs%udnmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
101
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%udnmt(0:,:,:,jspin), 1)
102
    CALL MPI_REDUCE(denCoeffs%dunmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
103
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%dunmt(0:,:,:,jspin), 1)
104
    CALL MPI_REDUCE(denCoeffs%ddnmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
105
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%ddnmt(0:,:,:,jspin), 1)
106
    DEALLOCATE (r_b)
107

108
    !--> ener & sqal
109
    n=4*atoms%ntype
110
    ALLOCATE(r_b(n))
111
    CALL MPI_REDUCE(regCharges%ener(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
112
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%ener(:,:,jspin), 1)
113
    CALL MPI_REDUCE(regCharges%sqal(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
114
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%sqal(:,:,jspin), 1)
115
    DEALLOCATE (r_b)
116

117 118 119 120
    !--> svac & pvac
    IF ( input%film ) THEN
       n=2
       ALLOCATE(r_b(n))
121
       CALL MPI_REDUCE(regCharges%svac(:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
122
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%svac(:,jspin), 1)
123
       CALL MPI_REDUCE(regCharges%pvac(:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
124
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%pvac(:,jspin), 1)
125 126
       DEALLOCATE (r_b)
    ENDIF
127 128 129 130 131

    !collect DOS stuff
    n = SIZE(dos%jsym,1)*SIZE(dos%jsym,2)
    ALLOCATE(i_b(n))
    CALL MPI_REDUCE(dos%jsym(:,:,jspin),i_b,n,MPI_INTEGER,MPI_SUM,0, MPI_COMM_WORLD,ierr)
132 133 134 135 136
    IF (mpi%irank.EQ.0) THEN
       DO i = 1, SIZE(dos%jsym,2)
          dos%jsym(:,i,jspin) = i_b((i-1)*SIZE(dos%jsym,1)+1:i*SIZE(dos%jsym,1))
       END DO
    END IF
137 138 139 140 141
    DEALLOCATE (i_b)

    n = SIZE(dos%ksym,1)*SIZE(dos%ksym,2)
    ALLOCATE(i_b(n))
    CALL MPI_REDUCE(dos%ksym(:,:,jspin),i_b,n,MPI_INTEGER,MPI_SUM,0, MPI_COMM_WORLD,ierr)
142 143 144 145 146
    IF (mpi%irank.EQ.0) THEN
       DO i = 1, SIZE(dos%ksym,2)
          dos%ksym(:,i,jspin) = i_b((i-1)*SIZE(dos%ksym,1)+1:i*SIZE(dos%ksym,1))
       END DO
    END IF
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
    DEALLOCATE (i_b)

    n = SIZE(dos%qis,1)*SIZE(dos%qis,2)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qis(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qis(:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qal,1)*SIZE(dos%qal,2)*SIZE(dos%qal,3)*SIZE(dos%qal,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qal(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qal(0:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qvac,1)*SIZE(dos%qvac,2)*SIZE(dos%qvac,3)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qvac(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qvac(:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qvlay,1)*SIZE(dos%qvlay,2)*SIZE(dos%qvlay,3)*SIZE(dos%qvlay,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qvlay(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qvlay(:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qstars,1)*SIZE(dos%qstars,2)*SIZE(dos%qstars,3)*SIZE(dos%qstars,4)*SIZE(dos%qstars,5)
    ALLOCATE(c_b(n))
    CALL MPI_REDUCE(dos%qstars(:,:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
176
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, dos%qstars(:,:,:,:,:,jspin), 1)
177 178
    DEALLOCATE (c_b)

179
    ! Collect mcd%mcd
180 181 182 183 184 185 186
    IF (PRESENT(mcd)) THEN
       n = SIZE(mcd%mcd,1)*SIZE(mcd%mcd,2)*SIZE(mcd%mcd,3)*SIZE(mcd%mcd,4)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(mcd%mcd(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, mcd%mcd(:,:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
187

188
    ! Collect slab - qintsl and qmtsl
189 190 191 192 193 194
    IF (PRESENT(slab)) THEN
       n = SIZE(slab%qintsl,1)*SIZE(slab%qintsl,2)*SIZE(slab%qintsl,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(slab%qintsl(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, slab%qintsl(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
195

196 197 198 199 200 201
       n = SIZE(slab%qmtsl,1)*SIZE(slab%qmtsl,2)*SIZE(slab%qmtsl,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(slab%qmtsl(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, slab%qmtsl(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
202

203 204 205 206 207 208 209 210 211 212 213 214 215 216
    ! Collect orbcomp - comp and qmtp

    n = SIZE(orbcomp%comp,1)*SIZE(orbcomp%comp,2)*SIZE(orbcomp%comp,3)*SIZE(orbcomp%comp,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(orbcomp%comp(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, orbcomp%comp(:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(orbcomp%qmtp,1)*SIZE(orbcomp%qmtp,2)*SIZE(orbcomp%qmtp,3)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(orbcomp%qmtp(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, orbcomp%qmtp(:,:,:,jspin), 1)
    DEALLOCATE (r_b)

217 218
    ! -> Collect force
    IF (input%l_f) THEN
219
       n=3*atoms%ntype
220
       ALLOCATE(r_b(n))
221
       CALL MPI_REDUCE(results%force(1,1,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
222
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, results%force(1,1,jspin), 1)
223 224
       DEALLOCATE (r_b)
    ENDIF
225

226 227 228
    ! -> Optional the LO-coefficients: aclo,bclo,enerlo,cclo,acnmt,bcnmt,ccnmt
    IF (atoms%nlod.GE.1) THEN

229
       n=atoms%nlod*atoms%ntype 
230
       ALLOCATE (r_b(n))
231
       CALL MPI_REDUCE(denCoeffs%aclo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
232
       IF (mpi%irank.EQ.0) THEN
233
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%aclo(:,:,jspin), 1)
234
       ENDIF
235
       CALL MPI_REDUCE(denCoeffs%bclo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
236
       IF (mpi%irank.EQ.0) THEN
237
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%bclo(:,:,jspin), 1)
238
       ENDIF
239
       CALL MPI_REDUCE(regCharges%enerlo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
240
       IF (mpi%irank.EQ.0) THEN
241
          CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%enerlo(:,:,jspin), 1)
242
       ENDIF
243
       CALL MPI_REDUCE(regCharges%sqlo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
244
       IF (mpi%irank.EQ.0) THEN
245
          CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%sqlo(:,:,jspin), 1)
246 247 248
       ENDIF
       DEALLOCATE (r_b)

249
       n = atoms%nlod * atoms%nlod * atoms%ntype
250
       ALLOCATE (r_b(n))
251
       CALL MPI_REDUCE(denCoeffs%cclo(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
252
       IF (mpi%irank.EQ.0) THEN
253
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%cclo(:,:,:,jspin), 1)
254 255 256
       ENDIF
       DEALLOCATE (r_b)

257
       n = (atoms%lmaxd+1) * atoms%ntype * atoms%nlod * sphhar%nlhd
258
       ALLOCATE (r_b(n))
259
       CALL MPI_REDUCE(denCoeffs%acnmt(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
260
       IF (mpi%irank.EQ.0) THEN
261
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%acnmt(0:,:,:,:,jspin), 1)
262
       ENDIF
263
       CALL MPI_REDUCE(denCoeffs%bcnmt(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
264
       IF (mpi%irank.EQ.0) THEN
265
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%bcnmt(0:,:,:,:,jspin), 1)
266 267 268
       ENDIF
       DEALLOCATE (r_b)

269
       n = atoms%ntype * sphhar%nlhd * atoms%nlod**2
270
       ALLOCATE (r_b(n))
271
       CALL MPI_REDUCE(denCoeffs%ccnmt(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
272
       IF (mpi%irank.EQ.0) THEN
273
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%ccnmt(:,:,:,:,jspin), 1)
274 275 276 277
       ENDIF
       DEALLOCATE (r_b)

    ENDIF
278

279 280 281
    ! ->  Now the SOC - stuff: orb, orblo and orblo
    IF (noco%l_soc) THEN
       ! orb
282
       n=(atoms%lmaxd+1)*(2*atoms%lmaxd+1)*atoms%ntype
283
       ALLOCATE (r_b(n))
284
       CALL MPI_REDUCE(orb%uu(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
285
       IF (mpi%irank.EQ.0) THEN
286
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%uu(:,:,:,jspin), 1)
287
       ENDIF
288
       CALL MPI_REDUCE(orb%dd(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
289
       IF (mpi%irank.EQ.0) THEN
290
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%dd(:,:,:,jspin), 1)
291 292 293 294
       ENDIF
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
295
       CALL MPI_REDUCE(orb%uup(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
296
       IF (mpi%irank.EQ.0) THEN
297
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uup(:,:,:,jspin), 1)
298
       ENDIF
299
       CALL MPI_REDUCE(orb%ddp(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
300
       IF (mpi%irank.EQ.0) THEN
301
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%ddp(:,:,:,jspin), 1)
302
       ENDIF
303
       CALL MPI_REDUCE(orb%uum(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
304
       IF (mpi%irank.EQ.0) THEN
305
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uum(:,:,:,jspin), 1)
306
       ENDIF
307
       CALL MPI_REDUCE(orb%ddm(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
308
       IF (mpi%irank.EQ.0) THEN
309
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%ddm(:,:,:,jspin), 1)
310 311
       ENDIF
       DEALLOCATE (c_b)
312

313
       n = atoms%nlod * (2*atoms%llod+1) * atoms%ntype
314
       ALLOCATE (r_b(n))
315
       CALL MPI_REDUCE(orb%uulo(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
316
       IF (mpi%irank.EQ.0) THEN
317
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%uulo(:,:,:,jspin), 1)
318
       ENDIF
319
       CALL MPI_REDUCE(orb%dulo(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
320
       IF (mpi%irank.EQ.0) THEN
321
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%dulo(:,:,:,jspin), 1)
322 323 324 325
       ENDIF
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
326
       CALL MPI_REDUCE(orb%uulop(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
327
       IF (mpi%irank.EQ.0) THEN
328
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uulop(:,:,:,jspin), 1)
329
       ENDIF
330
       CALL MPI_REDUCE(orb%dulop(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
331
       IF (mpi%irank.EQ.0) THEN
332
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%dulop(:,:,:,jspin), 1)
333
       ENDIF
334
       CALL MPI_REDUCE(orb%uulom(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
335
       IF (mpi%irank.EQ.0) THEN
336
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uulom(:,:,:,jspin), 1)
337
       ENDIF
338
       CALL MPI_REDUCE(orb%dulom(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
339
       IF (mpi%irank.EQ.0) THEN
340
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%dulom(:,:,:,jspin), 1)
341 342
       ENDIF
       DEALLOCATE (c_b)
343

344
       n = atoms%nlod * atoms%nlod * (2*atoms%llod+1) * atoms%ntype
345
       ALLOCATE (r_b(n))
346
       CALL MPI_REDUCE(orb%z(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
347
       IF (mpi%irank.EQ.0) THEN
348
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%z(:,:,:,:,jspin), 1)
349 350 351 352
       ENDIF
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
353
       CALL MPI_REDUCE(orb%p(:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
354
       IF (mpi%irank.EQ.0) THEN
355
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%p(:,:,:,:,jspin), 1)
356
       ENDIF
357
       CALL MPI_REDUCE(orb%m(:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
358
       IF (mpi%irank.EQ.0) THEN
359
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%m(:,:,:,:,jspin), 1)
360 361 362 363 364 365 366 367
       ENDIF
       DEALLOCATE (c_b)

    ENDIF

    ! -> Collect the noco staff: 
    IF ( noco%l_noco .AND. jspin.EQ.1 ) THEN

368
       n = stars%ng3
369
       ALLOCATE(c_b(n))
370
       CALL MPI_REDUCE(den%pw(:,3),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
371
       IF (mpi%irank.EQ.0) THEN
372
          CALL CPP_BLAS_ccopy(n, c_b, 1, den%pw(:,3), 1)
373 374 375 376 377 378 379
       ENDIF
       DEALLOCATE (c_b)
       !
       IF (input%film) THEN

          n = vacuum%nmzxyd*(oneD%odi%n2d-1)*2
          ALLOCATE(c_b(n))
380
          CALL MPI_REDUCE(den%vacxy(:,:,:,3),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
381
          IF (mpi%irank.EQ.0) THEN
382
             CALL CPP_BLAS_ccopy(n, c_b, 1, den%vacxy(:,:,:,3), 1)
383 384 385
          ENDIF
          DEALLOCATE (c_b)
          !
386 387 388
          n = vacuum%nmzd*2*2
          ALLOCATE(r_b(n))
          CALL MPI_REDUCE(den%vacz(:,:,3:4),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
389
          IF (mpi%irank.EQ.0) THEN
390
             CALL CPP_BLAS_ccopy(n, r_b, 1, den%vacz(:,:,3:4), 1)
391
          ENDIF
392
          DEALLOCATE (r_b)
393 394 395 396 397

       ENDIF ! input%film


       IF (noco%l_mperp) THEN
398

399
          ! -->     for (spin)-off diagonal part of muffin-tin
400
          n = (atoms%lmaxd+1) * atoms%ntype
401
          ALLOCATE(c_b(n))
402
          CALL MPI_REDUCE(denCoeffsOffdiag%uu21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
403
          IF (mpi%irank.EQ.0) THEN
404
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uu21(:,:), 1)
405
          ENDIF
406
          CALL MPI_REDUCE(denCoeffsOffdiag%ud21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
407
          IF (mpi%irank.EQ.0) THEN
408
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ud21(:,:), 1)
409
          ENDIF
410
          CALL MPI_REDUCE(denCoeffsOffdiag%du21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
411
          IF (mpi%irank.EQ.0) THEN
412
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%du21(:,:), 1)
413
          ENDIF
414
          CALL MPI_REDUCE(denCoeffsOffdiag%dd21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
415
          IF (mpi%irank.EQ.0) THEN
416
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dd21(:,:), 1)
417 418
          ENDIF
          DEALLOCATE (c_b)
419

420
          ! -->     lo,u coeff's:
421
          n = atoms%nlod * atoms%ntype
422
          ALLOCATE(c_b(n))
423
          CALL MPI_REDUCE(denCoeffsOffdiag%uulo21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
424
          IF (mpi%irank.EQ.0) THEN
425
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uulo21(:,:), 1)
426
          ENDIF
427
          CALL MPI_REDUCE(denCoeffsOffdiag%ulou21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
428
          IF (mpi%irank.EQ.0) THEN
429
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ulou21(:,:), 1)
430
          ENDIF
431
          CALL MPI_REDUCE(denCoeffsOffdiag%dulo21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
432
          IF (mpi%irank.EQ.0) THEN
433
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dulo21(:,:), 1)
434
          ENDIF
435
          CALL MPI_REDUCE(denCoeffsOffdiag%ulod21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
436
          IF (mpi%irank.EQ.0) THEN
437
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ulod21(:,:), 1)
438 439
          ENDIF
          DEALLOCATE (c_b)
440

441
          ! -->     lo,lo' coeff's:
442
          n = atoms%nlod*atoms%nlod*atoms%ntype
443
          ALLOCATE(c_b(n))
444
          CALL MPI_REDUCE(denCoeffsOffdiag%uloulop21,c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
445
          IF (mpi%irank.EQ.0) THEN
446
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uloulop21, 1)
447 448 449
          ENDIF
          DEALLOCATE (c_b)

450
          IF (denCoeffsOffdiag%l_fmpl) THEN
451

452
             !-->        Full magnetization plots: Collect uunmt21, etc.
453
             n = (atoms%lmaxd+1)**2 *sphhar%nlhd*atoms%ntype
454
             ALLOCATE(c_b(n))
455
             CALL MPI_REDUCE(denCoeffsOffdiag%uunmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
456
             IF (mpi%irank.EQ.0) THEN
457
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uunmt21, 1)
458
             ENDIF
459
             CALL MPI_REDUCE(denCoeffsOffdiag%udnmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
460
             IF (mpi%irank.EQ.0) THEN
461
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%udnmt21, 1)
462
             ENDIF
463
             CALL MPI_REDUCE(denCoeffsOffdiag%dunmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
464
             IF (mpi%irank.EQ.0) THEN
465
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dunmt21, 1)
466
             ENDIF
467
             CALL MPI_REDUCE(denCoeffsOffdiag%ddnmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
468
             IF (mpi%irank.EQ.0) THEN
469
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ddnmt21, 1)
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
             ENDIF
             DEALLOCATE (c_b)

          ENDIF ! fmpl
       ENDIF  ! mperp
    ENDIF   ! noco

    !+lda+U
    IF ( atoms%n_u.GT.0 ) THEN
       n = 49*atoms%n_u 
       ALLOCATE(c_b(n))
       CALL MPI_REDUCE(n_mmp,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) THEN
          CALL CPP_BLAS_ccopy(n, c_b, 1, n_mmp, 1)
       ENDIF
       DEALLOCATE (c_b)
    ENDIF
    !-lda+U

489 490
    CALL timestop("mpi_col_den")

491 492
  END SUBROUTINE mpi_col_den
END MODULE m_mpi_col_den