mpi_col_den.F90 20.7 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7 8 9 10 11
MODULE m_mpi_col_den
  !
  ! collect all data calculated in cdnval on different pe's on pe 0
  !
CONTAINS
12 13
  SUBROUTINE mpi_col_den(mpi,sphhar,atoms,oneD,stars,vacuum,input,noco,jspin,regCharges,dos,&
                         results,denCoeffs,orb,denCoeffsOffdiag,den,n_mmp,mcd,slab,orbcomp)
14

15 16
#include"cpp_double.h"
    USE m_types
17
    USE m_constants
18
    USE m_juDFT
19 20
    IMPLICIT NONE

21 22
    TYPE(t_results),INTENT(INOUT):: results
    TYPE(t_mpi),INTENT(IN)       :: mpi
23 24 25 26 27 28
    TYPE(t_oneD),INTENT(IN)      :: oneD
    TYPE(t_input),INTENT(IN)     :: input
    TYPE(t_vacuum),INTENT(IN)    :: vacuum
    TYPE(t_noco),INTENT(IN)      :: noco
    TYPE(t_stars),INTENT(IN)     :: stars
    TYPE(t_sphhar),INTENT(IN)    :: sphhar
29
    TYPE(t_atoms),INTENT(IN)     :: atoms
30
    TYPE(t_potden),INTENT(INOUT) :: den
31 32 33
    INCLUDE 'mpif.h'
    ! ..
    ! ..  Scalar Arguments ..
34
    INTEGER, INTENT (IN) :: jspin
35 36
    ! ..
    ! ..  Array Arguments ..
37
    COMPLEX,INTENT(INOUT) :: n_mmp(-lmaxU_const:lmaxU_const,-lmaxU_const:lmaxU_const,atoms%n_u)
38 39 40 41 42 43 44 45
    TYPE (t_orb),               INTENT(INOUT) :: orb
    TYPE (t_denCoeffs),         INTENT(INOUT) :: denCoeffs
    TYPE (t_denCoeffsOffdiag),  INTENT(INOUT) :: denCoeffsOffdiag
    TYPE (t_regionCharges),     INTENT(INOUT) :: regCharges
    TYPE (t_dos),               INTENT(INOUT) :: dos
    TYPE (t_mcd),     OPTIONAL, INTENT(INOUT) :: mcd
    TYPE (t_slab),    OPTIONAL, INTENT(INOUT) :: slab
    TYPE (t_orbcomp), OPTIONAL, INTENT(INOUT) :: orbcomp
46 47
    ! ..
    ! ..  Local Scalars ..
48
    INTEGER :: n, i
49 50 51 52 53
    ! ..
    ! ..  Local Arrays ..
    INTEGER :: ierr(3)
    COMPLEX, ALLOCATABLE :: c_b(:)
    REAL,    ALLOCATABLE :: r_b(:)
54
    INTEGER, ALLOCATABLE :: i_b(:)
55 56 57 58
    ! ..
    ! ..  External Subroutines
    EXTERNAL CPP_BLAS_scopy,CPP_BLAS_ccopy,MPI_REDUCE

59
    CALL timestart("mpi_col_den")
60 61

    ! -> Collect den%pw(:,jspin)
62
    n = stars%ng3
63
    ALLOCATE(c_b(n))
64
    CALL MPI_REDUCE(den%pw(:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
65
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, den%pw(:,jspin), 1)
66
    DEALLOCATE (c_b)
67 68

    ! -> Collect den%vacxy(:,:,:,jspin)
69 70 71
    IF (input%film) THEN
       n = vacuum%nmzxyd*(oneD%odi%n2d-1)*2
       ALLOCATE(c_b(n))
72
       CALL MPI_REDUCE(den%vacxy(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
73
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, den%vacxy(:,:,:,jspin), 1)
74
       DEALLOCATE (c_b)
75 76

       ! -> Collect den%vacz(:,:,jspin)
77 78
       n = vacuum%nmzd*2
       ALLOCATE(r_b(n))
79
       CALL MPI_REDUCE(den%vacz(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
80
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, den%vacz(:,:,jspin), 1)
81 82
       DEALLOCATE (r_b)
    ENDIF
83

84
    ! -> Collect uu(),ud() and dd()
85
    n = (atoms%lmaxd+1)*atoms%ntype
86
    ALLOCATE(r_b(n))
87
    CALL MPI_REDUCE(denCoeffs%uu(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
88
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%uu(0:,:,jspin), 1)
89
    CALL MPI_REDUCE(denCoeffs%du(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
90
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%du(0:,:,jspin), 1)
91
    CALL MPI_REDUCE(denCoeffs%dd(0:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
92
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%dd(0:,:,jspin), 1)
93
    DEALLOCATE (r_b)
94

95
    !--> Collect uunmt,udnmt,dunmt,ddnmt
96
    n = (((atoms%lmaxd*(atoms%lmaxd+3))/2)+1)*sphhar%nlhd*atoms%ntype
97
    ALLOCATE(r_b(n))
98
    CALL MPI_REDUCE(denCoeffs%uunmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
99
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%uunmt(0:,:,:,jspin), 1)
100
    CALL MPI_REDUCE(denCoeffs%udnmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
101
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%udnmt(0:,:,:,jspin), 1)
102
    CALL MPI_REDUCE(denCoeffs%dunmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
103
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%dunmt(0:,:,:,jspin), 1)
104
    CALL MPI_REDUCE(denCoeffs%ddnmt(0:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
105
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%ddnmt(0:,:,:,jspin), 1)
106
    DEALLOCATE (r_b)
107

108
    !--> ener & sqal
109
    n=4*atoms%ntype
110
    ALLOCATE(r_b(n))
111
    CALL MPI_REDUCE(regCharges%ener(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
112
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%ener(:,:,jspin), 1)
113
    CALL MPI_REDUCE(regCharges%sqal(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
114
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%sqal(:,:,jspin), 1)
115
    DEALLOCATE (r_b)
116

117 118 119 120
    !--> svac & pvac
    IF ( input%film ) THEN
       n=2
       ALLOCATE(r_b(n))
121
       CALL MPI_REDUCE(regCharges%svac(:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
122
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%svac(:,jspin), 1)
123
       CALL MPI_REDUCE(regCharges%pvac(:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
124
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%pvac(:,jspin), 1)
125 126
       DEALLOCATE (r_b)
    ENDIF
127 128 129 130 131

    !collect DOS stuff
    n = SIZE(dos%jsym,1)*SIZE(dos%jsym,2)
    ALLOCATE(i_b(n))
    CALL MPI_REDUCE(dos%jsym(:,:,jspin),i_b,n,MPI_INTEGER,MPI_SUM,0, MPI_COMM_WORLD,ierr)
132 133 134 135 136
    IF (mpi%irank.EQ.0) THEN
       DO i = 1, SIZE(dos%jsym,2)
          dos%jsym(:,i,jspin) = i_b((i-1)*SIZE(dos%jsym,1)+1:i*SIZE(dos%jsym,1))
       END DO
    END IF
137 138 139 140 141
    DEALLOCATE (i_b)

    n = SIZE(dos%ksym,1)*SIZE(dos%ksym,2)
    ALLOCATE(i_b(n))
    CALL MPI_REDUCE(dos%ksym(:,:,jspin),i_b,n,MPI_INTEGER,MPI_SUM,0, MPI_COMM_WORLD,ierr)
142 143 144 145 146
    IF (mpi%irank.EQ.0) THEN
       DO i = 1, SIZE(dos%ksym,2)
          dos%ksym(:,i,jspin) = i_b((i-1)*SIZE(dos%ksym,1)+1:i*SIZE(dos%ksym,1))
       END DO
    END IF
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
    DEALLOCATE (i_b)

    n = SIZE(dos%qis,1)*SIZE(dos%qis,2)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qis(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qis(:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qal,1)*SIZE(dos%qal,2)*SIZE(dos%qal,3)*SIZE(dos%qal,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qal(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qal(0:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qvac,1)*SIZE(dos%qvac,2)*SIZE(dos%qvac,3)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qvac(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qvac(:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qvlay,1)*SIZE(dos%qvlay,2)*SIZE(dos%qvlay,3)*SIZE(dos%qvlay,4)
    ALLOCATE(r_b(n))
    CALL MPI_REDUCE(dos%qvlay(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, dos%qvlay(:,:,:,:,jspin), 1)
    DEALLOCATE (r_b)

    n = SIZE(dos%qstars,1)*SIZE(dos%qstars,2)*SIZE(dos%qstars,3)*SIZE(dos%qstars,4)*SIZE(dos%qstars,5)
    ALLOCATE(c_b(n))
    CALL MPI_REDUCE(dos%qstars(:,:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
176
    IF (mpi%irank.EQ.0) CALL CPP_BLAS_ccopy(n, c_b, 1, dos%qstars(:,:,:,:,:,jspin), 1)
177 178
    DEALLOCATE (c_b)

179
    ! Collect mcd%mcd
180 181 182 183 184 185 186
    IF (PRESENT(mcd)) THEN
       n = SIZE(mcd%mcd,1)*SIZE(mcd%mcd,2)*SIZE(mcd%mcd,3)*SIZE(mcd%mcd,4)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(mcd%mcd(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, mcd%mcd(:,:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
187

188
    ! Collect slab - qintsl and qmtsl
189 190 191 192 193 194
    IF (PRESENT(slab)) THEN
       n = SIZE(slab%qintsl,1)*SIZE(slab%qintsl,2)*SIZE(slab%qintsl,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(slab%qintsl(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, slab%qintsl(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
195

196 197 198 199 200 201
       n = SIZE(slab%qmtsl,1)*SIZE(slab%qmtsl,2)*SIZE(slab%qmtsl,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(slab%qmtsl(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, slab%qmtsl(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
202

203
    ! Collect orbcomp - comp and qmtp
204 205 206 207 208 209
    IF (PRESENT(orbcomp)) THEN
       n = SIZE(orbcomp%comp,1)*SIZE(orbcomp%comp,2)*SIZE(orbcomp%comp,3)*SIZE(orbcomp%comp,4)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(orbcomp%comp(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, orbcomp%comp(:,:,:,:,jspin), 1)
       DEALLOCATE (r_b)
210

211 212 213 214 215 216
       n = SIZE(orbcomp%qmtp,1)*SIZE(orbcomp%qmtp,2)*SIZE(orbcomp%qmtp,3)
       ALLOCATE(r_b(n))
       CALL MPI_REDUCE(orbcomp%qmtp(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, orbcomp%qmtp(:,:,:,jspin), 1)
       DEALLOCATE (r_b)
    END IF
217

218 219
    ! -> Collect force
    IF (input%l_f) THEN
220
       n=3*atoms%ntype
221
       ALLOCATE(r_b(n))
222
       CALL MPI_REDUCE(results%force(1,1,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
223
       IF (mpi%irank.EQ.0) CALL CPP_BLAS_scopy(n, r_b, 1, results%force(1,1,jspin), 1)
224 225
       DEALLOCATE (r_b)
    ENDIF
226

227 228 229
    ! -> Optional the LO-coefficients: aclo,bclo,enerlo,cclo,acnmt,bcnmt,ccnmt
    IF (atoms%nlod.GE.1) THEN

230
       n=atoms%nlod*atoms%ntype 
231
       ALLOCATE (r_b(n))
232
       CALL MPI_REDUCE(denCoeffs%aclo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
233
       IF (mpi%irank.EQ.0) THEN
234
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%aclo(:,:,jspin), 1)
235
       ENDIF
236
       CALL MPI_REDUCE(denCoeffs%bclo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
237
       IF (mpi%irank.EQ.0) THEN
238
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%bclo(:,:,jspin), 1)
239
       ENDIF
240
       CALL MPI_REDUCE(regCharges%enerlo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
241
       IF (mpi%irank.EQ.0) THEN
242
          CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%enerlo(:,:,jspin), 1)
243
       ENDIF
244
       CALL MPI_REDUCE(regCharges%sqlo(:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
245
       IF (mpi%irank.EQ.0) THEN
246
          CALL CPP_BLAS_scopy(n, r_b, 1, regCharges%sqlo(:,:,jspin), 1)
247 248 249
       ENDIF
       DEALLOCATE (r_b)

250
       n = atoms%nlod * atoms%nlod * atoms%ntype
251
       ALLOCATE (r_b(n))
252
       CALL MPI_REDUCE(denCoeffs%cclo(:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
253
       IF (mpi%irank.EQ.0) THEN
254
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%cclo(:,:,:,jspin), 1)
255 256 257
       ENDIF
       DEALLOCATE (r_b)

258
       n = (atoms%lmaxd+1) * atoms%ntype * atoms%nlod * sphhar%nlhd
259
       ALLOCATE (r_b(n))
260
       CALL MPI_REDUCE(denCoeffs%acnmt(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
261
       IF (mpi%irank.EQ.0) THEN
262
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%acnmt(0:,:,:,:,jspin), 1)
263
       ENDIF
264
       CALL MPI_REDUCE(denCoeffs%bcnmt(0:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
265
       IF (mpi%irank.EQ.0) THEN
266
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%bcnmt(0:,:,:,:,jspin), 1)
267 268 269
       ENDIF
       DEALLOCATE (r_b)

270
       n = atoms%ntype * sphhar%nlhd * atoms%nlod**2
271
       ALLOCATE (r_b(n))
272
       CALL MPI_REDUCE(denCoeffs%ccnmt(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
273
       IF (mpi%irank.EQ.0) THEN
274
          CALL CPP_BLAS_scopy(n, r_b, 1, denCoeffs%ccnmt(:,:,:,:,jspin), 1)
275 276 277 278
       ENDIF
       DEALLOCATE (r_b)

    ENDIF
279

280 281 282
    ! ->  Now the SOC - stuff: orb, orblo and orblo
    IF (noco%l_soc) THEN
       ! orb
283
       n=(atoms%lmaxd+1)*(2*atoms%lmaxd+1)*atoms%ntype
284
       ALLOCATE (r_b(n))
285
       CALL MPI_REDUCE(orb%uu(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
286
       IF (mpi%irank.EQ.0) THEN
287
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%uu(:,:,:,jspin), 1)
288
       ENDIF
289
       CALL MPI_REDUCE(orb%dd(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
290
       IF (mpi%irank.EQ.0) THEN
291
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%dd(:,:,:,jspin), 1)
292 293 294 295
       ENDIF
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
296
       CALL MPI_REDUCE(orb%uup(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
297
       IF (mpi%irank.EQ.0) THEN
298
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uup(:,:,:,jspin), 1)
299
       ENDIF
300
       CALL MPI_REDUCE(orb%ddp(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
301
       IF (mpi%irank.EQ.0) THEN
302
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%ddp(:,:,:,jspin), 1)
303
       ENDIF
304
       CALL MPI_REDUCE(orb%uum(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
305
       IF (mpi%irank.EQ.0) THEN
306
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uum(:,:,:,jspin), 1)
307
       ENDIF
308
       CALL MPI_REDUCE(orb%ddm(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
309
       IF (mpi%irank.EQ.0) THEN
310
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%ddm(:,:,:,jspin), 1)
311 312
       ENDIF
       DEALLOCATE (c_b)
313

314
       n = atoms%nlod * (2*atoms%llod+1) * atoms%ntype
315
       ALLOCATE (r_b(n))
316
       CALL MPI_REDUCE(orb%uulo(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
317
       IF (mpi%irank.EQ.0) THEN
318
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%uulo(:,:,:,jspin), 1)
319
       ENDIF
320
       CALL MPI_REDUCE(orb%dulo(:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
321
       IF (mpi%irank.EQ.0) THEN
322
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%dulo(:,:,:,jspin), 1)
323 324 325 326
       ENDIF
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
327
       CALL MPI_REDUCE(orb%uulop(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
328
       IF (mpi%irank.EQ.0) THEN
329
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uulop(:,:,:,jspin), 1)
330
       ENDIF
331
       CALL MPI_REDUCE(orb%dulop(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
332
       IF (mpi%irank.EQ.0) THEN
333
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%dulop(:,:,:,jspin), 1)
334
       ENDIF
335
       CALL MPI_REDUCE(orb%uulom(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
336
       IF (mpi%irank.EQ.0) THEN
337
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%uulom(:,:,:,jspin), 1)
338
       ENDIF
339
       CALL MPI_REDUCE(orb%dulom(:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
340
       IF (mpi%irank.EQ.0) THEN
341
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%dulom(:,:,:,jspin), 1)
342 343
       ENDIF
       DEALLOCATE (c_b)
344

345
       n = atoms%nlod * atoms%nlod * (2*atoms%llod+1) * atoms%ntype
346
       ALLOCATE (r_b(n))
347
       CALL MPI_REDUCE(orb%z(:,:,:,:,jspin),r_b,n,CPP_MPI_REAL, MPI_SUM,0,MPI_COMM_WORLD,ierr)
348
       IF (mpi%irank.EQ.0) THEN
349
          CALL CPP_BLAS_scopy(n, r_b, 1, orb%z(:,:,:,:,jspin), 1)
350 351 352 353
       ENDIF
       DEALLOCATE (r_b)

       ALLOCATE (c_b(n))
354
       CALL MPI_REDUCE(orb%p(:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
355
       IF (mpi%irank.EQ.0) THEN
356
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%p(:,:,:,:,jspin), 1)
357
       ENDIF
358
       CALL MPI_REDUCE(orb%m(:,:,:,:,jspin),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
359
       IF (mpi%irank.EQ.0) THEN
360
          CALL CPP_BLAS_ccopy(n, c_b, 1, orb%m(:,:,:,:,jspin), 1)
361 362 363 364 365 366 367 368
       ENDIF
       DEALLOCATE (c_b)

    ENDIF

    ! -> Collect the noco staff: 
    IF ( noco%l_noco .AND. jspin.EQ.1 ) THEN

369
       n = stars%ng3
370
       ALLOCATE(c_b(n))
371
       CALL MPI_REDUCE(den%pw(:,3),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
372
       IF (mpi%irank.EQ.0) THEN
373
          CALL CPP_BLAS_ccopy(n, c_b, 1, den%pw(:,3), 1)
374 375 376 377 378 379 380
       ENDIF
       DEALLOCATE (c_b)
       !
       IF (input%film) THEN

          n = vacuum%nmzxyd*(oneD%odi%n2d-1)*2
          ALLOCATE(c_b(n))
381
          CALL MPI_REDUCE(den%vacxy(:,:,:,3),c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
382
          IF (mpi%irank.EQ.0) THEN
383
             CALL CPP_BLAS_ccopy(n, c_b, 1, den%vacxy(:,:,:,3), 1)
384 385 386
          ENDIF
          DEALLOCATE (c_b)
          !
387 388 389
          n = vacuum%nmzd*2*2
          ALLOCATE(r_b(n))
          CALL MPI_REDUCE(den%vacz(:,:,3:4),r_b,n,CPP_MPI_REAL,MPI_SUM,0, MPI_COMM_WORLD,ierr)
390
          IF (mpi%irank.EQ.0) THEN
391
             CALL CPP_BLAS_ccopy(n, r_b, 1, den%vacz(:,:,3:4), 1)
392
          ENDIF
393
          DEALLOCATE (r_b)
394 395 396 397 398

       ENDIF ! input%film


       IF (noco%l_mperp) THEN
399

400
          ! -->     for (spin)-off diagonal part of muffin-tin
401
          n = (atoms%lmaxd+1) * atoms%ntype
402
          ALLOCATE(c_b(n))
403
          CALL MPI_REDUCE(denCoeffsOffdiag%uu21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
404
          IF (mpi%irank.EQ.0) THEN
405
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uu21(:,:), 1)
406
          ENDIF
407
          CALL MPI_REDUCE(denCoeffsOffdiag%ud21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
408
          IF (mpi%irank.EQ.0) THEN
409
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ud21(:,:), 1)
410
          ENDIF
411
          CALL MPI_REDUCE(denCoeffsOffdiag%du21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
412
          IF (mpi%irank.EQ.0) THEN
413
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%du21(:,:), 1)
414
          ENDIF
415
          CALL MPI_REDUCE(denCoeffsOffdiag%dd21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
416
          IF (mpi%irank.EQ.0) THEN
417
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dd21(:,:), 1)
418 419
          ENDIF
          DEALLOCATE (c_b)
420

421
          ! -->     lo,u coeff's:
422
          n = atoms%nlod * atoms%ntype
423
          ALLOCATE(c_b(n))
424
          CALL MPI_REDUCE(denCoeffsOffdiag%uulo21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
425
          IF (mpi%irank.EQ.0) THEN
426
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uulo21(:,:), 1)
427
          ENDIF
428
          CALL MPI_REDUCE(denCoeffsOffdiag%ulou21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
429
          IF (mpi%irank.EQ.0) THEN
430
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ulou21(:,:), 1)
431
          ENDIF
432
          CALL MPI_REDUCE(denCoeffsOffdiag%dulo21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
433
          IF (mpi%irank.EQ.0) THEN
434
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dulo21(:,:), 1)
435
          ENDIF
436
          CALL MPI_REDUCE(denCoeffsOffdiag%ulod21(:,:),c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
437
          IF (mpi%irank.EQ.0) THEN
438
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ulod21(:,:), 1)
439 440
          ENDIF
          DEALLOCATE (c_b)
441

442
          ! -->     lo,lo' coeff's:
443
          n = atoms%nlod*atoms%nlod*atoms%ntype
444
          ALLOCATE(c_b(n))
445
          CALL MPI_REDUCE(denCoeffsOffdiag%uloulop21,c_b,n,CPP_MPI_COMPLEX, MPI_SUM,0,MPI_COMM_WORLD,ierr)
446
          IF (mpi%irank.EQ.0) THEN
447
             CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uloulop21, 1)
448 449 450
          ENDIF
          DEALLOCATE (c_b)

451
          IF (denCoeffsOffdiag%l_fmpl) THEN
452

453
             !-->        Full magnetization plots: Collect uunmt21, etc.
454
             n = (atoms%lmaxd+1)**2 *sphhar%nlhd*atoms%ntype
455
             ALLOCATE(c_b(n))
456
             CALL MPI_REDUCE(denCoeffsOffdiag%uunmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
457
             IF (mpi%irank.EQ.0) THEN
458
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%uunmt21, 1)
459
             ENDIF
460
             CALL MPI_REDUCE(denCoeffsOffdiag%udnmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
461
             IF (mpi%irank.EQ.0) THEN
462
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%udnmt21, 1)
463
             ENDIF
464
             CALL MPI_REDUCE(denCoeffsOffdiag%dunmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
465
             IF (mpi%irank.EQ.0) THEN
466
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%dunmt21, 1)
467
             ENDIF
468
             CALL MPI_REDUCE(denCoeffsOffdiag%ddnmt21,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0 ,MPI_COMM_WORLD,ierr)
469
             IF (mpi%irank.EQ.0) THEN
470
                CALL CPP_BLAS_ccopy(n, c_b, 1, denCoeffsOffdiag%ddnmt21, 1)
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
             ENDIF
             DEALLOCATE (c_b)

          ENDIF ! fmpl
       ENDIF  ! mperp
    ENDIF   ! noco

    !+lda+U
    IF ( atoms%n_u.GT.0 ) THEN
       n = 49*atoms%n_u 
       ALLOCATE(c_b(n))
       CALL MPI_REDUCE(n_mmp,c_b,n,CPP_MPI_COMPLEX,MPI_SUM,0, MPI_COMM_WORLD,ierr)
       IF (mpi%irank.EQ.0) THEN
          CALL CPP_BLAS_ccopy(n, c_b, 1, n_mmp, 1)
       ENDIF
       DEALLOCATE (c_b)
    ENDIF
    !-lda+U

490 491
    CALL timestop("mpi_col_den")

492 493
  END SUBROUTINE mpi_col_den
END MODULE m_mpi_col_den