!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

MODULE m_types_mpimat
  USE m_judft
  USE m_types_rcmat
  IMPLICIT NONE
  PRIVATE
  INTEGER,PARAMETER    :: DEFAULT_BLOCKSIZE=64
  INTEGER, PARAMETER   :: dlen_=9

  !<This data type extends the basic t_mat for distributed matrices.
  !<
  !<It stores the additional MPI communicator and sets up a BLACS grid for the matrix.
  !<This can be used to perform ScaLAPACK calls on the matrix with little additional input.
  !<The copy procedure is overridden from t_mat to also enable redistribution of the matrix.
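  !<
  !< Usage sketch (illustrative only; the sizes and the communicator below are
  !< placeholders, see mpimat_init for the full argument list):
  !<   TYPE(t_mpimat) :: mat
  !<   CALL mat%init_details(l_real=.TRUE.,matsize1=1000,matsize2=1000,&
  !<                         mpi_subcom=MPI_COMM_WORLD,l_2d=.TRUE.)
  !<   !... fill the local block mat%data_r with the upper triangle, then ...
  !<   CALL mat%generate_full_matrix()
  !<   CALL mat%free()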
  TYPE  t_blacsdata
     INTEGER:: no_use
     INTEGER:: mpi_com                          !> MPI communicator over which the matrix is distributed
     INTEGER:: blacs_desc(dlen_)                !> BLACS descriptor
     !> 1: =1
     !> 2: context
     !> 3,4: global matrix size
     !> 5,6: block sizes
     !> 7,8: row/column of the grid holding the first row/column of the matrix
     !> 9: leading dimension of local matrix
     INTEGER:: npcol,nprow                     !> the number of columns/rows in the processor grid
     INTEGER:: mycol,myrow
  END TYPE t_blacsdata
  
  
  TYPE,EXTENDS(t_mat):: t_mpimat
     INTEGER                   :: global_size1,global_size2        !> this is the size of the full-matrix
     TYPE(t_blacsdata),POINTER :: blacsdata
   CONTAINS
     PROCEDURE,PASS   :: copy => mpimat_copy     !<overridden from t_mat, also performs redistribution
     PROCEDURE,PASS   :: move => mpimat_move     !<overridden from t_mat, also performs redistribution
     PROCEDURE,PASS   :: free => mpimat_free     !<overridden from t_mat, takes care of blacs-grids
     PROCEDURE,PASS   :: multiply => mpimat_multiply  !<overridden from t_mat, takes care of blacs-grids
     PROCEDURE,PASS   :: init_details => mpimat_init
     PROCEDURE,PASS   :: init_template => mpimat_init_template     !<overridden from t_mat, also calls alloc in t_mat
     PROCEDURE,PASS   :: add_transpose => mpimat_add_transpose !<overridden from t_mat
     PROCEDURE,PASS   :: generate_full_matrix    ! construct the full matrix if only the upper triangle of a Hermitian matrix is given
     PROCEDURE,PASS   :: print_matrix
     PROCEDURE,PASS   :: from_non_dist
     FINAL :: finalize, finalize_1d, finalize_2d, finalize_3d
  END TYPE t_mpimat
  
  PUBLIC t_mpimat

CONTAINS

  SUBROUTINE mpimat_multiply(mat1,mat2,res)
    CLASS(t_mpimat),INTENT(INOUT)     :: mat1
    CLASS(t_mat),INTENT(IN)           :: mat2
    CLASS(t_mat),INTENT(OUT),OPTIONAL :: res

#ifdef CPP_SCALAPACK
    TYPE(t_mpimat)::m,r
    IF (.NOT.PRESENT(res)) CALL judft_error("BUG: in mpicase the multiply requires the optional result argument")
    SELECT TYPE(mat2)
    TYPE IS (t_mpimat)
       SELECT TYPE(res)
       TYPE is (t_mpimat)
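          ! Redistribute mat2 onto mat1's BLACS grid (m) and set up the result
          ! matrix r on the same grid, so that p?gemm operates on operands with
          ! a common context; the result is copied back into res afterwards.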
          CALL m%init(mat1,mat2%global_size1,mat2%global_size2)
          CALL m%copy(mat2,1,1)
          CALL r%init(mat1,res%global_size1,res%global_size2)
          IF (mat1%l_real) THEN
             CALL pdgemm('N','N',mat1%global_size1, m%global_size2,mat1%global_size2, 1.0, &
                  mat1%data_r, 1,1,mat1%blacsdata%blacs_desc, &
                  m%data_r, 1,1,m%blacsdata%blacs_desc,0.0, &
                  r%data_r, 1,1,r%blacsdata%blacs_desc )
          ELSE
             CALL pzgemm('N','N',mat1%global_size1, m%global_size2,mat1%global_size2, CMPLX(1.0,0.0), &
                  mat1%data_c, 1,1,mat1%blacsdata%blacs_desc, &
                  m%data_c, 1,1,m%blacsdata%blacs_desc,CMPLX(0.0,0.0), &
                  r%data_c, 1,1,r%blacsdata%blacs_desc )
          ENDIF
          CALL res%copy(r,1,1)
          CALL r%free()
          CALL m%free()
       CLASS default
          CALL judft_error("BUG in mpimat%multiply")
       END SELECT
    CLASS default
       CALL judft_error("BUG in mpimat%multiply")
    END SELECT
#endif    
  END SUBROUTINE mpimat_multiply
          

    

  SUBROUTINE print_matrix(mat,fileno)
    CLASS(t_mpimat),INTENT(INOUT) ::mat
    INTEGER,INTENT(IN):: fileno

#ifdef CPP_SCALAPACK
    INCLUDE 'mpif.h'
    INTEGER,EXTERNAL:: indxl2g
    CHARACTER(len=10)::filename
    INTEGER :: irank,isize,i,j,npr,npc,r,c,tmp,err,status(MPI_STATUS_SIZE) 

    CALL MPI_COMM_RANK(mat%blacsdata%mpi_com,irank,err)
    CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com,isize,err)

    tmp=0

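    ! Serialize the output: each rank waits for a token from its predecessor
    ! before writing and passes it on afterwards, so the ranks append to the
    ! file in order.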
    IF (irank>0) CALL MPI_RECV(tmp,1,MPI_INTEGER,irank-1,0,mat%blacsdata%mpi_com,status,err) !lock
    WRITE(filename,"(a,i0)") "out.",fileno
    OPEN(fileno,file=filename,position='append')
    
    CALL blacs_gridinfo(mat%blacsdata%blacs_desc(2),npr,npc,r,c)
    DO i=1,mat%matsize1
       DO j=1,mat%matsize2
          IF (mat%l_real) THEN
             WRITE(fileno,"(5(i0,1x),2(f10.5,1x))") irank,i,j,indxl2g(i,mat%blacsdata%blacs_desc(5),r,0,npr),&
                  indxl2g(j,mat%blacsdata%blacs_desc(6),c,0,npc),mat%data_r(i,j)
          ELSE
             WRITE(fileno,"(5(i0,1x),2(f10.5,1x))") irank,i,j,indxl2g(i,mat%blacsdata%blacs_desc(5),r,0,npr),&
                  indxl2g(j,mat%blacsdata%blacs_desc(6),c,0,npc),mat%data_c(i,j)
          END IF
       ENDDO
    ENDDO
    CLOSE(fileno)
    IF (irank+1<isize) CALL MPI_SEND(tmp,1,MPI_INTEGER,irank+1,0,mat%blacsdata%mpi_com,err)
    
#endif    
  END SUBROUTINE print_matrix

  SUBROUTINE generate_full_matrix(mat)
    CLASS(t_mpimat),INTENT(INOUT) ::mat
    
    INTEGER :: i,j,i_glob,j_glob,myid,err,np
    COMPLEX,ALLOCATABLE:: tmp_c(:,:)
    REAL,ALLOCATABLE   :: tmp_r(:,:)
#ifdef CPP_SCALAPACK
    INCLUDE 'mpif.h'
    INTEGER, EXTERNAL    :: numroc, indxl2g  !SCALAPACK functions

    CALL MPI_COMM_RANK(mat%blacsdata%mpi_com,myid,err)
    CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com,np,err)
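    ! Strategy: zero the strict lower triangle of the local data, halve the
    ! diagonal, and then add the (conjugate) transpose with p?geadd, i.e.
    ! A <- A + A^H, which reconstructs the full matrix from its upper triangle.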
    !Set the strict lower triangle to zero and halve the diagonal
 
    DO i=1,mat%matsize1
       DO j=1,mat%matsize2
          ! Determine the global row and column indices corresponding to the
          ! local element (i,j)
          i_glob = indxl2g(i,     mat%blacsdata%blacs_desc(5), mat%blacsdata%myrow, 0, mat%blacsdata%nprow)
          j_glob = indxl2g(j,     mat%blacsdata%blacs_desc(6), mat%blacsdata%mycol, 0, mat%blacsdata%npcol)

          IF (i_glob>j_glob) THEN
             IF (mat%l_real) THEN
                mat%data_r(i,j) = 0.0
             ELSE
                mat%data_c(i,j) = 0.0
             ENDIF
          ENDIF
          IF (i_glob==j_glob) THEN
             IF (mat%l_real) THEN
                mat%data_r(i,j) =  mat%data_r(i,j)/2.0
             ELSE
                mat%data_c(i,j) =  mat%data_c(i,j)/2.0
             ENDIF
          ENDIF
       ENDDO
    ENDDO
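    ! Keep a copy of the local data as the source operand of the transpose-add
    ! below; p?geadd then adds op(tmp) to the already modified matrix.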

    IF (mat%l_real) THEN
       ALLOCATE(tmp_r(mat%matsize1,mat%matsize2))
       tmp_r=mat%data_r
    ELSE
       ALLOCATE(tmp_c(mat%matsize1,mat%matsize2))
       tmp_c=mat%data_c
    END IF
    CALL MPI_BARRIER(mat%blacsdata%mpi_com,err)
    IF (mat%l_real) THEN
#ifdef CPP_SCALAPACK          

       CALL pdgeadd('t',mat%global_size1,mat%global_size2,1.0,tmp_r,1,1,mat%blacsdata%blacs_desc,1.0,mat%data_r,1,1,mat%blacsdata%blacs_desc)
    ELSE
       CALL pzgeadd('c',mat%global_size1,mat%global_size2,CMPLX(1.0,0.0),tmp_c,1,1,mat%blacsdata%blacs_desc,CMPLX(1.0,0.0),mat%data_c,1,1,mat%blacsdata%blacs_desc)
#endif
    END IF


#endif
  END SUBROUTINE generate_full_matrix


  SUBROUTINE mpimat_add_transpose(mat,mat1)
    CLASS(t_mpimat),INTENT(INOUT) ::mat
    CLASS(t_mat),INTENT(INOUT) ::mat1

    INTEGER:: i,ii,n_size,n_rank

    SELECT TYPE(mat1)
    TYPE IS (t_mpimat)
#ifdef CPP_MPI    
    CALL MPI_COMM_RANK(mat%blacsdata%mpi_com,n_rank,i)
    CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com,n_size,i)
#endif
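    ! Note: the direct local indexing below relies on the one-dimensional
    ! column-cyclic distribution with blocksize 1 (l_2d=.FALSE. in mpimat_init).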
    !Set lower part of matrix to zero...
       ii=0
       DO i=n_rank+1,MIN(mat%global_size1,mat%global_size2),n_size
          ii=ii+1
          IF (mat%l_real) THEN
             mat%data_r(i+1:,ii)=0.0
             mat1%data_r(i+1:,ii)=0.0
          ELSE
             mat%data_c(i+1:,ii)=0.0
             mat1%data_c(i+1:,ii)=0.0
          ENDIF
       ENDDO
       IF (mat%l_real) THEN
#ifdef CPP_SCALAPACK          

       CALL pdgeadd('t',mat1%global_size1,mat1%global_size2,1.0,mat1%data_r,1,1,mat1%blacsdata%blacs_desc,1.0,mat%data_r,1,1,mat%blacsdata%blacs_desc)
    ELSE
       CALL pzgeadd('c',mat1%global_size1,mat1%global_size2,CMPLX(1.0,0.0),mat1%data_c,1,1,mat1%blacsdata%blacs_desc,CMPLX(1.0,0.0),mat%data_c,1,1,mat%blacsdata%blacs_desc)
#endif
    END IF
    !Now multiply the diagonal of the matrix by 1/2

    ii=0
    DO i=n_rank+1,MIN(mat%global_size1,mat%global_size2),n_size
       ii=ii+1
       IF (mat%l_real) THEN
          mat%data_r(i,ii)=mat%data_r(i,ii)/2
       ELSE
          mat%data_c(i,ii)=mat%data_c(i,ii)/2
       END IF
    ENDDO
    CLASS default
       CALL judft_error("Inconsistent types in t_mpimat_add_transpose")
    END SELECT
    
  END SUBROUTINE mpimat_add_transpose

  SUBROUTINE mpimat_copy(mat,mat1,n1,n2)
    IMPLICIT NONE
    CLASS(t_mpimat),INTENT(INOUT)::mat
    CLASS(t_mat),INTENT(IN)      ::mat1
    INTEGER,INTENT(IN) ::n1,n2
#ifdef CPP_SCALAPACK
    SELECT TYPE(mat1)
    TYPE IS(t_mpimat)
       IF (mat%l_real) THEN
          CALL pdgemr2d(Mat1%global_size1,mat1%global_size2,mat1%data_r,1,1,mat1%blacsdata%blacs_desc,mat%data_r,n1,n2,mat%blacsdata%blacs_desc,mat%blacsdata%blacs_desc(2))
       ELSE
          CALL pzgemr2d(mat1%global_size1,mat1%global_size2,mat1%data_c,1,1,mat1%blacsdata%blacs_desc,mat%data_c,n1,n2,mat%blacsdata%blacs_desc,mat%blacsdata%blacs_desc(2))
       END IF
    CLASS DEFAULT
       CALL judft_error("Wrong datatype in copy")
    END SELECT
#endif    
  END SUBROUTINE mpimat_copy

  SUBROUTINE from_non_dist(mat,mat1)
    IMPLICIT NONE
    CLASS(t_mpimat),INTENT(INOUT)::mat
    TYPE(t_mat),INTENT(IN)       ::mat1

    INTEGER:: blacs_desc(9),irank,ierr,umap(1,1),np
#ifdef CPP_SCALAPACK
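    ! Describe mat1 as a non-distributed matrix owned entirely by rank 0 of a
    ! 1x1 BLACS grid, then scatter it into the distributed matrix via p?gemr2d.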
    blacs_desc=(/1,-1,mat1%matsize1,mat1%matsize2,mat1%matsize1,mat1%matsize2,0,0,mat1%matsize1/)

    CALL MPI_COMM_RANK(mat%blacsdata%mpi_com,irank,ierr)
    umap(1,1)=0
    CALL BLACS_GET(mat%blacsdata%blacs_desc(2),10,blacs_desc(2))
    CALL BLACS_GRIDMAP(blacs_desc(2),umap,1,1,1)
    IF (mat%l_real) THEN
       CALL pdgemr2d(Mat1%matsize1,mat1%matsize2,mat1%data_r,1,1,blacs_desc,mat%data_r,1,1,mat%blacsdata%blacs_desc,mat%blacsdata%blacs_desc(2))
    ELSE
       CALL pzgemr2d(mat1%matsize1,mat1%matsize2,mat1%data_c,1,1,blacs_desc,mat%data_c,1,1,mat%blacsdata%blacs_desc,mat%blacsdata%blacs_desc(2))
    END IF
#endif    
  END SUBROUTINE from_non_dist
    


  SUBROUTINE mpimat_move(mat,mat1)
    IMPLICIT NONE
    CLASS(t_mpimat),INTENT(INOUT)::mat
    CLASS(t_mat),INTENT(INOUT)   ::mat1
    CALL mat%copy(mat1,1,1)
  END SUBROUTINE mpimat_move

  SUBROUTINE finalize(mat)
    IMPLICIT NONE
    TYPE(t_mpimat),INTENT(INOUT) :: mat
    CALL mpimat_free(mat)
  END SUBROUTINE finalize

  SUBROUTINE finalize_1d(mat)
    IMPLICIT NONE
    
    TYPE(t_mpimat),INTENT(INOUT) :: mat(:)
    INTEGER                      :: i
    DO i = 1,size(mat)
       CALL mpimat_free(mat(i))
    ENDDO
  END SUBROUTINE finalize_1d

  SUBROUTINE finalize_2d(mat)
    IMPLICIT NONE
    
    TYPE(t_mpimat),INTENT(INOUT) :: mat(:,:)
    INTEGER                      :: i,j

    DO i = 1,size(mat, dim=1)
       DO j = 1,size(mat, dim=2)
          CALL mpimat_free(mat(i,j))
      ENDDO
    ENDDO
  END SUBROUTINE finalize_2d
  
  SUBROUTINE finalize_3d(mat)
    IMPLICIT NONE
    
    TYPE(t_mpimat),INTENT(INOUT) :: mat(:,:,:)
    INTEGER                      :: i,j,k

    DO i = 1,size(mat, dim=1)
       DO j = 1,size(mat, dim=2)
         DO k = 1,size(mat, dim=3)
             CALL mpimat_free(mat(i,j,k))
         ENDDO
      ENDDO
    ENDDO
  END SUBROUTINE finalize_3d

  SUBROUTINE mpimat_free(mat)
    IMPLICIT NONE
    CLASS(t_mpimat),INTENT(INOUT) :: mat
    INTEGER :: ierr
    IF (ALLOCATED(mat%data_r)) DEALLOCATE(mat%data_r)
    IF (ALLOCATED(mat%data_c)) DEALLOCATE(mat%data_c)
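    ! The blacsdata is reference counted via no_use: only the last matrix
    ! sharing the BLACS grid releases the context and deallocates the data.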
    IF (ASSOCIATED(mat%blacsdata)) THEN
       IF (mat%blacsdata%no_use>1) THEN
          mat%blacsdata%no_use=mat%blacsdata%no_use-1
          mat%blacsdata=>null()
       ELSE
#ifdef CPP_SCALAPACK    
          CALL BLACS_GRIDEXIT(mat%blacsdata%blacs_desc(2),ierr)
          DEALLOCATE(mat%blacsdata)
#endif
       END IF
    ENDIF
  END SUBROUTINE mpimat_free

  !>Initialization of the distributed matrix.
  !!
  !! The argument l_2d controls the kind of distribution used:
  !!  - TRUE: the matrix uses a two-dimensional ScaLAPACK block-cyclic distribution
  !!  - FALSE: the matrix is distributed in a one-dimensional column-cyclic distribution with blocksize 1,
  !!           as used in the parallel matrix setup of FLEUR
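  !!
  !! Call sketch (illustrative only; all values below are placeholders):
  !!   CALL mat%init_details(l_real=.FALSE.,matsize1=nbas,matsize2=nbas,&
  !!                         mpi_subcom=sub_comm,l_2d=.TRUE.,nb_x=64,nb_y=64)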
  SUBROUTINE mpimat_init(mat,l_real,matsize1,matsize2,mpi_subcom,l_2d,nb_x,nb_y)
    IMPLICIT NONE
    CLASS(t_mpimat)             :: mat
    INTEGER,INTENT(IN),OPTIONAL :: matsize1,matsize2,mpi_subcom
    LOGICAL,INTENT(IN),OPTIONAL :: l_real,l_2d
    INTEGER,INTENT(IN),OPTIONAL :: nb_y,nb_x
#ifdef CPP_SCALAPACK    
    INTEGER::nbx,nby,irank,ierr
    include 'mpif.h'
    nbx=DEFAULT_BLOCKSIZE; nby=DEFAULT_BLOCKSIZE
    IF (PRESENT(nb_x)) nbx=nb_x
    IF (PRESENT(nb_y)) nby=nb_y
    IF (.NOT.(PRESENT(matsize1).AND.PRESENT(matsize2).AND.PRESENT(mpi_subcom).AND.PRESENT(l_real).AND.PRESENT(l_2d)))&
         CALL judft_error("Optional arguments must be present in mpimat_init")
    mat%global_size1=matsize1
    mat%global_size2=matsize2
    ALLOCATE(mat%blacsdata)
    mat%blacsdata%no_use=1
    mat%blacsdata%mpi_com=mpi_subcom
    CALL priv_create_blacsgrid(mat%blacsdata%mpi_com,l_2d,matsize1,matsize2,nbx,nby,&
         mat%blacsdata,mat%matsize1,mat%matsize2)
    CALL mat%alloc(l_real) !Attention: the local sizes were determined in the call to priv_create_blacsgrid
    !check if this matrix is actually distributed over MPI_COMM_SELF
    IF (mpi_subcom==MPI_COMM_SELF) THEN
       CALL MPI_COMM_RANK(mpi_subcom,irank,ierr)
       IF (irank>0) mat%blacsdata%blacs_desc(2)=-1
    END IF
#endif    
  END SUBROUTINE mpimat_init

  SUBROUTINE mpimat_init_template(mat,templ,global_size1,global_size2)
    IMPLICIT NONE
    CLASS(t_mpimat),INTENT(INOUT)  :: mat
    CLASS(t_mat),INTENT(IN)        :: templ
    INTEGER,INTENT(IN),OPTIONAL    :: global_size1,global_size2

    INTEGER::numroc
    EXTERNAL::numroc
    
    SELECT TYPE(templ)
    TYPE IS (t_mpimat)
       mat%l_real=templ%l_real
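       ! If new global sizes are given, a private copy of the template's
       ! blacsdata is made and the local sizes are recomputed with NUMROC;
       ! otherwise the blacsdata is shared and its reference count incremented.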
       IF (PRESENT(global_size1).AND.PRESENT(global_size2)) THEN
          ALLOCATE(mat%blacsdata)
          mat%blacsdata=templ%blacsdata
          mat%blacsdata%no_use=1
          mat%blacsdata%blacs_desc(3)=global_size1
          mat%blacsdata%blacs_desc(4)=global_size2
          mat%global_size1=global_size1
          mat%global_size2=global_size2
#ifdef CPP_SCALAPACK    
          mat%matsize1=NUMROC( global_size1,mat%blacsdata%blacs_desc(5), mat%blacsdata%myrow, mat%blacsdata%blacs_desc(7), mat%blacsdata%nprow )
          mat%matsize2=NUMROC( global_size2,mat%blacsdata%blacs_desc(6), mat%blacsdata%mycol, mat%blacsdata%blacs_desc(8), mat%blacsdata%npcol )
#endif    
       ELSE
          mat%matsize1=templ%matsize1
          mat%matsize2=templ%matsize2
          mat%global_size1=templ%global_size1
          mat%global_size2=templ%global_size2
          mat%blacsdata=>templ%blacsdata
          mat%blacsdata%no_use=mat%blacsdata%no_use+1
       ENDIF
       CALL mat%alloc()
      
       CLASS default
          CALL judft_error("Mixed initialization in t_mpimat not possible(BUG)")
    END SELECT
  END SUBROUTINE mpimat_init_template

    
  SUBROUTINE priv_create_blacsgrid(mpi_subcom,l_2d,m1,m2,nbc,nbr,blacsdata,local_size1,local_size2)
    IMPLICIT NONE
    INTEGER,INTENT(IN) :: mpi_subcom
    INTEGER,INTENT(IN) :: m1,m2
    INTEGER,INTENT(INOUT)::nbc,nbr
    LOGICAL,INTENT(IN) :: l_2d
    type(t_blacsdata),INTENT(OUT)::blacsdata
    INTEGER,INTENT(OUT):: local_size1,local_size2
   
#ifdef CPP_SCALAPACK
    INCLUDE 'mpif.h'
    INTEGER     :: myrowssca,mycolssca
    INTEGER     :: iamblacs,npblacs,np,myid,mycol,myrow
    INTEGER     :: nprow2,npcol2
    INTEGER     :: k,i,j,ictextblacs
    INTEGER     :: ierr

    INTEGER,ALLOCATABLE :: iblacsnums(:),ihelp(:),iusermap(:,:)

    EXTERNAL descinit, blacs_get
    EXTERNAL blacs_pinfo, blacs_gridinit
 
    !Determine rank and no of processors
    CALL MPI_COMM_RANK(mpi_subcom,myid,ierr)
    CALL MPI_COMM_SIZE(mpi_subcom,np,ierr)


    ! Compute the processor grid, as square as possible.
    ! If it cannot be square, use more columns than rows.
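    ! (e.g. np=6 gives a 2x3 grid: nprow=2, npcol=3; np=4 gives the square 2x2 grid)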
    IF (l_2d) THEN
       distloop: DO j=INT(SQRT(REAL(np))),1,-1
          IF ( (np/j) * j == np) THEN
             blacsdata%npcol = np/j
             blacsdata%nprow = j
             EXIT distloop
          ENDIF
       ENDDO distloop
    ELSE
       nbc=1
       nbr=MAX(m1,m2)
       blacsdata%npcol=np
       blacsdata%nprow=1
    ENDIF
    ALLOCATE(iblacsnums(np),ihelp(np),iusermap(blacsdata%nprow,blacsdata%npcol))

    !   A blacsdata%nprow*blacsdata%npcol processor grid will be created
    !   Row and column index myrow, mycol of this processor in the grid
    !   and distribution of A and B in ScaLAPACK
    !   The local processor will get myrowssca rows and mycolssca columns
    !   of A and B
    !

    myrow = myid/blacsdata%npcol  ! my row number in the BLACS blacsdata%nprow*blacsdata%npcol grid
    mycol = myid -(myid/blacsdata%npcol)*blacsdata%npcol  ! my column number in the BLACS blacsdata%nprow*blacsdata%npcol grid
    !
    !  Determine how many rows (myrowssca) and columns (mycolssca) of the
    !  matrix the local process holds in the block-cyclic distribution
    !
    myrowssca=(m1-1)/(nbr*blacsdata%nprow)*nbr+ MIN(MAX(m1-(m1-1)/(nbr*blacsdata%nprow)*nbr*blacsdata%nprow-nbr*myrow,0),nbr)
    !     Number of rows the local process gets in ScaLAPACK distribution
    mycolssca=(m2-1)/(nbc*blacsdata%npcol)*nbc+ MIN(MAX(m2-(m2-1)/(nbc*blacsdata%npcol)*nbc*blacsdata%npcol-nbc*mycol,0),nbc)
  

    !Get BLACS ranks for all MPI ranks
    CALL BLACS_PINFO(iamblacs,npblacs)  ! iamblacs = local process rank (e.g. myid)
    ! npblacs  = number of available processes
    iblacsnums=-2
    ihelp=-2
    ihelp(myid+1)=iamblacs ! Get the Blacs id corresponding to the MPI id
    CALL MPI_ALLREDUCE(ihelp, iblacsnums, np,MPI_INTEGER,MPI_MAX,mpi_subcom,ierr)
    IF (ierr.NE.0) STOP 'Error in allreduce for BLACS nums' 

    !     iblacsnums(i) is the BLACS-process number of MPI-process i-1
    k = 1
    DO i = 1, blacsdata%nprow
       DO j = 1, blacsdata%npcol
          iusermap(i,j) = iblacsnums(k)
          k = k + 1
       ENDDO
    ENDDO
!#ifdef CPP_BLACSDEFAULT    
    !Get the Blacs default context
    CALL BLACS_GET(0,0,ictextblacs)
!#else
!    ictextblacs=mpi_subcom
!#endif    
    ! Create the Grid
    CALL BLACS_GRIDMAP(ictextblacs,iusermap,size(iusermap,1),blacsdata%nprow,blacsdata%npcol)
    !     Now check whether the BLACS grid is the one we wanted
    CALL BLACS_GRIDINFO(ictextblacs, nprow2,npcol2,blacsdata%myrow,blacsdata%mycol)
    IF (nprow2 /= blacsdata%nprow) THEN
       WRITE(6,*) 'Wrong number of rows in BLACS grid'
       WRITE(6,*) 'nprow=',blacsdata%nprow,' nprow2=',nprow2
       call judft_error('Wrong number of rows in BLACS grid')
    ENDIF
    IF (npcol2 /= blacsdata%npcol) THEN
       WRITE(6,*) 'Wrong number of columns in BLACS grid'
       WRITE(6,*) 'npcol=',blacsdata%npcol,' npcol2=',npcol2
       call judft_error('Wrong number of columns in BLACS grid')

    ENDIF

    !Create the descriptors
    CALL descinit(blacsdata%blacs_desc,m1,m2,nbr,nbc,0,0,ictextblacs,myrowssca,ierr)
    IF (ierr /=0 ) call judft_error('Creation of BLACS descriptor failed')
    local_size1=myrowssca
    local_size2=mycolssca
#endif
  END SUBROUTINE priv_create_blacsgrid
END MODULE m_types_mpimat