!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

MODULE m_chani
CONTAINS
  SUBROUTINE chani(M,Neigd, Myid,Np,Sub_comm,mpi_comm, &
       Eig,Num,hamOvlp,zMat)
    !
    !----------------------------------------------------
    !- Parallel eigensystem solver - driver routine; gb99
    !  Uses the SCALAPACK for the actual diagonalization
    !
    ! m ........ actual (=leading) dimension of the full a & b matrices;
    !            must be the problem size, since the input a, b are one-dimensional
    !            (column cyclic, about m/np columns per process) and are
    !            redistributed here to two-dimensional matrices;
    !            also the actual (=leading) dimension of the eigenvector matrix z(,)
    ! neigd..... second (=column) dimension of the eigenvector matrix
    ! myid ..... id of this process (normally irank)
    ! np ....... number of processes (normally isize)
    ! SUB_COMM.. communicator for MPI
    ! a,b ...... packed (sub)matrices, here expanded to non-packed form,
    !            passed in hamOvlp (a_r/b_r for real, a_c/b_c for complex)
    ! z,eig .... eigenvectors (in zMat) and eigenvalues, output
    ! num ...... number of eigenvalues searched (and found) on this node:
    !            on input, overall number of eigenvalues searched,
    !            on output, local number of eigenvalues found
    !
    !----------------------------------------------------
    !
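    ! Illustrative call from the caller side (the caller-side variable names used
    ! here are only examples, not actual FLEUR identifiers):
    !   CALL chani(matsize, neigd, irank, isize, sub_comm, mpi_comm, eig, ne_found, hamOvlp, zMat)
    !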
!#include"cpp_arch.h"
#include"cpp_double.h"
    USE m_juDFT
    USE m_types
    USE m_subredist1
    USE m_subredist2
    IMPLICIT NONE
    INCLUDE 'mpif.h'

    INTEGER, INTENT (IN) :: neigd,m
    INTEGER, INTENT (IN) :: SUB_COMM,np,myid,mpi_comm
    INTEGER, INTENT (INOUT) :: num
    REAL,    INTENT   (OUT) :: eig(neigd)
    TYPE(t_hamOvlp),INTENT(INOUT) :: hamOvlp
    TYPE(t_zMat),INTENT(INOUT)    :: zMat
 
    !...  Local variables
    !
    INTEGER nc,ic,ir,n_sym,jsym,num_j,icm,n_bound
    INTEGER i ,j ,k ,l, info, i_plus, i_dim
    INTEGER nprow,npcol,myrowssca, nsq, nprow2, npcol2
    INTEGER myrow,mycol,mycolssca, ierr, me,ierr2
    INTEGER iamblacs,myrowblacs,mycolblacs,npblacs
    INTEGER ictxtblacs,err
    INTEGER, ALLOCATABLE :: iwork(:),iusermap(:,:)
    INTEGER, ALLOCATABLE :: iblacsnums(:),ihelp(:)
    REAL,    ALLOCATABLE :: dwork(:)
    REAL,    ALLOCATABLE :: eigvec_r(:,:)
    COMPLEX, ALLOCATABLE :: eigvec_c(:,:)
    REAL,    ALLOCATABLE :: rwork(:)
    INTEGER              :: lrwork

    !
    !  ScaLAPACK things
    CHARACTER (len=1)    :: uplo
    INTEGER, PARAMETER   :: dlen_=9
    INTEGER              :: desca(dlen_),desceigv(dlen_)
    INTEGER              :: num1,num2,liwork,lwork2,np0,mq0
    INTEGER              :: iceil, numroc, nn, nb
    INTEGER, ALLOCATABLE :: ifail(:), iclustr(:)
    REAL                 :: abstol,orfac=1.E-4,CPP_LAPACK_slamch
    REAL,ALLOCATABLE     :: eig2(:), gap(:)
    REAL,    ALLOCATABLE :: asca_r(:,:), bsca_r(:,:),work2_r(:)
    COMPLEX, ALLOCATABLE :: asca_c(:,:), bsca_c(:,:),work2_c(:)

    EXTERNAL iceil, numroc
    EXTERNAL CPP_LAPACK_slamch, descinit
    EXTERNAL blacs_pinfo, blacs_gridinit
    EXTERNAL MPI_COMM_DUP
    
    !
    ! determine actual number of columns of input matrices A and B
    ! nc is the number of columns this process gets (about m/np)
    !
    nc = 0
    DO i = myid+1, m, np
       nc = nc + 1
    ENDDO
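    !   (e.g. m=10 columns on np=4 processes: process 0 owns columns 1,5,9 -> nc=3,
    !    process 3 owns columns 4,8 -> nc=2)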
    !IF (nc.GT.n) THEN
    !   WRITE (6,*) myid,'gets more columns than allowed:'
    !   WRITE (6,*) myid,'will get',nc,' columns, only',n,' allowed'
    !   CALL juDFT_error("chani: nc > n",calledby ="chani")
    !ENDIF
    !
    ! determine block size
    !
    nb = 20
    IF (m.GT.2048)   nb = 30 !2
    IF (m.GT.8*2048) nb = 60 !4
    !
    ! transfer to ScaLAPACK block cyclic 2d distribution with
    ! block-size nb
    !
    ! compute processor grid, as square as possible
    ! If np is not a perfect square, the grid gets more columns than rows
    nsq = INT(SQRT(REAL(np)))
    gridloop:DO j=nsq,1,-1
       IF ( (np/j) * j == np) THEN
          npcol = np/j
          nprow = j
          EXIT gridloop
       ENDIF
    ENDDO gridloop
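    !   (e.g. np=6 gives nprow=2, npcol=3; for a prime np the loop ends with
    !    nprow=1, npcol=np)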
    !
    !   An nprow*npcol processor grid will be created
    !   Row and column index myrow, mycol of this processor in the grid
    !   and distribution of A and B in ScaLAPACK
    !   The local processor will get myrowssca rows and mycolssca columns
    !   of A and B
    !
    myrow = myid/npcol  ! my row number in the BLACS nprow*npcol grid
    mycol = myid -(myid/npcol)*npcol  ! my column number in the BLACS nprow*npcol grid
    !
    !  Now allocate asca to hold the elements of achi (or the receive buffer)
    !
    myrowssca=(m-1)/(nb*nprow)*nb+&
         MIN(MAX(m-(m-1)/(nb*nprow)*nb*nprow-nb*myrow,0),nb)
    !     Number of rows the local process gets in ScaLAPACK distribution
    mycolssca=(m-1)/(nb*npcol)*nb+ &
         MIN(MAX(m-(m-1)/(nb*npcol)*nb*npcol-nb*mycol,0),nb)
    !     Number of columns the local process gets in ScaLAPACK distribution
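    !   (block-cyclic example: m=100, nb=20, nprow=2 distributes the five row blocks
    !    alternately, so process row 0 gets myrowssca=60 rows and row 1 gets 40)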
    if (hamOvlp%l_real) THEN
       ALLOCATE ( asca_r(myrowssca,mycolssca), stat=err )
    else
       ALLOCATE ( asca_c(myrowssca,mycolssca), stat=err )
    endif
    IF (err.NE.0) THEN
       WRITE (*,*) 'In chani an error occurred during the allocation of'
       WRITE (*,*) 'asca: ',err,' size: ',myrowssca*mycolssca
       CALL juDFT_error("allocate in chani",calledby ="chani")
    ENDIF
    if (hamOvlp%l_real) THEN
       asca_r=0.0
       ALLOCATE ( bsca_r(myrowssca,mycolssca), stat=err  )
    else
       asca_c=0.0
       ALLOCATE ( bsca_c(myrowssca,mycolssca), stat=err  )
    endif
    IF (err.NE.0) THEN
       WRITE (*,*) 'In chani an error occurred during the allocation of'
       WRITE (*,*) 'bsca :',err
       CALL juDFT_error("allocate in chani",calledby ="chani")
    ENDIF
    if (hamOvlp%l_real) THEN
       bsca_r=0.0
       CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb ,achi_r=hamovlp%a_r,asca_r=asca_r)
       CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb ,achi_r=hamovlp%b_r,asca_r=bsca_r)
    else
       bsca_c=0.0
       CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb ,achi_c=hamovlp%a_c,asca_c=asca_c)
       CALL subredist1(m,myrowssca,SUB_COMM, nprow, npcol, myid, ierr, nb ,achi_c=hamovlp%b_c,asca_c=bsca_c)
    end if
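    !   asca/bsca now hold the Hamiltonian (a) and overlap (b) matrices in the
    !   2-dimensional block-cyclic layout described by desca (initialized below)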
    CALL BLACS_PINFO(iamblacs,npblacs)  ! iamblacs = local process rank (e.g. myid)
    ! npblacs  = number of available processes
    IF (npblacs /= np) THEN
       !gb         WRITE (6,*) 'Number of procs differ between',
       !gb     &        ' BLACS and MPI'
       IF (np > npblacs) THEN
          WRITE(6,*) 'Have to take SUB_COMM for BLACS grid!' ! this should not happen
       ENDIF
    ENDIF
    ALLOCATE( iblacsnums(np) )
    iblacsnums=-2
    ALLOCATE (ihelp(np))
    ihelp=-2
    ihelp(myid+1)=iamblacs ! Get the Blacs id corresponding to the MPI id
    CALL MPI_ALLREDUCE(ihelp, iblacsnums, np,MPI_INTEGER,MPI_MAX,SUB_COMM,ierr)
    IF (ierr.NE.0) THEN
       WRITE (6,*) 'Error in allreduce for BLACS nums'
       CALL juDFT_error('Error in allreduce for BLACS nums',calledby ='chani')
    ENDIF
    !     iblacsnums(i) is the BLACS-process number of MPI-process i-1
    ALLOCATE ( iusermap(nprow,npcol) ) ! Usermap for BLACS_GRIDMAP
    k = 1
    DO i = 1, nprow
       DO j = 1, npcol
          iusermap(i,j) = iblacsnums(k)
          k = k + 1
       ENDDO
    ENDDO
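    !   iusermap(i,j) is the BLACS process number that shall sit at grid position
    !   (i-1,j-1); BLACS_GRIDMAP below builds the process grid from this map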
    !gb      WRITE(*,*) np,iblacsnums
    !
    !     Take a copy of SUB_COMM as system context for BLACS_GRIDMAP
    !     We cannot take SUB_COMM itself as it will be overwritten with the
    !     BLACS context for the grid constructed
    !
    !      CALL MPI_COMM_DUP(SUB_COMM,ictxtblacs,ierr)
    !I do not understand this, perhaps we should use MPI_COMM_WRLD???
    !      CALL MPI_COMM_DUP(MPI_COMM_WRLD,ictxtblacs,ierr)
    CALL MPI_COMM_DUP(MPI_COMM,ictxtblacs,ierr)
    IF (ierr /=0 ) THEN
       WRITE(6,*) 'MPI_COMM_DUP failed'
       CALL juDFT_error('MPI_COMM_DUP failed',calledby='chani')
    ENDIF
    CALL BLACS_GRIDMAP(ictxtblacs,iusermap,nprow,nprow,npcol)
    !     Now check whether the BLACS grid is the one we wanted
    CALL BLACS_GRIDINFO(ictxtblacs, nprow2,npcol2,myrowblacs,mycolblacs)
    IF (nprow2 /= nprow) THEN
       WRITE(6,*) 'Wrong number of rows in BLACS grid'
       WRITE(6,*) 'nprow=',nprow,' nprow2=',nprow2
       ierr = -nprow2
       CALL juDFT_error('Wrong number of rows in BLACS grid',calledby= 'chani')
    ENDIF
    IF (npcol2 /= npcol) THEN
       WRITE(6,*) 'Wrong number of columns in BLACS grid'
       WRITE(6,*) 'npcol=',npcol,' npcol2=',npcol2
       ierr = -npcol2
       CALL juDFT_error('Wrong number of columns in BLACS grid', calledby ='chani')

    ENDIF
    IF (myrowblacs.NE.myrow) THEN
       WRITE(6,*) 'My row in BLACS is',myrowblacs
       WRITE(6,*) 'It is not',myrow
       !CALL CPP_flush(6)
    ENDIF
    IF (mycolblacs.NE.mycol) THEN
       WRITE(6,*) 'My column in BLACS is',mycolblacs
       WRITE(6,*) 'It is not',mycol
       !CALL CPP_flush(6)
    ENDIF
    !
    CALL descinit(DESCA,m,m,nb,nb,0,0,ictxtblacs,myrowssca,ierr)
    CALL descinit(DESCeigv,m,m,nb,nb,0,0,ictxtblacs, myrowssca,ierr)
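    !   desca/desceigv describe m x m matrices in nb x nb blocks, starting on
    !   process (0,0), with local leading dimension myrowssca (descinit argument
    !   order: desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)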

    abstol=2.0*CPP_LAPACK_slamch('S') ! PDLAMCH gave an error on ZAMpano
    ALLOCATE ( eig2(m), stat=err ) ! The eigenvalue array for ScaLAPACK
    IF (err.NE.0) THEN
       WRITE (*,*) 'In chani an error occurred during the allocation of'
       WRITE (*,*) 'eig2 :',err
       CALL juDFT_error('Failed to allocate "eig2"', calledby ='chani')
    ENDIF
    !      write(*,*) 'c :',myrowssca,mycolssca,desceigv
    if (hamovlp%l_real) THEN
       ALLOCATE ( eigvec_r(myrowssca,mycolssca), stat=err ) ! Eigenvectors for ScaLAPACK
    else
       ALLOCATE ( eigvec_c(myrowssca,mycolssca), stat=err ) ! Eigenvectors for ScaLAPACK
    end if
    IF (err.NE.0) THEN
       WRITE (*,*) 'In chani an error occurred during the allocation of'
       WRITE (*,*) 'eigvec :',err
       CALL juDFT_error('Failed to allocate "eigvec"', calledby ='chani')
    ENDIF
    !
    nn=MAX(MAX(m,nb),2)
    np0=numroc(nn,nb,0,0,nprow)
    mq0=numroc(MAX(MAX(neigd,nb),2),nb,0,0,npcol)
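    !   np0/mq0 are upper bounds for the local row/column counts (via numroc); they
    !   enter the workspace sizes below, which follow the minimum-workspace formulas
    !   documented for pdsygvx/pzhegvx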
    if (hamovlp%l_real) THEN
       lwork2=5*m+MAX(5*nn,np0*mq0+2*nb*nb)+ iceil(neigd,nprow*npcol)*nn
       ALLOCATE ( work2_r(lwork2+10*m), stat=err ) ! Allocate more in case of clusters
    else
       lwork2=m+MAX(nb*(np0+1),3)
       ALLOCATE ( work2_c(lwork2), stat=err )
    endif
    IF (err.NE.0) THEN 
       WRITE (*,*) 'work2  :',err,lwork2
       CALL juDFT_error('Failed to allocate "work2"', calledby ='chani')
    ENDIF

    liwork=6*MAX(MAX(m,nprow*npcol+1),4)
    ALLOCATE ( iwork(liwork), stat=err )
    IF (err.NE.0) THEN
       WRITE (*,*) 'iwork  :',err,liwork
       CALL juDFT_error('Failed to allocate "iwork"', calledby ='chani')
    ENDIF
    ALLOCATE ( ifail(m), stat=err )
    IF (err.NE.0) THEN
       WRITE (*,*) 'ifail  :',err,m
       CALL juDFT_error('Failed to allocate "ifail"', calledby ='chani')
    ENDIF
    ALLOCATE ( iclustr(2*nprow*npcol), stat=err )
    IF (err.NE.0) THEN
       WRITE (*,*) 'iclustr:',err,2*nprow*npcol
       CALL juDFT_error('Failed to allocate "iclustr"', calledby ='chani')
    ENDIF
    ALLOCATE ( gap(nprow*npcol), stat=err )
    IF (err.NE.0) THEN
       WRITE (*,*) 'gap    :',err,nprow*npcol
       CALL juDFT_error('Failed to allocate "gap"', calledby ='chani')
    ENDIF
    !
    !     Compute size of workspace
    !
    if (hamovlp%l_real) THEN
    uplo='U'
    CALL CPP_LAPACK_pdsygvx(1,'V','I','U',m,asca_r,1,1,desca,bsca_r,1,1, desca,&
         0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,eigvec_r,1,1,&
         desceigv,work2_r,-1,iwork,-1,ifail,iclustr, gap,ierr)
    IF ( work2_r(1).GT.lwork2) THEN
       lwork2 = work2_r(1)
       DEALLOCATE (work2_r)
       ALLOCATE ( work2_r(lwork2+20*m), stat=err ) ! Allocate even more in case of clusters
       IF (err.NE.0) THEN
          WRITE (*,*) 'work2  :',err,lwork2
          CALL juDFT_error('Failed to allocate "work2"', calledby ='chani')
       ENDIF
    ENDIF
    else
    lrwork=4*m+MAX(5*nn,np0*mq0)+ iceil(neigd,nprow*npcol)*nn
    ! Allocate more in case of clusters
    ALLOCATE(rwork(lrwork+10*m), stat=err)
    IF (err /= 0) THEN
       WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed'
       CALL juDFT_error('Failed to allocate "rwork"', calledby ='chani')
    ENDIF

    CALL CPP_LAPACK_pzhegvx(1,'V','I','U',m,asca_c,1,1,desca,bsca_c,1,1, desca,&
         0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,eigvec_c,1,1,&
         desceigv,work2_c,-1,rwork,-1,iwork,-1,ifail,iclustr,&
         gap,ierr)
    IF (ABS(work2_c(1)).GT.lwork2) THEN
       lwork2=work2_c(1)
       DEALLOCATE (work2_c)
       ALLOCATE (work2_c(lwork2), stat=err)
       IF (err /= 0) THEN
          WRITE (*,*) 'ERROR: chani.F: Allocating work2 failed:',lwork2
          CALL juDFT_error('Failed to allocate "work2"', calledby ='chani')
       ENDIF
    ENDIF
    IF (rwork(1).GT.lrwork) THEN
       lrwork=rwork(1)
       DEALLOCATE(rwork)
       ! Allocate even more in case of clusters
       ALLOCATE (rwork(lrwork+20*m), stat=err)
       IF (err /= 0) THEN
          WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed: ', lrwork+20*m
          CALL juDFT_error('Failed to allocate "rwork"', calledby ='chani')
       ENDIF
    ENDIF
    endif
    IF (iwork(1) .GT. liwork) THEN
       liwork = iwork(1)
       DEALLOCATE (iwork)
       ALLOCATE (iwork(liwork), stat=err)
       IF (err /= 0) THEN
          WRITE (*,*) 'ERROR: chani.F: Allocating iwork failed: ',liwork
          CALL juDFT_error('Failed to allocate "iwork"', calledby ='chani')
       ENDIF
    ENDIF
    !
    !     Now solve generalized eigenvalue problem
    !
    if (hamovlp%l_real) THEN
    CALL CPP_LAPACK_pdsygvx(1,'V','I','U',m,asca_r,1,1,desca,bsca_r,1,1, desca,&
         1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,eigvec_r,1,1,&
         desceigv,work2_r,lwork2,iwork,liwork,ifail,iclustr,&
         gap,ierr)
    else
    CALL CPP_LAPACK_pzhegvx(1,'V','I','U',m,asca_c,1,1,desca,bsca_c,1,1, desca,&
         1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,eigvec_c,1,1,&
         desceigv,work2_c,lwork2,rwork,lrwork,iwork,liwork,&
         ifail,iclustr,gap,ierr)
    DEALLOCATE(rwork)
    endif
    IF (ierr .NE. 0) THEN
       IF (ierr /= 2) WRITE (6,*) myid,' error in pzhegvx/pdsygvx, ierr=',ierr
       IF (ierr <= 0) WRITE (6,*) myid,' illegal input argument'
       IF (MOD(ierr,2) /= 0) THEN
          WRITE (6,*) myid,'some eigenvectors failed to converge'
          eigs: DO i = 1, neigd
             IF (ifail(i) /= 0) THEN
                WRITE (6,*) myid,' eigenvector',ifail(i), 'failed to converge'
             ELSE
                EXIT eigs
             ENDIF
          ENDDO eigs
          !CALL CPP_flush(6)
       ENDIF
       IF (MOD(ierr/4,2).NE.0) THEN
          WRITE(6,*) myid,' only',num2,' eigenvectors converged'
          !CALL CPP_flush(6)
       ENDIF
       IF (MOD(ierr/8,2).NE.0) THEN
          WRITE(6,*) myid,' PDSTEBZ failed to compute eigenvalues'
          !CALL CPP_flush(6)
       ENDIF
       IF (MOD(ierr/16,2).NE.0) THEN
          WRITE(6,*) myid,' B was not positive definite, Cholesky failed at',ifail(1)
          !CALL CPP_flush(6)
       ENDIF
    ENDIF
    IF (num2 < num1) THEN
       IF (myid ==0) THEN
          WRITE(6,*) 'Not all wanted eigenvalues were found'
          WRITE(6,*) 'number of eigenvalues/vectors wanted',num1
          WRITE(6,*) 'number of eigenvalues/vectors found',num2
          !CALL CPP_flush(6)
       ENDIF
    ENDIF
    ierr=0
    CALL BLACS_GRIDEXIT(ictxtblacs,ierr)
    IF (ierr /= 0 ) THEN
       WRITE (6,*) 'blacs_gridexit failed, ierr=',ierr
    ENDIF
    DEALLOCATE(iwork)
    DEALLOCATE(ifail)
    DEALLOCATE(iclustr)
    DEALLOCATE(gap)
    !
    !     Copy the eigenvalues this process is responsible for into eig, i.e. for
    !     process i these are eigenvalues i+1, np+i+1, 2*np+i+1, ...
    !     Each process keeps about num = num2/np eigenvalues
    !
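    !     (e.g. num2=10 eigenvalues on np=4 processes: process 0 keeps eig2(1),
    !      eig2(5), eig2(9) -> num=3; process 3 keeps eig2(4), eig2(8) -> num=2)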
    num=FLOOR(REAL(num2)/np)
    IF (myid.LT.num2-(num2/np)*np) num=num+1
    k=1
    DO i=myid+1,num2,np
       eig(k)=eig2(i)
       k=k+1
    ENDDO
    DEALLOCATE(eig2)
    !
    !     Redistribute eigvec from the ScaLAPACK distribution so that each process
    !     holds the eigenvectors corresponding to its eigenvalues as above
    !
    if (hamovlp%l_real) THEN
       CALL subredist2(m,num2,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb,zmat%z_r,eigvec_r)
    ELSE
       CALL subredist2(m,num2,myrowssca,SUB_COMM,nprow,npcol, myid,ierr,nb,achi_c=zmat%z_c,asca_c=eigvec_c)
    end if
    !
    !DEALLOCATE ( eigvec)
    DEALLOCATE ( iblacsnums )
    DEALLOCATE ( ihelp )
    DEALLOCATE ( iusermap )

  END SUBROUTINE chani
end MODULE m_chani