Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
fleur
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Anoop Chandran
fleur
Commits
dd2e78bf
Commit
dd2e78bf
authored
Oct 07, 2016
by
Daniel Wortmann
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Files missing for last commit
parent
45b7d160
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
762 additions
and
0 deletions
+762
-0
diagonalization/elpa_cpp.F90
diagonalization/elpa_cpp.F90
+190
-0
diagonalization/packed_to_full.f90
diagonalization/packed_to_full.f90
+55
-0
diagonalization/zsymsecloc_cpp.F90
diagonalization/zsymsecloc_cpp.F90
+517
-0
No files found.
diagonalization/elpa_cpp.F90
0 → 100644
View file @
dd2e78bf
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
USE
m_juDFT
USE
elpa1
#ifdef CPP_ELPA2
USE
elpa2
#endif
IMPLICIT
NONE
INCLUDE
'mpif.h'
INTEGER
,
INTENT
(
IN
)
::
m
,
n
INTEGER
,
INTENT
(
IN
)
::
SUB_COMM
INTEGER
,
INTENT
(
INOUT
)
::
num
REAL
,
INTENT
(
OUT
)
::
eig
(:)
CPP_REALCOMPLEX
,
ALLOCATABLE
,
INTENT
(
INOUT
)
::
a
(:),
b
(:)
CPP_REALCOMPLEX
,
INTENT
(
OUT
)
::
z
(:,:)
!
#include "elpa_cpp.F90"
END
SUBROUTINE
elpa_r
!... Local variables
!
INTEGER
::
sc_desc
(
9
)
!SCALAPACK DESCRIPTOR
INTEGER
::
np
,
nb
,
myid
,
npcol
,
nprow
,
mycol
,
myrow
,
myrowssca
INTEGER
::
mycolssca
,
ictextblacs
,
mpi_comm_rows
INTEGER
::
mpi_comm_cols
INTEGER
::
num2
LOGICAL
::
ok
INTEGER
::
err
,
ierr
INTEGER
::
i
,
k
,
n_col
,
n_row
! large distributed Matrices
REAL
,
ALLOCATABLE
::
eig2
(:)
CPP_REALCOMPLEX
,
ALLOCATABLE
::
asca
(:,:),
bsca
(:,:),
tmp2
(:,:)
CPP_REALCOMPLEX
,
ALLOCATABLE
::
eigvec
(:,:)
INTEGER
,
EXTERNAL
::
numroc
,
indxl2g
!SCALAPACK functions
CALL
priv_create_blacsgrid
(
sub_comm
,
m
,
np
,
nb
,
myid
,
npcol
,
nprow
,
mycol
,
myrow
,
myrowssca
,
mycolssca
,&
ictextblacs
,
sc_desc
,
mpi_comm_rows
,
mpi_comm_cols
)
! Number of columns the local process gets in ScaLAPACK distribution
ALLOCATE
(
asca
(
myrowssca
,
mycolssca
),
stat
=
err
)
IF
(
err
/
=
0
)
CALL
juDFT_error
(
"allocate asca"
,
calledby
=
"chani_elpa"
)
asca
=
0.0
ALLOCATE
(
bsca
(
myrowssca
,
mycolssca
),
stat
=
err
)
IF
(
err
/
=
0
)
CALL
juDFT_error
(
"allocate bsca"
,
calledby
=
"chani_elpa"
)
bsca
=
0.0
!
! transfer to ScaLAPACK block cyclic 2d distribution with
! block-size nb
!
!print *,"Before subredist"
CALL
subredist1
(
a
,
m
,
asca
,
myrowssca
,
SUB_COMM
,
nprow
,
npcol
,
myid
,
ierr
,
nb
)
CALL
subredist1
(
b
,
m
,
bsca
,
myrowssca
,
SUB_COMM
,
nprow
,
npcol
,
myid
,
ierr
,
nb
)
! for saving memory one might deallocate(a,b)
!print *,"Before Allocate"
ALLOCATE
(
eig2
(
m
),
stat
=
err
)
! The eigenvalue array for ScaLAPACK
IF
(
err
.NE.
0
)
CALL
juDFT_error
(
'Failed to allocated "eig2"'
,
calledby
=
'elpa'
)
ALLOCATE
(
eigvec
(
myrowssca
,
mycolssca
),
stat
=
err
)
! Eigenvectors for ScaLAPACK
IF
(
err
.NE.
0
)
CALL
juDFT_error
(
'Failed to allocated "eigvec"'
,
calledby
=
'elpa'
)
ALLOCATE
(
tmp2
(
myrowssca
,
mycolssca
),
stat
=
err
)
! tmp_array
IF
(
err
.NE.
0
)
CALL
juDFT_error
(
'Failed to allocated "tmp2"'
,
calledby
=
'elpa'
)
!print *,"Before elpa"
!ELPA -start here
! Solve generalized problem
!
! 1. Calculate Cholesky factorization of Matrix B = U**T * U
! and invert triangular matrix U
!
! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed.
! The only reason having them is that the Scalapack counterpart
! PDPOTRF very often fails on higher processor numbers for unknown reasons!
#ifdef CPP_ELPA_NEW
CALL
CPP_CHOLESKY
(
m
,
bsca
,
SIZE
(
bsca
,
1
),
nb
,
mycolssca
,
mpi_comm_rows
,
mpi_comm_cols
,
.false.
,
ok
)
CALL
CPP_invert_trm
(
m
,
bsca
,
SIZE
(
bsca
,
1
),
nb
,
mycolssca
,
mpi_comm_rows
,
mpi_comm_cols
,
.false.
,
ok
)
#else
CALL
CPP_CHOLESKY
(
m
,
bsca
,
SIZE
(
bsca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
)
CALL
CPP_invert_trm
(
m
,
bsca
,
SIZE
(
bsca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
)
#endif
! 2. Calculate U**-T * A * U**-1
! 2a. eigvec = U**-T * A
! A is only set in the upper half, solve_evp_real needs a full matrix
! Set lower half from upper half
CALL
CPP_transpose
(
m
,
m
,
CPP_ONE
,
asca
,
1
,
1
,
sc_desc
,
CPP_ZERO
,
eigvec
,
1
,
1
,
sc_desc
)
DO
i
=
1
,
SIZE
(
asca
,
1
)
! Get global column corresponding to i and number of local rows up to
! and including the diagonal, these are unchanged in A
n_col
=
indxl2g
(
i
,
nb
,
mycol
,
0
,
npcol
)
n_row
=
numroc
(
n_col
,
nb
,
myrow
,
0
,
nprow
)
asca
(
n_row
+1
:
myrowssca
,
i
)
=
eigvec
(
n_row
+1
:
myrowssca
,
i
)
ENDDO
CALL
CPP_mult
(
'U'
,
'L'
,
m
,
m
,
bsca
,
myrowssca
,
asca
,
SIZE
(
asca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
,
eigvec
,
myrowssca
)
! 2b. tmp2 = eigvec**T
CALL
CPP_transpose
(
m
,
m
,
CPP_ONE
,
eigvec
,
1
,
1
,
sc_desc
,
CPP_ZERO
,
tmp2
,
1
,
1
,
sc_desc
)
! 2c. A = U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
CALL
CPP_mult
(
'U'
,
'U'
,
m
,
m
,
bsca
,
SIZE
(
bsca
,
1
),
tmp2
,&
SIZE
(
tmp2
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
,
asca
,
SIZE
(
asca
,
1
))
! A is only set in the upper half, solve_evp_real needs a full matrix
! Set lower half from upper half
CALL
CPP_transpose
(
m
,
m
,
CPP_ONE
,
asca
,
1
,
1
,
sc_desc
,
CPP_ZERO
,
eigvec
,
1
,
1
,
sc_desc
)
DO
i
=
1
,
SIZE
(
asca
,
1
)
! Get global column corresponding to i and number of local rows up to
! and including the diagonal, these are unchanged in A
n_col
=
indxl2g
(
i
,
nb
,
mycol
,
0
,
npcol
)
n_row
=
numroc
(
n_col
,
nb
,
myrow
,
0
,
nprow
)
asca
(
n_row
+1
:
myrowssca
,
i
)
=
eigvec
(
n_row
+1
:
myrowssca
,
i
)
ENDDO
! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
! Eigenvectors go to eigvec
num2
=
num
#ifdef CPP_ELPA_NEW
#ifdef CPP_ELPA2
err
=
CPP_solve_evp_2stage
(
m
,
num2
,
asca
,
SIZE
(
asca
,
1
),&
eig2
,
eigvec
,
SIZE
(
asca
,
1
),
nb
,
mycolssca
,
mpi_comm_rows
,
mpi_comm_cols
,
sub_comm
)
#else
err
=
CPP_solve_evp
(
m
,
num2
,
asca
,
SIZE
(
asca
,
1
),&
eig2
,
eigvec
,
SIZE
(
asca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
)
#endif
#else
#ifdef CPP_ELPA2
CALL
CPP_solve_evp_2stage
(
m
,&
num2
,
asca
,
SIZE
(
asca
,
1
),
eig2
,
eigvec
,
SIZE
(
asca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
,
sub_comm
)
#else
CALL
CPP_solve_evp
(
m
,
num2
,&
asca
,
SIZE
(
asca
,
1
),
eig2
,
eigvec
,
SIZE
(
asca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
)
#endif
#endif
! 4. Backtransform eigenvectors: Z = U**-1 * eigvec
! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
CALL
CPP_transpose
(
m
,
m
,
CPP_ONE
,
bsca
,
1
,
1
,
sc_desc
,
CPP_ZERO
,
tmp2
,
1
,
1
,
sc_desc
)
CALL
CPP_mult
(
'L'
,
'N'
,
m
,
num2
,
tmp2
,
SIZE
(
asca
,
1
),&
eigvec
,
SIZE
(
asca
,
1
),
nb
,
mpi_comm_rows
,
mpi_comm_cols
,
asca
,
SIZE
(
asca
,
1
))
! END of ELPA stuff
CALL
BLACS_GRIDEXIT
(
ictextblacs
,
ierr
)
CALL
MPI_COMM_FREE
(
mpi_comm_rows
,
ierr
)
CALL
MPI_COMM_FREE
(
mpi_comm_cols
,
ierr
)
!print *,"elpa done"
!
! Put those eigenvalues expected by chani to eig, i.e. for
! process i these are eigenvalues i+1, np+i+1, 2*np+i+1...
! Only num=num2/np eigenvalues per process
!
num
=
FLOOR
(
REAL
(
num2
)/
np
)
IF
(
myid
.LT.
num2
-
(
num2
/
np
)
*
np
)
num
=
num
+1
k
=
1
DO
i
=
myid
+1
,
num2
,
np
eig
(
k
)
=
eig2
(
i
)
k
=
k
+1
ENDDO
!write(*,"(a,i5,99f10.4)") "ELPA:",myid,eig
!
! Redistribute eigvec from ScaLAPACK distribution to each process
! having all eigenvectors corresponding to his eigenvalues as above
!
CALL
subredist2
(
z
,
m
,
num2
,
asca
,
myrowssca
,
SUB_COMM
,
nprow
,
npcol
,
myid
,
ierr
,
nb
)
!
DEALLOCATE
(
asca
)
DEALLOCATE
(
bsca
)
DEALLOCATE
(
eigvec
,
tmp2
,
eig2
)
diagonalization/packed_to_full.f90
0 → 100644
View file @
dd2e78bf
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Gruenberg Institut, Forschungszentrum Juelich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE
m_packed_to_full
! Contains a service routine to expand a packed storage matrix to full storage
! A real and complex version is provided with a common interface
use
m_juDFT
implicit
none
private
interface
packed_to_full
module
procedure
packed_to_full_r
,
packed_to_full_c
end
interface
packed_to_full
public
packed_to_full
contains
subroutine
packed_to_full_r
(
n
,
packed
,
full
)
integer
,
intent
(
in
)
::
n
real
,
intent
(
in
)
::
packed
(:)
real
,
allocatable
,
intent
(
out
)
::
full
(:,:)
integer
::
i
,
err
,
i1
,
i2
ALLOCATE
(
full
(
n
,
n
),
stat
=
err
)
if
(
err
/
=
0
)
call
judft_error
(
"Allocation of full matrix failed"
,
calledby
=
"packed_to_full"
)
i
=
0
DO
i1
=
1
,
n
DO
i2
=
1
,
i1
i
=
i
+1
full
(
i2
,
i1
)
=
packed
(
i
)
full
(
i1
,
i2
)
=
packed
(
i
)
ENDDO
ENDDO
end
subroutine
packed_to_full_r
subroutine
packed_to_full_c
(
n
,
packed
,
full
)
integer
,
intent
(
in
)
::
n
complex
,
intent
(
in
)
::
packed
(:)
complex
,
allocatable
,
intent
(
out
)
::
full
(:,:)
integer
::
i
,
err
,
i1
,
i2
ALLOCATE
(
full
(
n
,
n
),
stat
=
err
)
if
(
err
/
=
0
)
call
judft_error
(
"Allocation of full matrix failed"
,
calledby
=
"packed_to_full"
)
i
=
0
DO
i1
=
1
,
n
DO
i2
=
1
,
i1
i
=
i
+1
full
(
i2
,
i1
)
=
packed
(
i
)
full
(
i1
,
i2
)
=
conjg
(
packed
(
i
))
ENDDO
ENDDO
end
subroutine
packed_to_full_c
end
MODULE
m_packed_to_full
diagonalization/zsymsecloc_cpp.F90
0 → 100644
View file @
dd2e78bf
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment