fleur / Commits

Commit 4219fe5e authored Jun 14, 2018 by Uliana Alekseeva
ELPA interface adjusted
parent 1a6d1755
Showing 2 changed files with 447 additions and 182 deletions (+447, -182)
diagonalization/eigen_diag.F90   +1 -1
diagonalization/elpa.F90         +446 -181
diagonalization/eigen_diag.F90

...
@@ -72,7 +72,7 @@ CONTAINS
       !Select the solver
       SELECT CASE (priv_select_solver(parallel))
       CASE (diag_elpa)
          !CALL elpa(hmat,smat,ne,eig,ev)
          CALL elpa_diag(hmat,smat,ne,eig,ev)
       CASE (diag_elemental)
          !CALL ELEMENTAL(hmat,smat,ne,eig,ev)
       CASE (diag_scalapack)
...
diagonalization/elpa.F90

!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE m_elpa
  PRIVATE
#ifdef CPP_ELPA
  INTERFACE elpa_diag
     MODULE PROCEDURE elpa_r, elpa_c
  END INTERFACE elpa_diag
  !Module to call elpa library for parallel diagonalization
  !uses ssubredist1/2 for redistribution
  PUBLIC elpa_diag
CONTAINS
! First the real version of the code
#define CPP_CHOLESKY cholesky_real
#define CPP_invert_trm invert_trm_real
#define CPP_solve_evp_1stage solve_evp_real_1stage
#define CPP_solve_evp solve_evp_real
#define CPP_solve_evp_2stage solve_evp_real_2stage
#define CPP_REALCOMPLEX real
#define CPP_transpose pdtran
#define CPP_ONE 1.0
#define CPP_ZERO 0.0
#ifdef CPP_ELPA_201605004
#define CPP_mult elpa_mult_at_b_real
#else
#define CPP_mult mult_at_b_real
#endif
#define CPP_REAL
  SUBROUTINE elpa_r(m,SUB_COMM,a,b,z,eig,num)
!
!----------------------------------------------------
!- Parallel eigensystem solver - driver routine based on chani; dw'12
!
! m ........ actual (=leading) dimension of full a & b matrices
! must be problem size, as input a, b are one-dimensional
! and shall be redistributed to two-dimensional matrices
! actual (=leading) dimension of eigenvector z(,)
! n ........ number of columns of full (sub)matrix ( about n/np)
! SUB_COMM.. communicator for MPI
! a,b .... packed (sub)matrices, here expanded to non-packed
! z,eig .... eigenvectors and values, output
! num ...... number of ev's searched (and found) on this node
! On input, overall number of ev's searched,
! On output, local number of ev's found
!
!----------------------------------------------------
  SUBROUTINE elpa_diag(hmat,smat,ne,eig,ev)
!
#include "elpa_cpp.F90"
  END SUBROUTINE elpa_r
! Now the complex version
#define CPP_CHOLESKY cholesky_complex
#define CPP_invert_trm invert_trm_complex
#define CPP_solve_evp_1stage solve_evp_complex_1stage
#define CPP_solve_evp solve_evp_complex
#define CPP_solve_evp_2stage solve_evp_complex_2stage
#define CPP_REALCOMPLEX complex
#define CPP_transpose pztranc
#define CPP_ONE cmplx(1.,0.)
#define CPP_ZERO cmplx(0.,0.)
#ifdef CPP_ELPA_201605004
#define CPP_mult mult_ah_b_complex
#else
#define CPP_mult mult_ah_b_complex
#endif
#undef CPP_REAL
  SUBROUTINE elpa_c(m,SUB_COMM,a,b,z,eig,num)
!
!----------------------------------------------------
!- Parallel eigensystem solver - driver routine based on chani; dw'12
! Uses the ELPA for the actual diagonalization
!
! m ........ actual (=leading) dimension of full a & b matrices
! must be problem size, as input a, b are one-dimensional
! and shall be redistributed to two-dimensional matrices
! actual (=leading) dimension of eigenvector z(,)
! n ........ number of columns of full (sub)matrix ( about n/np)
! SUB_COMM.. communicator for MPI
! a,b .... packed (sub)matrices, here expanded to non-packed
! z,eig .... eigenvectors and values, output
! num ...... number of ev's searched (and found) on this node
!
! hmat ..... Hamiltonian matrix
! smat ..... overlap matrix
! ne ....... number of ev's searched (and found) on this node
! On input, overall number of ev's searched,
! On output, local number of ev's found
! eig ...... eigenvalues, output
! ev ....... eigenvectors, output
!
!----------------------------------------------------
!
#include "elpa_cpp.F90"
  END SUBROUTINE elpa_c
  SUBROUTINE priv_create_blacsgrid(mpi_subcom,m,np,nb,myid,npcol,nprow,mycol,&
                                   myrow,myrowssca,mycolssca,ictextblacs,sc_desc,mpi_comm_rows,mpi_comm_cols)
#include"cpp_double.h"
    USE m_juDFT
    USE m_types_mpimat
    USE m_types
    USE elpa1
#ifdef CPP_ELPA2
    USE elpa2
#endif
#ifdef CPP_ELPA_201705003
    USE elpa
#endif
    IMPLICIT NONE
    CLASS(t_mat), INTENT(INOUT)            :: hmat, smat
    CLASS(t_mat), ALLOCATABLE, INTENT(OUT) :: ev
    REAL, INTENT(out)                      :: eig(:)
    INTEGER, INTENT(INOUT)                 :: ne
#ifdef CPP_ELPA
    INCLUDE 'mpif.h'
    INTEGER, INTENT(IN)  :: mpi_subcom
    INTEGER, INTENT(IN)  :: m
    INTEGER, INTENT(OUT) :: np, nb, npcol, nprow, myrowssca, mycolssca
    INTEGER, INTENT(OUT) :: myrow, mycol, myid
    INTEGER, INTENT(OUT) :: ictextblacs, sc_desc(:)
    INTEGER, INTENT(OUT) :: mpi_comm_rows, mpi_comm_cols
!... Local variables
!
    INTEGER              :: num, num2, myrow, mycol
    INTEGER              :: nb, myid, np
    INTEGER              :: n_col, n_row
    LOGICAL              :: ok
    INTEGER              :: err
    INTEGER              :: mpi_comm_rows, mpi_comm_cols
    INTEGER              :: i, k
    ! large distributed Matrices
    REAL, ALLOCATABLE    :: eig2(:)
    TYPE(t_mpimat)       :: ev_dist
    REAL, ALLOCATABLE    :: tmp2_r(:,:)
    COMPLEX, ALLOCATABLE :: tmp2_c(:,:)
    INTEGER, EXTERNAL    :: numroc, indxl2g  !SCALAPACK functions
#ifdef CPP_ELPA_201705003
    INTEGER                :: kernel
    CLASS(elpa_t), POINTER :: elpa_obj
    INTEGER                :: iamblacs, npblacs
    INTEGER                :: nprow2, npcol2, myrowblacs, mycolblacs
    INTEGER                :: k, i, j
    INTEGER                :: ierr
    err = elpa_init(20170403)
    elpa_obj => elpa_allocate()
#endif
    INTEGER, ALLOCATABLE :: iblacsnums(:), ihelp(:), iusermap(:,:)
    SELECT TYPE(hmat)
    TYPE IS (t_mpimat)
       SELECT TYPE(smat)
       TYPE IS (t_mpimat)
    EXTERNAL descinit, blacs_get
    EXTERNAL blacs_pinfo, blacs_gridinit
    CALL MPI_BARRIER(hmat%mpi_com,err)
    CALL MPI_COMM_RANK(hmat%mpi_com,myid,err)
    CALL MPI_COMM_SIZE(hmat%mpi_com,np,err)
    myrow = myid/hmat%npcol
    mycol = myid - (myid/hmat%npcol)*hmat%npcol
    !Determine rank and no of processors
    CALL MPI_COMM_RANK(mpi_subcom,myid,ierr)
    CALL MPI_COMM_SIZE(mpi_subcom,np,ierr)
    !print *,"priv_create_blacsgrid"
    ! determine block size
    !
    nb = 64
!IF (m.GT.2048) nb = 30 !2
!IF (m.GT.8*2048) nb = 60 !4
! compute processor grid, as square as possible
! If not square with more rows than columns
    distloop: DO j = INT(SQRT(REAL(np))), 1, -1
       IF ( (np/j)*j == np ) THEN
          npcol = np/j
          nprow = j
          EXIT distloop
       ENDIF
    ENDDO distloop
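For reference, distloop picks the largest divisor j <= sqrt(np), so the resulting nprow x npcol grid is as square as possible with npcol >= nprow. A minimal standalone sketch of the same factorization (illustration only, not part of this commit):

PROGRAM grid_sketch
   IMPLICIT NONE
   INTEGER :: np, j, nprow, npcol
   np = 12                                  ! example process count
   ! largest divisor j <= sqrt(np) becomes nprow; npcol = np/j >= nprow
   DO j = INT(SQRT(REAL(np))), 1, -1
      IF ((np/j)*j == np) THEN
         nprow = j
         npcol = np/j
         EXIT
      ENDIF
   ENDDO
   PRINT *, 'np =', np, ' -> grid ', nprow, ' x ', npcol   ! 12 -> 3 x 4
END PROGRAM grid_sketch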
    ALLOCATE ( iblacsnums(np), ihelp(np), iusermap(nprow,npcol) )
! An nprow*npcol processor grid will be created
! Row and column index myrow, mycol of this processor in the grid
! and distribution of A and B in ScaLAPACK
! The local processor will get myrowssca rows and mycolssca columns
! of A and B
!
!Create communicators for ELPA
#if defined (CPP_ELPA_201705003)
    mpi_comm_rows = -1
    mpi_comm_cols = -1
#elif defined (CPP_ELPA_201605004) || defined (CPP_ELPA_201605003) || defined (CPP_ELPA_NEW)
    err = get_elpa_row_col_comms(hmat%mpi_com,myrow,mycol,mpi_comm_rows,mpi_comm_cols)
#else
    CALL get_elpa_row_col_comms(hmat%mpi_com,myrow,mycol,mpi_comm_rows,mpi_comm_cols)
#endif
!print *,"creating ELPA comms -- done"
    num2 = ne  !no of states solved for
    ALLOCATE ( eig2(hmat%global_size1), stat=err )  ! The eigenvalue array for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "eig2"', calledby='elpa')
    CALL ev_dist%init(hmat%l_real,hmat%global_size1,hmat%global_size2,hmat%mpi_com,.TRUE.)  ! Eigenvectors for ScaLAPACK
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "ev_dist"', calledby='elpa')
    IF (hmat%l_real) THEN
       ALLOCATE ( tmp2_r(hmat%matsize1,hmat%matsize2), stat=err )  ! tmp_array
    ELSE
       ALLOCATE ( tmp2_c(hmat%matsize1,hmat%matsize2), stat=err )  ! tmp_array
    ENDIF
    IF (err.NE.0) CALL juDFT_error('Failed to allocated "tmp2"', calledby='elpa')
    smat%blacs_desc    = hmat%blacs_desc
    ev_dist%blacs_desc = hmat%blacs_desc
    nb = hmat%blacs_desc(5)  ! Blocking factor
    IF (nb.NE.hmat%blacs_desc(6)) CALL judft_error("Different block sizes for rows/columns not supported")
#ifdef CPP_ELPA_201705003
    CALL elpa_obj%set("na", hmat%global_size1, err)
    CALL elpa_obj%set("nev", num2, err)
    CALL elpa_obj%set("local_nrows", hmat%matsize1, err)
    CALL elpa_obj%set("local_ncols", hmat%matsize2, err)
    CALL elpa_obj%set("nblk", nb, err)
    CALL elpa_obj%set("mpi_comm_parent", hmat%mpi_com, err)
    CALL elpa_obj%set("process_row", myrow, err)
    CALL elpa_obj%set("process_col", mycol, err)
#ifdef CPP_ELPA2
    CALL elpa_obj%set("solver", ELPA_SOLVER_2STAGE)
#else
    CALL elpa_obj%set("solver", ELPA_SOLVER_1STAGE)
#endif
    err = elpa_obj%setup()
    if (myid == 0) then
       call elpa_obj%get("complex_kernel", kernel)
       print *, "elpa uses " // elpa_int_value_to_string("complex_kernel", kernel) // " kernel"
    endif
#endif
!print *,"Before elpa"
    myrow = myid/npcol                 ! my row number in the BLACS nprow*npcol grid
    mycol = myid - (myid/npcol)*npcol  ! my column number in the BLACS nprow*npcol grid
    !ELPA - start here
    ! Solve generalized problem
    !
    ! Now allocate Asca to put the elements of Achi or receivebuffer to
    ! 1. Calculate Cholesky factorization of Matrix S = U**T * U
    !    and invert triangular matrix U
    !
    myrowssca = (m-1)/(nb*nprow)*nb + MIN(MAX(m-(m-1)/(nb*nprow)*nb*nprow-nb*myrow,0),nb)
    ! Number of rows the local process gets in ScaLAPACK distribution
    mycolssca = (m-1)/(nb*npcol)*nb + MIN(MAX(m-(m-1)/(nb*npcol)*nb*npcol-nb*mycol,0),nb)
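The two closed-form expressions above give the local row and column counts of the nb-blocked block-cyclic distribution; they agree with ScaLAPACK's NUMROC for a distribution whose first block sits on process 0. A hedged standalone restatement (illustration only, not part of this commit):

INTEGER FUNCTION local_count(m, nb, myproc, nprocs)
   ! Rows (or columns) of an m-sized dimension owned locally in an
   ! nb block-cyclic distribution over nprocs processes, first block on process 0.
   IMPLICIT NONE
   INTEGER, INTENT(IN) :: m, nb, myproc, nprocs
   local_count = (m-1)/(nb*nprocs)*nb + &
                 MIN(MAX(m - (m-1)/(nb*nprocs)*nb*nprocs - nb*myproc, 0), nb)
END FUNCTION local_count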
    !Get BLACS ranks for all MPI ranks
    CALL BLACS_PINFO(iamblacs,npblacs)  ! iamblacs = local process rank (e.g. myid)
                                        ! npblacs  = number of available processes
    iblacsnums = -2
    ihelp      = -2
    ihelp(myid+1) = iamblacs  ! Get the Blacs id corresponding to the MPI id
!print *,"ALLREDUCE:",mpi_subcom
    CALL MPI_ALLREDUCE(ihelp,iblacsnums,np,MPI_INTEGER,MPI_MAX,mpi_subcom,ierr)
    IF (ierr.NE.0) CALL juDFT_error('Error in allreduce for BLACS nums', calledby='elpa')
! iblacsnums(i) is the BLACS-process number of MPI-process i-1
    k = 1
    DO i = 1, nprow
       DO j = 1, npcol
          iusermap(i,j) = iblacsnums(k)
          k = k + 1
       ENDDO
    ENDDO
! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed.
! The only reason having them is that the Scalapack counterpart
! PDPOTRF very often fails on higher processor numbers for unknown reasons!
#if defined(CPP_ELPA_201705003)
    IF (hmat%l_real) THEN
       CALL elpa_obj%cholesky(smat%data_r, err)
       CALL elpa_obj%invert_triangular(smat%data_r, err)
    ELSE
       CALL elpa_obj%cholesky(smat%data_c, err)
       CALL elpa_obj%invert_triangular(smat%data_c, err)
    ENDIF
#elif defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
    IF (hmat%l_real) THEN
       ok = cholesky_real(smat%global_size1,smat%data_r,smat%matsize1,nb,smat%matsize2,&
                          mpi_comm_rows,mpi_comm_cols,.false.)
       ok = invert_trm_real(smat%global_size1,smat%data_r,smat%matsize1,nb,smat%matsize2,&
                            mpi_comm_rows,mpi_comm_cols,.false.)
    ELSE
       ok = cholesky_complex(smat%global_size1,smat%data_c,smat%matsize1,nb,smat%matsize2,&
                             mpi_comm_rows,mpi_comm_cols,.false.)
       ok = invert_trm_complex(smat%global_size1,smat%data_c,smat%matsize1,nb,smat%matsize2,&
                               mpi_comm_rows,mpi_comm_cols,.false.)
    ENDIF
#elif defined CPP_ELPA_NEW
    IF (hmat%l_real) THEN
       CALL cholesky_real(smat%global_size1,smat%data_r,smat%matsize1,nb,smat%matsize2,&
                          mpi_comm_rows,mpi_comm_cols,.false.,ok)
       CALL invert_trm_real(smat%global_size1,smat%data_r,smat%matsize1,nb,smat%matsize2,&
                            mpi_comm_rows,mpi_comm_cols,.false.,ok)
    ELSE
       CALL cholesky_complex(smat%global_size1,smat%data_c,smat%matsize1,nb,smat%matsize2,&
                             mpi_comm_rows,mpi_comm_cols,.false.,ok)
       CALL invert_trm_complex(smat%global_size1,smat%data_c,smat%matsize1,nb,smat%matsize2,&
                               mpi_comm_rows,mpi_comm_cols,.false.,ok)
    ENDIF
#else
    IF (hmat%l_real) THEN
       CALL cholesky_real(smat%global_size1,smat%data_r,smat%matsize1,nb,&
                          mpi_comm_rows,mpi_comm_cols,.false.)
       CALL invert_trm_real(smat%global_size1,smat%data_r,smat%matsize1,nb,&
                            mpi_comm_rows,mpi_comm_cols,.false.)
    ELSE
       CALL cholesky_complex(smat%global_size1,smat%data_c,smat%matsize1,nb,smat%matsize2,&
                             mpi_comm_rows,mpi_comm_cols,.false.)
       CALL invert_trm_complex(smat%global_size1,smat%data_c,smat%matsize1,nb,smat%matsize2,&
                               mpi_comm_rows,mpi_comm_cols,.false.)
    ENDIF
#endif
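Whichever ELPA API is compiled in, step 1 performs the same two operations on the overlap matrix: a Cholesky factorization S = U**T * U followed by inversion of the triangular factor. A serial LAPACK analogue for the real case (illustration only, not part of this commit; dpotrf/dtrtri are the standard LAPACK routines, not the ELPA calls above):

SUBROUTINE serial_step1(n, s, info)
   ! Serial sketch of step 1: factor S = U**T * U, then invert U in place.
   IMPLICIT NONE
   INTEGER, INTENT(IN)    :: n
   REAL(8), INTENT(INOUT) :: s(n,n)   ! on entry S; on exit U**-1 in the upper triangle
   INTEGER, INTENT(OUT)   :: info
   CALL dpotrf('U', n, s, n, info)                        ! S = U**T * U
   IF (info == 0) CALL dtrtri('U', 'N', n, s, n, info)    ! U -> U**-1
END SUBROUTINE serial_step1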
! 2. Calculate U**-T * H * U**-1
! 2a. ev_dist = U**-T * H
! H is only set in the upper half, solve_evp_real needs a full matrix
! Set lower half from upper half
    DO i = 1, hmat%matsize2
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in H
       n_col = indxl2g(i, nb, mycol, 0, hmat%npcol)
       n_row = numroc(n_col, nb, myrow, 0, hmat%nprow)
       IF (hmat%l_real) THEN
          hmat%data_r(n_row+1:hmat%matsize1,i) = 0.d0
       ELSE
          hmat%data_c(n_row+1:hmat%matsize1,i) = 0.d0
       ENDIF
    ENDDO
!Get the Blacs default context
    CALL BLACS_GET(0,0,ictextblacs)
! Create the Grid
    CALL BLACS_GRIDMAP(ictextblacs,iusermap,nprow,nprow,npcol)
! Now control, whether the BLACS grid is the one we wanted
    CALL BLACS_GRIDINFO(ictextblacs,nprow2,npcol2,myrowblacs,mycolblacs)
    IF (nprow2 /= nprow) THEN
       WRITE (6,*) 'Wrong number of rows in BLACS grid'
       WRITE (6,*) 'nprow=',nprow,' nprow2=',nprow2
       CALL juDFT_error('Wrong number of rows in BLACS grid', calledby='elpa')
    ENDIF
    IF (hmat%l_real) THEN
       CALL pdtran(hmat%global_size1,hmat%global_size1,1.d0,hmat%data_r,1,1,&
                   hmat%blacs_desc,0.d0,ev_dist%data_r,1,1,ev_dist%blacs_desc)
    ELSE
       CALL pztranc(hmat%global_size1,hmat%global_size2,cmplx(1.d0,0.d0),hmat%data_c,1,1,&
                    hmat%blacs_desc,cmplx(0.d0,0.d0),ev_dist%data_c,1,1,ev_dist%blacs_desc)
    ENDIF
    IF (npcol2 /= npcol) THEN
       WRITE (6,*) 'Wrong number of columns in BLACS grid'
       WRITE (6,*) 'npcol=',npcol,' npcol2=',npcol2
       CALL juDFT_error('Wrong number of columns in BLACS grid', calledby='elpa')
    ENDIF
    DO i = 1, hmat%matsize2
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in H
       n_col = indxl2g(i, nb, mycol, 0, hmat%npcol)
       n_row = numroc(n_col, nb, myrow, 0, hmat%nprow)
       IF (hmat%l_real) THEN
          hmat%data_r(n_row+1:hmat%matsize1,i) = ev_dist%data_r(n_row+1:ev_dist%matsize1,i)
       ELSE
          hmat%data_c(n_row+1:hmat%matsize1,i) = ev_dist%data_c(n_row+1:ev_dist%matsize1,i)
       ENDIF
    ENDDO
#if defined (CPP_ELPA_201705003)
    IF (hmat%l_real) THEN
       CALL elpa_obj%hermitian_multiply('U','L',hmat%global_size1,smat%data_r,hmat%data_r,&
                                        smat%matsize1,smat%matsize2,ev_dist%data_r,hmat%matsize1,hmat%matsize2,err)
    ELSE
       CALL elpa_obj%hermitian_multiply('U','L',hmat%global_size1,smat%data_c,hmat%data_c,&
                                        smat%matsize1,smat%matsize2,ev_dist%data_c,hmat%matsize1,hmat%matsize2,err)
    ENDIF
#elif defined (CPP_ELPA_201605004)
    IF (hmat%l_real) THEN
       ok = elpa_mult_at_b_real('U','L',hmat%global_size1,hmat%global_size1,smat%data_r,smat%matsize1,smat%matsize2,&
                                hmat%data_r,hmat%matsize1,hmat%matsize2,nb,mpi_comm_rows,mpi_comm_cols,&
                                ev_dist%data_r,ev_dist%matsize1,ev_dist%matsize2)
    ELSE
       ok = mult_ah_b_complex('U','L',hmat%global_size1,hmat%global_size1,smat%data_c,smat%matsize1,smat%matsize2,&
                              hmat%data_c,hmat%matsize1,hmat%matsize2,nb,mpi_comm_rows,mpi_comm_cols,&
                              ev_dist%data_c,ev_dist%matsize1,ev_dist%matsize2)
    ENDIF
#elif defined (CPP_ELPA_201605003)
    IF (hmat%l_real) THEN
       ok = mult_at_b_real('U','L',hmat%global_size1,hmat%global_size1,smat%data_r,smat%matsize1,&
                           hmat%data_r,hmat%matsize1,nb,mpi_comm_rows,mpi_comm_cols,ev_dist%data_r,ev_dist%matsize1)
    ELSE
       ok = mult_ah_b_complex('U','L',hmat%global_size1,hmat%global_size1,smat%data_c,smat%matsize1,&
                              hmat%data_c,hmat%matsize1,nb,mpi_comm_rows,mpi_comm_cols,ev_dist%data_c,ev_dist%matsize1)
    ENDIF
#else
    IF (hmat%l_real) THEN
       CALL mult_at_b_real('U','L',hmat%global_size1,hmat%global_size1,smat%data_r,smat%matsize1,&
                           hmat%data_r,hmat%matsize1,nb,mpi_comm_rows,mpi_comm_cols,ev_dist%data_r,ev_dist%matsize1)
    ELSE
       CALL mult_ah_b_complex('U','L',hmat%global_size1,hmat%global_size1,smat%data_c,smat%matsize1,&
                              hmat%data_c,hmat%matsize1,nb,mpi_comm_rows,mpi_comm_cols,ev_dist%data_c,ev_dist%matsize1)
    ENDIF
#endif
!Create communicators for ELPA
!print *,"creating ELPA comms"
! 2b. tmp2 = eigvec**T
    IF (hmat%l_real) THEN
       CALL pdtran(ev_dist%global_size1,ev_dist%global_size1,1.d0,ev_dist%data_r,1,1,&
                   ev_dist%blacs_desc,0.d0,tmp2_r,1,1,ev_dist%blacs_desc)
    ELSE
       CALL pztranc(ev_dist%global_size1,ev_dist%global_size1,cmplx(1.0,0.0),ev_dist%data_c,1,1,&
                    ev_dist%blacs_desc,cmplx(0.d0,0.d0),tmp2_c,1,1,ev_dist%blacs_desc)
    ENDIF
! 2c. A = U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
#if defined (CPP_ELPA_201705003)
    mpi_comm_rows = -1
    mpi_comm_cols = -1
#elif defined (CPP_ELPA_201605004) || defined (CPP_ELPA_201605003)||defined(CPP_ELPA_NEW)
    ierr = get_elpa_row_col_comms(mpi_subcom,myrowblacs,mycolblacs,mpi_comm_rows,mpi_comm_cols)
    IF (hmat%l_real) THEN
       CALL elpa_obj%hermitian_multiply('U','U',smat%global_size1,smat%data_r,tmp2_r,&
                                        smat%matsize1,smat%matsize2,hmat%data_r,hmat%matsize1,hmat%matsize2,err)
    ELSE
       CALL elpa_obj%hermitian_multiply('U','U',smat%global_size1,smat%data_c,tmp2_c,&
                                        smat%matsize1,smat%matsize2,hmat%data_c,hmat%matsize1,hmat%matsize2,err)
    ENDIF
#elif defined (CPP_ELPA_201605004)
    IF (hmat%l_real) THEN
       ok = elpa_mult_at_b_real('U','U',smat%global_size1,smat%global_size1,smat%data_r,smat%matsize1,smat%matsize2,&
                                tmp2_r,SIZE(tmp2_r,1),SIZE(tmp2_r,2),nb,mpi_comm_rows,mpi_comm_cols,&
                                hmat%data_r,hmat%matsize1,hmat%matsize2)
    ELSE
       ok = mult_ah_b_complex('U','U',smat%global_size1,smat%global_size1,smat%data_c,smat%matsize1,smat%matsize2,&
                              tmp2_c,SIZE(tmp2_c,1),SIZE(tmp2_c,2),nb,mpi_comm_rows,mpi_comm_cols,&
                              hmat%data_c,hmat%matsize1,hmat%matsize2)
    ENDIF
#elif defined (CPP_ELPA_201605003)
    IF (hmat%l_real) THEN
       ok = mult_at_b_real('U','U',smat%global_size1,smat%global_size1,smat%data_r,smat%matsize1,&
                           tmp2_r,SIZE(tmp2_r,1),nb,mpi_comm_rows,mpi_comm_cols,hmat%data_r,hmat%matsize1)
    ELSE
       ok = mult_ah_b_complex('U','U',smat%global_size1,smat%global_size1,smat%data_c,smat%matsize1,&
                              tmp2_c,SIZE(tmp2_c,1),nb,mpi_comm_rows,mpi_comm_cols,hmat%data_c,hmat%matsize1)
    ENDIF
#else
    CALL get_elpa_row_col_comms(mpi_subcom,myrowblacs,mycolblacs,mpi_comm_rows,mpi_comm_cols)
    IF (hmat%l_real) THEN
       CALL mult_at_b_real('U','U',smat%global_size1,smat%global_size1,smat%data_r,smat%matsize1,&
                           tmp2_r,SIZE(tmp2_r,1),nb,mpi_comm_rows,mpi_comm_cols,hmat%data_r,hmat%matsize1)
    ELSE
       CALL mult_ah_b_complex('U','U',smat%global_size1,smat%global_size1,smat%data_c,smat%matsize1,&
                              tmp2_c,SIZE(tmp2_c,1),nb,mpi_comm_rows,mpi_comm_cols,hmat%data_c,hmat%matsize1)
    ENDIF
#endif
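Steps 2a-2c together form the transformed matrix A = U**-T * H * U**-1; the transposes are needed because the multiply routines only accept the (conjugate) transpose of their first operand. Serially, with uinv = U**-1 from step 1, the same reduction is two triangular multiplies (illustration only, not part of this commit; LAPACK's dsygst performs this reduction directly from the Cholesky factor):

SUBROUTINE serial_step2(n, h, uinv)
   ! Serial sketch of step 2: H := U**-T * H * U**-1 via two DTRMM calls.
   IMPLICIT NONE
   INTEGER, INTENT(IN)    :: n
   REAL(8), INTENT(INOUT) :: h(n,n)     ! full symmetric H, overwritten
   REAL(8), INTENT(IN)    :: uinv(n,n)  ! upper triangular U**-1
   CALL dtrmm('R', 'U', 'N', 'N', n, n, 1.d0, uinv, n, h, n)   ! H := H * U**-1
   CALL dtrmm('L', 'U', 'T', 'N', n, n, 1.d0, uinv, n, h, n)   ! H := U**-T * H
END SUBROUTINE serial_step2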
!print *,"creating ELPA comms -- done"
!Create the descriptors
    CALL descinit(sc_desc,m,m,nb,nb,0,0,ictextblacs,myrowssca,ierr)
    IF (ierr /= 0) CALL juDFT_error('descinit1 failed', calledby='elpa')
! A is only set in the upper half, solve_evp_real needs a full matrix
! Set lower half from upper half
    IF (hmat%l_real) THEN
       CALL pdtran(hmat%global_size1,hmat%global_size1,1.d0,hmat%data_r,1,1,&
                   hmat%blacs_desc,0.d0,ev_dist%data_r,1,1,ev_dist%blacs_desc)
    ELSE
       CALL pztranc(hmat%global_size1,hmat%global_size1,cmplx(1.0,0.0),hmat%data_c,1,1,&
                    hmat%blacs_desc,cmplx(0.d0,0.d0),ev_dist%data_c,1,1,ev_dist%blacs_desc)
    ENDIF
    DO i = 1, hmat%matsize2
       ! Get global column corresponding to i and number of local rows up to
       ! and including the diagonal, these are unchanged in A
       n_col = indxl2g(i, nb, mycol, 0, hmat%npcol)
       n_row = numroc(n_col, nb, myrow, 0, hmat%nprow)
       IF (hmat%l_real) THEN
          hmat%data_r(n_row+1:hmat%matsize1,i) = ev_dist%data_r(n_row+1:ev_dist%matsize1,i)
       ELSE
          hmat%data_c(n_row+1:hmat%matsize1,i) = ev_dist%data_c(n_row+1:ev_dist%matsize1,i)
       ENDIF
    ENDDO
  END SUBROUTINE priv_create_blacsgrid
! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
! Eigenvectors go to eigvec
#if defined (CPP_ELPA_201705003)
    IF (hmat%l_real) THEN
       CALL elpa_obj%eigenvectors(hmat%data_r, eig2, ev_dist%data_r, err)
    ELSE
       CALL elpa_obj%eigenvectors(hmat%data_c, eig2, ev_dist%data_c, err)
    ENDIF
#elif defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
#ifdef CPP_ELPA2
    IF (hmat%l_real) THEN
       ok = solve_evp_real_2stage(hmat%global_size1,num2,hmat%data_r,hmat%matsize1,&
                                  eig2,ev_dist%data_r,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                  mpi_comm_rows,mpi_comm_cols,hmat%mpi_com)
    ELSE
       ok = solve_evp_complex_2stage(hmat%global_size1,num2,hmat%data_c,hmat%matsize1,&
                                     eig2,ev_dist%data_c,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                     mpi_comm_rows,mpi_comm_cols,hmat%mpi_com)
    ENDIF
#else
    LOGICAL :: elpa_used = .false.
    IF (hmat%l_real) THEN
       ok = solve_evp_real_1stage(hmat%global_size1,num2,hmat%data_r,hmat%matsize1,&
                                  eig2,ev_dist%data_r,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                  mpi_comm_rows,mpi_comm_cols)
    ELSE
       ok = solve_evp_complex_1stage(hmat%global_size1,num2,hmat%data_c,hmat%matsize1,&
                                     eig2,ev_dist%data_c,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                     mpi_comm_rows,mpi_comm_cols)
    ENDIF
#endif
END MODULE m_elpa
#elif defined CPP_ELPA_NEW
#ifdef CPP_ELPA2
    IF (hmat%l_real) THEN
       err = solve_evp_real_2stage(hmat%global_size1,num2,hmat%data_r,hmat%matsize1,&
                                   eig2,ev_dist%data_r,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                   mpi_comm_rows,mpi_comm_cols,hmat%mpi_com)
    ELSE
       err = solve_evp_complex_2stage(hmat%global_size1,num2,hmat%data_c,hmat%matsize1,&
                                      eig2,ev_dist%data_c,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                      mpi_comm_rows,mpi_comm_cols,hmat%mpi_com)
    ENDIF
#else
    IF (hmat%l_real) THEN
       err = solve_evp_real_1stage(hmat%global_size1,num2,hmat%data_r,hmat%matsize1,&
                                   eig2,ev_dist%data_r,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                   mpi_comm_rows,mpi_comm_cols)
    ELSE
       err = solve_evp_complex_1stage(hmat%global_size1,num2,hmat%data_c,hmat%matsize1,&
                                      eig2,ev_dist%data_c,ev_dist%matsize1,nb,ev_dist%matsize2,&
                                      mpi_comm_rows,mpi_comm_cols)
    ENDIF
#endif
#else
#ifdef CPP_ELPA2
    IF (hmat%l_real) THEN
       CALL solve_evp_real_2stage(hmat%global_size1,num2,hmat%data_r,hmat%matsize1,&
                                  eig2,ev_dist%data_r,ev_dist%matsize1,nb,&
                                  mpi_comm_rows,mpi_comm_cols,hmat%mpi_com)
    ELSE
       CALL solve_evp_complex_2stage(hmat%global_size1,num2,hmat%data_c,hmat%matsize1,&
                                     eig2,ev_dist%data_c,ev_dist%matsize1,nb,&
                                     mpi_comm_rows,mpi_comm_cols,hmat%mpi_com)
    ENDIF
#else
    IF (hmat%l_real) THEN
       CALL solve_evp_real(hmat%global_size1,num2,hmat%data_r,hmat%matsize1,&
                           eig2,ev_dist%data_r,ev_dist%matsize1,nb,&
                           mpi_comm_rows,mpi_comm_cols)
    ELSE
       CALL solve_evp_complex(hmat%global_size1,num2,hmat%data_c,hmat%matsize1,&
                              eig2,ev_dist%data_c,ev_dist%matsize1,nb,&
                              mpi_comm_rows,mpi_comm_cols)
    ENDIF
#endif
#endif
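All branches of step 3 solve the same standard eigenproblem for the transformed matrix, differing only in the ELPA entry point (object API, 1-stage, or 2-stage solver). The serial analogue is a single LAPACK call (illustration only, not part of this commit):

SUBROUTINE serial_step3(n, a, eig, info)
   ! Serial sketch of step 3: eigenvalues/vectors of the transformed matrix.
   IMPLICIT NONE
   INTEGER, INTENT(IN)    :: n
   REAL(8), INTENT(INOUT) :: a(n,n)   ! on exit: eigenvectors of U**-T * H * U**-1
   REAL(8), INTENT(OUT)   :: eig(n)
   INTEGER, INTENT(OUT)   :: info
   REAL(8), ALLOCATABLE   :: work(:)
   ALLOCATE (work(MAX(1,3*n-1)))      ! minimal workspace for DSYEV
   CALL dsyev('V', 'U', n, a, n, eig, work, SIZE(work), info)
   DEALLOCATE (work)
END SUBROUTINE serial_step3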
! 4. Backtransform eigenvectors: Z = U**-1 * eigvec
! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
    IF (hmat%l_real) THEN
       CALL pdtran(smat%global_size1,smat%global_size1,1.d0,smat%data_r,1,1,&
                   smat%blacs_desc,0.d0,tmp2_r,1,1,smat%blacs_desc)
    ELSE
       CALL pztranc(smat%global_size1,smat%global_size1,cmplx(1.d0,0.d0),smat%data_c,1,1,&
                    smat%blacs_desc,cmplx(0.d0,0.d0),tmp2_c,1,1,smat%blacs_desc)
    ENDIF
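The transpose above prepares tmp2 = (U**-1)**T because the multiply routines below again consume the transposed operand; the net effect of step 4 is Z = U**-1 * eigvec. A serial sketch of that backtransform (illustration only, not part of this commit):

SUBROUTINE serial_step4(n, uinv, evec)
   ! Serial sketch of step 4: backtransform eigenvectors, Z = U**-1 * evec.
   IMPLICIT NONE
   INTEGER, INTENT(IN)    :: n
   REAL(8), INTENT(IN)    :: uinv(n,n)  ! upper triangular U**-1
   REAL(8), INTENT(INOUT) :: evec(n,n)  ! eigenvectors in, Z out
   CALL dtrmm('L', 'U', 'N', 'N', n, n, 1.d0, uinv, n, evec, n)
END SUBROUTINE serial_step4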
#if defined (CPP_ELPA_201705003)
    IF (hmat%l_real) THEN
       CALL elpa_obj%hermitian_multiply('L','N',num2,tmp2_r,ev_dist%data_r,&
                                        ev_dist%matsize1,ev_dist%matsize2,hmat%data_r,hmat%matsize1,hmat%matsize2,err)
    ELSE