Commit 443c5423 authored by Matthias Redies's avatar Matthias Redies

merge

parents 7530699d 1b2cda7e
......@@ -2,6 +2,9 @@ include_directories(include)
set(c_filesInpgen io/xml/inputSchema.h io/xml/dropInputSchema.c)
set(c_filesFleur io/xml/inputSchema.h io/xml/dropInputSchema.c io/xml/xmlInterfaceWrapper.c)
if(FLEUR_USE_GPU)
set(c_filesFleur ${c_filesFleur} diagonalization/cusolver.c)
endif()
set(fleur_F90 main/fleur.F90)
set(fleur_F77 "")
......@@ -56,7 +59,7 @@ kpoints/tetcon.f kpoints/kvecon.f init/boxdim.f global/radsra.f math/intgr.F glo
)
set(inpgen_F90 ${inpgen_F90} global/constants.f90 io/xsf_io.f90
eigen/vec_for_lo.f90 eigen/orthoglo.F90 juDFT/usage_data.F90 math/ylm4.f90
eigen/vec_for_lo.f90 eigen/orthoglo.F90 juDFT/usage_data.F90 math/ylm4.F90
global/enpara.f90 global/chkmt.f90 inpgen/inpgen.f90 inpgen/set_inp.f90 inpgen/inpgen_help.f90 io/rw_inp.f90 juDFT/juDFT.F90 global/find_enpara.f90
inpgen/closure.f90 inpgen/inpgen_arguments.F90
juDFT/info.F90 juDFT/stop.F90 juDFT/args.F90 juDFT/time.F90 juDFT/init.F90 juDFT/sysinfo.F90 io/w_inpXML.f90 kpoints/julia.f90 global/utility.F90
......
......@@ -3,9 +3,9 @@ try_compile(FLEUR_USE_ELPA ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/tests/t
LINK_LIBRARIES ${FLEUR_LIBRARIES})
if (NOT FLEUR_USE_ELPA)
if (DEFINED CLI_ELPA_OPENMP)
set(TEST_LIBRARIES "${FLEUR_LIBRARIES};-lelpa_openmp")
set(TEST_LIBRARIES "-lelpa_openmp;${FLEUR_LIBRARIES}")
else()
set(TEST_LIBRARIES "${FLEUR_LIBRARIES};-lelpa")
set(TEST_LIBRARIES "-lelpa;${FLEUR_LIBRARIES}")
endif()
try_compile(FLEUR_USE_ELPA ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/tests/test_ELPA.f90
LINK_LIBRARIES ${TEST_LIBRARIES})
......@@ -29,6 +29,8 @@ LINK_LIBRARIES ${FLEUR_LIBRARIES})
try_compile(FLEUR_USE_ELPA_201605004 ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/tests/test_ELPA_201605004.f90
LINK_LIBRARIES ${FLEUR_LIBRARIES})
try_compile(FLEUR_USE_ELPA_201705003 ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/tests/test_ELPA_201705003.f90
LINK_LIBRARIES ${FLEUR_LIBRARIES})
try_compile(FLEUR_USE_ELPA_20180525 ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/tests/test_ELPA_20180525.f90
LINK_LIBRARIES ${FLEUR_LIBRARIES})
message("Version check for ELPA:")
message("OLD ELPA : ${FLEUR_USE_ELPA_OLD}")
......@@ -36,6 +38,7 @@ LINK_LIBRARIES ${FLEUR_LIBRARIES})
message("201605003 ELPA: ${FLEUR_USE_ELPA_201605003}")
message("201605004 ELPA: ${FLEUR_USE_ELPA_201605004}")
message("201705003 ELPA: ${FLEUR_USE_ELPA_201705003}")
message("20180525 ELPA: ${FLEUR_USE_ELPA_20180525}")
#Set preprocessor switches
if (FLEUR_USE_ELPA_OLD)
set(FLEUR_USE_ELPA TRUE)
......
! Compile-time probe for the ELPA 20180525 API: this program only has to
! compile and link, proving that the library provides the elpa_t type with
! the set() and generalized_eigenvectors() type-bound procedures.
program test
   use elpa
   implicit none
   class(elpa_t), pointer :: solver
   integer                :: ierr, mat_dim
   real                   :: ham(2,2), ovlp(2,2), eigval(2), eigvec(2,2)
   ! The calls below are never executed; only their interfaces are checked.
   call solver%set("na", mat_dim, ierr)
   call solver%generalized_eigenvectors(ham, ovlp, eigval, eigvec, .FALSE.)
end
......@@ -8,6 +8,8 @@ if (CLI_FLEUR_USE_GPU)
message("Using cuda8")
elseif(${CLI_FLEUR_USE_GPU} MATCHES "cuda9")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mcuda=cuda9.0,cc60 -Mcuda=rdc -Mcudalib=cublas -lnvToolsExt ")
elseif(${CLI_FLEUR_USE_GPU} MATCHES "emu")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mcuda=emu -Mcudalib=cublas -Minfo=accel ")
endif()
set(FLEUR_MPI_DEFINITIONS ${FLEUR_MPI_DEFINITIONS} "CPP_GPU")
set(FLEUR_DEFINITIONS ${FLEUR_DEFINITIONS} "CPP_GPU")
......
......@@ -4,8 +4,19 @@ set(fleur_F90 ${fleur_F90}
diagonalization/eigen_diag.F90
diagonalization/lapack_diag.F90
diagonalization/magma.F90
diagonalization/elpa.F90
diagonalization/scalapack.F90
diagonalization/chase_diag.F90
diagonalization/symmetrize_matrix.f90
diagonalization/cusolver_diag.F90
diagonalization/elemental.F90)
if (FLEUR_USE_ELPA_20180525)
set(fleur_F90 ${fleur_F90}
diagonalization/elpa_20180525.F90
)
else()
set(fleur_F90 ${fleur_F90}
diagonalization/elpa.F90
)
endif()
......@@ -71,12 +71,12 @@ IMPLICIT NONE
PRIVATE
INTEGER :: chase_eig_id
PUBLIC init_chase, chase_diag
PUBLIC init_chase
#endif
REAL :: scale_distance
REAL :: tol
PUBLIC chase_distance
PUBLIC chase_distance,chase_diag
CONTAINS
......@@ -121,7 +121,8 @@ CONTAINS
noco%l_noco,.TRUE.,l_real,noco%l_soc,.FALSE.,mpi%n_size)
END IF
END SUBROUTINE init_chase
#endif
SUBROUTINE chase_diag(hmat,smat,ikpt,jsp,iter,ne,eig,zmat)
USE m_types_mpimat
USE m_types
......@@ -139,7 +140,7 @@ CONTAINS
INTEGER, INTENT(INOUT) :: ne
CLASS(t_mat), ALLOCATABLE, INTENT(OUT) :: zmat
REAL, INTENT(OUT) :: eig(:)
#ifdef CPP_CHASE
!Choose serial or parallel solver
SELECT TYPE(hmat)
CLASS is (t_mpimat)
......@@ -157,8 +158,9 @@ CONTAINS
CALL judft_error("Inconsistent matrix setup")
END SELECT
END SELECT
#endif
END SUBROUTINE chase_diag
#ifdef CPP_CHASE
SUBROUTINE chase_diag_noMPI(hmat,smat,ikpt,jsp,iter,ne,eig,zmat)
USE m_types
......
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
!> GPU diagonalization of the generalized eigenvalue problem via cuSOLVER.
!> NOTE(review): this is work-in-progress code guarded by CPP_MAGMA (probably
!> meant to be CPP_GPU -- confirm) and cannot run as-is; see notes below.
MODULE m_cusolver
use m_juDFT
! Constants mirroring the cuSOLVER/cuBLAS C enum values
INTEGER,PARAMETER :: NGPU_CONST=1
INTEGER,PARAMETER :: CUSOLVER_EIG_TYPE_1=1          ! solve A*x = lambda*B*x
CHARACTER,PARAMETER :: CUSOLVER_EIG_MODE_VECTOR='V' ! also compute eigenvectors
!**********************************************************
! Solve the generalized eigenvalue problem
! using the cuSOLVER library on the GPU
! (banner previously said MAGMA -- copy/paste leftover)
!**********************************************************
CONTAINS
SUBROUTINE cusolver_diag(nsize,eig,ne,a_r,b_r,z_r,a_c,b_c,z_c)
  !> Diagonalize H*z = eig*S*z; the matrices arrive in packed storage
  !> (a = Hamiltonian, b = overlap), either real (a_r/b_r/z_r) or
  !> complex (a_c/b_c/z_c). Only the complex path is implemented.
  use m_packed_to_full
  use iso_c_binding, only: c_ptr
#ifdef CPP_MAGMA
  !use magma
#endif
#include"cpp_double.h"
  IMPLICIT NONE
  ! ... Arguments ...
  INTEGER, INTENT (IN) :: nsize    ! matrix dimension
  REAL, INTENT(OUT) :: eig(:)      ! eigenvalues on exit
  INTEGER, INTENT(INOUT) :: ne     ! number of eigenpairs requested/returned
  REAL, OPTIONAL,ALLOCATABLE, INTENT (INOUT) :: a_r(:),b_r(:)
  REAL, OPTIONAL,ALLOCATABLE, INTENT (INOUT) :: z_r(:,:)
  COMPLEX, OPTIONAL,ALLOCATABLE, INTENT (INOUT) :: a_c(:),b_c(:)
  COMPLEX, OPTIONAL,ALLOCATABLE, INTENT (INOUT) :: z_c(:,:)
#ifdef CPP_MAGMA
  ! ... Local Variables ..
  INTEGER iind,ind1,ind2,info,lwork,liwork,lrwork,err,i,mout(1)
  REAL eigTemp(nsize)
  LOGICAL:: initialized=.false.
  REAL, ALLOCATABLE :: rwork(:)
  INTEGER, ALLOCATABLE :: iwork(:)
  REAL, ALLOCATABLE :: largea_r(:,:),largeb_r(:,:)
  COMPLEX, ALLOCATABLE :: largea_c(:,:),largeb_c(:,:)
  COMPLEX,ALLOCATABLE :: work(:)
  INTEGER :: ifail(nsize)
  LOGICAL :: l_real
  ! BUGFIX: the four names below were used but never declared (compile error
  ! under IMPLICIT NONE). NOTE(review): confirm the declarations against the
  ! actual cusolverDn C interface before enabling this code path.
  TYPE(c_ptr) :: handle                           ! cusolverDnHandle_t
  INTEGER :: devInfo
  INTEGER, PARAMETER :: cublas_Fill_Mode_upper=1  ! CUBLAS_FILL_MODE_UPPER
  INTEGER, EXTERNAL :: cusolverDnCreate,cusolverDnZhegvd,cusolverDnDestroy
  l_real=present(a_r)
  !**********************************
  !expand from packed to full storage
  !**********************************
  !hamiltonian
  if (l_real) THEN
     call packed_to_full(nsize,a_r,largea_r)
     call packed_to_full(nsize,b_r,largeb_r)
     !deallocate(a_r,b_r)
  ELSE
     call packed_to_full(nsize,a_c,largea_c)
     call packed_to_full(nsize,b_c,largeb_c)
     !deallocate(a_c,b_c)
  Endif
  if (l_real) call juDFT_error("REAL diagonalization not implemented in magma.F90")
  err=cusolverDnCreate(handle)
  !$acc data copyin(largea_c,largeb_c)
  !$acc host_data use_device(largea_c,largeb_c,eigtemp,work)
  ! NOTE(review): work is never allocated and lwork never set (no bufferSize
  ! query) -- this call cannot work as written; TODO before first use.
  err=cusolverDnZhegvd( handle, CUSOLVER_EIG_TYPE_1, CUSOLVER_EIG_MODE_VECTOR, cublas_Fill_Mode_upper, nsize, largea_c,nsize,largeb_c,nsize, eigtemp, work, lwork, devInfo)
  !$acc end host_data
  !$acc end data
  err=cusolverDnDestroy(handle)
  ! copy the first ne eigenpairs into the caller's output arrays
  DO i = 1, ne
     eig(i) = eigTemp(i)
     z_c(:nsize,i)=largea_c(:nsize,i)
  END DO
  !call judft_error("Eigenvectors are not calculated in MAGMA")
#endif
END SUBROUTINE cusolver_diag   ! BUGFIX: was "END SUBROUTINE magma_diag"
END MODULE m_cusolver          ! BUGFIX: was "END MODULE m_magma"
!> C bindings for the cuSOLVER dense-solver entry points.
!> BUGFIX: the original declared these as *bodiless module functions* with
!> BIND(C), which would define (and clash with) the library symbols, and the
!> cusolverDnZhegvd dummy list repeated "nsize" three times while declaring
!> names (n, d_A, lda, Lwork) that were not dummy arguments at all. An
!> INTERFACE block matching the documented C API is the correct form.
module priv_cusolver_interface
   use iso_c_binding
   implicit none
   interface
      integer(c_int) function cusolverDnCreate(cusolver_Hndl) bind(C,name="cusolverDnCreate")
         use iso_c_binding
         implicit none
         type(c_ptr) :: cusolver_Hndl        ! handle is written by C, so no VALUE
      end function cusolverDnCreate
      integer(c_int) function cusolverDnDestroy(cusolver_Hndl) bind(C,name="cusolverDnDestroy")
         use iso_c_binding
         implicit none
         type(c_ptr),value :: cusolver_Hndl
      end function cusolverDnDestroy
      ! cusolverDnZhegvd(handle,itype,jobz,uplo,n,A,lda,B,ldb,W,work,lwork,devInfo)
      ! NOTE(review): itype/jobz/uplo are C enums (int-sized); the original
      ! passed jobz as character(c_char), which does not match the C ABI.
      integer(c_int) function cusolverDnZhegvd(handle,itype,jobz,uplo,n,d_A,lda,d_B,ldb,d_W,work,lwork,devInfo) &
           bind(C,name="cusolverDnZhegvd")
         use iso_c_binding
         implicit none
         type(c_ptr),value    :: handle      ! cusolverDnHandle_t
         integer(c_int),value :: itype       ! CUSOLVER_EIG_TYPE_*
         integer(c_int),value :: jobz        ! cusolverEigMode_t
         integer(c_int),value :: uplo        ! cublasFillMode_t
         integer(c_int),value :: n           ! matrix dimension
         type(c_ptr),value    :: d_A         ! device ptr, n x n Hamiltonian
         integer(c_int),value :: lda
         type(c_ptr),value    :: d_B         ! device ptr, n x n overlap
         integer(c_int),value :: ldb
         type(c_ptr),value    :: d_W         ! device ptr, eigenvalues (double)
         type(c_ptr),value    :: work        ! device workspace
         integer(c_int),value :: lwork
         type(c_ptr),value    :: devInfo     ! device ptr, error info (int)
      end function cusolverDnZhegvd
   end interface
end module priv_cusolver_interface
/*
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <cusolverDn.h>
/* Interface for the cusolverDN routines for solving a generalized Eigenvalue problem
Code adapted from the example in the documentation
*/
/* Solve the complex generalized eigenproblem H*psi = lambda*S*psi with
 * cuSOLVER's Jacobi solver (Zhegvj).
 *
 *  H, S       : n x n matrices, handed directly to cuSOLVER and at the end
 *               read back with cudaMemcpyDeviceToHost, i.e. used as device
 *               pointers. NOTE(review): despite the "copy A to device"
 *               comment no host->device copy is performed -- presumably the
 *               caller supplies device/managed memory; verify.
 *  n          : matrix dimension
 *  ne         : number of eigenpairs copied back into eig/z
 *  tol        : Jacobi convergence tolerance
 *  max_sweeps : maximal number of Jacobi sweeps
 *  eig, z     : host output buffers (first ne eigenvalues / eigenvectors;
 *               the eigenvectors are returned in H by cuSOLVER)
 */
void cusolver_complex(cuDoubleComplex *H,cuDoubleComplex *S,int n,int ne,double tol,int max_sweeps,double* eig,cuDoubleComplex *z){
    cusolverDnHandle_t cusolverH = NULL;
    cudaStream_t stream = NULL;
    syevjInfo_t syevj_params = NULL;
    cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
    cudaError_t cudaStat1 = cudaSuccess;
    cudaError_t cudaStat2 = cudaSuccess;
    cudaError_t cudaStat3 = cudaSuccess;
    double *d_W = NULL;              /* eigenvalues on device */
    int *d_info = NULL;              /* error info */
    int lwork = 0;                   /* size of workspace */
    cuDoubleComplex *d_work = NULL;  /* device workspace for syevj */
    int info = 0;                    /* host copy of error info */
    /* configuration of syevj */
    const cusolverEigType_t itype = CUSOLVER_EIG_TYPE_1;     /* solve H psi = S lambda psi */
    const cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; /* compute eigenvectors */
    const cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
    /* numerical results of syevj */
    double residual = 0;
    int executed_sweeps = 0;
    /* step 1: create cusolver handle, bind a stream */
    status = cusolverDnCreate(&cusolverH);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    assert(cudaSuccess == cudaStat1);
    status = cusolverDnSetStream(cusolverH, stream);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* step 2: configuration of syevj */
    status = cusolverDnCreateSyevjInfo(&syevj_params);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* default value of tolerance is machine zero */
    /* BUGFIX: was "&svj_params" -- an undeclared name, and syevjInfo_t is
       passed by value, not by address */
    status = cusolverDnXsyevjSetTolerance(syevj_params, tol);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* default value of max. sweeps is 100 */
    status = cusolverDnXsyevjSetMaxSweeps(syevj_params, max_sweeps); /* BUGFIX: was &syevj_params */
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* step 3: allocate device buffers for the results */
    cudaStat2 = cudaMalloc((void**)&d_W, sizeof(double) * n); /* BUGFIX: eigenvalues are double, was sizeof(cuDoubleComplex) */
    cudaStat3 = cudaMalloc((void**)&d_info, sizeof(int));
    assert(cudaSuccess == cudaStat2);
    assert(cudaSuccess == cudaStat3);
    /* step 4: query working space of hegvj */
    status = cusolverDnZhegvj_bufferSize(cusolverH,itype,jobz,uplo,n,H,n,S,n,d_W,&lwork,syevj_params);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    cudaStat1 = cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex)*lwork);
    assert(cudaSuccess == cudaStat1);
    /* step 5: compute eigen-pair */
    status = cusolverDnZhegvj(cusolverH,itype,jobz,uplo,n,H,n,S,n,d_W,d_work,lwork,d_info,syevj_params);
    cudaStat1 = cudaDeviceSynchronize();
    assert(CUSOLVER_STATUS_SUCCESS == status);
    assert(cudaSuccess == cudaStat1);
    /* copy back the first ne eigenvalues and eigenvectors (stored in H) */
    cudaStat1 = cudaMemcpy(eig, d_W, sizeof(double)*ne, cudaMemcpyDeviceToHost);
    cudaStat2 = cudaMemcpy(z, H, sizeof(cuDoubleComplex)*n*ne, cudaMemcpyDeviceToHost);
    cudaStat3 = cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
    assert(cudaSuccess == cudaStat1);
    assert(cudaSuccess == cudaStat2);
    assert(cudaSuccess == cudaStat3);
    if ( 0 == info ){
        printf("sygvj converges \n");
    }else if ( 0 > info ){
        printf("%d-th parameter is wrong \n", -info);
        exit(1);
    }else{
        printf("WARNING: info = %d : sygvj does not converge \n", info );
    }
    status = cusolverDnXsyevjGetSweeps(cusolverH,syevj_params,&executed_sweeps);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    status = cusolverDnXsyevjGetResidual(cusolverH,syevj_params,&residual);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    printf("residual |A - V*W*V**H|_F = %E \n", residual );
    printf("number of executed sweeps = %d \n", executed_sweeps );
    /* free resources */
    if (d_W ) cudaFree(d_W);
    if (d_info ) cudaFree(d_info);
    if (d_work ) cudaFree(d_work);
    if (cusolverH ) cusolverDnDestroy(cusolverH);
    if (stream ) cudaStreamDestroy(stream);
    if (syevj_params) cusolverDnDestroySyevjInfo(syevj_params);
    /* cudaDeviceReset(); */
    return ;
}
/* Solve the real generalized eigenproblem H*psi = lambda*S*psi with
 * cuSOLVER's Jacobi solver (Dsygvj). Mirrors cusolver_complex.
 *
 *  H, S       : n x n matrices, handed directly to cuSOLVER and read back
 *               with cudaMemcpyDeviceToHost, i.e. used as device pointers.
 *               NOTE(review): no host->device copy is performed despite the
 *               "copy A to device" comment -- presumably the caller supplies
 *               device/managed memory; verify.
 *  n          : matrix dimension
 *  ne         : number of eigenpairs copied back into eig/z
 *  tol        : Jacobi convergence tolerance
 *  max_sweeps : maximal number of Jacobi sweeps
 *  eig, z     : host output buffers (first ne eigenvalues / eigenvectors;
 *               the eigenvectors are returned in H by cuSOLVER)
 */
void cusolver_real(double *H,double *S,int n,int ne,double tol,int max_sweeps,double* eig,double *z){
    cusolverDnHandle_t cusolverH = NULL;
    cudaStream_t stream = NULL;
    syevjInfo_t syevj_params = NULL;
    cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
    cudaError_t cudaStat1 = cudaSuccess;
    cudaError_t cudaStat2 = cudaSuccess;
    cudaError_t cudaStat3 = cudaSuccess;
    double *d_W = NULL;     /* eigenvalues on device */
    int *d_info = NULL;     /* error info */
    int lwork = 0;          /* size of workspace */
    double *d_work = NULL;  /* device workspace for syevj */
    int info = 0;           /* host copy of error info */
    /* configuration of syevj */
    const cusolverEigType_t itype = CUSOLVER_EIG_TYPE_1;     /* solve H psi = S lambda psi */
    const cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; /* compute eigenvectors */
    const cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
    /* numerical results of syevj */
    double residual = 0;
    int executed_sweeps = 0;
    /* step 1: create cusolver handle, bind a stream */
    status = cusolverDnCreate(&cusolverH);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    assert(cudaSuccess == cudaStat1);
    status = cusolverDnSetStream(cusolverH, stream);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* step 2: configuration of syevj */
    status = cusolverDnCreateSyevjInfo(&syevj_params);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* default value of tolerance is machine zero */
    /* BUGFIX: was "&svj_params" -- an undeclared name, and syevjInfo_t is
       passed by value, not by address */
    status = cusolverDnXsyevjSetTolerance(syevj_params, tol);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* default value of max. sweeps is 100 */
    status = cusolverDnXsyevjSetMaxSweeps(syevj_params, max_sweeps); /* BUGFIX: was &syevj_params */
    assert(CUSOLVER_STATUS_SUCCESS == status);
    /* step 3: allocate device buffers for the results */
    cudaStat2 = cudaMalloc ((void**)&d_W, sizeof(double) * n);
    cudaStat3 = cudaMalloc ((void**)&d_info, sizeof(int));
    assert(cudaSuccess == cudaStat2);
    assert(cudaSuccess == cudaStat3);
    /* step 4: query working space of sygvj */
    status = cusolverDnDsygvj_bufferSize(cusolverH,itype,jobz,uplo,n,H,n,S,n,d_W,&lwork,syevj_params);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    cudaStat1 = cudaMalloc((void**)&d_work, sizeof(double)*lwork);
    assert(cudaSuccess == cudaStat1);
    /* step 5: compute eigen-pair */
    status = cusolverDnDsygvj(cusolverH,itype,jobz,uplo,n,H,n,S,n,d_W,d_work,lwork,d_info,syevj_params);
    cudaStat1 = cudaDeviceSynchronize();
    assert(CUSOLVER_STATUS_SUCCESS == status);
    assert(cudaSuccess == cudaStat1);
    /* copy back the first ne eigenvalues and eigenvectors (stored in H) */
    cudaStat1 = cudaMemcpy(eig, d_W, sizeof(double)*ne, cudaMemcpyDeviceToHost);
    cudaStat2 = cudaMemcpy(z, H, sizeof(double)*n*ne, cudaMemcpyDeviceToHost);
    cudaStat3 = cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
    assert(cudaSuccess == cudaStat1);
    assert(cudaSuccess == cudaStat2);
    assert(cudaSuccess == cudaStat3);
    if ( 0 == info ){
        printf("sygvj converges \n");
    }else if ( 0 > info ){
        printf("%d-th parameter is wrong \n", -info);
        exit(1);
    }else{
        printf("WARNING: info = %d : sygvj does not converge \n", info );
    }
    status = cusolverDnXsyevjGetSweeps(cusolverH,syevj_params,&executed_sweeps);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    status = cusolverDnXsyevjGetResidual(cusolverH,syevj_params,&residual);
    assert(CUSOLVER_STATUS_SUCCESS == status);
    printf("residual |A - V*W*V**H|_F = %E \n", residual );
    printf("number of executed sweeps = %d \n", executed_sweeps );
    /* free resources */
    if (d_W ) cudaFree(d_W);
    if (d_info ) cudaFree(d_info);
    if (d_work ) cudaFree(d_work);
    if (cusolverH ) cusolverDnDestroy(cusolverH);
    if (stream ) cudaStreamDestroy(stream);
    if (syevj_params) cusolverDnDestroySyevjInfo(syevj_params);
    /* cudaDeviceReset(); */
    return ;
}
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
!> Driver for GPU diagonalization of the generalized eigenvalue problem using
!> the cuSOLVER Jacobi solvers; the actual work is done by the C wrappers
!> cusolver_real/cusolver_complex (see cusolver.c).
MODULE m_cusolver_diag
  USE m_types
  USE m_judft
#ifdef CPP_GPU
  USE m_types_gpumat
#endif
  IMPLICIT NONE
  PRIVATE
#ifdef CPP_GPU
  ! NOTE(review): REAL(c_double) here assumes FLEUR is compiled with default
  ! REAL promoted to double precision -- confirm against the build flags.
  INTERFACE
     SUBROUTINE cusolver_real(H,S,n,ne,tol,max_sweeps,eig,z) BIND(C,name="cusolver_real")
       USE iso_c_binding
       IMPLICIT NONE
       REAL(c_double)       :: H(*),S(*)      ! Hamiltonian/overlap, overwritten
       INTEGER(c_int),VALUE :: n,ne,max_sweeps
       REAL(c_double),VALUE :: tol            ! Jacobi convergence tolerance
       REAL(c_double)       :: eig(*),z(*)    ! eigenvalues/-vectors on exit
     END SUBROUTINE cusolver_real
  END INTERFACE
  INTERFACE
     ! BUGFIX: this interface was bound to name="cusolver_real", so the
     ! complex branch would have called the real C routine on complex data.
     SUBROUTINE cusolver_complex(H,S,n,ne,tol,max_sweeps,eig,z) BIND(C,name="cusolver_complex")
       USE iso_c_binding
       IMPLICIT NONE
       COMPLEX(c_double_complex) :: H(*),S(*) ! was COMPLEX(c_double); use the interop kind
       INTEGER(c_int),VALUE      :: n,ne,max_sweeps
       REAL(c_double),VALUE      :: tol
       REAL(c_double)            :: eig(*)
       COMPLEX(c_double_complex) :: z(*)
     END SUBROUTINE cusolver_complex
  END INTERFACE
#endif
  PUBLIC cusolver_diag
CONTAINS
  SUBROUTINE cusolver_diag(hmat,smat,ne,eig,zmat)
    !Simple driver to solve Generalized Eigenvalue Problem using CuSolverDN
    USE m_types
    IMPLICIT NONE
    CLASS(t_mat),INTENT(INOUT)           :: hmat,smat ! Hamiltonian and overlap
    INTEGER,INTENT(INOUT)                :: ne        ! in: #eigenpairs wanted, out: #found
    CLASS(t_mat),ALLOCATABLE,INTENT(OUT) :: zmat      ! eigenvectors
    REAL,INTENT(OUT)                     :: eig(:)    ! eigenvalues
#ifdef CPP_GPU
    INTEGER,PARAMETER :: max_sweeps=15  ! Jacobi sweep limit passed to cuSOLVER
    REAL              :: tol=1E-7       ! convergence tolerance (SAVEd by initialization)
    ALLOCATE(t_mat::zmat)
    CALL zmat%alloc(hmat%l_real,hmat%matsize1,ne)
    IF (hmat%l_real) THEN
       CALL cusolver_real(hmat%data_r,smat%data_r,smat%matsize1,ne,tol,max_sweeps,eig,zmat%data_r)
    ELSE
       CALL cusolver_complex(hmat%data_c,smat%data_c,smat%matsize1,ne,tol,max_sweeps,eig,zmat%data_c)
    END IF
#endif
  END SUBROUTINE cusolver_diag
END MODULE m_cusolver_diag
......@@ -29,8 +29,18 @@ MODULE m_eigen_diag
#else
INTEGER,PARAMETER:: diag_magma=-6
#endif
INTEGER,PARAMETER:: diag_lapack=4
#ifdef CPP_CHASE
INTEGER,PARAMETER:: diag_chase=7
#else
INTEGER,PARAMETER:: diag_chase=-7
#endif
#ifdef CPP_GPU
INTEGER,PARAMETER:: diag_cusolver=8
#else
INTEGER,PARAMETER:: diag_cusolver=-8
#endif
INTEGER,PARAMETER:: diag_lapack=4
INTEGER,PARAMETER:: diag_debugout=99
PUBLIC eigen_diag,parallel_solver_available
CONTAINS
......@@ -47,7 +57,9 @@ CONTAINS
USE m_elemental
USE m_chase_diag
USE m_types_mpimat
USE m_types_gpumat
USE m_matrix_copy
USE m_cusolver_diag
IMPLICIT NONE
#ifdef CPP_MPI
include 'mpif.h'
......@@ -99,14 +111,12 @@ CONTAINS
CALL scalapack(hmat,smat,ne,eig,ev)
CASE (diag_magma)
!CALL magma_diag(hmat,smat,ne,eig,ev)
CASE (diag_cusolver)
CALL cusolver_diag(hmat,smat,ne,eig,ev)
CASE (diag_lapack)
CALL lapack_diag(hmat,smat,ne,eig,ev)
CASE (diag_chase)
#ifdef CPP_CHASE
CALL chase_diag(hmat,smat,ikpt,jsp,iter,ne,eig,ev)
#else
CALL juDFT_error('ChASE eigensolver selected but not available', calledby = 'eigen_diag')
#endif
CASE (diag_debugout)
CALL priv_debug_out(smat,hmat)
END SELECT
......@@ -215,11 +225,7 @@ CONTAINS
diag_solver=diag_scalapack
#endif
ELSE
#ifdef CPP_MAGMA
diag_solver=diag_magma
#else
diag_solver=diag_lapack
#endif
ENDIF
!check if a special solver was requested
......@@ -229,49 +235,17 @@ CONTAINS
IF (trim(juDFT_string_for_argument("-diag"))=="lapack") diag_solver=diag_lapack
IF (trim(juDFT_string_for_argument("-diag"))=="magma") diag_solver=diag_magma
IF (trim(juDFT_string_for_argument("-diag"))=="chase") diag_solver=diag_chase
IF (trim(juDFT_string_for_argument("-diag"))=="cusolver") diag_solver=diag_cusolver
IF (trim(juDFT_string_for_argument("-diag"))=="debugout") diag_solver=diag_debugout
!Check if solver is possible
if (diag_solver<0) call priv_solver_error(diag_solver,parallel)
IF (ANY((/diag_lapack,diag_magma/)==diag_solver).AND.parallel) CALL priv_solver_error(diag_solver,parallel)
if (any((/diag_elpa,diag_elemental,diag_scalapack/)==diag_solver).and..not.parallel) call priv_solver_error(diag_solver,parallel)
IF (diag_solver<0) CALL juDFT_error("You selected a solver for the eigenvalue problem that is not available",hint="You most probably did not provide the appropriate libraries for compilation/linking")
IF (ANY((/diag_lapack,diag_magma,diag_cusolver/)==diag_solver).AND.parallel) CALL judft_error("You selected an eigensolver that does not support distributed memory parallism",hint="Try scalapack,elpa or another supported solver for parallel matrices")
IF (ANY((/diag_elpa,diag_elemental,diag_scalapack/)==diag_solver).AND..NOT.parallel) CALL judft_error("You selected an eigensolver for matrices that are memory distributed",hint="Try lapack, cusolver or another supported solver for non-distributed matrices")
END FUNCTION priv_select_solver
!> Abort with a solver-specific message when the requested eigensolver
!> cannot be used in the current setup (not compiled in, or wrong
!> serial/parallel mode).
SUBROUTINE priv_solver_error(diag_solver,parallel)
   IMPLICIT NONE
   INTEGER,INTENT(IN) :: diag_solver  ! one of the diag_* module constants
   LOGICAL,INTENT(IN) :: parallel     ! .TRUE. for a distributed-memory run
   IF (diag_solver==diag_elpa) THEN
      IF (parallel) THEN
         CALL juDFT_error("You did not compile with the ELPA solver and hence can not use it")
      ELSE
         CALL juDFT_error("The ELPA solver can not be used in serial")
      END IF
   ELSE IF (diag_solver==diag_elemental) THEN
      IF (parallel) THEN
         CALL juDFT_error("You did not compile with the ELEMENTAL solver and hence can not use it")
      ELSE
         CALL juDFT_error("The ELEMENTAL solver can not be used in serial")
      END IF
   ELSE IF (diag_solver==diag_scalapack) THEN
      IF (parallel) THEN
         CALL juDFT_error("You did not compile with the SCALAPACK solver and hence can not use it")
      ELSE
         CALL juDFT_error("The SCALAPACK solver can not be used in serial")
      END IF
   ELSE IF (diag_solver==diag_lapack) THEN
      IF (parallel) CALL juDFT_error("The LAPACK solver can not be used in parallel")
   ELSE IF (diag_solver==diag_magma) THEN
      CALL juDFT_error("You have not compiled with MAGMA support")
   ELSE
      CALL judft_error("You have selected an unkown eigensolver")
   END IF
END SUBROUTINE priv_solver_error
END MODULE m_eigen_diag
!-------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------
MODULE m_elpa
CONTAINS
SUBROUTINE elpa_diag(hmat,smat,ne,eig,ev)
!
!----------------------------------------------------
!- Parallel eigensystem solver - driver routine based on chani; dw'12
! Uses the ELPA for the actual diagonalization
!
!
! hmat ..... Hamiltonian matrix
! smat ..... overlap matrix
! ne ....... number of ev's searched (and found) on this node
! On input, overall number of ev's searched,
! On output, local number of ev's found
! eig ...... eigenvalues, output
! ev ....... eigenvectors, output
!
!----------------------------------------------------
USE m_juDFT
USE m_types_mpimat
USE m_types
USE elpa
IMPLICIT NONE
CLASS(t_mat),INTENT(INOUT) :: hmat,smat
CLASS(t_mat),ALLOCATABLE,INTENT(OUT)::ev
REAL,INTENT(out) :: eig(:)
INTEGER,INTENT(INOUT) :: ne