Commit 1d790398 authored by Daniel Wortmann

Modified GPU compiling and configure

parent ffdc1568
......@@ -2,7 +2,7 @@ include_directories(include)
set(c_filesInpgen io/xml/inputSchema.h io/xml/dropInputSchema.c)
set(c_filesFleur io/xml/inputSchema.h io/xml/dropInputSchema.c io/xml/xmlInterfaceWrapper.c)
if(FLEUR_USE_GPU)
if(FLEUR_USE_CUSOLVER)
set(c_filesFleur ${c_filesFleur} diagonalization/cusolver.c)
endif()
......
......@@ -36,6 +36,9 @@ else()
message("${Green} ChASE Library found : ${CReset} ---")
endif()
# Report whether the GPU build is enabled.
message("${Green} Compile GPU version : ${CReset} ${FLEUR_USE_GPU}")
# The CuSolver status line is only printed for GPU builds.
if (FLEUR_USE_GPU)
message("${Green} CuSolver Library found : ${CReset} ${FLEUR_USE_CUSOLVER}")
endif()
message("\n")
message("${Green}Compile serial version : ${CReset} ${FLEUR_USE_SERIAL}")
message("${Green}Compile parallel version : ${CReset} ${FLEUR_USE_MPI}")
......
......@@ -7,12 +7,24 @@ if (CLI_FLEUR_USE_GPU)
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ta=tesla:cuda8.0,cc60 -Mcuda:kepler+ -Minfo=accel -Mcudalib=cublas -acc ")
message("Using cuda8")
# NOTE: if(... MATCHES ...) performs a regex *search*, so the pattern "cuda9"
# also matches the value "cuda9-nvtx". The more specific "cuda9-nvtx" variant
# must therefore be tested first, otherwise its branch can never be reached
# and -lnvToolsExt is silently dropped.
elseif(${CLI_FLEUR_USE_GPU} MATCHES "cuda9-nvtx")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mcuda=cuda9.0,cc60 -Mcuda=rdc -Mcudalib=cublas -lnvToolsExt ")
elseif(${CLI_FLEUR_USE_GPU} MATCHES "cuda9")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mcuda=cuda9.0,cc60 -Mcuda=rdc -Mcudalib=cublas")
elseif(${CLI_FLEUR_USE_GPU} MATCHES "emu")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mcuda=emu -Mcudalib=cublas -Minfo=accel ")
endif()
set(FLEUR_MPI_DEFINITIONS ${FLEUR_MPI_DEFINITIONS} "CPP_GPU")
set(FLEUR_DEFINITIONS ${FLEUR_DEFINITIONS} "CPP_GPU")
# Probe for the cuSOLVER dense library (cusolverDN).
# The C test program is compiled with -Mcuda and linked against -lcusolver;
# try_compile stores the outcome (TRUE/FALSE) in FLEUR_USE_CUSOLVER.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Mcuda")
try_compile(FLEUR_USE_CUSOLVER ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/tests/test_cusolver.c
  LINK_LIBRARIES "-lcusolver"
)
if (FLEUR_USE_CUSOLVER)
  # Append to FLEUR_LIBRARIES itself. The previous form,
  # set(${FLEUR_LIBRARIES} ...), expanded the variable first and assigned to
  # a variable *named after its contents*, so -lcusolver was never added to
  # the link line.
  list(APPEND FLEUR_LIBRARIES "-lcusolver")
  # Enable the cuSOLVER code paths in both the serial and the MPI build.
  list(APPEND FLEUR_MPI_DEFINITIONS "CPP_CUSOLVER")
  list(APPEND FLEUR_DEFINITIONS "CPP_CUSOLVER")
endif()
else()
set(FLEUR_USE_GPU FALSE)
endif()
......@@ -56,7 +56,7 @@ void cusolver_complex(cuDoubleComplex *H,cuDoubleComplex *S,int n,int ne,double
assert(CUSOLVER_STATUS_SUCCESS == status);
/* default value of tolerance is machine zero */
status = cusolverDnXsyevjSetTolerance(&svj_params,tol);
status = cusolverDnXsyevjSetTolerance(&syevj_params,tol);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* default value of max. sweeps is 100 */
......@@ -163,7 +163,7 @@ void cusolver_real(double *H,double *S,int n,int ne,double tol,int max_sweeps,do
assert(CUSOLVER_STATUS_SUCCESS == status);
/* default value of tolerance is machine zero */
status = cusolverDnXsyevjSetTolerance(&svj_params,tol);
status = cusolverDnXsyevjSetTolerance(&syevj_params,tol);
assert(CUSOLVER_STATUS_SUCCESS == status);
/* default value of max. sweeps is 100 */
......
......@@ -11,7 +11,7 @@ MODULE m_cusolver_diag
#endif
IMPLICIT NONE
PRIVATE
#ifdef CPP_GPU
#ifdef CPP_CUSOLVER
INTERFACE
SUBROUTINE cusolver_real(H,S,n,ne,tol,max_sweeps,eig,z) BIND(C,name="cusolver_real")
USE iso_c_binding
......@@ -46,7 +46,7 @@ CONTAINS
CLASS(t_mat),ALLOCATABLE,INTENT(OUT) :: zmat
REAL,INTENT(OUT) :: eig(:)
#ifdef CPP_GPU
#ifdef CPP_CUSOLVER
INTEGER,PARAMETER:: max_sweeps=15
REAL :: tol=1E-7
......
......@@ -34,7 +34,7 @@ MODULE m_eigen_diag
#else
INTEGER,PARAMETER:: diag_chase=-7
#endif
#ifdef CPP_GPU
#ifdef CPP_CUSOLVER
INTEGER,PARAMETER:: diag_cusolver=8
#else
INTEGER,PARAMETER:: diag_cusolver=-8
......
......@@ -26,7 +26,7 @@ CONTAINS
USE m_ylm
USE m_apws
USE cudafor
USE nvtx
! USE nvtx
IMPLICIT NONE
TYPE(t_sym),INTENT(IN) :: sym
TYPE(t_cell),INTENT(IN) :: cell
......@@ -60,7 +60,7 @@ CONTAINS
INTEGER :: istat
call nvtxStartRange("hsmt_ab",2)
! call nvtxStartRange("hsmt_ab",2)
lmax=MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
ALLOCATE(c_ph_dev(lapw%nv(1),MERGE(2,1,noco%l_ss)))
......@@ -109,7 +109,7 @@ CONTAINS
ENDDO
ylm_dev = ylm
call nvtxStartRange("hsmt_cuf",5)
!call nvtxStartRange("hsmt_cuf",5)
!$cuf kernel do <<<*,256>>>
DO k = 1,lapw%nv(1)
!--> generate spherical harmonics
......@@ -123,7 +123,7 @@ CONTAINS
END DO
ENDDO !k-loop
istat = cudaDeviceSynchronize()
call nvtxEndRange
!call nvtxEndRange
IF (PRESENT(abclo)) THEN
print*, "Ooooops, TODO in hsmt_ab"
......@@ -150,7 +150,7 @@ CONTAINS
ab_size=ab_size*2
call nvtxEndRange
!call nvtxEndRange
END SUBROUTINE hsmt_ab_gpu
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment