Skip to content
Snippets Groups Projects
Commit 13a8026a authored by Paul Baumeister's avatar Paul Baumeister
Browse files

prepare for a first GPU-accelerated version, make tfQMRgpu=yes

parent 6201ef16
No related branches found
No related tags found
No related merge requests found
...@@ -219,6 +219,9 @@ module KKRmat_mod ...@@ -219,6 +219,9 @@ module KKRmat_mod
use fillKKRMatrix_mod, only: dump use fillKKRMatrix_mod, only: dump
use IterativeSolver_mod, only: IterativeSolver, solve use IterativeSolver_mod, only: IterativeSolver, solve
use DirectSolver_mod, only: DirectSolver, solve use DirectSolver_mod, only: DirectSolver, solve
#ifdef has_tfQMRgpu
! use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration
#endif
use SparseMatrixDescription_mod, only: dump use SparseMatrixDescription_mod, only: dump
use InitialGuess_mod, only: InitialGuess, load, store use InitialGuess_mod, only: InitialGuess, load, store
use KKROperator_mod, only: KKROperator use KKROperator_mod, only: KKROperator
...@@ -257,6 +260,13 @@ module KKRmat_mod ...@@ -257,6 +260,13 @@ module KKRmat_mod
double complex, allocatable :: dPdE_local(:,:,:), gllke_x(:,:), dgde(:,:), MinvdMdE(:,:,:), TinvMinvdMdE(:,:,:) ! LLY double complex, allocatable :: dPdE_local(:,:,:), gllke_x(:,:), dgde(:,:), MinvdMdE(:,:,:), TinvMinvdMdE(:,:,:) ! LLY
double complex :: tracek ! LLY double complex :: tracek ! LLY
#ifdef has_tfQMRgpu
! Local variables needed only for the call to the GPU solver tfqmrgpu_bsrsv_complete.
! They are compiled only when the preprocessor symbol has_tfQMRgpu is defined.
!
! NOTE(review): o and ierr are initialised in their declaration. In Fortran this
! gives them the implicit SAVE attribute: the values 6 and 1 are set once at program
! start and are NOT re-established on later invocations of the enclosing procedure.
external :: tfqmrgpu_bsrsv_complete ! subroutine; declared external, i.e. no explicit interface, so the compiler cannot check the argument list
integer :: o = 6 ! passed to tfqmrgpu_bsrsv_complete; presumably the output unit (6 = stdout) -- TODO confirm
integer :: ierr = 1 ! last argument of tfqmrgpu_bsrsv_complete; presumably a status code returned by the solver -- TODO confirm
integer :: iterations, lda ! iterations is set to 1000 before the solver call; lda is set to size(op%mat_A, 1)
double precision :: residuum ! set before the solver call; presumably the convergence threshold -- TODO confirm
#endif
integer :: num_trunc_atoms, lmsd, lm1, idx_lly, i1 integer :: num_trunc_atoms, lmsd, lm1, idx_lly, i1
double complex :: cfctorinv double complex :: cfctorinv
...@@ -383,6 +393,20 @@ module KKRmat_mod ...@@ -383,6 +393,20 @@ module KKRmat_mod
! store the initial guess ! store the initial guess
call store(iguess_data, op%mat_X, ik=ikpoint, is=ispin, ie=ienergy) call store(iguess_data, op%mat_X, ik=ikpoint, is=ispin, ie=ienergy)
#ifdef has_tfQMRgpu
case (5) ! GPU solver
! Solve the block-sparse problem A*X = B with the all-in-one tfQMRgpu interface.
! iterations and residuum are re-assigned before every call.
! NOTE(review): presumably iterations is the iteration limit on entry and residuum
! the convergence threshold, and both may be overwritten by the solver -- confirm
! against the tfQMRgpu documentation.
iterations = 1000
residuum = 1d-7 ! double precision literal; a default-real 1e-7 would be rounded to single precision first
lda = size(op%mat_A, 1)
! ierr is initialised in its declaration and therefore implicitly saved between calls;
! reset it so that a stale status of a previous solve can never be mistaken for this one
ierr = 1
call tfqmrgpu_bsrsv_complete(op%bsr_A%nRows, lda, &
op%bsr_A%RowStart, op%bsr_A%ColIndex, op%mat_A(:,:,:,0), 'n', & !! A (in)
op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 'n', & !! X (out)
op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 'n', & !! B (in)
iterations, residuum, o, ierr)
! do not silently continue with an unsolved system
! NOTE(review): assumes the solver returns ierr == 0 on success -- confirm
if (ierr /= 0) then
warn(6, "GPU solver tfqmrgpu_bsrsv_complete returned ierr ="+ierr)
endif
#endif
case default case default
warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type) warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type)
endselect ! solver_type endselect ! solver_type
......
...@@ -7,12 +7,19 @@ TYPE ?= nodebug ...@@ -7,12 +7,19 @@ TYPE ?= nodebug
### the user may specify SMP=openmp as a command line argument to make ### the user may specify SMP=openmp as a command line argument to make
SMP ?= none SMP ?= none
### the user may specify tfQMRgpu=yes as a command line argument to make to enable the tfQMRgpu library (GPU solver)
tfQMRgpu ?= no
PROGRAM = kkr.exe PROGRAM = kkr.exe
# Path to put object files and module files # Path to put object files and module files
BUILDDIR = $(HOME)/build BUILDDIR = $(HOME)/build
EXTRA_FLAGS = EXTRA_FLAGS ?=
ifeq ($(tfQMRgpu),yes)
EXTRA_FLAGS += -D has_tfQMRgpu
endif
FC90FLAGS = FC90FLAGS =
FCFLAGS = FCFLAGS =
PPFLAGS = PPFLAGS =
...@@ -375,6 +382,11 @@ OBJS += $(notdir ${SRCS90:.f90=.o}) ...@@ -375,6 +382,11 @@ OBJS += $(notdir ${SRCS90:.f90=.o})
OBJS += $(notdir ${SRCSFPP:.F=.o}) OBJS += $(notdir ${SRCSFPP:.F=.o})
OBJS += $(notdir ${SRCS90FPP:.F90=.o}) OBJS += $(notdir ${SRCS90FPP:.F90=.o})
ifeq ($(tfQMRgpu),yes)
# Link the tfQMRgpu objects and the CUDA runtime libraries they are linked against.
# CUDA_HOME may be overridden on the command line or via the environment,
# e.g. make tfQMRgpu=yes CUDA_HOME=/opt/cuda; the default keeps the previous path.
CUDA_HOME ?= /usr/local/cuda
OBJS += tfqmrgpu_Fortran.o tfqmrgpu_Fortran_wrappers.o tfqmrgpu.o
LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lm -lcurand
endif
.PHONY: all .PHONY: all
all: $(PROGRAM) all: $(PROGRAM)
...@@ -521,7 +533,7 @@ bsrmm_mod.o: CacheOverlap_mod.o ...@@ -521,7 +533,7 @@ bsrmm_mod.o: CacheOverlap_mod.o
CacheOverlap_mod.o: CacheOverlap_mod.o:
ChebMeshData_mod.o: InputParams_mod.o RadialMeshData_mod.o ChebMeshData_mod.o: InputParams_mod.o RadialMeshData_mod.o
Truncation_mod.o: Logging_mod.o Exceptions_mod.o TruncationZone_mod.o Truncation_mod.o: Logging_mod.o Exceptions_mod.o TruncationZone_mod.o
NonCollinearMagnetism_mod.o: RadialMeshData_mod.o ChebMeshData_mod.o NonCollinearMagnetism_mod.o: RadialMeshData_mod.o ChebMeshData_mod.o read_formatted_shapefun_mod.o
NonCollinearMagnetismData_mod.o: Exceptions_mod.o NonCollinearMagnetismData_mod.o: Exceptions_mod.o
# DO NOT DELETE # DO NOT DELETE
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment