Skip to content
Snippets Groups Projects
Commit 46f1124c authored by Paul Baumeister's avatar Paul Baumeister
Browse files

prepare for a first GPU-accelerated version, make tfQMRgpu=yes

parent 918a0bd1
No related branches found
No related tags found
No related merge requests found
......@@ -219,6 +219,9 @@ module KKRmat_mod
use fillKKRMatrix_mod, only: dump
use IterativeSolver_mod, only: IterativeSolver, solve
use DirectSolver_mod, only: DirectSolver, solve
#ifdef has_tfQMRgpu
! use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration
#endif
use SparseMatrixDescription_mod, only: dump
use InitialGuess_mod, only: InitialGuess, load, store
use KKROperator_mod, only: KKROperator
......@@ -257,6 +260,13 @@ module KKRmat_mod
double complex, allocatable :: dPdE_local(:,:,:), gllke_x(:,:), dgde(:,:), MinvdMdE(:,:,:), TinvMinvdMdE(:,:,:) ! LLY
double complex :: tracek ! LLY
#ifdef has_tfQMRgpu
! Variables used only by the GPU solver branch (compiled when has_tfQMRgpu is defined).
external :: tfqmrgpu_bsrsv_complete ! subroutine, called through an implicit interface (arguments are not checked by the compiler)
! NOTE(review): an initialization inside a declaration implies the SAVE attribute, so o and ierr
! receive these values only once and keep whatever they hold between invocations.
integer :: o = 6 ! passed to the solver call; presumably an output unit number (6 = stdout) - TODO confirm
integer :: ierr = 1 ! status argument of the solver call, preset to a nonzero value
integer :: iterations, lda ! iterations: iteration count argument; lda: set from size(op%mat_A, 1)
double precision :: residuum ! residual threshold argument of the solver call
#endif
integer :: num_trunc_atoms, lmsd, lm1, idx_lly, i1
double complex :: cfctorinv
......@@ -383,6 +393,20 @@ module KKRmat_mod
! store the initial guess
call store(iguess_data, op%mat_X, ik=ikpoint, is=ispin, ie=ienergy)
#ifdef has_tfQMRgpu
      case (5) ! GPU solver: solve A*X = B with the all-in-one tfQMRgpu interface
        ! Iteration limit and residual threshold handed to the solver.
        ! NOTE(review): both are presumably overwritten with the achieved values on return - confirm against the tfQMRgpu interface.
        iterations = 1000
        residuum = 1.d-7 ! double precision literal (1e-7 is a default-real constant and loses digits)
        ! ierr is initialized in its declaration and therefore saved between calls:
        ! reset the nonzero sentinel so that a stale status of a previous solve is never mistaken for success.
        ierr = 1
        lda = size(op%mat_A, 1)
        call tfqmrgpu_bsrsv_complete(op%bsr_A%nRows, lda, &
          op%bsr_A%RowStart, op%bsr_A%ColIndex, op%mat_A(:,:,:,0), 'n', & !! A (in)
          op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 'n', & !! X (out)
          op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 'n', & !! B (in)
          iterations, residuum, o, ierr)
        ! NOTE(review): assumes that the library reports success as ierr == 0 - confirm.
        if (ierr /= 0) then
          warn(6, "tfqmrgpu_bsrsv_complete returned a nonzero status, ierr ="+ierr)
        endif
#endif
case default
warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type)
endselect ! solver_type
......
......@@ -7,12 +7,19 @@ TYPE ?= nodebug
### the user may specify SMP=openmp as a command line argument to make
SMP ?= none
### can we make use of the tfQMRgpu library
tfQMRgpu ?= no
PROGRAM = kkr.exe
# Path to put object files and module files
BUILDDIR = $(HOME)/build
EXTRA_FLAGS =
EXTRA_FLAGS ?=
# When the user requests tfQMRgpu=yes, append the preprocessor definition
# has_tfQMRgpu to EXTRA_FLAGS; the sources guard the GPU solver code with it.
ifeq ($(tfQMRgpu),yes)
EXTRA_FLAGS += -D has_tfQMRgpu
endif
FC90FLAGS =
FCFLAGS =
PPFLAGS =
......@@ -377,6 +384,11 @@ OBJS += $(notdir ${SRCSFPP:.F=.o})
OBJS += $(notdir ${SRCS90FPP:.F90=.o})
OBJS += $(notdir ${SRCS95TMPL:.F95=.o})
# GPU build (tfQMRgpu=yes): add the tfQMRgpu objects and link against the CUDA libraries.
# CUDA_HOME may be overridden on the command line or via the environment;
# it defaults to the previously hard-coded location /usr/local/cuda.
ifeq ($(tfQMRgpu),yes)
CUDA_HOME ?= /usr/local/cuda
OBJS += tfqmrgpu_Fortran.o tfqmrgpu_Fortran_wrappers.o tfqmrgpu.o
LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lm -lcurand
endif
.PHONY: all
all: $(PROGRAM)
......@@ -520,7 +532,7 @@ bsrmm_mod.o: CacheOverlap_mod.o
CacheOverlap_mod.o:
ChebMeshData_mod.o: InputParams_mod.o RadialMeshData_mod.o
Truncation_mod.o: Logging_mod.o Exceptions_mod.o TruncationZone_mod.o
NonCollinearMagnetism_mod.o: RadialMeshData_mod.o ChebMeshData_mod.o
NonCollinearMagnetism_mod.o: RadialMeshData_mod.o ChebMeshData_mod.o read_formatted_shapefun_mod.o
NonCollinearMagnetismData_mod.o: Exceptions_mod.o
# DO NOT DELETE
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment