From 13a8026acd60e9d4f9024edfe69cb9a2371b233d Mon Sep 17 00:00:00 2001 From: Paul Baumeister <p.baumeister@fz-juelich.de> Date: Fri, 17 May 2019 13:52:59 +0200 Subject: [PATCH] prepare for a first GPU-accelerated version, make tfQMRgpu=yes --- .../source/IterativeSolver/KKRmat_mod.F90 | 24 +++++++++++++++++++ source/KKRnano/source/Makefile | 16 +++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 index 785b690e2..73a40cee9 100644 --- a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 +++ b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 @@ -219,6 +219,9 @@ module KKRmat_mod use fillKKRMatrix_mod, only: dump use IterativeSolver_mod, only: IterativeSolver, solve use DirectSolver_mod, only: DirectSolver, solve +#ifdef has_tfQMRgpu + ! use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration +#endif use SparseMatrixDescription_mod, only: dump use InitialGuess_mod, only: InitialGuess, load, store use KKROperator_mod, only: KKROperator @@ -257,6 +260,13 @@ module KKRmat_mod double complex, allocatable :: dPdE_local(:,:,:), gllke_x(:,:), dgde(:,:), MinvdMdE(:,:,:), TinvMinvdMdE(:,:,:) ! LLY double complex :: tracek ! LLY +#ifdef has_tfQMRgpu + external :: tfqmrgpu_bsrsv_complete ! subroutine + integer :: o = 6 + integer :: ierr = 1 ! NOTE(review): initializer implies SAVE if this is a procedure local + integer :: iterations, lda + double precision :: residuum +#endif integer :: num_trunc_atoms, lmsd, lm1, idx_lly, i1 double complex :: cfctorinv @@ -383,6 +393,20 @@ module KKRmat_mod ! store the initial guess call store(iguess_data, op%mat_X, ik=ikpoint, is=ispin, ie=ienergy) +#ifdef has_tfQMRgpu + case (5) ! GPU solver + + iterations = 1000 + residuum = 1d-7 ! double-precision literal; 1e-7 is default real + lda = size(op%mat_A, 1) + + call tfqmrgpu_bsrsv_complete(op%bsr_A%nRows, lda, & + op%bsr_A%RowStart, op%bsr_A%ColIndex, op%mat_A(:,:,:,0), 'n', & !! 
A (in) + op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 'n', & !! X (out) + op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 'n', & !! B (in) + iterations, residuum, o, ierr) ! NOTE(review): ierr is not checked within this case block +#endif + case default warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type) endselect ! solver_type diff --git a/source/KKRnano/source/Makefile b/source/KKRnano/source/Makefile index 05f24378b..25e3c764e 100644 --- a/source/KKRnano/source/Makefile +++ b/source/KKRnano/source/Makefile @@ -7,12 +7,19 @@ TYPE ?= nodebug ### the user may specify SMP=openmp as a command line argument to make SMP ?= none +### can we make use of the tfQMRgpu library +tfQMRgpu ?= no + PROGRAM = kkr.exe # Path to put object files and module files BUILDDIR = $(HOME)/build -EXTRA_FLAGS = +EXTRA_FLAGS ?= + +ifeq ($(tfQMRgpu),yes) + EXTRA_FLAGS += -D has_tfQMRgpu +endif FC90FLAGS = FCFLAGS = PPFLAGS = @@ -375,6 +382,11 @@ OBJS += $(notdir ${SRCS90:.f90=.o}) OBJS += $(notdir ${SRCSFPP:.F=.o}) OBJS += $(notdir ${SRCS90FPP:.F90=.o}) +ifeq ($(tfQMRgpu),yes) + OBJS += tfqmrgpu_Fortran.o tfqmrgpu_Fortran_wrappers.o tfqmrgpu.o + LDFLAGS += -L/usr/local/cuda/lib64 -lcudart -lnvToolsExt -lm -lcurand +endif + .PHONY: all all: $(PROGRAM) @@ -521,7 +533,7 @@ bsrmm_mod.o: CacheOverlap_mod.o CacheOverlap_mod.o: ChebMeshData_mod.o: InputParams_mod.o RadialMeshData_mod.o Truncation_mod.o: Logging_mod.o Exceptions_mod.o TruncationZone_mod.o -NonCollinearMagnetism_mod.o: RadialMeshData_mod.o ChebMeshData_mod.o +NonCollinearMagnetism_mod.o: RadialMeshData_mod.o ChebMeshData_mod.o read_formatted_shapefun_mod.o NonCollinearMagnetismData_mod.o: Exceptions_mod.o # DO NOT DELETE -- GitLab