diff --git a/source/KKRnano/regtests/run_test.sh b/source/KKRnano/regtests/run_test.sh index 02cfb37a9005361d93f7ba2f7ecf5a80be9a0971..117c9a1277429a052cdbe6bd15419a991e8859ec 100755 --- a/source/KKRnano/regtests/run_test.sh +++ b/source/KKRnano/regtests/run_test.sh @@ -1,6 +1,8 @@ #!/bin/sh ARCHIVE=loggs +export LD_LIBRARY_PATH=$MKLROOT/lib/intel64:$LD_LIBRARY_PATH + ## today's date, hour, minute day=`date "+%Y%m%d%H%M"` diff --git a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 index fc05a235fda05dea5a6744aefe0c4469e461add7..6cabb140dd7bb928e4e5995f9377d20841cc8a97 100644 --- a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 +++ b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 @@ -221,6 +221,8 @@ module KKRmat_mod use DirectSolver_mod, only: DirectSolver, solve #ifdef has_tfQMRgpu use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration + use SolverStats_mod, only: reduce + use TimerMpi_mod, only: startTimer, stopTimer #endif use SparseMatrixDescription_mod, only: dump use InitialGuess_mod, only: InitialGuess, load, store @@ -261,10 +263,8 @@ module KKRmat_mod double complex :: tracek ! LLY #ifdef has_tfQMRgpu - integer :: o = 6 - integer :: ierr = 1 - integer :: iterations, lda - double precision :: residuum + integer (kind=4) :: o, ierr, iterations, lda + real (kind=8) :: residual #endif integer :: num_trunc_atoms, lmsd, lm1, idx_lly, i1 @@ -395,15 +395,23 @@ module KKRmat_mod case (5) ! GPU solver #ifdef has_tfQMRgpu - iterations = 1000 - residuum = 1e-7 - lda = size(op%mat_A, 1) + o=0 + ierr=0 + + iterations = 2000 + residual = iterative_solver%qmrbound + lda = size(op%mat_A, 1) + call startTimer(kernel_timer) + ! write(*,*) "Written by us: ", size(op%bsr_X%ColIndex) call tfqmrgpu_bsrsv_complete(op%bsr_A%nRows, lda, & - op%bsr_A%RowStart, op%bsr_A%ColIndex, op%mat_A(:,:,:,0), 'n', & !! A (in) - op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 'n', & !! X (out) - op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 'n', & !! B (in) - iterations, residuum, o, ierr) + op%bsr_A%RowStart, op%bsr_A%ColIndex, op%mat_A(:,:,:,0), 't', & !! A (in) + op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 't', & !! X (out) + op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 't', & !! B (in) + iterations, residual, o, ierr) + call stopTimer(kernel_timer) + call reduce(iterative_solver%stats, iterations, residual, 0_8) + #else warn(6, "GPU solver needs -D has_tfQMRgpu (Problem is not solved) solver_type ="+solver_type) #endif @@ -412,6 +420,10 @@ module KKRmat_mod warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type) endselect ! solver_type + !call dump(op%mat_X, "solution_form.dat", formatted=.true.) + !call dump(op%mat_B, "rhs_form.dat", formatted=.true.) + !stop __FILE__ + TESTARRAYLOG(3, op%mat_B) TESTARRAYLOG(3, op%mat_X) ! RESULT: mat_X diff --git a/source/KKRnano/source/IterativeSolver/fillKKRMatrix_mod.F90 b/source/KKRnano/source/IterativeSolver/fillKKRMatrix_mod.F90 index 745f05f02eacdd1fba5e36ca513a27659a70cc15..88ea971c0eda163bc5f8e31b8ed227acd3fd7c2e 100644 --- a/source/KKRnano/source/IterativeSolver/fillKKRMatrix_mod.F90 +++ b/source/KKRnano/source/IterativeSolver/fillKKRMatrix_mod.F90 @@ -32,7 +32,7 @@ module fillKKRMatrix_mod type(SparseMatrixDescription), intent(out) :: bsr_X integer(kind=1), intent(in) :: lmax_a(:,:) !< lmax of each interaction dim(naez_trc,num_local_atoms), -1: truncated - integer, parameter :: GROUPING = 0 ! -1:never, 0:auto, 1:always + integer, parameter :: GROUPING = -1 ! -1:never, 0:auto, 1:always integer :: iRow, jCol, Xind, nnzb, group nnzb = count(lmax_a >= 0) ! number of non-zero blocks in X @@ -46,7 +46,7 @@ module fillKKRMatrix_mod endif if (group > 0) then - + !write(*,*) "we are in group > 0" call create(bsr_X, nRows=size(lmax_a, 1), nnzb=size(lmax_a, 1), nCols=1) ! generate BSR descriptor of a flat structure, fuse RHS atoms into rectangular blocks diff --git a/source/KKRnano/source/Makefile b/source/KKRnano/source/Makefile index d4893402d597b067ef3b94e23d594ed235a7c2f6..b552c82cc38743759993528b0b81f425dd5b6b70 100644 --- a/source/KKRnano/source/Makefile +++ b/source/KKRnano/source/Makefile @@ -7,9 +7,6 @@ TYPE ?= nodebug ### the user may specify SMP=openmp as a command line argument to make SMP ?= none -### can we make use of the tfQMRgpu library -tfQMRgpu ?= no - PROGRAM = kkr.exe # Path to put object files and module files @@ -21,14 +18,9 @@ EXTRA_FLAGS ?= tfQMRgpu ?= no ifeq ($(tfQMRgpu),yes) EXTRA_FLAGS += -D has_tfQMRgpu - CUDA_PATH=-L/usr/local/zam/CUDA/cuda-9.1/lib64 + TFQMRGPU_PATH = $(HOME)/tfQMRgpu endif -EXTRA_FLAGS ?= - -ifeq ($(tfQMRgpu),yes) - EXTRA_FLAGS += -D has_tfQMRgpu -endif FC90FLAGS = FCFLAGS = @@ -388,22 +380,15 @@ SRCS90 = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.f90)) SRCSFPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F)) SRCS90FPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F90)) SRCS95TMPL = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F95)) +ifeq ($(tfQMRgpu),yes) + SRCS90FPP += $(TFQMRGPU_PATH)/tfqmrgpu_Fortran_example.F90 + LDFLAGS += -L$(TFQMRGPU_PATH)/lib64 -ltfQMRgpu_Fortran -ltfQMRgpu +endif + OBJS= LINKER= $(FC90) -ifeq ($(tfQMRgpu),yes) - OBJS += tfqmrgpu_Fortran.o - OBJS += tfqmrgpu_Fortran_wrappers.o - OBJS += tfqmrgpu.o - SRCS90FPP += GPU/tfqmrgpu_Fortran.F90 -# LDFLAGS += $(CUDA_PATH) -gfortran -lcudart -lnvToolsExt -lm -lcurand \ -# -L/usr/lib64/gcc/x86_64-suse-linux/4.8 \ -# -L/usr/lib -lstdc++ - LDFLAGS += $(CUDA_PATH) -lcudart -lnvToolsExt -lm -lcurand - LDFLAGS += -L/usr/lib -lstdc++ -# # LINKER= gfortran -endif # notdir extracts only filename @@ -414,11 +399,6 @@ OBJS += $(notdir ${SRCS90FPP:.F90=.o}) OBJS += $(notdir ${SRCS95TMPL:.F95=.o}) -ifeq ($(tfQMRgpu),yes) - OBJS += tfqmrgpu_Fortran.o tfqmrgpu_Fortran_wrappers.o tfqmrgpu.o - LDFLAGS += -L/usr/local/cuda/lib64 -lcudart -lnvToolsExt -lm -lcurand -endif - .PHONY: all all: $(PROGRAM) @@ -437,6 +417,12 @@ $(PROGRAM): $(OBJS) %.o: %.F90 $(FC90) $(FCFLAGS) $(EXTRA_FLAGS) $(FC90FLAGS) $(PPFLAGS) -c $< -o $(BUILDDIR)/$@ +ifeq ($(tfQMRgpu),yes) +# tfqmrgpu_Fortran_example.o: $(TFQMRGPU_PATH)/example/tfqmrgpu_Fortran_example.F90 +# $(FC90) $(FCFLAGS) $(EXTRA_FLAGS) $(FC90FLAGS) $(PPFLAGS) -c $< -o $(BUILDDIR)/$@ +endif + + ### Fortran-templates: %.o: %.F95 sed -e 's/_TYPE/D/' $< > $(BUILDDIR)/$*.F90 @@ -456,12 +442,18 @@ clean: test: @echo $(OBJS) @echo $(VPATH) + @echo $(SRCS90FPP) .PHONY: depend depend: $(SRCS90) makedepend +ifeq ($(tfQMRgpu),yes) + KKRmat_mod.o: tfqmrgpu_Fortran_example.o +endif + + #======================== Module dependencies ======================================== KKRnano.o: Logging_mod.o KKRzero_mod.o PotentialConverter_mod.o KKRnanoParallel_mod.o BasisAtom_mod.o AtomicCore_mod.o RadialMeshData_mod.o main2_aux_mod.o ScatteringCalculation_mod.o Main2Arrays_mod.o KKRnano_Comm_mod.o ProcessKKRresults_mod.o InputParams_mod.o EBalanceHandler_mod.o LDAUData_mod.o TimerMpi_mod.o EnergyMesh_mod.o DimParams_mod.o CalculationData_mod.o