From cd1f3be365119725b9b7845d1c13928e92261c10 Mon Sep 17 00:00:00 2001 From: Paul Baumeister <p.baumeister@fz-juelich.de> Date: Mon, 29 Jul 2019 15:31:32 +0200 Subject: [PATCH] difficult to link, fails to run due to OpenMPI-error --- .../source/IterativeSolver/KKRmat_mod.F90 | 9 +++-- source/KKRnano/source/Makefile | 39 +++++++++++++------ .../source/NonCollinearMagnetism_mod.F90 | 2 +- .../datastructures/ChebMeshData_mod.F90 | 2 +- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 index 73a40cee9..fc05a235f 100644 --- a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 +++ b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 @@ -220,7 +220,7 @@ module KKRmat_mod use IterativeSolver_mod, only: IterativeSolver, solve use DirectSolver_mod, only: DirectSolver, solve #ifdef has_tfQMRgpu - ! use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration + use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration #endif use SparseMatrixDescription_mod, only: dump use InitialGuess_mod, only: InitialGuess, load, store @@ -261,7 +261,6 @@ module KKRmat_mod double complex :: tracek ! LLY #ifdef has_tfQMRgpu - external :: tfqmrgpu_bsrsv_complete ! subroutine integer :: o = 6 integer :: ierr = 1 integer :: iterations, lda @@ -393,9 +392,9 @@ module KKRmat_mod ! store the initial guess call store(iguess_data, op%mat_X, ik=ikpoint, is=ispin, ie=ienergy) -#ifdef has_tfQMRgpu case (5) ! GPU solver +#ifdef has_tfQMRgpu iterations = 1000 residuum = 1e-7 lda = size(op%mat_A, 1) @@ -405,8 +404,10 @@ module KKRmat_mod op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 'n', & !! X (out) op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 'n', & !! B (in) iterations, residuum, o, ierr) +#else + warn(6, "GPU solver needs -D has_tfQMRgpu (Problem is not solved) solver_type ="+solver_type) #endif - + case default warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type) endselect ! solver_type diff --git a/source/KKRnano/source/Makefile b/source/KKRnano/source/Makefile index a87e05bc7..aa5958da0 100644 --- a/source/KKRnano/source/Makefile +++ b/source/KKRnano/source/Makefile @@ -7,19 +7,20 @@ TYPE ?= nodebug ### the user may specify SMP=openmp as a command line argument to make SMP ?= none -### can we make use of the tfQMRgpu library -tfQMRgpu ?= no - PROGRAM = kkr.exe # Path to put object files and module files -BUILDDIR = $(HOME)/build +BUILDDIR = ./build EXTRA_FLAGS ?= +### can we make use of the tfQMRgpu library? +tfQMRgpu ?= no ifeq ($(tfQMRgpu),yes) EXTRA_FLAGS += -D has_tfQMRgpu + CUDA_PATH=-L/usr/local/zam/CUDA/cuda-9.1/lib64 endif + FC90FLAGS = FCFLAGS = PPFLAGS = @@ -98,7 +99,10 @@ ifeq ($(PLATFORM),gfortran) FC90FLAGS = -ffree-line-length-0 PPFLAGS += -D NOLOGGING # FCFLAGS += -D TASKLOCAL_FILES ### breaks when TASKLOCAL_FILES is used - LDFLAGS += -L /usr/local/atlas/lib -llapack -lf77blas -lcblas -latlas +# LDFLAGS += -L /usr/local/atlas/lib -llapack -latlas + LDFLAGS += -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 ###-lpthread +# # LDFLAGS += -lgfortran + ### -lf77blas -lcblas -latlas endif @@ -377,23 +381,36 @@ SRCSFPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F)) SRCS90FPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F90)) SRCS95TMPL = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F95)) +OBJS= + +LINKER= $(FC90) +ifeq ($(tfQMRgpu),yes) + OBJS += tfqmrgpu_Fortran.o + OBJS += tfqmrgpu_Fortran_wrappers.o + OBJS += tfqmrgpu.o + SRCS90FPP += GPU/tfqmrgpu_Fortran.F90 +# LDFLAGS += $(CUDA_PATH) -gfortran -lcudart -lnvToolsExt -lm -lcurand \ +# -L/usr/lib64/gcc/x86_64-suse-linux/4.8 \ +# -L/usr/lib -lstdc++ + LDFLAGS += $(CUDA_PATH) -lcudart -lnvToolsExt -lm -lcurand + LDFLAGS += -L/usr/lib -lstdc++ +# # LINKER= gfortran +endif + + # notdir extracts only filename -OBJS = $(notdir ${SRCS:.f=.o}) +OBJS += $(notdir ${SRCS:.f=.o}) OBJS += $(notdir ${SRCS90:.f90=.o}) OBJS += $(notdir ${SRCSFPP:.F=.o}) OBJS += $(notdir ${SRCS90FPP:.F90=.o}) OBJS += $(notdir ${SRCS95TMPL:.F95=.o}) -ifeq ($(tfQMRgpu),yes) - OBJS += tfqmrgpu_Fortran.o tfqmrgpu_Fortran_wrappers.o tfqmrgpu.o - LDFLAGS += -L/usr/local/cuda/lib64 -lcudart -lnvToolsExt -lm -lcurand -endif .PHONY: all all: $(PROGRAM) $(PROGRAM): $(OBJS) - $(FC90) $(FCFLAGS) -o $(PROGRAM) $(addprefix $(BUILDDIR)/,$(OBJS)) $(LDFLAGS) + $(LINKER) -o $(PROGRAM) $(addprefix $(BUILDDIR)/,$(OBJS)) $(LDFLAGS) %.o: %.f $(FC) $(FCFLAGS) $(EXTRA_FLAGS) -c $< -o $(BUILDDIR)/$@ diff --git a/source/KKRnano/source/NonCollinearMagnetism_mod.F90 b/source/KKRnano/source/NonCollinearMagnetism_mod.F90 index fea34a3c3..d1e75662f 100644 --- a/source/KKRnano/source/NonCollinearMagnetism_mod.F90 +++ b/source/KKRnano/source/NonCollinearMagnetism_mod.F90 @@ -1043,7 +1043,7 @@ END IF ! klo and khi now bracket the input value of x. h=xa(khi)-xa(klo) ! The xa's must be distinct. -IF (h == 0.d0) PAUSE 'bad xa input in splint' +IF (h == 0.d0) STOP 'bad xa input in splint' !! used to be PAUSE ! Cubic spline polynomial is now evaluated. a = (xa(khi)-x)/h b = (x-xa(klo))/h diff --git a/source/KKRnano/source/datastructures/ChebMeshData_mod.F90 b/source/KKRnano/source/datastructures/ChebMeshData_mod.F90 index 2ed1cdf35..95358e8fc 100644 --- a/source/KKRnano/source/datastructures/ChebMeshData_mod.F90 +++ b/source/KKRnano/source/datastructures/ChebMeshData_mod.F90 @@ -674,7 +674,7 @@ end if ! klo and khi now bracket the input value of x. h=xa(khi)-xa(klo) ! the xa's must be distinct. -if (h == 0.d0) pause 'bad xa input in splint' +if (h == 0.d0) STOP 'bad xa input in splint' !! used to be PAUSE ! cubic spline polynomial is now evaluated. a = (xa(khi)-x)/h b = (x-xa(klo))/h -- GitLab