From cd1f3be365119725b9b7845d1c13928e92261c10 Mon Sep 17 00:00:00 2001
From: Paul Baumeister <p.baumeister@fz-juelich.de>
Date: Mon, 29 Jul 2019 15:31:32 +0200
Subject: [PATCH] difficult to link, fails to run due to OpenMPI-error

---
 .../source/IterativeSolver/KKRmat_mod.F90     |  9 +++--
 source/KKRnano/source/Makefile                | 39 +++++++++++++------
 .../source/NonCollinearMagnetism_mod.F90      |  2 +-
 .../datastructures/ChebMeshData_mod.F90       |  2 +-
 4 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90 b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90
index 73a40cee9..fc05a235f 100644
--- a/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90
+++ b/source/KKRnano/source/IterativeSolver/KKRmat_mod.F90
@@ -220,7 +220,7 @@ module KKRmat_mod
     use IterativeSolver_mod, only: IterativeSolver, solve
     use DirectSolver_mod, only: DirectSolver, solve
 #ifdef  has_tfQMRgpu
-    ! use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration
+    use tfqmrgpu, only: tfqmrgpu_bsrsv_complete ! all-in-one GPU solver interface for rapid integration
 #endif
     use SparseMatrixDescription_mod, only: dump
     use InitialGuess_mod, only: InitialGuess, load, store
@@ -261,7 +261,6 @@ module KKRmat_mod
     double complex :: tracek  ! LLY
          
 #ifdef  has_tfQMRgpu
-    external :: tfqmrgpu_bsrsv_complete ! subroutine
     integer :: o = 6
     integer :: ierr = 1
     integer :: iterations, lda
@@ -393,9 +392,9 @@ module KKRmat_mod
       ! store the initial guess
       call store(iguess_data, op%mat_X, ik=ikpoint, is=ispin, ie=ienergy)
 
-#ifdef  has_tfQMRgpu
     case (5) ! GPU solver
       
+#ifdef  has_tfQMRgpu
       iterations = 1000
       residuum = 1e-7
       lda = size(op%mat_A, 1)
@@ -405,8 +404,10 @@ module KKRmat_mod
         op%bsr_X%RowStart, op%bsr_X%ColIndex, op%mat_X, 'n', & !! X (out)
         op%bsr_B%RowStart, op%bsr_B%ColIndex, op%mat_B, 'n', & !! B (in)
         iterations, residuum, o, ierr)
+#else
+      warn(6, "GPU solver needs -D has_tfQMRgpu (Problem is not solved) solver_type ="+solver_type)
 #endif
-      
+
     case default
       warn(6, "No solver selected! Problem is not solved, solver_type ="+solver_type)
     endselect ! solver_type
diff --git a/source/KKRnano/source/Makefile b/source/KKRnano/source/Makefile
index a87e05bc7..aa5958da0 100644
--- a/source/KKRnano/source/Makefile
+++ b/source/KKRnano/source/Makefile
@@ -7,19 +7,20 @@ TYPE ?= nodebug
 ### the user may specify SMP=openmp as a command line argument to make
 SMP ?= none
 
-### can we make use of the tfQMRgpu library
-tfQMRgpu ?= no
-
 PROGRAM = kkr.exe
 
 # Path to put object files and module files
-BUILDDIR = $(HOME)/build
+BUILDDIR = ./build
 
 EXTRA_FLAGS ?=
 
+### can we make use of the tfQMRgpu library?
+tfQMRgpu ?= no
 ifeq ($(tfQMRgpu),yes)
 	EXTRA_FLAGS += -D has_tfQMRgpu
+	CUDA_PATH=-L/usr/local/zam/CUDA/cuda-9.1/lib64
 endif
+
 FC90FLAGS = 
 FCFLAGS =
 PPFLAGS =
@@ -98,7 +99,10 @@ ifeq ($(PLATFORM),gfortran)
 	FC90FLAGS = -ffree-line-length-0
 	PPFLAGS += -D NOLOGGING
 	# FCFLAGS += -D TASKLOCAL_FILES ### breaks when TASKLOCAL_FILES is used
-	LDFLAGS += -L /usr/local/atlas/lib -llapack -lf77blas -lcblas -latlas
+# 	LDFLAGS += -L /usr/local/atlas/lib -llapack -latlas
+	LDFLAGS += -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl ### gfortran needs the GNU interface/threading layers of MKL
+# 	# 	LDFLAGS += -lgfortran
+	### -lf77blas -lcblas -latlas
 endif
 
 
@@ -377,23 +381,36 @@ SRCSFPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F))
 SRCS90FPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F90))
 SRCS95TMPL = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F95))
 
+OBJS=
+
+LINKER= $(FC90)
+ifeq ($(tfQMRgpu),yes)
+	OBJS += tfqmrgpu_Fortran.o
+	OBJS += tfqmrgpu_Fortran_wrappers.o
+	OBJS += tfqmrgpu.o
+	SRCS90FPP += GPU/tfqmrgpu_Fortran.F90
+# 	LDFLAGS += $(CUDA_PATH) -gfortran -lcudart -lnvToolsExt -lm -lcurand \
+# 	             -L/usr/lib64/gcc/x86_64-suse-linux/4.8 \
+# 	             -L/usr/lib -lstdc++
+	LDFLAGS += $(CUDA_PATH) -lcudart -lnvToolsExt -lm -lcurand
+	LDFLAGS += -L/usr/lib -lstdc++
+# # 	LINKER= gfortran
+endif
+
+
 # notdir extracts only filename
-OBJS =  $(notdir ${SRCS:.f=.o})
+OBJS += $(notdir ${SRCS:.f=.o})
 OBJS += $(notdir ${SRCS90:.f90=.o})
 OBJS += $(notdir ${SRCSFPP:.F=.o})
 OBJS += $(notdir ${SRCS90FPP:.F90=.o})
 OBJS += $(notdir ${SRCS95TMPL:.F95=.o})
 
-ifeq ($(tfQMRgpu),yes)
-	OBJS += tfqmrgpu_Fortran.o tfqmrgpu_Fortran_wrappers.o tfqmrgpu.o
-	LDFLAGS += -L/usr/local/cuda/lib64 -lcudart -lnvToolsExt -lm -lcurand
-endif
 
 .PHONY: all
 all:	$(PROGRAM)
 
 $(PROGRAM): $(OBJS)
-	$(FC90) $(FCFLAGS) -o $(PROGRAM) $(addprefix $(BUILDDIR)/,$(OBJS)) $(LDFLAGS) 
+	$(LINKER) $(FCFLAGS) -o $(PROGRAM) $(addprefix $(BUILDDIR)/,$(sort $(OBJS))) $(LDFLAGS)
 
 %.o: %.f
 	$(FC) $(FCFLAGS) $(EXTRA_FLAGS) -c $< -o $(BUILDDIR)/$@
diff --git a/source/KKRnano/source/NonCollinearMagnetism_mod.F90 b/source/KKRnano/source/NonCollinearMagnetism_mod.F90
index fea34a3c3..d1e75662f 100644
--- a/source/KKRnano/source/NonCollinearMagnetism_mod.F90
+++ b/source/KKRnano/source/NonCollinearMagnetism_mod.F90
@@ -1043,7 +1043,7 @@ END IF
 ! klo and khi now bracket the input value of x.
 h=xa(khi)-xa(klo)
 ! The xa's must be distinct.
-IF (h == 0.d0) PAUSE 'bad xa input in splint'
+IF (h == 0.d0) STOP 'bad xa input in splint' !! used to be PAUSE
 ! Cubic spline polynomial is now evaluated.
 a = (xa(khi)-x)/h
 b = (x-xa(klo))/h
diff --git a/source/KKRnano/source/datastructures/ChebMeshData_mod.F90 b/source/KKRnano/source/datastructures/ChebMeshData_mod.F90
index 2ed1cdf35..95358e8fc 100644
--- a/source/KKRnano/source/datastructures/ChebMeshData_mod.F90
+++ b/source/KKRnano/source/datastructures/ChebMeshData_mod.F90
@@ -674,7 +674,7 @@ end if
 ! klo and khi now bracket the input value of x.
 h=xa(khi)-xa(klo)
 ! the xa's must be distinct.
-if (h == 0.d0) pause 'bad xa input in splint'
+if (h == 0.d0) STOP 'bad xa input in splint' !! used to be PAUSE
 ! cubic spline polynomial is now evaluated.
 a = (xa(khi)-x)/h
 b = (x-xa(klo))/h
-- 
GitLab