From d9a6c309d838694d4763c3e2af24903072bb29b6 Mon Sep 17 00:00:00 2001 From: "Paul F. Baumeister" <p.baumeister@fz-juelich.de> Date: Thu, 9 Sep 2021 13:30:29 +0200 Subject: [PATCH] Prepare KKRnano Makefile for tfQMRgpu library mkdir -p build make tfQMRgpu SMP=openmp tfQMRgpu=yes make SMP=openmp tfQMRgpu=yes -j --- source/KKRnano/source/Makefile | 35 +++++++++++-------- .../source/ScatteringCalculation_mod.F90 | 13 ++++--- .../source/parallel/two_sided_comm_mod.F95 | 13 ++++--- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/source/KKRnano/source/Makefile b/source/KKRnano/source/Makefile index b552c82cc..cfb93cf13 100644 --- a/source/KKRnano/source/Makefile +++ b/source/KKRnano/source/Makefile @@ -18,7 +18,7 @@ EXTRA_FLAGS ?= tfQMRgpu ?= no ifeq ($(tfQMRgpu),yes) EXTRA_FLAGS += -D has_tfQMRgpu - TFQMRGPU_PATH = $(HOME)/tfQMRgpu + TFQMRGPU_PATH = $(HOME)/tfQMRgpu endif @@ -318,7 +318,7 @@ PPFLAGS = -WF,-DUSE_VOROWEIGHTS -WF,-DCOMPUTE_tref_LOCALLY -WF,-DNOLOGGING ifeq ($(SMP),openmp) FCFLAGS += -qsmp=omp - PPFLAGS += -WF,-DCPP_hybrid + PPFLAGS += -WF,-DCPP_hybrid endif #ESSL @@ -328,16 +328,16 @@ endif LDFLAGS = -L/bgsys/local/lib -lesslsmpbg -L/opt/ibmcmp/xlsmp/3.1/bglib64 -lxlsmp -L/bgsys/local/fftw3/3.3.2/fftw_g/lib/ -lfftw3 ifeq ($(TYPE),debug) - FCFLAGS += -q64 -O0 -qstrict -g -qnosave -C -qinitauto=7FF7FFFF -WF,-DDEBUG1 + FCFLAGS += -q64 -O0 -qstrict -g -qnosave -C -qinitauto=7FF7FFFF -WF,-DDEBUG1 else ifeq ($(TYPE),scorep) - FC = scorep mpixlf77_r - FC90 = scorep mpixlf90_r - FCFLAGS += -q64 -O3 -qstrict + FC = scorep mpixlf77_r + FC90 = scorep mpixlf90_r + FCFLAGS += -q64 -O3 -qstrict else ifeq ($(TYPE),voronoi_mesh) - FCFLAGS += -q64 -O3 -qstrict - PPFLAGS += -WF,-DUSE_OLD_MESH + FCFLAGS += -q64 -O3 -qstrict + PPFLAGS += -WF,-DUSE_OLD_MESH else - FCFLAGS += -q64 -O3 -qstrict + FCFLAGS += -q64 -O3 -qstrict endif #ifeq ($(USETOOL),scalasca) @@ -380,14 +380,16 @@ SRCS90 = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.f90)) SRCSFPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F)) SRCS90FPP = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F90)) SRCS95TMPL = $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.F95)) + + +OBJS= + ifeq ($(tfQMRgpu),yes) - SRCS90FPP += $(TFQMRGPU_PATH)/tfqmrgpu_Fortran_example.F90 LDFLAGS += -L$(TFQMRGPU_PATH)/lib64 -ltfQMRgpu_Fortran -ltfQMRgpu + OBJS += tfQMRgpu_mod.o endif -OBJS= - LINKER= $(FC90) @@ -418,8 +420,10 @@ $(PROGRAM): $(OBJS) $(FC90) $(FCFLAGS) $(EXTRA_FLAGS) $(FC90FLAGS) $(PPFLAGS) -c $< -o $(BUILDDIR)/$@ ifeq ($(tfQMRgpu),yes) -# tfqmrgpu_Fortran_example.o: $(TFQMRGPU_PATH)/example/tfqmrgpu_Fortran_example.F90 -# $(FC90) $(FCFLAGS) $(EXTRA_FLAGS) $(FC90FLAGS) $(PPFLAGS) -c $< -o $(BUILDDIR)/$@ +# add a target tfQMRgpu +tfQMRgpu: $(TFQMRGPU_PATH)/example/tfqmrgpu_Fortran_example.F90 + $(FC90) -I $(TFQMRGPU_PATH)/tfQMRgpu/include $(FCFLAGS) $(EXTRA_FLAGS) \ + $(FC90FLAGS) $(PPFLAGS) -c $< -o $(BUILDDIR)/tfQMRgpu_mod.o endif @@ -450,7 +454,8 @@ depend: makedepend ifeq ($(tfQMRgpu),yes) - KKRmat_mod.o: tfqmrgpu_Fortran_example.o + # manually add a dependency + KKRmat_mod.o: tfQMRgpu_mod.o endif diff --git a/source/KKRnano/source/ScatteringCalculation_mod.F90 b/source/KKRnano/source/ScatteringCalculation_mod.F90 index 2250861b5..dbdf267b4 100644 --- a/source/KKRnano/source/ScatteringCalculation_mod.F90 +++ b/source/KKRnano/source/ScatteringCalculation_mod.F90 @@ -107,7 +107,7 @@ implicit none integer :: omp_threads !DEBUGGING logical :: xccpl double precision :: rMTref - double precision, allocatable :: rMTs(:) + double precision, allocatable :: rMTs(:,:), rMTrefs(:,:) double complex, allocatable :: tmatLL(:,:,:,:) !< all t-matrices inside the truncation zone double complex, allocatable :: GmatN_buffer(:,:,:) !< GmatN for all local atoms @@ -187,9 +187,12 @@ implicit none enddo endif !--------------------------------------------------------- - allocate(rMTs(calc%trunc_zone%naez_trc)) - call distribute(calc%xTable, 1, calc%atomdata_a(:)%rMTref, rMTs) ! communicate the Muffin-Tin radii within the truncation zone - + allocate(rMTs(1,calc%trunc_zone%naez_trc), rMTrefs(1,num_local_atoms)) + rMTrefs(1,:) = calc%atomdata_a(:)%rMTref + ! communicate the Muffin-Tin radii within the truncation zone + call distribute(calc%xTable, 1, rMTrefs, rMTs) + deallocate(rMTrefs, stat=ist) ! ignore status + ! IE ==================================================================== ! BEGIN do loop over energies (EMPID-parallel) ! IE ==================================================================== @@ -212,7 +215,7 @@ implicit none do iacls = 1, calc%ref_cluster_a(ila)%nacls ! this calls tref several times with the same parameters if the local atoms are close to each other ! rMTref = kkr(ila)%rMTref(iacls) ! possible if it has been communicated earlier - rMTref = rMTs(calc%trunc_zone%trunc_atom_idx(calc%ref_cluster_a(ila)%atom(iacls))) + rMTref = rMTs(1,calc%trunc_zone%trunc_atom_idx(calc%ref_cluster_a(ila)%atom(iacls))) call tref(emesh%EZ(IE), params%vref, dims%lmaxd, rMTref, & kkr(ila)%Tref_ell(:,iacls), kkr(ila)%dTref_ell(:,iacls), derive=(dims%Lly > 0)) !if (dims%korbit == 1) then ! NOCO diff --git a/source/KKRnano/source/parallel/two_sided_comm_mod.F95 b/source/KKRnano/source/parallel/two_sided_comm_mod.F95 index 33e4c7a94..2f57417df 100644 --- a/source/KKRnano/source/parallel/two_sided_comm_mod.F95 +++ b/source/KKRnano/source/parallel/two_sided_comm_mod.F95 @@ -40,6 +40,8 @@ !> to replace a missing template feature in Fortran. !> Do not use more than one name with _TYPE per line! +! #define DEBUG + #define NUMBERZ double complex #define NUMBERMPIZ MPI_DOUBLE_COMPLEX #define NUMBERC complex @@ -84,11 +86,12 @@ module two_sided_comm_TYPE_mod include 'mpif.h' ! only: MPI_STATUS_SIZE, MPI_INTEGER, MPI_REQUEST_NULL assert( self%comm /= 0 ) - + call MPI_Comm_rank(self%comm, myrank, ierr) allocate(sreq(self%send_n), sstats(MPI_STATUS_SIZE,self%send_n), & - rreq(self%recv_n), rstats(MPI_STATUS_SIZE,self%recv_n), stat=ist) ! ToDo: catch status + rreq(self%recv_n), rstats(MPI_STATUS_SIZE,self%recv_n), stat=ist) + if (ist /= 0) call MPI_Abort(self%comm, ist, ierr) sreq(:) = MPI_REQUEST_NULL rreq(:) = MPI_REQUEST_NULL @@ -103,19 +106,19 @@ module two_sided_comm_TYPE_mod do inz = self%send_start(ipair), self%send_start(ipair + 1) - 1 tag = inz - self%send_start(ipair) iinp = self%send_index(inz) - call MPI_Isend(Ginp(:,iinp), ncount, NUMBERMPI_TYPE, rank, tag, self%comm, sreq(inz), ierr) #ifdef DEBUG write(*, '(9(a,i0))') "send local _TYPE-element ",iinp,"@",myrank," with tag ",tag," to rank ",rank #endif + call MPI_Isend(Ginp(:,iinp), ncount, NUMBERMPI_TYPE, rank, tag, self%comm, sreq(inz), ierr) enddo ! inz do inz = self%recv_start(ipair), self%recv_start(ipair + 1) - 1 tag = inz - self%recv_start(ipair) iout = self%recv_index(inz) - call MPI_Irecv(Gout(:,iout), ncount, NUMBERMPI_TYPE, rank, tag, self%comm, rreq(inz), ierr) #ifdef DEBUG write(*, '(9(a,i0))') "I (rank ",myrank,") want to receive a _TYPE-element with tag ",tag," from rank ",rank #endif + call MPI_Irecv(Gout(:,iout), ncount, NUMBERMPI_TYPE, rank, tag, self%comm, rreq(inz), ierr) enddo ! inz else ! rank /= myrank @@ -132,7 +135,7 @@ module two_sided_comm_TYPE_mod write(*, '(9(a,i0))') "copy local _TYPE-element ",iinp,"@",myrank," locally" #endif enddo ! inz - + endif ! rank /= myrank enddo ! ipair -- GitLab