Add ENABLE_PATCH_INTEL to use MKL on AMD hardware

07b5b5db · Philipp Rüssmann · 92751da2 · 07b5b5db · 07b5b5db · 07b5b5db
Commit 07b5b5db authored 4 years ago by Philipp Rüssmann
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,6 +25,7 @@ option(ENABLE_OPTRPT "Generate optimization reports (*.optrpt files) at compilat
 option(OPTFLAGS_xHOST "Turn on xHost optimization flag for release build" ON)
 option(OPTFLAGS_AVX512 "Use AVX512 instruction set for release build" OFF)
 option(OPTFLAGS_AMD "FLAGS for AMD64 node on iffslurm (see compiler_config.txt)" OFF)
+option(ENABLE_PATCH_INTEL "Use patch to make MKL think it works on intel hardware (EXPERIMENTAL!!!)" OFF)
 option(ENABLE_MPI "Enable MPI parallelization" ON)
 option(ENABLE_OMP "Enable OpenMP parallelization" OFF)
 option(ENABLE_OMP_EVERYWHERE "Add openmp compile flags everywhere" OFF)

--- a/cmake/compiler_config.txt
+++ b/cmake/compiler_config.txt
@@ -48,12 +48,25 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES Intel)

    # release flags
    if(OPTFLAGS_xHOST)
+
      set(CMAKE_Fortran_FLAGS_RELEASE "-O2 -ip -xHOST")
+
    elseif(OPTFLAGS_AMD)
+
      set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer")
+      if(ENABLE_PATCH_INTEL)
+        set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Wl,--allow-multiple-definition -static-intel -mkl")
+        # --allow-multiple-definition: patch_intel.c defines mkl_serv_intel_cpu* itself (presumably also defined inside MKL -- TODO confirm)
+        # NOTE(review): only reached when OPTFLAGS_xHOST is OFF and OPTFLAGS_AMD is ON, while lib_patch_intel is built whenever ENABLE_PATCH_INTEL is ON -- confirm this is intended
+        set(complib "mklpatch ${complib}")
+      endif()
+
    elseif(OPTFLAGS_AVX512)
+
      set(CMAKE_Fortran_FLAGS_RELEASE "-O2 -ip -xCORE-AVX512")
+
    endif()
+
    if(NOT COMPILE_KKRIMP)
        set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -r8")
    endif()
@@ -158,6 +171,7 @@ if(USE_LIBFLAME)
        message(FATAL_ERROR "FLAME_PATH not set. Please specify the location of the FLAME library with -FLAME_PATH=<path-to-libflame.a>")
    endif()
    set(complib "libflame")
+    set(complib "${complib} libflame")  # NOTE(review): complib was reset to "libflame" on the previous line, so this yields "libflame libflame" -- confirm intent
    message("Use BLIS: ${BLAS_LIBRARIES}")
    message("Use LAPACK: ${LAPACK_LIBRARIES}")
    # we need to make sure that these are linked with the openmp flags since on the cluster they are compiled like that
@@ -166,4 +180,4 @@ if(USE_LIBFLAME)
    elseif(CMAKE_Fortran_COMPILER_ID MATCHES GNU)
        set(LAPACK_LIBRARIES "-fopenmp ${LAPACK_LIBRARIES}")
    endif()
-endif()
+endif()
\ No newline at end of file
--- a/cmake/source_list_KKRhost.txt
+++ b/cmake/source_list_KKRhost.txt
@@ -109,8 +109,23 @@ add_library(lib_common STATIC
    source/common/wronskian.f90
    source/common/mathtools.f90
 )
-# disable cmake auto add pf 'lib' prefix to .so file
+# disable cmake auto add of 'lib' prefix to .so file
 SET_TARGET_PROPERTIES(lib_common PROPERTIES PREFIX "")
+
+
+if(ENABLE_PATCH_INTEL)
+    # static helper library with the Intel/MKL patch (C code + Fortran wrapper); PREFIX "" below disables cmake's auto-added 'lib' prefix
+    add_library(lib_patch_intel STATIC
+        source/common/patch_intel.c
+        source/common/patch_intel.f90
+    )
+    SET_TARGET_PROPERTIES(lib_patch_intel PROPERTIES PREFIX "")
+    # activate the preprocessor flag (directory-wide) that makes main_all.F90 call the patch_intel subroutine
+    add_definitions(-DCPP_PATCH_INTEL)
+    target_link_libraries(lib_common lib_patch_intel)
+endif()
+
+
 add_library(lib_kkrhost STATIC
    source/KKRhost/addvirtual14.f90
    source/KKRhost/amemagvec.f90
@@ -373,12 +388,9 @@ add_library(lib_kkrhost STATIC
    source/KKRhost/wunfiles.F90
    source/KKRhost/ylag.f90
 )
-# disable cmake auto add pf 'lib' prefix to .so file
+# disable cmake auto add of 'lib' prefix to .so file
 SET_TARGET_PROPERTIES(lib_kkrhost PROPERTIES PREFIX "")
 target_link_libraries(lib_kkrhost lib_common)
-#target_link_libraries(lib_common lib_npy_for_fortran)
-#target_link_libraries(lib_kkrhost lib_npy_for_fortran)
-


 # the executable is built from this list of files

--- a/source/KKRhost/main_all.F90
+++ b/source/KKRhost/main_all.F90
@@ -4,6 +4,7 @@
 ! of the MIT license as expressed in the LICENSE.md file in more detail.                  !
 !-----------------------------------------------------------------------------------------!

+
 ! -----------------------------------------------------------------------------------
 !> Summary: Main program for the JM-KKR
 !> Author: Philipp Ruessmann, Bernd Zimmermann, Phivos Mavropoulos, R. Zeller, and many others ...
@@ -59,7 +60,9 @@ program kkrcode
    rc, rcls, rclsimp, refpot, righttinvll, rmesh, rmtnew, rmtrefat, rnew, rpan_intervall, rr, rrel, rrot, rs, rws, s, rmt, rmtref, &
    socscale, socscl, srrel, thesme, thetas, thetasnew, tleft, tright, rmrel, uldau, vins, visp, vref, vtrel, wez, wg, wldau, &
    yrg, zat, zrel, ueff
-
+#ifdef CPP_PATCH_INTEL
+  use mod_patch_intel, only: patch_intel
+#endif

  implicit none

@@ -90,6 +93,16 @@ program kkrcode
  ! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< initialize MPI !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

+
+
+#ifdef CPP_PATCH_INTEL
+  ! this makes the MKL think it works on intel hardware even if it runs on AMD
+  ! seems to give better performance than unpatched MKL or BLIS+LIBFLAME
+  call patch_intel()
+#endif
+
+
+
  ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! start KKR with main0 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
  ! >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

--- a/source/common/patch_intel.c
+++ b/source/common/patch_intel.c
+/* Code from Agner Fog to improve performance of INTEL compiler/libraries on AMD */
+
+
+
+/***********************  intel_cpu_feature_patch.c  **************************
+* Author:           Agner Fog
+* Date created:     2014-07-30
+* Last modified:    2019-12-29
+* Source URL:       https://www.agner.org/optimize/intel_dispatch_patch.zip
+* Language:         C or C++
+*
+* Description:
+* Patch for Intel compiler version 13.0 and later, including the general
+* libraries, LIBM and SVML, but not MKL and VML.
+*
+* Example of how to patch Intel's CPU feature dispatcher in order to improve
+* compatibility of generated code with non-Intel processors.
+* In Windows: Use the static link libraries (*.lib), not the dynamic link
+* libraries (*.DLL).
+* In Linux and Mac: use static linking (*.a) or dynamic linking (*.so).
+*
+* Include this code in your C or C++ program and call intel_cpu_patch();
+* before any call to the library functions.
+*
+* Copyright (c) 2014-2019. BSD License 2.0
+******************************************************************************/
+#include <stdint.h>
+
+#ifdef __cplusplus  // use C-style linking
+extern "C" {
+#endif
+
+// link to Intel libraries
+extern int64_t __intel_cpu_feature_indicator;    // CPU feature bits
+extern int64_t __intel_cpu_feature_indicator_x;  // CPU feature bits
+void __intel_cpu_features_init();                // unfair dispatcher: checks CPU features for Intel CPU's only
+void __intel_cpu_features_init_x();              // fair dispatcher: checks CPU features without discriminating by CPU brand
+
+#ifdef __cplusplus
+}  // end of extern "C"
+#endif
+
+/*
+ * Re-run the Intel CPU feature detection through the brand-independent
+ * ("fair") initializer and store its result in the regular feature indicator.
+ * Takes no arguments and returns nothing. According to the file header it has
+ * to be called before any call to the Intel library functions; the Fortran
+ * wrapper in patch_intel.f90 binds to it by the C name "intel_cpu_patch".
+ */
+void intel_cpu_patch() {
+    // force a re-evaluation of the CPU features without discriminating by CPU brand
+    __intel_cpu_feature_indicator = 0;
+    __intel_cpu_feature_indicator_x = 0;
+    __intel_cpu_features_init_x();
+    // copy the brand-independent result into the non-"_x" indicator
+    // (presumably the one the default dispatcher consults -- TODO confirm)
+    __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x;
+}
+
+/***********************  intel_mkl_cpuid_patch.c  **************************
+* Author:           Agner Fog
+* Date created:     2019-12-29
+* Source URL:       https://www.agner.org/optimize/intel_dispatch_patch.zip
+* Language:         C or C++
+*
+* Description:
+* Patch for Intel Math Kernel Library (MKL) version 14.0 and later, except
+* the Vector Math Library (VML).
+*
+* Example of how to override Intel's CPU feature dispatcher in order to improve
+* compatibility of Intel function libraries with non-Intel processors.
+*
+* Include this code in your C or C++ program and make sure it is linked before
+* any Intel libraries. You may need to include intel_mkl_feature_patch.c as well.
+*
+* Copyright (c) 2019. BSD License 2.0
+******************************************************************************/
+//#include <stdint.h>
+#ifdef __cplusplus  // use C-style linking
+extern "C" {
+#endif
+/*
+    // detect if Intel CPU
+    int mkl_serv_intel_cpu() {
+        return 1;
+    }
+
+    // detect if Intel CPU
+    int mkl_serv_intel_cpu_true() {
+        return 1;
+    }
+
+
+    int mkl_serv_cpuhaspnr_true() {
+        return 0;
+    }
+
+    int mkl_serv_cpuhaspnr() {
+        return 0;
+    }
+
+    int mkl_serv_cpuhasnhm() {
+        return 0;
+    }
+
+    int mkl_serv_cpuisbulldozer() {
+        return 0;
+    }
+
+    int mkl_serv_cpuiszen() {
+        return 1;
+    }
+
+    int mkl_serv_cpuisatomsse4_2() {
+        return 0;
+    }
+
+    int mkl_serv_cpuisatomssse3() {
+        return 0;
+    }
+
+    int mkl_serv_cpuisitbarcelona() {
+        return 0;
+    }
+
+    int mkl_serv_cpuisskl() {
+        return 0;
+    }
+
+    int mkl_serv_cpuisknm() {
+        return 0;
+    }
+
+    int mkl_serv_cpuisclx() {
+        return 0;
+    }
+
+    int mkl_serv_get_microarchitecture() {
+        // I don't know what this number means
+        return 33;
+    }
+   int mkl_serv_cpuisclx() {
+        return 1;
+    }
+    int mkl_serv_cpuiszen() {
+        return 1;
+    }
+*/
+    // Replacement for MKL's "detect if Intel CPU" query: always answers yes (1).
+    // NOTE(review): presumably takes precedence over the MKL symbol of the same
+    // name because this file must be linked before the Intel libraries (see the
+    // header above) -- confirm the link order in the build.
+    int mkl_serv_intel_cpu() {
+        return 1;
+    }
+
+    // Same override for the "_true" variant of the query: always returns 1.
+    int mkl_serv_intel_cpu_true() {
+        return 1;
+    }
+
+#ifdef __cplusplus
+}  // end of extern "C"
+#endif
+/***********************  intel_mkl_feature_patch.c  **************************
+* Author:           Agner Fog
+* Date created:     2014-07-30
+* Last modified:    2019-12-29
+* Source URL:       https://www.agner.org/optimize/intel_dispatch_patch.zip
+* Language:         C or C++
+*
+* Description:
+* Patch for Intel Math Kernel Library (MKL) version 14.0 and later, except
+* the Vector Math Library (VML).
+*
+* Example of how to patch Intel's CPU feature dispatcher in order to improve
+* compatibility of Intel function libraries with non-Intel processors.
+* In Windows: Use the static link libraries (*.lib), not the dynamic link
+* libraries (*.DLL).
+* In Linux and Mac: use static linking (*.a) or dynamic linking (*.so).
+*
+* Include this code in your C or C++ program and call intel_mkl_patch();
+* before any call to the MKL functions. You may need to include
+* intel_mkl_cpuid_patch.c as well.
+*
+* Copyright (c) 2014-2019. BSD License 2.0
+******************************************************************************/
+//#include <stdint.h>
+
+#ifdef __cplusplus  // use C-style linking
+extern "C" {
+#endif
+
+// link to MKL libraries
+extern int64_t __intel_mkl_feature_indicator;       // CPU feature bits
+extern int64_t __intel_mkl_feature_indicator_x;     // CPU feature bits
+void __intel_mkl_features_init();                   // unfair dispatcher: checks CPU features for Intel CPU's only
+void __intel_mkl_features_init_x();                 // fair dispatcher: checks CPU features without discriminating by CPU brand
+
+#ifdef __cplusplus
+}  // end of extern "C"
+#endif
+
+/*
+ * Re-run the MKL CPU feature detection through the brand-independent ("fair")
+ * initializer and store its result in the regular MKL feature indicator.
+ * Takes no arguments and returns nothing. According to the file header it has
+ * to be called before any call to the MKL functions; the Fortran wrapper in
+ * patch_intel.f90 binds to it by the C name "intel_mkl_patch".
+ */
+void intel_mkl_patch() {
+    // force a re-evaluation of the CPU features without discriminating by CPU brand
+    __intel_mkl_feature_indicator = 0;
+    __intel_mkl_feature_indicator_x = 0;
+    __intel_mkl_features_init_x();
+    // copy the brand-independent result into the non-"_x" indicator
+    // (presumably the one MKL's default dispatcher consults -- TODO confirm)
+    __intel_mkl_feature_indicator = __intel_mkl_feature_indicator_x;
+}
--- a/source/common/patch_intel.f90
+++ b/source/common/patch_intel.f90
+
+!-----------------------------------------------------------------------------------------!
+! Copyright (c) 2020 Peter Grünberg Institut, Forschungszentrum Jülich, Germany           !
+! This file is part of Jülich KKR code and available as free software under the conditions!
+! of the MIT license as expressed in the LICENSE.md file in more detail.                  !
+!-----------------------------------------------------------------------------------------!
+
+module mod_patch_intel
+
+  implicit none
+
+  private
+  public :: patch_intel
+
+contains
+
+  !-------------------------------------------------------------------------------
+  !> Summary: Interface to patch_intel.c, which makes the MKL believe it runs on an Intel CPU
+  !> Author: 
+  !> Category: 
+  !> Deprecated: False 
+  !> Taken from the fleur code; seems to give better performance on AMD hardware
+  !> than unpatched MKL or AMD's BLIS+FLAME libraries.
+  !>
+  !> Takes no arguments. Calls the two C routines `intel_cpu_patch` and
+  !> `intel_mkl_patch` defined in patch_intel.c and then prints a one-line notice.
+  !-------------------------------------------------------------------------------
+  subroutine patch_intel()
+    ! we try to patch the intel libraries to overwrite determination of 'INTEL' brand
+    ! otherwise performance on AMD CPUs is bad.
+
+    ! C routines from patch_intel.c, bound by their C symbol names
+    interface
+      subroutine cpu_patch() bind(C, name="intel_cpu_patch")
+      end subroutine cpu_patch
+      subroutine mkl_patch() bind(C, name="intel_mkl_patch")
+      end subroutine mkl_patch
+    end interface
+
+    call cpu_patch()
+    call mkl_patch()
+
+    ! report only after both patch routines have actually been executed
+    print *,"INTEL PATCH applied"
+
+  end subroutine patch_intel
+
+end module mod_patch_intel
\ No newline at end of file