fftw: enable optimizations unconditionally and build with mtune=generic

This commit is contained in:
Tungsten842 2023-03-13 12:31:32 +01:00
parent 5ad547c708
commit 23f23a89b3

View file

@ -5,10 +5,6 @@
, perl , perl
, llvmPackages , llvmPackages
, precision ? "double" , precision ? "double"
, enableAvx ? stdenv.hostPlatform.avxSupport
, enableAvx2 ? stdenv.hostPlatform.avx2Support
, enableAvx512 ? stdenv.hostPlatform.avx512Support
, enableFma ? stdenv.hostPlatform.fmaSupport
, enableMpi ? false , enableMpi ? false
, mpi , mpi
, withDoc ? stdenv.cc.isGNU , withDoc ? stdenv.cc.isGNU
@ -40,22 +36,25 @@ stdenv.mkDerivation (finalAttrs: {
llvmPackages.openmp llvmPackages.openmp
] ++ lib.optional enableMpi mpi; ] ++ lib.optional enableMpi mpi;
configureFlags = configureFlags = [
[ "--enable-shared" "--enable-shared"
"--enable-threads" "--enable-threads"
] "--enable-openmp"
++ lib.optional (precision != "double") "--enable-${precision}" ]
# all x86_64 have sse2
# however, not all float sizes fit ++ lib.optional (precision != "double") "--enable-${precision}"
++ lib.optional (stdenv.isx86_64 && (precision == "single" || precision == "double") ) "--enable-sse2" # https://www.fftw.org/fftw3_doc/SIMD-alignment-and-fftw_005fmalloc.html
++ lib.optional enableAvx "--enable-avx" # FFTW will try to detect at runtime whether the CPU supports these extensions
++ lib.optional enableAvx2 "--enable-avx2" ++ lib.optional (stdenv.isx86_64 && (precision == "single" || precision == "double"))
++ lib.optional enableAvx512 "--enable-avx512" "--enable-sse2 --enable-avx --enable-avx2 --enable-avx512 --enable-avx128-fma"
++ lib.optional enableFma "--enable-fma" ++ lib.optional enableMpi "--enable-mpi"
++ [ "--enable-openmp" ] # doc generation causes Fortran wrapper generation which hard-codes gcc
++ lib.optional enableMpi "--enable-mpi" ++ lib.optional (!withDoc) "--disable-doc";
# doc generation causes Fortran wrapper generation which hard-codes gcc
++ lib.optional (!withDoc) "--disable-doc"; # fftw builds with -mtune=native by default
postPatch = ''
substituteInPlace configure --replace "-mtune=native" "-mtune=generic"
'';
enableParallelBuilding = true; enableParallelBuilding = true;