python3Packages.tensorflow: fix `GLIBCXX_3.4.30' not found

Make tensorflow (and a bunch of other things) use a CUDA-compatible
toolchain. Introduces cudaPackages.backendStdenv.
Someone Serge 2023-02-27 16:28:07 +02:00
parent d378cc6fb2
commit 5f4bdbe6c3
8 changed files with 88 additions and 69 deletions
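
For context, backendStdenv is consumed like any other stdenv. A minimal
sketch of downstream usage (hypothetical package; assumes a nixpkgs
checkout containing this commit):

    { cudaPackages, ... }:

    # backendStdenv is a plain gccNNStdenv chosen to match the CUDA
    # release; it does not itself provide nvcc.
    cudaPackages.backendStdenv.mkDerivation {
      pname = "hello-cuda";
      version = "0.1";
      # ... src and CUDA inputs omitted ...
    }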

View file

@@ -11,7 +11,7 @@ args@
, fetchurl
, fontconfig
, freetype
, gcc
, gcc # :: String
, gdk-pixbuf
, glib
, glibc
@@ -22,13 +22,13 @@ args@
, perl
, python3
, requireFile
, stdenv
, backendStdenv # E.g. gcc11Stdenv, set in extension.nix
, unixODBC
, xorg
, zlib
}:
stdenv.mkDerivation rec {
backendStdenv.mkDerivation rec {
pname = "cudatoolkit";
inherit version runPatches;
@@ -146,37 +146,24 @@ stdenv.mkDerivation rec {
# Fix builds with newer glibc version
sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h"
# Ensure that cmake can find CUDA.
'' +
# Point NVCC at a compatible compiler
# FIXME: redist cuda_nvcc copy-pastes this code
# Refer to comments in the overrides for cuda_nvcc for explanation
# CUDA_TOOLKIT_ROOT_DIR is legacy,
# Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
''
mkdir -p $out/nix-support
echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook
# Set the host compiler to be used by nvcc.
# FIXME: redist cuda_nvcc copy-pastes this code
# For CMake-based projects:
# https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
# https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
# https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
# For non-CMake projects:
# FIXME: results in "incompatible redefinition" warnings ...but we keep
# both this and cmake variables until we come up with a more general
# solution
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
cat <<EOF >> $out/nix-support/setup-hook
cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${gcc}/bin'
cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
if [ -z "\''${CUDAHOSTCXX-}" ]; then
export CUDAHOSTCXX=${gcc}/bin;
export CUDAHOSTCXX=${backendStdenv.cc}/bin;
fi
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${gcc}/bin'
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
EOF
# Move some libraries to the lib output so that programs that
# depend on them don't pull in this entire monstrosity.
mkdir -p $lib/lib
@@ -212,11 +199,10 @@ stdenv.mkDerivation rec {
# The path to libstdc++ and such
#
# NB:
# 1. "gcc" (gcc-wrapper) here is what's exposed as cudaPackages.cudatoolkit.cc
# 2. "gcc.cc" is the unwrapped gcc
# 3. "gcc.cc.lib" is one of its outputs
"${gcc.cc.lib}/lib64"
# `backendStdenv` is the cuda-compatible toolchain that we pick in
# extension.nix; we hand it to NVCC to use as a back-end, and we link
# cudatoolkit's binaries against its libstdc++
"${backendStdenv.cc.cc.lib}/lib64"
"$out/jre/lib/amd64/jli"
"$out/lib64"
@@ -286,7 +272,7 @@ stdenv.mkDerivation rec {
popd
'';
passthru = {
cc = gcc;
cc = backendStdenv.cc;
majorMinorVersion = lib.versions.majorMinor version;
majorVersion = lib.versions.majorMinor version;
};
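
To make the hook's effect concrete, here is a hedged sketch of a
downstream derivation (hypothetical names; nothing is set manually,
the setup hook written above does the work):

    { cudaPackages, cmake }:

    cudaPackages.backendStdenv.mkDerivation {
      pname = "cuda-consumer";
      version = "0.1";
      nativeBuildInputs = [ cmake ];
      # Pulling cudatoolkit in as a dependency sources its setup hook, which
      # appends -DCUDA_HOST_COMPILER/-DCMAKE_CUDA_HOST_COMPILER to cmakeFlags,
      # exports CUDAHOSTCXX when unset, and prepends --compiler-bindir to
      # NVCC_PREPEND_FLAGS, so nvcc uses backendStdenv's gcc as its back-end.
      buildInputs = [ cudaPackages.cudatoolkit ];
      # ... src omitted ...
    }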

View file

@@ -7,11 +7,24 @@ final: prev: let
# Version info for the classic cudatoolkit packages that contain everything that is in redist.
cudatoolkitVersions = final.lib.importTOML ./versions.toml;
finalVersion = cudatoolkitVersions.${final.cudaVersion};
# Exposed as cudaPackages.backendStdenv.
# We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc.
# Instead, it's the back-end toolchain for nvcc to use.
# We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib)
# Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context
backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv";
### Add classic cudatoolkit package
cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
cudatoolkit = buildCudaToolkitPackage (finalVersion // { inherit backendStdenv; });
cudaFlags = final.callPackage ./flags.nix {};
in {
inherit cudatoolkit cudaFlags;
in
{
inherit
backendStdenv
cudatoolkit
cudaFlags;
}
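
To illustrate the lookup above, a sketch with a hypothetical stand-in for
one versions.toml entry (the real data lives in ./versions.toml; `pkgs`
stands for the overlay's `prev.pkgs`):

    let
      # Hypothetical stand-in for cudatoolkitVersions.${final.cudaVersion}:
      finalVersion = { gcc = "gcc11"; };
    in
      # The same interpolated lookup as above: selects pkgs.gcc11Stdenv,
      # a stdenv whose gcc this CUDA release supports as an nvcc host compiler.
      pkgs."${finalVersion.gcc}Stdenv"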

View file

@@ -1,5 +1,5 @@
{ lib
, stdenv
, backendStdenv
, fetchurl
, autoPatchelfHook
, autoAddOpenGLRunpathHook
@@ -11,7 +11,7 @@ attrs:
let
arch = "linux-x86_64";
in
stdenv.mkDerivation {
backendStdenv.mkDerivation {
inherit pname;
inherit (attrs) version;
@@ -33,11 +33,8 @@ stdenv.mkDerivation {
# autoPatchelfHook will search for a libstdc++ and we're giving it a
# "compatible" libstdc++ from the same toolchain that NVCC uses.
#
# E.g. it might happen that stdenv=gcc12Stdenv, but we build against cuda11
# that only "supports" gcc11. Linking against gcc12's libraries we might
# sometimes encounter dynamic linkage errors at runtime
# NB: We don't actually know if this is the right thing to do
cudatoolkit.cc.cc.lib
backendStdenv.cc.cc.lib
];
dontBuild = true;
@@ -51,7 +48,7 @@ stdenv.mkDerivation {
runHook postInstall
'';
passthru.stdenv = stdenv;
passthru.stdenv = backendStdenv;
meta = {
description = attrs.name;
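
The libstdc++ pinning described in those comments, reduced to a
standalone sketch (package name hypothetical; the attribute path
backendStdenv.cc.cc.lib is the one this commit introduces):

    { backendStdenv, autoPatchelfHook }:

    backendStdenv.mkDerivation {
      pname = "some-redist-lib";
      version = "0.0";
      nativeBuildInputs = [ autoPatchelfHook ];
      buildInputs = [
        # autoPatchelfHook resolves libstdc++ against the same toolchain
        # that nvcc uses as a back-end, avoiding `GLIBCXX_3.4.30' not
        # found style mismatches at runtime.
        backendStdenv.cc.cc.lib
      ];
      # ... src and installPhase omitted ...
    }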

View file

@@ -24,7 +24,7 @@ in
cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs:
let
inherit (prev.cudatoolkit) cc;
inherit (prev.backendStdenv) cc;
in
{
# Point NVCC at a compatible compiler
@@ -44,7 +44,6 @@ in
postInstall = (oldAttrs.postInstall or "") + ''
mkdir -p $out/nix-support
cat <<EOF >> $out/nix-support/setup-hook
cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin'
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin'
if [ -z "\''${CUDAHOSTCXX-}" ]; then
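
For reference, the wrapper layering these overrides rely on, as a
sketch (it mirrors the NB list in common.nix above; binding names are
illustrative):

    { backendStdenv }:
    {
      # The wrapped compiler driver; what --compiler-bindir points into:
      ccWrapper = backendStdenv.cc;
      # The unwrapped gcc underneath the wrapper:
      gccUnwrapped = backendStdenv.cc.cc;
      # The output that ships libstdc++.so, linked against throughout:
      libstdcxx = backendStdenv.cc.cc.lib;
    }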

View file

@@ -1,11 +1,11 @@
{
stdenv,
backendStdenv,
lib,
zlib,
useCudatoolkitRunfile ? false,
cudaVersion,
cudaMajorVersion,
cudatoolkit, # if cuda>=11: only used for .cc
cudatoolkit, # For cuda < 11
libcublas ? null, # cuda <11 doesn't ship redist packages
autoPatchelfHook,
autoAddOpenGLRunpathHook,
@@ -26,7 +26,7 @@
maxCudaVersion,
}:
assert useCudatoolkitRunfile || (libcublas != null); let
inherit (cudatoolkit) cc;
inherit (backendStdenv) cc;
inherit (lib) lists strings trivial versions;
# majorMinorPatch :: String -> String
@@ -46,7 +46,7 @@ assert useCudatoolkitRunfile || (libcublas != null); let
then cudatoolkit
else libcublas;
in
stdenv.mkDerivation {
backendStdenv.mkDerivation {
pname = "cudatoolkit-${cudaMajorVersion}-cudnn";
version = versionTriple;

View file

@@ -1,5 +1,5 @@
{ lib
, stdenv
, backendStdenv
, requireFile
, autoPatchelfHook
, autoAddOpenGLRunpathHook
@@ -18,7 +18,7 @@
assert lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn)
"This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})";
stdenv.mkDerivation rec {
backendStdenv.mkDerivation rec {
pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt";
version = fullVersion;
src = requireFile rec {
@@ -45,7 +45,7 @@ stdenv.mkDerivation rec {
# Used by autoPatchelfHook
buildInputs = [
cudatoolkit.cc.cc.lib # libstdc++
backendStdenv.cc.cc.lib # libstdc++
cudatoolkit
cudnn
];
@@ -74,6 +74,8 @@ stdenv.mkDerivation rec {
"$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}"
'';
passthru.stdenv = backendStdenv;
meta = with lib; {
# Check that the cudatoolkit version satisfies our min/max constraints (both
# inclusive). We mark the package as broken if it fails to satisfy the
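
A sketch of the check that comment describes (assumed shape, using
nixpkgs' stock version helpers; minCudaVersion is assumed to mirror the
maxCudaVersion argument above, and the real expression may differ):

    # Both bounds inclusive:
    #   minCudaVersion <= cudatoolkit.version <= maxCudaVersion
    meta.broken =
      !(lib.versionAtLeast cudatoolkit.version minCudaVersion
        && lib.versionAtLeast maxCudaVersion cudatoolkit.version);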

View file

@@ -32,6 +32,26 @@
}:
let
originalStdenv = stdenv;
in
let
# Tensorflow looks at many toolchain-related variables which may diverge.
#
# Toolchain for cuda-enabled builds.
# We want to achieve two things:
# 1. NVCC should use a compatible back-end (e.g. gcc11 for cuda11)
# 2. Normal C++ files should be compiled with the same toolchain,
# to avoid potential weird dynamic linkage errors at runtime.
# This may not be necessary though
#
# Toolchain for Darwin:
# clang 7 fails to emit a symbol for
# __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
# translation units, so the build fails at link time
stdenv =
if cudaSupport then cudaPackages.backendStdenv
else if originalStdenv.isDarwin then llvmPackages_11.stdenv
else originalStdenv;
inherit (cudaPackages) cudatoolkit cudnn nccl;
in
@@ -44,6 +64,7 @@ assert ! (stdenv.isDarwin && cudaSupport);
let
withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
# FIXME: migrate to redist cudaPackages
cudatoolkit_joined = symlinkJoin {
name = "${cudatoolkit.name}-merged";
paths = [
@@ -56,10 +77,13 @@ let
];
};
# Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
# The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX,
# but that path must contain cc as well, so we merge them
cudatoolkit_cc_joined = symlinkJoin {
name = "${cudatoolkit.cc.name}-merged";
name = "${stdenv.cc.name}-merged";
paths = [
cudatoolkit.cc
stdenv.cc
binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip
];
};
@@ -175,12 +199,7 @@ let
'';
}) else _bazel-build;
_bazel-build = (buildBazelPackage.override (lib.optionalAttrs stdenv.isDarwin {
# clang 7 fails to emit a symbol for
# __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
# translation units, so the build fails at link time
stdenv = llvmPackages_11.stdenv;
})) {
_bazel-build = buildBazelPackage.override { inherit stdenv; } {
name = "${pname}-${version}";
bazel = bazel_5;
@@ -211,12 +230,13 @@ let
flatbuffers-core
giflib
grpc
icu
# Necessary to fix the "`GLIBCXX_3.4.30' not found" error
(icu.override { inherit stdenv; })
jsoncpp
libjpeg_turbo
libpng
lmdb-core
pybind11
(pybind11.overridePythonAttrs (_: { inherit stdenv; }))
snappy
sqlite
] ++ lib.optionals cudaSupport [
@@ -301,10 +321,12 @@ let
TF_NEED_CUDA = tfFeature cudaSupport;
TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
# Needed even when we override stdenv: e.g. for ar
GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc";
postPatch = ''
# bazel 3.3 should work just as well as bazel 3.1
rm -f .bazelversion
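
The merged-prefix trick described above, in isolation (a sketch; the
binding name is illustrative):

    # Bazel expects ar, nm, objcopy, etc. to live next to the compiler, so
    # the wrapped cc and bintools are fused into one bin/ directory:
    cc_joined = symlinkJoin {
      name = "cc-and-bintools-merged";
      paths = [ stdenv.cc binutils.bintools ];
    };
    # Handed to TensorFlow's configure via:
    #   GCC_HOST_COMPILER_PREFIX = "${cc_joined}/bin";
    #   GCC_HOST_COMPILER_PATH   = "${cc_joined}/bin/cc";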

View file

@@ -1,4 +1,4 @@
{ lib, stdenv, fetchFromGitHub
{ lib, backendStdenv, fetchFromGitHub
, cmake, addOpenGLRunpath
, cudatoolkit
, cutensor
@@ -35,13 +35,13 @@ let
in
{
cublas = stdenv.mkDerivation (commonAttrs // {
cublas = backendStdenv.mkDerivation (commonAttrs // {
pname = "cuda-library-samples-cublas";
src = "${src}/cuBLASLt";
});
cusolver = stdenv.mkDerivation (commonAttrs // {
cusolver = backendStdenv.mkDerivation (commonAttrs // {
pname = "cuda-library-samples-cusolver";
src = "${src}/cuSOLVER";
@@ -49,7 +49,7 @@ in
sourceRoot = "cuSOLVER/gesv";
});
cutensor = stdenv.mkDerivation (commonAttrs // {
cutensor = backendStdenv.mkDerivation (commonAttrs // {
pname = "cuda-library-samples-cutensor";
src = "${src}/cuTENSOR";