cudaPackages: use -Xfatbin=-compress-all; prune default cudaCapabilities

This commit is contained in:
Connor Baker 2023-03-09 18:38:51 -05:00
parent 61fa97e527
commit bdfb61e117
4 changed files with 85 additions and 25 deletions

View file

@ -151,6 +151,10 @@ backendStdenv.mkDerivation rec {
# Refer to comments in the overrides for cuda_nvcc for explanation
# CUDA_TOOLKIT_ROOT_DIR is legacy,
# Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
# NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled
# binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
# the default set of CUDA capabilities we build can regularly cause this to occur (for
# example, with Magma).
''
mkdir -p $out/nix-support
cat <<EOF >> $out/nix-support/setup-hook
@ -160,7 +164,7 @@ backendStdenv.mkDerivation rec {
if [ -z "\''${CUDAHOSTCXX-}" ]; then
export CUDAHOSTCXX=${backendStdenv.cc}/bin;
fi
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all'
EOF
# Move some libraries to the lib output so that programs that

View file

@ -4,12 +4,8 @@
}:
# Type aliases
# Gpu = {
# archName: String, # e.g., "Hopper"
# computeCapability: String, # e.g., "9.0"
# minCudaVersion: String, # e.g., "11.8"
# maxCudaVersion: String, # e.g., "12.0"
# }
# Gpu :: AttrSet
# - See the documentation in ./gpus.nix.
let
inherit (lib) attrsets lists strings trivial versions;
@ -34,22 +30,40 @@ let
# gpus :: List Gpu
gpus = builtins.import ./gpus.nix;
# isVersionIn :: Gpu -> Bool
# isSupported :: Gpu -> Bool
isSupported = gpu:
let
inherit (gpu) minCudaVersion maxCudaVersion;
lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion;
upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion);
upperBoundSatisfied = (maxCudaVersion == null)
|| !(strings.versionOlder maxCudaVersion cudaVersion);
in
lowerBoundSatisfied && upperBoundSatisfied;
# isDefault :: Gpu -> Bool
isDefault = gpu:
let
inherit (gpu) dontDefaultAfter;
newGpu = dontDefaultAfter == null;
recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion;
in
recentGpu;
# supportedGpus :: List Gpu
# GPUs which are supported by the provided CUDA version.
supportedGpus = builtins.filter isSupported gpus;
# defaultGpus :: List Gpu
# GPUs which are supported by the provided CUDA version and we want to build for by default.
defaultGpus = builtins.filter isDefault supportedGpus;
# supportedCapabilities :: List Capability
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
# defaultCapabilities :: List Capability
# The default capabilities to target, if not overridden by the user.
defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus;
# cudaArchNameToVersions :: AttrSet String (List String)
# Maps the name of a GPU architecture to different versions of that architecture.
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
@ -151,6 +165,6 @@ assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
# dropDot :: String -> String
inherit dropDot;
} // formatCapabilities {
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
cudaCapabilities = config.cudaCapabilities or defaultCapabilities;
enableForwardCompat = config.cudaForwardCompat or true;
}

View file

@ -1,110 +1,148 @@
[
# Type alias
# Gpu = {
# archName: String
# - The name of the microarchitecture.
# computeCapability: String
# - The compute capability of the GPU.
# minCudaVersion: String
# - The minimum (inclusive) CUDA version that supports this GPU.
# dontDefaultAfter: null | String
# - The CUDA version after which to exclude this GPU from the list of default capabilities
# we build. null means we always include this GPU in the default capabilities if it is
# supported.
# maxCudaVersion: null | String
# - The maximum (exclusive) CUDA version that supports this GPU. null means there is no
# maximum.
# }
{
archName = "Kepler";
computeCapability = "3.0";
minCudaVersion = "10.0";
dontDefaultAfter = "10.2";
maxCudaVersion = "10.2";
}
{
archName = "Kepler";
computeCapability = "3.2";
minCudaVersion = "10.0";
dontDefaultAfter = "10.2";
maxCudaVersion = "10.2";
}
{
archName = "Kepler";
computeCapability = "3.5";
minCudaVersion = "10.0";
dontDefaultAfter = "11.0";
maxCudaVersion = "11.8";
}
{
archName = "Kepler";
computeCapability = "3.7";
minCudaVersion = "10.0";
dontDefaultAfter = "11.0";
maxCudaVersion = "11.8";
}
{
archName = "Maxwell";
computeCapability = "5.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = "11.0";
maxCudaVersion = null;
}
{
archName = "Maxwell";
computeCapability = "5.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = "11.0";
maxCudaVersion = null;
}
{
archName = "Maxwell";
computeCapability = "5.3";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = "11.0";
maxCudaVersion = null;
}
{
archName = "Pascal";
computeCapability = "6.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Pascal";
computeCapability = "6.1";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Pascal";
computeCapability = "6.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Volta";
computeCapability = "7.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Volta";
computeCapability = "7.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Turing";
computeCapability = "7.5";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Ampere";
computeCapability = "8.0";
minCudaVersion = "11.2";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Ampere";
computeCapability = "8.6";
minCudaVersion = "11.2";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Ampere";
computeCapability = "8.7";
minCudaVersion = "11.5";
maxCudaVersion = "12.0";
# NOTE: This is purposefully before 11.5 to ensure it is never a capability we target by
# default. 8.7 is the Jetson Orin series of devices which are a very specific platform.
# We keep this entry here in case we ever want to target it explicitly, but we don't
# want to target it by default.
dontDefaultAfter = "11.4";
maxCudaVersion = null;
}
{
archName = "Ada";
computeCapability = "8.9";
minCudaVersion = "11.8";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
{
archName = "Hopper";
computeCapability = "9.0";
minCudaVersion = "11.8";
maxCudaVersion = "12.0";
dontDefaultAfter = null;
maxCudaVersion = null;
}
]

View file

@ -41,6 +41,10 @@ in
# uses the last --compiler-bindir it gets on the command line.
# FIXME: this results in "incompatible redefinition" warnings.
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
# NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the
# compiled binaries. If binaries grow over 2GB, they will fail to link. This is a problem
# for us, as the default set of CUDA capabilities we build can regularly cause this to
# occur (for example, with Magma).
postInstall = (oldAttrs.postInstall or "") + ''
mkdir -p $out/nix-support
cat <<EOF >> $out/nix-support/setup-hook
@ -49,7 +53,7 @@ in
if [ -z "\''${CUDAHOSTCXX-}" ]; then
export CUDAHOSTCXX=${cc}/bin;
fi
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin'
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all'
EOF
'';
});