Merge pull request #166784 from samuela/samuela/cudnn3

fix `pytorchWithCuda`, fix `cupy`, upgrade `cudnn`
This commit is contained in:
Frederik Rietdijk 2022-04-03 20:43:26 +02:00 committed by GitHub
commit c77281417a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 93 additions and 52 deletions

View file

@ -86,5 +86,9 @@ rec {
gcc = gcc10; # can bump to 11 along with stdenv.cc
};
# Make sure to only ever update this to a version that is compatible with the
# latest cudnn, nccl, cutensor, etc! It sometimes happens that CUDA versions
# are released prior to compatibility with the rest of the ecosystem. And
# don't forget to request a review from @NixOS/cuda-maintainers!
cudatoolkit_11 = cudatoolkit_11_5;
}

View file

@ -81,31 +81,27 @@ rec {
cudnn_8_1_cudatoolkit_11_2 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_2; };
cudnn_8_1_cudatoolkit_10 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_10; };
cudnn_8_1_cudatoolkit_11 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11; };
# cuDNN 8.3 is necessary for the latest jaxlib, esp. jaxlib-bin. See
# https://github.com/google/jax/discussions/9455 for more info.
cudnn_8_3_cudatoolkit_10_2 =
generic
rec {
version = "8.3.2";
cudatoolkit = cudatoolkit_10_2;
# See https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-832/support-matrix/index.html#cudnn-cuda-hardware-versions.
minCudaVersion = "10.2.00000";
maxCudaVersion = "11.5.99999";
mkSrc = cudatoolkit:
let v = if lib.versions.majorMinor cudatoolkit.version == "10.2" then "10.2" else "11.5"; in
fetchurl {
# Starting at version 8.3.1 there's a new directory layout including
# a subdirectory `local_installers`.
url = "https://developer.download.nvidia.com/compute/redist/cudnn/v${version}/local_installers/${v}/cudnn-linux-x86_64-8.3.2.44_cuda${v}-archive.tar.xz";
hash = {
"10.2" = "sha256-1vVu+cqM+PketzIQumw9ykm6REbBZhv6/lXB7EC2aaw=";
"11.5" = "sha256-VQCVPAjF5dHd3P2iNPnvvdzb5DpTsm3AqCxyP6FwxFc=";
}."${v}";
};
}
;
cudnn_8_3_cudatoolkit_10_2 = generic rec {
version = "8.3.2";
cudatoolkit = cudatoolkit_10_2;
# See https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-832/support-matrix/index.html#cudnn-cuda-hardware-versions.
minCudaVersion = "10.2.00000";
maxCudaVersion = "11.5.99999";
mkSrc = cudatoolkit:
let v = if lib.versions.majorMinor cudatoolkit.version == "10.2" then "10.2" else "11.5"; in
fetchurl {
# Starting at version 8.3.1 there's a new directory layout including
# a subdirectory `local_installers`.
url = "https://developer.download.nvidia.com/compute/redist/cudnn/v${version}/local_installers/${v}/cudnn-linux-x86_64-8.3.2.44_cuda${v}-archive.tar.xz";
hash = {
"10.2" = "sha256-1vVu+cqM+PketzIQumw9ykm6REbBZhv6/lXB7EC2aaw=";
"11.5" = "sha256-VQCVPAjF5dHd3P2iNPnvvdzb5DpTsm3AqCxyP6FwxFc=";
}."${v}";
};
};
cudnn_8_3_cudatoolkit_11_0 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_0; };
cudnn_8_3_cudatoolkit_11_1 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_1; };
cudnn_8_3_cudatoolkit_11_2 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_2; };

View file

@ -1,6 +1,14 @@
{ callPackage
, cudatoolkit_10_1, cudatoolkit_10_2
, cudatoolkit_11_0, cudatoolkit_11_1, cudatoolkit_11_2, cudatoolkit_11_3, cudatoolkit_11_4
, cudatoolkit_10_1
, cudatoolkit_10_2
, cudatoolkit_11
, cudatoolkit_11_0
, cudatoolkit_11_1
, cudatoolkit_11_2
, cudatoolkit_11_3
, cudatoolkit_11_4
, cudatoolkit_11_5
, cudatoolkit_11_6
}:
rec {
@ -8,18 +16,18 @@ rec {
version = "1.2.2.5";
libPath = "lib/10.1";
cudatoolkit = cudatoolkit_10_1;
# 1.2.2 is compatible with CUDA 11.0, 11.1, and 11.2:
# ephemeral doc at https://developer.nvidia.com/cutensor/downloads
sha256 = "1dl9bd71frhac9cb8lvnh71zfsnqxbxbfhndvva2zf6nh0my4klm";
# 1.2.2 is compatible with CUDA 10.1, 10.2, and 11.x.
# See https://docs.nvidia.com/cuda/cutensor/release_notes.html#cutensor-v1-2-2.
hash = "sha256-lU7iK4DWuC/U3s1Ct/rq2Gr3w4F2U7RYYgpmF05bibY=";
};
cutensor_cudatoolkit_10_2 = cutensor_cudatoolkit_10_1.override {
version = "1.3.1.3";
libPath = "lib/10.2";
cudatoolkit = cudatoolkit_10_2;
# 1.3.1 is compatible with CUDA 11.0, 11.1, and 11.2:
# ephemeral doc at https://developer.nvidia.com/cutensor/downloads
sha256 = "sha256-mNlVnabB2IC3HnYY0mb06RLqQzDxN9ePGVeBy3hkBC8=";
# 1.3.1 is compatible with CUDA 10.2 and 11.x.
# See https://docs.nvidia.com/cuda/cutensor/release_notes.html#cutensor-v1-3-1.
hash = "sha256-mNlVnabB2IC3HnYY0mb06RLqQzDxN9ePGVeBy3hkBC8=";
};
cutensor_cudatoolkit_10 = cutensor_cudatoolkit_10_2;
@ -29,21 +37,12 @@ rec {
cudatoolkit = cudatoolkit_11_0;
};
cutensor_cudatoolkit_11_1 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_1;
};
cutensor_cudatoolkit_11_1 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_1; };
cutensor_cudatoolkit_11_2 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_2; };
cutensor_cudatoolkit_11_3 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_3; };
cutensor_cudatoolkit_11_4 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_4; };
cutensor_cudatoolkit_11_5 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_5; };
cutensor_cudatoolkit_11_6 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_6; };
cutensor_cudatoolkit_11_2 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_2;
};
cutensor_cudatoolkit_11_3 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_3;
};
cutensor_cudatoolkit_11_4 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_4;
};
cutensor_cudatoolkit_11 = cutensor_cudatoolkit_11_4;
cutensor_cudatoolkit_11 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11; };
}

View file

@ -7,7 +7,7 @@
, addOpenGLRunpath
, version
, sha256
, hash
}:
let
@ -21,7 +21,7 @@ stdenv.mkDerivation {
src = fetchurl {
url = "https://developer.download.nvidia.com/compute/cutensor/${mostOfVersion}/local_installers/libcutensor-${stdenv.hostPlatform.parsed.kernel.name}-${stdenv.hostPlatform.parsed.cpu.name}-${version}.tar.gz";
inherit sha256;
inherit hash;
};
outputs = [ "out" "dev" ];

View file

@ -38,6 +38,10 @@ stdenv.mkDerivation rec {
enableParallelBuilding = true;
passthru = {
inherit cudatoolkit;
};
meta = with lib; {
description = "Multi-GPU and multi-node collective communication primitives for NVIDIA GPUs";
homepage = "https://developer.nvidia.com/nccl";

View file

@ -5,6 +5,10 @@
, addOpenGLRunpath
}:
assert cudnn.cudatoolkit == cudatoolkit;
assert cutensor.cudatoolkit == cudatoolkit;
assert nccl.cudatoolkit == cudatoolkit;
buildPythonPackage rec {
pname = "cupy";
version = "10.2.0";
@ -15,8 +19,15 @@ buildPythonPackage rec {
sha256 = "sha256-5ovvA76QGOsOnVztMfDgLerks5nJrKR08rLc+ArmWA8=";
};
# See https://docs.cupy.dev/en/v10.2.0/reference/environment.html. Setting both
# CUPY_NUM_BUILD_JOBS and CUPY_NUM_NVCC_THREADS to NIX_BUILD_CORES results in
# a small amount of thrashing but it turns out there are a large number of
# very short builds and a few extremely long ones, so setting both ends up
# working nicely in practice.
preConfigure = ''
export CUDA_PATH=${cudatoolkit}
export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES"
export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES"
'';
nativeBuildInputs = [

View file

@ -4647,7 +4647,6 @@ with pkgs;
cudnn_8_1_cudatoolkit_11_1
cudnn_8_1_cudatoolkit_11_2
cudnn_8_1_cudatoolkit_10
cudnn_8_1_cudatoolkit_11
cudnn_8_3_cudatoolkit_10_2
cudnn_8_3_cudatoolkit_11_0
cudnn_8_3_cudatoolkit_11_1
@ -4658,8 +4657,8 @@ with pkgs;
cudnn_8_3_cudatoolkit_10
cudnn_8_3_cudatoolkit_11;
# TODO(samuela): This is old and should be upgraded to 8.3 at some point.
cudnn = cudnn_7_6_cudatoolkit_10_1;
# Make sure to keep this in sync with the `cudatoolkit` version!
cudnn = cudnn_8_3_cudatoolkit_10;
cutensorPackages = callPackages ../development/libraries/science/math/cutensor { };
inherit (cutensorPackages)
@ -32962,11 +32961,16 @@ with pkgs;
### SCIENCE / MATH
caffe = callPackage ../applications/science/math/caffe ({
cudaSupport = config.cudaSupport or false;
cudatoolkit = cudatoolkit_10_1;
cudnn = cudnn_7_6_cudatoolkit_10_1;
opencv3 = opencv3WithoutCuda; # Used only for image loading.
blas = openblas;
inherit (darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo;
} // (config.caffe or {}));
caffeWithCuda = caffe.override { cudaSupport = true; };
caffe2 = callPackage ../development/libraries/science/math/caffe2 (rec {
inherit (python3Packages) python future six numpy pydot;
protobuf = protobuf3_1;

View file

@ -1459,6 +1459,11 @@ in {
inherit (self) python numpy boost;
});
caffeWithCuda = toPythonModule (pkgs.caffeWithCuda.override {
pythonSupport = true;
inherit (self) python numpy boost;
});
cairocffi = callPackage ../development/python-modules/cairocffi { };
cairosvg = callPackage ../development/python-modules/cairosvg { };
@ -1961,7 +1966,7 @@ in {
cupy = callPackage ../development/python-modules/cupy {
cudatoolkit = pkgs.cudatoolkit_11;
cudnn = pkgs.cudnn_8_1_cudatoolkit_11;
cudnn = pkgs.cudnn_8_3_cudatoolkit_11;
nccl = pkgs.nccl_cudatoolkit_11;
cutensor = pkgs.cutensor_cudatoolkit_11;
};
@ -8366,6 +8371,24 @@ in {
pytorch = callPackage ../development/python-modules/pytorch {
cudaSupport = pkgs.config.cudaSupport or false;
# TODO: next time pytorch is updated (to 1.11.0, currently in staging as of
# 2022-03-31), make the following changes:
# -> cudatoolkit_11
cudatoolkit = pkgs.cudatoolkit_10;
# -> cudnn_8_3_cudatoolkit_11
cudnn = pkgs.cudnn_8_1_cudatoolkit_10;
# -> cutensor_cudatoolkit_11 (cutensor is a new dependency in v1.11.0)
# cutensor = pkgs.cutensor_cudatoolkit_11;
# -> setting a custom magma should be unnecessary with v1.11.0
magma = pkgs.magma.override { cudatoolkit = pkgs.cudatoolkit_10; };
# -> nccl_cudatoolkit_11
nccl = pkgs.nccl.override { cudatoolkit = pkgs.cudatoolkit_10; };
};
pytorch-bin = callPackage ../development/python-modules/pytorch/bin.nix { };