From ceeaf2d0669cc5a4d86e6313cc0fdb6774659884 Mon Sep 17 00:00:00 2001
From: "Noah D. Brenowitz"
Date: Fri, 23 Apr 2021 00:50:38 -0700
Subject: [PATCH] python3Packages.dask: fix sandboxed builds

Importing dask.dataframe in a sandboxed build results in a TypeError
like this:

  File "/nix/store/nv60iri29bia4szhhcvsdxgsci4wxvp6-python3.8-dask-2021.03.0/lib/python3.8/site-packages/dask/dataframe/io/csv.py", line 392, in <module>
    AUTO_BLOCKSIZE = auto_blocksize(TOTAL_MEM, CPU_COUNT)
  File "/nix/store/nv60iri29bia4szhhcvsdxgsci4wxvp6-python3.8-dask-2021.03.0/lib/python3.8/site-packages/dask/dataframe/io/csv.py", line 382, in auto_blocksize
    blocksize = int(total_memory // cpu_count / memory_factor)
TypeError: unsupported operand type(s) for //: 'int' and 'NoneType'

This occurs because dask.dataframe has a non-deterministic component
which generates an automatic chunk-size based on system information.
This went unnoticed because the dask tests were disabled.

Changes:
- add a patch making the chunk-size inference more robust
- re-enable the tests

Resolves #120307
---
 .../python-modules/dask/default.nix | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/pkgs/development/python-modules/dask/default.nix b/pkgs/development/python-modules/dask/default.nix
index 8f3e4d08583..13c03a1f796 100644
--- a/pkgs/development/python-modules/dask/default.nix
+++ b/pkgs/development/python-modules/dask/default.nix
@@ -1,6 +1,7 @@
 { lib
 , bokeh
 , buildPythonPackage
+, fetchpatch
 , fetchFromGitHub
 , fsspec
 , pytestCheckHook
@@ -42,7 +43,7 @@ buildPythonPackage rec {
     distributed
   ];
 
-  doCheck = false;
+  doCheck = true;
 
   checkInputs = [
     pytestCheckHook
@@ -52,6 +53,16 @@ buildPythonPackage rec {
 
   dontUseSetuptoolsCheck = true;
 
+  patches = [
+    # dask dataframe cannot be imported in sandboxed builds
+    # See https://github.com/dask/dask/pull/7601
+    (fetchpatch {
+      url = "https://github.com/dask/dask/commit/9ce5b0d258cecb3ef38fd844135ad1f7ac3cea5f.patch";
+      sha256 = "sha256-1EVRYwAdTSEEH9jp+UOnrijzezZN3iYR6q6ieYJM3kY=";
+      name = "fix-dask-dataframe-imports-in-sandbox.patch";
+    })
+  ];
+
   postPatch = ''
     # versioneer hack to set version of github package
     echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py
@@ -66,8 +77,13 @@ buildPythonPackage rec {
   disabledTests = [
     "test_annotation_pack_unpack"
     "test_annotations_blockwise_unpack"
+    # this test requires features of python3Packages.psutil that are
+    # blocked in sandboxed-builds
+    "test_auto_blocksize_csv"
   ];
 
+  pythonImportsCheck = [ "dask.dataframe" "dask" "dask.array" ];
+
   meta = with lib; {
     description = "Minimal task scheduling abstraction";
     homepage = "https://dask.org/";