python3Packages.datafusion: init at 0.4.0 (#152763)

This commit is contained in:
Phillip Cloud 2022-01-25 09:30:26 -05:00 committed by GitHub
parent 688e471a6e
commit 7891c655a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 170 additions and 0 deletions

View file

@ -0,0 +1,78 @@
diff --git a/Cargo.lock b/Cargo.lock
index fa84a54c..3d790e1c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -57,9 +57,9 @@ checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd"
[[package]]
name = "arrow"
-version = "6.0.0"
+version = "6.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "337e668497751234149fd607f5cb41a6ae7b286b6329589126fe67f0ac55d637"
+checksum = "216c6846a292bdd93c2b93c1baab58c32ff50e2ab5e8d50db333ab518535dd8b"
dependencies = [
"bitflags",
"chrono",
@@ -212,9 +212,9 @@ dependencies = [
[[package]]
name = "comfy-table"
-version = "4.1.1"
+version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11e95a3e867422fd8d04049041f5671f94d53c32a9dcd82e2be268714942f3f3"
+checksum = "c42350b81f044f576ff88ac750419f914abb46a03831bb1747134344ee7a4e64"
dependencies = [
"strum",
"strum_macros",
@@ -279,7 +279,7 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "5.1.0"
+version = "6.0.0"
dependencies = [
"ahash",
"arrow",
@@ -310,7 +310,7 @@ dependencies = [
[[package]]
name = "datafusion-python"
-version = "0.3.0"
+version = "0.4.0"
dependencies = [
"datafusion",
"pyo3",
@@ -877,9 +877,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "6.0.0"
+version = "6.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d263b9b59ba260518de9e57bd65931c3f765fea0fabacfe84f40d6fde38e841a"
+checksum = "788d9953f4cfbe9db1beff7bebd54299d105e34680d78b82b1ddc85d432cac9d"
dependencies = [
"arrow",
"base64",
@@ -1228,15 +1228,15 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strum"
-version = "0.21.0"
+version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2"
+checksum = "f7ac893c7d471c8a21f31cfe213ec4f6d9afeed25537c772e08ef3f005f8729e"
[[package]]
name = "strum_macros"
-version = "0.21.1"
+version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec"
+checksum = "339f799d8b549e3744c7ac7feb216383e4005d94bdb22561b3ab8f3b808ae9fb"
dependencies = [
"heck",
"proc-macro2",

View file

@ -0,0 +1,90 @@
{ lib
, stdenv
, fetchurl
, buildPythonPackage
, fetchPypi
, fetchFromGitHub
, rustPlatform
, maturin
, pytestCheckHook
, libiconv
, numpy
, pandas
, pyarrow
, pytest
}:

# Python bindings for the DataFusion query engine, built from the PyPI sdist
# with maturin via the rustPlatform hooks.
#
# NOTE(review): `fetchFromGitHub`, `maturin` and `pytestCheckHook` are accepted
# as arguments but are not referenced anywhere in this expression.

let
  # The Python bindings live in a subdirectory (arrow-datafusion/python) of the
  # arrow-datafusion repository and are versioned independently of it, so there
  # is no obvious mapping from the top-level repository version to the version
  # of the bindings. The lock file is therefore fetched from a fixed upstream
  # ref; the sha256 below has the final say over the content that is used.
  cargoLock = fetchurl {
    url = "https://raw.githubusercontent.com/apache/arrow-datafusion/6.0.0/python/Cargo.lock";
    sha256 = "sha256-xiv3drEU5jOGsEIh0U01ZQ1NBKobxO2ctp4mxy9iigw=";
  };

  # Drop the fetched lock file into the unpacked source tree and make it
  # writable (presumably so that the patch below can be applied to it).
  postUnpack = ''
    cp "${cargoLock}" $sourceRoot/Cargo.lock
    chmod u+w $sourceRoot/Cargo.lock
  '';

  # TODO: remove the patch hacking and postUnpack hooks after
  # https://github.com/apache/arrow-datafusion/pull/1508 is merged
  #
  # The upstream lock file is not up to date as of 6.0.0, so the same patch is
  # applied to both the source lock file and the vendored cargo deps lock file.
  lockfilePatches = [ ./Cargo.lock.patch ];
in
buildPythonPackage rec {
  pname = "datafusion";
  version = "0.4.0";
  format = "pyproject";

  src = fetchPypi {
    inherit pname version;
    sha256 = "sha256-+YqogteKfNhtI2QbVXv/5CIWm3PcOH653dwONm5ZcL8=";
  };

  inherit postUnpack;
  patches = lockfilePatches;

  # The vendored dependencies are computed from the same source, with the same
  # unpack hook and lock file patch as the main build.
  cargoDeps = rustPlatform.fetchCargoTarball {
    inherit src pname version postUnpack;
    patches = lockfilePatches;
    sha256 = "sha256-JGyDxpfBXzduJaMF1sbmRm7KJajHYdVSj+WbiSETiY0=";
  };

  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
  ];

  # libiconv is only added on Darwin.
  buildInputs = lib.optionals stdenv.isDarwin [ libiconv ];

  propagatedBuildInputs = [
    numpy
    pandas
    pyarrow
  ];

  checkInputs = [ pytest ];

  # Run the test suite against the package by import name (--pyargs).
  checkPhase = ''
    runHook preCheck
    pytest --pyargs "${pname}"
    runHook postCheck
  '';

  pythonImportsCheck = [ "datafusion" ];

  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}

View file

@ -1994,6 +1994,8 @@ in {
datadog = callPackage ../development/python-modules/datadog { };
datafusion = callPackage ../development/python-modules/datafusion { };
datamodeldict = callPackage ../development/python-modules/datamodeldict { };
dataset = callPackage ../development/python-modules/dataset { };