From 128321bca98a63ad89fa1080416f3dc5d37fbae4 Mon Sep 17 00:00:00 2001
From: happysalada
Date: Mon, 28 Aug 2023 16:45:49 +0800
Subject: [PATCH] unstructured-api: init at 0.0.39

---
 pkgs/servers/unstructured-api/default.nix | 88 +++++++++++++++++++++++
 pkgs/top-level/all-packages.nix           |  2 +
 2 files changed, 90 insertions(+)
 create mode 100644 pkgs/servers/unstructured-api/default.nix

diff --git a/pkgs/servers/unstructured-api/default.nix b/pkgs/servers/unstructured-api/default.nix
new file mode 100644
index 00000000000..521ff17e573
--- /dev/null
+++ b/pkgs/servers/unstructured-api/default.nix
@@ -0,0 +1,88 @@
+# Packages the Unstructured-IO `unstructured-api` source tree as a runnable
+# service: the checkout is copied verbatim into $out/lib and exposed through
+# a `uvicorn` wrapper at $out/bin/unstructured-api.
+{
+  lib,
+  stdenvNoCC,
+  fetchFromGitHub,
+  python3,
+  makeWrapper,
+  nix-update-script,
+  symlinkJoin,
+  nltk-data,
+}:
+let
+  # Python interpreter bundled with the libraries listed here plus the
+  # `local-inference` optional dependencies of `unstructured`; its `uvicorn`
+  # binary is what the wrapper created in installPhase launches.
+  pythonEnv = python3.withPackages (packages: with packages; [
+    unstructured-api-tools
+    unstructured
+    pydantic
+    click
+    ratelimit
+    requests
+    pypdf
+    pycryptodome
+    safetensors
+    uvicorn
+  ] ++ packages.unstructured.optional-dependencies.local-inference);
+  version = "0.0.39";
+  # Single directory merging the two NLTK datasets; the wrapper exports its
+  # path through the NLTK_DATA environment variable.
+  unstructured_api_nltk_data = symlinkJoin {
+    name = "unstructured_api_nltk_data";
+
+    paths = [ nltk-data.punkt nltk-data.averaged_perceptron_tagger ];
+  };
+in stdenvNoCC.mkDerivation {
+  pname = "unstructured-api";
+  inherit version;
+
+  src = fetchFromGitHub {
+    owner = "Unstructured-IO";
+    repo = "unstructured-api";
+    rev = version;
+    hash = "sha256-fk0YkGllggi0eWdp9ytHy4/9rChkcDnQvEvVAp1+RJw=";
+  };
+
+  nativeBuildInputs = [ makeWrapper ];
+
+  # Nothing is compiled: installPhase only copies the sources. Skip the
+  # generic configure/build phases so stdenv never runs `make` should the
+  # checkout contain a Makefile.
+  dontConfigure = true;
+  dontBuild = true;
+
+  # Copy the whole checkout into $out/lib, then create the launcher:
+  # uvicorn from pythonEnv, started with the app path
+  # prepline_general.api.app:app, with $out/lib prepended to PYTHONPATH and
+  # NLTK_DATA pointing at the merged datasets.
+  installPhase = ''
+    runHook preInstall
+
+    mkdir -p $out/bin $out/lib
+    cp -r . $out/lib
+
+    makeWrapper ${pythonEnv}/bin/uvicorn $out/bin/unstructured-api \
+      --set NLTK_DATA ${unstructured_api_nltk_data} \
+      --prefix PYTHONPATH : $out/lib \
+      --add-flags "prepline_general.api.app:app"
+
+    runHook postInstall
+  '';
+
+  passthru = {
+    updateScript = nix-update-script { };
+  };
+
+  meta = with lib; {
+    description = "Open-source toolkit designed to make it easy to prepare unstructured data like PDFs, HTML and Word Documents for downstream data science tasks";
+    homepage = "https://github.com/Unstructured-IO/unstructured-api";
+    changelog = "https://github.com/Unstructured-IO/unstructured-api/releases/tag/${version}";
+    license = licenses.asl20;
+    maintainers = with maintainers; [ happysalada ];
+    # The only executable installed is $out/bin/unstructured-api.
+    mainProgram = "unstructured-api";
+  };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index e00fbc33de8..a84f95f282a 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -27486,6 +27486,8 @@ with pkgs;
     inherit (darwin.apple_sdk.frameworks) Cocoa WebKit;
   };
 
+  unstructured-api = callPackage ../servers/unstructured-api { };
+
   urserver = callPackage ../servers/urserver { };
 
   uxplay = callPackage ../servers/uxplay { };