nixpkgs/pkgs/tools/text/invoice2data/default.nix
Daniel Albert 4ca42ad0a7
invoice2data: 0.3.6 -> 0.4.2
Update invoice2data to version v0.4.2 [released on 2023-02-11](https://github.com/invoice-x/invoice2data/releases/tag/v0.4.2).

This change also:
* Installs `ghostscript`, which is used to convert PDFs into images that serve as input for Tesseract.
* Uses Tesseract 5 instead of Tesseract 3.
* Switches from `xpdf` (which is also marked as insecure) to `poppler_utils`, which is the recommended provider of `pdftotext` according to the `invoice2data` repo.
2023-02-15 00:36:56 +01:00

60 lines
1.1 KiB
Nix

# Package definition for invoice2data, a data extractor for PDF invoices.
#
# The Python application is built with buildPythonApplication and its
# executables are wrapped so that the external command-line tools it shells
# out to (ghostscript, imagemagick, tesseract, poppler's pdftotext) are found
# on PATH at runtime.
{ lib
, fetchFromGitHub
, ghostscript
, imagemagick
, poppler_utils
, python3
, tesseract5
}:

python3.pkgs.buildPythonApplication rec {
  pname = "invoice2data";
  version = "0.4.2";
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "invoice-x";
    repo = pname;
    rev = "v${version}";
    # SRI-format digest, so it goes in `hash` rather than the legacy `sha256`.
    hash = "sha256-ss2h8cg0sga+lzJyQHckrZB/Eb63Oj3FkqmGqWCzCQ8=";
  };

  # setuptools-git is only needed while building the package, so it is a
  # native (build-platform) input rather than a plain buildInput.
  nativeBuildInputs = with python3.pkgs; [
    setuptools-git
  ];

  # Python libraries that must be importable by the installed application.
  propagatedBuildInputs = with python3.pkgs; [
    chardet
    dateparser
    pdfminer-six
    pillow
    pyyaml
    setuptools
    unidecode
  ];

  # Strip the pytest-runner requirement from setup.cfg.
  # NOTE(review): presumably because it is only a test-time helper that is
  # not needed (or available) for the build -- confirm against upstream.
  postPatch = ''
    substituteInPlace setup.cfg \
      --replace "pytest-runner" ""
  '';

  # Prepend the external tools to PATH for the wrapped executables.
  makeWrapperArgs = [
    "--prefix"
    "PATH"
    ":"
    (lib.makeBinPath [
      ghostscript
      imagemagick
      tesseract5
      poppler_utils
    ])
  ];

  # The upstream test suite is disabled: according to the original packager,
  # the tests fail even when run manually outside of Nix (on Ubuntu).
  doCheck = false;

  # With the tests off, at least verify that the module can be imported.
  pythonImportsCheck = [
    "invoice2data"
  ];

  meta = with lib; {
    description = "Data extractor for PDF invoices";
    homepage = "https://github.com/invoice-x/invoice2data";
    changelog = "https://github.com/invoice-x/invoice2data/releases/tag/v${version}";
    license = licenses.mit;
    maintainers = with maintainers; [ psyanticy ];
  };
}