diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix index 8539fef0a19..d8b5df004df 100644 --- a/nixos/lib/testing.nix +++ b/nixos/lib/testing.nix @@ -93,7 +93,7 @@ rec { vms = map (m: m.config.system.build.vm) (lib.attrValues nodes); - ocrProg = tesseract; + ocrProg = tesseract.override { enableLanguages = [ "eng" ]; }; # Generate onvenience wrappers for running the test driver # interactively with the specified network, and for starting the diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix index 1f1da9a389f..b5e1707c4fe 100644 --- a/pkgs/applications/graphics/tesseract/default.nix +++ b/pkgs/applications/graphics/tesseract/default.nix @@ -1,5 +1,8 @@ { stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff , icu, pango, opencl-headers + +# Supported list of languages or `null' for all available languages +, enableLanguages ? null }: stdenv.mkDerivation rec { @@ -25,7 +28,30 @@ stdenv.mkDerivation rec { LIBLEPT_HEADERSDIR = "${leptonica}/include"; - postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*"; + # Copy the .traineddata files of the languages specified in enableLanguages + # into `$out/share/tessdata' and check afterwards if copying was successful. 
+  postInstall = let
+    # Drop repeated entries so that `expected' below counts distinct languages.
+    langs = if enableLanguages != null then stdenv.lib.unique enableLanguages else null;
+    mkArg = lang: "-o -iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}";
+    # Seeding the group with -false keeps it valid even for an empty list.
+    findLangArgs = if langs == null then "-iname '*.traineddata'"
+                   else "\\( -false ${stdenv.lib.concatMapStringsSep " " mkArg langs} \\)";
+  # find -print emits one line per matched file, so `wc -l' yields the count.
+  in ''
+    numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \
+      ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)"
+    ${if langs != null then ''
+      expected=${toString (builtins.length langs)}
+    '' else ''
+      expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)"
+    ''}
+    if [ "$numLangs" -ne "$expected" ]; then
+      echo "Expected $expected languages, but $numLangs" \
+           "were copied to \`$out/share/tessdata'" >&2
+      exit 1
+    fi
+  '';
 
   meta = {
     description = "OCR engine";