From 68bc260ca2d71a676dd6afdb3524d4fff483016b Mon Sep 17 00:00:00 2001 From: aszlig Date: Mon, 19 Dec 2016 13:05:30 +0100 Subject: [PATCH] tesseract: 3.02.02 -> 3.04.01 From the upstream changelog: * Tesseract development is now done with Git and hosted at github.com (Previously we used Subversion as a VCS and code.google.com for hosting). So let's move over to the GitHub repository, where the organisation also includes a full repository for tessdata, so we no longer need to fetch it one-by-one. The build also got significantly simpler, because we no longer need to run autoconf, nor do we need to patch the configure script for Leptonica headers. This also has the advantage that we don't need to use the enableLanguages attribute for the test runner anymore. Full upstream changelog can be found at: https://github.com/tesseract-ocr/tesseract/blob/c4d273d33cc36e/ChangeLog Tested against all NixOS tests with enabled OCR (chromium, emacs-daemon, installer.luksroot and lightdm). Signed-off-by: aszlig Cc: @viric --- nixos/lib/testing.nix | 2 +- .../graphics/tesseract/default.nix | 58 ++++++------------- 2 files changed, 19 insertions(+), 41 deletions(-) diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix index 7fad5cbc3cd..2efe7a5b879 100644 --- a/nixos/lib/testing.nix +++ b/nixos/lib/testing.nix @@ -93,7 +93,7 @@ rec { vms = map (m: m.config.system.build.vm) (lib.attrValues nodes); - ocrProg = tesseract.override { enableLanguages = [ "eng" ]; }; + ocrProg = tesseract; # Generate onvenience wrappers for running the test driver # interactively with the specified network, and for starting the diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix index 375b0999548..1f1da9a389f 100644 --- a/pkgs/applications/graphics/tesseract/default.nix +++ b/pkgs/applications/graphics/tesseract/default.nix @@ -1,53 +1,31 @@ -{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff -, enableLanguages ?
null +{ stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff +, icu, pango, opencl-headers }: -with stdenv.lib; - -let - majVersion = "3.02"; - version = "${majVersion}.02"; - - mkLang = lang: sha256: let - src = fetchurl { - url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz"; - inherit sha256; - }; - in "tar xfvz ${src} -C $out/share/ --strip=1"; - - wantLang = name: const (enableLanguages == null || elem name enableLanguages); - - extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang { - cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9"; - rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709"; - spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l"; - nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy"; - eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461"; - slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr"; - jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9"; - }); -in - stdenv.mkDerivation rec { name = "tesseract-${version}"; + version = "3.04.01"; - src = fetchurl { - url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${version}.tar.gz"; - sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96"; + src = fetchFromGitHub { + owner = "tesseract-ocr"; + repo = "tesseract"; + rev = version; + sha256 = "0h1x4z1h86n2gwknd0wck6gykkp99bmm02lg4a47a698g4az6ybv"; }; - buildInputs = [ autoconf automake libtool leptonica libpng libtiff ]; + tessdata = fetchFromGitHub { + owner = "tesseract-ocr"; + repo = "tessdata"; + rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; + sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; + }; - hardeningDisable = [ "format" ]; + nativeBuildInputs = [ pkgconfig ]; + buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ]; - preConfigure = '' - ./autogen.sh - substituteInPlace "configure" \ - --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \ - 
'LIBLEPT_HEADERSDIR=${leptonica}/include' - ''; + LIBLEPT_HEADERSDIR = "${leptonica}/include"; - postInstall = concatStringsSep "; " extraLanguages; + postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*"; meta = { description = "OCR engine";