From 42be422b5252d526a74d15316abc9ccc24f8868c Mon Sep 17 00:00:00 2001 From: Marcin Deranek Date: Sat, 29 Jan 2022 16:16:09 +0100 Subject: iscan-# became imagescan --- .../files/imagescan-3.62.0-tests-tesseract.patch | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 imagescan/files/imagescan-3.62.0-tests-tesseract.patch (limited to 'imagescan/files/imagescan-3.62.0-tests-tesseract.patch') diff --git a/imagescan/files/imagescan-3.62.0-tests-tesseract.patch b/imagescan/files/imagescan-3.62.0-tests-tesseract.patch new file mode 100644 index 0000000..52f7b18 --- /dev/null +++ b/imagescan/files/imagescan-3.62.0-tests-tesseract.patch @@ -0,0 +1,111 @@ +commit 9ce60e8df3b613950c483f0ae5ec772afc329104 +Author: Olaf Meeuwissen +Date: Fri Jun 21 22:20:30 2019 +0900 + + Fix tesseract command-line invocation. Re #78 + +diff --git a/filters/get-text-orientation b/filters/get-text-orientation +index 847f2c6..6f0978c 100755 +--- a/filters/get-text-orientation ++++ b/filters/get-text-orientation +@@ -47,26 +47,14 @@ if test $? != 0; then + exit 1 + fi + +-tmpfile=$(mktemp -q .reorient.XXX) +-trap "rm -f $tmpfile" 0 1 2 15 +- + case "$engine" in + */tesseract|tesseract) + +- # Notwithstanding what the manual page says, tesseract +- # doesn't support reading from standard input with the +- # `-psm 0` option. We stuff incoming image data into a +- # temporary file to work around this limitation. +- # See https://github.com/tesseract-ocr/tesseract/issues/85 +- +- cat - > $tmpfile ++ # Tesseract 3.03 outputs the results we want to standard ++ # error; Tesseract 3.04 dumps it on standard output. We ++ # want it on the latter. + +- # We don't care about the "regular" tesseract output so +- # divert that to /dev/null. The output that we do care +- # about ends up on standard error, but our caller looks +- # for it on standard output. Redirect to handle that. 
+- +- $engine $tmpfile /dev/null -psm 0 -l osd 2>&1 + ;; + + */ocr-engine-getrotate) +@@ -74,6 +62,9 @@ case "$engine" in + # The ocr-engine-getrotate utility expects an uncompressed + # BMP image. + ++ tmpfile=$(mktemp -q .reorient.XXX) ++ trap "rm -f $tmpfile" 0 1 2 15 ++ + $convert - -compress None bmp3:$tmpfile + $engine $tmpfile + ;; + +commit 56f1d8ed51cc7140b961ef8ab8c7501f69d87fd5 +Author: Olaf Meeuwissen +Date: Mon Oct 21 12:41:20 2019 +0900 + + Fix get-text-orientation for newer Tesseract versions. Fixes #86 + +diff --git a/filters/get-text-orientation b/filters/get-text-orientation +index 6f0978c..eb64e4b 100755 +--- a/filters/get-text-orientation ++++ b/filters/get-text-orientation +@@ -51,10 +51,21 @@ case "$engine" in + */tesseract|tesseract) + + # Tesseract 3.03 outputs the results we want to standard +- # error; Tesseract 3.04 dumps it on standard output. We +- # want it on the latter. ++ # error; Tesseract 3.04 dumps it on standard output. So ++ # do later versions. We want it on the latter. ++ # Command-line options changed in 3.05.00. + +- $engine - - -psm 0 -l osd 2>&1 ++ version=$($engine --version 2>&1 | sed -n 's/.*tesseract *//p') ++ ++ case "$version" in ++ 3.0[34].* ) ++ $engine - - -psm 0 -l osd 2>&1 ++ ;; ++ ++ 3.05.* | [45].* ) ++ $engine - - --psm 0 -l osd 2>&1 ++ ;; ++ esac + ;; + + */ocr-engine-getrotate) +commit 9d5edc4c52e5a6b59d61a43ddcc13353b82992f5 +Author: Olaf Meeuwissen +Date: Sun Jun 23 17:22:22 2019 +0900 + + Fix reorientation logic for newer Tesseract versions. Re #78 + +diff --git a/filters/reorient.cpp b/filters/reorient.cpp +index e0c1dc1..60c5173 100644 +--- a/filters/reorient.cpp ++++ b/filters/reorient.cpp +@@ -483,6 +483,13 @@ reorient::finalize (const context& ctx) + log::alert + (format ("unexpected document orientation: %1% degrees") + % degrees); ++ ++ if (engine_ == "tesseract" ++ && !tesseract_version_before_("3.04")) { ++ // Orientation reporting changed direction with 3.04. 
See #78 ++ /**/ if ( 90 == degrees) rv.orientation (context::left_bottom); ++ else if (270 == degrees) rv.orientation (context::right_top); ++ } + } + return rv; + } -- cgit v1.2.3