commit 9ce60e8df3b613950c483f0ae5ec772afc329104 Author: Olaf Meeuwissen Date: Fri Jun 21 22:20:30 2019 +0900 Fix tesseract command-line invocation. Re #78 diff --git a/filters/get-text-orientation b/filters/get-text-orientation index 847f2c6..6f0978c 100755 --- a/filters/get-text-orientation +++ b/filters/get-text-orientation @@ -47,26 +47,14 @@ if test $? != 0; then exit 1 fi -tmpfile=$(mktemp -q .reorient.XXX) -trap "rm -f $tmpfile" 0 1 2 15 - case "$engine" in */tesseract|tesseract) - # Notwithstanding what the manual page says, tesseract - # doesn't support reading from standard input with the - # `-psm 0` option. We stuff incoming image data into a - # temporary file to work around this limitation. - # See https://github.com/tesseract-ocr/tesseract/issues/85 - - cat - > $tmpfile + # Tesseract 3.03 outputs the results we want to standard + # error; Tesseract 3.04 dumps it on standard output. We + # want it on the latter. - # We don't care about the "regular" tesseract output so - # divert that to /dev/null. The output that we do care - # about ends up on standard error, but our caller looks - # for it on standard output. Redirect to handle that. - - $engine $tmpfile /dev/null -psm 0 -l osd 2>&1 + $engine - - -psm 0 -l osd 2>&1 ;; */ocr-engine-getrotate) @@ -74,6 +62,9 @@ case "$engine" in # The ocr-engine-getrotate utility expects an uncompressed # BMP image. + tmpfile=$(mktemp -q .reorient.XXX) + trap "rm -f $tmpfile" 0 1 2 15 + $convert - -compress None bmp3:$tmpfile $engine $tmpfile ;; commit 56f1d8ed51cc7140b961ef8ab8c7501f69d87fd5 Author: Olaf Meeuwissen Date: Mon Oct 21 12:41:20 2019 +0900 Fix get-text-orientation for newer Tesseract versions. Fixes #86 diff --git a/filters/get-text-orientation b/filters/get-text-orientation index 6f0978c..eb64e4b 100755 --- a/filters/get-text-orientation +++ b/filters/get-text-orientation @@ -51,10 +51,21 @@ case "$engine" in */tesseract|tesseract) # Tesseract 3.03 outputs the results we want to standard - # error; Tesseract 3.04 dumps it on standard output. We - # want it on the latter. + # error; Tesseract 3.04 dumps it on standard output. So + # do later versions. We want it on the latter. + # Command-line options changed in 3.05.00. - $engine - - -psm 0 -l osd 2>&1 + version=$($engine --version 2>&1 | sed -n 's/.*tesseract *//p') + + case "$version" in + 3.0[34].* ) + $engine - - -psm 0 -l osd 2>&1 + ;; + + 3.05.* | [45].* ) + $engine - - --psm 0 -l osd 2>&1 + ;; + esac ;; */ocr-engine-getrotate) commit 9d5edc4c52e5a6b59d61a43ddcc13353b82992f5 Author: Olaf Meeuwissen Date: Sun Jun 23 17:22:22 2019 +0900 Fix reorientation logic for newer Tesseract versions. Re #78 diff --git a/filters/reorient.cpp b/filters/reorient.cpp index e0c1dc1..60c5173 100644 --- a/filters/reorient.cpp +++ b/filters/reorient.cpp @@ -483,6 +483,13 @@ reorient::finalize (const context& ctx) log::alert (format ("unexpected document orientation: %1% degrees") % degrees); + + if (engine_ == "tesseract" + && !tesseract_version_before_("3.04")) { + // Orientation reporting changed direct with 3.04. See #78 + /**/ if ( 90 == degrees) rv.orientation (context::left_bottom); + else if (270 == degrees) rv.orientation (context::right_top); + } } return rv; }