From 8d7557a5f5b0006448ddc6c29a3cfa610008adf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Thu, 26 Nov 2015 22:16:11 +0100 Subject: Add some more ocr commandline tools to suggests --- debian/changelog | 15 +++- debian/control | 2 +- debian/ocr-scripts/xsane2cunei.sh | 84 ++++++++++++++++++++++ debian/ocr-scripts/xsane2ocrad.sh | 87 +++++++++++++++++++++++ debian/ocr-scripts/xsane2tess3.sh | 145 ++++++++++++++++++++++++++++++++++++++ debian/xsane.NEWS | 10 +++ debian/xsane.README | 46 ++++++++++++ debian/xsane.dirs | 1 + debian/xsane.docs | 1 + debian/xsane.install | 1 + 10 files changed, 388 insertions(+), 4 deletions(-) create mode 100755 debian/ocr-scripts/xsane2cunei.sh create mode 100755 debian/ocr-scripts/xsane2ocrad.sh create mode 100755 debian/ocr-scripts/xsane2tess3.sh create mode 100644 debian/xsane.NEWS create mode 100644 debian/xsane.README (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 65a2058..6a5d4b4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -xsane (0.999-3) UNRELEASED; urgency=medium +xsane (0.999-3) unstable; urgency=medium * debian/control: - Remove version from the Build-Depents libsane-dev (>= 1.0.11-3) @@ -8,8 +8,17 @@ xsane (0.999-3) UNRELEASED; urgency=medium - Remove useless whitspaces. * debian/rules: - Split override_dh_install into *-arch and *-indep (Closes: #806125). - - -- Jörg Frings-Fürst Tue, 24 Nov 2015 21:56:44 +0100 + * Add new ocr commandline tools: + - debian/control: + + Add cuneiform, tesseract-ocr, ocrad to Suggests. + - debian/ocr-scripts: + + Add xsane2cunei.sh, xsane2ocrad.sh and xsane2tess3.sh. + - debian/xsane.dirs + + Add usr/share/xsane + - debian/xsane.install + + Install the ocr-scripts to debian/share/xsane/ocr-scripts. 
+ + -- Jörg Frings-Fürst Thu, 26 Nov 2015 22:06:47 +0100 xsane (0.999-2) unstable; urgency=medium diff --git a/debian/control b/debian/control index 909d395..4d48572 100644 --- a/debian/control +++ b/debian/control @@ -30,7 +30,7 @@ Recommends: iceweasel | firefox | www-browser Suggests: gimp, - gocr, + gocr | cuneiform | tesseract-ocr | ocrad, gv, hylafax-client | mgetty-fax Description: featureful graphical frontend for SANE (Scanner Access Now Easy) diff --git a/debian/ocr-scripts/xsane2cunei.sh b/debian/ocr-scripts/xsane2cunei.sh new file mode 100755 index 0000000..b704d52 --- /dev/null +++ b/debian/ocr-scripts/xsane2cunei.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# ############################################################################### +# # +# xsane2cunei 0.3 # +# # +# *** cuneiform made simple *** # +# # +# ############################################################################### +# +# xsane2cunei is a wrapper to be able to use Cuneiform-Linux with XSane +# +# +# +TEMP_DIR=~/tmp/ # folder for temporary files +ERRORLOG="xsane2cunei.log" # file where STDERR goes + +if [[ -z "$1" ]] + then + echo "Usage: $0 [OPTIONS] + + xsane2cunei scans image files with XSane, + recognizes the text using cuneiform-linux + and outputs the text in a file. + + OPTIONS: + -i define input file (any image-format supported) + -o define output file (txt, html, hocr, rtf) + -l define the language used for recognition + -f define the format used for output + -e optional: dotmatrix, fax, singlecolumn + + Progress- & error-messages will be stored in this logfile: + $TEMP_DIR$ERRORLOG + + xsane2cunei depends on + - XSane http://www.xsane.org/ + - libmagick-++dev http://www.imagemagick.org/ + - cuneiform-linux https://launchpad.net/cuneiform-linux Cuneiform-Linux + + Some coding was stolen from 'ocube' + http://www.geocities.com/thierryguy/ocube.html + + This Cuneiform adaption is based on xsane2tess + http://doc.ubuntu-fr.org/xsane2tess, + + Hints always welcome! 
heinrich (dot) schwietering (at) gmx (dot) de +" + exit +fi + +# get options... +while getopts ":i:o:l:f:e:" OPTION + do + case $OPTION in + i) # input filename (with path) + FILE_PATH="$OPTARG" + ;; + o ) # output filename + FILE_OUT="$OPTARG" + ;; + l ) # recognition language + LANGUAGE="$OPTARG" + ;; + f ) # output format + FORMAT="$OPTARG" + ;; + e ) # extra option format + EXTRA="$OPTARG" + ;; + esac +done + +# redirect STDOUT to FILE_OUT +exec 1>>$FILE_OUT + +# redirect STDERR to ERRORLOG +exec 2>>$TEMP_DIR$ERRORLOG + +# strip path from FILE_PATH, use filename only +IN_FILE="${FILE_PATH##*/.*}" + +# start OCR +cuneiform -l "$LANGUAGE" -f "$FORMAT" -o "$FILE_OUT" "--$EXTRA" "$IN_FILE" 1>&2 diff --git a/debian/ocr-scripts/xsane2ocrad.sh b/debian/ocr-scripts/xsane2ocrad.sh new file mode 100755 index 0000000..0e5fb13 --- /dev/null +++ b/debian/ocr-scripts/xsane2ocrad.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# xsane2ocrad - ocr with ocrad directly from xsane +# Copyright (C) 2012 Heinrich Schwietering +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +################################################################################ +# # +# xsane2ocrad 0.1 # +# # +# *** ocrad made simple *** # +# # +################################################################################ +# +# xsane2ocrad is a wrapper to use Ocrad with XSane +# +# +# +TEMP_DIR=/tmp/ # folder for temporary files +ERRORLOG="xsane2ocrad.log" # file where STDERR goes + +if [[ -z "$1" ]] + then + echo "Usage: $0 [OPTIONS] + + xsane2ocrad scans image files with XSane, + recognizes the text using ocrad + and outputs the text in a file. + + OPTIONS: + -i define input file (any image-format supported) + -o define output file (txt, html, hocr, rtf) + -e optional, all ocrad-Options, use quotes + + Progress- & error-messages will be stored in this logfile: + $TEMP_DIR$ERRORLOG + + xsane2ocrad depends on + - XSane, http://www.xsane.org/ + - ocrad, http://www.gnu.org/software/ocrad/ + + Some coding was stolen from 'ocube' + http://www.geocities.com/thierryguy/ocube.html + + This ocrad adaption is based on xsane2tess + http://doc.ubuntu-fr.org/xsane2tess, + + Hints always welcome! heinrich (dot) schwietering (at) gmx (dot) de +" + exit +fi + +# get options... 
+while getopts ":i:o:e:" OPTION + do + case $OPTION in + i ) # input filename (with path) + FILE_PATH="$OPTARG" + ;; + o ) # output filename + FILE_OUT="$OPTARG" + ;; + e ) # extra options + EXTRA="$OPTARG" + ;; + esac +done + +# redirect STDERR to ERRORLOG +exec 2>>$TEMP_DIR$ERRORLOG +echo "~~~+++~~~~+++~~~" 1>&2 + +ocrad "$FILE_PATH" -o "$FILE_OUT" $EXTRA 1>&2 +echo "ocrad "$FILE_PATH" -o "$FILE_OUT" $EXTRA ausgeführt" 1>&2 + +echo "~~~+++~~~~+++~~~"$(date +%c) 1>&2 diff --git a/debian/ocr-scripts/xsane2tess3.sh b/debian/ocr-scripts/xsane2tess3.sh new file mode 100755 index 0000000..14683bf --- /dev/null +++ b/debian/ocr-scripts/xsane2tess3.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# xsane2tess3 - tesseractOCR directly from xsane +# Copyright (C) 2012 Heinrich Schwietering +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +# +############################################################################## +# +# xsane2tess3 0.1 +# +# *** tesseract made simple *** +# +# +############################################################################## +# +# xsane2tess is a TesseractOCR 3.0x wrapper to be able to use tesseract with xsane +# +# +# +TEMP_DIR=/tmp/ # folder for temporary files (all files) +ERRORLOG="xsane2tess3.log" # file where STDERR goes + +if [[ -z "$1" ]] + then + echo "Usage: $0 [OPTIONS] + + xsane2tess3 scans images with TesseractOCR + and outputs the text in a file or as hocr/html document + + OPTIONS: + -i define input file (any image-format supported) + -o define output-file (*.txt/hOCR) + -l define language-data tesseract should use + -e filename for tesseract + -f name and path fot multiscan document + + Progress- & error-messages will be stored in this logfile: + $TEMP_DIR$ERRORLOG + + xsane2tess depends on + - XSane, http://www.xsane.org/ + - TesseractOCR, http://code.google.com/p/tesseract-ocr/ + + Some coding was stolen from 'ocube' + http://www.geocities.com/thierryguy/ocube.html + + This adaption is based on xsane2tess + http://doc.ubuntu-fr.org/xsane2tess, + + Hints always welcome! heinrich (dot) schwietering (at) gmx (dot) de + +" + exit +fi + + +# get options... 
+while getopts ":i:o:l:c:f:" OPTION + do + case $OPTION in + i ) # input filename (with path) + FILE_PATH="$OPTARG" + ;; + o ) # output filename + FILE_OUT="$OPTARG" + ;; + l ) # Language-selection + LANG="$OPTARG" + ;; + c ) # use hocr configfile + CONFILE="$OPTARG" + ;; + f ) # final name for multiscan ocr file + FINAL="$OPTARG" + ;; + esac +done + +# redirect STDOUT to FILE_OUT +exec 1>>$FILE_OUT + +# redirect STDERR to ERRORLOG +exec 2>>$TEMP_DIR$ERRORLOG + +# strip path from FILE_PATH, use filename only +IN_FILE="${FILE_PATH##*/.*}" + +echo "~~~+++~~~~+++~~~" 1>&2 + +# start OCR (tesseract expands output with *.txt/.html) +tesseract "$IN_FILE" "$FILE_OUT" -l "$LANG" "$CONFILE" 1>&2 +echo Tesseract used with "$LANG" "$CONFILE" 1>&2 + +{ if [[ "$FINAL" != '' ]] + then + { if [[ "$CONFILE" == "" ]] + then + # check if final txt file is already existing + { if [[ ! -a "$FINAL".txt ]] + then +# start final ocr file + cp "$FILE_OUT".txt "$FINAL".txt 1>&2 + echo "$FINAL.txt started" 1>&2 + else + mv "$FINAL".txt "$FINAL".new.txt + cat "$FINAL".new.txt "$FILE_OUT".txt > "$FINAL".txt + echo "$FILE_OUT.txt added to $FINAL.txt" 1>&2 + rm "$FINAL".new.txt + fi } + else +# check if final hocr file is already existing + { if [[ ! 
-a "$FINAL".html ]] + then +# start final ocr file + cp "$FILE_OUT.html" "$FINAL".html 1>&2 + echo "$FINAL.html started" 1>&2 + else + mv "$FINAL".html "$FINAL".new.html + cat "$FINAL".new.html "$FILE_OUT".html > "$FINAL".html + echo "$FILE_OUT.html added to $FINAL.html" 1>&2 + rm "$FINAL".new.html + fi } + fi } + rm $FILE_OUT + else +# STDOUT scanned text => FILE_OUT + cat "$FILE_OUT".* + +fi } + +rm $FILE_OUT.* + +echo "~~~+++~~~~+++~~~"$(date +%c) 1>&2 diff --git a/debian/xsane.NEWS b/debian/xsane.NEWS new file mode 100644 index 0000000..7bfd03c --- /dev/null +++ b/debian/xsane.NEWS @@ -0,0 +1,10 @@ +xsane (0.999-3) unstable; urgency=medium + + - New ocr commandline tools + + Since release 0.999-3 three new ocr commandline tools + have been added to Suggests. + + For more details read the xsane.README. + + -- Jörg Frings-Fürst Tue, 24 Nov 2015 21:56:44 +0100 diff --git a/debian/xsane.README b/debian/xsane.README new file mode 100644 index 0000000..b03d7e0 --- /dev/null +++ b/debian/xsane.README @@ -0,0 +1,46 @@ +Use the new ocr commandline tools. + + + +############################################################################# +cuneiform: + +- install cuneiform + apt-get install cuneiform +- copy /usr/share/xsane/ocr-scripts/xsane2cunei.sh to /usr/local/bin + cp /usr/share/xsane/ocr-scripts/xsane2cunei.sh /usr/local/bin +- at "xsane -> Setup -> Preferences -> OCR" add: + OCR command: /usr/local/bin/xsane2cunei.sh -l <language> -f <format> -e <extra> + Input file: -i + Output file: -o +- for <language>, <format> or <extra> please read the cuneiform man page. 
+ + + +############################################################################# +tesseract-ocr: + +- install tesseract-ocr and at minimum one language file + apt-get install tesseract-ocr tesseract-ocr-[eng|fra|ita|nld|por|spa|vie|deu|deu-frak] +- copy /usr/share/xsane/ocr-scripts/xsane2tess3.sh to /usr/local/bin + cp /usr/share/xsane/ocr-scripts/xsane2tess3.sh /usr/local/bin +- at "xsane -> Setup -> Preferences -> OCR" add: + OCR command: /usr/local/bin/xsane2tess3.sh -l <language> + Input file: -i + Output file: -o +- for <language> or further options please read the tesseract-ocr man page. + + + +############################################################################# +ocrad: + +- install ocrad + apt-get install ocrad +- copy /usr/share/xsane/ocr-scripts/xsane2ocrad.sh to /usr/local/bin + cp /usr/share/xsane/ocr-scripts/xsane2ocrad.sh /usr/local/bin +- at "xsane -> Setup -> Preferences -> OCR" add: + OCR command: /usr/local/bin/xsane2ocrad.sh -e <options> + Input file: -i + Output file: -o +- for <options> please read the ocrad man page. diff --git a/debian/xsane.dirs b/debian/xsane.dirs index ccc38a2..92dda21 100644 --- a/debian/xsane.dirs +++ b/debian/xsane.dirs @@ -2,3 +2,4 @@ usr/lib/gimp/2.0/plug-ins usr/share/pixmaps usr/share/icons/hicolor/256x256/apps usr/share/applications +usr/share/xsane diff --git a/debian/xsane.docs b/debian/xsane.docs index d05dba6..4faf401 100644 --- a/debian/xsane.docs +++ b/debian/xsane.docs @@ -7,3 +7,4 @@ xsane.PROBLEMS xsane.ROOT xsane.TODO xsane.VENDOR-STRINGS +debian/xsane.README diff --git a/debian/xsane.install b/debian/xsane.install index a5366dc..1d72533 100644 --- a/debian/xsane.install +++ b/debian/xsane.install @@ -2,3 +2,4 @@ src/xsane.desktop usr/share/applications debian/xsane.png usr/share/icons/hicolor/256x256/apps usr/bin usr/share/man +debian/ocr-scripts usr/share/xsane -- cgit v1.2.3