summaryrefslogtreecommitdiff
path: root/debian/ocr-scripts/xsane2cunei.sh
blob: b704d52b71ea7e549f0cb8e90ee36b3a57266f7c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
#
# ###############################################################################
#                                                                               #
#                                   xsane2cunei 0.3                             #
#                                                                               #
#                          *** cuneiform made simple ***                        # 
#                                                                               # 
# ###############################################################################
# 
# xsane2cunei is a wrapper to be able to use Cuneiform-Linux with XSane
# 
# 
#
# Settings: where progress and error output is logged.
TEMP_DIR=~/tmp/             # directory that receives the log file
ERRORLOG="xsane2cunei.log"  # name of the log file STDERR is appended to

# Called without a (non-empty) first argument: print the help text and stop.
if [[ -z "$1" ]]; then
  cat <<EOF
Usage: $0 [OPTIONS]

  xsane2cunei scans image files with XSane, 
  recognizes the text using cuneiform-linux
  and outputs the text in a file.
  
  OPTIONS:
    -i <file1>  define input file (any image-format supported)
    -o <file2>  define output file (txt, html, hocr, rtf)
    -l <language> define the language used for recognition
    -f <format> define the format used for output
    -e <extraoptions> optional: dotmatrix, fax, singlecolumn
 
  Progress- & error-messages will be stored in this logfile:
     $TEMP_DIR$ERRORLOG

  xsane2cunei depends on
    - XSane http://www.xsane.org/
    - libmagick-++dev  http://www.imagemagick.org/
    - cuneiform-linux   https://launchpad.net/cuneiform-linux Cuneiform-Linux
  
  Some coding was stolen from 'ocube'
  http://www.geocities.com/thierryguy/ocube.html

  This Cuneiform adaption is based on xsane2tess  
  http://doc.ubuntu-fr.org/xsane2tess, 

  Hints always welcome! heinrich (dot) schwietering (at) gmx (dot) de

EOF
  exit
fi

#######################################
# Parse the command-line options into global variables.
# Globals (written):
#   FILE_PATH  -i  input filename (with path)
#   FILE_OUT   -o  output filename
#   LANGUAGE   -l  recognition language
#   FORMAT     -f  output format
#   EXTRA      -e  extra option
# Arguments: the command-line arguments to parse
# Outputs:   a warning on STDERR for an unknown option or a missing
#            option argument; parsing continues afterwards
#######################################
parse_options() {
  # Keep getopts' bookkeeping local so the function can be called repeatedly.
  local OPTION OPTARG
  local OPTIND=1

  # The leading ':' selects getopts' silent mode: getopts prints nothing
  # itself, so the ':' and '?' arms below do the reporting.
  while getopts ":i:o:l:f:e:" OPTION; do
    case "$OPTION" in
      i)  # input filename (with path)
        FILE_PATH="$OPTARG"
        ;;
      o)  # output filename
        FILE_OUT="$OPTARG"
        ;;
      l)  # recognition language
        LANGUAGE="$OPTARG"
        ;;
      f)  # output format
        FORMAT="$OPTARG"
        ;;
      e)  # extra option
        EXTRA="$OPTARG"
        ;;
      :)  # option given without its argument; OPTARG holds the option letter
        printf '%s: option -%s requires an argument\n' "${0##*/}" "$OPTARG" >&2
        ;;
      \?) # unknown option; ignored as before, but no longer silently
        printf '%s: ignoring unknown option -%s\n' "${0##*/}" "$OPTARG" >&2
        ;;
    esac
  done
}

# get options...
parse_options "$@"

#######################################
# Redirect the script's output streams and run cuneiform on the input image.
# Globals (read):    FILE_PATH, FILE_OUT, LANGUAGE, FORMAT, EXTRA,
#                    TEMP_DIR, ERRORLOG
# Globals (written): IN_FILE
# Outputs:  cuneiform's STDOUT and STDERR are appended to
#           $TEMP_DIR$ERRORLOG; the result goes to FILE_OUT via -o
# Returns:  2 if the input or output file was not given, otherwise the
#           exit status of cuneiform
#######################################
run_ocr() {
  local -a extra_args=()

  # Without an input and an output file there is nothing useful to run.
  if [[ -z "$FILE_PATH" || -z "$FILE_OUT" ]]; then
    printf '%s: both -i <file> and -o <file> are required\n' "${0##*/}" >&2
    return 2
  fi

  # redirect STDOUT to FILE_OUT (quoted, so names with spaces work)
  exec 1>>"$FILE_OUT"

  # redirect STDERR to ERRORLOG; make sure the log folder exists first,
  # otherwise the redirection fails and the messages are not logged
  [[ -d "$TEMP_DIR" ]] || mkdir -p -- "$TEMP_DIR"
  exec 2>>"$TEMP_DIR$ERRORLOG"

  # Hand the input path to cuneiform unchanged. The former expansion
  # ${FILE_PATH##*/.*} left the path untouched unless it contained "/."
  # (hidden folder, "/../"), in which case it removed the whole string.
  IN_FILE="$FILE_PATH"

  # -e is optional: only pass "--<extra>" when one was given, instead of
  # a bare "--" argument.
  if [[ -n "$EXTRA" ]]; then
    extra_args=("--$EXTRA")
  fi

  # start OCR (cuneiform's STDOUT is sent to the log as well)
  cuneiform -l "$LANGUAGE" -f "$FORMAT" -o "$FILE_OUT" \
    "${extra_args[@]}" "$IN_FILE" 1>&2
}

run_ocr