#!/bin/bash
# SPDX-License-Identifier: GPL-3.0-or-later
# Copyright (C) 2022 Alexander Vogt
# Author: Alexander Vogt
#
# Sample postprocessing script for gnome-simple-scan for OCR in PDFs
#
# This script first identifies a suitable instance of ocrmypdf
# (https://github.com/ocrmypdf/OCRmyPDF) and then applies this as a
# postprocessing step to PDFs generated by simple-scan.
#
# Usage:
# =====
#   simple-scan-postprocessing mime-type keep-origin input-file args
#
# Currently, only mime-type "application/pdf" is supported, the script will
# exit without an error if "image/jpeg", "image/png", or "image/webp" is
# provided. Any other mime-type results in an error.
# All args are provided to ocrmypdf.
# If keep-origin is set to "true", a copy of the source file is kept next to
# the input file as "<name>_orig.<ext>".
# If OCR fails, the unprocessed input file is put back in place and the
# script exits with the exit status of ocrmypdf.
#
# Example:
# =======
#   simple-scan-postprocessing application/pdf true scan.pdf -l eng+deu
#   simple-scan-postprocessing application/pdf true scan.pdf -rcd --jbig2-lossy -l deu
#

# -e: abort on unhandled errors, +m: no job control
set -e +m

# Arguments
mime_type="$1"
keep_original="$2"
target="$3"
# Everything after the third argument is forwarded verbatim to ocrmypdf.
# Kept as an array so that arguments containing whitespace or glob
# characters reach ocrmypdf as the caller passed them.
remainder=("${@:4}")

# Globals
_ocrmypdfcontainer="jbarlow83/ocrmypdf"
# Command (plus leading arguments) used to invoke ocrmypdf; set by
# findOcrMyPdf. An array, so paths with spaces survive.
_ocrmypdf=()

# Path of the backup copy: "<dir>/<name>_orig.<ext>". Only the file name is
# inspected for an extension, so a dot in a directory name is never mistaken
# for one; files without an extension become "<name>_orig".
target_dir=""
if [[ "$target" == */* ]]; then
  target_dir="${target%/*}/"
fi
target_base="${target##*/}"
if [[ "$target_base" == ?*.* ]]; then
  source="${target_dir}${target_base%.*}_orig.${target_base##*.}"
else
  source="${target_dir}${target_base}_orig"
fi

# Helper functions

#######################################
# Determines how to run ocrmypdf, in the following order:
#   1. ocrmypdf from the $PATH (local installation)
#   2. ocrmypdf through podman (if podman in $PATH)
#   3. ocrmypdf through docker (if docker in $PATH)
# Globals:
#   _ocrmypdfcontainer (read), _ocrmypdf (written)
# Outputs:
#   Error message on stderr if no candidate is found.
# Returns:
#   0 on success; exits the script with status 1 if nothing is found.
#######################################
function findOcrMyPdf() {
  local executable engine

  # "command -v" inside an "if" keeps "set -e" from aborting the script
  # before the remaining candidates (and the error message) are reached.
  if executable=$(command -v ocrmypdf); then
    _ocrmypdf=("$executable")
    return 0
  fi

  for engine in podman docker; do
    if executable=$(command -v "$engine"); then
      _ocrmypdf=("$executable" run --rm -i "$_ocrmypdfcontainer")
      return 0
    fi
  done

  echo "No suitable instance of ocrmypdf found. Please check your setup." >&2
  exit 1
}

case "${mime_type}" in
  "application/pdf")
    if [[ -z "$target" ]]; then
      echo "Usage: ${0##*/} mime-type keep-origin input-file [ocrmypdf args]" >&2
      exit 1
    fi

    # Determine the version of ocrmypdf to use. Done before the input is
    # touched, so a missing installation leaves the scan where it was.
    findOcrMyPdf

    mv -- "$target" "$source" # create a backup

    # Execute OCR: read the backup on stdin, write the result to the target.
    ocr_status=0
    "${_ocrmypdf[@]}" "${remainder[@]}" - - <"$source" >"$target" || ocr_status=$?
    if ((ocr_status != 0)); then
      # Do not leave a truncated/partial PDF behind: restore the original.
      echo "ocrmypdf failed with status ${ocr_status}; restoring \"${target}\"" >&2
      mv -f -- "$source" "$target"
      exit "$ocr_status"
    fi
    ;;
  "image/jpeg" | "image/png" | "image/webp")
    exit 0 # Nothing implemented
    ;;
  *)
    echo "Unsupported mime-type \"${mime_type}\"" >&2
    exit 1
    ;;
esac

# Clean up
if [[ "$keep_original" != "true" ]]; then
  rm -- "$source"
fi