summaryrefslogtreecommitdiff
path: root/raphodo/exiftool.py
diff options
context:
space:
mode:
Diffstat (limited to 'raphodo/exiftool.py')
-rw-r--r--raphodo/exiftool.py370
1 files changed, 370 insertions, 0 deletions
diff --git a/raphodo/exiftool.py b/raphodo/exiftool.py
new file mode 100644
index 0000000..b5b9687
--- /dev/null
+++ b/raphodo/exiftool.py
@@ -0,0 +1,370 @@
+# -*- coding: utf-8 -*-
+# PyExifTool <http://github.com/smarnach/pyexiftool>
+# Copyright 2012 Sven Marnach
+
+# This file is part of PyExifTool.
+#
+# PyExifTool is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the licence, or
+# (at your option) any later version, or the BSD licence.
+#
+# PyExifTool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See COPYING.GPL or COPYING.BSD for more details.
+
+"""
+PyExifTool is a Python library to communicate with an instance of Phil
+Harvey's excellent ExifTool_ command-line application. The library
+provides the class :py:class:`ExifTool` that runs the command-line
+tool in batch mode and features methods to send commands to that
+program, including methods to extract meta-information from one or
+more image files. Since ``exiftool`` is run in batch mode, only a
+single instance needs to be launched and can be reused for many
+queries. This is much more efficient than launching a separate
+process for every single query.
+
+.. _ExifTool: http://www.sno.phy.queensu.ca/~phil/exiftool/
+
+The source code can be checked out from the github repository with
+
+::
+
+ git clone git://github.com/smarnach/pyexiftool.git
+
+Alternatively, you can download a tarball_. There haven't been any
+releases yet.
+
+.. _tarball: https://github.com/smarnach/pyexiftool/tarball/master
+
+PyExifTool is licenced under GNU GPL version 3 or later.
+
+Example usage::
+
+ import exiftool
+
+ files = ["a.jpg", "b.png", "c.tif"]
+ with exiftool.ExifTool() as et:
+ metadata = et.get_metadata_batch(files)
+ for d in metadata:
+ print("{:20.20} {:20.20}".format(d["SourceFile"],
+ d["EXIF:DateTimeOriginal"]))
+
+Modified by Damon Lynch:
+Added "common arguments".
+Grouping tag names is no longer the default.
+The function set_pdeathsig is used to automatically terminate the exiftool
+process when the parent program exits.
+Added version_info().
+"""
+
+from __future__ import unicode_literals
+
+import sys
+import subprocess
+import os
+import json
+import warnings
+import codecs
+from typing import Optional
+
+from raphodo.utilities import set_pdeathsig
+
+
def version_info() -> str:
    """
    Report the version of the ExifTool found on the ``PATH``.

    Runs ``exiftool -ver`` and returns what it prints, stripped of
    surrounding whitespace and decoded to text.

    :return: the version string, or an empty string if ExifTool could
     not be launched or exited with a non-zero status
    """
    try:
        raw_output = subprocess.check_output(['exiftool', '-ver'])
    except (OSError, subprocess.CalledProcessError):
        # Not installed / not executable, or the call itself failed.
        return ''
    return raw_output.strip().decode()
+
+
# Number of bytes requested per read from the exiftool output pipe.
# The default is normally fine; other values may perform better in
# some situations.
block_size = 4096

# Marker that exiftool prints once it has finished processing a batch
# of commands. There should be no need to change it.
sentinel = b"{ready}"

executable = "exiftool"
"""The name of the executable to run.

If the executable is not located in one of the paths listed in the
``PATH`` environment variable, the full path should be given here.
"""

# Types treated as "a single string" when validating arguments that
# must be iterables of strings.
basestring = (bytes, str)
+
+# This code has been adapted from Lib/os.py in the Python source tree
+# (sha1 265e36e277f3)
def _fscodec():
    """Build and return the module's ``fsencode`` function.

    The filesystem encoding and the error handler are determined once,
    here, and captured by the returned closure.
    """
    fs_encoding = sys.getfilesystemencoding()
    error_handler = "strict"
    if fs_encoding != "mbcs":
        # Use 'surrogateescape' whenever this interpreter provides it.
        try:
            codecs.lookup_error("surrogateescape")
            error_handler = "surrogateescape"
        except LookupError:
            pass

    def fsencode(filename):
        """
        Encode filename to the filesystem encoding with 'surrogateescape' error
        handler, return bytes unchanged. On Windows, use 'strict' error handler if
        the file system encoding is 'mbcs' (which is the default encoding).
        """
        if not isinstance(filename, bytes):
            return filename.encode(fs_encoding, error_handler)
        return filename

    return fsencode


fsencode = _fscodec()
del _fscodec
+
class ExifTool(object):
    """Run the `exiftool` command-line tool and communicate with it.

    You can pass the file name of the ``exiftool`` executable as an
    argument to the constructor. The default value ``exiftool`` will
    only work if the executable is in your ``PATH``.

    Most methods of this class are only available after calling
    :py:meth:`start()`, which will actually launch the subprocess. To
    avoid leaving the subprocess running, make sure to call
    :py:meth:`terminate()` when finished using the instance.
    This method will also be implicitly called when the instance is
    garbage collected, but there are circumstances when this won't ever
    happen, so you should not rely on the implicit process
    termination. The subprocess is started with the ``preexec_fn``
    hook produced by ``set_pdeathsig()``, which is meant to terminate
    it when the parent process exits; explicit termination remains the
    reliable way to clean up.

    A convenient way to make sure that the subprocess is terminated is
    to use the :py:class:`ExifTool` instance as a context manager::

        with ExifTool() as et:
            ...

    .. warning:: Note that there is very little error handling.
       Nonsensical options will be silently ignored by exiftool, so
       there's not much that can be done in that regard. You should
       avoid passing non-existent files to any of the methods, since
       this will lead to undefined behaviour.

    .. py:attribute:: running

       A Boolean value indicating whether this instance is currently
       associated with a running subprocess.
    """

    def __init__(self, common_arguments=None, executable_=None):
        """Configure the instance without launching anything.

        :param common_arguments: iterable of command line arguments
         that are placed after ``-common_args`` and therefore apply to
         every command sent to exiftool; ``None`` means no common
         arguments
        :param executable_: name or full path of the exiftool binary;
         ``None`` selects the module-level ``executable`` value
        """
        if executable_ is None:
            self.executable = executable
        else:
            self.executable = executable_
        # Store a private list: start() concatenates this with another
        # list, which would fail for a tuple or other iterable.
        if common_arguments is not None:
            self.common_arguments = list(common_arguments)
        else:
            self.common_arguments = []
        self.running = False

    def start(self):
        """Start an ``exiftool`` process in batch mode for this instance.

        This method will issue a ``UserWarning`` and do nothing if the
        subprocess is already running, or if the executable cannot be
        launched. The process is started with the ``common_arguments``
        given to the constructor, which exiftool automatically applies
        to every command you run with :py:meth:`execute()`.
        """
        if self.running:
            warnings.warn("ExifTool already running; doing nothing.")
            return

        cmd = [self.executable, "-stay_open", "True", "-@", "-",
               "-common_args"] + self.common_arguments
        # Obtained outside the try block so that only failures to
        # launch the executable itself are reported as a warning.
        preexec = set_pdeathsig()
        try:
            self._process = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                preexec_fn=preexec)
        except OSError:
            # Covers a missing or non-executable binary.
            warnings.warn("ExifTool cannot be started; doing nothing.")
            return
        self.running = True

    def terminate(self):
        """Terminate the ``exiftool`` process of this instance.

        If the subprocess isn't running, this method will do nothing.
        A broken pipe while asking exiftool to quit is logged rather
        than raised; in every case the instance is marked as not
        running afterwards.
        """
        # getattr: __del__ may call this on an instance whose __init__
        # never ran to completion.
        if not getattr(self, "running", False):
            return
        try:
            self._process.stdin.write(b"-stay_open\nFalse\n")
            self._process.stdin.flush()
            self._process.communicate()
        except (BrokenPipeError, subprocess.TimeoutExpired):
            import logging
            logging.error("Encountered problem when closing long-running ExifTool process")
        finally:
            del self._process
            self.running = False

    def __enter__(self):
        """Start the subprocess and return this instance."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Terminate the subprocess when leaving the ``with`` block."""
        self.terminate()

    def __del__(self):
        """Best-effort termination when the instance is collected."""
        self.terminate()

    def execute(self, *params):
        """Execute the given batch of parameters with ``exiftool``.

        This method accepts any number of parameters and sends them to
        the attached ``exiftool`` process. The final ``-execute``
        necessary to actually run the batch is appended automatically;
        see the documentation of :py:meth:`start()` for the common
        options. The ``exiftool`` output is read up to the
        end-of-output sentinel and returned as a raw ``bytes`` object,
        excluding the sentinel.

        The parameters must also be raw ``bytes``, in whatever
        encoding exiftool accepts. For filenames, this should be the
        system's filesystem encoding.

        :raises ValueError: if the process is not running
        :raises RuntimeError: if exiftool's output ends before the
         sentinel was seen

        .. note:: This is considered a low-level method, and should
           rarely be needed by application developers.
        """
        if not self.running:
            raise ValueError("ExifTool instance not running.")
        self._process.stdin.write(b"\n".join(params + (b"-execute\n",)))
        self._process.stdin.flush()
        # bytearray gives amortised O(1) appends while accumulating.
        output = bytearray()
        fd = self._process.stdout.fileno()
        # Only the tail needs checking for the sentinel.
        while not output[-32:].strip().endswith(sentinel):
            chunk = os.read(fd, block_size)
            if not chunk:
                # An empty read means end of file: the sentinel will
                # never arrive, so stop instead of spinning forever.
                raise RuntimeError(
                    "ExifTool process closed its output unexpectedly.")
            output += chunk
        return bytes(output.strip()[:-len(sentinel)])

    def execute_json(self, *params):
        """Execute the given batch of parameters and parse the JSON output.

        This method is similar to :py:meth:`execute()`. It
        automatically adds the parameters ``-j`` (JSON output) and
        ``-n`` and parses the output. The return value is a list of
        dictionaries, mapping tag names to the corresponding values.
        All keys are Unicode strings; they carry the ExifTool group
        name in the format <group>:<tag> only when a grouping option
        such as ``-G`` is among the arguments in effect. The values
        can have multiple types. Each dictionary contains the name of
        the file it corresponds to in the key ``"SourceFile"``.

        The parameters to this function must be either ``bytes`` or
        ``str``. ``str`` values are encoded using the system's
        filesystem encoding, so filenames can be passed as ordinary
        strings.
        """
        encoded = [fsencode(p) for p in params]
        return json.loads(self.execute(b"-j", b"-n", *encoded).decode("utf-8"))

    def execute_json_no_formatting(self, *params):
        """Like :py:meth:`execute_json()`, but without passing ``-n``.

        Only ``-j`` is added to the given parameters before they are
        sent to exiftool.
        """
        encoded = [fsencode(p) for p in params]
        return json.loads(self.execute(b"-j", *encoded).decode("utf-8"))

    def get_metadata_batch(self, filenames):
        """Return all meta-data for the given files.

        The return value will have the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(*filenames)

    def get_metadata(self, filename):
        """Return meta-data for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(filename)[0]

    def get_tags_batch(self, tags, filenames):
        """Return only specified tags for the given files.

        The first argument is an iterable of tags. The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute_json()`.

        :raises TypeError: if ``tags`` or ``filenames`` is a single
         string instead of an iterable of strings
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")
        params = ["-" + t for t in tags]
        params.extend(filenames)
        return self.execute_json(*params)

    def get_tags(self, tags, filename):
        """Return only specified tags for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.get_tags_batch(tags, [filename])[0]

    def get_tag_batch(self, tag, filenames):
        """Extract a single tag from the given files.

        The first argument is a single tag name, as usual in the
        format <group>:<tag>.

        The second argument is an iterable of file names.

        The return value is a list of tag values or ``None`` for
        non-existent tags, in the same order as ``filenames``.
        """
        data = self.get_tags_batch([tag], filenames)
        result = []
        for d in data:
            # Whatever remains after dropping the file name is the
            # requested tag, if exiftool reported it at all.
            d.pop("SourceFile", None)
            result.append(next(iter(d.values()), None))
        return result

    def get_tag(self, tag, filename):
        """Extract a single tag from a single file.

        The return value is the value of the specified tag, or
        ``None`` if this tag was not found in the file.
        """
        return self.get_tag_batch(tag, [filename])[0]