summaryrefslogtreecommitdiff
path: root/raphodo/photoattributes.py
diff options
context:
space:
mode:
Diffstat (limited to 'raphodo/photoattributes.py')
-rw-r--r--raphodo/photoattributes.py452
1 files changed, 452 insertions, 0 deletions
diff --git a/raphodo/photoattributes.py b/raphodo/photoattributes.py
new file mode 100644
index 0000000..9652add
--- /dev/null
+++ b/raphodo/photoattributes.py
@@ -0,0 +1,452 @@
+# Copyright (C) 2015-2016 Damon Lynch <damonlynch@gmail.com>
+
+# This file is part of Rapid Photo Downloader.
+#
+# Rapid Photo Downloader is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Rapid Photo Downloader is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Rapid Photo Downloader. If not,
+# see <http://www.gnu.org/licenses/>.
+
+"""
+Collects attributes about varieties of photo formats, including how much of the file
+has to be read in order to extract exif information or a preview.
+"""
+
+__author__ = 'Damon Lynch'
+__copyright__ = "Copyright 2015-2016, Damon Lynch"
+
+import shlex
+import subprocess
+from enum import IntEnum
+import os
+import datetime
+import resource
+from typing import Optional, Dict, Union
+
+import gi
+gi.require_version('GExiv2', '0.10')
+from gi.repository import GExiv2
+from PyQt5.QtGui import QImage
+
+from raphodo.utilities import format_size_for_user
+from raphodo.metadataphoto import MetaData
+
# Lower case file extensions (without the leading dot) treated as JPEG
JPEG_EXTENSIONS = 'jpg jpe jpeg'.split()

# Command line template for querying vmtouch about a single file; the file
# name is substituted for the braces
vmtouch_cmd = 'vmtouch -v "{}"'

# Size of one memory page in bytes, and the multiplier that turns a number of
# pages into kilobytes
page_size = resource.getpagesize()
to_kb = page_size // 1024
+
class PreviewSource(IntEnum):
    """
    Position of a preview image within the list of previews embedded in a
    photo. The values are zero based, the names are one based.
    """

    preview_1, preview_2, preview_3, preview_4, preview_5, preview_6 = range(6)
+
+
class PhotoAttributes:
    """
    Attributes of a single photo, together with measurements of how much of
    the file must be read in order to extract its exif metadata.

    Everything is gathered when the instance is created. Calling str() on the
    instance produces a multi-line report of what was found.
    """

    def __init__(self, full_file_name: str, ext: str, metadata: 'GExiv2.Metadata',
                 exiftool_process) -> None:
        """
        Examine the photo and record its attributes.

        :param full_file_name: path of the photo to examine
        :param ext: the photo's file extension, without the leading dot
        :param metadata: metadata already loaded from the photo. It must
         also provide the accessor methods (aperture, iso, etc.) called by
         minimum_extract_for_all()
        :param exiftool_process: passed on to MetaData when metadata is
         read from partial extracts of the file
        """

        self.exiftool_process = exiftool_process
        self.datetime = None  # type: datetime.datetime
        self.iso = None  # type: int
        self.height = None  # type: int
        self.width = None  # type: int
        self.model = None  # type: str
        self.has_gps = False  # type: bool
        self.orientation = None  # type: str
        self.no_previews = None  # type: int
        self.has_exif_thumbnail = False  # type: bool
        self.exif_thumbnail_height = None  # type: int
        self.exif_thumbnail_width = None  # type: int
        self.exif_thumbnail_details = None  # type: str
        self.all_exif_values = dict()  # type: Dict[str, Union[int, str, float, datetime.datetime]]
        self.has_app0 = None
        # Human readable name of the preview used, e.g. 'Preview 1'
        self.preview_source = None  # type: str
        self.preview_width = None  # type: int
        self.preview_height = None  # type: int
        self.preview_extension = None  # type: str
        self.exif_thumbnail_and_preview_identical = None  # type: bool
        self.preview_size_and_types = []
        self.minimum_exif_read_size_in_bytes_orientation = None  # type: int
        self.minimum_exif_read_size_in_bytes_datetime = None  # type: int
        self.minimum_exif_read_size_in_bytes_all = None  # type: int
        self.bytes_cached_post_previews = None
        self.in_memory_post_previews = None

        self.file_name = full_file_name
        self.ext = ext

        # Before doing anything else, understand what has already
        # been cached after simply reading the exif
        self.bytes_cached, self.total, self.in_memory = vmtouch_output(full_file_name)

        # Get information about the photo
        self.assign_photo_attributes(metadata)
        self.extract_thumbnail(metadata)
        self.bytes_cached_post_thumb, total, self.in_memory_post_thumb = vmtouch_output(
            full_file_name)
        self.get_preview_sizes(metadata)
        self.bytes_cached_post_previews, total, self.in_memory_post_previews = vmtouch_output(
            full_file_name)

        if self.orientation is not None or self.ext.lower() in JPEG_EXTENSIONS:
            self.minimum_extract_for_tag(self.orientation_extract)

        if self.datetime is not None:
            self.minimum_extract_for_tag(self.datetime_extract)

        self.minimum_extract_for_all(metadata)

    def assign_photo_attributes(self, metadata: 'GExiv2.Metadata') -> None:
        """
        Record the photo's dimensions, orientation, camera make and model,
        GPS presence, ISO and date time.

        :param metadata: metadata loaded from the photo
        """

        # I don't know how GExiv2 gets these values:
        self.width = metadata.get_pixel_width()
        self.height = metadata.get_pixel_height()
        try:
            self.orientation = metadata.get_tag_string('Exif.Image.Orientation')
        except KeyError:
            pass
        if metadata.has_tag('Exif.Image.Make') and metadata.has_tag('Exif.Image.Model'):
            self.model = '{} {}'.format(metadata.get_tag_string('Exif.Image.Make').strip(),
                                        metadata.get_tag_string('Exif.Image.Model').strip())
        self.has_gps = metadata.get_gps_info()[0]
        self.iso = metadata.get_iso_speed()
        try:
            self.datetime = metadata.get_date_time()
        except (KeyError, ValueError):
            pass

    def extract_thumbnail(self, metadata: 'GExiv2.Metadata') -> None:
        """
        Record details of the exif thumbnail (if any) and of the first
        embedded preview that is at least 160x120 pixels.

        :param metadata: metadata loaded from the photo
        """

        # not all files have an exif preview, but all CR2 seem to
        exif_thumbnail = metadata.get_exif_thumbnail()
        if exif_thumbnail:
            # Get the thumbnail but don't save it
            self.has_exif_thumbnail = True
            qimage = QImage.fromData(exif_thumbnail)
            if not qimage.isNull():
                self.exif_thumbnail_width = qimage.width()
                self.exif_thumbnail_height = qimage.height()
                self.exif_thumbnail_details = '{}x{}'.format(self.exif_thumbnail_width,
                                                             self.exif_thumbnail_height)

        previews = metadata.get_preview_properties()
        self.no_previews = len(previews)

        for idx, preview in enumerate(previews):
            image = metadata.get_preview_image(preview)
            if image.get_width() >= 160 and image.get_height() >= 120:
                # Get the thumbnail but don't save it. The image is already
                # in hand, so there is no need to extract it a second time.
                preview_thumbnail = image.get_data()
                if self.has_exif_thumbnail:
                    self.exif_thumbnail_and_preview_identical = preview_thumbnail == exif_thumbnail
                try:
                    source = PreviewSource(idx).name.replace('_', ' ').capitalize()
                except ValueError:
                    # More previews than PreviewSource has members: produce
                    # the same style of name without the enum
                    source = 'Preview {}'.format(idx + 1)
                self.preview_source = source
                self.preview_width = image.get_width()
                self.preview_height = image.get_height()
                self.preview_extension = image.get_extension()
                return

    def get_preview_sizes(self, metadata: 'GExiv2.Metadata'):
        """
        Record a '; ' separated summary of the dimensions and type of every
        preview embedded in the photo.

        :param metadata: metadata loaded from the photo
        """

        sizes_and_types = []
        for preview in metadata.get_preview_properties():
            image = metadata.get_preview_image(preview)
            sizes_and_types.append((image.get_width(), image.get_height(),
                                    image.get_extension()))
        # ext[1:] drops the first character of the extension
        self.preview_size_and_types = '; '.join(
            '{}x{} {}'.format(width, height, ext[1:]) for width, height, ext in sizes_and_types)

    def orientation_extract(self, metadata: 'GExiv2.Metadata', size_in_bytes):
        """
        Check whether the orientation read from a partial extract matches the
        orientation read from the full file, recording the extract size if so.

        :param metadata: metadata loaded from the partial extract
        :param size_in_bytes: size of the extract to record on a match
        :return: True if the values match, else False
        """

        if metadata['Exif.Image.Orientation'] == self.orientation:
            self.minimum_exif_read_size_in_bytes_orientation = size_in_bytes
            return True
        return False

    def datetime_extract(self, metadata: 'GExiv2.Metadata', size_in_bytes):
        """
        Check whether the date time read from a partial extract matches the
        date time read from the full file, recording the extract size if so.

        :param metadata: metadata loaded from the partial extract
        :param size_in_bytes: size of the extract to record on a match
        :return: True if the values match, else False
        """

        if metadata.get_date_time() == self.datetime:
            self.minimum_exif_read_size_in_bytes_datetime = size_in_bytes
            return True
        return False

    def minimum_extract_for_tag(self, check_extract):
        """
        Find the smallest extract of the file from which a tag can be read.

        JPEG files are handled by read_jpeg_2(). Other files are read in
        the increasing sizes produced by exif_scan_range() until
        check_extract reports success.

        :param check_extract: callable taking the metadata loaded from the
         extract and the extract's size in bytes, returning True when the
         tag it checks has the expected value
        """

        # Compare case insensitively, like the JPEG test below does
        if self.ext.upper() == 'CRW':
            # Exiv2 can crash while scanning for exif in a very small
            # extract of a CRW file
            return
        elif self.ext.lower() in JPEG_EXTENSIONS:
            return self.read_jpeg_2(check_extract)

        metadata = GExiv2.Metadata()
        # Open the file once, and rewind before each read
        with open(self.file_name, 'rb') as photo:
            for size_in_bytes in exif_scan_range():
                photo.seek(0)
                photo_extract = photo.read(size_in_bytes)
                try:
                    metadata.open_buf(photo_extract)
                except Exception:
                    # The extract is too small to be parsed
                    pass
                else:
                    try:
                        if check_extract(metadata, size_in_bytes):
                            break
                    except (KeyError, ValueError):
                        # The tag is missing from the extract, or its value
                        # could not be interpreted
                        pass
                if len(photo_extract) < size_in_bytes:
                    # The whole file has been read: bigger reads would
                    # produce exactly the same extract
                    break

    def minimum_extract_for_all(self, metadata: 'MetaData') -> None:
        """
        Find the smallest extract of the file from which a variety of exif
        values can all be read.

        The values found in the full file are stored in all_exif_values.
        The size at which the last of them is matched is stored in
        minimum_exif_read_size_in_bytes_all.

        :param metadata: metadata loaded from the full photo
        """

        # Compare case insensitively, like the JPEG test elsewhere does
        if self.ext.upper() == 'CRW':
            # Exiv2 can crash while scanning for exif in a very small
            # extract of a CRW file
            return

        funcs = 'aperture iso exposure_time focal_length camera_make camera_model camera_serial ' \
                'shutter_count owner_name copyright artist short_camera_model ' \
                'date_time timestamp sub_seconds orientation'.split()
        for f in funcs:
            v = getattr(metadata, f)()
            if v:
                self.all_exif_values[f] = v

        found = set()

        # Open the file once, and rewind before each read
        with open(self.file_name, 'rb') as photo:
            for size_in_bytes in exif_scan_range():
                photo.seek(0)
                photo_extract = photo.read(size_in_bytes)
                try:
                    metadata_extract = MetaData(raw_bytes=bytearray(photo_extract),
                                                et_process=self.exiftool_process)
                except Exception:
                    # The extract is too small to be parsed
                    pass
                else:
                    try:
                        for tag in self.all_exif_values:
                            if (tag not in found and
                                    getattr(metadata_extract, tag)() == self.all_exif_values[tag]):
                                found.add(tag)
                                if len(found) == len(self.all_exif_values):
                                    self.minimum_exif_read_size_in_bytes_all = size_in_bytes
                                    return
                    except (KeyError, ValueError):
                        pass
                if len(photo_extract) < size_in_bytes:
                    # The whole file has been read: bigger reads would
                    # produce exactly the same extract
                    break

    def get_jpeg_exif_length(self) -> Optional[int]:
        """
        Determine how many bytes must be read from the start of the JPEG to
        include all of its APP1 (exif) segment.

        :return: the SOI marker, the optional APP0 segment with its marker,
         the APP1 marker and the APP1 data, in bytes; or None if the file is
         not a JPEG or its APP1 segment could not be found
        """

        app0_data_length = 0
        soi_marker_length = 2
        marker_length = 2
        with open(self.file_name, 'rb') as jpeg:
            soi_marker = jpeg.read(2)
            if soi_marker != b'\xff\xd8':
                print("Not a jpeg image: no SOI marker")
                return None

            app_marker = jpeg.read(2)
            if app_marker == b'\xff\xe0':
                # Don't need the content of APP0, only its length, which is
                # a two byte big endian value that counts itself
                app0_length_bytes = jpeg.read(2)
                if len(app0_length_bytes) < 2:
                    # File ends inside APP0
                    print("Could not locate APP1 marker")
                    return None
                app0_data_length = int.from_bytes(app0_length_bytes, 'big')
                if app0_data_length < 2:
                    print("APP0 is malformed")
                    return None
                # Skip over the APP0 content
                jpeg.seek(app0_data_length - 2, os.SEEK_CUR)
                app_marker = jpeg.read(2)
                app0_data_length = app0_data_length + marker_length

            if app_marker != b'\xff\xe1':
                print("Could not locate APP1 marker")
                return None

            header = jpeg.read(8)
            if header[2:6] != b'Exif' or header[6:8] != b'\x00\x00':
                print("APP1 is malformed")
                return None
            app1_data_length = header[0] * 256 + header[1]
            return soi_marker_length + marker_length + app1_data_length + app0_data_length

    def read_jpeg(self, check_extract) -> None:
        """
        Load metadata from only as much of the JPEG as
        get_jpeg_exif_length() reports is needed, and check it.

        :param check_extract: callable taking the metadata loaded from the
         extract and the extract's size in bytes, returning True when the
         tag it checks has the expected value
        """

        length = self.get_jpeg_exif_length()
        if length is not None:
            metadata = GExiv2.Metadata()
            with open(self.file_name, 'rb') as photo:
                photo_extract = photo.read(length)
            try:
                metadata.open_buf(photo_extract)
            except Exception:
                print("Failed to read exif!")
            else:
                try:
                    if not check_extract(metadata, length):
                        print("Read exif okay, but failed to get value from exif!")
                except (KeyError, ValueError):
                    print("Read exif okay, but failed to get value from exif!")

    def read_jpeg_2(self, check_extract) -> None:
        """
        Load metadata from the start of the JPEG up to the end of its APP1
        (exif) segment, and check it.

        Sets has_app0 to True when the file has an APP0 segment before APP1.

        :param check_extract: callable taking the metadata loaded from the
         extract and a length in bytes, returning True when the tag it
         checks has the expected value
        """

        # Step 1: determine the location of APP1 in the jpeg file
        # See http://dev.exiv2.org/projects/exiv2/wiki/The_Metadata_in_JPEG_files

        app0_data_length = 0

        soi_marker_length = 2
        marker_length = 2
        exif_header_length = 8
        read0_size = soi_marker_length + marker_length + exif_header_length

        with open(self.file_name, 'rb') as jpeg:
            jpeg_header = jpeg.read(read0_size)

            if jpeg_header[0:2] != b'\xff\xd8':
                print("%s not a jpeg image: no SOI marker" % self.file_name)
                return None

            app_marker = jpeg_header[2:4]

            # Step 2: handle presence of APP0 - it's optional
            if app_marker == b'\xff\xe0':
                self.has_app0 = True
                # There is an APP0 before the probable APP1
                # Don't need the content of the APP0
                app0_length_bytes = jpeg_header[4:6]
                if len(app0_length_bytes) < 2:
                    # File ends before the length of APP0
                    print("Could not locate APP1 marker in %s" % self.file_name)
                    return None
                app0_data_length = app0_length_bytes[0] * 256 + app0_length_bytes[1]
                # We've already read twelve bytes total, going into the APP0 data.
                # Now we want to read the rest of the APP0, along with the APP1
                # marker and the APP1 exif header
                read1_size = app0_data_length + 2
                app0 = jpeg.read(read1_size)
                app_marker = app0[(exif_header_length + 2) * -1:exif_header_length * -1]
                exif_header = app0[exif_header_length * -1:]
                jpeg_header = jpeg_header + app0

            else:
                exif_header = jpeg_header[exif_header_length * -1:]

            # Step 3: process exif header
            if app_marker != b'\xff\xe1':
                print("Could not locate APP1 marker in %s" % self.file_name)
                return None
            if exif_header[2:6] != b'Exif' or exif_header[6:8] != b'\x00\x00':
                print("APP1 is malformed in %s" % self.file_name)
                return None
            app1_data_length = exif_header[0] * 256 + exif_header[1]

            # Step 4: read APP1
            view = jpeg.read(app1_data_length)
            photo_extract = jpeg_header + view

        metadata = GExiv2.Metadata()
        # NOTE(review): unlike get_jpeg_exif_length(), this does not count
        # the SOI marker or the segment markers, and it is smaller than
        # len(photo_extract) - confirm which size should be reported
        length = app1_data_length + app0_data_length

        try:
            metadata.open_buf(photo_extract)
        except Exception:
            print("Failed to read exif!")
        else:
            try:
                # check_extract records the length itself when the value
                # matches; nothing is reported when it does not
                check_extract(metadata, length)
            except (KeyError, ValueError):
                pass

    def __repr__(self):
        """
        :return: the camera model (or failing that the file's name),
         followed by the photo's dimensions and extension when known
        """

        if self.model:
            s = self.model
        elif self.file_name:
            s = os.path.split(self.file_name)[1]
        else:
            return "Unknown photo"
        if self.width:
            s += ' {}x{}'.format(self.width, self.height)
        if self.ext:
            s += ' {}'.format(self.ext)
        return s

    def __str__(self):
        """
        :return: multi-line report of everything that was recorded about
         the photo, each line ending in a newline
        """

        parts = []
        if self.model is not None:
            parts.append('{}\n'.format(self.model))
        elif self.file_name is not None:
            parts.append('{}\n'.format(os.path.split(self.file_name)[1]))
        if self.width is not None:
            parts.append('{}x{}\n'.format(self.width, self.height))
        if self.datetime:  # type: datetime.datetime
            parts.append('{}\n'.format(self.datetime.strftime('%c')))
        if self.iso:
            parts.append('ISO: {}\n'.format(self.iso))
        if self.orientation is not None:
            parts.append('Orientation: {}\n'.format(self.orientation))
        if self.has_gps:
            parts.append('Has GPS tag: True\n')
        if self.has_exif_thumbnail:
            parts.append('Exif thumbnail: {}\n'.format(self.exif_thumbnail_details))
        if self.preview_source is not None:
            parts.append('{} of {}: {}x{} {}\n'.format(
                self.preview_source,
                self.no_previews,
                self.preview_width, self.preview_height,
                self.preview_extension[1:]))
        if self.exif_thumbnail_and_preview_identical is False:
            # Check against False as value is one of None, True or
            # False
            parts.append('Exif thumbnail differs from smallest preview\n')
        if self.preview_size_and_types:
            parts.append('All preview images: {}\n'.format(self.preview_size_and_types))
        parts.append('Disk cache after exif read:\n[{}]\n'.format(self.in_memory))
        if self.in_memory != self.in_memory_post_thumb:
            parts.append('Disk cache after thumbnail / preview extraction:\n[{}]\n'.format(
                self.in_memory_post_thumb))
        if self.bytes_cached == self.bytes_cached_post_thumb:
            parts.append('Cached: {:,}KB of {:,}KB\n'.format(self.bytes_cached, self.total))
        else:
            parts.append('Cached: {:,}KB(+{:,}KB after extraction) of {:,}KB\n'.format(
                self.bytes_cached, self.bytes_cached_post_thumb, self.total))
        if self.minimum_exif_read_size_in_bytes_orientation is not None:
            parts.append('Minimum read size to extract orientation tag: {}\n'.format(
                format_size_for_user(self.minimum_exif_read_size_in_bytes_orientation)))
        if self.minimum_exif_read_size_in_bytes_orientation is None and self.orientation is not \
                None:
            parts.append('Could not extract orientation tag with minimal read\n')
        if self.minimum_exif_read_size_in_bytes_datetime is not None:
            parts.append('Minimum read size to extract datetime tag: {}\n'.format(
                format_size_for_user(self.minimum_exif_read_size_in_bytes_datetime)))
        if self.minimum_exif_read_size_in_bytes_datetime is None and self.datetime is not None:
            parts.append('Could not extract datetime tag with minimal read\n')
        if self.minimum_exif_read_size_in_bytes_all is not None:
            parts.append('Minimum read size to extract variety of tags: {}\n'.format(
                format_size_for_user(self.minimum_exif_read_size_in_bytes_all)))
        else:
            parts.append('Could not extract variety of tags with minimal read\n')
        return ''.join(parts)
+
+
def exif_scan_range() -> iter:
    """
    Generate the increasing read sizes, in bytes, to try when scanning for
    the smallest extract of a file that contains the desired metadata.

    The sizes start at 20 bytes and grow in eight stages. Each stage yields
    a fixed number of sizes a fixed stride apart, and the stride grows from
    stage to stage.

    :return: generator yielding the sizes in ascending order
    """

    # (number of sizes to yield, distance between them)
    schedule = ((108, 1), (97, 4), (16, 32), (16, 256), (16, 512), (8, 1024),
                (8, 2048 * 4), (32, 2048 * 16))
    size = 20
    for count, stride in schedule:
        for _ in range(count):
            yield size
            size += stride
+
def vmtouch_output(full_file_name: str) -> tuple:
    """
    Run vmtouch on a file and report how much of it is in the disk cache.

    The result is taken from the first line of vmtouch's output that starts
    with '['. That line must end with a pair of page counts in the form
    cached/total.

    :param full_file_name: path of the file to query
    :return: tuple of (amount cached in KB, total in KB, the text between
     the square brackets on the parsed line)
    :raises subprocess.CalledProcessError: if vmtouch exits with an error
    :raises ValueError: if no line starting with '[' is found in the output
    """

    # Pass the arguments as a list rather than formatting them into a command
    # string and splitting it again: file names containing quotes or
    # backslashes then reach vmtouch unmodified
    command = ['vmtouch', '-v', full_file_name]
    output = subprocess.check_output(command, universal_newlines=True)  # type: str
    for line in output.split('\n'):
        line = line.strip()
        if line.startswith('['):
            in_memory = line[1:line.find(']')]
            currently_paged_percent = line.rsplit(' ', 1)[-1]
            num, denom = map(int, currently_paged_percent.split('/'))
            # vmtouch counts pages; to_kb converts pages to kilobytes
            return (num * to_kb, denom * to_kb, in_memory)
    # Fail with a clear message instead of implicitly returning None, which
    # callers would then fail to unpack
    raise ValueError('Could not find page map in vmtouch output for {}'.format(full_file_name))