aboutsummaryrefslogtreecommitdiffstats
path: root/libmat2/exiftool.py
blob: 89081e286b1abdcd3dbf868ab1d9f8936136ecf7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import functools
import json
import logging
import os
import subprocess
from typing import Dict, Union, Set

from . import abstract
from . import bubblewrap

# Make pyflakes happy: in the code below, `Set` is only referenced from a
# `# type:` comment, so without this explicit use the import above would be
# reported as unused.
assert Set


class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.
    """
    # Keys that `get_meta` drops from exiftool's output before returning it.
    meta_allowlist = set()  # type: Set[str]

    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """ Return the metadata that exiftool reports for `self.filename`.

        Exiftool is invoked with `-json` (through `bubblewrap.run` when
        `self.sandbox` is truthy, directly otherwise). Its standard output is
        decoded as UTF-8 JSON, the first element of the resulting array is
        taken, and every key listed in `meta_allowlist` is removed from it.

        :returns: the remaining metadata, as a dict.
        :raises subprocess.CalledProcessError: if the direct exiftool run
            exits with a non-zero status (`check=True`); the sandboxed run is
            passed `check=True` as well.
        :raises RuntimeError: propagated from `_get_exiftool_path` when no
            exiftool binary is found.
        """
        # Same command line in both modes; only the runner differs.
        cmd = [_get_exiftool_path(), '-json', self.filename]
        if self.sandbox:
            proc = bubblewrap.run(cmd, input_filename=self.filename,
                                  check=True, stdout=subprocess.PIPE)
        else:
            proc = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)

        # exiftool's json output is an array; only one file was given,
        # so only its first entry is used.
        meta = json.loads(proc.stdout.decode('utf-8'))[0]
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

    def _lightweight_cleanup(self) -> bool:
        """ Ask exiftool to write a copy of `self.filename`, stripped of its
        metadata, to `self.output_filename`.

        Any pre-existing file at `self.output_filename` is removed first.

        :returns: True if exiftool ran successfully; False if the existing
            output file couldn't be removed or if exiftool failed. Both
            failures are logged.
        """
        if os.path.exists(self.output_filename):
            try:  # exiftool can't force output to existing files
                os.remove(self.output_filename)
            except OSError as e:  # pragma: no cover
                # Adjacent literals instead of a backslash continuation, so
                # that the source indentation doesn't end up in the message.
                logging.error("The output file %s is already existing and "
                              "can't be overwritten: %s.",
                              self.output_filename, e)
                return False

        # Note: '-All=' must be followed by a known exiftool option.
        # Also, '-CommonIFD0' is needed for .tiff files
        cmd = [_get_exiftool_path(),
               '-all=',         # remove metadata
               '-adobe=',       # remove adobe-specific metadata
               '-exif:all=',    # remove all exif metadata
               '-Time:All=',    # remove all timestamps
               '-quiet',        # don't show useless logs
               '-CommonIFD0=',  # remove IFD0 metadata
               '-o', self.output_filename,
               self.filename]
        try:
            if self.sandbox:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:  # pragma: no cover
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True

@functools.lru_cache()
def _get_exiftool_path() -> str:  # pragma: no cover
    """ Return the path of an executable exiftool binary.

    The well-known distribution locations are probed first, in a fixed
    order; if none of them is an executable file, the lookup falls back to
    searching the directories listed in `PATH`. Successful results are
    memoised by `functools.lru_cache`.

    :returns: the path to exiftool.
    :raises RuntimeError: if no executable exiftool could be found.
    """
    import shutil  # local import: only needed by this function

    # A tuple rather than a set, so that the probing order (and thus the
    # result when several candidates exist) is deterministic.
    possible_paths = (
        '/usr/bin/exiftool',              # debian/fedora
        '/usr/bin/vendor_perl/exiftool',  # archlinux
    )

    for possible_path in possible_paths:
        if os.path.isfile(possible_path) and os.access(possible_path, os.X_OK):
            return possible_path

    # Not in a known location: look it up in PATH instead.
    which_path = shutil.which('exiftool')
    if which_path:
        return which_path

    raise RuntimeError("Unable to find exiftool")