summaryrefslogtreecommitdiffstats
path: root/tests/exiftool_parser/parser.py
blob: 351c1b63c3c059303921192d20f4de35fdd044f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
#
# This parser parses the output of Phil Harvey's exiftool (version 9.02)
# and converts it to XML format. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import re
import sys
from xml.sax.saxutils import escape

# Matches the following two kinds of entries in exiftool's -v2 output:
#
# 1. tag (group 1 = whole entry, group 2 = leading "| " nesting prefix):
#
# | | | x) name = value
# | | |     - Tag 0x1234
#
# 2. IFD indicator (group 3 = whole line, group 4 = "| " nesting prefix):
#
# | | | + [xxx directory with xx entries]
#
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\).*? = .*?\n.*?- Tag 0x[0-9a-f]{4})" + "|"
    + r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries\]$)",
    re.M)

# Raw value printed in parentheses at the very end of the "name = value" line.
_RAW_VALUE_RE = re.compile(r"\(.*\)\n")

# Fallback: everything after the first "=" on the "name = value" line.
_PLAIN_VALUE_RE = re.compile(r"=.*\n")


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def iter_tags(text):
    """Yield ``(ifd, tag_id, name, value)`` for each tag entry in *text*.

    *text* is the complete output of ``exiftool -v2``.  Entries are yielded
    in input order.  ``ifd`` is the name of the innermost directory
    enclosing the tag, ``tag_id`` is the ``0x....`` ID from the entry's
    ``- Tag`` line, and ``value`` is the parenthesised raw value when the
    line ends with one, otherwise the text after ``= ``.  The strings are
    not XML-escaped.

    Raises IndexError if a tag entry appears at a nesting depth for which
    no directory line has been seen.
    """
    depth = 0
    ifd_stack = []

    for match in _ENTRY_RE.finditer(text):
        entry, entry_prefix, directory, dir_prefix = match.groups("")

        if directory:
            indent = len(dir_prefix)
            # Skip the nesting prefix and the "+ [" that follows it; the
            # directory name is the first word inside the brackets.
            ifd = directory[indent + 3:].split()[0]
            # Each "| " is one level.  A directory line carries one "| "
            # less than the entries it contains, hence the + 1.
            new_depth = indent // 2 + 1
            if new_depth > depth:
                ifd_stack.append(ifd)
            else:
                # Leave the deeper directories, then replace the sibling
                # directory at the same level.
                for _ in range(depth - new_depth):
                    ifd_stack.pop()
                ifd_stack[-1] = ifd
            depth = new_depth
            continue

        new_depth = len(entry_prefix) // 2
        # range() is empty when we did not move outwards.
        for _ in range(depth - new_depth):
            ifd_stack.pop()
        depth = new_depth

        # Prefer the raw value in the parentheses: strip "(" and ")\n".
        raw = _RAW_VALUE_RE.search(entry)
        if raw:
            value = raw.group(0)[1:-2]
        else:
            # Strip the leading "= " and the trailing newline.
            value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]

        # The entry pattern ends with "0x" plus four hex digits, so the ID
        # is always the last six characters.  Searching the whole entry
        # would pick up a hex-looking name or value instead.
        tag_id = entry[-6:]

        # The name sits between the "x)" index and the first " = ".  The
        # padding after ")" varies with the width of the index, so strip
        # it rather than assuming a fixed column.
        after_index = entry[len(entry_prefix):].partition(")")[2]
        name = after_index.partition(" = ")[0].lstrip()

        yield ifd_stack[-1], tag_id, name, value


def to_xml_lines(text):
    """Yield the lines (without newlines) of the XML document for *text*.

    Attribute values and element text are XML-escaped so that values
    containing ``<``, ``&`` or ``"`` still produce well-formed output.
    """
    yield '<?xml version="1.0" encoding="utf-8"?>'
    yield "<exif>"
    for ifd, tag_id, name, value in iter_tags(text):
        yield '    <tag ifd="%s" id="%s" name="%s">%s</tag>' % (
            _attr(ifd), _attr(tag_id), _attr(name), escape(value))
    yield "</exif>"


def main():
    """Convert ``exiftool -v2`` output read from stdin to XML on stdout."""
    # sys.stdout.write instead of print keeps the script valid on both
    # Python 2 and Python 3.
    for line in to_xml_lines(sys.stdin.read()):
        sys.stdout.write(line + "\n")


if __name__ == "__main__":
    main()