summaryrefslogtreecommitdiffstats
path: root/tests/exiftool_parser/parser.py
blob: 32984886ca905efbfc8846ce572fa68208609f7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML format. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the verbose
# flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import re
import sys
from xml.sax.saxutils import escape

# Find the following two groups of strings in exiftool's verbose output:
#
# 1. tag (captured as group 1, spanning two lines):
#
# x) name = value
#      - Tag 0x1234
#
# 2. IFD indicator (captured as group 2):
#
# + [xxx directory with xx entries]
#
_ENTRY_RE = re.compile(
    r"(^.*?[0-9]\).*? = .*?\n.*?- Tag 0x[0-9a-f]{4})"
    r"|(\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# Raw value: the parenthesised text that ends the first line of a tag entry.
_RAW_VALUE_RE = re.compile(r"\((.*)\)\n")
# Fallback value: everything after "= " up to the end of the first line.
_PLAIN_VALUE_RE = re.compile(r"= (.*)\n")
# Tag ID: anchored to the end of the entry so that a hex-looking *value*
# (e.g. "Foo = 0x00ff") is never mistaken for the ID.
_TAG_ID_RE = re.compile(r"- Tag (0x[0-9a-f]{4})\Z")
# Tag name: the text between the "N)" index (plus padding spaces) and " = ".
# Capturing it avoids a fixed-width slice, which breaks for indices >= 100.
_NAME_RE = re.compile(r"[0-9]+\) *(.*?) = ")

# Extra entity needed when text is placed inside a double-quoted attribute.
_ATTR_ENTITIES = {'"': "&quot;"}


def _format_tag(ifd, entry):
    """Return the ``<tag>`` XML line for one two-line tag entry.

    ifd   -- name of the directory the tag belongs to ("" if none seen yet)
    entry -- text matched by group 1 of _ENTRY_RE
    """
    raw = _RAW_VALUE_RE.search(entry)
    if raw:
        value = raw.group(1)
    else:
        # Always matches: _ENTRY_RE guarantees " = ...\n" on the first line.
        value = _PLAIN_VALUE_RE.search(entry).group(1)

    tag_id = _TAG_ID_RE.search(entry).group(1)
    name = _NAME_RE.search(entry).group(1)

    # Escape everything so that '&', '<', '>' and '"' in exiftool's output
    # cannot produce malformed XML.
    return '    <tag ifd="%s" id="%s" name="%s">%s</tag>' % (
        escape(ifd, _ATTR_ENTITIES),
        escape(tag_id, _ATTR_ENTITIES),
        escape(name, _ATTR_ENTITIES),
        escape(value),
    )


def convert(text):
    """Convert exiftool ``-v2`` output to XML.

    text -- the complete verbose output of exiftool

    Returns the XML document as a list of lines (without line terminators).
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]
    # Tags seen before any directory indicator get an empty ifd instead of
    # raising NameError.
    ifd = ""
    for entry, indicator in _ENTRY_RE.findall(text):
        if indicator:
            # "+ [IFD0 directory with 11 entries]" -> "IFD0"
            ifd = indicator[len("+ ["):].split()[0]
        else:
            lines.append(_format_tag(ifd, entry))
    lines.append("</exif>")
    return lines


def main():
    """Read exiftool output from stdin and write the XML to stdout."""
    for line in convert(sys.stdin.read()):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(line)


if __name__ == "__main__":
    main()