summaryrefslogtreecommitdiffstats
path: root/tests/exiftool_parser/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/exiftool_parser/parser.py')
-rwxr-xr-xtests/exiftool_parser/parser.py90
1 files changed, 90 insertions, 0 deletions
diff --git a/tests/exiftool_parser/parser.py b/tests/exiftool_parser/parser.py
new file mode 100755
index 000000000..7df23f154
--- /dev/null
+++ b/tests/exiftool_parser/parser.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+#
+# This parser parses the output of Phil Harvey's exiftool (version 9.02)
+# and converts it to XML format. It reads exiftool's output from stdin and
+# writes the XML to stdout.
+#
+# In order to get the raw information from exiftool, we need to enable its
+# verbose flag (-v2).
+#
+# Usage:
+# exiftool -v2 img.jpg | ./parser.py >> output.xml
+#
+#
+
+import os
+import sys
+import re
+
+text = sys.stdin.read()  # the whole input is read in one go before matching
+
+print """<?xml version="1.0" encoding="utf-8"?>"""  # Python 2 print statement
+print "<exif>"  # root element; closed by the final print of the script
+
+# Match either of the following two kinds of text (the two alternatives of p):
+#
+# 1. tag (two lines; "name (SubDirectory) -->" is accepted instead of "name = value"):
+#
+# | | | x) name = value
+# | | | - Tag 0x1234
+#
+# 2. IFD indicator (one line, anchored at the end of the line):
+#
+# | | | + [xxx directory with xx entries]
+#
+p = re.compile(
+ "(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})" + "|"
+ + "(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)"
+ , re.M)  # re.M lets "$" match at the end of every line
+tags = p.findall(text)  # one 4-tuple per match; groups of the alternative that did not match are ''
+
+layer = 0  # nesting depth computed from the "| " prefix of the last match handled
+ifds = []  # stack of IFD names; ifds[-1] is emitted as the ifd attribute
+
+for s in tags:
+ # IFD indicator: s[2] is the whole indicator match, s[3] is its "| " prefix
+ if s[2]:
+ l = len(s[3])
+ ifd = s[2][l + 3:].split()[0]  # skip the prefix plus the 3 characters "+ [", keep the first word
+ new_layer = l / 2 + 1  # each "| " in the pattern is 2 characters; "/" on ints floors under Python 2
+ if new_layer > layer:
+ ifds.append(ifd)
+ else:
+ for i in range(layer - new_layer):
+ ifds.pop()
+ ifds[-1] = ifd  # NOTE(review): indentation is lost in this view; presumably runs once after the pop loop - confirm
+ layer = new_layer
+ else:  # tag: s[0] is the whole two-line match, s[1] is its "| " prefix
+ l = len(s[1])
+ s = s[0]  # from here on s is the matched text, no longer the tuple
+ new_layer = l / 2
+ if new_layer < layer:
+ for i in range(layer - new_layer):
+ ifds.pop()
+ layer = new_layer
+
+ # find the ID: the first "0x" followed by four lowercase hex digits in the match
+ _id = re.search("0x[0-9a-f]{4}", s)
+ _id = _id.group(0)
+
+ # find the name: the second whitespace-separated word of the "x) name ..." part
+ name = re.search("[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))", s)
+ name = name.group(0).split()[1]
+
+ # find the raw value in the parentheses; SubDirectory tags get NO_VALUE instead
+ value = re.search("\(SubDirectory\) -->", s)
+ if value:
+ value = "NO_VALUE"
+ else:
+ value = re.search("\(.*\)\n", s)  # parenthesized text that ends a line
+ if (name != 'Model' and value):  # 'Model' never uses the parenthesized text
+ value = value.group(0)[1:-2]  # drop the leading "(" and the trailing ")\n"
+ else:
+ value = re.search("=.*\n", s)  # fall back to the text after "="
+ value = value.group(0)[2:-1]  # drop the first two characters ("=" and the one after it) and the trailing "\n"
+ if "[snip]" in value:  # a value containing "[snip]" is replaced by the placeholder
+ value = "NO_VALUE"
+
+ print (' <tag ifd="' + ifds[-1] + '" id="'  # parts are concatenated as-is, without XML escaping
+ + _id + '" name="' + name +'">' + value + "</tag>")
+print "</exif>"