#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to xml format. It reads exiftool's output from stdin and
# writes the xml format to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#     exiftool -v2 img.jpg | ./parser.py >> output.xml
#
# Nothing is read or printed at import time; the work happens in main(),
# which only runs when the file is executed as a script.
#

import os
import sys
import re

# Matches either of the following two groups of string:
#
# 1. tag (named group "tag"):
#
#    | | | x) name = value
#    | | | - Tag 0x1234
#
# 2. IFD indicator (named group "ifd"):
#
#    | | | + [xxx directory with xx entries]
#
# "tag_pipes" / "ifd_pipes" capture the leading run of "| " prefixes and
# "id" captures the 0x1234 that follows "- Tag ".
_ENTRY_RE = re.compile(
    r"(?P<tag>(?P<tag_pipes>(?:\| )+)[0-9]*\).*? = .*?\n"
    r".*?- Tag (?P<id>0x[0-9a-f]{4}))"
    r"|"
    r"(?P<ifd>(?P<ifd_pipes>(?:\| )*)\+ \[.*? directory with "
    r"[0-9]+ entries]$)",
    re.M)

# A parenthesised part that ends the "name = value" line (the raw value).
_RAW_VALUE_RE = re.compile(r"\(.*\)\n")
# Everything from the first "=" to the end of its line.
_PLAIN_VALUE_RE = re.compile(r"=.*\n")
# The "x) name = " head of a tag entry.
_NAME_RE = re.compile(r"[0-9]*?\).*? = ")


def iter_tags(text):
    """Yield one (ifd, tag_id, name, value) tuple per tag entry in *text*.

    text   -- the complete output of "exiftool -v2" as one string.

    ifd    -- name of the innermost directory announced by an IFD indicator
              that encloses the tag, or None if no directory is open.
    tag_id -- the "0x1234" string taken from the entry's "- Tag" line.
    name   -- the tag name found between "x) " and " = ".
    value  -- the text inside the parentheses that end the "name = value"
              line when there are any, otherwise the text after "= ".
    """
    # Names of the directories currently open, outermost first.
    ifds = []

    for match in _ENTRY_RE.finditer(text):
        indicator = match.group("ifd")
        if indicator:
            prefix_len = len(match.group("ifd_pipes"))
            # Skip the "| " prefixes and the 3 characters of "+ [", then
            # keep the first word: the directory name.
            ifd = indicator[prefix_len + 3:].split()[0]
            # Every "| " prefix is two characters wide; the directory's
            # contents live one level below the indicator itself.
            depth = prefix_len // 2 + 1
            # Close every directory at this depth or deeper, then open the
            # new one. Truncating by slice (rather than popping a counted
            # number of items) cannot fail when the depth jumps.
            del ifds[depth - 1:]
            ifds.append(ifd)
            continue

        entry = match.group("tag")
        depth = len(match.group("tag_pipes")) // 2
        # Close the directories this tag is no longer nested in.
        del ifds[depth:]

        # find the raw value in the parenthesis
        raw = _RAW_VALUE_RE.search(entry)
        if raw:
            # Drop the opening "(" and the closing ")\n".
            value = raw.group(0)[1:-2]
        else:
            # Drop the leading "= " and the trailing newline.
            value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]

        # find the name: drop the first four characters and the trailing
        # " = ".
        # NOTE(review): the slice assumes the "x) " index column is four
        # characters wide -- confirm against real exiftool output.
        name = _NAME_RE.search(entry).group(0)[4:-3]

        # The ID is taken from the "- Tag 0x...." line itself, so a value
        # that happens to contain a 0x.... string cannot be mistaken for it.
        yield (ifds[-1] if ifds else None, match.group("id"), name, value)


def render_lines(text):
    """Yield the lines main() prints for the exiftool output *text*.

    Two empty lines come first, then one line per tag consisting of a space
    followed by the tag's value, then one empty line.
    """
    # NOTE(review): the header describes XML output, but the emitted lines
    # carry no markup and do not use the IFD, ID or name of a tag -- confirm
    # that the string literals were not lost from this file.
    yield ""
    yield ""
    for _ifd, _id, _name, value in iter_tags(text):
        yield " " + value
    yield ""


def main():
    """Read exiftool's verbose output from stdin and print the result."""
    text = sys.stdin.read()
    for line in render_lines(text):
        # A single parenthesised argument prints the same thing under the
        # Python 2 print statement and the Python 3 print function.
        print(line)


if __name__ == "__main__":
    main()