From f579ab7314ada4d49163ebeb58861c04194dc2d4 Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Fri, 12 Oct 2012 14:02:49 +0800 Subject: Exiftool parser This parser parse the output from Phil Harvey's 'exiftool' with -v2 flag and convert it input xml format Change-Id: Iffdb235780697f8c37738937fd4ecaeb1d1127bb --- tests/exiftool_parser/parser.py | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100755 tests/exiftool_parser/parser.py (limited to 'tests/exiftool_parser/parser.py') diff --git a/tests/exiftool_parser/parser.py b/tests/exiftool_parser/parser.py new file mode 100755 index 000000000..32984886c --- /dev/null +++ b/tests/exiftool_parser/parser.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# +# This parser parses the output from Phil Harvey's exiftool (version 9.02) +# and convert it to xml format. It reads exiftool's output from stdin and +# write the xml format to stdout. +# +# In order to get the raw infomation from exiftool, we need to enable the verbose +# flag (-v2) of exiftool. +# +# Usage: +# exiftool -v2 img.jpg | ./parser.py >> output.xml +# +# + +import os +import sys +import re + +text = sys.stdin.read() + +print """""" +print "" + +# find the following two groups of string: +# +# 1. tag: +# +# x) name = value +# - Tag 0x1234 +# +# 2. IFD indicator: +# +# + [xxx directory with xx entries] +# +p = re.compile( + "(^.*?[0-9]\).*? = .*?\n.*?- Tag 0x[0-9a-f]{4})|(\+ \[.*? directory with [0-9]+ entries]$)" + , re.M) +tags = p.findall(text) + +for s in tags: + if s[1]: + ifd = s[1][3:].split()[0] + else: + s = s[0] + # find the raw value in the parenthesis + p = re.compile("\(.*\)\n") + value = p.search(s) + if value: + value = value.group(0)[1:-2] + else: + p = re.compile("=.*\n") + value = p.search(s) + value = value.group(0)[2:-1] + + # find the ID + p = re.compile("0x[0-9a-f]{4}") + _id = p.search(s) + _id = _id.group(0) + + # find the name + p = re.compile("[0-9]*?\).*? = ") + name = p.search(s) + name = name.group(0)[4:-3] + print (' ' + value + "") +print "" -- cgit v1.2.3 From 030bf17cb3792e9f9e0492e0d0e24f6cb819c7e0 Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Wed, 17 Oct 2012 12:08:47 +0800 Subject: Parse the layer of exiftool output Change-Id: I0ee45608126d49913a96790f2c2e99ec66ccbca4 --- tests/exiftool_parser/parser.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) (limited to 'tests/exiftool_parser/parser.py') diff --git a/tests/exiftool_parser/parser.py b/tests/exiftool_parser/parser.py index 32984886c..351c1b63c 100755 --- a/tests/exiftool_parser/parser.py +++ b/tests/exiftool_parser/parser.py @@ -25,31 +25,50 @@ print "" # # 1. tag: # -# x) name = value -# - Tag 0x1234 +# | | | x) name = value +# | | | - Tag 0x1234 # # 2. IFD indicator: # -# + [xxx directory with xx entries] +# | | | + [xxx directory with xx entries] # p = re.compile( - "(^.*?[0-9]\).*? = .*?\n.*?- Tag 0x[0-9a-f]{4})|(\+ \[.*? directory with [0-9]+ entries]$)" + "(((?:\| )+)[0-9]*\).*? = .*?\n.*?- Tag 0x[0-9a-f]{4})" + "|" + + "(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)" , re.M) tags = p.findall(text) +layer = 0 +ifds = [] + for s in tags: - if s[1]: - ifd = s[1][3:].split()[0] + # IFD indicator + if s[2]: + l = len(s[3]) + ifd = s[2][l + 3:].split()[0] + new_layer = l / 2 + 1 + if new_layer > layer: + ifds.append(ifd) + else: + for i in range(layer - new_layer): + ifds.pop() + ifds[-1] = ifd + layer = new_layer else: + l = len(s[1]) s = s[0] + new_layer = l / 2 + if new_layer < layer: + for i in range(layer - new_layer): + ifds.pop() + layer = new_layer + # find the raw value in the parenthesis - p = re.compile("\(.*\)\n") - value = p.search(s) + value = re.search("\(.*\)\n", s) if value: value = value.group(0)[1:-2] else: - p = re.compile("=.*\n") - value = p.search(s) + value = re.search("=.*\n", s) value = value.group(0)[2:-1] # find the ID @@ -61,6 +80,6 @@ for s in tags: p = re.compile("[0-9]*?\).*? = ") name = p.search(s) name = name.group(0)[4:-3] - print (' ' + value + "") print "" -- cgit v1.2.3 From 144dc616219e088680c267ae6a0463364b33c41d Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Wed, 17 Oct 2012 13:59:15 +0800 Subject: Do not use parentheses for Model tag in exiftool parser Change-Id: Idc45cd389976f0c474dceb550790a6814cb380b8 --- tests/exiftool_parser/parser.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'tests/exiftool_parser/parser.py') diff --git a/tests/exiftool_parser/parser.py b/tests/exiftool_parser/parser.py index 351c1b63c..5de54a352 100755 --- a/tests/exiftool_parser/parser.py +++ b/tests/exiftool_parser/parser.py @@ -63,23 +63,22 @@ for s in tags: ifds.pop() layer = new_layer + # find the ID + _id = re.search("0x[0-9a-f]{4}", s) + _id = _id.group(0) + + # find the name + name = re.search("[0-9]*?\).*? = ", s) + name = name.group(0)[4:-3] + # find the raw value in the parenthesis value = re.search("\(.*\)\n", s) - if value: + if (name != 'Model' and value): value = value.group(0)[1:-2] else: value = re.search("=.*\n", s) value = value.group(0)[2:-1] - # find the ID - p = re.compile("0x[0-9a-f]{4}") - _id = p.search(s) - _id = _id.group(0) - - # find the name - p = re.compile("[0-9]*?\).*? = ") - name = p.search(s) - name = name.group(0)[4:-3] print (' ' + value + "") print "" -- cgit v1.2.3 From 5be98ed60daac27a7d99fe89b9ed4709bfb218aa Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Thu, 18 Oct 2012 16:38:32 +0800 Subject: use NO_VALUE when the output of exiftool is invalid Change-Id: I85c0054d70ff1639e643a30658ca59f1803f7d13 --- tests/exiftool_parser/parser.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'tests/exiftool_parser/parser.py') diff --git a/tests/exiftool_parser/parser.py b/tests/exiftool_parser/parser.py index 5de54a352..7df23f154 100755 --- a/tests/exiftool_parser/parser.py +++ b/tests/exiftool_parser/parser.py @@ -33,7 +33,7 @@ print "" # | | | + [xxx directory with xx entries] # p = re.compile( - "(((?:\| )+)[0-9]*\).*? = .*?\n.*?- Tag 0x[0-9a-f]{4})" + "|" + "(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})" + "|" + "(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)" , re.M) tags = p.findall(text) @@ -68,16 +68,22 @@ for s in tags: _id = _id.group(0) # find the name - name = re.search("[0-9]*?\).*? = ", s) - name = name.group(0)[4:-3] + name = re.search("[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))", s) + name = name.group(0).split()[1] # find the raw value in the parenthesis - value = re.search("\(.*\)\n", s) - if (name != 'Model' and value): - value = value.group(0)[1:-2] + value = re.search("\(SubDirectory\) -->", s) + if value: + value = "NO_VALUE" else: - value = re.search("=.*\n", s) - value = value.group(0)[2:-1] + value = re.search("\(.*\)\n", s) + if (name != 'Model' and value): + value = value.group(0)[1:-2] + else: + value = re.search("=.*\n", s) + value = value.group(0)[2:-1] + if "[snip]" in value: + value = "NO_VALUE" print (' ' + value + "") -- cgit v1.2.3