author    Mike Bayer <mike_mp@zzzcomputing.com>    2010-03-02 16:09:58 +0000
committer Mike Bayer <mike_mp@zzzcomputing.com>    2010-03-02 16:09:58 +0000
commit    71d199f72e368f473d485c9eff6ec2d1308b3781 (patch)
tree      bb80e930e1ceb92437c2aadca1e5e5a5401cb1dc /mako/lexer.py
parent    d190169d0b0059b312ba9adad7011ecdff177188 (diff)
cleanup of lexer
Diffstat (limited to 'mako/lexer.py')
-rw-r--r--    mako/lexer.py    138
1 file changed, 102 insertions, 36 deletions
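
The diff below is mostly mechanical reflowing of long statements to conventional line lengths, plus the conversion of the lambda-based exception_kwargs class attribute into a decorated property method. As a trimmed-down sketch of that pattern outside of Mako (class names and the dict contents here are illustrative only, not the real Lexer):

# Before: a read-only property built from a lambda at class scope,
# which forces the whole dict literal onto one long line.
class LexerBefore(object):
    def __init__(self, text):
        self.text = text
    exception_kwargs = property(lambda self: {'source': self.text})

# After: the same property as a decorated method, easier to read
# and to wrap across multiple lines.
class LexerAfter(object):
    def __init__(self, text):
        self.text = text

    @property
    def exception_kwargs(self):
        return {'source': self.text}

assert LexerBefore("x").exception_kwargs == LexerAfter("x").exception_kwargs
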
diff --git a/mako/lexer.py b/mako/lexer.py
index 52a4b6d..65be795 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -13,7 +13,9 @@ from mako.pygen import adjust_whitespace
_regexp_cache = {}
class Lexer(object):
- def __init__(self, text, filename=None, disable_unicode=False, input_encoding=None, preprocessor=None):
+ def __init__(self, text, filename=None,
+ disable_unicode=False,
+ input_encoding=None, preprocessor=None):
self.text = text
self.filename = filename
self.template = parsetree.TemplateNode(self.filename)
@@ -31,8 +33,13 @@ class Lexer(object):
self.preprocessor = [preprocessor]
else:
self.preprocessor = preprocessor
-
- exception_kwargs = property(lambda self:{'source':self.text, 'lineno':self.matched_lineno, 'pos':self.matched_charpos, 'filename':self.filename})
+
+ @property
+ def exception_kwargs(self):
+ return {'source':self.text,
+ 'lineno':self.matched_lineno,
+ 'pos':self.matched_charpos,
+ 'filename':self.filename}
def match(self, regexp, flags=None):
"""match the given regular expression string and flags to the current text position.
@@ -62,7 +69,8 @@ class Lexer(object):
cp -=1
self.matched_charpos = mp - cp
self.lineno += len(lines)
- #print "MATCHED:", match.group(0), "LINE START:", self.matched_lineno, "LINE END:", self.lineno
+ #print "MATCHED:", match.group(0), "LINE START:",
+ # self.matched_lineno, "LINE END:", self.lineno
#print "MATCH:", regexp, "\n", self.text[mp : mp + 15], (match and "TRUE" or "FALSE")
return match
@@ -76,15 +84,23 @@ class Lexer(object):
if match:
m = self.match(r'.*?%s' % match.group(1), re.S)
if not m:
- raise exceptions.SyntaxException("Unmatched '%s'" % match.group(1), **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "Unmatched '%s'" %
+ match.group(1),
+ **self.exception_kwargs)
else:
match = self.match(r'(%s)' % r'|'.join(text))
if match:
- return (self.text[startpos:self.match_position-len(match.group(1))], match.group(1))
+ return \
+ self.text[startpos:self.match_position-len(match.group(1))],\
+ match.group(1)
else:
match = self.match(r".*?(?=\"|\'|#|%s)" % r'|'.join(text), re.S)
if not match:
- raise exceptions.SyntaxException("Expected: %s" % ','.join(text), **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "Expected: %s" %
+ ','.join(text),
+ **self.exception_kwargs)
def append_node(self, nodecls, *args, **kwargs):
kwargs.setdefault('source', self.text)
@@ -105,8 +121,12 @@ class Lexer(object):
self.control_line.pop()
elif node.is_primary:
self.control_line.append(node)
- elif len(self.control_line) and not self.control_line[-1].is_ternary(node.keyword):
- raise exceptions.SyntaxException("Keyword '%s' not a legal ternary for keyword '%s'" % (node.keyword, self.control_line[-1].keyword), **self.exception_kwargs)
+ elif len(self.control_line) and \
+ not self.control_line[-1].is_ternary(node.keyword):
+ raise exceptions.SyntaxException(
+ "Keyword '%s' not a legal ternary for keyword '%s'" %
+ (node.keyword, self.control_line[-1].keyword),
+ **self.exception_kwargs)
def escape_code(self, text):
if not self.disable_unicode and self.encoding:
@@ -122,22 +142,34 @@ class Lexer(object):
parsed_encoding = 'utf-8'
me = self.match_encoding()
if me is not None and me != 'utf-8':
- raise exceptions.CompileException("Found utf-8 BOM in file, with conflicting magic encoding comment of '%s'" % me, self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
+ raise exceptions.CompileException(
+ "Found utf-8 BOM in file, with conflicting "
+ "magic encoding comment of '%s'" % me,
+ self.text.decode('utf-8', 'ignore'),
+ 0, 0, self.filename)
else:
parsed_encoding = self.match_encoding()
if parsed_encoding:
self.encoding = parsed_encoding
+
if not self.disable_unicode and not isinstance(self.text, unicode):
if self.encoding:
try:
self.text = self.text.decode(self.encoding)
except UnicodeDecodeError, e:
- raise exceptions.CompileException("Unicode decode operation of encoding '%s' failed" % self.encoding, self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
+ raise exceptions.CompileException(
+ "Unicode decode operation of encoding '%s' failed" %
+ self.encoding,
+ self.text.decode('utf-8', 'ignore'),
+ 0, 0, self.filename)
else:
try:
self.text = self.text.decode()
except UnicodeDecodeError, e:
- raise exceptions.CompileException("Could not read template using encoding of 'ascii'. Did you forget a magic encoding comment?", self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
+ raise exceptions.CompileException(
+ "Could not read template using encoding of 'ascii'. "
+ "Did you forget a magic encoding comment?",
+ self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
self.textlength = len(self.text)
@@ -167,9 +199,15 @@ class Lexer(object):
raise exceptions.CompileException("assertion failed")
if len(self.tag):
- raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, **self.exception_kwargs)
+ raise exceptions.SyntaxException("Unclosed tag: <%%%s>" %
+ self.tag[-1].keyword,
+ **self.exception_kwargs)
if len(self.control_line):
- raise exceptions.SyntaxException("Unterminated control keyword: '%s'" % self.control_line[-1].keyword, self.text, self.control_line[-1].lineno, self.control_line[-1].pos, self.filename)
+ raise exceptions.SyntaxException("Unterminated control keyword: '%s'" %
+ self.control_line[-1].keyword,
+ self.text,
+ self.control_line[-1].lineno,
+ self.control_line[-1].pos, self.filename)
return self.template
def match_encoding(self):
@@ -196,12 +234,12 @@ class Lexer(object):
re.I | re.S | re.X)
if match:
- (keyword, attr, isend) = (match.group(1), match.group(2), match.group(3))
+ keyword, attr, isend = match.group(1), match.group(2), match.group(3)
self.keyword = keyword
attributes = {}
if attr:
for att in re.findall(r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr):
- (key, val1, val2) = att
+ key, val1, val2 = att
text = val1 or val2
text = text.replace('\r\n', '\n')
attributes[key] = self.escape_code(text)
@@ -212,7 +250,10 @@ class Lexer(object):
if keyword == 'text':
match = self.match(r'(.*?)(?=\</%text>)', re.S)
if not match:
- raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "Unclosed tag: <%%%s>" %
+ self.tag[-1].keyword,
+ **self.exception_kwargs)
self.append_node(parsetree.Text, match.group(1))
return self.match_tag_end()
return True
@@ -223,9 +264,15 @@ class Lexer(object):
match = self.match(r'\</%[\t ]*(.+?)[\t ]*>')
if match:
if not len(self.tag):
- raise exceptions.SyntaxException("Closing tag without opening tag: </%%%s>" % match.group(1), **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "Closing tag without opening tag: </%%%s>" %
+ match.group(1),
+ **self.exception_kwargs)
elif self.tag[-1].keyword != match.group(1):
- raise exceptions.SyntaxException("Closing tag </%%%s> does not match tag: <%%%s>" % (match.group(1), self.tag[-1].keyword), **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "Closing tag </%%%s> does not match tag: <%%%s>" %
+ (match.group(1), self.tag[-1].keyword),
+ **self.exception_kwargs)
self.tag.pop()
return True
else:
@@ -246,16 +293,18 @@ class Lexer(object):
match = self.match(r"""
(.*?) # anything, followed by:
(
- (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based comment preceded by a consumed \n and whitespace
+ (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
+ # comment preceded by a
+ # consumed newline and whitespace
|
- (?=\${) # an expression
+ (?=\${) # an expression
|
- (?=\#\*) # multiline comment
+ (?=\#\*) # multiline comment
|
(?=</?[%&]) # a substitution or block or call start or end
- # - don't consume
+ # - don't consume
|
- (\\\r?\n) # an escaped newline - throw away
+ (\\\r?\n) # an escaped newline - throw away
|
\Z # end of string
)""", re.X | re.S)
@@ -270,10 +319,15 @@ class Lexer(object):
def match_python_block(self):
match = self.match(r"<%(!)?")
if match:
- (line, pos) = (self.matched_lineno, self.matched_charpos)
- (text, end) = self.parse_until_text(r'%>')
- text = adjust_whitespace(text) + "\n" # the trailing newline helps compiler.parse() not complain about indentation
- self.append_node(parsetree.Code, self.escape_code(text), match.group(1)=='!', lineno=line, pos=pos)
+ line, pos = self.matched_lineno, self.matched_charpos
+ text, end = self.parse_until_text(r'%>')
+ # the trailing newline helps
+ # compiler.parse() not complain about indentation
+ text = adjust_whitespace(text) + "\n"
+ self.append_node(
+ parsetree.Code,
+ self.escape_code(text),
+ match.group(1)=='!', lineno=line, pos=pos)
return True
else:
return False
@@ -281,14 +335,17 @@ class Lexer(object):
def match_expression(self):
match = self.match(r"\${")
if match:
- (line, pos) = (self.matched_lineno, self.matched_charpos)
- (text, end) = self.parse_until_text(r'\|', r'}')
+ line, pos = self.matched_lineno, self.matched_charpos
+ text, end = self.parse_until_text(r'\|', r'}')
if end == '|':
- (escapes, end) = self.parse_until_text(r'}')
+ escapes, end = self.parse_until_text(r'}')
else:
escapes = ""
text = text.replace('\r\n', '\n')
- self.append_node(parsetree.Expression, self.escape_code(text), escapes.strip(), lineno=line, pos=pos)
+ self.append_node(
+ parsetree.Expression,
+ self.escape_code(text), escapes.strip(),
+ lineno=line, pos=pos)
return True
else:
return False
@@ -301,15 +358,24 @@ class Lexer(object):
if operator == '%':
m2 = re.match(r'(end)?(\w+)\s*(.*)', text)
if not m2:
- raise exceptions.SyntaxException("Invalid control line: '%s'" % text, **self.exception_kwargs)
- (isend, keyword) = m2.group(1, 2)
+ raise exceptions.SyntaxException(
+ "Invalid control line: '%s'" %
+ text,
+ **self.exception_kwargs)
+ isend, keyword = m2.group(1, 2)
isend = (isend is not None)
if isend:
if not len(self.control_line):
- raise exceptions.SyntaxException("No starting keyword '%s' for '%s'" % (keyword, text), **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "No starting keyword '%s' for '%s'" %
+ (keyword, text),
+ **self.exception_kwargs)
elif self.control_line[-1].keyword != keyword:
- raise exceptions.SyntaxException("Keyword '%s' doesn't match keyword '%s'" % (text, self.control_line[-1].keyword), **self.exception_kwargs)
+ raise exceptions.SyntaxException(
+ "Keyword '%s' doesn't match keyword '%s'" %
+ (text, self.control_line[-1].keyword),
+ **self.exception_kwargs)
self.append_node(parsetree.ControlLine, keyword, isend, self.escape_code(text))
else:
self.append_node(parsetree.Comment, text)
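
As additional context, not part of the commit itself: a minimal usage sketch of this module, assuming the Lexer API shown above (the template text and filename are made up for illustration, and the exact node types attached to the resulting TemplateNode can vary between Mako versions).

from mako.lexer import Lexer

# parse() drives the match_* methods above and returns the root
# parsetree.TemplateNode populated via append_node().
source = "hello ${name}\n% if name:\n  hi\n% endif\n"
template_node = Lexer(source, filename='example.mako').parse()

for child in template_node.nodes:
    print(type(child).__name__)
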