aboutsummaryrefslogtreecommitdiffstats
path: root/mako/lexer.py
diff options
context:
space:
mode:
author: Daniel Martin <daniel.martin@crowdstrike.com> 2016-02-04 13:14:41 -0500
committer: Daniel Martin <daniel.martin@crowdstrike.com> 2016-02-04 13:32:04 -0500
commit: 34f90e69aee6f1b8f6b85f903c525d1a9389451d (patch)
tree: c81f1147ec4cf3da80e6366fd63e482ecbe58909 /mako/lexer.py
parent: 1cfcf16f1bcb109ca32840d0e7c097d9902d6f64 (diff)
download: external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.gz
external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.bz2
external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.zip
Improve lexer with better string handling and grouping parens/brackets
This allows the lexer to correctly handle strings like: ${'backslash quote right-curly is \\\'}'}. It also allows users to use the vertical bar as the bitwise-or operator (rather than as the filter pipe) simply by enclosing the expression inside parens: ${(0x5432 | 0x8000)}, or by using it in the middle of a dictionary literal: ${ {'foo-val': 0x43 | 0x100, 'bar-val': 0x22 | 0x100}[thing+'-val']}, or inside brackets: ${ big_lookup_dict[index_low | (indexhigh << 3)] }. Basically, only "top level" uses of the vertical bar mean pipe. (Note that currently, any non-top-level use of the vertical bar in an expression just results in a syntax error in the generated Python, so no working code is affected by this change.)
Diffstat (limited to 'mako/lexer.py')
-rw-r--r-- mako/lexer.py 26
1 file changed, 16 insertions, 10 deletions
diff --git a/mako/lexer.py b/mako/lexer.py
index 2fa08e4..a80b898 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -95,31 +95,37 @@ class Lexer(object):
# (match and "TRUE" or "FALSE")
return match
- def parse_until_text(self, *text):
+ def parse_until_text(self, watch_nesting, *text):
startpos = self.match_position
text_re = r'|'.join(text)
brace_level = 0
+ paren_level = 0
+ bracket_level = 0
while True:
match = self.match(r'#.*\n')
if match:
continue
- match = self.match(r'(\"\"\"|\'\'\'|\"|\')((?<!\\)\\\1|.)*?\1',
+ match = self.match(r'(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1',
re.S)
if match:
continue
match = self.match(r'(%s)' % text_re)
- if match:
- if match.group(1) == '}' and brace_level > 0:
- brace_level -= 1
- continue
+ if match and not (watch_nesting
+ and (brace_level > 0 or paren_level > 0
+ or bracket_level > 0)):
return \
self.text[startpos:
self.match_position - len(match.group(1))],\
match.group(1)
- match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
+ elif not match:
+ match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
if match:
brace_level += match.group(1).count('{')
brace_level -= match.group(1).count('}')
+ paren_level += match.group(1).count('(')
+ paren_level -= match.group(1).count(')')
+ bracket_level += match.group(1).count('[')
+ bracket_level -= match.group(1).count(']')
continue
raise exceptions.SyntaxException(
"Expected: %s" %
@@ -368,7 +374,7 @@ class Lexer(object):
match = self.match(r"<%(!)?")
if match:
line, pos = self.matched_lineno, self.matched_charpos
- text, end = self.parse_until_text(r'%>')
+ text, end = self.parse_until_text(False, r'%>')
# the trailing newline helps
# compiler.parse() not complain about indentation
text = adjust_whitespace(text) + "\n"
@@ -384,9 +390,9 @@ class Lexer(object):
match = self.match(r"\${")
if match:
line, pos = self.matched_lineno, self.matched_charpos
- text, end = self.parse_until_text(r'\|', r'}')
+ text, end = self.parse_until_text(True, r'\|', r'}')
if end == '|':
- escapes, end = self.parse_until_text(r'}')
+ escapes, end = self.parse_until_text(True, r'}')
else:
escapes = ""
text = text.replace('\r\n', '\n')