diff options
author | Daniel Martin <daniel.martin@crowdstrike.com> | 2016-02-04 13:14:41 -0500 |
---|---|---|
committer | Daniel Martin <daniel.martin@crowdstrike.com> | 2016-02-04 13:32:04 -0500 |
commit | 34f90e69aee6f1b8f6b85f903c525d1a9389451d (patch) | |
tree | c81f1147ec4cf3da80e6366fd63e482ecbe58909 /mako/lexer.py | |
parent | 1cfcf16f1bcb109ca32840d0e7c097d9902d6f64 (diff) | |
download | external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.gz external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.bz2 external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.zip |
Improve lexer with better string handling and grouping parens/brackets
This allows the lexer to correctly handle strings like:
${'backslash quote right-curly is \\\'}'}
It also allows users to use the vertical bar as the bitwise-or operator
simply by enclosing the expression inside parentheses:
${(0x5432 | 0x8000)}
or by using it in the middle of a dictionary literal:
${ {'foo-val': 0x43 | 0x100, 'bar-val': 0x22 | 0x100}[thing+'-val']}
or inside brackets:
${ big_lookup_dict[index_low | (indexhigh << 3)] }
Basically, only "top-level" uses of the vertical bar are treated as a pipe.
(Note that currently, any non-top-level use of the vertical bar in
an expression just results in a syntax error in the generated Python,
so no working code is affected by this change.)
Diffstat (limited to 'mako/lexer.py')
-rw-r--r-- | mako/lexer.py | 26 |
1 files changed, 16 insertions, 10 deletions
diff --git a/mako/lexer.py b/mako/lexer.py index 2fa08e4..a80b898 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -95,31 +95,37 @@ class Lexer(object): # (match and "TRUE" or "FALSE") return match - def parse_until_text(self, *text): + def parse_until_text(self, watch_nesting, *text): startpos = self.match_position text_re = r'|'.join(text) brace_level = 0 + paren_level = 0 + bracket_level = 0 while True: match = self.match(r'#.*\n') if match: continue - match = self.match(r'(\"\"\"|\'\'\'|\"|\')((?<!\\)\\\1|.)*?\1', + match = self.match(r'(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1', re.S) if match: continue match = self.match(r'(%s)' % text_re) - if match: - if match.group(1) == '}' and brace_level > 0: - brace_level -= 1 - continue + if match and not (watch_nesting + and (brace_level > 0 or paren_level > 0 + or bracket_level > 0)): return \ self.text[startpos: self.match_position - len(match.group(1))],\ match.group(1) - match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) + elif not match: + match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) if match: brace_level += match.group(1).count('{') brace_level -= match.group(1).count('}') + paren_level += match.group(1).count('(') + paren_level -= match.group(1).count(')') + bracket_level += match.group(1).count('[') + bracket_level -= match.group(1).count(']') continue raise exceptions.SyntaxException( "Expected: %s" % @@ -368,7 +374,7 @@ class Lexer(object): match = self.match(r"<%(!)?") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(r'%>') + text, end = self.parse_until_text(False, r'%>') # the trailing newline helps # compiler.parse() not complain about indentation text = adjust_whitespace(text) + "\n" @@ -384,9 +390,9 @@ class Lexer(object): match = self.match(r"\${") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(r'\|', r'}') + text, end = self.parse_until_text(True, r'\|', r'}') if end == 
'|': - escapes, end = self.parse_until_text(r'}') + escapes, end = self.parse_until_text(True, r'}') else: escapes = "" text = text.replace('\r\n', '\n') |