Improve lexer with better string handling and grouping parens/brackets

This allows the lexer to correctly handle strings like: ${'backslash quote right-curly is \\\'}'}. It also allows users to make the vertical bar mean bitwise-or simply by enclosing the expression inside parens: ${(0x5432 | 0x8000)}, or by using it in the middle of a dictionary literal: ${ {'foo-val': 0x43 | 0x100, 'bar-val': 0x22 | 0x100}[thing+'-val']}, or inside brackets: ${ big_lookup_dict[index_low | (indexhigh << 3)] }. Basically, only "top level" uses of the vertical bar mean pipe. (Note that currently, any non-top-level use of the vertical bar in an expression just results in a syntax error in the generated Python, so no working code is affected by this change.)
author: Daniel Martin <daniel.martin@crowdstrike.com> 2016-02-04 13:14:41 -0500
committer: Daniel Martin <daniel.martin@crowdstrike.com> 2016-02-04 13:32:04 -0500
commit: 34f90e69aee6f1b8f6b85f903c525d1a9389451d (patch)
tree: c81f1147ec4cf3da80e6366fd63e482ecbe58909 /mako/lexer.py
parent: 1cfcf16f1bcb109ca32840d0e7c097d9902d6f64 (diff)
download: external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.gz
external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.bz2
external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.zip
1 files changed, 16 insertions, 10 deletions
diff --git a/mako/lexer.py b/mako/lexer.py
index 2fa08e4..a80b898 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -95,31 +95,37 @@ class Lexer(object):
         #          (match and "TRUE" or "FALSE")
         return match
 
-    def parse_until_text(self, *text):
+    def parse_until_text(self, watch_nesting, *text):
         startpos = self.match_position
         text_re = r'|'.join(text)
         brace_level = 0
+        paren_level = 0
+        bracket_level = 0
         while True:
             match = self.match(r'#.*\n')
             if match:
                 continue
-            match = self.match(r'(\"\"\"|\'\'\'|\"|\')((?<!\\)\\\1|.)*?\1',
+            match = self.match(r'(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1',
                                re.S)
             if match:
                 continue
             match = self.match(r'(%s)' % text_re)
-            if match:
-                if match.group(1) == '}' and brace_level > 0:
-                    brace_level -= 1
-                    continue
+            if match and not (watch_nesting
+                              and (brace_level > 0 or paren_level > 0
+                                   or bracket_level > 0)):
                 return \
                     self.text[startpos:
                               self.match_position - len(match.group(1))],\
                     match.group(1)
-            match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
+            elif not match:
+                match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
             if match:
                 brace_level += match.group(1).count('{')
                 brace_level -= match.group(1).count('}')
+                paren_level += match.group(1).count('(')
+                paren_level -= match.group(1).count(')')
+                bracket_level += match.group(1).count('[')
+                bracket_level -= match.group(1).count(']')
                 continue
             raise exceptions.SyntaxException(
                 "Expected: %s" %
@@ -368,7 +374,7 @@ class Lexer(object):
         match = self.match(r"<%(!)?")
         if match:
             line, pos = self.matched_lineno, self.matched_charpos
-            text, end = self.parse_until_text(r'%>')
+            text, end = self.parse_until_text(False, r'%>')
             # the trailing newline helps
             # compiler.parse() not complain about indentation
             text = adjust_whitespace(text) + "\n"
@@ -384,9 +390,9 @@ class Lexer(object):
         match = self.match(r"\${")
         if match:
             line, pos = self.matched_lineno, self.matched_charpos
-            text, end = self.parse_until_text(r'\|', r'}')
+            text, end = self.parse_until_text(True, r'\|', r'}')
             if end == '|':
-                escapes, end = self.parse_until_text(r'}')
+                escapes, end = self.parse_until_text(True, r'}')
             else:
                 escapes = ""
             text = text.replace('\r\n', '\n')
author	Daniel Martin <daniel.martin@crowdstrike.com>	2016-02-04 13:14:41 -0500
committer	Daniel Martin <daniel.martin@crowdstrike.com>	2016-02-04 13:32:04 -0500
commit	34f90e69aee6f1b8f6b85f903c525d1a9389451d (patch)
tree	c81f1147ec4cf3da80e6366fd63e482ecbe58909 /mako/lexer.py
parent	1cfcf16f1bcb109ca32840d0e7c097d9902d6f64 (diff)
download	external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.gz external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.tar.bz2 external_python_mako-34f90e69aee6f1b8f6b85f903c525d1a9389451d.zip