diff options
Diffstat (limited to 'mako/lexer.py')
-rw-r--r-- | mako/lexer.py | 273 |
1 files changed, 157 insertions, 116 deletions
diff --git a/mako/lexer.py b/mako/lexer.py index cf4187f..e11a949 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -6,19 +6,26 @@ """provides the Lexer class for parsing template strings into parse trees.""" -import re import codecs -from mako import parsetree, exceptions, compat +import re + +from mako import compat +from mako import exceptions +from mako import parsetree from mako.pygen import adjust_whitespace _regexp_cache = {} class Lexer(object): - - def __init__(self, text, filename=None, - disable_unicode=False, - input_encoding=None, preprocessor=None): + def __init__( + self, + text, + filename=None, + disable_unicode=False, + input_encoding=None, + preprocessor=None, + ): self.text = text self.filename = filename self.template = parsetree.TemplateNode(self.filename) @@ -34,22 +41,24 @@ class Lexer(object): if compat.py3k and disable_unicode: raise exceptions.UnsupportedError( - "Mako for Python 3 does not " - "support disabling Unicode") + "Mako for Python 3 does not " "support disabling Unicode" + ) if preprocessor is None: self.preprocessor = [] - elif not hasattr(preprocessor, '__iter__'): + elif not hasattr(preprocessor, "__iter__"): self.preprocessor = [preprocessor] else: self.preprocessor = preprocessor @property def exception_kwargs(self): - return {'source': self.text, - 'lineno': self.matched_lineno, - 'pos': self.matched_charpos, - 'filename': self.filename} + return { + "source": self.text, + "lineno": self.matched_lineno, + "pos": self.matched_charpos, + "filename": self.filename, + } def match(self, regexp, flags=None): """compile the given regexp, cache the reg, and call match_reg().""" @@ -83,9 +92,9 @@ class Lexer(object): else: self.match_position = end self.matched_lineno = self.lineno - lines = re.findall(r"\n", self.text[mp:self.match_position]) + lines = re.findall(r"\n", self.text[mp : self.match_position]) cp = mp - 1 - while (cp >= 0 and cp < self.textlength and self.text[cp] != '\n'): + while cp >= 0 and cp < self.textlength and self.text[cp] != "\n": cp -= 1 self.matched_charpos = mp - cp self.lineno += len(lines) @@ -97,46 +106,49 @@ class Lexer(object): def parse_until_text(self, watch_nesting, *text): startpos = self.match_position - text_re = r'|'.join(text) + text_re = r"|".join(text) brace_level = 0 paren_level = 0 bracket_level = 0 while True: - match = self.match(r'#.*\n') + match = self.match(r"#.*\n") if match: continue - match = self.match(r'(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1', - re.S) + match = self.match( + r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S + ) if match: continue - match = self.match(r'(%s)' % text_re) - if match and not (watch_nesting - and (brace_level > 0 or paren_level > 0 - or bracket_level > 0)): - return \ - self.text[startpos: - self.match_position - len(match.group(1))],\ - match.group(1) + match = self.match(r"(%s)" % text_re) + if match and not ( + watch_nesting + and (brace_level > 0 or paren_level > 0 or bracket_level > 0) + ): + return ( + self.text[ + startpos : self.match_position - len(match.group(1)) + ], + match.group(1), + ) elif not match: match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) if match: - brace_level += match.group(1).count('{') - brace_level -= match.group(1).count('}') - paren_level += match.group(1).count('(') - paren_level -= match.group(1).count(')') - bracket_level += match.group(1).count('[') - bracket_level -= match.group(1).count(']') + brace_level += match.group(1).count("{") + brace_level -= match.group(1).count("}") + paren_level += match.group(1).count("(") + paren_level -= match.group(1).count(")") + bracket_level += match.group(1).count("[") + bracket_level -= match.group(1).count("]") continue raise exceptions.SyntaxException( - "Expected: %s" % - ','.join(text), - **self.exception_kwargs) + "Expected: %s" % ",".join(text), **self.exception_kwargs + ) def append_node(self, nodecls, *args, **kwargs): - kwargs.setdefault('source', self.text) - kwargs.setdefault('lineno', self.matched_lineno) - kwargs.setdefault('pos', self.matched_charpos) - kwargs['filename'] = self.filename + kwargs.setdefault("source", self.text) + kwargs.setdefault("lineno", self.matched_lineno) + kwargs.setdefault("pos", self.matched_charpos) + kwargs["filename"] = self.filename node = nodecls(*args, **kwargs) if len(self.tag): self.tag[-1].nodes.append(node) @@ -149,8 +161,10 @@ class Lexer(object): if self.control_line: control_frame = self.control_line[-1] control_frame.nodes.append(node) - if not (isinstance(node, parsetree.ControlLine) and - control_frame.is_ternary(node.keyword)): + if not ( + isinstance(node, parsetree.ControlLine) + and control_frame.is_ternary(node.keyword) + ): if self.ternary_stack and self.ternary_stack[-1]: self.ternary_stack[-1][-1].nodes.append(node) if isinstance(node, parsetree.Tag): @@ -164,17 +178,20 @@ class Lexer(object): elif node.is_primary: self.control_line.append(node) self.ternary_stack.append([]) - elif self.control_line and \ - self.control_line[-1].is_ternary(node.keyword): + elif self.control_line and self.control_line[-1].is_ternary( + node.keyword + ): self.ternary_stack[-1].append(node) - elif self.control_line and \ - not self.control_line[-1].is_ternary(node.keyword): + elif self.control_line and not self.control_line[-1].is_ternary( + node.keyword + ): raise exceptions.SyntaxException( - "Keyword '%s' not a legal ternary for keyword '%s'" % - (node.keyword, self.control_line[-1].keyword), - **self.exception_kwargs) + "Keyword '%s' not a legal ternary for keyword '%s'" + % (node.keyword, self.control_line[-1].keyword), + **self.exception_kwargs + ) - _coding_re = re.compile(r'#.*coding[:=]\s*([-\w.]+).*\r?\n') + _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n") def decode_raw_stream(self, text, decode_raw, known_encoding, filename): """given string/unicode or bytes/string, determine encoding @@ -184,44 +201,48 @@ class Lexer(object): """ if isinstance(text, compat.text_type): m = self._coding_re.match(text) - encoding = m and m.group(1) or known_encoding or 'ascii' + encoding = m and m.group(1) or known_encoding or "ascii" return encoding, text if text.startswith(codecs.BOM_UTF8): - text = text[len(codecs.BOM_UTF8):] - parsed_encoding = 'utf-8' - m = self._coding_re.match(text.decode('utf-8', 'ignore')) - if m is not None and m.group(1) != 'utf-8': + text = text[len(codecs.BOM_UTF8) :] + parsed_encoding = "utf-8" + m = self._coding_re.match(text.decode("utf-8", "ignore")) + if m is not None and m.group(1) != "utf-8": raise exceptions.CompileException( "Found utf-8 BOM in file, with conflicting " "magic encoding comment of '%s'" % m.group(1), - text.decode('utf-8', 'ignore'), - 0, 0, filename) + text.decode("utf-8", "ignore"), + 0, + 0, + filename, + ) else: - m = self._coding_re.match(text.decode('utf-8', 'ignore')) + m = self._coding_re.match(text.decode("utf-8", "ignore")) if m: parsed_encoding = m.group(1) else: - parsed_encoding = known_encoding or 'ascii' + parsed_encoding = known_encoding or "ascii" if decode_raw: try: text = text.decode(parsed_encoding) except UnicodeDecodeError: raise exceptions.CompileException( - "Unicode decode operation of encoding '%s' failed" % - parsed_encoding, - text.decode('utf-8', 'ignore'), - 0, 0, filename) + "Unicode decode operation of encoding '%s' failed" + % parsed_encoding, + text.decode("utf-8", "ignore"), + 0, + 0, + filename, + ) return parsed_encoding, text def parse(self): self.encoding, self.text = self.decode_raw_stream( - self.text, - not self.disable_unicode, - self.encoding, - self.filename) + self.text, not self.disable_unicode, self.encoding, self.filename + ) for preproc in self.preprocessor: self.text = preproc(self.text) @@ -232,7 +253,7 @@ class Lexer(object): self.textlength = len(self.text) - while (True): + while True: if self.match_position > self.textlength: break @@ -258,20 +279,24 @@ class Lexer(object): raise exceptions.CompileException("assertion failed") if len(self.tag): - raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % - self.tag[-1].keyword, - **self.exception_kwargs) + raise exceptions.SyntaxException( + "Unclosed tag: <%%%s>" % self.tag[-1].keyword, + **self.exception_kwargs + ) if len(self.control_line): raise exceptions.SyntaxException( - "Unterminated control keyword: '%s'" % - self.control_line[-1].keyword, + "Unterminated control keyword: '%s'" + % self.control_line[-1].keyword, self.text, self.control_line[-1].lineno, - self.control_line[-1].pos, self.filename) + self.control_line[-1].pos, + self.filename, + ) return self.template def match_tag_start(self): - match = self.match(r''' + match = self.match( + r""" \<% # opening tag ([\w\.\:]+) # keyword @@ -283,9 +308,9 @@ class Lexer(object): (/)?> # closing - ''', - - re.I | re.S | re.X) + """, + re.I | re.S | re.X, + ) if match: keyword, attr, isend = match.groups() @@ -293,22 +318,23 @@ class Lexer(object): attributes = {} if attr: for att in re.findall( - r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr): + r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr + ): key, val1, val2 = att text = val1 or val2 - text = text.replace('\r\n', '\n') + text = text.replace("\r\n", "\n") attributes[key] = text self.append_node(parsetree.Tag, keyword, attributes) if isend: self.tag.pop() else: - if keyword == 'text': - match = self.match(r'(.*?)(?=\</%text>)', re.S) + if keyword == "text": + match = self.match(r"(.*?)(?=\</%text>)", re.S) if not match: raise exceptions.SyntaxException( - "Unclosed tag: <%%%s>" % - self.tag[-1].keyword, - **self.exception_kwargs) + "Unclosed tag: <%%%s>" % self.tag[-1].keyword, + **self.exception_kwargs + ) self.append_node(parsetree.Text, match.group(1)) return self.match_tag_end() return True @@ -316,25 +342,27 @@ class Lexer(object): return False def match_tag_end(self): - match = self.match(r'\</%[\t ]*(.+?)[\t ]*>') + match = self.match(r"\</%[\t ]*(.+?)[\t ]*>") if match: if not len(self.tag): raise exceptions.SyntaxException( - "Closing tag without opening tag: </%%%s>" % - match.group(1), - **self.exception_kwargs) + "Closing tag without opening tag: </%%%s>" + % match.group(1), + **self.exception_kwargs + ) elif self.tag[-1].keyword != match.group(1): raise exceptions.SyntaxException( - "Closing tag </%%%s> does not match tag: <%%%s>" % - (match.group(1), self.tag[-1].keyword), - **self.exception_kwargs) + "Closing tag </%%%s> does not match tag: <%%%s>" + % (match.group(1), self.tag[-1].keyword), + **self.exception_kwargs + ) self.tag.pop() return True else: return False def match_end(self): - match = self.match(r'\Z', re.S) + match = self.match(r"\Z", re.S) if match: string = match.group() if string: @@ -345,7 +373,8 @@ class Lexer(object): return False def match_text(self): - match = self.match(r""" + match = self.match( + r""" (.*?) # anything, followed by: ( (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based @@ -360,7 +389,9 @@ class Lexer(object): (\\\r?\n) # an escaped newline - throw away | \Z # end of string - )""", re.X | re.S) + )""", + re.X | re.S, + ) if match: text = match.group(1) @@ -374,14 +405,17 @@ class Lexer(object): match = self.match(r"<%(!)?") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(False, r'%>') + text, end = self.parse_until_text(False, r"%>") # the trailing newline helps # compiler.parse() not complain about indentation text = adjust_whitespace(text) + "\n" self.append_node( parsetree.Code, text, - match.group(1) == '!', lineno=line, pos=pos) + match.group(1) == "!", + lineno=line, + pos=pos, + ) return True else: return False @@ -390,16 +424,19 @@ class Lexer(object): match = self.match(r"\${") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(True, r'\|', r'}') - if end == '|': - escapes, end = self.parse_until_text(True, r'}') + text, end = self.parse_until_text(True, r"\|", r"}") + if end == "|": + escapes, end = self.parse_until_text(True, r"}") else: escapes = "" - text = text.replace('\r\n', '\n') + text = text.replace("\r\n", "\n") self.append_node( parsetree.Expression, - text, escapes.strip(), - lineno=line, pos=pos) + text, + escapes.strip(), + lineno=line, + pos=pos, + ) return True else: return False @@ -407,31 +444,35 @@ class Lexer(object): def match_control_line(self): match = self.match( r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\r?\n)|[^\r\n])*)" - r"(?:\r?\n|\Z)", re.M) + r"(?:\r?\n|\Z)", + re.M, + ) if match: operator = match.group(1) text = match.group(2) - if operator == '%': - m2 = re.match(r'(end)?(\w+)\s*(.*)', text) + if operator == "%": + m2 = re.match(r"(end)?(\w+)\s*(.*)", text) if not m2: raise exceptions.SyntaxException( - "Invalid control line: '%s'" % - text, - **self.exception_kwargs) + "Invalid control line: '%s'" % text, + **self.exception_kwargs + ) isend, keyword = m2.group(1, 2) - isend = (isend is not None) + isend = isend is not None if isend: if not len(self.control_line): raise exceptions.SyntaxException( - "No starting keyword '%s' for '%s'" % - (keyword, text), - **self.exception_kwargs) + "No starting keyword '%s' for '%s'" + % (keyword, text), + **self.exception_kwargs + ) elif self.control_line[-1].keyword != keyword: raise exceptions.SyntaxException( - "Keyword '%s' doesn't match keyword '%s'" % - (text, self.control_line[-1].keyword), - **self.exception_kwargs) + "Keyword '%s' doesn't match keyword '%s'" + % (text, self.control_line[-1].keyword), + **self.exception_kwargs + ) self.append_node(parsetree.ControlLine, keyword, isend, text) else: self.append_node(parsetree.Comment, text) |