diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2010-03-02 16:09:58 +0000 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2010-03-02 16:09:58 +0000 |
commit | 71d199f72e368f473d485c9eff6ec2d1308b3781 (patch) | |
tree | bb80e930e1ceb92437c2aadca1e5e5a5401cb1dc /mako/lexer.py | |
parent | d190169d0b0059b312ba9adad7011ecdff177188 (diff) | |
download | external_python_mako-71d199f72e368f473d485c9eff6ec2d1308b3781.tar.gz external_python_mako-71d199f72e368f473d485c9eff6ec2d1308b3781.tar.bz2 external_python_mako-71d199f72e368f473d485c9eff6ec2d1308b3781.zip |
cleanup of lexer
Diffstat (limited to 'mako/lexer.py')
-rw-r--r-- | mako/lexer.py | 138 |
1 files changed, 102 insertions, 36 deletions
diff --git a/mako/lexer.py b/mako/lexer.py index 52a4b6d..65be795 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -13,7 +13,9 @@ from mako.pygen import adjust_whitespace _regexp_cache = {} class Lexer(object): - def __init__(self, text, filename=None, disable_unicode=False, input_encoding=None, preprocessor=None): + def __init__(self, text, filename=None, + disable_unicode=False, + input_encoding=None, preprocessor=None): self.text = text self.filename = filename self.template = parsetree.TemplateNode(self.filename) @@ -31,8 +33,13 @@ class Lexer(object): self.preprocessor = [preprocessor] else: self.preprocessor = preprocessor - - exception_kwargs = property(lambda self:{'source':self.text, 'lineno':self.matched_lineno, 'pos':self.matched_charpos, 'filename':self.filename}) + + @property + def exception_kwargs(self): + return {'source':self.text, + 'lineno':self.matched_lineno, + 'pos':self.matched_charpos, + 'filename':self.filename} def match(self, regexp, flags=None): """match the given regular expression string and flags to the current text position. @@ -62,7 +69,8 @@ class Lexer(object): cp -=1 self.matched_charpos = mp - cp self.lineno += len(lines) - #print "MATCHED:", match.group(0), "LINE START:", self.matched_lineno, "LINE END:", self.lineno + #print "MATCHED:", match.group(0), "LINE START:", + # self.matched_lineno, "LINE END:", self.lineno #print "MATCH:", regexp, "\n", self.text[mp : mp + 15], (match and "TRUE" or "FALSE") return match @@ -76,15 +84,23 @@ class Lexer(object): if match: m = self.match(r'.*?%s' % match.group(1), re.S) if not m: - raise exceptions.SyntaxException("Unmatched '%s'" % match.group(1), **self.exception_kwargs) + raise exceptions.SyntaxException( + "Unmatched '%s'" % + match.group(1), + **self.exception_kwargs) else: match = self.match(r'(%s)' % r'|'.join(text)) if match: - return (self.text[startpos:self.match_position-len(match.group(1))], match.group(1)) + return \ + self.text[startpos:self.match_position-len(match.group(1))],\ + match.group(1) else: match = self.match(r".*?(?=\"|\'|#|%s)" % r'|'.join(text), re.S) if not match: - raise exceptions.SyntaxException("Expected: %s" % ','.join(text), **self.exception_kwargs) + raise exceptions.SyntaxException( + "Expected: %s" % + ','.join(text), + **self.exception_kwargs) def append_node(self, nodecls, *args, **kwargs): kwargs.setdefault('source', self.text) @@ -105,8 +121,12 @@ class Lexer(object): self.control_line.pop() elif node.is_primary: self.control_line.append(node) - elif len(self.control_line) and not self.control_line[-1].is_ternary(node.keyword): - raise exceptions.SyntaxException("Keyword '%s' not a legal ternary for keyword '%s'" % (node.keyword, self.control_line[-1].keyword), **self.exception_kwargs) + elif len(self.control_line) and \ + not self.control_line[-1].is_ternary(node.keyword): + raise exceptions.SyntaxException( + "Keyword '%s' not a legal ternary for keyword '%s'" % + (node.keyword, self.control_line[-1].keyword), + **self.exception_kwargs) def escape_code(self, text): if not self.disable_unicode and self.encoding: @@ -122,22 +142,34 @@ class Lexer(object): parsed_encoding = 'utf-8' me = self.match_encoding() if me is not None and me != 'utf-8': - raise exceptions.CompileException("Found utf-8 BOM in file, with conflicting magic encoding comment of '%s'" % me, self.text.decode('utf-8', 'ignore'), 0, 0, self.filename) + raise exceptions.CompileException( + "Found utf-8 BOM in file, with conflicting " + "magic encoding comment of '%s'" % me, + self.text.decode('utf-8', 'ignore'), + 0, 0, self.filename) else: parsed_encoding = self.match_encoding() if parsed_encoding: self.encoding = parsed_encoding + if not self.disable_unicode and not isinstance(self.text, unicode): if self.encoding: try: self.text = self.text.decode(self.encoding) except UnicodeDecodeError, e: - raise exceptions.CompileException("Unicode decode operation of encoding '%s' failed" % self.encoding, self.text.decode('utf-8', 'ignore'), 0, 0, self.filename) + raise exceptions.CompileException( + "Unicode decode operation of encoding '%s' failed" % + self.encoding, + self.text.decode('utf-8', 'ignore'), + 0, 0, self.filename) else: try: self.text = self.text.decode() except UnicodeDecodeError, e: - raise exceptions.CompileException("Could not read template using encoding of 'ascii'. Did you forget a magic encoding comment?", self.text.decode('utf-8', 'ignore'), 0, 0, self.filename) + raise exceptions.CompileException( + "Could not read template using encoding of 'ascii'. " + "Did you forget a magic encoding comment?", + self.text.decode('utf-8', 'ignore'), 0, 0, self.filename) self.textlength = len(self.text) @@ -167,9 +199,15 @@ class Lexer(object): raise exceptions.CompileException("assertion failed") if len(self.tag): - raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, **self.exception_kwargs) + raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % + self.tag[-1].keyword, + **self.exception_kwargs) if len(self.control_line): - raise exceptions.SyntaxException("Unterminated control keyword: '%s'" % self.control_line[-1].keyword, self.text, self.control_line[-1].lineno, self.control_line[-1].pos, self.filename) + raise exceptions.SyntaxException("Unterminated control keyword: '%s'" % + self.control_line[-1].keyword, + self.text, + self.control_line[-1].lineno, + self.control_line[-1].pos, self.filename) return self.template def match_encoding(self): @@ -196,12 +234,12 @@ class Lexer(object): re.I | re.S | re.X) if match: - (keyword, attr, isend) = (match.group(1), match.group(2), match.group(3)) + keyword, attr, isend = match.group(1), match.group(2), match.group(3) self.keyword = keyword attributes = {} if attr: for att in re.findall(r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr): - (key, val1, val2) = att + key, val1, val2 = att text = val1 or val2 text = text.replace('\r\n', '\n') attributes[key] = self.escape_code(text) @@ -212,7 +250,10 @@ class Lexer(object): if keyword == 'text': match = self.match(r'(.*?)(?=\</%text>)', re.S) if not match: - raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, **self.exception_kwargs) + raise exceptions.SyntaxException( + "Unclosed tag: <%%%s>" % + self.tag[-1].keyword, + **self.exception_kwargs) self.append_node(parsetree.Text, match.group(1)) return self.match_tag_end() return True @@ -223,9 +264,15 @@ class Lexer(object): match = self.match(r'\</%[\t ]*(.+?)[\t ]*>') if match: if not len(self.tag): - raise exceptions.SyntaxException("Closing tag without opening tag: </%%%s>" % match.group(1), **self.exception_kwargs) + raise exceptions.SyntaxException( + "Closing tag without opening tag: </%%%s>" % + match.group(1), + **self.exception_kwargs) elif self.tag[-1].keyword != match.group(1): - raise exceptions.SyntaxException("Closing tag </%%%s> does not match tag: <%%%s>" % (match.group(1), self.tag[-1].keyword), **self.exception_kwargs) + raise exceptions.SyntaxException( + "Closing tag </%%%s> does not match tag: <%%%s>" % + (match.group(1), self.tag[-1].keyword), + **self.exception_kwargs) self.tag.pop() return True else: @@ -246,16 +293,18 @@ class Lexer(object): match = self.match(r""" (.*?) # anything, followed by: ( - (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based comment preceded by a consumed \n and whitespace + (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based + # comment preceded by a + # consumed newline and whitespace | - (?=\${) # an expression + (?=\${) # an expression | - (?=\#\*) # multiline comment + (?=\#\*) # multiline comment | (?=</?[%&]) # a substitution or block or call start or end - # - don't consume + # - don't consume | - (\\\r?\n) # an escaped newline - throw away + (\\\r?\n) # an escaped newline - throw away | \Z # end of string )""", re.X | re.S) @@ -270,10 +319,15 @@ class Lexer(object): def match_python_block(self): match = self.match(r"<%(!)?") if match: - (line, pos) = (self.matched_lineno, self.matched_charpos) - (text, end) = self.parse_until_text(r'%>') - text = adjust_whitespace(text) + "\n" # the trailing newline helps compiler.parse() not complain about indentation - self.append_node(parsetree.Code, self.escape_code(text), match.group(1)=='!', lineno=line, pos=pos) + line, pos = self.matched_lineno, self.matched_charpos + text, end = self.parse_until_text(r'%>') + # the trailing newline helps + # compiler.parse() not complain about indentation + text = adjust_whitespace(text) + "\n" + self.append_node( + parsetree.Code, + self.escape_code(text), + match.group(1)=='!', lineno=line, pos=pos) return True else: return False @@ -281,14 +335,17 @@ class Lexer(object): def match_expression(self): match = self.match(r"\${") if match: - (line, pos) = (self.matched_lineno, self.matched_charpos) - (text, end) = self.parse_until_text(r'\|', r'}') + line, pos = self.matched_lineno, self.matched_charpos + text, end = self.parse_until_text(r'\|', r'}') if end == '|': - (escapes, end) = self.parse_until_text(r'}') + escapes, end = self.parse_until_text(r'}') else: escapes = "" text = text.replace('\r\n', '\n') - self.append_node(parsetree.Expression, self.escape_code(text), escapes.strip(), lineno=line, pos=pos) + self.append_node( + parsetree.Expression, + self.escape_code(text), escapes.strip(), + lineno=line, pos=pos) return True else: return False @@ -301,15 +358,24 @@ class Lexer(object): if operator == '%': m2 = re.match(r'(end)?(\w+)\s*(.*)', text) if not m2: - raise exceptions.SyntaxException("Invalid control line: '%s'" % text, **self.exception_kwargs) - (isend, keyword) = m2.group(1, 2) + raise exceptions.SyntaxException( + "Invalid control line: '%s'" % + text, + **self.exception_kwargs) + isend, keyword = m2.group(1, 2) isend = (isend is not None) if isend: if not len(self.control_line): - raise exceptions.SyntaxException("No starting keyword '%s' for '%s'" % (keyword, text), **self.exception_kwargs) + raise exceptions.SyntaxException( + "No starting keyword '%s' for '%s'" % + (keyword, text), + **self.exception_kwargs) elif self.control_line[-1].keyword != keyword: - raise exceptions.SyntaxException("Keyword '%s' doesn't match keyword '%s'" % (text, self.control_line[-1].keyword), **self.exception_kwargs) + raise exceptions.SyntaxException( + "Keyword '%s' doesn't match keyword '%s'" % + (text, self.control_line[-1].keyword), + **self.exception_kwargs) self.append_node(parsetree.ControlLine, keyword, isend, self.escape_code(text)) else: self.append_node(parsetree.Comment, text) |