9 files changed, 274 insertions, 23 deletions
diff --git a/lib/mako/codegen.py b/lib/mako/codegen.py
index 7c3e4e0..ad72850 100644
--- a/lib/mako/codegen.py
+++ b/lib/mako/codegen.py
@@ -189,3 +189,4 @@ class PythonPrinter(object):
         self.line_buffer = []
         self._reset_multi_line_flags()
 
+
diff --git a/lib/mako/exceptions.py b/lib/mako/exceptions.py
index 30eef75..acaa509 100644
--- a/lib/mako/exceptions.py
+++ b/lib/mako/exceptions.py
@@ -3,8 +3,13 @@
 class MakoException(Exception):
     pass
 class CompileException(MakoException):
-    pass
+    def __init__(self, message, lineno, pos):
+        # the location is appended to the message and also kept on the instance
+        MakoException.__init__(self, message + " at line: %d char: %d" % (lineno, pos))
+        self.lineno, self.pos = lineno, pos
+                    
 class SyntaxException(MakoException):
-    def __init__(self, message, lineno):
-        MakoException.__init__(self, message + " at line: %d" % lineno)
-        self.lineno =lineno
-\ No newline at end of file
+    def __init__(self, message, lineno, pos):
+        # the location is appended to the message and also kept on the instance
+        MakoException.__init__(self, message + " at line: %d char: %d" % (lineno, pos))
+        self.lineno, self.pos = lineno, pos
+\ No newline at end of file
diff --git a/lib/mako/lexer.py b/lib/mako/lexer.py
index 10211f2..27d4fcc 100644
--- a/lib/mako/lexer.py
+++ b/lib/mako/lexer.py
@@ -1,5 +1,6 @@
 import re
 from mako import parsetree, exceptions
+from mako.util import adjust_whitespace
 
 class Lexer(object):
     def __init__(self, text):
@@ -57,21 +58,25 @@ class Lexer(object):
         
             if self.match_end():
                 break
-            
+            if self.match_expression():
+                continue
+            if self.match_control_line():
+                continue
             if self.match_tag_start(): 
                 continue
             if self.match_tag_end():
                 continue
-                
+            if self.match_python_block():
+                continue
             if self.match_text(): 
                 continue
             
             if (self.current.match_position > len(self.current.source)):
                 break
-        
-            raise exceptions.Compiler("Infinite parsing loop encountered - Lexer bug?")
+            raise "assertion failed"
+            
         if len(self.tag):
-            raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, self.matched_lineno)
+            raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, self.matched_lineno, self.matched_charpos)
         return self.nodes    
 
     def match_tag_start(self):
@@ -117,7 +122,9 @@ class Lexer(object):
         match = self.match(r"""
                 (.*?)         # anything, followed by:
                 (
-                 (?<=\n)\s*(?=[%#]) # an eval or comment line, preceded by a consumed \n and whitespace
+                 (?<=\n)(?=\s*[%#]) # an eval or comment line, preceded by a consumed \n and whitespace
+                 |
+                 (?=\${)   # an expression
                  |
                  (?=</?[%&])  # a substitution or block or call start or end
                                               # - don't consume
@@ -134,6 +141,46 @@ class Lexer(object):
             return True
         else:
             return False
-                
+    
+    def match_python_block(self):
+        """match a <% %> or <%! %> block and append it as a Code node."""
+        found = self.match(r"<%(!)?(.*?)%>", re.S)
+        if not found:
+            return False
+        (bang, source) = found.group(1, 2)
+        self.append_node(parsetree.Code, adjust_whitespace(source), bang == '!')
+        return True
+            
+    def match_expression(self):
+        """match ${expr} or ${expr | name, name} and append an Expression
+        node carrying the expression text and the list of escape names."""
+        found = self.match(r"\${(.+?)(?:\|\s*(.+?)\s*)?}", re.S)
+        if not found:
+            return False
+        (expr, flags) = found.group(1, 2)
+        names = []
+        if flags:
+            names = re.split(r'\s*,\s*', flags)
+        self.append_node(parsetree.Expression, expr, names)
+        return True
+
+    def match_control_line(self):
+        """match a '%' control line or a '#' comment line.
+
+        '%' appends a ControlLine node, '#' a Comment node; a '%' line
+        that does not start with a keyword raises SyntaxException.
+        """
+        found = self.match(r"(?<=^)\s*([%#])\s*([^\n]*)(?:\n|\Z)", re.M)
+        if not found:
+            return False
+        (marker, text) = found.group(1, 2)
+        if marker != '%':
+            self.append_node(parsetree.Comment, text)
+            return True
+        parts = re.match(r'(end)?(\w+)\s*(.*)', text)
+        if parts is None:
+            raise exceptions.SyntaxException("Invalid control line: '%s'" % text, self.matched_lineno, self.matched_charpos)
+        self.append_node(parsetree.ControlLine, parts.group(2), parts.group(1) is not None, text)
+        return True
+            
     def _count_lines(self, text):
         return len(re.findall(r"\n", text)) 
 \ No newline at end of file
diff --git a/lib/mako/parsetree.py b/lib/mako/parsetree.py
index bd2927a..11dd395 100644
--- a/lib/mako/parsetree.py
+++ b/lib/mako/parsetree.py
@@ -1,13 +1,16 @@
 """object model defining a Mako template."""
 
-from mako import exceptions
+from mako import exceptions, ast
 
 class Node(object):
     """base class for a Node in the parse tree."""
     def __init__(self, lineno, pos):
         self.lineno = lineno
         self.pos = pos
-
+    def accept_visitor(self, visitor):
+        # dispatch on the concrete node class, e.g. visitText for a Text node
+        getattr(visitor, "visit" + self.__class__.__name__)(self)
+        
 class ControlLine(Node):
     """defines a control line, a line-oriented python line or end tag.
     
@@ -15,13 +18,13 @@ class ControlLine(Node):
         (markup)
     % endif
     """
-    def __init__(self, keyword, isend, text, **kwargs):
+    def __init__(self, keyword, text, isend, **kwargs):
         super(ControlLine, self).__init__(**kwargs)
-        self.keyword = keyword
         self.text = text
+        self.keyword = keyword
         self.isend = isend
     def __repr__(self):
-        return "ControlLine(%s, %s, %s, %s)" % (repr(self.keyword), repr(self.text), repr(self.isend), repr((self.lineno, self.pos)))
+        return "ControlLine(%s, %s, %s, %s)" % (repr(self.keyword), repr(self.isend), repr(self.text), repr((self.lineno, self.pos)))
 
 class Text(Node):
     """defines plain text in the template."""
@@ -49,8 +52,9 @@ class Code(Node):
         super(Code, self).__init__(**kwargs)
         self.text = text
         self.ismodule = ismodule
+        self.code = ast.PythonCode(text)
     def __repr__(self):
-        return "Comment(%s, %s, %s)" % (repr(self.text), repr(self.ismodule), repr((self.lineno, self.pos)))
+        return "Code(%s, %s, %s)" % (repr(self.text), repr(self.ismodule), repr((self.lineno, self.pos)))
         
 class Comment(Node):
     """defines a comment line.
@@ -70,11 +74,12 @@ class Expression(Node):
     ${x+y}
     
     """
-    def __init__(self, text, **kwargs):
+    def __init__(self, text, escapes, **kwargs):
         super(Expression, self).__init__(**kwargs)
         self.text = text
+        self.escapes = escapes
     def __repr__(self):
-        return "Expression(%s, %s)" % (self.text, repr((self.lineno, self.pos)))
+        return "Expression(%s, %s, %s)" % (repr(self.text), repr(self.escapes), repr((self.lineno, self.pos)))
         
 class _TagMeta(type):
     """metaclass to allow Tag to produce a subclass according to its keyword"""
@@ -87,7 +92,7 @@ class _TagMeta(type):
         try:
             cls = _TagMeta._classmap[keyword]
         except KeyError:
-            raise exceptions.CompileError("No such tag: '%s'" % keyword)
+            raise exceptions.CompileException("No such tag: '%s'" % keyword, kwargs['lineno'], kwargs['pos'])
         return type.__call__(cls, keyword, attributes, **kwargs)
         
 class Tag(Node):
@@ -117,3 +122,5 @@ class ComponentTag(Tag):
     __keyword__ = 'component'
 class CallTag(Tag):
     __keyword__ = 'call'
+class InheritTag(Tag):
+    __keyword__ = 'inherit'
+\ No newline at end of file
diff --git a/lib/mako/util.py b/lib/mako/util.py
index bc2377c..74ff01b 100644
--- a/lib/mako/util.py
+++ b/lib/mako/util.py
@@ -1,5 +1,38 @@
-
+from StringIO import StringIO
+import re, string
 try:
     Set = set
 except:
+    import sets
     Set = sets.Set
+
+
+def adjust_whitespace(text):
+    """strip the first code line's leading whitespace from the start of each line.
+
+    lines continuing a backslashed line or lying inside a triple-quoted
+    string are copied through untouched; every output line ends in a newline.
+    """
+    state = {'backslashed': False, 'triplequoted': False}
+
+    def in_multi_line(line):
+        # report the state left by the previous lines, then update it
+        was_multi_line = state['backslashed'] or state['triplequoted']
+        state['backslashed'] = re.search(r"\\$", line) is not None
+        if len(re.findall(r"\"\"\"|\'\'\'", line)) % 2 != 0:
+            state['triplequoted'] = not state['triplequoted']
+        return was_multi_line
+
+    result = []
+    stripspace = None
+
+    for line in re.split(r'\r?\n', text):
+        if not in_multi_line(line):
+            line = line.expandtabs()
+            # the first line that is neither blank nor a comment sets the indent
+            if stripspace is None and re.search(r"^[ \t]*[^# \t]", line):
+                stripspace = re.match(r"^([ \t]*)", line).group(1)
+            if stripspace is not None:
+                line = re.sub(r"^%s" % stripspace, '', line)
+        result.append(line + "\n")
+    return "".join(result)
diff --git a/test/ast.py b/test/ast.py
index 501fc4a..588697e 100644
--- a/test/ast.py
+++ b/test/ast.py
@@ -31,6 +31,17 @@ print "Another expr", c
         assert parsed.undeclared_identifiers == util.Set(['x', 'y', 'z'])
         assert parsed.declared_identifiers == util.Set()
 
+    def test_locate_identifiers_2(self):
+        code = """
+result = []
+data = get_data()
+for x in data:
+    result.append(x+7)
+"""
+        parsed = ast.PythonCode(code)
+        assert parsed.undeclared_identifiers == util.Set(['get_data'])
+        assert parsed.declared_identifiers == util.Set(['result', 'data', 'x'])
+        
     def test_function_decl(self):
         """test getting the arguments from a function"""
         code = "def foo(a, b, c=None, d='hi', e=x, f=y+7):pass"
diff --git a/test/lexer.py b/test/lexer.py
index d9855b3..0129056 100644
--- a/test/lexer.py
+++ b/test/lexer.py
@@ -27,7 +27,7 @@ class LexerTest(unittest.TestCase):
             nodes = Lexer(template).parse()
             assert False
         except exceptions.SyntaxException, e:
-            assert str(e) == "Unclosed tag: <%component> at line: 5"
+            assert str(e) == "Unclosed tag: <%component> at line: 5 char: 9"
 
     def test_expr_in_attribute(self):
         """test some slightly trickier expressions.
@@ -53,6 +53,107 @@ class LexerTest(unittest.TestCase):
         """
         nodes = Lexer(template).parse()
         assert repr(nodes) == r"""[Text('\n        \n        ', (1, 1)), NamespaceTag('namespace', {'name': '"ns"'}, (3, 9), ["Text('\\n            ', (3, 31))", 'ComponentTag(\'component\', {\'name\': \'"lala(hi, there)"\'}, (4, 13), ["Text(\'\\\\n                \', (4, 48))", \'CallTag(\\\'call\\\', {\\\'expr\\\': \\\'"something()"\\\'}, (5, 17), [])\', "Text(\'\\\\n            \', (5, 44))"])', "Text('\\n        ', (6, 26))"]), Text('\n        \n        ', (7, 22))]"""
+    
+    def test_code(self):
+        """test lexing of <% %> and <%! %> python blocks into Code nodes."""
+        template = """
+        some text
+        
+        <%
+            print "hi"
+            for x in range(1,5):
+                print x
+        %>
+        
+        more text
+        
+        <%!
+            import foo
+        %>
+        """
+        nodes = Lexer(template).parse()
+        assert repr(nodes) == r"""[Text('\n        some text\n        \n        ', (1, 1)), Code('\nprint "hi"\nfor x in range(1,5):\n    print x\n        \n', False, (4, 9)), Text('\n        \n        more text\n        \n        ', (8, 11)), Code('\nimport foo\n        \n', True, (12, 9)), Text('\n        ', (14, 11))]"""
+    
+    def test_code_and_tags(self):
+        """test a python block mixed with namespace, component and call tags."""
+        template = """
+<%namespace name="foo">
+    <%component name="x">
+        this is x
+    </%component>
+    <%component name="y">
+        this is y
+    </%component>
+</%namespace>
+
+<%
+    result = []
+    data = get_data()
+    for x in data:
+        result.append(x+7)
+%>
+
+    result: <%call expr="foo.x(result)"/>
+"""
+        nodes = Lexer(template).parse()
+        assert repr(nodes) == r"""[Text('\n', (1, 1)), NamespaceTag('namespace', {'name': '"foo"'}, (2, 1), ["Text('\\n    ', (2, 24))", 'ComponentTag(\'component\', {\'name\': \'"x"\'}, (3, 5), ["Text(\'\\\\n        this is x\\\\n    \', (3, 26))"])', "Text('\\n    ', (5, 18))", 'ComponentTag(\'component\', {\'name\': \'"y"\'}, (6, 5), ["Text(\'\\\\n        this is y\\\\n    \', (6, 26))"])', "Text('\\n', (8, 18))"]), Text('\n\n', (9, 14)), Code('\nresult = []\ndata = get_data()\nfor x in data:\n    result.append(x+7)\n\n', False, (11, 1)), Text('\n\n    result: ', (16, 3)), CallTag('call', {'expr': '"foo.x(result)"'}, (18, 13), []), Text('\n', (18, 42))]"""
+
+    def test_expression(self):
+        """test ${} expressions, with and without escapes, in text and inside a tag."""
+        template = """
+        this is some ${text} and this is ${textwith | escapes, moreescapes}
+        <%component name="hi">
+            give me ${foo()} and ${bar()}
+        </%component>
+        ${hi()}
+"""
+        nodes = Lexer(template).parse()
+        assert repr(nodes) == r"""[Text('\n        this is some ', (1, 1)), Expression('text', [], (2, 22)), Text(' and this is ', (2, 29)), Expression('textwith ', ['escapes', 'moreescapes'], (2, 42)), Text('\n        ', (2, 76)), ComponentTag('component', {'name': '"hi"'}, (3, 9), ["Text('\\n            give me ', (3, 31))", "Expression('foo()', [], (4, 21))", "Text(' and ', (4, 29))", "Expression('bar()', [], (4, 34))", "Text('\\n        ', (4, 42))"]), Text('\n        ', (5, 22)), Expression('hi()', [], (6, 9)), Text('\n', (6, 16))]"""
+
+    def test_control_lines(self):
+        """test % control lines and their 'end' forms mixed with text."""
+        template = """
+text text la la
+% if foo():
+ mroe text la la blah blah
+% endif
+
+        and osme more stuff
+        % for l in range(1,5):
+    tex tesl asdl l is ${l} kfmas d
+      % endfor
+    tetx text
+    
+"""
+        nodes = Lexer(template).parse()
+        assert repr(nodes) == r"""[Text('\ntext text la la\n', (1, 1)), ControlLine('if', 'if foo():', False, (3, 1)), Text(' mroe text la la blah blah\n', (4, 1)), ControlLine('if', 'endif', True, (5, 1)), Text('\n        and osme more stuff\n', (6, 1)), ControlLine('for', 'for l in range(1,5):', False, (8, 1)), Text('    tex tesl asdl l is ', (9, 1)), Expression('l', [], (9, 24)), Text(' kfmas d\n', (9, 28)), ControlLine('for', 'endfor', True, (10, 1)), Text('    tetx text\n    \n', (11, 1))]"""
         
+
+    def test_integration(self):
+        """test tags, a comment line, control lines and an expression lexed together."""
+        template = """<%namespace name="foo" file="somefile.html"/>
+ # inherit from foobar.html
+<%inherit file="foobar.html"/>
+
+<%component name="header">
+     <div>header</div>
+</%component>
+<%component name="footer">
+    <div> footer</div>
+</%component>
+
+<table>
+    % for j in data():
+    <tr>
+        % for x in j:
+            <td>Hello ${x| h}</td>
+        % endfor
+    </tr>
+    % endfor
+</table>
+"""
+        nodes = Lexer(template).parse()
+        assert repr(nodes) == r"""[NamespaceTag('namespace', {'name': '"foo"', 'file': '"somefile.html"'}, (1, 1), []), Text('\n', (1, 46)), Comment('inherit from foobar.html', (2, 1)), InheritTag('inherit', {'file': '"foobar.html"'}, (3, 1), []), Text('\n\n', (3, 31)), ComponentTag('component', {'name': '"header"'}, (5, 1), ["Text('\\n     <div>header</div>\\n', (5, 27))"]), Text('\n', (7, 14)), ComponentTag('component', {'name': '"footer"'}, (8, 1), ["Text('\\n    <div> footer</div>\\n', (8, 27))"]), Text('\n\n<table>\n', (10, 14)), ControlLine('for', 'for j in data():', False, (13, 1)), Text('    <tr>\n', (14, 1)), ControlLine('for', 'for x in j:', False, (15, 1)), Text('            <td>Hello ', (16, 1)), Expression('x', ['h'], (16, 23)), Text('</td>\n', (16, 30)), ControlLine('for', 'endfor', True, (17, 1)), Text('    </tr>\n', (18, 1)), ControlLine('for', 'endfor', True, (19, 1)), Text('</table>\n', (20, 1))]"""
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/pygen.py b/test/pygen.py
index 4f3da8e..de813b7 100644
--- a/test/pygen.py
+++ b/test/pygen.py
@@ -114,7 +114,15 @@ and more block.
         printer = PythonPrinter(stream)
         printer.print_adjusted_line(block)
         printer.close()
-        print stream.getvalue()
+        assert stream.getvalue() == \
+"""
+            # comment
+if test:
+    if (lala + hoho) + \\
+(foobar + blat) == 5:
+        print "hi"
+print "more indent"
 
+"""
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/util.py b/test/util.py
new file mode 100644
index 0000000..ebe4e54
--- /dev/null
+++ b/test/util.py
@@ -0,0 +1,38 @@
+import unittest
+
+from mako import util
+
+class WhitespaceTest(unittest.TestCase):
+    def test_basic(self):
+        """test that an evenly indented block is shifted to the left margin."""
+        text = """
+        for x in range(0,15):
+            print x
+        print "hi"
+        """
+        assert util.adjust_whitespace(text) == """
+for x in range(0,15):
+    print x
+print "hi"
+
+"""
+
+    def test_quotes(self):
+        """test that lines inside a triple-quoted string are left untouched."""
+        text = """
+        print ''' aslkjfnas kjdfn
+askdjfnaskfd fkasnf dknf sadkfjn asdkfjna sdakjn
+asdkfjnads kfajns '''
+        if x:
+            print y
+"""
+        assert util.adjust_whitespace(text) == """
+print ''' aslkjfnas kjdfn
+askdjfnaskfd fkasnf dknf sadkfjn asdkfjna sdakjn
+asdkfjnads kfajns '''
+if x:
+    print y
+
+"""
+if __name__ == '__main__':
+    unittest.main()