aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mike Bayer <mike_mp@zzzcomputing.com> 2020-05-29 16:25:11 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2020-05-29 16:25:11 -0400
commit: 68b74a1bf581433b59bfb1656b654018607c092d (patch)
tree: 552c684cff28e8417dd678d4db441b82e5da149c
parent: d9290964f0063d5b2f04d20d3122b5811e1f47e5 (diff)
download: external_python_mako-68b74a1bf581433b59bfb1656b654018607c092d.tar.gz
external_python_mako-68b74a1bf581433b59bfb1656b654018607c092d.tar.bz2
external_python_mako-68b74a1bf581433b59bfb1656b654018607c092d.zip
Switch template encoding to utf-8
The default template encoding is now utf-8. Previously, the encoding was "ascii", which was standard throughout Python 2. This means that the "magic encoding comment" is no longer required for utf-8 templates. Fixes: #267 Change-Id: I411a75ebd6161e2517e06b5180a3571e5f5d923e
-rw-r--r-- doc/build/unicode.rst 33
-rw-r--r-- doc/build/unreleased/267.rst 8
-rw-r--r-- mako/lexer.py 4
-rw-r--r-- test/templates/chs_unicode_py3k.html 1
-rw-r--r-- test/templates/chs_utf8.html 1
-rw-r--r-- test/templates/unicode_arguments.html 1
-rw-r--r-- test/templates/unicode_arguments_py3k.html 1
-rw-r--r-- test/test_template.py 8
8 files changed, 25 insertions, 32 deletions
diff --git a/doc/build/unicode.rst b/doc/build/unicode.rst
index 1ba364e..2f77f6a 100644
--- a/doc/build/unicode.rst
+++ b/doc/build/unicode.rst
@@ -4,6 +4,9 @@
The Unicode Chapter
===================
+.. note:: This chapter was written many years ago and is very Python 2-centric.
+ As of Mako 1.1.3, the default template encoding is ``utf-8``.
+
The Python language supports two ways of representing what we
know as "strings", i.e. series of characters. In Python 2, the
two types are ``string`` and ``unicode``, and in Python 3 they are
@@ -89,35 +92,23 @@ encoding <defining_output_encoding>` (still required in Python 3).
Specifying the Encoding of a Template File
==========================================
-This is the most basic encoding-related setting, and it is
-equivalent to Python's "magic encoding comment", as described in
-`pep-0263 <http://www.python.org/dev/peps/pep-0263/>`_. Any
-template that contains non-ASCII characters requires that this
-comment be present so that Mako can decode to unicode (and also
-make usage of Python's AST parsing services). Mako's lexer will
-use this encoding in order to convert the template source into a
-``unicode`` object before continuing its parsing:
+.. versionchanged:: 1.1.3
+
+ As of Mako 1.1.3, the default template encoding is "utf-8". Previously, a
+ Python "magic encoding comment" was required for templates that were not
+ using ASCII.
+
+Mako templates support Python's "magic encoding comment" syntax
+described in `pep-0263 <http://www.python.org/dev/peps/pep-0263/>`_:
.. sourcecode:: mako
## -*- coding: utf-8 -*-
- Alors vous imaginez ma surprise, au lever du jour, quand
+ Alors vous imaginez ma surprise, au lever du jour, quand
une drôle de petite voix m’a réveillé. Elle disait:
« S’il vous plaît… dessine-moi un mouton! »
-For the picky, the regular expression used is derived from that
-of the above mentioned pep:
-
-.. sourcecode:: python
-
- #.*coding[:=]\s*([-\w.]+).*\n
-
-The lexer will convert to unicode in all cases, so that if any
-characters exist in the template that are outside of the
-specified encoding (or the default of ``ascii``), the error will
-be immediate.
-
As an alternative, the template encoding can be specified
programmatically to either :class:`.Template` or :class:`.TemplateLookup` via
the ``input_encoding`` parameter:
diff --git a/doc/build/unreleased/267.rst b/doc/build/unreleased/267.rst
new file mode 100644
index 0000000..83a54a5
--- /dev/null
+++ b/doc/build/unreleased/267.rst
@@ -0,0 +1,8 @@
+.. change::
+ :tags: bug, templates
+ :tickets: 267
+
+ The default template encoding is now utf-8. Previously, the encoding was
+ "ascii", which was standard throughout Python 2. This means that the
+ "magic encoding comment" is no longer required for utf-8 templates.
+
diff --git a/mako/lexer.py b/mako/lexer.py
index a02b57f..6226e26 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -201,7 +201,7 @@ class Lexer(object):
"""
if isinstance(text, compat.text_type):
m = self._coding_re.match(text)
- encoding = m and m.group(1) or known_encoding or "ascii"
+ encoding = m and m.group(1) or known_encoding or "utf-8"
return encoding, text
if text.startswith(codecs.BOM_UTF8):
@@ -222,7 +222,7 @@ class Lexer(object):
if m:
parsed_encoding = m.group(1)
else:
- parsed_encoding = known_encoding or "ascii"
+ parsed_encoding = known_encoding or "utf-8"
if decode_raw:
try:
diff --git a/test/templates/chs_unicode_py3k.html b/test/templates/chs_unicode_py3k.html
index e4b6a8f..1ee49cc 100644
--- a/test/templates/chs_unicode_py3k.html
+++ b/test/templates/chs_unicode_py3k.html
@@ -1,4 +1,3 @@
-## -*- coding:utf-8 -*-
<%
msg = '新中国的主席'
%>
diff --git a/test/templates/chs_utf8.html b/test/templates/chs_utf8.html
index 5f4733f..50886be 100644
--- a/test/templates/chs_utf8.html
+++ b/test/templates/chs_utf8.html
@@ -1,4 +1,3 @@
-## -*- coding:utf-8 -*-
<%
msg = '新中国的主席'
%>
diff --git a/test/templates/unicode_arguments.html b/test/templates/unicode_arguments.html
index b363cb6..e6d7c2c 100644
--- a/test/templates/unicode_arguments.html
+++ b/test/templates/unicode_arguments.html
@@ -1,4 +1,3 @@
-# coding: utf-8
<%def name="my_def(x)">
x is: ${x}
diff --git a/test/templates/unicode_arguments_py3k.html b/test/templates/unicode_arguments_py3k.html
index 47d918a..871517b 100644
--- a/test/templates/unicode_arguments_py3k.html
+++ b/test/templates/unicode_arguments_py3k.html
@@ -1,4 +1,3 @@
-# coding: utf-8
<%def name="my_def(x)">
x is: ${x}
diff --git a/test/test_template.py b/test/test_template.py
index 89e5a61..40fd10c 100644
--- a/test/test_template.py
+++ b/test/test_template.py
@@ -541,9 +541,7 @@ quand une drôle de petite voix m’a réveillé. Elle disait:
# won't read the file even with open(...encoding='utf-8') unless
# errors is specified. or if there's some quirk in 3.1.2
# since I'm pretty sure this test worked with py3k when I wrote it.
- template.render(
- path=self._file_path("internationalization.html")
- )
+ template.render(path=self._file_path("internationalization.html"))
@requires_python_2
def test_bytestring_passthru(self):
@@ -1682,7 +1680,7 @@ Text
8,
8,
],
- "source_encoding": "ascii",
+ "source_encoding": "utf-8",
"filename": None,
"line_map": {
35: 29,
@@ -1773,7 +1771,7 @@ Text
8,
8,
],
- "source_encoding": "ascii",
+ "source_encoding": "utf-8",
"filename": None,
"line_map": {
34: 10,