diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-05-29 16:25:11 -0400 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-05-29 16:25:11 -0400 |
commit | 68b74a1bf581433b59bfb1656b654018607c092d (patch) | |
tree | 552c684cff28e8417dd678d4db441b82e5da149c | |
parent | d9290964f0063d5b2f04d20d3122b5811e1f47e5 (diff) | |
download | external_python_mako-68b74a1bf581433b59bfb1656b654018607c092d.tar.gz external_python_mako-68b74a1bf581433b59bfb1656b654018607c092d.tar.bz2 external_python_mako-68b74a1bf581433b59bfb1656b654018607c092d.zip |
Switch template encoding to utf-8
The default template encoding is now utf-8. Previously, the encoding was
"ascii", which was standard throughout Python 2. This means that the
"magic encoding comment" is no longer required for utf-8 templates.
Fixes: #267
Change-Id: I411a75ebd6161e2517e06b5180a3571e5f5d923e
-rw-r--r-- | doc/build/unicode.rst | 33 | ||||
-rw-r--r-- | doc/build/unreleased/267.rst | 8 | ||||
-rw-r--r-- | mako/lexer.py | 4 | ||||
-rw-r--r-- | test/templates/chs_unicode_py3k.html | 1 | ||||
-rw-r--r-- | test/templates/chs_utf8.html | 1 | ||||
-rw-r--r-- | test/templates/unicode_arguments.html | 1 | ||||
-rw-r--r-- | test/templates/unicode_arguments_py3k.html | 1 | ||||
-rw-r--r-- | test/test_template.py | 8 |
8 files changed, 25 insertions, 32 deletions
diff --git a/doc/build/unicode.rst b/doc/build/unicode.rst index 1ba364e..2f77f6a 100644 --- a/doc/build/unicode.rst +++ b/doc/build/unicode.rst @@ -4,6 +4,9 @@ The Unicode Chapter =================== +.. note:: this chapter was written many years ago and is very Python-2 + centric. As of Mako 1.1.3, the default template encoding is ``utf-8``. + The Python language supports two ways of representing what we know as "strings", i.e. series of characters. In Python 2, the two types are ``string`` and ``unicode``, and in Python 3 they are @@ -89,35 +92,23 @@ encoding <defining_output_encoding>` (still required in Python 3). Specifying the Encoding of a Template File ========================================== -This is the most basic encoding-related setting, and it is -equivalent to Python's "magic encoding comment", as described in -`pep-0263 <http://www.python.org/dev/peps/pep-0263/>`_. Any -template that contains non-ASCII characters requires that this -comment be present so that Mako can decode to unicode (and also -make usage of Python's AST parsing services). Mako's lexer will -use this encoding in order to convert the template source into a -``unicode`` object before continuing its parsing: +.. versionchanged:: 1.1.3 + + As of Mako 1.1.3, the default template encoding is "utf-8". Previously, a + Python "magic encoding comment" was required for templates that were not + using ASCII. + +Mako templates support Python's "magic encoding comment" syntax +described in `pep-0263 <http://www.python.org/dev/peps/pep-0263/>`_: .. sourcecode:: mako ## -*- coding: utf-8 -*- - Alors vous imaginez ma surprise, au lever du jour, quand + Alors vous imaginez ma surprise, au lever du jour, quand une drôle de petite voix m’a réveillé. Elle disait: « S’il vous plaît… dessine-moi un mouton! » -For the picky, the regular expression used is derived from that -of the above mentioned pep: - -.. 
sourcecode:: python - - #.*coding[:=]\s*([-\w.]+).*\n - -The lexer will convert to unicode in all cases, so that if any -characters exist in the template that are outside of the -specified encoding (or the default of ``ascii``), the error will -be immediate. - As an alternative, the template encoding can be specified programmatically to either :class:`.Template` or :class:`.TemplateLookup` via the ``input_encoding`` parameter: diff --git a/doc/build/unreleased/267.rst b/doc/build/unreleased/267.rst new file mode 100644 index 0000000..83a54a5 --- /dev/null +++ b/doc/build/unreleased/267.rst @@ -0,0 +1,8 @@ +.. change:: + :tags: bug, templates + :tickets: 267 + + The default template encoding is now utf-8. Previously, the encoding was + "ascii", which was standard throughout Python 2. This allows that + "magic encoding comment" for utf-8 templates is no longer required. + diff --git a/mako/lexer.py b/mako/lexer.py index a02b57f..6226e26 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -201,7 +201,7 @@ class Lexer(object): """ if isinstance(text, compat.text_type): m = self._coding_re.match(text) - encoding = m and m.group(1) or known_encoding or "ascii" + encoding = m and m.group(1) or known_encoding or "utf-8" return encoding, text if text.startswith(codecs.BOM_UTF8): @@ -222,7 +222,7 @@ class Lexer(object): if m: parsed_encoding = m.group(1) else: - parsed_encoding = known_encoding or "ascii" + parsed_encoding = known_encoding or "utf-8" if decode_raw: try: diff --git a/test/templates/chs_unicode_py3k.html b/test/templates/chs_unicode_py3k.html index e4b6a8f..1ee49cc 100644 --- a/test/templates/chs_unicode_py3k.html +++ b/test/templates/chs_unicode_py3k.html @@ -1,4 +1,3 @@ -## -*- coding:utf-8 -*- <% msg = '新中国的主席' %> diff --git a/test/templates/chs_utf8.html b/test/templates/chs_utf8.html index 5f4733f..50886be 100644 --- a/test/templates/chs_utf8.html +++ b/test/templates/chs_utf8.html @@ -1,4 +1,3 @@ -## -*- coding:utf-8 -*- <% msg = '新中国的主席' %> diff --git 
a/test/templates/unicode_arguments.html b/test/templates/unicode_arguments.html index b363cb6..e6d7c2c 100644 --- a/test/templates/unicode_arguments.html +++ b/test/templates/unicode_arguments.html @@ -1,4 +1,3 @@ -# coding: utf-8 <%def name="my_def(x)"> x is: ${x} diff --git a/test/templates/unicode_arguments_py3k.html b/test/templates/unicode_arguments_py3k.html index 47d918a..871517b 100644 --- a/test/templates/unicode_arguments_py3k.html +++ b/test/templates/unicode_arguments_py3k.html @@ -1,4 +1,3 @@ -# coding: utf-8 <%def name="my_def(x)"> x is: ${x} diff --git a/test/test_template.py b/test/test_template.py index 89e5a61..40fd10c 100644 --- a/test/test_template.py +++ b/test/test_template.py @@ -541,9 +541,7 @@ quand une drôle de petite voix m’a réveillé. Elle disait: # won't read the file even with open(...encoding='utf-8') unless # errors is specified. or if there's some quirk in 3.1.2 # since I'm pretty sure this test worked with py3k when I wrote it. - template.render( - path=self._file_path("internationalization.html") - ) + template.render(path=self._file_path("internationalization.html")) @requires_python_2 def test_bytestring_passthru(self): @@ -1682,7 +1680,7 @@ Text 8, 8, ], - "source_encoding": "ascii", + "source_encoding": "utf-8", "filename": None, "line_map": { 35: 29, @@ -1773,7 +1771,7 @@ Text 8, 8, ], - "source_encoding": "ascii", + "source_encoding": "utf-8", "filename": None, "line_map": { 34: 10, |