From 85fa4a6bc506be12fdbd0f4cff139e7c4e3bc6a8 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 5 Apr 2019 10:04:50 -0400 Subject: When reading config files, require them to be encoded with UTF-8. Fixes #1702. --- setuptools/dist.py | 9 ++------- setuptools/tests/test_config.py | 31 +++++++++---------------------- 2 files changed, 11 insertions(+), 29 deletions(-) (limited to 'setuptools') diff --git a/setuptools/dist.py b/setuptools/dist.py index ae380290..9a165de0 100644 --- a/setuptools/dist.py +++ b/setuptools/dist.py @@ -35,7 +35,6 @@ from setuptools.depends import Require from setuptools import windows_support from setuptools.monkey import get_unpatched from setuptools.config import parse_configuration -from .unicode_utils import detect_encoding import pkg_resources __import__('setuptools.extern.packaging.specifiers') @@ -587,13 +586,9 @@ class Distribution(_Distribution): parser = ConfigParser() for filename in filenames: - with io.open(filename, 'rb') as fp: - encoding = detect_encoding(fp) + with io.open(filename, encoding='utf-8') as reader: if DEBUG: - self.announce(" reading %s [%s]" % ( - filename, encoding or 'locale') - ) - reader = io.TextIOWrapper(fp, encoding=encoding) + self.announce(" reading {filename}".format(**locals())) (parser.read_file if six.PY3 else parser.readfp)(reader) for section in parser.sections(): options = parser.options(section) diff --git a/setuptools/tests/test_config.py b/setuptools/tests/test_config.py index 4daf1df1..bc97664d 100644 --- a/setuptools/tests/test_config.py +++ b/setuptools/tests/test_config.py @@ -9,7 +9,6 @@ from mock import patch from setuptools.dist import Distribution, _Distribution from setuptools.config import ConfigHandler, read_configuration from setuptools.extern.six.moves import configparser -from setuptools.tests import is_ascii from . import py2_only, py3_only from .textwrap import DALS @@ -446,10 +445,6 @@ class TestMetadata: with get_dist(tmpdir): pass - skip_if_not_ascii = pytest.mark.skipif( - not is_ascii, reason='Test not supported with this locale') - - @skip_if_not_ascii def test_non_ascii_1(self, tmpdir): fake_env( tmpdir, @@ -457,18 +452,8 @@ class TestMetadata: 'description = éàïôñ\n', encoding='utf-8' ) - with pytest.raises(UnicodeDecodeError): - with get_dist(tmpdir): - pass - - def test_non_ascii_2(self, tmpdir): - fake_env( - tmpdir, - '# -*- coding: invalid\n' - ) - with pytest.raises(LookupError): - with get_dist(tmpdir): - pass + with get_dist(tmpdir): + pass def test_non_ascii_3(self, tmpdir): fake_env( @@ -479,7 +464,6 @@ class TestMetadata: with get_dist(tmpdir): pass - @skip_if_not_ascii def test_non_ascii_4(self, tmpdir): fake_env( tmpdir, @@ -491,8 +475,10 @@ class TestMetadata: with get_dist(tmpdir) as dist: assert dist.metadata.description == 'éàïôñ' - @skip_if_not_ascii - def test_non_ascii_5(self, tmpdir): + def test_not_utf8(self, tmpdir): + """ + Config files encoded not in UTF-8 will fail + """ fake_env( tmpdir, '# vim: set fileencoding=iso-8859-15 :\n' @@ -500,8 +486,9 @@ class TestMetadata: 'description = éàïôñ\n', encoding='iso-8859-15' ) - with get_dist(tmpdir) as dist: - assert dist.metadata.description == 'éàïôñ' + with pytest.raises(UnicodeDecodeError): + with get_dist(tmpdir): + pass class TestOptions: -- cgit v1.2.3 From 7b09ba64c0327ecea04cc95057ffa7d5c8d939c8 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 5 Apr 2019 10:46:00 -0400 Subject: Add test for setopt to demonstrate that edit_config retains non-ASCII characters. --- setuptools/tests/test_setopt.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 setuptools/tests/test_setopt.py (limited to 'setuptools') diff --git a/setuptools/tests/test_setopt.py b/setuptools/tests/test_setopt.py new file mode 100644 index 00000000..2241ef73 --- /dev/null +++ b/setuptools/tests/test_setopt.py @@ -0,0 +1,36 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import io + +import six + +from setuptools.command import setopt +from setuptools.extern.six.moves import configparser + + +class TestEdit: + @staticmethod + def parse_config(filename): + parser = configparser.ConfigParser() + with io.open(filename, encoding='utf-8') as reader: + (parser.read_file if six.PY3 else parser.readfp)(reader) + return parser + + @staticmethod + def write_text(file, content): + with io.open(file, 'wb') as strm: + strm.write(content.encode('utf-8')) + + def test_utf8_encoding_retained(self, tmpdir): + """ + When editing a file, non-ASCII characters encoded in + UTF-8 should be retained. + """ + config = tmpdir.join('setup.cfg') + self.write_text(config, '[names]\njaraco=йарацо') + setopt.edit_config(str(config), dict(names=dict(other='yes'))) + parser = self.parse_config(str(config)) + assert parser['names']['jaraco'] == 'йарацо' + assert parser['names']['other'] == 'yes' -- cgit v1.2.3 From b336e83a63722b3a3e4d3f1779686149d5cef8d1 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 5 Apr 2019 10:49:23 -0400 Subject: Add compatibility for Python 2 --- setuptools/tests/test_setopt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'setuptools') diff --git a/setuptools/tests/test_setopt.py b/setuptools/tests/test_setopt.py index 2241ef73..7c803500 100644 --- a/setuptools/tests/test_setopt.py +++ b/setuptools/tests/test_setopt.py @@ -29,8 +29,8 @@ class TestEdit: UTF-8 should be retained. """ config = tmpdir.join('setup.cfg') - self.write_text(config, '[names]\njaraco=йарацо') + self.write_text(str(config), '[names]\njaraco=йарацо') setopt.edit_config(str(config), dict(names=dict(other='yes'))) parser = self.parse_config(str(config)) - assert parser['names']['jaraco'] == 'йарацо' - assert parser['names']['other'] == 'yes' + assert parser.get('names', 'jaraco') == 'йарацо' + assert parser.get('names', 'other') == 'yes' -- cgit v1.2.3 From 7ed188bcaf38a25fb63fbb1ed3b070428ff95759 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 5 Apr 2019 11:07:02 -0400 Subject: Correct cyrillic to match preferred pronunciation. --- setuptools/tests/test_setopt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'setuptools') diff --git a/setuptools/tests/test_setopt.py b/setuptools/tests/test_setopt.py index 7c803500..3fb04fb4 100644 --- a/setuptools/tests/test_setopt.py +++ b/setuptools/tests/test_setopt.py @@ -29,8 +29,8 @@ class TestEdit: UTF-8 should be retained. """ config = tmpdir.join('setup.cfg') - self.write_text(str(config), '[names]\njaraco=йарацо') + self.write_text(str(config), '[names]\njaraco=джарако') setopt.edit_config(str(config), dict(names=dict(other='yes'))) parser = self.parse_config(str(config)) - assert parser.get('names', 'jaraco') == 'йарацо' + assert parser.get('names', 'jaraco') == 'джарако' assert parser.get('names', 'other') == 'yes' -- cgit v1.2.3 From f36781084f8f870ea747d477bd742057ea022421 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 5 Apr 2019 12:25:03 -0400 Subject: Remove detect_encoding, no longer used. --- setuptools/unicode_utils.py | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'setuptools') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 3b8179a8..7c63efd2 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,6 +1,5 @@ import unicodedata import sys -import re from setuptools.extern import six @@ -43,15 +42,3 @@ def try_encode(string, enc): return string.encode(enc) except UnicodeEncodeError: return None - - -CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)') - - -def detect_encoding(fp): - first_line = fp.readline() - fp.seek(0) - m = CODING_RE.match(first_line) - if m is None: - return None - return m.group(1).decode('ascii') -- cgit v1.2.3