aboutsummaryrefslogtreecommitdiffstats
path: root/setuptools/unicode_utils.py
diff options
context:
space:
mode:
authorPhilip Thiem <ptthiem@gmail.com>2014-05-17 04:14:19 -0500
committerPhilip Thiem <ptthiem@gmail.com>2014-05-17 04:14:19 -0500
commit1718b94353733bb79043a7c6d80efeba8bd0c8d1 (patch)
treee78377d38797a17038d7a8f025245c2a1ccd50fc /setuptools/unicode_utils.py
parent60a42fcd2ae461bac7bbe93af92a9d7e0d13b746 (diff)
downloadexternal_python_setuptools-1718b94353733bb79043a7c6d80efeba8bd0c8d1.tar.gz
external_python_setuptools-1718b94353733bb79043a7c6d80efeba8bd0c8d1.tar.bz2
external_python_setuptools-1718b94353733bb79043a7c6d80efeba8bd0c8d1.zip
Starting a unicode_utils module.
--HG-- extra : source : 2e47fa11a272ed61f7c1bbf88aae27e81040fe93
Diffstat (limited to 'setuptools/unicode_utils.py')
-rw-r--r--setuptools/unicode_utils.py41
1 files changed, 41 insertions, 0 deletions
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
new file mode 100644
index 00000000..d2de941a
--- /dev/null
+++ b/setuptools/unicode_utils.py
@@ -0,0 +1,41 @@
+import unicodedata
+import sys
+from setuptools.compat import unicode as decoded_string
+
+
+# HFS Plus uses decomposed UTF-8
def decompose(path):
    """
    Return ``path`` normalized to Unicode NFD (decomposed) form.

    Text input is normalized and returned as text. Any other input is
    treated as UTF-8 encoded bytes: it is decoded, normalized, and
    re-encoded as UTF-8. If a ``UnicodeError`` occurs along the way, the
    value reached so far is returned (the untouched input when it is not
    valid UTF-8).
    """
    if not isinstance(path, decoded_string):
        result = path
        try:
            result = path.decode('utf-8')
            result = unicodedata.normalize('NFD', result)
            result = result.encode('utf-8')
        except UnicodeError:
            # Not UTF-8: hand back whatever we have at this point.
            pass
        return result

    # Already decoded text: normalize directly.
    return unicodedata.normalize('NFD', path)
+
+
def filesys_decode(path):
    """
    Ensure that the given path is decoded.

    Text paths are returned unchanged. Byte paths are decoded using the
    filesystem encoding first and UTF-8 second; the first successful
    decoding is returned.

    Returns None when no expected encoding works.
    """
    if isinstance(path, decoded_string):
        return path

    # sys.getfilesystemencoding() may return None (documented for
    # Python 2 when the system default encoding is used); decoding with
    # None would raise TypeError, so fall back to UTF-8 in that case.
    fs_enc = sys.getfilesystemencoding() or 'utf-8'

    for enc in (fs_enc, 'utf-8'):
        try:
            return path.decode(enc)
        except UnicodeDecodeError:
            continue

    # Neither candidate encoding could decode the path.
    return None
+
+
def try_encode(string, enc):
    """
    Encode ``string`` with the encoding ``enc`` as a plain function call.

    Returns the encoded result, or None when the string cannot be
    represented in that encoding (``UnicodeEncodeError``).
    """
    try:
        encoded = string.encode(enc)
    except UnicodeEncodeError:
        encoded = None
    return encoded