Backport fixes and doc updates; prep for 0.6c6 release

--HG-- branch : setuptools-0.6 extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/branches/setuptools-0.6%4055712
author: PJ Eby <distutils-sig@python.org> 2007-05-31 17:30:55 +0000
committer: PJ Eby <distutils-sig@python.org> 2007-05-31 17:30:55 +0000
commit: b364978eee6eaf2e03999ab0590a16278a03b13e (patch)
tree: 82cfe1b08ac83e76cb354191573a21beb4060695 /setuptools/package_index.py
parent: 89111e6143f3a9bb510433f529d4281681b7c66e (diff)
download: external_python_setuptools-b364978eee6eaf2e03999ab0590a16278a03b13e.tar.gz
external_python_setuptools-b364978eee6eaf2e03999ab0590a16278a03b13e.tar.bz2
external_python_setuptools-b364978eee6eaf2e03999ab0590a16278a03b13e.zip
1 files changed, 44 insertions, 4 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index e4f96f0b..3da253a5 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -132,14 +132,14 @@ def find_external_links(url, page):
         rels = map(str.strip, rel.lower().split(','))
         if 'homepage' in rels or 'download' in rels:
             for match in HREF.finditer(tag):
-                yield urlparse.urljoin(url, match.group(1))
+                yield urlparse.urljoin(url, htmldecode(match.group(1)))
 
     for tag in ("<th>Home Page", "<th>Download URL"):
         pos = page.find(tag)
         if pos!=-1:
             match = HREF.search(page,pos)
             if match:
-                yield urlparse.urljoin(url, match.group(1))
+                yield urlparse.urljoin(url, htmldecode(match.group(1)))
 
 user_agent = "Python-urllib/%s setuptools/%s" % (
     urllib2.__version__, require('setuptools')[0].version
@@ -200,7 +200,7 @@ class PackageIndex(Environment):
         if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
             page = self.process_index(url, page)
         for match in HREF.finditer(page):
-            link = urlparse.urljoin(base, match.group(1))
+            link = urlparse.urljoin(base, htmldecode(match.group(1)))
             self.process_url(link)
 
     def process_filename(self, fn, nested=False):
@@ -262,7 +262,7 @@ class PackageIndex(Environment):
 
         # process an index page into the package-page index
         for match in HREF.finditer(page):
-            scan( urlparse.urljoin(url, match.group(1)) )
+            scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
 
         pkg, ver = scan(url)   # ensure this page is in the page index
         if pkg:
@@ -611,6 +611,8 @@ class PackageIndex(Environment):
             self.url_ok(url, True)   # raises error if not allowed
             return self._attempt_download(url, filename)
 
+
+
     def scan_url(self, url):
         self.process_url(url, True)
 
@@ -652,6 +654,44 @@ class PackageIndex(Environment):
     def warn(self, msg, *args):
         log.warn(msg, *args)
 
+# This pattern matches a character entity reference (a decimal numeric
+# reference, a hexadecimal numeric reference, or a named reference).
+entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
+
+def uchr(c):
+    if not isinstance(c, int):
+        return c
+    if c>255: return unichr(c)
+    return chr(c)
+
+def decode_entity(match):
+    what = match.group(1)
+    if what.startswith('#x'):
+        what = int(what[2:], 16)
+    elif what.startswith('#'):
+        what = int(what[1:])
+    else:
+        from htmlentitydefs import name2codepoint
+        what = name2codepoint.get(what, match.group(0))
+    return uchr(what)
+
+def htmldecode(text):
+    """Decode HTML entities in the given text."""
+    return entity_sub(decode_entity, text)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
author	PJ Eby <distutils-sig@python.org>	2007-05-31 17:30:55 +0000
committer	PJ Eby <distutils-sig@python.org>	2007-05-31 17:30:55 +0000
commit	b364978eee6eaf2e03999ab0590a16278a03b13e (patch)
tree	82cfe1b08ac83e76cb354191573a21beb4060695 /setuptools/package_index.py
parent	89111e6143f3a9bb510433f529d4281681b7c66e (diff)
download	external_python_setuptools-b364978eee6eaf2e03999ab0590a16278a03b13e.tar.gz external_python_setuptools-b364978eee6eaf2e03999ab0590a16278a03b13e.tar.bz2 external_python_setuptools-b364978eee6eaf2e03999ab0590a16278a03b13e.zip