author     PJ Eby <distutils-sig@python.org>  2007-05-31 17:30:55 +0000
committer  PJ Eby <distutils-sig@python.org>  2007-05-31 17:30:55 +0000
commit     b364978eee6eaf2e03999ab0590a16278a03b13e (patch)
tree       82cfe1b08ac83e76cb354191573a21beb4060695 /setuptools/package_index.py
parent     89111e6143f3a9bb510433f529d4281681b7c66e (diff)
Backport fixes and doc updates; prep for 0.6c6 release
--HG--
branch : setuptools-0.6
extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/branches/setuptools-0.6%4055712
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x  setuptools/package_index.py  48
1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index e4f96f0b..3da253a5 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -132,14 +132,14 @@ def find_external_links(url, page):
         rels = map(str.strip, rel.lower().split(','))
         if 'homepage' in rels or 'download' in rels:
             for match in HREF.finditer(tag):
-                yield urlparse.urljoin(url, match.group(1))
+                yield urlparse.urljoin(url, htmldecode(match.group(1)))
 
     for tag in ("<th>Home Page", "<th>Download URL"):
         pos = page.find(tag)
         if pos!=-1:
             match = HREF.search(page,pos)
             if match:
-                yield urlparse.urljoin(url, match.group(1))
+                yield urlparse.urljoin(url, htmldecode(match.group(1)))
 
 user_agent = "Python-urllib/%s setuptools/%s" % (
     urllib2.__version__, require('setuptools')[0].version
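
This hunk and the two that follow make the same change: every href extracted from an index page is now passed through htmldecode() (defined at the end of this patch) before being joined against the base URL, so entity references such as &amp; in link targets are decoded first. A minimal sketch of the difference, using a made-up package URL purely for illustration:

    >>> import urlparse
    >>> raw = 'dist/example-1.0.tar.gz?md5=abc&amp;action=download'
    >>> urlparse.urljoin('http://example.com/simple/example/', raw)
    'http://example.com/simple/example/dist/example-1.0.tar.gz?md5=abc&amp;action=download'
    >>> urlparse.urljoin('http://example.com/simple/example/', htmldecode(raw))
    'http://example.com/simple/example/dist/example-1.0.tar.gz?md5=abc&action=download'

Without the decoding step the literal &amp; survives into the download URL and the eventual request carries a mangled query string.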
@@ -200,7 +200,7 @@ class PackageIndex(Environment):
         if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
             page = self.process_index(url, page)
         for match in HREF.finditer(page):
-            link = urlparse.urljoin(base, match.group(1))
+            link = urlparse.urljoin(base, htmldecode(match.group(1)))
             self.process_url(link)
 
     def process_filename(self, fn, nested=False):
@@ -262,7 +262,7 @@ class PackageIndex(Environment):
         # process an index page into the package-page index
         for match in HREF.finditer(page):
-            scan( urlparse.urljoin(url, match.group(1)) )
+            scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
 
         pkg, ver = scan(url)   # ensure this page is in the page index
         if pkg:
@@ -611,6 +611,8 @@ class PackageIndex(Environment):
         self.url_ok(url, True)   # raises error if not allowed
         return self._attempt_download(url, filename)
 
+
+
     def scan_url(self, url):
         self.process_url(url, True)
@@ -652,6 +654,44 @@ class PackageIndex(Environment):
     def warn(self, msg, *args):
         log.warn(msg, *args)
 
+# This pattern matches a character entity reference (a decimal numeric
+# reference, a hexadecimal numeric reference, or a named reference).
+entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
+
+def uchr(c):
+    if not isinstance(c, int):
+        return c
+    if c>255: return unichr(c)
+    return chr(c)
+
+def decode_entity(match):
+    what = match.group(1)
+    if what.startswith('#x'):
+        what = int(what[2:], 16)
+    elif what.startswith('#'):
+        what = int(what[1:])
+    else:
+        from htmlentitydefs import name2codepoint
+        what = name2codepoint.get(what, match.group(0))
+    return uchr(what)
+
+def htmldecode(text):
+    """Decode HTML entities in the given text."""
+    return entity_sub(decode_entity, text)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
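
For reference, here is how the new helper behaves on a few hypothetical inputs (Python 2 is assumed throughout, since unichr and htmlentitydefs are Python 2 names):

    >>> htmldecode('example-1.0.tar.gz?a=1&amp;b=2')   # named reference
    'example-1.0.tar.gz?a=1&b=2'
    >>> htmldecode('&#65;&#x42;C')                     # decimal and hex references
    'ABC'
    >>> htmldecode('&bogus;')                          # unrecognized names pass through
    '&bogus;'

decode_entity() converts numeric references with int() (base 16 for the #x form), looks named references up in htmlentitydefs.name2codepoint, and falls back to the original matched text when the name is unknown; uchr() returns a plain byte for code points at or below 255 and a unicode character above that, so links whose entities all decode to Latin-1 stay ordinary byte strings.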