about | summary | refs | log | tree | commit | diff | stats
path: root/setuptools/package_index.py
diff options
context:
space:
mode:
author: Dirley Rodrigues <dirleyrls@gmail.com> 2013-02-04 11:30:58 -0200
committer: Dirley Rodrigues <dirleyrls@gmail.com> 2013-02-04 11:30:58 -0200
commit: e3207bd63bcf365a1f91b7c3e75a4b3354435501 (patch)
tree: 42942c5ca5bfca15527d3bc55ea0d1d24f10fbec /setuptools/package_index.py
parent: cf2a28328628a15a95ec354f8c3a4421d3652e31 (diff)
download: external_python_setuptools-e3207bd63bcf365a1f91b7c3e75a4b3354435501.tar.gz
external_python_setuptools-e3207bd63bcf365a1f91b7c3e75a4b3354435501.tar.bz2
external_python_setuptools-e3207bd63bcf365a1f91b7c3e75a4b3354435501.zip
Improve external links finder to not yield duplicate links.
--HG--
branch : distribute
extra : rebase_source : 78e932fca32ee0ee1f50794cf998f4e7db78131b
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x setuptools/package_index.py 10
1 files changed, 8 insertions, 2 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 0ee21e3b..4393c83a 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -139,20 +139,26 @@ REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
def find_external_links(url, page):
"""Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
+ seen = set()
for match in REL.finditer(page):
tag, rel = match.groups()
rels = map(str.strip, rel.lower().split(','))
if 'homepage' in rels or 'download' in rels:
for match in HREF.finditer(tag):
- yield urlparse.urljoin(url, htmldecode(match.group(1)))
+ url = urlparse.urljoin(url, htmldecode(match.group(1)))
+ if not url in seen:
+ yield url
for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag)
if pos!=-1:
match = HREF.search(page,pos)
if match:
- yield urlparse.urljoin(url, htmldecode(match.group(1)))
+ url = urlparse.urljoin(url, htmldecode(match.group(1)))
+ if not url in seen:
+ yield url
+
user_agent = "Python-urllib/%s distribute/%s" % (
sys.version[:3], require('distribute')[0].version