aboutsummaryrefslogtreecommitdiffstats
path: root/setuptools/package_index.py
diff options
context:
space:
mode:
author Jason R. Coombs <jaraco@jaraco.com> 2013-05-24 11:02:05 -0400
committer Jason R. Coombs <jaraco@jaraco.com> 2013-05-24 11:02:05 -0400
commit 3b9a57a0c80ee11995fbe937e7dbeca3d83ec10a (patch)
tree fb099a4c0c184f971478248c43187ababbf86be0 /setuptools/package_index.py
parent b8327d7f646141415beb30acd39ce8840ebc708b (diff)
download external_python_setuptools-3b9a57a0c80ee11995fbe937e7dbeca3d83ec10a.tar.gz
external_python_setuptools-3b9a57a0c80ee11995fbe937e7dbeca3d83ec10a.tar.bz2
external_python_setuptools-3b9a57a0c80ee11995fbe937e7dbeca3d83ec10a.zip
Use a wrapper to ensure unique values on find_external_links. Factors out uniqueness test into a re-usable decorator and simplifies the body of find_external_links.
--HG-- branch : distribute
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x setuptools/package_index.py 24
1 files changed, 15 insertions, 9 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index e542f586..0a3f9e05 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -12,6 +12,8 @@ except ImportError:
from md5 import md5
from fnmatch import translate
+from .py24compat import wraps
+
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
@@ -153,32 +155,36 @@ def unique_everseen(iterable, key=None):
seen_add(k)
yield element
+def unique_values(func):
+ """
+ Wrap a function returning an iterable such that the resulting iterable
+ only ever yields unique items.
+ """
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ return unique_everseen(func(*args, **kwargs))
+ return wrapper
+
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting
+@unique_values
def find_external_links(url, page):
"""Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
- seen_links = set()
for match in REL.finditer(page):
tag, rel = match.groups()
rels = map(str.strip, rel.lower().split(','))
if 'homepage' in rels or 'download' in rels:
for match in HREF.finditer(tag):
- link = urlparse.urljoin(url, htmldecode(match.group(1)))
- if not link in seen_links:
- seen_links.add(link)
- yield link
+ yield urlparse.urljoin(url, htmldecode(match.group(1)))
for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag)
if pos!=-1:
match = HREF.search(page,pos)
if match:
- link = urlparse.urljoin(url, htmldecode(match.group(1)))
- if not link in seen_links:
- seen_links.add(link)
- yield link
+ yield urlparse.urljoin(url, htmldecode(match.group(1)))
user_agent = "Python-urllib/%s distribute/%s" % (