diff options
author | PJ Eby <distutils-sig@python.org> | 2006-01-17 19:56:59 +0000 |
---|---|---|
committer | PJ Eby <distutils-sig@python.org> | 2006-01-17 19:56:59 +0000 |
commit | 48bda8b1882e8d785af53081a73b9f5f3ee2d992 (patch) | |
tree | 9e406f78dfe64bbc9a3aff49ffc11ad400dfa020 /setuptools/package_index.py | |
parent | 5cda1e9eb7e5f5f71ba66791846c899cb0a17649 (diff) | |
download | external_python_setuptools-48bda8b1882e8d785af53081a73b9f5f3ee2d992.tar.gz external_python_setuptools-48bda8b1882e8d785af53081a73b9f5f3ee2d992.tar.bz2 external_python_setuptools-48bda8b1882e8d785af53081a73b9f5f3ee2d992.zip |
Scrape-proof Sourceforge mirror processing!
--HG--
branch : setuptools
extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/trunk/setuptools%4042088
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x | setuptools/package_index.py | 100 |
1 file changed, 60 insertions, 40 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 35cd04a6..c48968f1 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -134,9 +134,9 @@ class PackageIndex(Environment): def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" + url = fix_sf_url(url) if url in self.scanned_urls and not retrieve: return - self.scanned_urls[url] = True if not URL_SCHEME(url): # process filenames or directories @@ -296,6 +296,36 @@ class PackageIndex(Environment): "; possible download problem?" ) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + def download(self, spec, tmpdir): """Locate and/or download `spec` to `tmpdir`, returning a local path @@ -502,8 +532,6 @@ class PackageIndex(Environment): def _download_html(self, url, headers, filename, tmpdir): - # Check for a sourceforge URL - sf_url = url.startswith('http://prdownloads.') file = open(filename) for line in file: if line.strip(): @@ -513,13 +541,6 @@ class PackageIndex(Environment): file.close() os.unlink(filename) return self._download_svn(url, filename) - # Check for a SourceForge header - elif sf_url: - page = ''.join(list(file)) - if '?use_mirror=' in page: - file.close() - os.unlink(filename) - return self._download_sourceforge(url, page, tmpdir) break # not an index page file.close() os.unlink(filename) @@ -541,43 +562,42 @@ class PackageIndex(Environment): log.warn(msg, *args) +def fix_sf_url(url): + scheme, server, path, param, query, frag = urlparse.urlparse(url) + if server!='prdownloads.sourceforge.net': + return url + return urlparse.urlunparse( + (scheme, 'dl.sourceforge.net', 'sourceforge'+path, param, '', frag) + ) + + + + + + + + + + + + + + + + + + + + + + - def _download_sourceforge(self, source_url, sf_page, tmpdir): - """Download package from randomly-selected SourceForge mirror""" - self.debug("Processing SourceForge mirror page") - mirror_regex = 
re.compile(r'HREF="?(/.*?\?use_mirror=[^">]*)', re.I) - urls = [m.group(1) for m in mirror_regex.finditer(sf_page)] - if not urls: - raise DistutilsError( - "URL looks like a Sourceforge mirror page, but no URLs found" - ) - import random - url = urlparse.urljoin(source_url, random.choice(urls)) - self.info( - "Requesting redirect to (randomly selected) %r mirror", - url.split('=',1)[-1] - ) - f = self.open_url(url) - match = re.search( - r'(?i)<META HTTP-EQUIV="refresh" content=".*?URL=(.*?)"', - f.read() - ) - f.close() - if match: - download_url = match.group(1) - scheme = URL_SCHEME(download_url) - return self._download_url(scheme.group(1), download_url, tmpdir) - else: - raise DistutilsError( - 'No META HTTP-EQUIV="refresh" found in Sourceforge page at %s' - % url - ) |