From c542669d4d7a6a2ee3a797a14362eef2a69ed089 Mon Sep 17 00:00:00 2001 From: PJ Eby Date: Wed, 27 Sep 2006 01:58:22 +0000 Subject: Allow explicit selection of Sourceforge mirror(s) with ``--sf-mirror``, and further refine download/retry algorithm. (backport from trunk) --HG-- branch : setuptools-0.6 extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/branches/setuptools-0.6%4052013 --- setuptools/package_index.py | 105 ++++++++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 32 deletions(-) (limited to 'setuptools/package_index.py') diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 78e89daf..ae816609 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -14,7 +14,7 @@ PYPI_MD5 = re.compile( '([^<]+)\n\s+\\(md5\\)' ) - +SF_DOWNLOAD = 'dl.sourceforge.net' URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() @@ -165,7 +165,9 @@ user_agent = "Python-urllib/%s setuptools/%s" % ( class PackageIndex(Environment): """A distribution index that scans web pages for download URLs""" - def __init__(self,index_url="http://www.python.org/pypi",hosts=('*',),*args,**kw): + def __init__(self, index_url="http://www.python.org/pypi", hosts=('*',), + sf_mirrors=None, *args, **kw + ): Environment.__init__(self,*args,**kw) self.index_url = index_url + "/"[:not index_url.endswith('/')] self.scanned_urls = {} @@ -173,6 +175,33 @@ class PackageIndex(Environment): self.package_pages = {} self.allows = re.compile('|'.join(map(translate,hosts))).match self.to_scan = [] + if sf_mirrors: + if isinstance(sf_mirrors,str): + self.sf_mirrors = map(str.strip, sf_mirrors.split(',')) + else: + self.sf_mirrors = map(str.strip, sf_mirrors) + else: + self.sf_mirrors = () + + + def _get_mirrors(self): + mirrors = [] + for mirror in self.sf_mirrors: + if mirror: + if '.' not in mirror: + mirror += '.dl.sourceforge.net' + mirrors.append(mirror) + + if not mirrors: + try: + mirrors.extend( + socket.gethostbyname_ex('sf-mirrors.telecommunity.com')[-1] + ) + except socket.error: + # DNS-bl0ck1n9 f1r3w4llz sUx0rs! + mirrors[:] = [SF_DOWNLOAD] + + return mirrors def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" @@ -202,7 +231,6 @@ class PackageIndex(Environment): f = self.open_url(url) self.fetched_urls[url] = self.fetched_urls[f.url] = True - if 'html' not in f.headers.get('content-type', '').lower(): f.close() # not html, we can't process it return @@ -212,7 +240,6 @@ class PackageIndex(Environment): f.close() if url.startswith(self.index_url) and getattr(f,'code',None)!=404: page = self.process_index(url, page) - for match in HREF.finditer(page): link = urlparse.urljoin(base, match.group(1)) self.process_url(link) @@ -244,6 +271,20 @@ class PackageIndex(Environment): + + + + + + + + + + + + + + def process_index(self,url,page): """Process the contents of a PyPI page""" def scan(link): @@ -581,27 +622,27 @@ class PackageIndex(Environment): def _retry_sf_download(self, url, filename): self.url_ok(url, True) # raises error if not allowed - try: - return self._attempt_download(url, filename) - except (KeyboardInterrupt,SystemExit): - raise - except: - scheme, server, path, param, query, frag = urlparse.urlparse(url) - if server!='dl.sourceforge.net': - raise + scheme, server, path, param, query, frag = urlparse.urlparse(url) + + if server == SF_DOWNLOAD: + mirrors = self._get_mirrors() + query = '' + else: + mirrors = [server] - mirror = get_sf_ip() + while mirrors or server != SF_DOWNLOAD: + mirror = random.choice(mirrors) + url = urlparse.urlunparse((scheme,mirror,path,param,query,frag)) - while _sf_mirrors: - self.warn("Download failed: %s", sys.exc_info()[1]) - url = urlparse.urlunparse((scheme, mirror, path, param, '', frag)) try: return self._attempt_download(url, filename) except (KeyboardInterrupt,SystemExit): raise except: - _sf_mirrors.remove(mirror) # don't retry the same mirror - mirror = get_sf_ip() + if server != SF_DOWNLOAD: + raise + self.warn("Download failed: %s", sys.exc_info()[1]) + mirrors.remove(mirror) raise # fail if no mirror works @@ -692,22 +733,9 @@ def fix_sf_url(url): if server!='prdownloads.sourceforge.net': return url return urlparse.urlunparse( - (scheme, 'dl.sourceforge.net', 'sourceforge'+path, param, '', frag) + (scheme, SF_DOWNLOAD, 'sourceforge'+path, param, '', frag) ) -_sf_mirrors = [] - -def get_sf_ip(): - if not _sf_mirrors: - try: - _sf_mirrors[:] = socket.gethostbyname_ex( - 'sf-mirrors.telecommunity.com')[-1] - except socket.error: - # DNS-bl0ck1n9 f1r3w4llz sUx0rs! - _sf_mirrors[:] = ['dl.sourceforge.net'] - return random.choice(_sf_mirrors) - - def local_open(url): """Read a local path, with special support for directories""" scheme, server, path, param, query, frag = urlparse.urlparse(url) @@ -735,4 +763,17 @@ def local_open(url): + + + + + + + + + + + + + # this line is a kludge to keep the trailing blank lines for pje's editor -- cgit v1.2.3