diff options
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x | setuptools/package_index.py | 104 |
1 files changed, 69 insertions, 35 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 1e2a086d..99247ef1 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -1,6 +1,8 @@ """PyPI and direct package downloading""" import sys, os.path, re, shutil, random, socket +import itertools import base64 +from setuptools import ssl_support from pkg_resources import * from distutils import log from distutils.errors import DistutilsError @@ -14,8 +16,8 @@ try: except ImportError: from md5 import md5 from fnmatch import translate - from setuptools.py24compat import wraps +from setuptools.py27compat import get_all_headers EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$') HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I) @@ -61,6 +63,8 @@ def parse_bdist_wininst(name): def egg_info_for_url(url): scheme, server, path, parameters, query, fragment = urlparse(url) base = unquote(path.split('/')[-1]) + if server=='sourceforge.net' and base=='download': # XXX Yuck + base = unquote(path.split('/')[-2]) if '#' in base: base, fragment = base.split('#',1) return base,fragment @@ -83,14 +87,12 @@ def distros_for_location(location, basename, metadata=None): if basename.endswith('.egg') and '-' in basename: # only one, unambiguous interpretation return [Distribution.from_location(location, basename, metadata)] - if basename.endswith('.exe'): win_base, py_ver, platform = parse_bdist_wininst(basename) if win_base is not None: return interpret_distro_name( location, win_base, metadata, py_ver, BINARY_DIST, platform ) - # Try source distro extensions (.zip, .tgz, etc.) # for ext in EXTENSIONS: @@ -189,17 +191,15 @@ def find_external_links(url, page): if match: yield urljoin(url, htmldecode(match.group(1))) - -user_agent = "Python-urllib/%s distribute/%s" % ( - sys.version[:3], require('distribute')[0].version +user_agent = "Python-urllib/%s setuptools/%s" % ( + sys.version[:3], require('setuptools')[0].version ) - class PackageIndex(Environment): """A distribution index that scans web pages for download URLs""" - def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',), - *args, **kw + def __init__(self, index_url="https://pypi.python.org/simple", hosts=('*',), + ca_bundle=None, verify_ssl=True, *args, **kw ): Environment.__init__(self,*args,**kw) self.index_url = index_url + "/"[:not index_url.endswith('/')] @@ -208,8 +208,9 @@ class PackageIndex(Environment): self.package_pages = {} self.allows = re.compile('|'.join(map(translate,hosts))).match self.to_scan = [] - - + if verify_ssl and ssl_support.is_available and (ca_bundle or ssl_support.find_ca_bundle()): + self.opener = ssl_support.opener_for(ca_bundle) + else: self.opener = urllib2.urlopen def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" @@ -235,10 +236,10 @@ class PackageIndex(Environment): return self.info("Reading %s", url) + self.fetched_urls[url] = True # prevent multiple fetch attempts f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url) if f is None: return - self.fetched_urls[url] = self.fetched_urls[f.url] = True - + self.fetched_urls[f.url] = True if 'html' not in f.headers.get('content-type', '').lower(): f.close() # not html, we can't process it return @@ -488,7 +489,6 @@ class PackageIndex(Environment): set, development and system eggs (i.e., those using the ``.egg-info`` format) will be ignored. """ - # process a Requirement self.info("Searching for %s", requirement) skipped = {} @@ -508,10 +508,9 @@ class PackageIndex(Environment): continue if dist in req and (dist.precedence<=SOURCE_DIST or not source): - self.info("Best match: %s", dist) - return dist.clone( - location=self.download(dist.location, tmpdir) - ) + return dist + + if force_scan: self.prescan() @@ -535,7 +534,10 @@ class PackageIndex(Environment): (source and "a source distribution of " or ""), requirement, ) - return dist + else: + self.info("Best match: %s", dist) + return dist.clone(location=self.download(dist.location, tmpdir)) + def fetch(self, requirement, tmpdir, force_scan=False, source=False): """Obtain a file suitable for fulfilling `requirement` @@ -551,12 +553,6 @@ class PackageIndex(Environment): return None - - - - - - def gen_setup(self, filename, fragment, tmpdir): match = EGG_FRAGMENT.match(fragment) dists = match and [d for d in @@ -618,8 +614,8 @@ class PackageIndex(Environment): size = -1 if "content-length" in headers: # Some servers return multiple Content-Length headers :( - content_length = headers.get("Content-Length") - size = int(content_length) + sizes = get_all_headers(headers, 'Content-Length') + size = max(map(int, sizes)) self.reporthook(url, filename, blocknum, bs, size) tfp = open(filename,'wb') while True: @@ -645,7 +641,7 @@ class PackageIndex(Environment): if url.startswith('file:'): return local_open(url) try: - return open_with_auth(url) + return open_with_auth(url, self.opener) except (ValueError, httplib.InvalidURL): v = sys.exc_info()[1] msg = ' '.join([str(arg) for arg in v.args]) @@ -682,9 +678,8 @@ class PackageIndex(Environment): def _download_url(self, scheme, url, tmpdir): # Determine download filename # - name = [_f for _f in urlparse(url)[2].split('/') if _f] + name, fragment = egg_info_for_url(url) if name: - name = name[-1] while '..' in name: name = name.replace('..','.').replace('\\','_') else: @@ -709,8 +704,6 @@ class PackageIndex(Environment): self.url_ok(url, True) # raises error if not allowed return self._attempt_download(url, filename) - - def scan_url(self, url): self.process_url(url, True) @@ -737,10 +730,39 @@ class PackageIndex(Environment): os.unlink(filename) raise DistutilsError("Unexpected HTML page found at "+url) + + + + + + + + + + + + + + + def _download_svn(self, url, filename): url = url.split('#',1)[0] # remove any fragment for svn's sake + creds = '' + if url.lower().startswith('svn:') and '@' in url: + scheme, netloc, path, p, q, f = urlparse.urlparse(url) + if not netloc and path.startswith('//') and '/' in path[2:]: + netloc, path = path[2:].split('/',1) + auth, host = urllib.splituser(netloc) + if auth: + if ':' in auth: + user, pw = auth.split(':',1) + creds = " --username=%s --password=%s" % (user, pw) + else: + creds = " --username="+auth + netloc = host + url = urlparse.urlunparse((scheme, netloc, url, p, q, f)) self.info("Doing subversion checkout from %s to %s", url, filename) - os.system("svn checkout -q %s %s" % (url, filename)) + os.system("svn checkout%s -q %s %s" % (creds, url, filename)) return filename def _vcs_split_rev_from_url(self, url, pop_prefix=False): @@ -801,6 +823,18 @@ class PackageIndex(Environment): def warn(self, msg, *args): log.warn(msg, *args) + + + + + + + + + + + + # This pattern matches a character entity reference (a decimal numeric # references, a hexadecimal numeric reference, or a named reference). entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub @@ -868,7 +902,7 @@ def _encode_auth(auth): # strip the trailing carriage return return encoded.rstrip() -def open_with_auth(url): +def open_with_auth(url, opener=urllib2.urlopen): """Open a urllib2 request, handling HTTP authentication""" scheme, netloc, path, params, query, frag = urlparse(url) @@ -892,7 +926,7 @@ def open_with_auth(url): request = urllib2.Request(url) request.add_header('User-Agent', user_agent) - fp = urllib2.urlopen(request) + fp = opener(request) if auth: # Put authentication info back into request URL if same host, |