diff options
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x | setuptools/package_index.py | 190 |
1 files changed, 161 insertions, 29 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 9a9c5d62..3a6b6fac 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -1,5 +1,6 @@ """PyPI and direct package downloading""" import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO +import base64 import httplib, urllib from pkg_resources import * from distutils import log @@ -24,20 +25,31 @@ __all__ = [ 'interpret_distro_name', ] +_SOCKET_TIMEOUT = 15 + def parse_bdist_wininst(name): """Return (base,pyversion) or (None,None) for possible .exe name""" lower = name.lower() - base, py_ver = None, None + base, py_ver, plat = None, None, None if lower.endswith('.exe'): if lower.endswith('.win32.exe'): base = name[:-10] + plat = 'win32' elif lower.startswith('.win32-py',-16): py_ver = name[-7:-4] base = name[:-16] + plat = 'win32' + elif lower.endswith('.win-amd64.exe'): + base = name[:-14] + plat = 'win-amd64' + elif lower.startswith('.win-amd64-py',-20): + py_ver = name[-7:-4] + base = name[:-20] + plat = 'win-amd64' + return base,py_ver,plat - return base,py_ver def egg_info_for_url(url): scheme, server, path, parameters, query, fragment = urlparse.urlparse(url) @@ -67,10 +79,10 @@ def distros_for_location(location, basename, metadata=None): # only one, unambiguous interpretation return [Distribution.from_location(location, basename, metadata)] if basename.endswith('.exe'): - win_base, py_ver = parse_bdist_wininst(basename) + win_base, py_ver, platform = parse_bdist_wininst(basename) if win_base is not None: return interpret_distro_name( - location, win_base, metadata, py_ver, BINARY_DIST, "win32" + location, win_base, metadata, py_ver, BINARY_DIST, platform ) # Try source distro extensions (.zip, .tgz, etc.) # @@ -142,7 +154,7 @@ def find_external_links(url, page): yield urlparse.urljoin(url, htmldecode(match.group(1))) user_agent = "Python-urllib/%s setuptools/%s" % ( - urllib2.__version__, require('setuptools')[0].version + sys.version[:3], require('setuptools')[0].version ) @@ -187,7 +199,7 @@ class PackageIndex(Environment): self.info("Reading %s", url) self.fetched_urls[url] = True # prevent multiple fetch attempts - f = self.open_url(url, "Download error: %s -- Some packages may not be found!") + f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url) if f is None: return self.fetched_urls[f.url] = True if 'html' not in f.headers.get('content-type', '').lower(): @@ -196,12 +208,19 @@ class PackageIndex(Environment): base = f.url # handle redirects page = f.read() + if not isinstance(page, str): # We are in Python 3 and got bytes. We want str. + if isinstance(f, urllib2.HTTPError): + # Errors have no charset, assume latin1: + charset = 'latin-1' + else: + charset = f.headers.get_param('charset') or 'latin-1' + page = page.decode(charset, "ignore") f.close() - if url.startswith(self.index_url) and getattr(f,'code',None)!=404: - page = self.process_index(url, page) for match in HREF.finditer(page): link = urlparse.urljoin(base, htmldecode(match.group(1))) self.process_url(link) + if url.startswith(self.index_url) and getattr(f,'code',None)!=404: + page = self.process_index(url, page) def process_filename(self, fn, nested=False): # process filenames or directories @@ -237,7 +256,7 @@ class PackageIndex(Environment): self.scan_egg_link(item, entry) def scan_egg_link(self, path, entry): - lines = filter(None, map(str.strip, file(os.path.join(path, entry)))) + lines = filter(None, map(str.strip, open(os.path.join(path, entry)))) if len(lines)==2: for dist in find_distributions(os.path.join(path, lines[0])): dist.location = os.path.join(path, *lines) @@ -262,7 +281,10 @@ class PackageIndex(Environment): # process an index page into the package-page index for match in HREF.finditer(page): - scan( urlparse.urljoin(url, htmldecode(match.group(1))) ) + try: + scan( urlparse.urljoin(url, htmldecode(match.group(1))) ) + except ValueError: + pass pkg, ver = scan(url) # ensure this page is in the page index if pkg: @@ -410,7 +432,7 @@ class PackageIndex(Environment): def fetch_distribution(self, requirement, tmpdir, force_scan=False, source=False, develop_ok=False, - local_index=None, + local_index=None ): """Obtain a distribution suitable for fulfilling `requirement` @@ -433,7 +455,9 @@ class PackageIndex(Environment): skipped = {} dist = None - def find(env, req): + def find(req, env=None): + if env is None: + env = self # Find a matching distribution; may be called more than once for dist in env[req.key]: @@ -452,18 +476,18 @@ class PackageIndex(Environment): if force_scan: self.prescan() self.find_packages(requirement) - dist = find(self, requirement) - + dist = find(requirement) + if local_index is not None: - dist = dist or find(local_index, requirement) + dist = dist or find(requirement, local_index) if dist is None and self.to_scan is not None: self.prescan() - dist = find(self, requirement) + dist = find(requirement) if dist is None and not force_scan: self.find_packages(requirement) - dist = find(self, requirement) + dist = find(requirement) if dist is None: self.warn( @@ -550,6 +574,7 @@ class PackageIndex(Environment): bs = self.dl_blocksize size = -1 if "content-length" in headers: + # Some servers return multiple Content-Length headers :( size = max(map(int,headers.getheaders("Content-Length"))) self.reporthook(url, filename, blocknum, bs, size) tfp = open(filename,'wb') @@ -573,19 +598,37 @@ class PackageIndex(Environment): def open_url(self, url, warning=None): - if url.startswith('file:'): return local_open(url) + if url.startswith('file:'): + return local_open(url) try: return open_with_auth(url) + except (ValueError, httplib.InvalidURL), v: + msg = ' '.join([str(arg) for arg in v.args]) + if warning: + self.warn(warning, msg) + else: + raise DistutilsError('%s %s' % (url, msg)) except urllib2.HTTPError, v: return v except urllib2.URLError, v: - reason = v.reason - except httplib.HTTPException, v: - reason = "%s: %s" % (v.__doc__ or v.__class__.__name__, v) - if warning: - self.warn(warning, reason) - else: - raise DistutilsError("Download error for %s: %s" % (url, reason)) + if warning: + self.warn(warning, v.reason) + else: + raise DistutilsError("Download error for %s: %s" + % (url, v.reason)) + except httplib.BadStatusLine, v: + if warning: + self.warn(warning, v.line) + else: + raise DistutilsError('%s returned a bad status line. ' + 'The server might be down, %s' % \ + (url, v.line)) + except httplib.HTTPException, v: + if warning: + self.warn(warning, v) + else: + raise DistutilsError("Download error for %s: %s" + % (url, v)) def _download_url(self, scheme, url, tmpdir): # Determine download filename @@ -606,8 +649,12 @@ class PackageIndex(Environment): # if scheme=='svn' or scheme.startswith('svn+'): return self._download_svn(url, filename) + elif scheme=='git' or scheme.startswith('git+'): + return self._download_git(url, filename) + elif scheme.startswith('hg+'): + return self._download_hg(url, filename) elif scheme=='file': - return urllib2.url2pathname(urlparse.urlparse(url)[2]) + return urllib.url2pathname(urlparse.urlparse(url)[2]) else: self.url_ok(url, True) # raises error if not allowed return self._attempt_download(url, filename) @@ -674,6 +721,55 @@ class PackageIndex(Environment): os.system("svn checkout%s -q %s %s" % (creds, url, filename)) return filename + def _vcs_split_rev_from_url(self, url, pop_prefix=False): + scheme, netloc, path, query, frag = urlparse.urlsplit(url) + + scheme = scheme.split('+', 1)[-1] + + # Some fragment identification fails + path = path.split('#',1)[0] + + rev = None + if '@' in path: + path, rev = path.rsplit('@', 1) + + # Also, discard fragment + url = urlparse.urlunsplit((scheme, netloc, path, query, '')) + + return url, rev + + def _download_git(self, url, filename): + filename = filename.split('#',1)[0] + url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) + + self.info("Doing git clone from %s to %s", url, filename) + os.system("git clone --quiet %s %s" % (url, filename)) + + if rev is not None: + self.info("Checking out %s", rev) + os.system("(cd %s && git checkout --quiet %s)" % ( + filename, + rev, + )) + + return filename + + def _download_hg(self, url, filename): + filename = filename.split('#',1)[0] + url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) + + self.info("Doing hg clone from %s to %s", url, filename) + os.system("hg clone --quiet %s %s" % (url, filename)) + + if rev is not None: + self.info("Updating to %s", rev) + os.system("(cd %s && hg up -C -r %s >&-)" % ( + filename, + rev, + )) + + return filename + def debug(self, msg, *args): log.debug(msg, *args) @@ -734,20 +830,52 @@ def htmldecode(text): +def socket_timeout(timeout=15): + def _socket_timeout(func): + def _socket_timeout(*args, **kwargs): + old_timeout = socket.getdefaulttimeout() + socket.setdefaulttimeout(timeout) + try: + return func(*args, **kwargs) + finally: + socket.setdefaulttimeout(old_timeout) + return _socket_timeout + return _socket_timeout +def _encode_auth(auth): + """ + A function compatible with Python 2.3-3.3 that will encode + auth from a URL suitable for an HTTP header. + >>> _encode_auth('username%3Apassword') + u'dXNlcm5hbWU6cGFzc3dvcmQ=' + """ + auth_s = urllib2.unquote(auth) + # convert to bytes + auth_bytes = auth_s.encode() + # use the legacy interface for Python 2.3 support + encoded_bytes = base64.encodestring(auth_bytes) + # convert back to a string + encoded = encoded_bytes.decode() + # strip the trailing carriage return + return encoded.rstrip() def open_with_auth(url): """Open a urllib2 request, handling HTTP authentication""" scheme, netloc, path, params, query, frag = urlparse.urlparse(url) + # Double scheme does not raise on Mac OS X as revealed by a + # failing test. We would expect "nonnumeric port". Refs #20. + if netloc.endswith(':'): + raise httplib.InvalidURL("nonnumeric port: ''") + if scheme in ('http', 'https'): auth, host = urllib.splituser(netloc) else: auth = None if auth: - auth = "Basic " + urllib2.unquote(auth).encode('base64').strip() + auth = "Basic " + _encode_auth(auth) new_url = urlparse.urlunparse((scheme,host,path,params,query,frag)) request = urllib2.Request(new_url) request.add_header("Authorization", auth) @@ -766,6 +894,8 @@ def open_with_auth(url): return fp +# adding a timeout to avoid freezing package_index +open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth) @@ -783,14 +913,16 @@ def fix_sf_url(url): def local_open(url): """Read a local path, with special support for directories""" scheme, server, path, param, query, frag = urlparse.urlparse(url) - filename = urllib2.url2pathname(path) + filename = urllib.url2pathname(path) if os.path.isfile(filename): return urllib2.urlopen(url) elif path.endswith('/') and os.path.isdir(filename): files = [] for f in os.listdir(filename): if f=='index.html': - body = open(os.path.join(filename,f),'rb').read() + fp = open(os.path.join(filename,f),'rb') + body = fp.read() + fp.close() break elif os.path.isdir(os.path.join(filename,f)): f+='/' |