about summary refs log tree commit diff stats
path: root/setuptools/package_index.py
diff options
context:
space:
mode:
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x setuptools/package_index.py 190
1 files changed, 161 insertions, 29 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 9a9c5d62..3a6b6fac 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -1,5 +1,6 @@
"""PyPI and direct package downloading"""
import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO
+import base64
import httplib, urllib
from pkg_resources import *
from distutils import log
@@ -24,20 +25,31 @@ __all__ = [
'interpret_distro_name',
]
+_SOCKET_TIMEOUT = 15
+
def parse_bdist_wininst(name):
- """Return (base,pyversion) or (None,None) for possible .exe name"""
+ """Return (base,pyversion,platform) or (None,None,None) for possible .exe name"""
+ # The slice widths below are the lengths of the recognised suffixes:
+ # '.win32.exe' = 10, '.win32-pyX.Y.exe' = 16,
+ # '.win-amd64.exe' = 14, '.win-amd64-pyX.Y.exe' = 20.
+ # name[-7:-4] is the three-character 'X.Y' sitting just before '.exe'.
lower = name.lower()
- base, py_ver = None, None
+ base, py_ver, plat = None, None, None
if lower.endswith('.exe'):
if lower.endswith('.win32.exe'):
base = name[:-10]
+ plat = 'win32'
elif lower.startswith('.win32-py',-16):
py_ver = name[-7:-4]
base = name[:-16]
+ plat = 'win32'
+ elif lower.endswith('.win-amd64.exe'):
+ base = name[:-14]
+ plat = 'win-amd64'
+ elif lower.startswith('.win-amd64-py',-20):
+ py_ver = name[-7:-4]
+ base = name[:-20]
+ plat = 'win-amd64'
+ return base,py_ver,plat
- return base,py_ver
def egg_info_for_url(url):
scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
@@ -67,10 +79,10 @@ def distros_for_location(location, basename, metadata=None):
# only one, unambiguous interpretation
return [Distribution.from_location(location, basename, metadata)]
if basename.endswith('.exe'):
- win_base, py_ver = parse_bdist_wininst(basename)
+ win_base, py_ver, platform = parse_bdist_wininst(basename)
if win_base is not None:
return interpret_distro_name(
- location, win_base, metadata, py_ver, BINARY_DIST, "win32"
+ location, win_base, metadata, py_ver, BINARY_DIST, platform
)
# Try source distro extensions (.zip, .tgz, etc.)
#
@@ -142,7 +154,7 @@ def find_external_links(url, page):
yield urlparse.urljoin(url, htmldecode(match.group(1)))
user_agent = "Python-urllib/%s setuptools/%s" % (
- urllib2.__version__, require('setuptools')[0].version
+ sys.version[:3], require('setuptools')[0].version
)
@@ -187,7 +199,7 @@ class PackageIndex(Environment):
self.info("Reading %s", url)
self.fetched_urls[url] = True # prevent multiple fetch attempts
- f = self.open_url(url, "Download error: %s -- Some packages may not be found!")
+ f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
if f is None: return
self.fetched_urls[f.url] = True
if 'html' not in f.headers.get('content-type', '').lower():
@@ -196,12 +208,19 @@ class PackageIndex(Environment):
base = f.url # handle redirects
page = f.read()
+ if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
+ if isinstance(f, urllib2.HTTPError):
+ # Errors have no charset, assume latin1:
+ charset = 'latin-1'
+ else:
+ charset = f.headers.get_param('charset') or 'latin-1'
+ page = page.decode(charset, "ignore")
f.close()
- if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
- page = self.process_index(url, page)
for match in HREF.finditer(page):
link = urlparse.urljoin(base, htmldecode(match.group(1)))
self.process_url(link)
+ if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
+ page = self.process_index(url, page)
def process_filename(self, fn, nested=False):
# process filenames or directories
@@ -237,7 +256,7 @@ class PackageIndex(Environment):
self.scan_egg_link(item, entry)
def scan_egg_link(self, path, entry):
- lines = filter(None, map(str.strip, file(os.path.join(path, entry))))
+ lines = filter(None, map(str.strip, open(os.path.join(path, entry))))
if len(lines)==2:
for dist in find_distributions(os.path.join(path, lines[0])):
dist.location = os.path.join(path, *lines)
@@ -262,7 +281,10 @@ class PackageIndex(Environment):
# process an index page into the package-page index
for match in HREF.finditer(page):
- scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
+ try:
+ scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
+ except ValueError:
+ pass
pkg, ver = scan(url) # ensure this page is in the page index
if pkg:
@@ -410,7 +432,7 @@ class PackageIndex(Environment):
def fetch_distribution(self,
requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
- local_index=None,
+ local_index=None
):
"""Obtain a distribution suitable for fulfilling `requirement`
@@ -433,7 +455,9 @@ class PackageIndex(Environment):
skipped = {}
dist = None
- def find(env, req):
+ def find(req, env=None):
+ if env is None:
+ env = self
# Find a matching distribution; may be called more than once
for dist in env[req.key]:
@@ -452,18 +476,18 @@ class PackageIndex(Environment):
if force_scan:
self.prescan()
self.find_packages(requirement)
- dist = find(self, requirement)
-
+ dist = find(requirement)
+
if local_index is not None:
- dist = dist or find(local_index, requirement)
+ dist = dist or find(requirement, local_index)
if dist is None and self.to_scan is not None:
self.prescan()
- dist = find(self, requirement)
+ dist = find(requirement)
if dist is None and not force_scan:
self.find_packages(requirement)
- dist = find(self, requirement)
+ dist = find(requirement)
if dist is None:
self.warn(
@@ -550,6 +574,7 @@ class PackageIndex(Environment):
bs = self.dl_blocksize
size = -1
if "content-length" in headers:
+ # Some servers return multiple Content-Length headers :(
size = max(map(int,headers.getheaders("Content-Length")))
self.reporthook(url, filename, blocknum, bs, size)
tfp = open(filename,'wb')
@@ -573,19 +598,37 @@ class PackageIndex(Environment):
def open_url(self, url, warning=None):
+ """Open `url`; on failure warn using the `warning` template if given, else raise DistutilsError"""
+ # `warning` is a format string with one %s slot for the failure detail.
+ # When it is supplied, a failed open is reported through self.warn() and
+ # the method falls off the end (returning None) instead of raising.
- if url.startswith('file:'): return local_open(url)
+ if url.startswith('file:'):
+ return local_open(url)
try:
return open_with_auth(url)
+ except (ValueError, httplib.InvalidURL), v:
+ msg = ' '.join([str(arg) for arg in v.args])
+ if warning:
+ self.warn(warning, msg)
+ else:
+ raise DistutilsError('%s %s' % (url, msg))
except urllib2.HTTPError, v:
+ # hand the HTTPError object back to the caller instead of raising
return v
except urllib2.URLError, v:
- reason = v.reason
- except httplib.HTTPException, v:
- reason = "%s: %s" % (v.__doc__ or v.__class__.__name__, v)
- if warning:
- self.warn(warning, reason)
- else:
- raise DistutilsError("Download error for %s: %s" % (url, reason))
+ if warning:
+ self.warn(warning, v.reason)
+ else:
+ raise DistutilsError("Download error for %s: %s"
+ % (url, v.reason))
+ # BadStatusLine is listed before the more general HTTPException handler
+ # so that it gets its own, more specific message
+ except httplib.BadStatusLine, v:
+ if warning:
+ self.warn(warning, v.line)
+ else:
+ raise DistutilsError('%s returned a bad status line. '
+ 'The server might be down, %s' % \
+ (url, v.line))
+ except httplib.HTTPException, v:
+ if warning:
+ self.warn(warning, v)
+ else:
+ raise DistutilsError("Download error for %s: %s"
+ % (url, v))
def _download_url(self, scheme, url, tmpdir):
# Determine download filename
@@ -606,8 +649,12 @@ class PackageIndex(Environment):
#
if scheme=='svn' or scheme.startswith('svn+'):
return self._download_svn(url, filename)
+ elif scheme=='git' or scheme.startswith('git+'):
+ return self._download_git(url, filename)
+ elif scheme.startswith('hg+'):
+ return self._download_hg(url, filename)
elif scheme=='file':
- return urllib2.url2pathname(urlparse.urlparse(url)[2])
+ return urllib.url2pathname(urlparse.urlparse(url)[2])
else:
self.url_ok(url, True) # raises error if not allowed
return self._attempt_download(url, filename)
@@ -674,6 +721,55 @@ class PackageIndex(Environment):
os.system("svn checkout%s -q %s %s" % (creds, url, filename))
return filename
+    def _vcs_split_rev_from_url(self, url, pop_prefix=False):
+        """Split a VCS download URL into ``(plain_url, revision)``.
+
+        Any ``vcs+`` transport prefix on the scheme (``git+https`` ->
+        ``https``) is removed, a trailing ``@rev`` on the path is split off
+        as the revision (``None`` when absent), and the fragment is dropped.
+
+        `pop_prefix` is accepted but not consulted: the scheme prefix is
+        stripped regardless of its value.
+        """
+        parts = urlparse.urlsplit(url)
+        scheme, netloc, path, query = parts[0], parts[1], parts[2], parts[3]
+
+        # keep only what follows the first '+', e.g. "hg+http" -> "http"
+        scheme = scheme.split('+', 1)[-1]
+
+        # Some fragment identification fails, so also cut the path at '#'
+        path = path.split('#', 1)[0]
+
+        if '@' in path:
+            # the revision is whatever follows the last '@' in the path
+            path, rev = path.rsplit('@', 1)
+        else:
+            rev = None
+
+        # Also, discard fragment
+        return urlparse.urlunsplit((scheme, netloc, path, query, '')), rev
+
+
+    def _download_git(self, url, filename):
+        """Clone the git repository at `url` into `filename` and return `filename`.
+
+        A ``#fragment`` on `filename` is discarded.  When the URL path ends
+        in ``@rev``, that revision is checked out after cloning.
+
+        `url` can come from links found on index pages, so git is invoked
+        with an argument list rather than through a shell command string:
+        shell metacharacters in the URL or revision are passed to git as
+        literal text instead of being interpreted.
+        """
+        # imported locally so this method does not rely on a module-level import
+        import subprocess
+
+        filename = filename.split('#',1)[0]
+        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
+
+        self.info("Doing git clone from %s to %s", url, filename)
+        try:
+            subprocess.call(["git", "clone", "--quiet", url, filename])
+
+            if rev is not None:
+                self.info("Checking out %s", rev)
+                # cwd= replaces the former "cd ... &&" shell prefix
+                subprocess.call(["git", "checkout", "--quiet", rev],
+                                cwd=filename)
+        except OSError:
+            # Raised when git cannot be started or the clone directory does
+            # not exist.  The shell-based version never raised here, so stay
+            # best-effort and only report the problem.
+            self.warn("Unable to run git for %s", url)
+
+        return filename
+
+
+    def _download_hg(self, url, filename):
+        """Clone the Mercurial repository at `url` into `filename` and return `filename`.
+
+        A ``#fragment`` on `filename` is discarded.  When the URL path ends
+        in ``@rev``, the working copy is updated to that revision after
+        cloning, with hg's standard output suppressed.
+
+        `url` can come from links found on index pages, so hg is invoked
+        with an argument list rather than through a shell command string:
+        shell metacharacters in the URL or revision are passed to hg as
+        literal text instead of being interpreted.
+        """
+        # imported locally so this method does not rely on a module-level import
+        import subprocess
+
+        filename = filename.split('#',1)[0]
+        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
+
+        self.info("Doing hg clone from %s to %s", url, filename)
+        try:
+            subprocess.call(["hg", "clone", "--quiet", url, filename])
+
+            if rev is not None:
+                self.info("Updating to %s", rev)
+                # stands in for the former ">&-" redirection: discard stdout
+                devnull = open(os.devnull, 'w')
+                try:
+                    # cwd= replaces the former "cd ... &&" shell prefix
+                    subprocess.call(["hg", "up", "-C", "-r", rev],
+                                    cwd=filename, stdout=devnull)
+                finally:
+                    devnull.close()
+        except OSError:
+            # Raised when hg cannot be started or the clone directory does
+            # not exist.  The shell-based version never raised here, so stay
+            # best-effort and only report the problem.
+            self.warn("Unable to run hg for %s", url)
+
+        return filename
+
+
def debug(self, msg, *args):
log.debug(msg, *args)
@@ -734,20 +830,52 @@ def htmldecode(text):
+def socket_timeout(timeout=15):
+    """Return a decorator that runs a function under a default socket timeout.
+
+    While the decorated function executes, ``socket.setdefaulttimeout`` is
+    set to `timeout`; the value that was in effect beforehand is put back
+    afterwards, whether the function returns or raises.
+    """
+    def _decorate(func):
+        def _socket_timeout(*args, **kwargs):
+            # remember the current default so it can be reinstated
+            previous = socket.getdefaulttimeout()
+            socket.setdefaulttimeout(timeout)
+            try:
+                result = func(*args, **kwargs)
+            finally:
+                socket.setdefaulttimeout(previous)
+            return result
+        return _socket_timeout
+    return _decorate
+def _encode_auth(auth):
+    """
+    A function compatible with Python 2.3-3.3 that will encode
+    auth from a URL suitable for an HTTP header.
+
+    `auth` is the still percent-quoted ``user:password`` part of a URL.
+    The result is its base64 encoding as a single line of text, with no
+    line breaks even when the credentials are long.
+
+    >>> _encode_auth('username%3Apassword')
+    u'dXNlcm5hbWU6cGFzc3dvcmQ='
+    """
+    auth_s = urllib2.unquote(auth)
+    # convert to bytes
+    auth_bytes = auth_s.encode()
+    # use the legacy interface for Python 2.3 support
+    encoded_bytes = base64.encodestring(auth_bytes)
+    # convert back to a string
+    encoded = encoded_bytes.decode()
+    # encodestring() emits a newline after every 76 characters of output as
+    # well as at the end.  Remove all of them: rstrip() would only drop the
+    # final one and leave line breaks inside the header value.
+    return encoded.replace('\n', '')
def open_with_auth(url):
"""Open a urllib2 request, handling HTTP authentication"""
scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
+ # Double scheme does not raise on Mac OS X as revealed by a
+ # failing test. We would expect "nonnumeric port". Refs #20.
+ if netloc.endswith(':'):
+ raise httplib.InvalidURL("nonnumeric port: ''")
+
if scheme in ('http', 'https'):
auth, host = urllib.splituser(netloc)
else:
auth = None
if auth:
- auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
+ auth = "Basic " + _encode_auth(auth)
new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
request = urllib2.Request(new_url)
request.add_header("Authorization", auth)
@@ -766,6 +894,8 @@ def open_with_auth(url):
return fp
+# adding a timeout to avoid freezing package_index
+open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
@@ -783,14 +913,16 @@ def fix_sf_url(url):
def local_open(url):
"""Read a local path, with special support for directories"""
scheme, server, path, param, query, frag = urlparse.urlparse(url)
- filename = urllib2.url2pathname(path)
+ filename = urllib.url2pathname(path)
if os.path.isfile(filename):
return urllib2.urlopen(url)
elif path.endswith('/') and os.path.isdir(filename):
files = []
for f in os.listdir(filename):
if f=='index.html':
- body = open(os.path.join(filename,f),'rb').read()
+ fp = open(os.path.join(filename,f),'rb')
+ body = fp.read()
+ fp.close()
break
elif os.path.isdir(os.path.join(filename,f)):
f+='/'