1 files changed, 161 insertions, 29 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 9a9c5d62..3a6b6fac 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -1,5 +1,6 @@
 """PyPI and direct package downloading"""
 import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO
+import base64
 import httplib, urllib
 from pkg_resources import *
 from distutils import log
@@ -24,20 +25,31 @@ __all__ = [
     'interpret_distro_name',
 ]
 
+_SOCKET_TIMEOUT = 15
+
 def parse_bdist_wininst(name):
     """Return (base,pyversion) or (None,None) for possible .exe name"""
 
     lower = name.lower()
-    base, py_ver = None, None
+    base, py_ver, plat = None, None, None
 
     if lower.endswith('.exe'):
         if lower.endswith('.win32.exe'):
             base = name[:-10]
+            plat = 'win32'
         elif lower.startswith('.win32-py',-16):
             py_ver = name[-7:-4]
             base = name[:-16]
+            plat = 'win32'
+        elif lower.endswith('.win-amd64.exe'):
+            base = name[:-14]
+            plat = 'win-amd64'
+        elif lower.startswith('.win-amd64-py',-20):
+            py_ver = name[-7:-4]
+            base = name[:-20]
+            plat = 'win-amd64'
+    return base,py_ver,plat
 
-    return base,py_ver
 
 def egg_info_for_url(url):
     scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
@@ -67,10 +79,10 @@ def distros_for_location(location, basename, metadata=None):
         # only one, unambiguous interpretation
         return [Distribution.from_location(location, basename, metadata)]
     if basename.endswith('.exe'):
-        win_base, py_ver = parse_bdist_wininst(basename)
+        win_base, py_ver, platform = parse_bdist_wininst(basename)
         if win_base is not None:
             return interpret_distro_name(
-                location, win_base, metadata, py_ver, BINARY_DIST, "win32"
+                location, win_base, metadata, py_ver, BINARY_DIST, platform
             )
     # Try source distro extensions (.zip, .tgz, etc.)
     #
@@ -142,7 +154,7 @@ def find_external_links(url, page):
                 yield urlparse.urljoin(url, htmldecode(match.group(1)))
 
 user_agent = "Python-urllib/%s setuptools/%s" % (
-    urllib2.__version__, require('setuptools')[0].version
+    sys.version[:3], require('setuptools')[0].version
 )
 
 
@@ -187,7 +199,7 @@ class PackageIndex(Environment):
 
         self.info("Reading %s", url)
         self.fetched_urls[url] = True   # prevent multiple fetch attempts
-        f = self.open_url(url, "Download error: %s -- Some packages may not be found!")
+        f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
         if f is None: return
         self.fetched_urls[f.url] = True
         if 'html' not in f.headers.get('content-type', '').lower():
@@ -196,12 +208,19 @@ class PackageIndex(Environment):
 
         base = f.url     # handle redirects
         page = f.read()
+        if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
+            if isinstance(f, urllib2.HTTPError):
+                # Errors have no charset, assume latin1:
+                charset = 'latin-1'
+            else:
+                charset = f.headers.get_param('charset') or 'latin-1'
+            page = page.decode(charset, "ignore")
         f.close()
-        if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
-            page = self.process_index(url, page)
         for match in HREF.finditer(page):
             link = urlparse.urljoin(base, htmldecode(match.group(1)))
             self.process_url(link)
+        if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
+            page = self.process_index(url, page)
 
     def process_filename(self, fn, nested=False):
         # process filenames or directories
@@ -237,7 +256,7 @@ class PackageIndex(Environment):
                         self.scan_egg_link(item, entry)
 
     def scan_egg_link(self, path, entry):
-        lines = filter(None, map(str.strip, file(os.path.join(path, entry))))
+        lines = filter(None, map(str.strip, open(os.path.join(path, entry))))
         if len(lines)==2:
             for dist in find_distributions(os.path.join(path, lines[0])):
                 dist.location = os.path.join(path, *lines)
@@ -262,7 +281,10 @@ class PackageIndex(Environment):
 
         # process an index page into the package-page index
         for match in HREF.finditer(page):
-            scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
+            try:
+                scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
+            except ValueError:
+                pass
 
         pkg, ver = scan(url)   # ensure this page is in the page index
         if pkg:
@@ -410,7 +432,7 @@ class PackageIndex(Environment):
 
     def fetch_distribution(self,
         requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
-        local_index=None, 
+        local_index=None
     ):
         """Obtain a distribution suitable for fulfilling `requirement`
 
@@ -433,7 +455,9 @@ class PackageIndex(Environment):
         skipped = {}
         dist = None
 
-        def find(env, req):
+        def find(req, env=None):
+            if env is None:
+                env = self
             # Find a matching distribution; may be called more than once
 
             for dist in env[req.key]:
@@ -452,18 +476,18 @@ class PackageIndex(Environment):
         if force_scan:
             self.prescan()
             self.find_packages(requirement)
-            dist = find(self, requirement)
-            
+            dist = find(requirement)
+
         if local_index is not None:
-            dist = dist or find(local_index, requirement)
+            dist = dist or find(requirement, local_index)
 
         if dist is None and self.to_scan is not None:
             self.prescan()
-            dist = find(self, requirement)
+            dist = find(requirement)
 
         if dist is None and not force_scan:
             self.find_packages(requirement)
-            dist = find(self, requirement)
+            dist = find(requirement)
 
         if dist is None:
             self.warn(
@@ -550,6 +574,7 @@ class PackageIndex(Environment):
             bs = self.dl_blocksize
             size = -1
             if "content-length" in headers:
+                # Some servers return multiple Content-Length headers :(
                 size = max(map(int,headers.getheaders("Content-Length")))
                 self.reporthook(url, filename, blocknum, bs, size)
             tfp = open(filename,'wb')
@@ -573,19 +598,37 @@ class PackageIndex(Environment):
 
 
     def open_url(self, url, warning=None):
-        if url.startswith('file:'): return local_open(url)
+        if url.startswith('file:'):
+            return local_open(url)
         try:
             return open_with_auth(url)
+        except (ValueError, httplib.InvalidURL), v:
+            msg = ' '.join([str(arg) for arg in v.args])
+            if warning:
+                self.warn(warning, msg)
+            else:
+                raise DistutilsError('%s %s' % (url, msg))
         except urllib2.HTTPError, v:
             return v
         except urllib2.URLError, v:
-            reason = v.reason
-        except httplib.HTTPException, v: 
-            reason = "%s: %s" % (v.__doc__ or v.__class__.__name__, v)
-        if warning:
-            self.warn(warning, reason)
-        else:
-            raise DistutilsError("Download error for %s: %s" % (url, reason))
+            if warning:
+                self.warn(warning, v.reason)
+            else:
+                raise DistutilsError("Download error for %s: %s"
+                                     % (url, v.reason))
+        except httplib.BadStatusLine, v:
+            if warning:
+                self.warn(warning, v.line)
+            else:
+                raise DistutilsError('%s returned a bad status line. '
+                                     'The server might be down, %s' % \
+                                             (url, v.line))
+        except httplib.HTTPException, v:
+            if warning:
+                self.warn(warning, v)
+            else:
+                raise DistutilsError("Download error for %s: %s"
+                                     % (url, v))
 
     def _download_url(self, scheme, url, tmpdir):
         # Determine download filename
@@ -606,8 +649,12 @@ class PackageIndex(Environment):
         #
         if scheme=='svn' or scheme.startswith('svn+'):
             return self._download_svn(url, filename)
+        elif scheme=='git' or scheme.startswith('git+'):
+            return self._download_git(url, filename)
+        elif scheme.startswith('hg+'):
+            return self._download_hg(url, filename)
         elif scheme=='file':
-            return urllib2.url2pathname(urlparse.urlparse(url)[2])
+            return urllib.url2pathname(urlparse.urlparse(url)[2])
         else:
             self.url_ok(url, True)   # raises error if not allowed
             return self._attempt_download(url, filename)
@@ -674,6 +721,55 @@ class PackageIndex(Environment):
         os.system("svn checkout%s -q %s %s" % (creds, url, filename))
         return filename
 
+    def _vcs_split_rev_from_url(self, url, pop_prefix=False):
+        scheme, netloc, path, query, frag = urlparse.urlsplit(url)
+
+        scheme = scheme.split('+', 1)[-1]
+
+        # urlsplit() does not always split off the fragment; strip it by hand
+        path = path.split('#',1)[0]
+
+        rev = None
+        if '@' in path:
+            path, rev = path.rsplit('@', 1)
+
+        # Also, discard fragment
+        url = urlparse.urlunsplit((scheme, netloc, path, query, ''))
+
+        return url, rev
+
+    def _download_git(self, url, filename):
+        filename = filename.split('#',1)[0]
+        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
+
+        self.info("Doing git clone from %s to %s", url, filename)
+        os.system("git clone --quiet %s %s" % (url, filename))
+
+        if rev is not None:
+            self.info("Checking out %s", rev)
+            os.system("(cd %s && git checkout --quiet %s)" % (
+                filename,
+                rev,
+            ))
+
+        return filename
+
+    def _download_hg(self, url, filename):
+        filename = filename.split('#',1)[0]
+        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
+
+        self.info("Doing hg clone from %s to %s", url, filename)
+        os.system("hg clone --quiet %s %s" % (url, filename))
+
+        if rev is not None:
+            self.info("Updating to %s", rev)
+            os.system("(cd %s && hg up -C -r %s >&-)" % (
+                filename,
+                rev,
+            ))
+
+        return filename
+
     def debug(self, msg, *args):
         log.debug(msg, *args)
 
@@ -734,20 +830,52 @@ def htmldecode(text):
 
 
 
+def socket_timeout(timeout=15):
+    def _socket_timeout(func):
+        def _socket_timeout(*args, **kwargs):
+            old_timeout = socket.getdefaulttimeout()
+            socket.setdefaulttimeout(timeout)
+            try:
+                return func(*args, **kwargs)
+            finally:
+                socket.setdefaulttimeout(old_timeout)
+        return _socket_timeout
+    return _socket_timeout
 
+def _encode_auth(auth):
+    """
+    A function compatible with Python 2.3-3.3 that will encode
+    auth from a URL suitable for an HTTP header.
+    >>> _encode_auth('username%3Apassword')
+    u'dXNlcm5hbWU6cGFzc3dvcmQ='
+    """
+    auth_s = urllib2.unquote(auth)
+    # convert to bytes
+    auth_bytes = auth_s.encode()
+    # use the legacy interface for Python 2.3 support
+    encoded_bytes = base64.encodestring(auth_bytes)
+    # convert back to a string
+    encoded = encoded_bytes.decode()
+    # strip the trailing newline appended by encodestring
+    return encoded.rstrip()
 
 def open_with_auth(url):
     """Open a urllib2 request, handling HTTP authentication"""
 
     scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
 
+    # Double scheme does not raise on Mac OS X as revealed by a
+    # failing test. We would expect "nonnumeric port". Refs #20.
+    if netloc.endswith(':'):
+        raise httplib.InvalidURL("nonnumeric port: ''")
+
     if scheme in ('http', 'https'):
         auth, host = urllib.splituser(netloc)
     else:
         auth = None
 
     if auth:
-        auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
+        auth = "Basic " + _encode_auth(auth)
         new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
         request = urllib2.Request(new_url)
         request.add_header("Authorization", auth)
@@ -766,6 +894,8 @@ def open_with_auth(url):
 
     return fp
 
+# adding a timeout to avoid freezing package_index
+open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
 
 
 
@@ -783,14 +913,16 @@ def fix_sf_url(url):
 def local_open(url):
     """Read a local path, with special support for directories"""
     scheme, server, path, param, query, frag = urlparse.urlparse(url)
-    filename = urllib2.url2pathname(path)
+    filename = urllib.url2pathname(path)
     if os.path.isfile(filename):
         return urllib2.urlopen(url)
     elif path.endswith('/') and os.path.isdir(filename):
         files = []
         for f in os.listdir(filename):
             if f=='index.html':
-                body = open(os.path.join(filename,f),'rb').read()
+                fp = open(os.path.join(filename,f),'rb')
+                body = fp.read()
+                fp.close()
                 break
             elif os.path.isdir(os.path.join(filename,f)):
                 f+='/'