Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x | setuptools/package_index.py | 111
1 file changed, 72 insertions, 39 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index b0388628..4f39c70a 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -1,8 +1,9 @@
 """PyPI and direct package downloading"""
-import sys, os.path, re, urlparse, urllib, urllib2, shutil, random, socket, cStringIO
+import sys, os.path, re, urlparse, urllib2, shutil, random, socket, cStringIO
 import itertools
 import base64
-import httplib
+import httplib, urllib
+from setuptools import ssl_support
 from pkg_resources import *
 from distutils import log
 from distutils.errors import DistutilsError
@@ -11,8 +12,8 @@ try:
 except ImportError:
     from md5 import md5
 from fnmatch import translate
-
 from setuptools.py24compat import wraps
+from setuptools.py27compat import get_all_headers

 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
 HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
@@ -58,6 +59,8 @@ def parse_bdist_wininst(name):
 def egg_info_for_url(url):
     scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
     base = urllib2.unquote(path.split('/')[-1])
+    if server=='sourceforge.net' and base=='download':    # XXX Yuck
+        base = urllib2.unquote(path.split('/')[-2])
     if '#' in base: base, fragment = base.split('#',1)
     return base,fragment

@@ -80,14 +83,12 @@ def distros_for_location(location, basename, metadata=None):
     if basename.endswith('.egg') and '-' in basename:
         # only one, unambiguous interpretation
         return [Distribution.from_location(location, basename, metadata)]
-
     if basename.endswith('.exe'):
         win_base, py_ver, platform = parse_bdist_wininst(basename)
         if win_base is not None:
             return interpret_distro_name(
                 location, win_base, metadata, py_ver, BINARY_DIST, platform
             )
-
     # Try source distro extensions (.zip, .tgz, etc.)
     #
     for ext in EXTENSIONS:
@@ -186,17 +187,15 @@ def find_external_links(url, page):
         if match:
             yield urlparse.urljoin(url, htmldecode(match.group(1)))

-
-user_agent = "Python-urllib/%s distribute/%s" % (
-    sys.version[:3], require('distribute')[0].version
+user_agent = "Python-urllib/%s setuptools/%s" % (
+    sys.version[:3], require('setuptools')[0].version
 )
-

 class PackageIndex(Environment):
     """A distribution index that scans web pages for download URLs"""

-    def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',),
-        *args, **kw
+    def __init__(self, index_url="https://pypi.python.org/simple", hosts=('*',),
+        ca_bundle=None, verify_ssl=True, *args, **kw
     ):
         Environment.__init__(self,*args,**kw)
         self.index_url = index_url + "/"[:not index_url.endswith('/')]
@@ -205,8 +204,9 @@ class PackageIndex(Environment):
         self.package_pages = {}
         self.allows = re.compile('|'.join(map(translate,hosts))).match
         self.to_scan = []
-
-
+        if verify_ssl and ssl_support.is_available and (ca_bundle or ssl_support.find_ca_bundle()):
+            self.opener = ssl_support.opener_for(ca_bundle)
+        else: self.opener = urllib2.urlopen

     def process_url(self, url, retrieve=False):
         """Evaluate a URL as a possible download, and maybe retrieve it"""
@@ -232,10 +232,10 @@ class PackageIndex(Environment):
             return

         self.info("Reading %s", url)
+        self.fetched_urls[url] = True   # prevent multiple fetch attempts
         f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
         if f is None: return
-        self.fetched_urls[url] = self.fetched_urls[f.url] = True
-
+        self.fetched_urls[f.url] = True
         if 'html' not in f.headers.get('content-type', '').lower():
             f.close()   # not html, we can't process it
             return
@@ -385,7 +385,7 @@ class PackageIndex(Environment):
     def check_md5(self, cs, info, filename, tfp):
         if re.match('md5=[0-9a-f]{32}$', info):
             self.debug("Validating md5 checksum for %s", filename)
-            if cs.hexdigest()<>info[4:]:
+            if cs.hexdigest()!=info[4:]:
                 tfp.close()
                 os.unlink(filename)
                 raise DistutilsError(
@@ -484,7 +484,6 @@ class PackageIndex(Environment):
         set, development and system eggs (i.e., those using the ``.egg-info``
         format) will be ignored.
         """
-
         # process a Requirement
         self.info("Searching for %s", requirement)
         skipped = {}
@@ -504,10 +503,9 @@ class PackageIndex(Environment):
                     continue

                 if dist in req and (dist.precedence<=SOURCE_DIST or not source):
-                    self.info("Best match: %s", dist)
-                    return dist.clone(
-                        location=self.download(dist.location, tmpdir)
-                    )
+                    return dist
+
+

         if force_scan:
             self.prescan()
@@ -531,7 +529,10 @@ class PackageIndex(Environment):
                 (source and "a source distribution of " or ""),
                 requirement,
             )
-        return dist
+        else:
+            self.info("Best match: %s", dist)
+            return dist.clone(location=self.download(dist.location, tmpdir))
+

     def fetch(self, requirement, tmpdir, force_scan=False, source=False):
         """Obtain a file suitable for fulfilling `requirement`
@@ -547,12 +548,6 @@ class PackageIndex(Environment):
         return None

-
-
-
-
-
-

     def gen_setup(self, filename, fragment, tmpdir):
         match = EGG_FRAGMENT.match(fragment)
         dists = match and [d for d in
@@ -614,8 +609,8 @@ class PackageIndex(Environment):
             size = -1
             if "content-length" in headers:
                 # Some servers return multiple Content-Length headers :(
-                content_length = headers.get("Content-Length")
-                size = int(content_length)
+                sizes = get_all_headers(headers, 'Content-Length')
+                size = max(map(int, sizes))
             self.reporthook(url, filename, blocknum, bs, size)
             tfp = open(filename,'wb')
             while True:
@@ -641,7 +636,7 @@ class PackageIndex(Environment):
         if url.startswith('file:'):
             return local_open(url)
         try:
-            return open_with_auth(url)
+            return open_with_auth(url, self.opener)
         except (ValueError, httplib.InvalidURL), v:
             msg = ' '.join([str(arg) for arg in v.args])
             if warning:
@@ -673,9 +668,8 @@ class PackageIndex(Environment):
     def _download_url(self, scheme, url, tmpdir):
         # Determine download filename
         #
-        name = filter(None,urlparse.urlparse(url)[2].split('/'))
+        name, fragment = egg_info_for_url(url)
         if name:
-            name = name[-1]
             while '..' in name:
                 name = name.replace('..','.').replace('\\','_')
         else:
@@ -700,8 +694,6 @@ class PackageIndex(Environment):
         self.url_ok(url, True)   # raises error if not allowed
         return self._attempt_download(url, filename)

-
-
     def scan_url(self, url):
         self.process_url(url, True)

@@ -728,10 +720,39 @@ class PackageIndex(Environment):
             os.unlink(filename)
             raise DistutilsError("Unexpected HTML page found at "+url)

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
     def _download_svn(self, url, filename):
         url = url.split('#',1)[0]   # remove any fragment for svn's sake
+        creds = ''
+        if url.lower().startswith('svn:') and '@' in url:
+            scheme, netloc, path, p, q, f = urlparse.urlparse(url)
+            if not netloc and path.startswith('//') and '/' in path[2:]:
+                netloc, path = path[2:].split('/',1)
+                auth, host = urllib.splituser(netloc)
+                if auth:
+                    if ':' in auth:
+                        user, pw = auth.split(':',1)
+                        creds = " --username=%s --password=%s" % (user, pw)
+                    else:
+                        creds = " --username="+auth
+                    netloc = host
+                    url = urlparse.urlunparse((scheme, netloc, url, p, q, f))
         self.info("Doing subversion checkout from %s to %s", url, filename)
-        os.system("svn checkout -q %s %s" % (url, filename))
+        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
         return filename

     def _vcs_split_rev_from_url(self, url, pop_prefix=False):
@@ -792,6 +813,18 @@ class PackageIndex(Environment):
     def warn(self, msg, *args):
         log.warn(msg, *args)

+
+
+
+
+
+
+
+
+
+
+
+
 # This pattern matches a character entity reference (a decimal numeric
 # references, a hexadecimal numeric reference, or a named reference).
 entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
@@ -860,7 +893,7 @@ def _encode_auth(auth):
     # strip the trailing carriage return
     return encoded.rstrip()

-def open_with_auth(url):
+def open_with_auth(url, opener=urllib2.urlopen):
     """Open a urllib2 request, handling HTTP authentication"""

     scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
@@ -871,7 +904,7 @@ def open_with_auth(url):
         raise httplib.InvalidURL("nonnumeric port: ''")

     if scheme in ('http', 'https'):
-        auth, host = urllib2.splituser(netloc)
+        auth, host = urllib.splituser(netloc)
     else:
         auth = None

@@ -884,7 +917,7 @@ def open_with_auth(url):
         request = urllib2.Request(url)

     request.add_header('User-Agent', user_agent)
-    fp = urllib2.urlopen(request)
+    fp = opener(request)

     if auth:
         # Put authentication info back into request URL if same host,
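A note on the egg_info_for_url() change above: SourceForge mirror links end in a literal /download path segment, so the last path component is useless as a distribution filename and the second-to-last component is used instead. A quick worked example with a made-up project URL:

# Worked example (Python 2, made-up URL) of the sourceforge.net special case added above.
import urlparse, urllib2

url = 'http://sourceforge.net/projects/example/files/example-1.0.tar.gz/download'
scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
base = urllib2.unquote(path.split('/')[-1])        # 'download' -- not a useful filename
if server == 'sourceforge.net' and base == 'download':
    base = urllib2.unquote(path.split('/')[-2])    # 'example-1.0.tar.gz'
print base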
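The constructor and open_with_auth() changes work together: __init__() now picks an opener once, preferring a certificate-verifying opener from setuptools.ssl_support when one can be built, and open_url() passes that opener into open_with_auth() instead of the hard-coded urllib2.urlopen. A minimal standalone sketch of the same selection logic, assuming only the ssl_support helpers that appear in the diff (is_available, find_ca_bundle(), opener_for()):

# Sketch (Python 2): choose an opener the way PackageIndex.__init__ now does.
import urllib2
from setuptools import ssl_support

def choose_opener(ca_bundle=None, verify_ssl=True):
    # Verify HTTPS certificates only when SSL support and a CA bundle are both available;
    # otherwise fall back to plain urllib2.urlopen (no verification).
    if verify_ssl and ssl_support.is_available and (ca_bundle or ssl_support.find_ca_bundle()):
        return ssl_support.opener_for(ca_bundle)
    return urllib2.urlopen

opener = choose_opener()
# The chosen opener is later threaded through open_with_auth(url, opener) when pages are fetched.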
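The download-size fix replaces a single headers.get("Content-Length") lookup with get_all_headers() plus max(), because, as the in-line comment notes, some servers repeat the Content-Length header and a single lookup is ambiguous in that case. A sketch of what such a helper can look like on Python 2 (an assumption about the shape of setuptools.py27compat.get_all_headers, not a copy of it):

# Sketch (Python 2): return every value of a possibly repeated header.
def get_all_headers(message, key):
    # rfc822/mimetools-style messages expose repeated headers via getheaders()
    return message.getheaders(key)

# Usage as in the diff:
#   sizes = get_all_headers(headers, 'Content-Length')
#   size = max(map(int, sizes))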
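The _download_svn() change pulls user:password credentials out of svn:// URLs, rebuilds the URL with the credentials stripped, and hands them to the svn client as --username/--password options instead. urllib.splituser() does the netloc split; a small worked example with made-up credentials:

# Worked example (Python 2, made-up credentials) of the splituser() step used above.
import urllib

netloc = 'alice:s3cr3t@svn.example.org'    # hypothetical netloc parsed from an svn:// URL
auth, host = urllib.splituser(netloc)      # ('alice:s3cr3t', 'svn.example.org')
user, pw = auth.split(':', 1)
creds = " --username=%s --password=%s" % (user, pw)
print creds                                # " --username=alice --password=s3cr3t"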