From 272fed819ba188ac5d6c0ceb9de6d7a8cb543a18 Mon Sep 17 00:00:00 2001 From: PJ Eby Date: Wed, 19 Oct 2005 03:00:35 +0000 Subject: Added "--allow-hosts" option to restrict downloading and spidering to a specified list of server glob patterns. --HG-- branch : setuptools extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/trunk/setuptools%4041266 --- setuptools/package_index.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'setuptools/package_index.py') diff --git a/setuptools/package_index.py b/setuptools/package_index.py index f5b322b0..76d53d11 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -5,11 +5,11 @@ from pkg_resources import * from distutils import log from distutils.errors import DistutilsError from md5 import md5 +from fnmatch import translate EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$') HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I) # this is here to fix emacs' cruddy broken syntax highlighting - PYPI_MD5 = re.compile( '([^<]+)\n\s+\\(md5\\)' @@ -124,25 +124,25 @@ def interpret_distro_name(location, basename, metadata, class PackageIndex(Environment): """A distribution index that scans web pages for download URLs""" - def __init__(self,index_url="http://www.python.org/pypi",*args,**kw): + def __init__(self,index_url="http://www.python.org/pypi",hosts=('*',),*args,**kw): Environment.__init__(self,*args,**kw) self.index_url = index_url + "/"[:not index_url.endswith('/')] self.scanned_urls = {} self.fetched_urls = {} self.package_pages = {} + self.allows = re.compile('|'.join(map(translate,hosts))).match def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" - if url in self.scanned_urls and not retrieve: return self.scanned_urls[url] = True - if not URL_SCHEME(url): # process filenames or directories if os.path.isfile(url): - dists = list(distros_for_filename(url)) + map(self.add, distros_for_filename(url)) + return # no need to retrieve anything elif os.path.isdir(url): url = os.path.realpath(url) for item in os.listdir(url): @@ -153,13 +153,16 @@ class PackageIndex(Environment): return else: dists = list(distros_for_url(url)) + if dists: + if not self.url_ok(url): + return + self.debug("Found link: %s", url) - if dists: - self.debug("Found link: %s", url) if dists or not retrieve or url in self.fetched_urls: - for dist in dists: - self.add(dist) - # don't need the actual page + map(self.add, dists) + return # don't need the actual page + + if not self.url_ok(url): return self.info("Reading %s", url) @@ -181,17 +184,14 @@ class PackageIndex(Environment): self.process_url(link) - - - - - - - - - - - + def url_ok(self, url, fatal=False): + if self.allows(urlparse.urlparse(url)[1]): + return True + msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n" + if fatal: + raise DistutilsError(msg % url) + else: + self.warn(msg, url) @@ -368,8 +368,8 @@ class PackageIndex(Environment): dl_blocksize = 8192 - def _download_to(self, url, filename): + self.url_ok(url,True) # raises error if not allowed self.info("Downloading %s", url) # Download the file fp, tfp, info = None, None, None -- cgit v1.2.3