diff options
author | PJ Eby <distutils-sig@python.org> | 2005-06-05 21:33:51 +0000 |
---|---|---|
committer | PJ Eby <distutils-sig@python.org> | 2005-06-05 21:33:51 +0000 |
commit | e6572cb2a69f14590366d99cee809c79658b95c6 (patch) | |
tree | c65587f916dd8ad9369788a658bd60365f66214e | |
parent | 0c9e886999bb42cf825054778b1664e41f8164b4 (diff) | |
download | external_python_setuptools-e6572cb2a69f14590366d99cee809c79658b95c6.tar.gz external_python_setuptools-e6572cb2a69f14590366d99cee809c79658b95c6.tar.bz2 external_python_setuptools-e6572cb2a69f14590366d99cee809c79658b95c6.zip |
Implement PyPI screenscraping for EasyInstall. Fix a bug in requirement
version checking. Document new options for screen scraping.
--HG--
branch : setuptools
extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/trunk/setuptools%4041041
-rwxr-xr-x | EasyInstall.txt | 32 | ||||
-rwxr-xr-x | easy_install.py | 208 | ||||
-rw-r--r-- | pkg_resources.py | 30 | ||||
-rw-r--r-- | setuptools/tests/test_resources.py | 19 |
4 files changed, 164 insertions, 125 deletions
diff --git a/EasyInstall.txt b/EasyInstall.txt index 6635320e..2a1d875b 100755 --- a/EasyInstall.txt +++ b/EasyInstall.txt @@ -210,10 +210,42 @@ Command-Line Options URL or filename, so that the installer will not be confused by the presence of multiple ``setup.py`` files in the build directory. +``--scan-url=URL, -s URL`` (New in 0.4a1) + Scan the specified "download page" for direct links to downloadable eggs or + source distributions. Any usable packages will be downloaded if they are + required by a command line argument. For example, this:: + + easy_install -s http://peak.telecommunity.com/dist PyProtocols + + will download and install the latest version of PyProtocols linked from + the PEAK downloads page, but ignore the other download links on that page. + + You may use this option more than once, to list multiple download pages. + If all requested packages can be found using the specified download pages, + the Python Package Index will *not* be consulted. + +``--index-url=URL, -u URL`` (New in 0.4a1) + Specifies the base URL of the Python Package Index. The default is + http://www.python.org/pypi if not specified. When a package is requested + that is not locally available or linked from a ``--scan-url`` download + page, the package index will be searched for download pages for the needed + package, and those download pages will be searched for links to download + an egg or source distribution. + Release Notes/Change History ============================ +0.4a1 + * Added ``--scan-url`` and ``--index-url`` options, to scan download pages + and search PyPI for needed packages. + + * Fixed a bug in requirements processing for exact versions (i.e. ``==`` and + ``!=``) when only one condition was included. + + * Added ``safe_name()`` and ``safe_version()`` APIs to clean up handling of + arbitrary distribution names and versions found on PyPI. + 0.3a4 * ``pkg_resources`` now supports resource directories, not just the resources in them. In particular, there are ``resource_listdir()`` and diff --git a/easy_install.py b/easy_install.py index 00e316ff..81415677 100755 --- a/easy_install.py +++ b/easy_install.py @@ -28,98 +28,133 @@ import __builtin__ from distutils.sysconfig import get_python_lib from shutil import rmtree # must have, because it can be called from __del__ from pkg_resources import * - _os = sys.modules[os.name] _open = open - - - - - - - -EXTENSIONS = ( - (EGG_DIST, ".egg"), - (SOURCE_DIST, ".tar.gz"), - (SOURCE_DIST, ".tar.bz2"), - (SOURCE_DIST, ".tar"), - (SOURCE_DIST, ".zip"), - (SOURCE_DIST, ".tgz"), -) - -class URLDistribution(Distribution): - """A distribution that has not been installed""" - - def __init__(self, url, metadata=None): - path = urlparse.urlparse(url)[2] - base = path.split('/')[-1] - - for typecode, ext in EXTENSIONS: - if base.endswith(ext): - base = base[:-len(ext)] - break - else: - raise DistributionNotFound(url) - - self.typecode = typecode - name, version, py_version, platform = [None]*4 - match = pkg_resources.EGG_NAME(base) - if match: - name,version,py_version,platform = match.group( - 'name','ver','pyver','plat' - ) - else: - name = base - Distribution.__init__(self, - url, metadata=metadata, name=name, version=version or "0", - py_version=py_version or pkg_resources.PY_MAJOR, platform=platform +class Opener(urllib.FancyURLopener): + def http_error_default(self, url, fp, errcode, errmsg, headers): + """Default error handling -- don't raise an exception.""" + info = urllib.addinfourl(fp, headers, "http:" + url) + info.status, info.reason = errcode, errmsg + return info +opener = Opener() + +HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I) +EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() + +def distros_for_url(url, metadata=None): + """Yield egg or source distribution objects that might be found at a URL""" + + path = urlparse.urlparse(url)[2] + base = urllib.unquote(path.split('/')[-1]) + + if base.endswith('.egg'): + dist = Distribution.from_filename(base, metadata) + dist.path = url + yield dist + return # only one, unambiguous interpretation + + for ext in EXTENSIONS: + if base.endswith(ext): + base = base[:-len(ext)] + break + else: + return # no extension matched + + # Generate alternative interpretations of a source distro name + # Because some packages are ambiguous as to name/versions split + # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc. + # So, we generate each possible interepretation (e.g. "adns, python-1.1.0" + # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice, + # the spurious interpretations should be ignored, because in the event + # there's also an "adns" package, the spurious "python-1.1.0" version will + # compare lower than any numeric version number, and is therefore unlikely + # to match a request for it. It's still a potential problem, though, and + # in the long run PyPI and the distutils should go for "safe" names and + # versions in source distribution names. + + parts = base.split('-') + for p in range(1,len(parts)+1): + yield Distribution( + url, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]), + distro_type = SOURCE_DIST ) - - - - class PackageIndex(AvailableDistributions): """A distribution index that scans web pages for download URLs""" def __init__(self,index_url="http://www.python.org/pypi",*args,**kw): AvailableDistributions.__init__(self,*args,**kw) - self.index_url = index_url + self.index_url = index_url + "/"[:not index_url.endswith('/')] self.scanned_urls = {} + self.fetched_urls = {} + self.package_pages = {} def scan_url(self, url): self.process_url(url, True) def process_url(self, url, retrieve=False): - if url in self.scanned_urls: - return - try: - dist = URLDistribution(url) - except DistributionNotFound: # not a distro, so scan the page - if not retrieve: - return # unless we're skipping retrieval - else: - # It's a distro, just process it - self.scanned_urls[url] = True - self.add(dist) # XXX should check py_ver/platform! + if url in self.scanned_urls and not retrieve: return - f = urllib.urlopen(url) self.scanned_urls[url] = True + dists = list(distros_for_url(url)) + map(self.add, dists) + + if dists or not retrieve or url in self.fetched_urls: + # don't need the actual page + return + f = opener.open(url) + self.fetched_urls[url] = self.fetched_urls[f.url] = True if 'html' not in f.headers['content-type'].lower(): f.close() # not html, we can't process it return - url = f.url # handle redirects - href = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I) + + base = f.url # handle redirects page = f.read() f.close() - for match in href.finditer(page): - link = urlparse.urljoin(url, match.group(1)) - self.process_url(link) - - + if url.startswith(self.index_url): + self.process_index(url, page) + else: + for match in HREF.finditer(page): + link = urlparse.urljoin(base, match.group(1)) + self.process_url(link) + + def find_packages(self,requirement): + self.scan_url(self.index_url + requirement.distname) + if not self.package_pages.get(requirement.key): + # We couldn't find the target package, so search the index page too + self.scan_url(self.index_url) + for url in self.package_pages.get(requirement.key,()): + # scan each page that might be related to the desired package + self.scan_url(url) + + def process_index(self,url,page): + def scan(link): + if link.startswith(self.index_url): + parts = map( + urllib.unquote, link[len(self.index_url):].split('/') + ) + if len(parts)==2: + # it's a package page, sanitize and index it + pkg = safe_name(parts[0]) + ver = safe_version(parts[1]) + self.package_pages.setdefault(pkg.lower(),{})[link] = True + if url==self.index_url or 'Index of Packages</title>' in page: + # process an index page into the package-page index + for match in HREF.finditer(page): + scan( urlparse.urljoin(url, match.group(1)) ) + else: + scan(url) # ensure this page is in the page index + # process individual package page + for tag in ("<th>Home Page", "<th>Download URL"): + pos = page.find(tag) + if pos!=-1: + match = HREF.search(page,pos) + if match: + # Process the found URL + self.scan_url(urlparse.urljoin(url, match.group(1))) def obtain(self,requirement): self.find_packages(requirement) @@ -127,41 +162,6 @@ class PackageIndex(AvailableDistributions): if dist in requirement: return dist - def find_packages(self,requirement): - pass # XXX process PyPI entries for package - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class Installer: """Manage a download/build/install process""" @@ -801,11 +801,11 @@ def main(argv, factory=Installer): parser.add_option("-u", "--index-url", dest="index_url", metavar="URL", default="http://www.python.org/pypi", - help="Base URL of Python Package Index") + help="base URL of Python Package Index") parser.add_option("-s", "--scan-url", dest="scan_urls", metavar="URL", action="append", - help="Additional URL(s) to search for packages") + help="additional URL(s) to search for packages") (options, args) = parser.parse_args() diff --git a/pkg_resources.py b/pkg_resources.py index 927b7942..a9697bbc 100644 --- a/pkg_resources.py +++ b/pkg_resources.py @@ -54,6 +54,9 @@ class InvalidOption(ResolutionError): _provider_factories = {} PY_MAJOR = sys.version[:3] +EGG_DIST = 2 +SOURCE_DIST = 1 + def register_loader_type(loader_type, provider_factory): """Register `provider_factory` to make providers for `loader_type` @@ -77,9 +80,6 @@ def get_provider(moduleName): - - - def get_platform(): """Return this platform's string for platform-specific distributions @@ -1146,23 +1146,21 @@ def parse_version(s): -EGG_DIST = 2 -SOURCE_DIST = 1 - class Distribution(object): """Wrap an actual or potential sys.path entry w/metadata""" - typecode = EGG_DIST + def __init__(self, path_str, metadata=None, name=None, version=None, - py_version=PY_MAJOR, platform=None + py_version=PY_MAJOR, platform=None, distro_type = EGG_DIST ): if name: - self.name = name.replace('_','-') + self.name = safe_name(name) if version: - self._version = version.replace('_','-') + self._version = safe_version(version) self.py_version = py_version self.platform = platform self.path = path_str + self.distro_type = distro_type self.metadata = metadata def installed_on(self,path=None): @@ -1187,6 +1185,8 @@ class Distribution(object): ) from_filename = classmethod(from_filename) + + # These properties have to be lazy so that we don't have to load any # metadata until/unless it's actually needed. (i.e., some distributions # may not know their name or version without loading PKG-INFO) @@ -1330,7 +1330,7 @@ def parse_requirements(strs): def _sort_dists(dists): - tmp = [(dist.version,dist.typecode,dist) for dist in dists] + tmp = [(dist.version,dist.distro_type,dist) for dist in dists] tmp.sort() dists[::-1] = [d for v,t,d in tmp] @@ -1382,16 +1382,16 @@ class Requirement: item = item.parsed_version elif isinstance(item,basestring): item = parse_version(item) - last = True + last = None for parsed,trans,op,ver in self.index: action = trans[cmp(item,parsed)] if action=='F': return False elif action=='T': return True elif action=='+': last = True - elif action=='-': last = False + elif action=='-' or last is None: last = False + if last is None: last = True # no rules encountered return last - def __hash__(self): return self.__hash @@ -1414,7 +1414,7 @@ state_machine = { '>' : 'F+F', '>=': 'T+F', '==': 'T..', - '!=': 'F..', + '!=': 'F++', } diff --git a/setuptools/tests/test_resources.py b/setuptools/tests/test_resources.py index 88b5f889..91199f47 100644 --- a/setuptools/tests/test_resources.py +++ b/setuptools/tests/test_resources.py @@ -258,12 +258,19 @@ class RequirementsTests(TestCase): ImmutableSet(["foo","bar"]))) ) - - - - - - + def testVersionEquality(self): + r1 = Requirement.parse("setuptools==0.3a2") + r2 = Requirement.parse("setuptools!=0.3a4") + d = Distribution.from_filename + + self.failIf(d("setuptools-0.3a4.egg") in r1) + self.failIf(d("setuptools-0.3a1.egg") in r1) + self.failIf(d("setuptools-0.3a4.egg") in r2) + + self.failUnless(d("setuptools-0.3a2.egg") in r1) + self.failUnless(d("setuptools-0.3a2.egg") in r2) + self.failUnless(d("setuptools-0.3a3.egg") in r2) + self.failUnless(d("setuptools-0.3a5.egg") in r2) |