diff options
author | Jurko Gospodnetić <jurko.gospodnetic@pke.hr> | 2014-04-15 22:47:14 +0200 |
---|---|---|
committer | Jurko Gospodnetić <jurko.gospodnetic@pke.hr> | 2014-04-15 22:47:14 +0200 |
commit | cf1cb5ba177568bfe699d9e0b2d67a78a9800101 (patch) | |
tree | 6711836e73663e8d6e2b64829b27c4669f9fd8d7 /pkg_resources.py | |
parent | 0f0c892487277e25ce79a39477b193822f1edf79 (diff) | |
download | external_python_setuptools-cf1cb5ba177568bfe699d9e0b2d67a78a9800101.tar.gz external_python_setuptools-cf1cb5ba177568bfe699d9e0b2d67a78a9800101.tar.bz2 external_python_setuptools-cf1cb5ba177568bfe699d9e0b2d67a78a9800101.zip |
pkg_resources.Environment.__getitem__() code cleanup
Better commented the code, especially the result caching & lazy distribution
list sorting strategies implemented there. Added several TODO comments
indicating things to look into there in the future.
General code cleanup:
- stopped reusing the project_name variable for different purposes
- added an assertion to detect corrupt distribution list caches
--HG--
extra : source : 86ef8f59ef4e1fc253c155b9ca0856497455372d
Diffstat (limited to 'pkg_resources.py')
-rw-r--r-- | pkg_resources.py | 64 |
1 files changed, 52 insertions, 12 deletions
diff --git a/pkg_resources.py b/pkg_resources.py index 2d656f1a..0d8987c8 100644 --- a/pkg_resources.py +++ b/pkg_resources.py @@ -811,30 +811,70 @@ class Environment(object): for dist in find_distributions(item): self.add(dist) - def __getitem__(self,project_name): + def __getitem__(self, project_name): """Return a newest-to-oldest list of distributions for `project_name` + + Uses case-insensitive `project_name` comparison, assuming all the + project's distributions use their project's name converted to all + lowercase as their key. + """ + # Result caching here serves two purposes: + # 1. Speed up the project_name --> distribution list lookup. + # 2. 'First access' flag indicating the distribution list requires + # sorting before it can be returned to the user. + #TODO: This caching smells like premature optimization. It could be + # that the distribution list lookup speed is not really affected by + # this, in which case the whole cache could be removed and replaced + # with a single 'dist_list_sorted' flag. This seems strongly indicated + # by the fact that this function does not really cache the distribution + # list under the given project name but only under its canonical + # distribution key variant. That means that repeated access using a non + # canonical project name does not get any speedup at all. try: return self._cache[project_name] except KeyError: - project_name = project_name.lower() - if project_name not in self._distmap: - return [] + pass - if project_name not in self._cache: - dists = self._cache[project_name] = self._distmap[project_name] + # We expect all distribution keys to contain lower-case characters + # only. + #TODO: See if this expectation can be implemented better, e.g. by using + # some sort of a name --> key conversion function on the Distribution + # class or something similar. + #TODO: This requires all classes derived from Distribution to use + # lower-case only keys even if they do not calculate them from the + # project's name. It might be better to make this function simpler by + # passing it the the exact distribution key as a parameter and have the + # caller convert a `project_name` to its corresponding distribution key + # as needed. + distribution_key = project_name.lower() + try: + dists = self._distmap[distribution_key] + except KeyError: + return [] + + # Sort the project's distribution list lazily on first access. + if distribution_key not in self._cache: + self._cache[distribution_key] = dists _sort_dists(dists) - return self._cache[project_name] + return dists - def add(self,dist): - """Add `dist` if we ``can_add()`` it and it isn't already added""" + def add(self, dist): + """Add `dist` if we ``can_add()`` it and it has not already been added + """ if self.can_add(dist) and dist.has_version(): - dists = self._distmap.setdefault(dist.key,[]) + dists = self._distmap.setdefault(dist.key, []) if dist not in dists: dists.append(dist) - if dist.key in self._cache: - _sort_dists(self._cache[dist.key]) + cached_dists = self._cache.get(dist.key) + if cached_dists: + # The distribution list has been cached on first access, + # therefore we know it has already been sorted lazily and + # we are expected to keep it in sorted order. + _sort_dists(dists) + assert cached_dists is dists, \ + "Distribution list cache corrupt." def best_match(self, req, working_set, installer=None): """Find distribution best matching `req` and usable on `working_set` |