aboutsummaryrefslogtreecommitdiffstats
path: root/pkg_resources.py
diff options
context:
space:
mode:
authorJurko Gospodnetić <jurko.gospodnetic@pke.hr>2014-04-15 22:47:14 +0200
committerJurko Gospodnetić <jurko.gospodnetic@pke.hr>2014-04-15 22:47:14 +0200
commitcf1cb5ba177568bfe699d9e0b2d67a78a9800101 (patch)
tree6711836e73663e8d6e2b64829b27c4669f9fd8d7 /pkg_resources.py
parent0f0c892487277e25ce79a39477b193822f1edf79 (diff)
downloadexternal_python_setuptools-cf1cb5ba177568bfe699d9e0b2d67a78a9800101.tar.gz
external_python_setuptools-cf1cb5ba177568bfe699d9e0b2d67a78a9800101.tar.bz2
external_python_setuptools-cf1cb5ba177568bfe699d9e0b2d67a78a9800101.zip
pkg_resources.Environment.__getitem__() code cleanup
Better commented the code, especially the result caching & lazy distribution list sorting strategies implemented there. Added several TODO comments indicating things to look into there in the future. General code cleanup: - stopped reusing the project_name variable for different purposes - added an assertion to detect corrupt distribution list caches --HG-- extra : source : 86ef8f59ef4e1fc253c155b9ca0856497455372d
Diffstat (limited to 'pkg_resources.py')
-rw-r--r--pkg_resources.py64
1 files changed, 52 insertions, 12 deletions
diff --git a/pkg_resources.py b/pkg_resources.py
index 2d656f1a..0d8987c8 100644
--- a/pkg_resources.py
+++ b/pkg_resources.py
@@ -811,30 +811,70 @@ class Environment(object):
for dist in find_distributions(item):
self.add(dist)
- def __getitem__(self,project_name):
+ def __getitem__(self, project_name):
"""Return a newest-to-oldest list of distributions for `project_name`
+
+ Uses case-insensitive `project_name` comparison, assuming all the
+ project's distributions use their project's name converted to all
+ lowercase as their key.
+
"""
+ # Result caching here serves two purposes:
+ # 1. Speed up the project_name --> distribution list lookup.
+ # 2. 'First access' flag indicating the distribution list requires
+ # sorting before it can be returned to the user.
+ #TODO: This caching smells like premature optimization. It could be
+ # that the distribution list lookup speed is not really affected by
+ # this, in which case the whole cache could be removed and replaced
+ # with a single 'dist_list_sorted' flag. This seems strongly indicated
+ # by the fact that this function does not really cache the distribution
+ # list under the given project name but only under its canonical
+ # distribution key variant. That means that repeated access using a non
+ # canonical project name does not get any speedup at all.
try:
return self._cache[project_name]
except KeyError:
- project_name = project_name.lower()
- if project_name not in self._distmap:
- return []
+ pass
- if project_name not in self._cache:
- dists = self._cache[project_name] = self._distmap[project_name]
+ # We expect all distribution keys to contain lower-case characters
+ # only.
+ #TODO: See if this expectation can be implemented better, e.g. by using
+ # some sort of a name --> key conversion function on the Distribution
+ # class or something similar.
+ #TODO: This requires all classes derived from Distribution to use
+ # lower-case only keys even if they do not calculate them from the
+ # project's name. It might be better to make this function simpler by
+ # passing it the the exact distribution key as a parameter and have the
+ # caller convert a `project_name` to its corresponding distribution key
+ # as needed.
+ distribution_key = project_name.lower()
+ try:
+ dists = self._distmap[distribution_key]
+ except KeyError:
+ return []
+
+ # Sort the project's distribution list lazily on first access.
+ if distribution_key not in self._cache:
+ self._cache[distribution_key] = dists
_sort_dists(dists)
- return self._cache[project_name]
+ return dists
- def add(self,dist):
- """Add `dist` if we ``can_add()`` it and it isn't already added"""
+ def add(self, dist):
+ """Add `dist` if we ``can_add()`` it and it has not already been added
+ """
if self.can_add(dist) and dist.has_version():
- dists = self._distmap.setdefault(dist.key,[])
+ dists = self._distmap.setdefault(dist.key, [])
if dist not in dists:
dists.append(dist)
- if dist.key in self._cache:
- _sort_dists(self._cache[dist.key])
+ cached_dists = self._cache.get(dist.key)
+ if cached_dists:
+ # The distribution list has been cached on first access,
+ # therefore we know it has already been sorted lazily and
+ # we are expected to keep it in sorted order.
+ _sort_dists(dists)
+ assert cached_dists is dists, \
+ "Distribution list cache corrupt."
def best_match(self, req, working_set, installer=None):
"""Find distribution best matching `req` and usable on `working_set`