author | Jason R. Coombs <jaraco@jaraco.com> | 2014-06-15 12:33:25 -0400 |
committer | Jason R. Coombs <jaraco@jaraco.com> | 2014-06-15 12:33:25 -0400 |
commit | 4d4b9454777ff3f219688ea16a811996e9d63e87 (patch) | |
tree | fb21c2f9f2cbba1ad19273dc4ce3a859e36f2ef9 | |
parent | e61c32101ec17fb2b55aeda5218d12b41926ecbb (diff) | |
parent | 7fecd7a9665f9710c609b6f8fd8227e126d99ed7 (diff) | |
download | external_python_setuptools-4d4b9454777ff3f219688ea16a811996e9d63e87.tar.gz external_python_setuptools-4d4b9454777ff3f219688ea16a811996e9d63e87.tar.bz2 external_python_setuptools-4d4b9454777ff3f219688ea16a811996e9d63e87.zip |
Merge issue202 changes. Fixes #202.
-rwxr-xr-x | setuptools/command/easy_install.py | 222 |
1 file changed, 181 insertions, 41 deletions
diff --git a/setuptools/command/easy_install.py b/setuptools/command/easy_install.py
index cc5ddbd6..325e3c34 100755
--- a/setuptools/command/easy_install.py
+++ b/setuptools/command/easy_install.py
@@ -646,13 +646,6 @@ Please make the appropriate changes for your system and try again.
     def process_distribution(self, requirement, dist, deps=True, *info):
         self.update_pth(dist)
         self.package_index.add(dist)
-        # First remove the dist from self.local_index, to avoid problems using
-        # old cached data in case its underlying file has been replaced.
-        #
-        # This is a quick-fix for a zipimporter caching issue in case the dist
-        # has been implemented as and already loaded from a zip file that got
-        # replaced later on. For more detailed information see setuptools issue
-        # #168 at 'http://bitbucket.org/pypa/setuptools/issue/168'.
         if dist in self.local_index[dist.key]:
             self.local_index.remove(dist)
         self.local_index.add(dist)
@@ -834,23 +827,30 @@ Please make the appropriate changes for your system and try again.
                 dir_util.remove_tree(destination, dry_run=self.dry_run)
             elif os.path.exists(destination):
                 self.execute(os.unlink,(destination,),"Removing "+destination)
-            uncache_zipdir(destination)
-            if os.path.isdir(egg_path):
-                if egg_path.startswith(tmpdir):
-                    f,m = shutil.move, "Moving"
+            try:
+                new_dist_is_zipped = False
+                if os.path.isdir(egg_path):
+                    if egg_path.startswith(tmpdir):
+                        f,m = shutil.move, "Moving"
+                    else:
+                        f,m = shutil.copytree, "Copying"
+                elif self.should_unzip(dist):
+                    self.mkpath(destination)
+                    f,m = self.unpack_and_compile, "Extracting"
                 else:
-                    f,m = shutil.copytree, "Copying"
-            elif self.should_unzip(dist):
-                self.mkpath(destination)
-                f,m = self.unpack_and_compile, "Extracting"
-            elif egg_path.startswith(tmpdir):
-                f,m = shutil.move, "Moving"
-            else:
-                f,m = shutil.copy2, "Copying"
-
-            self.execute(f, (egg_path, destination),
-                (m+" %s to %s") %
-                (os.path.basename(egg_path),os.path.dirname(destination)))
+                    new_dist_is_zipped = True
+                    if egg_path.startswith(tmpdir):
+                        f,m = shutil.move, "Moving"
+                    else:
+                        f,m = shutil.copy2, "Copying"
+                self.execute(f, (egg_path, destination),
+                    (m+" %s to %s") %
+                    (os.path.basename(egg_path),os.path.dirname(destination)))
+                update_dist_caches(destination,
+                                   fix_zipimporter_caches=new_dist_is_zipped)
+            except:
+                update_dist_caches(destination, fix_zipimporter_caches=False)
+                raise

         self.add_output(destination)
         return self.egg_distribution(destination)
@@ -1576,35 +1576,175 @@ def auto_chmod(func, arg, exc):
     et, ev, _ = sys.exc_info()
     reraise(et, (ev[0], ev[1] + (" %s %s" % (func,arg))))

-def uncache_zipdir(path):
+def update_dist_caches(dist_path, fix_zipimporter_caches):
     """
-    Remove any globally cached zip file related data for `path`
-
-    Stale zipimport.zipimporter objects need to be removed when a zip file is
-    replaced as they contain cached zip file directory information. If they are
-    asked to get data from their zip file, they will use that cached
-    information to calculate the data location in the zip file. This calculated
-    location may be incorrect for the replaced zip file, which may in turn
-    cause the read operation to either fail or return incorrect data.
-
-    Note we have no way to clear any local caches from here. That is left up to
-    whomever is in charge of maintaining that cache.
+    Fix any globally cached `dist_path` related data
+
+    `dist_path` should be a path of a newly installed egg distribution (zipped
+    or unzipped).
+
+    sys.path_importer_cache contains finder objects that have been cached when
+    importing data from the original distribution. Any such finders need to be
+    cleared since the replacement distribution might be packaged differently,
+    e.g. a zipped egg distribution might get replaced with an unzipped egg
+    folder or vice versa. Having the old finders cached may then cause Python
+    to attempt loading modules from the replacement distribution using an
+    incorrect loader.
+
+    zipimport.zipimporter objects are Python loaders charged with importing
+    data packaged inside zip archives. If stale loaders referencing the
+    original distribution, are left behind, they can fail to load modules from
+    the replacement distribution. E.g. if an old zipimport.zipimporter instance
+    is used to load data from a new zipped egg archive, it may cause the
+    operation to attempt to locate the requested data in the wrong location -
+    one indicated by the original distribution's zip archive directory
+    information. Such an operation may then fail outright, e.g. report having
+    read a 'bad local file header', or even worse, it may fail silently &
+    return invalid data.
+
+    zipimport._zip_directory_cache contains cached zip archive directory
+    information for all existing zipimport.zipimporter instances and all such
+    instances connected to the same archive share the same cached directory
+    information.
+
+    If asked, and the underlying Python implementation allows it, we can fix
+    all existing zipimport.zipimporter instances instead of having to track
+    them down and remove them one by one, by updating their shared cached zip
+    archive directory information. This, of course, assumes that the
+    replacement distribution is packaged as a zipped egg.
+
+    If not asked to fix existing zipimport.zipimporter instances, we still do
+    our best to clear any remaining zipimport.zipimporter related cached data
+    that might somehow later get used when attempting to load data from the new
+    distribution and thus cause such load operations to fail. Note that when
+    tracking down such remaining stale data, we can not catch every conceivable
+    usage from here, and we clear only those that we know of and have found to
+    cause problems if left alive. Any remaining caches should be updated by
+    whomever is in charge of maintaining them, i.e. they should be ready to
+    handle us replacing their zip archives with new distributions at runtime.
     """
-    normalized_path = normalize_path(path)
-    _uncache(normalized_path, zipimport._zip_directory_cache)
+    # There are several other known sources of stale zipimport.zipimporter
+    # instances that we do not clear here, but might if ever given a reason to
+    # do so:
+    #   * Global setuptools pkg_resources.working_set (a.k.a. 'master working
+    #     set') may contain distributions which may in turn contain their
+    #     zipimport.zipimporter loaders.
+    #   * Several zipimport.zipimporter loaders held by local variables further
+    #     up the function call stack when running the setuptools installation.
+    #   * Already loaded modules may have their __loader__ attribute set to the
+    #     exact loader instance used when importing them. Python 3.4 docs state
+    #     that this information is intended mostly for introspection and so is
+    #     not expected to cause us problems.
+    normalized_path = normalize_path(dist_path)
     _uncache(normalized_path, sys.path_importer_cache)
+    if fix_zipimporter_caches:
+        _replace_zip_directory_cache_data(normalized_path)
+    else:
+        # Here, even though we do not want to fix existing and now stale
+        # zipimporter cache information, we still want to remove it. Related to
+        # Python's zip archive directory information cache, we clear each of
+        # its stale entries in two phases:
+        #   1. Clear the entry so attempting to access zip archive information
+        #      via any existing stale zipimport.zipimporter instances fails.
+        #   2. Remove the entry from the cache so any newly constructed
+        #      zipimport.zipimporter instances do not end up using old stale
+        #      zip archive directory information.
+        # This whole stale data removal step does not seem strictly necessary,
+        # but has been left in because it was done before we started replacing
+        # the zip archive directory information cache content if possible, and
+        # there are no relevant unit tests that we can depend on to tell us if
+        # this is really needed.
+        _remove_and_clear_zip_directory_cache_data(normalized_path)
+
+def _collect_zipimporter_cache_entries(normalized_path, cache):
+    """
+    Return zipimporter cache entry keys related to a given normalized path.

-def _uncache(normalized_path, cache):
-    to_remove = []
+    Alternative path spellings (e.g. those using different character case or
+    those using alternative path separators) related to the same path are
+    included. Any sub-path entries are included as well, i.e. those
+    corresponding to zip archives embedded in other zip archives.
+
+    """
+    result = []
     prefix_len = len(normalized_path)
     for p in cache:
         np = normalize_path(p)
         if (np.startswith(normalized_path) and
                 np[prefix_len:prefix_len + 1] in (os.sep, '')):
-            to_remove.append(p)
-    for p in to_remove:
+            result.append(p)
+    return result
+
+def _update_zipimporter_cache(normalized_path, cache, updater=None):
+    """
+    Update zipimporter cache data for a given normalized path.
+
+    Any sub-path entries are processed as well, i.e. those corresponding to zip
+    archives embedded in other zip archives.
+
+    Given updater is a callable taking a cache entry key and the original entry
+    (after already removing the entry from the cache), and expected to update
+    the entry and possibly return a new one to be inserted in its place.
+    Returning None indicates that the entry should not be replaced with a new
+    one. If no updater is given, the cache entries are simply removed without
+    any additional processing, the same as if the updater simply returned None.
+
+    """
+    for p in _collect_zipimporter_cache_entries(normalized_path, cache):
+        # N.B. pypy's custom zipimport._zip_directory_cache implementation does
+        # not support the complete dict interface:
+        #   * Does not support item assignment, thus not allowing this function
+        #     to be used only for removing existing cache entries.
+        #   * Does not support the dict.pop() method, forcing us to use the
+        #     get/del patterns instead. For more detailed information see the
+        #     following links:
+        #       https://bitbucket.org/pypa/setuptools/issue/202/more-robust-zipimporter-cache-invalidation#comment-10495960
+        #       https://bitbucket.org/pypy/pypy/src/dd07756a34a41f674c0cacfbc8ae1d4cc9ea2ae4/pypy/module/zipimport/interp_zipimport.py#cl-99
+        old_entry = cache[p]
         del cache[p]
+        new_entry = updater and updater(p, old_entry)
+        if new_entry is not None:
+            cache[p] = new_entry
+
+def _uncache(normalized_path, cache):
+    _update_zipimporter_cache(normalized_path, cache)
+
+def _remove_and_clear_zip_directory_cache_data(normalized_path):
+    def clear_and_remove_cached_zip_archive_directory_data(path, old_entry):
+        old_entry.clear()
+    _update_zipimporter_cache(normalized_path,
+                              zipimport._zip_directory_cache,
+                              updater=clear_and_remove_cached_zip_archive_directory_data)
+
+# PyPy Python implementation does not allow directly writing to the
+# zipimport._zip_directory_cache and so prevents us from attempting to correct
+# its content. The best we can do there is clear the problematic cache content
+# and have PyPy repopulate it as needed. The downside is that if there are any
+# stale zipimport.zipimporter instances laying around, attempting to use them
+# will fail due to not having its zip archive directory information available
+# instead of being automatically corrected to use the new correct zip archive
+# directory information.
+if '__pypy__' in sys.builtin_module_names:
+    _replace_zip_directory_cache_data = \
+        _remove_and_clear_zip_directory_cache_data
+else:
+    def _replace_zip_directory_cache_data(normalized_path):
+        def replace_cached_zip_archive_directory_data(path, old_entry):
+            # N.B. In theory, we could load the zip directory information just
+            # once for all updated path spellings, and then copy it locally and
+            # update its contained path strings to contain the correct
+            # spelling, but that seems like a way too invasive move (this cache
+            # structure is not officially documented anywhere and could in
+            # theory change with new Python releases) for no significant
+            # benefit.
+            old_entry.clear()
+            zipimport.zipimporter(path)
+            old_entry.update(zipimport._zip_directory_cache[path])
+            return old_entry
+        _update_zipimporter_cache(normalized_path,
+                                  zipimport._zip_directory_cache,
+                                  updater=replace_cached_zip_archive_directory_data)

 def is_python(text, filename='<string>'):
     "Is this string a valid Python script?"
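
To illustrate the sys.path_importer_cache half of the problem described in the new update_dist_caches() docstring, here is a minimal, self-contained sketch. It is not part of the commit; the thing.egg entry and the pkg_a/pkg_b module names are invented for the example. It shows a cached zipimporter finder outliving the zipped egg it was built for, and how dropping the matching cache entry, which is what _uncache() does for the distribution's paths, lets Python build a correct FileFinder for the replacement directory:

```python
# Illustrative only: reproduces the stale-finder situation described above.
import importlib
import os
import sys
import tempfile
import zipfile

workdir = tempfile.mkdtemp()
entry = os.path.join(workdir, 'thing.egg')  # hypothetical sys.path entry

# Install a zipped "egg" and import from it; Python caches a zipimporter
# finder for this path entry in sys.path_importer_cache.
with zipfile.ZipFile(entry, 'w') as zf:
    zf.writestr('pkg_a.py', 'VALUE = 1\n')
sys.path.insert(0, entry)
import pkg_a
print(pkg_a.VALUE, type(sys.path_importer_cache[entry]))  # 1, zipimporter

# Replace the zipped egg with an unzipped directory of the same name. The
# cached finder is now the wrong kind, so later imports through this entry
# would keep consulting the stale zipimporter and miss the directory's files.
os.remove(entry)
os.mkdir(entry)
with open(os.path.join(entry, 'pkg_b.py'), 'w') as f:
    f.write('VALUE = 2\n')

# What _uncache() does for the distribution's paths: drop the stale finder so
# the next import builds a fresh FileFinder for the directory.
del sys.path_importer_cache[entry]
importlib.invalidate_caches()
import pkg_b
print(pkg_b.VALUE)  # 2
```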
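The zipimport._zip_directory_cache half can be sketched the same way. This is not the patch's own code, and the demo.egg path and demo_module name are invented, but it mirrors the clear / re-read / update-in-place sequence that replace_cached_zip_archive_directory_data() performs on CPython, so that an already constructed zipimporter sees the replaced archive:

```python
# Illustrative only: refresh the shared zip directory cache entry in place.
import os
import shutil
import tempfile
import zipfile
import zipimport

workdir = tempfile.mkdtemp()
archive = os.path.join(workdir, 'demo.egg')  # hypothetical zipped egg

def build_archive(payload):
    # Rewriting the archive changes member offsets, so directory data cached
    # for the old archive no longer describes the file on disk.
    with zipfile.ZipFile(archive, 'w') as zf:
        zf.writestr('demo_module.py', 'PAYLOAD = %r\n' % payload)

build_archive('old-payload')
importer = zipimport.zipimporter(archive)
print(importer.get_source('demo_module'))  # reads the original archive

build_archive('a-new-payload-of-a-different-length')

# Refresh the shared cache entry instead of hunting down every importer:
# drop the key, let a fresh zipimporter re-read the replaced archive, then
# copy the new directory data into the dict object existing importers share.
old_entry = zipimport._zip_directory_cache.pop(archive)
old_entry.clear()
zipimport.zipimporter(archive)  # re-reads the archive, repopulates the cache
old_entry.update(zipimport._zip_directory_cache[archive])
zipimport._zip_directory_cache[archive] = old_entry

print(importer.get_source('demo_module'))  # now reflects the new archive
shutil.rmtree(workdir)
```

On PyPy, where zipimport._zip_directory_cache does not support item assignment, only the clearing part of this sequence is possible, which is why the patch aliases _replace_zip_directory_cache_data to _remove_and_clear_zip_directory_cache_data there.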