diff options
author | Tim Heap <tim@timheap.me> | 2016-08-16 16:40:10 +1000 |
---|---|---|
committer | Tim Heap <tim@timheap.me> | 2016-10-15 02:23:49 +0200 |
commit | bb45468d27615c2ce9f9c9757a367c44d6ee80d2 (patch) | |
tree | 6d4835611deb4911f9693051066edfbaa035d856 /setuptools/command/egg_info.py | |
parent | 7edbffcc92c89b96f37e9e007e0f7f9490c49232 (diff) | |
download | external_python_setuptools-bb45468d27615c2ce9f9c9757a367c44d6ee80d2.tar.gz external_python_setuptools-bb45468d27615c2ce9f9c9757a367c44d6ee80d2.tar.bz2 external_python_setuptools-bb45468d27615c2ce9f9c9757a367c44d6ee80d2.zip |
Much faster implementation of FileList, for big egg_info speedups
Diffstat (limited to 'setuptools/command/egg_info.py')
-rwxr-xr-x | setuptools/command/egg_info.py | 262 |
1 files changed, 232 insertions, 30 deletions
diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py index 5183eedc..6cc8f4c4 100755 --- a/setuptools/command/egg_info.py +++ b/setuptools/command/egg_info.py @@ -3,6 +3,7 @@ Create a distribution's .egg-info directory and contents""" from distutils.filelist import FileList as _FileList +from distutils.errors import DistutilsInternalError from distutils.util import convert_path from distutils import log import distutils.errors @@ -27,6 +28,7 @@ from pkg_resources import ( parse_requirements, safe_name, parse_version, safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename) import setuptools.unicode_utils as unicode_utils +from setuptools.glob import glob from pkg_resources.extern import packaging @@ -36,6 +38,88 @@ except ImportError: pass +def translate_pattern(glob): + """ + Translate a file path glob like '*.txt' in to a regular expression. + This differs from fnmatch.translate which allows wildcards to match + directory separators. It also knows about '**/' which matches any number of + directories. + """ + pat = '' + + # This will split on '/' within [character classes]. This is deliberate. + chunks = glob.split(os.path.sep) + + sep = re.escape(os.sep) + valid_char = '[^%s]' % (sep,) + + for c, chunk in enumerate(chunks): + last_chunk = c == len(chunks) - 1 + + # Chunks that are a literal ** are globstars. They match anything. + if chunk == '**': + if last_chunk: + # Match anything if this is the last component + pat += '.*' + else: + # Match '(name/)*' + pat += '(?:%s+%s)*' % (valid_char, sep) + continue # Break here as the whole path component has been handled + + # Find any special characters in the remainder + i = 0 + chunk_len = len(chunk) + while i < chunk_len: + char = chunk[i] + if char == '*': + # Match any number of name characters + pat += valid_char + '*' + elif char == '?': + # Match a name character + pat += valid_char + elif char == '[': + # Character class + inner_i = i + 1 + # Skip initial !/] chars + if inner_i < chunk_len and chunk[inner_i] == '!': + inner_i = inner_i + 1 + if inner_i < chunk_len and chunk[inner_i] == ']': + inner_i = inner_i + 1 + + # Loop till the closing ] is found + while inner_i < chunk_len and chunk[inner_i] != ']': + inner_i = inner_i + 1 + + if inner_i >= chunk_len: + # Got to the end of the string without finding a closing ] + # Do not treat this as a matching group, but as a literal [ + pat += re.escape(char) + else: + # Grab the insides of the [brackets] + inner = chunk[i + 1:inner_i] + char_class = '' + + # Class negation + if inner[0] == '!': + char_class = '^' + inner = inner[1:] + + char_class += re.escape(inner) + pat += '[%s]' % (char_class,) + + # Skip to the end ] + i = inner_i + else: + pat += re.escape(char) + i += 1 + + # Join each chunk with the dir separator + if not last_chunk: + pat += sep + + return re.compile(pat + r'\Z(?ms)') + + class egg_info(Command): description = "create a distribution's .egg-info directory" @@ -239,7 +323,151 @@ class egg_info(Command): class FileList(_FileList): - """File list that accepts only existing, platform-independent paths""" + # Implementations of the various MANIFEST.in commands + + def process_template_line(self, line): + # Parse the line: split it up, make sure the right number of words + # is there, and return the relevant words. 'action' is always + # defined: it's the first word of the line. Which of the other + # three are defined depends on the action; it'll be either + # patterns, (dir and patterns), or (dir_pattern). + (action, patterns, dir, dir_pattern) = self._parse_template_line(line) + + # OK, now we know that the action is valid and we have the + # right number of words on the line for that action -- so we + # can proceed with minimal error-checking. + if action == 'include': + self.debug_print("include " + ' '.join(patterns)) + for pattern in patterns: + if not self.include(pattern): + log.warn("warning: no files found matching '%s'", pattern) + + elif action == 'exclude': + self.debug_print("exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.exclude(pattern): + log.warn(("warning: no previously-included files " + "found matching '%s'"), pattern) + + elif action == 'global-include': + self.debug_print("global-include " + ' '.join(patterns)) + for pattern in patterns: + if not self.global_include(pattern): + log.warn(("warning: no files found matching '%s' " + "anywhere in distribution"), pattern) + + elif action == 'global-exclude': + self.debug_print("global-exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.global_exclude(pattern): + log.warn(("warning: no previously-included files matching " + "'%s' found anywhere in distribution"), + pattern) + + elif action == 'recursive-include': + self.debug_print("recursive-include %s %s" % + (dir, ' '.join(patterns))) + for pattern in patterns: + if not self.recursive_include(dir, pattern): + log.warn(("warning: no files found matching '%s' " + "under directory '%s'"), + pattern, dir) + + elif action == 'recursive-exclude': + self.debug_print("recursive-exclude %s %s" % + (dir, ' '.join(patterns))) + for pattern in patterns: + if not self.recursive_exclude(dir, pattern): + log.warn(("warning: no previously-included files matching " + "'%s' found under directory '%s'"), + pattern, dir) + + elif action == 'graft': + self.debug_print("graft " + dir_pattern) + if not self.graft(dir_pattern): + log.warn("warning: no directories found matching '%s'", + dir_pattern) + + elif action == 'prune': + self.debug_print("prune " + dir_pattern) + if not self.prune(dir_pattern): + log.warn(("no previously-included directories found " + "matching '%s'"), dir_pattern) + + else: + raise DistutilsInternalError( + "this cannot happen: invalid action '%s'" % action) + + def _remove_files(self, predicate): + """ + Remove all files from the file list that match the predicate. + Return True if any matching files were removed + """ + found = False + for i in range(len(self.files) - 1, -1, -1): + if predicate(self.files[i]): + self.debug_print(" removing " + self.files[i]) + del self.files[i] + found = True + return found + + def include(self, pattern): + """Include files that match 'pattern'.""" + found = [f for f in glob(pattern) if not os.path.isdir(f)] + self.extend(found) + return bool(found) + + def exclude(self, pattern): + """Exclude files that match 'pattern'.""" + match = translate_pattern(pattern) + return self._remove_files(match.match) + + def recursive_include(self, dir, pattern): + """ + Include all files anywhere in 'dir/' that match the pattern. + """ + full_pattern = os.path.join(dir, '**', pattern) + found = [f for f in glob(full_pattern, recursive=True) + if not os.path.isdir(f)] + self.extend(found) + return bool(found) + + def recursive_exclude(self, dir, pattern): + """ + Exclude any file anywhere in 'dir/' that match the pattern. + """ + match = translate_pattern(os.path.join(dir, '**', pattern)) + return self._remove_files(match.match) + + def graft(self, dir): + """Include all files from 'dir/'.""" + found = distutils.filelist.findall(dir) + self.extend(found) + return bool(found) + + def prune(self, dir): + """Filter out files from 'dir/'.""" + match = translate_pattern(os.path.join(dir, '**')) + return self._remove_files(match.match) + + def global_include(self, pattern): + """ + Include all files anywhere in the current directory that match the + pattern. This is very inefficient on large file trees. + """ + if self.allfiles is None: + self.findall() + match = translate_pattern(os.path.join('**', pattern)) + found = [f for f in self.allfiles if match.match(f)] + self.extend(found) + return bool(found) + + def global_exclude(self, pattern): + """ + Exclude all files anywhere that match the pattern. + """ + match = translate_pattern(os.path.join('**', pattern)) + return self._remove_files(match.match) def append(self, item): if item.endswith('\r'): # Fix older sdists built on Windows @@ -302,7 +530,6 @@ class manifest_maker(sdist): self.filelist = FileList() if not os.path.exists(self.manifest): self.write_manifest() # it must exist so it'll get in the list - self.filelist.findall() self.add_defaults() if os.path.exists(self.template): self.read_template() @@ -341,38 +568,13 @@ class manifest_maker(sdist): elif os.path.exists(self.manifest): self.read_manifest() ei_cmd = self.get_finalized_command('egg_info') - self._add_egg_info(cmd=ei_cmd) - self.filelist.include_pattern("*", prefix=ei_cmd.egg_info) - - def _add_egg_info(self, cmd): - """ - Add paths for egg-info files for an external egg-base. - - The egg-info files are written to egg-base. If egg-base is - outside the current working directory, this method - searchs the egg-base directory for files to include - in the manifest. Uses distutils.filelist.findall (which is - really the version monkeypatched in by setuptools/__init__.py) - to perform the search. - - Since findall records relative paths, prefix the returned - paths with cmd.egg_base, so add_default's include_pattern call - (which is looking for the absolute cmd.egg_info) will match - them. - """ - if cmd.egg_base == os.curdir: - # egg-info files were already added by something else - return - - discovered = distutils.filelist.findall(cmd.egg_base) - resolved = (os.path.join(cmd.egg_base, path) for path in discovered) - self.filelist.allfiles.extend(resolved) + self.filelist.graft(ei_cmd.egg_info) def prune_file_list(self): build = self.get_finalized_command('build') base_dir = self.distribution.get_fullname() - self.filelist.exclude_pattern(None, prefix=build.build_base) - self.filelist.exclude_pattern(None, prefix=base_dir) + self.filelist.prune(build.build_base) + self.filelist.prune(base_dir) sep = re.escape(os.sep) self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep, is_regex=1) |