aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xsetuptools/command/egg_info.py262
-rw-r--r--setuptools/glob.py165
-rw-r--r--setuptools/tests/test_manifest.py68
3 files changed, 451 insertions, 44 deletions
diff --git a/setuptools/command/egg_info.py b/setuptools/command/egg_info.py
index 5183eedc..6cc8f4c4 100755
--- a/setuptools/command/egg_info.py
+++ b/setuptools/command/egg_info.py
@@ -3,6 +3,7 @@
Create a distribution's .egg-info directory and contents"""
from distutils.filelist import FileList as _FileList
+from distutils.errors import DistutilsInternalError
from distutils.util import convert_path
from distutils import log
import distutils.errors
@@ -27,6 +28,7 @@ from pkg_resources import (
parse_requirements, safe_name, parse_version,
safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
import setuptools.unicode_utils as unicode_utils
+from setuptools.glob import glob
from pkg_resources.extern import packaging
@@ -36,6 +38,88 @@ except ImportError:
pass
+def translate_pattern(glob):
+ """
+ Translate a file path glob like '*.txt' in to a regular expression.
+ This differs from fnmatch.translate which allows wildcards to match
+ directory separators. It also knows about '**/' which matches any number of
+ directories.
+ """
+ pat = ''
+
+ # This will split on '/' within [character classes]. This is deliberate.
+ chunks = glob.split(os.path.sep)
+
+ sep = re.escape(os.sep)
+ valid_char = '[^%s]' % (sep,)
+
+ for c, chunk in enumerate(chunks):
+ last_chunk = c == len(chunks) - 1
+
+ # Chunks that are a literal ** are globstars. They match anything.
+ if chunk == '**':
+ if last_chunk:
+ # Match anything if this is the last component
+ pat += '.*'
+ else:
+ # Match '(name/)*'
+ pat += '(?:%s+%s)*' % (valid_char, sep)
+ continue # Break here as the whole path component has been handled
+
+ # Find any special characters in the remainder
+ i = 0
+ chunk_len = len(chunk)
+ while i < chunk_len:
+ char = chunk[i]
+ if char == '*':
+ # Match any number of name characters
+ pat += valid_char + '*'
+ elif char == '?':
+ # Match a name character
+ pat += valid_char
+ elif char == '[':
+ # Character class
+ inner_i = i + 1
+ # Skip initial !/] chars
+ if inner_i < chunk_len and chunk[inner_i] == '!':
+ inner_i = inner_i + 1
+ if inner_i < chunk_len and chunk[inner_i] == ']':
+ inner_i = inner_i + 1
+
+ # Loop till the closing ] is found
+ while inner_i < chunk_len and chunk[inner_i] != ']':
+ inner_i = inner_i + 1
+
+ if inner_i >= chunk_len:
+ # Got to the end of the string without finding a closing ]
+ # Do not treat this as a matching group, but as a literal [
+ pat += re.escape(char)
+ else:
+ # Grab the insides of the [brackets]
+ inner = chunk[i + 1:inner_i]
+ char_class = ''
+
+ # Class negation
+ if inner[0] == '!':
+ char_class = '^'
+ inner = inner[1:]
+
+ char_class += re.escape(inner)
+ pat += '[%s]' % (char_class,)
+
+ # Skip to the end ]
+ i = inner_i
+ else:
+ pat += re.escape(char)
+ i += 1
+
+ # Join each chunk with the dir separator
+ if not last_chunk:
+ pat += sep
+
+ return re.compile(pat + r'\Z(?ms)')
+
+
class egg_info(Command):
description = "create a distribution's .egg-info directory"
@@ -239,7 +323,151 @@ class egg_info(Command):
class FileList(_FileList):
- """File list that accepts only existing, platform-independent paths"""
+ # Implementations of the various MANIFEST.in commands
+
+ def process_template_line(self, line):
+ # Parse the line: split it up, make sure the right number of words
+ # is there, and return the relevant words. 'action' is always
+ # defined: it's the first word of the line. Which of the other
+ # three are defined depends on the action; it'll be either
+ # patterns, (dir and patterns), or (dir_pattern).
+ (action, patterns, dir, dir_pattern) = self._parse_template_line(line)
+
+ # OK, now we know that the action is valid and we have the
+ # right number of words on the line for that action -- so we
+ # can proceed with minimal error-checking.
+ if action == 'include':
+ self.debug_print("include " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.include(pattern):
+ log.warn("warning: no files found matching '%s'", pattern)
+
+ elif action == 'exclude':
+ self.debug_print("exclude " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.exclude(pattern):
+ log.warn(("warning: no previously-included files "
+ "found matching '%s'"), pattern)
+
+ elif action == 'global-include':
+ self.debug_print("global-include " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.global_include(pattern):
+ log.warn(("warning: no files found matching '%s' "
+ "anywhere in distribution"), pattern)
+
+ elif action == 'global-exclude':
+ self.debug_print("global-exclude " + ' '.join(patterns))
+ for pattern in patterns:
+ if not self.global_exclude(pattern):
+ log.warn(("warning: no previously-included files matching "
+ "'%s' found anywhere in distribution"),
+ pattern)
+
+ elif action == 'recursive-include':
+ self.debug_print("recursive-include %s %s" %
+ (dir, ' '.join(patterns)))
+ for pattern in patterns:
+ if not self.recursive_include(dir, pattern):
+ log.warn(("warning: no files found matching '%s' "
+ "under directory '%s'"),
+ pattern, dir)
+
+ elif action == 'recursive-exclude':
+ self.debug_print("recursive-exclude %s %s" %
+ (dir, ' '.join(patterns)))
+ for pattern in patterns:
+ if not self.recursive_exclude(dir, pattern):
+ log.warn(("warning: no previously-included files matching "
+ "'%s' found under directory '%s'"),
+ pattern, dir)
+
+ elif action == 'graft':
+ self.debug_print("graft " + dir_pattern)
+ if not self.graft(dir_pattern):
+ log.warn("warning: no directories found matching '%s'",
+ dir_pattern)
+
+ elif action == 'prune':
+ self.debug_print("prune " + dir_pattern)
+ if not self.prune(dir_pattern):
+ log.warn(("no previously-included directories found "
+ "matching '%s'"), dir_pattern)
+
+ else:
+ raise DistutilsInternalError(
+ "this cannot happen: invalid action '%s'" % action)
+
+ def _remove_files(self, predicate):
+ """
+ Remove all files from the file list that match the predicate.
+ Return True if any matching files were removed
+ """
+ found = False
+ for i in range(len(self.files) - 1, -1, -1):
+ if predicate(self.files[i]):
+ self.debug_print(" removing " + self.files[i])
+ del self.files[i]
+ found = True
+ return found
+
+ def include(self, pattern):
+ """Include files that match 'pattern'."""
+ found = [f for f in glob(pattern) if not os.path.isdir(f)]
+ self.extend(found)
+ return bool(found)
+
+ def exclude(self, pattern):
+ """Exclude files that match 'pattern'."""
+ match = translate_pattern(pattern)
+ return self._remove_files(match.match)
+
+ def recursive_include(self, dir, pattern):
+ """
+ Include all files anywhere in 'dir/' that match the pattern.
+ """
+ full_pattern = os.path.join(dir, '**', pattern)
+ found = [f for f in glob(full_pattern, recursive=True)
+ if not os.path.isdir(f)]
+ self.extend(found)
+ return bool(found)
+
+ def recursive_exclude(self, dir, pattern):
+ """
+ Exclude any file anywhere in 'dir/' that match the pattern.
+ """
+ match = translate_pattern(os.path.join(dir, '**', pattern))
+ return self._remove_files(match.match)
+
+ def graft(self, dir):
+ """Include all files from 'dir/'."""
+ found = distutils.filelist.findall(dir)
+ self.extend(found)
+ return bool(found)
+
+ def prune(self, dir):
+ """Filter out files from 'dir/'."""
+ match = translate_pattern(os.path.join(dir, '**'))
+ return self._remove_files(match.match)
+
+ def global_include(self, pattern):
+ """
+ Include all files anywhere in the current directory that match the
+ pattern. This is very inefficient on large file trees.
+ """
+ if self.allfiles is None:
+ self.findall()
+ match = translate_pattern(os.path.join('**', pattern))
+ found = [f for f in self.allfiles if match.match(f)]
+ self.extend(found)
+ return bool(found)
+
+ def global_exclude(self, pattern):
+ """
+ Exclude all files anywhere that match the pattern.
+ """
+ match = translate_pattern(os.path.join('**', pattern))
+ return self._remove_files(match.match)
def append(self, item):
if item.endswith('\r'): # Fix older sdists built on Windows
@@ -302,7 +530,6 @@ class manifest_maker(sdist):
self.filelist = FileList()
if not os.path.exists(self.manifest):
self.write_manifest() # it must exist so it'll get in the list
- self.filelist.findall()
self.add_defaults()
if os.path.exists(self.template):
self.read_template()
@@ -341,38 +568,13 @@ class manifest_maker(sdist):
elif os.path.exists(self.manifest):
self.read_manifest()
ei_cmd = self.get_finalized_command('egg_info')
- self._add_egg_info(cmd=ei_cmd)
- self.filelist.include_pattern("*", prefix=ei_cmd.egg_info)
-
- def _add_egg_info(self, cmd):
- """
- Add paths for egg-info files for an external egg-base.
-
- The egg-info files are written to egg-base. If egg-base is
- outside the current working directory, this method
- searchs the egg-base directory for files to include
- in the manifest. Uses distutils.filelist.findall (which is
- really the version monkeypatched in by setuptools/__init__.py)
- to perform the search.
-
- Since findall records relative paths, prefix the returned
- paths with cmd.egg_base, so add_default's include_pattern call
- (which is looking for the absolute cmd.egg_info) will match
- them.
- """
- if cmd.egg_base == os.curdir:
- # egg-info files were already added by something else
- return
-
- discovered = distutils.filelist.findall(cmd.egg_base)
- resolved = (os.path.join(cmd.egg_base, path) for path in discovered)
- self.filelist.allfiles.extend(resolved)
+ self.filelist.graft(ei_cmd.egg_info)
def prune_file_list(self):
build = self.get_finalized_command('build')
base_dir = self.distribution.get_fullname()
- self.filelist.exclude_pattern(None, prefix=build.build_base)
- self.filelist.exclude_pattern(None, prefix=base_dir)
+ self.filelist.prune(build.build_base)
+ self.filelist.prune(base_dir)
sep = re.escape(os.sep)
self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep,
is_regex=1)
diff --git a/setuptools/glob.py b/setuptools/glob.py
new file mode 100644
index 00000000..f51b9c83
--- /dev/null
+++ b/setuptools/glob.py
@@ -0,0 +1,165 @@
+"""
+Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
+
+Changes include:
+ * `yield from` and PEP3102 `*` removed.
+ * `bytes` changed to `six.binary_type`.
+ * Hidden files are not ignored.
+"""
+
+import os
+import re
+import fnmatch
+from setuptools.extern.six import binary_type
+
+__all__ = ["glob", "iglob", "escape"]
+
+def glob(pathname, recursive=False):
+ """Return a list of paths matching a pathname pattern.
+
+ The pattern may contain simple shell-style wildcards a la
+ fnmatch. However, unlike fnmatch, filenames starting with a
+ dot are special cases that are not matched by '*' and '?'
+ patterns.
+
+ If recursive is true, the pattern '**' will match any files and
+ zero or more directories and subdirectories.
+ """
+ return list(iglob(pathname, recursive=recursive))
+
+def iglob(pathname, recursive=False):
+ """Return an iterator which yields the paths matching a pathname pattern.
+
+ The pattern may contain simple shell-style wildcards a la
+ fnmatch. However, unlike fnmatch, filenames starting with a
+ dot are special cases that are not matched by '*' and '?'
+ patterns.
+
+ If recursive is true, the pattern '**' will match any files and
+ zero or more directories and subdirectories.
+ """
+ it = _iglob(pathname, recursive)
+ if recursive and _isrecursive(pathname):
+ s = next(it) # skip empty string
+ assert not s
+ return it
+
+def _iglob(pathname, recursive):
+ dirname, basename = os.path.split(pathname)
+ if not has_magic(pathname):
+ if basename:
+ if os.path.lexists(pathname):
+ yield pathname
+ else:
+ # Patterns ending with a slash should match only directories
+ if os.path.isdir(dirname):
+ yield pathname
+ return
+ if not dirname:
+ if recursive and _isrecursive(basename):
+ for x in glob2(dirname, basename):
+ yield x
+ else:
+ for x in glob1(dirname, basename):
+ yield x
+ return
+ # `os.path.split()` returns the argument itself as a dirname if it is a
+ # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
+ # contains magic characters (i.e. r'\\?\C:').
+ if dirname != pathname and has_magic(dirname):
+ dirs = _iglob(dirname, recursive)
+ else:
+ dirs = [dirname]
+ if has_magic(basename):
+ if recursive and _isrecursive(basename):
+ glob_in_dir = glob2
+ else:
+ glob_in_dir = glob1
+ else:
+ glob_in_dir = glob0
+ for dirname in dirs:
+ for name in glob_in_dir(dirname, basename):
+ yield os.path.join(dirname, name)
+
+# These 2 helper functions non-recursively glob inside a literal directory.
+# They return a list of basenames. `glob1` accepts a pattern while `glob0`
+# takes a literal basename (so it only has to check for its existence).
+
+def glob1(dirname, pattern):
+ if not dirname:
+ if isinstance(pattern, binary_type):
+ dirname = os.curdir.encode('ASCII')
+ else:
+ dirname = os.curdir
+ try:
+ names = os.listdir(dirname)
+ except OSError:
+ return []
+ return fnmatch.filter(names, pattern)
+
+def glob0(dirname, basename):
+ if not basename:
+ # `os.path.split()` returns an empty basename for paths ending with a
+ # directory separator. 'q*x/' should match only directories.
+ if os.path.isdir(dirname):
+ return [basename]
+ else:
+ if os.path.lexists(os.path.join(dirname, basename)):
+ return [basename]
+ return []
+
+# This helper function recursively yields relative pathnames inside a literal
+# directory.
+
+def glob2(dirname, pattern):
+ assert _isrecursive(pattern)
+ yield pattern[:0]
+ for x in _rlistdir(dirname):
+ yield x
+
+
+# Recursively yields relative pathnames inside a literal directory.
+def _rlistdir(dirname):
+ if not dirname:
+ if isinstance(dirname, binary_type):
+ dirname = binary_type(os.curdir, 'ASCII')
+ else:
+ dirname = os.curdir
+ try:
+ names = os.listdir(dirname)
+ except os.error:
+ return
+ for x in names:
+ yield x
+ path = os.path.join(dirname, x) if dirname else x
+ for y in _rlistdir(path):
+ yield os.path.join(x, y)
+
+
+magic_check = re.compile('([*?[])')
+magic_check_bytes = re.compile(b'([*?[])')
+
+def has_magic(s):
+ if isinstance(s, binary_type):
+ match = magic_check_bytes.search(s)
+ else:
+ match = magic_check.search(s)
+ return match is not None
+
+def _isrecursive(pattern):
+ if isinstance(pattern, binary_type):
+ return pattern == b'**'
+ else:
+ return pattern == '**'
+
+def escape(pathname):
+ """Escape all special characters.
+ """
+ # Escaping is done by wrapping any of "*?[" between square brackets.
+ # Metacharacters do not work in the drive part and shouldn't be escaped.
+ drive, pathname = os.path.splitdrive(pathname)
+ if isinstance(pathname, binary_type):
+ pathname = magic_check_bytes.sub(br'[\1]', pathname)
+ else:
+ pathname = magic_check.sub(r'[\1]', pathname)
+ return drive + pathname
diff --git a/setuptools/tests/test_manifest.py b/setuptools/tests/test_manifest.py
index 6360270d..558de2c7 100644
--- a/setuptools/tests/test_manifest.py
+++ b/setuptools/tests/test_manifest.py
@@ -9,7 +9,7 @@ import tempfile
from distutils import log
from distutils.errors import DistutilsTemplateError
-from setuptools.command.egg_info import FileList, egg_info
+from setuptools.command.egg_info import FileList, egg_info, translate_pattern
from setuptools.dist import Distribution
from setuptools.extern import six
from setuptools.tests.textwrap import DALS
@@ -66,6 +66,34 @@ default_files = frozenset(map(make_local_path, [
]))
+def get_pattern(glob):
+ return translate_pattern(make_local_path(glob)).pattern
+
+
+def test_translated_pattern_test():
+ l = make_local_path
+ assert get_pattern('foo') == r'foo\Z(?ms)'
+ assert get_pattern(l('foo/bar')) == l(r'foo\/bar\Z(?ms)')
+
+ # Glob matching
+ assert get_pattern('*.txt') == l(r'[^\/]*\.txt\Z(?ms)')
+ assert get_pattern('dir/*.txt') == l(r'dir\/[^\/]*\.txt\Z(?ms)')
+ assert get_pattern('*/*.py') == l(r'[^\/]*\/[^\/]*\.py\Z(?ms)')
+ assert get_pattern('docs/page-?.txt') \
+ == l(r'docs\/page\-[^\/]\.txt\Z(?ms)')
+
+ # Globstars change what they mean depending upon where they are
+ assert get_pattern(l('foo/**/bar')) == l(r'foo\/(?:[^\/]+\/)*bar\Z(?ms)')
+ assert get_pattern(l('foo/**')) == l(r'foo\/.*\Z(?ms)')
+ assert get_pattern(l('**')) == r'.*\Z(?ms)'
+
+ # Character classes
+ assert get_pattern('pre[one]post') == r'pre[one]post\Z(?ms)'
+ assert get_pattern('hello[!one]world') == r'hello[^one]world\Z(?ms)'
+ assert get_pattern('[]one].txt') == r'[\]one]\.txt\Z(?ms)'
+ assert get_pattern('foo[!]one]bar') == r'foo[^\]one]bar\Z(?ms)'
+
+
class TempDirTestCase(object):
def setup_method(self, method):
@@ -346,23 +374,21 @@ class TestFileListTest(TempDirTestCase):
def test_include_pattern(self):
# return False if no match
file_list = FileList()
- file_list.set_allfiles([])
+ self.make_files([])
assert not file_list.include_pattern('*.py')
# return True if files match
file_list = FileList()
- file_list.set_allfiles(['a.py', 'b.txt'])
+ self.make_files(['a.py', 'b.txt'])
assert file_list.include_pattern('*.py')
# test * matches all files
file_list = FileList()
- assert file_list.allfiles is None
- file_list.set_allfiles(['a.py', 'b.txt'])
+ self.make_files(['a.py', 'b.txt'])
file_list.include_pattern('*')
- assert file_list.allfiles == ['a.py', 'b.txt']
+ assert file_list.files == ['a.py', 'b.txt']
- def test_process_template(self):
- l = make_local_path
+ def test_process_template_line_invalid(self):
# invalid lines
file_list = FileList()
for action in ('include', 'exclude', 'global-include',
@@ -377,9 +403,11 @@ class TestFileListTest(TempDirTestCase):
else:
assert False, "Should have thrown an error"
+ def test_include(self):
+ l = make_local_path
# include
file_list = FileList()
- file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
+ self.make_files(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('include *.py')
assert file_list.files == ['a.py']
@@ -389,6 +417,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['a.py']
self.assertWarnings()
+ def test_exclude(self):
+ l = make_local_path
# exclude
file_list = FileList()
file_list.files = ['a.py', 'b.txt', l('d/c.py')]
@@ -401,9 +431,11 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['b.txt', l('d/c.py')]
self.assertWarnings()
+ def test_global_include(self):
+ l = make_local_path
# global-include
file_list = FileList()
- file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
+ self.make_files(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('global-include *.py')
assert file_list.files == ['a.py', l('d/c.py')]
@@ -413,6 +445,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['a.py', l('d/c.py')]
self.assertWarnings()
+ def test_global_exclude(self):
+ l = make_local_path
# global-exclude
file_list = FileList()
file_list.files = ['a.py', 'b.txt', l('d/c.py')]
@@ -425,10 +459,11 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['b.txt']
self.assertWarnings()
+ def test_recursive_include(self):
+ l = make_local_path
# recursive-include
file_list = FileList()
- file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
- l('d/d/e.py')])
+ self.make_files(['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')])
file_list.process_template_line('recursive-include d *.py')
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
@@ -438,6 +473,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
self.assertWarnings()
+ def test_recursive_exclude(self):
+ l = make_local_path
# recursive-exclude
file_list = FileList()
file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
@@ -450,10 +487,11 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['a.py', l('d/c.txt')]
self.assertWarnings()
+ def test_graft(self):
+ l = make_local_path
# graft
file_list = FileList()
- file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
- l('f/f.py')])
+ self.make_files(['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')])
file_list.process_template_line('graft d')
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
@@ -463,6 +501,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
self.assertWarnings()
+ def test_prune(self):
+ l = make_local_path
# prune
file_list = FileList()
file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]