From b4ba33898f4d67af70319a0bb64edca72fc3ecee Mon Sep 17 00:00:00 2001 From: Philip Thiem Date: Sat, 20 Jul 2013 17:45:04 -0500 Subject: Additional Tests, Various fixes, and encoding dealings --HG-- extra : rebase_source : 2734e79e08e194923eab8c70f92cb77bce7daccf --- setuptools/svn_utils.py | 376 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 269 insertions(+), 107 deletions(-) (limited to 'setuptools/svn_utils.py') diff --git a/setuptools/svn_utils.py b/setuptools/svn_utils.py index a4c53f15..09aa5e25 100644 --- a/setuptools/svn_utils.py +++ b/setuptools/svn_utils.py @@ -1,31 +1,22 @@ import os import re import sys -import codecs from distutils import log import xml.dom.pulldom +import shlex +import locale +import unicodedata +from setuptools.compat import unicode, bytes try: import urlparse except ImportError: import urllib.parse as urlparse -#requires python >= 2.4 from subprocess import Popen as _Popen, PIPE as _PIPE -#NOTE: Use of the command line options -# require SVN 1.3 or newer (December 2005) -# and SVN 1.3 hsan't been supported by the -# developers since mid 2008. - - -#svnversion return values (previous implementations return max revision) -# 4123:4168 mixed revision working copy -# 4168M modified working copy -# 4123S switched working copy -# 4123:4168MS mixed revision, modified, switched working copy -_SVN_VER_RE = re.compile(r'(?:([\-0-9]+):)?(\d+)([a-z]*)\s*$', re.I) - +#NOTE: Use of the command line options require SVN 1.3 or newer (December 2005) +# and SVN 1.3 hasn't been supported by the developers since mid 2008. #subprocess is called several times with shell=(sys.platform=='win32') #see the follow for more information: @@ -33,27 +24,20 @@ _SVN_VER_RE = re.compile(r'(?:([\-0-9]+):)?(\d+)([a-z]*)\s*$', re.I) # http://stackoverflow.com/questions/5658622/ # python-subprocess-popen-environment-path def _run_command(args, stdout=_PIPE, stderr=_PIPE): - #regarding the shell argument, see: http://bugs.python.org/issue8557 - try: - proc = _Popen(args, stdout=stdout, stderr=stderr, - shell=(sys.platform == 'win32')) - - data = proc.communicate()[0] - except OSError: - return 1, '' - - #TODO: this is probably NOT always utf-8 - try: - data = unicode(data, encoding='utf-8') - except NameError: - data = str(data, encoding='utf-8') + #regarding the shell argument, see: http://bugs.python.org/issue8557 + try: + args = [fsdecode(x) for x in args] + proc = _Popen(args, stdout=stdout, stderr=stderr, + shell=(sys.platform == 'win32')) - #communciate calls wait() - return proc.returncode, data + data = proc.communicate()[0] + except OSError: + return 1, '' + data = consoledecode(data) -def _get_entry_name(entry): - return entry.getAttribute('path') + #communciate calls wait() + return proc.returncode, data def _get_entry_schedule(entry): @@ -63,105 +47,283 @@ def _get_entry_schedule(entry): if t.nodeType == t.TEXT_NODE]) -def parse_revision(path): - code, data = _run_command(['svnversion', '-c', path]) +def _get_target_property(target): + property_text = target.getElementsByTagName('property')[0] + return "".join([t.nodeValue + for t in property_text.childNodes + if t.nodeType == t.TEXT_NODE]) - if code: - log.warn("svnversion failed") - return 0 + +def _get_xml_data(decoded_str): + if sys.version_info < (3, 0): + #old versions want an encoded string + data = decoded_str.encode('utf-8') else: - log.warn('Version: %s' % data.strip()) + data = decoded_str + return data - parsed = _SVN_VER_RE.match(data) - if parsed: - try: - #No max needed this command summarizes working copy since 1.0 - return int(parsed.group(2)) - except ValueError: - #This should only happen if the revision is WAY too big. - pass - return 0 - -#TODO: Need to do this with the -R because only root has .svn in 1.7.x -def parse_dir_entries(path): - code, data = _run_command(['svn', 'info', - '--depth', 'immediates', '--xml', path]) - - if code: - log.warn("svn info failed") - return [] - data = codecs.encode(data, 'UTF-8') +def joinpath(prefix, suffix): + if not prefix or prefix == '.': + return suffix + return os.path.join(prefix, suffix) + + +def fsencode(path): + "Path must be unicode or in file system encoding already" + encoding = sys.getfilesystemencoding() + + if isinstance(path, unicode): + path = path.encode() + elif not isinstance(path, bytes): + raise TypeError('%s is not a string or byte type' + % type(path).__name__) + + #getfilessystemencoding doesn't have the mac-roman issue + if encoding == 'utf-8' and sys.platform == 'darwin': + path = path.decode('utf-8') + path = unicodedata.normalize('NFD', path) + path = path.encode('utf-8') - doc = xml.dom.pulldom.parseString(data) + return path + +def fsdecode(path): + "Path must be unicode or in file system encoding already" + encoding = sys.getfilesystemencoding() + if isinstance(path, bytes): + path = path.decode(encoding) + elif not isinstance(path, unicode): + raise TypeError('%s is not a byte type' + % type(path).__name__) + + return unicodedata.normalize('NFC', path) + +def consoledecode(text): + encoding = locale.getpreferredencoding() + return text.decode(encoding) + + +def parse_dir_entries(decoded_str): + '''Parse the entries from a recursive info xml''' + doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str)) entries = list() + for event, node in doc: if event == 'START_ELEMENT' and node.nodeName == 'entry': doc.expandNode(node) - entries.append(node) - - if entries: - return [ - _get_entry_name(element) - for element in entries[1:] - if _get_entry_schedule(element).lower() != 'deleted' - ] - else: - return [] + if not _get_entry_schedule(node).startswith('delete'): + entries.append((node.getAttribute('path'), + node.getAttribute('kind'))) + return entries[1:] # do not want the root directory -#--xml wasn't supported until 1.5.x need to do -R -#TODO: -R looks like directories are seperated by blank lines -# with dir - prepened to first directory -# what about directories with spaces? -# put quotes around them -# what about the URL's? -# same -# convert to UTF-8 and use csv -# delimiter = space -# -#-R without --xml parses a bit funny -def parse_externals(path): - try: - code, lines = _run_command(['svn', - 'propget', 'svn:externals', path]) - if code: - log.warn("svn propget failed") - return [] +def parse_externals_xml(decoded_str, prefix=''): + '''Parse a propget svn:externals xml''' + prefix = os.path.normpath(prefix) + + doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str)) + externals = list() + + for event, node in doc: + if event == 'START_ELEMENT' and node.nodeName == 'target': + doc.expandNode(node) + path = os.path.normpath(node.getAttribute('path')) + if os.path.normcase(path).startswith(prefix): + path = path[len(prefix)+1:] - lines = [line for line in lines.splitlines() if line] - except ValueError: - lines = [] + data = _get_target_property(node) + for external in parse_external_prop(data): + externals.append(joinpath(path, external)) + return externals # do not want the root directory + + +def parse_external_prop(lines): + """ + Parse the value of a retrieved svn:externals entry. + + possible token setups (with quotng and backscaping in laters versions) + URL[@#] EXT_FOLDERNAME + [-r#] URL EXT_FOLDERNAME + EXT_FOLDERNAME [-r#] URL + """ externals = [] - for line in lines: - line = line.split() + for line in lines.splitlines(): + line = line.lstrip() #there might be a "\ " if not line: continue + if sys.version_info < (3, 0): + #shlex handles NULLs just fine and shlex in 2.7 tries to encode + #as ascii automatiically + line = line.encode('utf-8') + line = shlex.split(line) + if sys.version_info < (3, 0): + line = [x.decode('utf-8') for x in line] + + #EXT_FOLDERNAME is either the first or last depending on where + #the URL falls if urlparse.urlsplit(line[-1])[0]: - externals.append(line[0]) + external = line[0] else: - externals.append(line[-1]) + external = line[-1] + + externals.append(os.path.normpath(external)) return externals -def get_svn_tool_version(): - _, data = _run_command(['svn', '--version', '--quiet']) - if data: - return data.strip() - else: - return '' +class SvnInfo(object): + ''' + Generic svn_info object. No has little knowledge of how to extract + information. Use cls.load to instatiate according svn version. -if __name__ == '__main__': - def entries_externals_finder(dirname): - for record in parse_dir_entries(dirname): - yield os.path.join(dirname, record) + Paths are not filesystem encoded. + ''' - for name in parse_externals(dirname): - yield os.path.join(dirname, name) + @staticmethod + def get_svn_version(): + code, data = _run_command(['svn', '--version', '--quiet']) + if code == 0 and data: + return unicode(data).strip() + else: + return unicode('') + + #svnversion return values (previous implementations return max revision) + # 4123:4168 mixed revision working copy + # 4168M modified working copy + # 4123S switched working copy + # 4123:4168MS mixed revision, modified, switched working copy + revision_re = re.compile(r'(?:([\-0-9]+):)?(\d+)([a-z]*)\s*$', re.I) + + @classmethod + def load(cls, dirname=''): + code, data = _run_command(['svn', 'info', os.path.normpath(dirname)]) + svn_version = tuple(cls.get_svn_version().split('.')) + base_svn_version = tuple(int(x) for x in svn_version[:2]) + if code and base_svn_version: + #Not an SVN repository or compatible one + return SvnInfo(dirname) + elif base_svn_version < (1, 3): + log.warn('Insufficent version of SVN found') + return SvnInfo(dirname) + elif base_svn_version < (1, 5): + return Svn13Info(dirname) + else: + return Svn15Info(dirname) + + def __init__(self, path=''): + self.path = path + self._entries = None + self._externals = None + + def get_revision(self): + 'Retrieve the directory revision informatino using svnversion' + code, data = _run_command(['svnversion', '-c', self.path]) + if code: + log.warn("svnversion failed") + return 0 - for name in entries_externals_finder(sys.argv[1]): + parsed = self.revision_re.match(data) + if parsed: + return int(parsed.group(2)) + else: + return 0 + + @property + def entries(self): + if self._entries is None: + self._entries = self.get_entries() + return self._entries + + @property + def externals(self): + if self._externals is None: + self._externals = self.get_externals() + return self._externals + + def iter_externals(self): + ''' + Iterate over the svn:external references in the repository path. + ''' + for item in self.externals: + yield item + + def iter_files(self): + ''' + Iterate over the non-deleted file entries in the repository path + ''' + for item, kind in self.entries: + if kind.lower()=='file': + yield item + + def iter_dirs(self, include_root=True): + ''' + Iterate over the non-deleted file entries in the repository path + ''' + if include_root: + yield self.path + for item, kind in self.entries: + if kind.lower()=='dir': + yield item + + def get_entries(self): + return [] + + def get_externals(self): + return [] + +class Svn13Info(SvnInfo): + def get_entries(self): + code, data = _run_command(['svn', 'info', '-R', '--xml', self.path]) + + if code: + log.debug("svn info failed") + return [] + + return parse_dir_entries(data) + + def get_externals(self): + #Previous to 1.5 --xml was not supported for svn propget and the -R + #output format breaks the shlex compatible semantics. + cmd = ['svn', 'propget', 'svn:externals'] + result = [] + for folder in self.iter_dirs(): + code, lines = _run_command(cmd + [folder]) + if code != 0: + log.warn("svn propget failed") + return [] + for external in parse_external_prop(lines): + if folder: + external = os.path.join(folder, external) + result.append(os.path.normpath(external)) + + return result + + +class Svn15Info(Svn13Info): + def get_externals(self): + cmd = ['svn', 'propget', 'svn:externals', self.path, '-R', '--xml'] + code, lines = _run_command(cmd) + if code: + log.debug("svn propget failed") + return [] + return parse_externals_xml(lines, prefix=os.path.abspath(self.path)) + + +def svn_finder(dirname=''): + #combined externals due to common interface + #combined externals and entries due to lack of dir_props in 1.7 + info = SvnInfo.load(dirname) + for path in info.iter_files(): + yield fsencode(path) + + for path in info.iter_externals(): + sub_info = SvnInfo.load(path) + for sub_path in sub_info.iter_files(): + yield fsencode(sub_path) + +if __name__ == '__main__': + for name in svn_finder(sys.argv[1]): print(name) \ No newline at end of file -- cgit v1.2.3