summaryrefslogtreecommitdiffstats
path: root/libchrome_tools
diff options
context:
space:
mode:
authorQijiang Fan <fqj@chromium.org>2020-04-20 21:56:57 +0900
committerCommit Bot <commit-bot@chromium.org>2020-05-08 10:13:24 +0000
commite2e7cc8783ffc54d000692b7305a758caf2ffda3 (patch)
treebc9aa09e2e56a33a11589594cb01da63140891f1 /libchrome_tools
parentb3148f5dc5ce72187d35d414b6251a1828cb7492 (diff)
downloadplatform_external_libchrome-e2e7cc8783ffc54d000692b7305a758caf2ffda3.tar.gz
platform_external_libchrome-e2e7cc8783ffc54d000692b7305a758caf2ffda3.tar.bz2
platform_external_libchrome-e2e7cc8783ffc54d000692b7305a758caf2ffda3.zip
add script for history reconnection
BUG=chromium:1048062 TEST=manually run Change-Id: I13e05fe808dd6b8117dd41eb5f17525e9707185e Reviewed-on: https://chromium-review.googlesource.com/c/aosp/platform/external/libchrome/+/2156705 Commit-Queue: Qijiang Fan <fqj@google.com> Tested-by: Qijiang Fan <fqj@google.com> Reviewed-by: Hidehiko Abe <hidehiko@chromium.org>
Diffstat (limited to 'libchrome_tools')
-rw-r--r--libchrome_tools/uprev/lazytree.py22
-rwxr-xr-xlibchrome_tools/uprev/reconnect_history.py333
-rw-r--r--libchrome_tools/uprev/utils.py40
3 files changed, 392 insertions, 3 deletions
diff --git a/libchrome_tools/uprev/lazytree.py b/libchrome_tools/uprev/lazytree.py
index a3c269e27..d76fa5c8c 100644
--- a/libchrome_tools/uprev/lazytree.py
+++ b/libchrome_tools/uprev/lazytree.py
@@ -82,6 +82,28 @@ class LazyTree:
components = path.split(b'/')
self._remove(components)
+ def _get(self, components):
+ """Returns a file at components in utils.GitFile from self tree.
+
+ Args:
+ components: path in list instead of separated by /.
+ """
+ self._loadtree()
+ if len(components) == 1:
+ return self._files[components[0]]
+
+ dirname, components = components[0], components[1:]
+ return self._subtrees[dirname]._get(components)
+
+ def __getitem__(self, path):
+ """Returns a file at path in utils.GitFile from tree.
+
+ Args:
+ path: path of the file to read.
+ """
+ components = path.split(b'/')
+ return self._get(components)
+
def _set(self, components, f):
"""Adds or replace a file.
diff --git a/libchrome_tools/uprev/reconnect_history.py b/libchrome_tools/uprev/reconnect_history.py
new file mode 100755
index 000000000..bb6040c69
--- /dev/null
+++ b/libchrome_tools/uprev/reconnect_history.py
@@ -0,0 +1,333 @@
+#!/usr/bin/env python3
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""
+Utility to disconnect history of files from a branch, and reconnect with base on
+a different branch.
+"""
+
+import argparse
+import collections
+import subprocess
+import sys
+
+import filtered_utils
+import lazytree
+import utils
+
+
class CommitMetadataFactory(dict):
    """Dict that loads commit metadata on first access and remembers it.

    Looking up a key that is not stored yet reads its metadata through
    filtered_utils.get_metadata and keeps the result for later lookups.
    """

    def __missing__(self, key):
        """Fetches, stores and returns the metadata for an unseen key."""
        metadata = filtered_utils.get_metadata(key)
        self[key] = metadata
        return metadata
+
+
def disconnect(source_commit, ref_commit):
    """Commits a copy of source_commit stripped of ref_commit's files.

    Every path present in ref_commit's file list is dropped from
    source_commit's file list; the remaining files form the tree of a new
    commit whose only parent is source_commit.

    Args:
        source_commit: commit hash (str) to disconnect from.
        ref_commit: commit hash whose file list says which paths to drop.

    Returns:
        The value returned by utils.git_commit for the new commit.
    """
    source_files = utils.get_file_list(source_commit)
    dropped_paths = {entry.path for entry in utils.get_file_list(ref_commit)}

    remaining = []
    for entry in source_files:
        if entry.path in dropped_paths:
            continue
        remaining.append(entry)

    commit_message = b'Disconnect history from %s' % (
        source_commit.encode('ascii'),)
    return utils.git_commit(
        utils.git_mktree(remaining), [source_commit], message=commit_message)
+
+
def connect_base(current_commit, base_commit):
    """Creates a two-parent commit holding the files of both commits.

    The new tree is built from current_commit's file list followed by
    base_commit's file list, and the commit's parents are current_commit and
    base_commit, in that order.

    Args:
        current_commit: commit hash to commit on top of.
        base_commit: commit hash (str) that carries the file histories.

    Returns:
        The value returned by utils.git_commit for the new commit.
    """
    combined_files = (utils.get_file_list(current_commit) +
                      utils.get_file_list(base_commit))
    merged_tree = utils.git_mktree(combined_files)
    commit_message = b'Connect history with base %s' % (
        base_commit.encode('ascii'),)
    return utils.git_commit(
        merged_tree, [current_commit, base_commit], message=commit_message)
+
+
def blame_files(commithash, files):
    """Blames every given file at commithash.

    Args:
        commithash: commit to run the blame at.
        files: iterable of file paths to blame.

    Returns:
        A dict mapping each path to the result of utils.git_blame for it.
    """
    return {path: utils.git_blame(commithash, path) for path in files}
+
+
def search_blame_line(blames, amend_commits, target_commit_hash):
    """Pairs lines blamed on target_commit_hash with amend-commit blames.

    Each file in blames is blamed again at every commit in amend_commits and
    the results are zipped together line by line. Only rows whose first
    element (the line from blames) is attributed to target_commit_hash are
    kept.

    Returns a map from file path to a list of tuples, each tuple having
    len(amend_commits) + 1 elements: element 0 is the line from blames, and
    elements 1..n are the lines at the same position in the blames of the
    corresponding amend commits.

    Args:
        blames: a dict from path to list of GitBlameLine, for files blamed on
            target_commit_hash.
        amend_commits: a list of commit hashes to provide actual history.
        target_commit_hash: commit hash that blames are blamed on.
    """
    matches_by_path = {}
    for path, goal_lines in blames.items():
        amend_blames = []
        for amend_commit in amend_commits:
            amend_blames.append(utils.git_blame(amend_commit, path))

        matched_rows = []
        for row in zip(goal_lines, *amend_blames):
            if row[0].commit == target_commit_hash:
                matched_rows.append(row)
        matches_by_path[path] = matched_rows
    return matches_by_path
+
+
def get_track_from_blames(blames_combined, virtual_goal_commit, amend_commits,
                          commit_choice_cache, commit_msg_cache):
    """Decides, for every combined blame row, which amend commit owns it.

    For each row the commits of elements 1..n form a candidate tuple. If
    commit_choice_cache has no answer for that tuple, the candidates' titles
    are printed and the user is asked to pick one by index; the answer is
    stored in commit_choice_cache.

    Returns a tuple containing:
        - the set of all chosen commit hashes;
        - a dict from file path to a list of (line, chosen commit) pairs,
          where line is element 0 of the row.

    Args:
        blames_combined: a map from path to a list of tuples. Each tuple
            describes one line; see search_blame_line for its layout.
        virtual_goal_commit: not read by this function.
        amend_commits: not read by this function.
        commit_choice_cache: dict caching the user's choice per candidate
            tuple; updated in place.
        commit_msg_cache: mapping from commit hash to metadata exposing a
            title attribute, used only for the prompt.
    """
    tracked_commits = set()
    choices_by_path = {}

    for path, rows in blames_combined.items():
        resolved = choices_by_path[path] = []
        for row in rows:
            candidates = tuple(amend_line.commit for amend_line in row[1:])
            selected = commit_choice_cache.get(candidates)
            if selected is None:
                for number, candidate in enumerate(candidates):
                    print('%d: %s' % (number,
                                      commit_msg_cache[candidate].title))
                # The answer is not validated: the only user is the
                # developer, who can simply rerun after a wrong entry.
                answer = int(input('Choose patch: '))
                selected = candidates[answer]
                commit_choice_cache[candidates] = selected
            tracked_commits.add(selected)
            resolved.append((row[0], selected))

    return tracked_commits, choices_by_path
+
+
def reconstruct_file(blame_goal, blame_base, lines_to_reconstruct,
                     virtual_goal_commit):
    """Reconstructs a file to reflect changes in lines_to_reconstruct.

    Walks blame_base and blame_goal in parallel. Lines identical on both
    sides are kept, goal lines listed in lines_to_reconstruct are inserted,
    and base lines that directly follow an inserted line are dropped, since
    they are treated as the old version of a modified line. Other goal lines
    blamed on virtual_goal_commit are ignored, and other base lines are
    preserved.

    Returns the new file content as bytes, every line terminated by b'\n'.

    Args:
        blame_goal: a list of utils.GitBlameLine blaming the file on
            virtual_goal_commit.
        blame_base: a list of utils.GitBlameLine blaming the file on the last
            committed commit.
        lines_to_reconstruct: only these lines are taken from blame_goal,
            instead of everything in it. It is a list of GitBlameLine, in
            the order they appear in blame_goal.
        virtual_goal_commit: commit hash where blame_goal is based on.
    """
    idx_base, idx_goal = 0, 0
    reconstructed_file = []
    # Must exist before the loop: the very first iteration may already reach
    # the branch that reads it (e.g. the file starts with a base line that
    # is absent from goal).
    should_skip_base = False

    print('Changed lines are', [line.data for line in lines_to_reconstruct])
    line_iter = iter(lines_to_reconstruct)
    line = next(line_iter, None)
    while idx_base < len(blame_base) or idx_goal < len(blame_goal):
        # Either side may run out before the other, so never index a side
        # without checking that it still has lines.
        has_base = idx_base < len(blame_base)
        has_goal = idx_goal < len(blame_goal)

        # Both sides are identical. We can't compare blame_base and line
        # directly, because blaming at different commits can yield
        # different line numbers.
        if (has_base and has_goal and
                blame_base[idx_base].data == blame_goal[idx_goal].data and
                blame_base[idx_base].commit == blame_goal[idx_goal].commit):
            # We append this line if both sides are identical.
            reconstructed_file.append(blame_base[idx_base].data)
            idx_base += 1
            idx_goal += 1
            should_skip_base = False
        elif has_goal and line is not None and blame_goal[idx_goal] == line:
            # We append the line from goal, if blame_goal[idx_goal] is the
            # line we're interested in.
            reconstructed_file.append(line.data)
            line = next(line_iter, None)
            idx_goal += 1
            should_skip_base = True
        elif has_goal and blame_goal[idx_goal].commit == virtual_goal_commit:
            # We skip the line from goal if the change is not in the commit
            # we're interested in. Thus, changed lines in other commits will
            # not be reflected.
            idx_goal += 1
        elif has_base:
            # We should skip base if we just appended some lines from goal:
            # modified lines are appended first and their old version is
            # skipped afterwards. If nothing was appended from goal, lines
            # from base are preserved, because the modification is not in
            # the commit we're currently interested in.
            if not should_skip_base:
                reconstructed_file.append(blame_base[idx_base].data)
            idx_base += 1
        else:
            # Base is exhausted and the remaining goal line neither belongs
            # to virtual_goal_commit nor is selected for reconstruction.
            # There is nothing to pair it with, so move on to guarantee
            # termination.
            idx_goal += 1

    return b''.join(data + b'\n' for data in reconstructed_file)
+
+
def reconstruct_files(track_commit, blame_untracked_lines, blames,
                      current_base_commit, virtual_goal_commit):
    """Rebuilds every file that has lines assigned to track_commit.

    Returns a map from file path to reconstructed file content. Files with
    no line assigned to track_commit are left out.

    Args:
        track_commit: commit hash to track and reconstruct from.
        blame_untracked_lines: map from file path to a list of
            (line, chosen commit) pairs; see get_track_from_blames.
        blames: a map from file path to list of utils.GitBlameLine, looked
            up for every file that gets reconstructed.
        current_base_commit: commit hash each reconstructed file is blamed
            at to obtain its base lines.
        virtual_goal_commit: commit hash passed through to reconstruct_file.
    """
    wanted_by_path = {}
    for path, line_choices in blame_untracked_lines.items():
        for blame_line, chosen_commit in line_choices:
            if chosen_commit == track_commit:
                wanted_by_path.setdefault(path, []).append(blame_line)

    rebuilt = {}
    for path, wanted_lines in wanted_by_path.items():
        print('Reconstructing', path, 'for', track_commit)
        base_blame = utils.git_blame(current_base_commit, path)
        rebuilt[path] = reconstruct_file(blames[path], base_blame,
                                         wanted_lines, virtual_goal_commit)
    return rebuilt
+
+
def main():
    """Command-line entry point: reconnects history commit by commit.

    Positional arguments are disconnect_from, base_commit and one or more
    amend_commits. The script first disconnects disconnect_from from the
    files of base_commit and merges base_commit back in, then repeatedly
    recreates one amend commit on top until nothing is left to track, and
    finally prints a commit carrying the tree of disconnect_from.
    """
    # Init args
    parser = argparse.ArgumentParser(description='Reconnect git history')
    positional_args = (
        ('disconnect_from', 1, 'disconnect history from this commit'),
        ('base_commit', 1, 'base commit to use the history'),
        ('amend_commits', '+', 'commits to amend histories from base_commit'),
    )
    for arg_name, arg_count, arg_help in positional_args:
        parser.add_argument(
            arg_name,
            metavar=arg_name,
            type=str,
            nargs=arg_count,
            help=arg_help)

    arg = parser.parse_args(sys.argv[1:])
    # nargs=1 yields one-element lists.
    disconnect_from = arg.disconnect_from[0]
    base_commit = arg.base_commit[0]
    amend_commits = arg.amend_commits

    empty_commit = disconnect(disconnect_from, base_commit)
    connected_base = connect_base(empty_commit, base_commit)

    commit_msg_cache = CommitMetadataFactory()
    commit_choice_cache = {}
    last_commit = connected_base
    goal_tree = disconnect_from + '^{tree}'
    # Each iteration of the loop:
    # - re-creates the new goal commit (base + committed history + one giant
    #   uncommitted history);
    # - blames on the new goal commit and on the tips of the amend commits,
    #   mapping uncommitted lines to past histories line by line;
    # - chooses one of the past commits, reconstructs files to reflect the
    #   changes in that commit, and creates a new commit.
    # last_commit, commit_msg_cache and commit_choice_cache persist across
    # iterations.
    while True:
        # One commit is processed per iteration.

        # Create the virtual target commit, and its diff.
        virtual_goal = utils.git_commit(goal_tree, [last_commit])
        diffs = utils.git_difftree(None, virtual_goal)
        if not diffs:
            print('No diffs are found between %s and goal.' %
                  (last_commit.decode('ascii'),))
            break

        changed_paths = [diff.file.path for diff in diffs]
        blames = blame_files(virtual_goal, changed_paths)
        blames_combined = search_blame_line(blames, amend_commits,
                                            virtual_goal)

        commits_to_track, blame_untracked_lines = get_track_from_blames(
            blames_combined, virtual_goal, amend_commits,
            commit_choice_cache, commit_msg_cache)
        if not commits_to_track:
            print('no commits to track, stopping')
            break

        # Stably choose one commit from commits_to_track, and reconstruct it.
        track_commit = min(commits_to_track)
        meta = commit_msg_cache[track_commit]
        print('Reconstructing commit %s: %s' % (track_commit, meta.title))
        constructed_files = reconstruct_files(track_commit,
                                              blame_untracked_lines, blames,
                                              last_commit, virtual_goal)

        # Mktree and commit with the reconstructed files.
        tree = lazytree.LazyTree(filtered_utils.get_metadata(last_commit).tree)
        for filename, filedata in constructed_files.items():
            blob = subprocess.check_output(
                ['git', 'hash-object', '-w', '/dev/stdin'],
                input=filedata).strip()
            # Keep the mode of the file already present in the tree.
            tree[filename] = utils.GitFile(filename, tree[filename].mode, blob)

        tree_hash = tree.hash()
        commit_message = (
            meta.message + b'\n(Reconstructed from ' + track_commit + b')\n')
        author_env = dict(
            GIT_AUTHOR_NAME=meta.authorship.name,
            GIT_AUTHOR_EMAIL=meta.authorship.email,
            GIT_AUTHOR_DATE=b' '.join(
                [meta.authorship.time, meta.authorship.timezone]))
        last_commit = utils.git_commit(tree_hash, [last_commit],
                                       commit_message, author_env)
        print('Reconstructed as', last_commit)

    # Make the last commit for history reconstruction.
    final_tree = filtered_utils.get_metadata(disconnect_from).tree
    print(
        utils.git_commit(
            final_tree, [last_commit],
            b'Finished history reconstruction\n\nRemoving unnecessary lines\n'))


if __name__ == '__main__':
    main()
diff --git a/libchrome_tools/uprev/utils.py b/libchrome_tools/uprev/utils.py
index 3b19cea10..cf3c2a4b5 100644
--- a/libchrome_tools/uprev/utils.py
+++ b/libchrome_tools/uprev/utils.py
@@ -28,6 +28,11 @@ GitDiffTree = collections.namedtuple(
['op', 'file',]
)
# One blamed line of a file, as built by git_blame:
#   data: the line content (blame output line without its leading tab).
#   commit, old_line, new_line: the first three space-separated fields of
#       the blame header line that precedes the content line.
GitBlameLine = collections.namedtuple(
    'GitBlameLine', ('data', 'commit', 'old_line', 'new_line'))
+
GIT_DIFFTREE_RE_LINE = re.compile(rb'^:([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)\t(.*)$')
@@ -89,7 +94,7 @@ def git_difftree(treeish1, treeish2):
if treeish1 is None:
# Remove first line since it's tree hash printed.
out = subprocess.check_output(['git', 'diff-tree', '-r',
- treeish2]).strip(b'\n')[1:]
+ treeish2]).split(b'\n')[1:]
else:
out = subprocess.check_output(['git', 'diff-tree', '-r',
treeish1, treeish2]).split(b'\n')
@@ -180,12 +185,14 @@ def git_mktree(files):
return _mktree([], tree)
-def git_commit(tree, parents):
+def git_commit(tree, parents, message=b"", extra_env={}):
"""Creates a commit.
Args:
tree: tree object id.
parents: parent commit id.
+ message: commit message.
+ extra_env: extra environment variables passed to git.
"""
parent_args = []
for parent in parents:
@@ -193,7 +200,8 @@ def git_commit(tree, parents):
parent_args.append(parent)
return subprocess.check_output(
['git', 'commit-tree', tree] + parent_args,
- stdin=subprocess.DEVNULL).strip(b'\n')
+ input=message,
+ env=dict(os.environ, **extra_env)).strip(b'\n')
def git_revlist(from_commit, to_commit):
@@ -225,3 +233,29 @@ def git_revlist(from_commit, to_commit):
hashes = line.split(b' ')
commits.append((hashes[0], hashes[1:]))
return list(reversed(commits))
+
+
# Header line that `git blame -p` prints before every content line; it starts
# with the commit hash followed by a space. Compiled once at import time.
_BLAME_HEADER_RE = re.compile(rb'^[0-9a-f]+ ')


def git_blame(commit, filepath):
    """Returns line-by-line git blame.

    Runs `git blame -p` and pairs every content line (the lines starting
    with a tab) with the header line seen before it. Other lines of the
    output are ignored.

    Return value is represented by a list of GitBlameLine, one per content
    line, in output order. data is the content without the leading tab;
    commit, old_line and new_line are the first three space-separated fields
    of the header, as bytes.

    Args:
        commit: commit hash to blame at.
        filepath: file to blame.

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
        AssertionError: if a content line shows up without a header.
    """
    # '--' keeps git from mistaking an unusual path for a revision or option.
    output = subprocess.check_output(
        ['git', 'blame', '-p', commit, '--', filepath])
    blames = []
    header = None
    for line in output.split(b'\n'):
        if not line:
            continue
        if line.startswith(b'\t'):
            assert header is not None, 'blame content line without a header'
            blamed_commit, old_line, new_line = header
            blames.append(
                GitBlameLine(line[1:], blamed_commit, old_line, new_line))
            # Each content line needs its own header.
            header = None
        elif _BLAME_HEADER_RE.match(line):
            # A header may carry a fourth field; only the first three are
            # kept.
            header = line.split(b' ', 3)[0:3]
    return blames