Diffstat (limited to 'compiler/optimizing/parallel_move_resolver.cc')
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc  309
1 files changed, 284 insertions, 25 deletions
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index ad92ca59a1..54ea6f19d4 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -17,11 +17,23 @@
 #include "parallel_move_resolver.h"
 
 #include "nodes.h"
-#include "locations.h"
 
 namespace art {
 
-void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
+void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
+  // Perform a linear sweep of the moves to add them to the initial list of
+  // moves to perform, ignoring any move that is redundant (the source is
+  // the same as the destination, the destination is ignored and
+  // unallocated, or the move was already eliminated).
+  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
+    MoveOperands* move = parallel_move->MoveOperandsAt(i);
+    if (!move->IsRedundant()) {
+      moves_.Add(move);
+    }
+  }
+}
+
+void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
   DCHECK(moves_.IsEmpty());
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
@@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
   moves_.Reset();
 }
 
-
-void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
-  // Perform a linear sweep of the moves to add them to the initial list of
-  // moves to perform, ignoring any move that is redundant (the source is
-  // the same as the destination, the destination is ignored and
-  // unallocated, or the move was already eliminated).
-  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
-    MoveOperands* move = parallel_move->MoveOperandsAt(i);
-    if (!move->IsRedundant()) {
-      moves_.Add(move);
-    }
-  }
-}
-
 Location LowOf(Location location) {
   if (location.IsRegisterPair()) {
     return Location::RegisterLocation(location.low());
@@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati
   }
 }
 
-MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
   // Each call to this function performs a move and deletes it from the move
   // graph. We first recursively perform any move blocking this one. We
   // mark a move as "pending" on entry to PerformMove in order to detect
@@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
   }
 }
 
-bool ParallelMoveResolver::IsScratchLocation(Location loc) {
+bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
   for (size_t i = 0; i < moves_.Size(); ++i) {
     if (moves_.Get(i)->Blocks(loc)) {
       return false;
@@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) {
   return false;
 }
 
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
-                                                  int register_count,
-                                                  int if_scratch,
-                                                  bool* spilled) {
+int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
+                                                          int register_count,
+                                                          int if_scratch,
+                                                          bool* spilled) {
   DCHECK_NE(blocked, if_scratch);
   int scratch = -1;
   for (int reg = 0; reg < register_count; ++reg) {
@@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
 }
 
 
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
-    ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
+ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
+    ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
     : resolver_(resolver),
       reg_(kNoRegister),
       spilled_(false) {
@@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
 }
 
 
-ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
   if (spilled_) {
     resolver_->RestoreScratch(reg_);
   }
 }
 
+void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
+  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+  DCHECK(moves_.IsEmpty());
+  DCHECK(scratches_.IsEmpty());
+
+  // Backend dependent initialization.
+  PrepareForEmitNativeCode();
+
+  // Build up a worklist of moves.
+  BuildInitialMoveList(parallel_move);
+
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& move = *moves_.Get(i);
+    // Skip constants to perform them last. They don't block other moves and
+    // skipping such moves with register destinations keeps those registers
+    // free for the whole algorithm.
+    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
+      PerformMove(i);
+    }
+  }
+
+  // Perform the moves with constant sources and register destinations with
+  // UpdateMoveSource() to reduce the number of literal loads. Stack
+  // destinations are skipped since we won't benefit from changing the
+  // constant sources to stack locations.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    MoveOperands* move = moves_.Get(i);
+    Location destination = move->GetDestination();
+    if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
+      Location source = move->GetSource();
+      EmitMove(i);
+      move->Eliminate();
+      // This may introduce an additional instruction dependency, but it
+      // reduces the number of moves and possible literal loads. For example:
+      // Original moves:
+      //   1234.5678 -> D0
+      //   1234.5678 -> D1
+      // Updated moves:
+      //   1234.5678 -> D0
+      //   D0 -> D1
+      UpdateMoveSource(source, destination);
+    }
+  }
+
+  // Perform the rest of the moves.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    MoveOperands* move = moves_.Get(i);
+    if (!move->IsEliminated()) {
+      EmitMove(i);
+      move->Eliminate();
+    }
+  }
+
+  // All pending moves that we have added to resolve cycles should have been
+  // performed by now.
+  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+
+  // Backend dependent cleanup.
+  FinishEmitNativeCode();
+
+  moves_.Reset();
+  scratches_.Reset();
+}
+
+Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
+  for (size_t i = 0; i < scratches_.Size(); ++i) {
+    Location loc = scratches_.Get(i);
+    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+      return loc;
+    }
+  }
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    Location loc = moves_.Get(i)->GetDestination();
+    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+      return loc;
+    }
+  }
+  return Location::NoLocation();
+}
+
+void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
+  if (kIsDebugBuild) {
+    for (size_t i = 0; i < scratches_.Size(); ++i) {
+      DCHECK(!loc.Equals(scratches_.Get(i)));
+    }
+  }
+  scratches_.Add(loc);
+}
+
+void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
+  DCHECK(!IsBlockedByMoves(loc));
+  for (size_t i = 0; i < scratches_.Size(); ++i) {
+    if (loc.Equals(scratches_.Get(i))) {
+      scratches_.DeleteAt(i);
+      break;
+    }
+  }
+}
+
+void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
+  // Each call to this function performs a move and deletes it from the move
+  // graph. We first recursively perform any move blocking this one. We mark
+  // a move as "pending" on entry to PerformMove in order to detect cycles
+  // in the move graph. We use a scratch location to resolve cycles, which
+  // may add additional pending moves. After a move has been performed, we
+  // update its source operand in the move graph to reduce dependencies in
+  // the graph.
+
+  MoveOperands* move = moves_.Get(index);
+  DCHECK(!move->IsPending());
+  DCHECK(!move->IsEliminated());
+  if (move->IsRedundant()) {
+    // Previous operations on the list of moves have caused this particular move
+    // to become a no-op, so we can safely eliminate it. Consider for example
+    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), which we
+    // will resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance,
+    // '2' is used as the scratch location, the move (1 -> 2) will occur while
+    // resolving the cycle. When that move is emitted, the code will update
+    // moves with a '1' as their source to use '2' instead (see
+    // `UpdateMoveSource()`). In our example the initial move (1 -> 2) would
+    // then become the no-op (2 -> 2) that can be eliminated here.
+    move->Eliminate();
+    return;
+  }
+
+  // Clear this move's destination to indicate a pending move. The actual
+  // destination is saved in a stack-allocated local. Recursion may allow
+  // multiple moves to be pending.
+  DCHECK(!move->GetSource().IsInvalid());
+  Location destination = move->MarkPending();
+
+  // Perform a depth-first traversal of the move graph to resolve
+  // dependencies. Any unperformed, unpending move with a source the same
+  // as this one's destination blocks this one so recursively perform all
+  // such moves.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& other_move = *moves_.Get(i);
+    if (other_move.Blocks(destination) && !other_move.IsPending()) {
+      PerformMove(i);
+    }
+  }
+
+  // We are about to resolve this move and don't need it marked as
+  // pending, so restore its destination.
+  move->ClearPending(destination);
+
+  // No one else should write to the move destination while it is pending.
+  DCHECK(!move->IsRedundant());
+
+  Location source = move->GetSource();
+  // The move may be blocked on several pending moves, in which case we have
+  // a cycle.
+  if (IsBlockedByMoves(destination)) {
+    // For a cycle like (A -> B) (B -> C) (C -> A), we change it to the
+    // following sequence:
+    //   (C -> scratch) # Emit right now.
+    //   (A -> B) (B -> C) # Unblocked.
+    //   (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
+    Location::Kind kind = source.GetKind();
+    DCHECK_NE(kind, Location::kConstant);
+    Location scratch = AllocateScratchLocationFor(kind);
+    // We only care about the move size.
+    Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
+    // Perform (C -> scratch).
+    move->SetDestination(scratch);
+    EmitMove(index);
+    move->Eliminate();
+    UpdateMoveSource(source, scratch);
+    // Add (scratch -> A).
+    AddPendingMove(scratch, destination, type);
+  } else {
+    // This move is not blocked.
+    EmitMove(index);
+    move->Eliminate();
+    UpdateMoveSource(source, destination);
+  }
+
+  // Moves in the pending list should not block any other moves. But performing
+  // unblocked moves in the pending list can free scratch registers, so we do
+  // this as early as possible.
+  MoveOperands* pending_move;
+  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
+    Location pending_source = pending_move->GetSource();
+    Location pending_destination = pending_move->GetDestination();
+    // We do not depend on the pending move index, so just delete the move
+    // instead of eliminating it to keep the pending list cleaner.
+    DeletePendingMove(pending_move);
+    move->SetSource(pending_source);
+    move->SetDestination(pending_destination);
+    EmitMove(index);
+    move->Eliminate();
+    UpdateMoveSource(pending_source, pending_destination);
+    // Free any unblocked locations in the scratch location list.
+    for (size_t i = 0; i < scratches_.Size(); ++i) {
+      Location scratch = scratches_.Get(i);
+      // Only a scratch location overlapping with the performed move's source
+      // can be unblocked.
+      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
+        FreeScratchLocation(pending_source);
+      }
+    }
+  }
+}
+
+void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
+  // This function is used to reduce the dependencies in the graph after
+  // (from -> to) has been performed. Since we ensure there is no move with
+  // the same destination, (to -> X) cannot be blocked while (from -> X)
+  // might still be blocked. Consider for example the moves (0 -> 1)
+  // (1 -> 2) (1 -> 3). After (1 -> 2) has been performed, the moves left
+  // are (0 -> 1) and (1 -> 3). There is a dependency between the two. If
+  // we update the source location from 1 to 2, we will get (0 -> 1) and
+  // (2 -> 3). There is no dependency between the two.
+  //
+  // This is not something we must do, but we can use fewer scratch locations
+  // with this trick. For example, we can avoid using additional scratch
+  // locations for the moves (0 -> 1), (1 -> 2), (1 -> 0).
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    MoveOperands* move = moves_.Get(i);
+    if (move->GetSource().Equals(from)) {
+      move->SetSource(to);
+    }
+  }
+}
+
+void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
+    Location destination, Primitive::Type type) {
+  pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+}
+
+void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
+  pending_moves_.Delete(move);
+}
+
+MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
+  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+    MoveOperands* move = pending_moves_.Get(i);
+    Location destination = move->GetDestination();
+    // Only moves with a destination overlapping the input loc can be
+    // unblocked by it.
+    if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
+      return move;
+    }
+  }
+  return nullptr;
+}
+
+bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
+  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+    if (pending_moves_.Get(i)->Blocks(loc)) {
+      return true;
+    }
+  }
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    if (moves_.Get(i)->Blocks(loc)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// So far this is only used for debugging purposes, to make sure all pending
+// moves have been performed.
+size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
+  return pending_moves_.Size();
+}
+
 }  // namespace art
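
The cycle handling is easier to follow outside diff form. Below is a minimal, self-contained sketch of the swap-based strategy that ParallelMoveResolverWithSwap implements; it is not ART code, and the Move struct, the Perform() helper, and the plain-integer "locations" are invented for illustration. Blocking moves are performed depth-first; when the recursion closes a cycle, a single swap resolves it and the remaining sources are retargeted, in the spirit of UpdateSourceOf() above.

// Sketch only: plain integers stand in for ART's Location objects.
#include <cstdio>
#include <vector>

struct Move { int src, dst; bool pending = false, done = false; };

static std::vector<Move> moves;

static void Perform(size_t index) {
  Move& move = moves[index];
  move.pending = true;
  // Depth-first: perform every move that reads our destination first.
  for (size_t i = 0; i < moves.size(); ++i) {
    Move& other = moves[i];
    if (!other.done && !other.pending && other.src == move.dst) {
      Perform(i);
    }
  }
  move.pending = false;
  // If a pending (in-progress) move still reads our destination, the
  // recursion has come back around a cycle; resolve it with one swap.
  bool in_cycle = false;
  for (const Move& other : moves) {
    if (other.pending && other.src == move.dst) in_cycle = true;
  }
  if (in_cycle) {
    std::printf("swap %d <-> %d\n", move.src, move.dst);
    // The swap moved both values, so retarget the remaining readers of
    // either location, mirroring what UpdateSourceOf() does in the diff.
    for (Move& other : moves) {
      if (other.done || &other == &move) continue;
      if (other.src == move.dst) other.src = move.src;
      else if (other.src == move.src) other.src = move.dst;
    }
  } else if (move.src != move.dst) {
    std::printf("move %d -> %d\n", move.src, move.dst);
  }
  // If src == dst here, an earlier swap made this move redundant; drop it.
  move.done = true;
}

int main() {
  moves = {{1, 2}, {2, 3}, {3, 1}};  // the 3-cycle used in the comments above
  for (size_t i = 0; i < moves.size(); ++i) {
    if (!moves[i].done) Perform(i);
  }
  return 0;
}

Running this emits "swap 3 <-> 1" then "swap 2 <-> 3": a 3-cycle costs two swaps, and the third move becomes redundant and is eliminated, matching the redundant-move comment in PerformMove above.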
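For comparison, here is a matching sketch of the scratch-based strategy used by ParallelMoveResolverNoSwap, under the same invented setup (kScratch is a hypothetical spare location, not an ART API): when every remaining destination is still read by some other move, we have a cycle, so one source is copied aside and its readers are retargeted to the scratch, which is the role UpdateMoveSource() plays above.

// Sketch only: plain integers stand in for ART's Location objects.
#include <cstdio>
#include <vector>

struct Move { int src, dst; };

int main() {
  const int kScratch = 100;  // hypothetical scratch location
  std::vector<Move> moves = {{1, 2}, {2, 3}, {3, 1}};  // cycle 1->2->3->1
  while (!moves.empty()) {
    bool progress = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      // A move is safe to emit once no other move still reads its destination.
      bool blocked = false;
      for (size_t j = 0; j < moves.size(); ++j) {
        if (j != i && moves[j].src == moves[i].dst) blocked = true;
      }
      if (!blocked) {
        std::printf("move %d -> %d\n", moves[i].src, moves[i].dst);
        moves.erase(moves.begin() + i);
        progress = true;
        break;
      }
    }
    if (!progress) {
      // Every destination is still read by some other move: a cycle. Copy
      // one source aside and retarget its readers to the scratch location,
      // which is what UpdateMoveSource() does in the diff above.
      int victim = moves[0].src;
      std::printf("move %d -> %d\n", victim, kScratch);
      for (Move& m : moves) {
        if (m.src == victim) m.src = kScratch;
      }
    }
  }
  return 0;
}

For the same 3-cycle this emits one extra copy through location 100 (the scratch) instead of using swaps: 1 -> 100, 3 -> 1, 2 -> 3, 100 -> 2. That is the trade-off the two resolver variants embody for backends with and without a cheap swap.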