Diffstat (limited to 'compiler/optimizing/parallel_move_resolver.cc')
 compiler/optimizing/parallel_move_resolver.cc | 309 +++++++++++++++++++++---
 1 file changed, 284 insertions(+), 25 deletions(-)
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index ad92ca59a1..54ea6f19d4 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -17,11 +17,23 @@
#include "parallel_move_resolver.h"
#include "nodes.h"
-#include "locations.h"
namespace art {
-void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
+void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
+ // Perform a linear sweep of the moves to add them to the initial list of
+ // moves to perform, ignoring any move that is redundant (the source is
+ // the same as the destination, the destination is ignored and
+ // unallocated, or the move was already eliminated).
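+  // For example, an identity move such as (r1 -> r1) is dropped here.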
+ for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
+ MoveOperands* move = parallel_move->MoveOperandsAt(i);
+ if (!move->IsRedundant()) {
+ moves_.Add(move);
+ }
+ }
+}
+
+void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
DCHECK(moves_.IsEmpty());
// Build up a worklist of moves.
BuildInitialMoveList(parallel_move);
@@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
moves_.Reset();
}
-
-void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
- // Perform a linear sweep of the moves to add them to the initial list of
- // moves to perform, ignoring any move that is redundant (the source is
- // the same as the destination, the destination is ignored and
- // unallocated, or the move was already eliminated).
- for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
- MoveOperands* move = parallel_move->MoveOperandsAt(i);
- if (!move->IsRedundant()) {
- moves_.Add(move);
- }
- }
-}
-
Location LowOf(Location location) {
if (location.IsRegisterPair()) {
return Location::RegisterLocation(location.low());
@@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati
}
}
-MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We
// mark a move as "pending" on entry to PerformMove in order to detect
@@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
}
}
-bool ParallelMoveResolver::IsScratchLocation(Location loc) {
+bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
for (size_t i = 0; i < moves_.Size(); ++i) {
if (moves_.Get(i)->Blocks(loc)) {
return false;
@@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) {
return false;
}
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
- int register_count,
- int if_scratch,
- bool* spilled) {
+int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
+ int register_count,
+ int if_scratch,
+ bool* spilled) {
DCHECK_NE(blocked, if_scratch);
int scratch = -1;
for (int reg = 0; reg < register_count; ++reg) {
@@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
- ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
+ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
reg_(kNoRegister),
spilled_(false) {
@@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
-ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
}
}
+void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+ DCHECK(moves_.IsEmpty());
+ DCHECK(scratches_.IsEmpty());
+
+ // Backend dependent initialization.
+ PrepareForEmitNativeCode();
+
+ // Build up a worklist of moves.
+ BuildInitialMoveList(parallel_move);
+
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& move = *moves_.Get(i);
+ // Skip constants to perform them last. They don't block other moves and
+ // skipping such moves with register destinations keeps those registers
+ // free for the whole algorithm.
+ if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
+ PerformMove(i);
+ }
+ }
+
+  // Perform the moves with constant sources and register destinations with
+  // UpdateMoveSource() to reduce the number of literal loads. Stack
+  // destinations are skipped, since we would not benefit from rewriting
+  // constant sources into stack locations.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ Location destination = move->GetDestination();
+ if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
+ Location source = move->GetSource();
+ EmitMove(i);
+ move->Eliminate();
+      // This may introduce an additional instruction dependency, but it
+      // reduces the number of moves and possible literal loads. For example,
+ // Original moves:
+ // 1234.5678 -> D0
+ // 1234.5678 -> D1
+ // Updated moves:
+ // 1234.5678 -> D0
+ // D0 -> D1
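+      // Similarly, a rewritten move with a stack destination, e.g.
+      // (1234.5678 -> [sp, #16]) becoming (D0 -> [sp, #16]), is emitted by
+      // the final pass below as a plain store, avoiding a second literal load.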
+ UpdateMoveSource(source, destination);
+ }
+ }
+
+ // Perform the rest of the moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (!move->IsEliminated()) {
+ EmitMove(i);
+ move->Eliminate();
+ }
+ }
+
+  // All pending moves that we added to resolve cycles should have been performed.
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+
+ // Backend dependent cleanup.
+ FinishEmitNativeCode();
+
+ moves_.Reset();
+ scratches_.Reset();
+}
+
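+// Pick a location of the given kind that is safe to write: either an
+// already-allocated scratch location, or the destination of a move that has
+// not yet been performed. Either qualifies only when no remaining or pending
+// move still reads from it (see IsBlockedByMoves() below).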
+Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location loc = scratches_.Get(i);
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ Location loc = moves_.Get(i)->GetDestination();
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ return Location::NoLocation();
+}
+
+void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
+ if (kIsDebugBuild) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ DCHECK(!loc.Equals(scratches_.Get(i)));
+ }
+ }
+ scratches_.Add(loc);
+}
+
+void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
+ DCHECK(!IsBlockedByMoves(loc));
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ if (loc.Equals(scratches_.Get(i))) {
+ scratches_.DeleteAt(i);
+ break;
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
+ // Each call to this function performs a move and deletes it from the move
+ // graph. We first recursively perform any move blocking this one. We mark
+ // a move as "pending" on entry to PerformMove in order to detect cycles
+  // in the move graph. We use scratch locations to resolve cycles, which may
+  // add additional pending moves. After a move has been performed, we update
+  // the source operands of the remaining moves in the graph to reduce
+  // dependencies.
+
+ MoveOperands* move = moves_.Get(index);
+ DCHECK(!move->IsPending());
+ DCHECK(!move->IsEliminated());
+ if (move->IsRedundant()) {
+ // Previous operations on the list of moves have caused this particular move
+ // to become a no-op, so we can safely eliminate it. Consider for example
+ // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
+ // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
+ // used as the scratch location, the move (1 -> 2) will occur while resolving
+ // the cycle. When that move is emitted, the code will update moves with a '1'
+    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
+ // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
+ // eliminated here.
+ move->Eliminate();
+ return;
+ }
+
+ // Clear this move's destination to indicate a pending move. The actual
+ // destination is saved in a stack-allocated local. Recursion may allow
+ // multiple moves to be pending.
+ DCHECK(!move->GetSource().IsInvalid());
+ Location destination = move->MarkPending();
+
+ // Perform a depth-first traversal of the move graph to resolve
+ // dependencies. Any unperformed, unpending move with a source the same
+ // as this one's destination blocks this one so recursively perform all
+ // such moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& other_move = *moves_.Get(i);
+ if (other_move.Blocks(destination) && !other_move.IsPending()) {
+ PerformMove(i);
+ }
+ }
+
+ // We are about to resolve this move and don't need it marked as
+ // pending, so restore its destination.
+ move->ClearPending(destination);
+
+  // No one else should write to the move destination while it is pending.
+ DCHECK(!move->IsRedundant());
+
+ Location source = move->GetSource();
+  // The move may be blocked on several pending moves if there is a cycle.
+ if (IsBlockedByMoves(destination)) {
+    // For a cycle like (A -> B) (B -> C) (C -> A), we change it to the following
+ // sequence:
+ // (C -> scratch) # Emit right now.
+ // (A -> B) (B -> C) # Unblocked.
+ // (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
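+    // The emitted order is then (C -> scratch), (B -> C), (A -> B) and, once
+    // (A -> B) has unblocked it, (scratch -> A).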
+ Location::Kind kind = source.GetKind();
+ DCHECK_NE(kind, Location::kConstant);
+ Location scratch = AllocateScratchLocationFor(kind);
+ // We only care about the move size.
+ Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
+ // Perform (C -> scratch)
+ move->SetDestination(scratch);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, scratch);
+ // Add (scratch -> A).
+ AddPendingMove(scratch, destination, type);
+ } else {
+ // This move is not blocked.
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, destination);
+ }
+
+ // Moves in the pending list should not block any other moves. But performing
+ // unblocked moves in the pending list can free scratch registers, so we do this
+ // as early as possible.
+ MoveOperands* pending_move;
+ while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
+ Location pending_source = pending_move->GetSource();
+ Location pending_destination = pending_move->GetDestination();
+    // We do not depend on the pending move index, so we just delete the move
+    // instead of eliminating it, to keep the pending list clean.
+ DeletePendingMove(pending_move);
+ move->SetSource(pending_source);
+ move->SetDestination(pending_destination);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(pending_source, pending_destination);
+ // Free any unblocked locations in the scratch location list.
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location scratch = scratches_.Get(i);
+      // Only a scratch location overlapping with the performed move's source
+      // can become unblocked.
+ if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
+ FreeScratchLocation(pending_source);
+ }
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
+ // This function is used to reduce the dependencies in the graph after
+ // (from -> to) has been performed. Since we ensure there is no move with the same
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
+ // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
+ // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
+ // a dependency between the two. If we update the source location from 1 to 2, we
+ // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
+ //
+ // This is not something we must do, but we can use fewer scratch locations with
+ // this trick. For example, we can avoid using additional scratch locations for
+ // moves (0 -> 1), (1 -> 2), (1 -> 0).
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (move->GetSource().Equals(from)) {
+ move->SetSource(to);
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
+ Location destination, Primitive::Type type) {
+ pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+}
+
+void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
+ pending_moves_.Delete(move);
+}
+
+MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ MoveOperands* move = pending_moves_.Get(i);
+ Location destination = move->GetDestination();
+    // Only a move whose destination overlaps with the input loc can be unblocked.
+ if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
+ return move;
+ }
+ }
+ return nullptr;
+}
+
+bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ if (pending_moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ if (moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// So far it is only used for debugging purposes to make sure all pending moves
+// have been performed.
+size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
+ return pending_moves_.Size();
+}
+
} // namespace art
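
Note: the no-swap strategy is easier to see in isolation than inside the
resolver machinery above. The following minimal sketch restates the same idea
over plain integer register indices: emit any move whose destination is no
longer read by a remaining move, retarget the remaining readers of a performed
source (the UpdateMoveSource() trick), and break cycles through a scratch
register rather than a swap. It is an illustrative worklist reformulation, not
the recursive implementation in this change; the Move struct, register
numbering, and scratch register r12 are invented for the example.

#include <cstdio>
#include <vector>

// One pending copy between two abstract register indices.
struct Move { int src; int dst; };

// Returns true if any remaining move still reads `loc`.
static bool IsRead(const std::vector<Move>& moves, int loc) {
  for (const Move& m : moves) {
    if (m.src == loc) return true;
  }
  return false;
}

// Emits a parallel move set sequentially, breaking cycles through `scratch`.
// Assumes every destination is written at most once and that redundant
// (src == dst) moves were filtered out, as BuildInitialMoveList() does.
static void Resolve(std::vector<Move> moves, int scratch) {
  while (!moves.empty()) {
    bool progress = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      if (!IsRead(moves, moves[i].dst)) {
        // The destination holds no live value, so this move is safe to emit.
        std::printf("  r%d -> r%d\n", moves[i].src, moves[i].dst);
        int from = moves[i].src;
        int to = moves[i].dst;
        moves.erase(moves.begin() + i);
        // Retarget remaining readers of `from`, like UpdateMoveSource().
        for (Move& m : moves) {
          if (m.src == from) m.src = to;
        }
        progress = true;
        break;
      }
    }
    if (!progress) {
      // Every destination is still read, so we are inside a cycle. Park one
      // source in the scratch register and retarget its readers, mirroring
      // the (C -> scratch) step in PerformMove().
      std::printf("  r%d -> r%d (scratch)\n", moves[0].src, scratch);
      int saved = moves[0].src;
      for (Move& m : moves) {
        if (m.src == saved) m.src = scratch;
      }
    }
  }
}

int main() {
  std::printf("cycle (r0 -> r1) (r1 -> r2) (r2 -> r0):\n");
  Resolve({{0, 1}, {1, 2}, {2, 0}}, 12);
  return 0;
}

Resolving the cycle (r0 -> r1) (r1 -> r2) (r2 -> r0) prints r0 -> r12,
r2 -> r0, r1 -> r2, r12 -> r1: the same (C -> scratch), unblocked moves,
(scratch -> A) shape documented in PerformMove().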