Diffstat (limited to 'compiler/optimizing/parallel_move_resolver.cc')
 compiler/optimizing/parallel_move_resolver.cc | 309 +++++++++++++++++++++---
 1 file changed, 284 insertions(+), 25 deletions(-)
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index ad92ca59a1..54ea6f19d4 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -17,11 +17,23 @@
#include "parallel_move_resolver.h"
#include "nodes.h"
-#include "locations.h"
namespace art {
-void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
+void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
+ // Perform a linear sweep of the moves to add them to the initial list of
+ // moves to perform, ignoring any move that is redundant (the source is
+ // the same as the destination, the destination is ignored and
+ // unallocated, or the move was already eliminated).
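+  // For example, an identity move such as (r1 -> r1) is dropped here.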
+ for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
+ MoveOperands* move = parallel_move->MoveOperandsAt(i);
+ if (!move->IsRedundant()) {
+ moves_.Add(move);
+ }
+ }
+}
+
+void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
DCHECK(moves_.IsEmpty());
// Build up a worklist of moves.
BuildInitialMoveList(parallel_move);
@@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
moves_.Reset();
}
-
-void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
- // Perform a linear sweep of the moves to add them to the initial list of
- // moves to perform, ignoring any move that is redundant (the source is
- // the same as the destination, the destination is ignored and
- // unallocated, or the move was already eliminated).
- for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
- MoveOperands* move = parallel_move->MoveOperandsAt(i);
- if (!move->IsRedundant()) {
- moves_.Add(move);
- }
- }
-}
-
Location LowOf(Location location) {
if (location.IsRegisterPair()) {
return Location::RegisterLocation(location.low());
@@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati
}
}
-MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We
// mark a move as "pending" on entry to PerformMove in order to detect
@@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
}
}
-bool ParallelMoveResolver::IsScratchLocation(Location loc) {
+bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
for (size_t i = 0; i < moves_.Size(); ++i) {
if (moves_.Get(i)->Blocks(loc)) {
return false;
@@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) {
return false;
}
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
- int register_count,
- int if_scratch,
- bool* spilled) {
+int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
+ int register_count,
+ int if_scratch,
+ bool* spilled) {
DCHECK_NE(blocked, if_scratch);
int scratch = -1;
for (int reg = 0; reg < register_count; ++reg) {
@@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
- ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
+ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
reg_(kNoRegister),
spilled_(false) {
@@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
-ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
}
}
+void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+ DCHECK(moves_.IsEmpty());
+ DCHECK(scratches_.IsEmpty());
+
+ // Backend dependent initialization.
+ PrepareForEmitNativeCode();
+
+ // Build up a worklist of moves.
+ BuildInitialMoveList(parallel_move);
+
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& move = *moves_.Get(i);
+ // Skip constants to perform them last. They don't block other moves and
+ // skipping such moves with register destinations keeps those registers
+ // free for the whole algorithm.
+ if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
+ PerformMove(i);
+ }
+ }
+
+  // Perform the moves with constant sources and register destinations with
+  // UpdateMoveSource() to reduce the number of literal loads. Stack
+  // destinations are skipped, since we would not benefit from rewriting
+  // constant sources into stack locations.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ Location destination = move->GetDestination();
+ if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
+ Location source = move->GetSource();
+ EmitMove(i);
+ move->Eliminate();
+      // This may introduce an additional instruction dependency, but it
+      // reduces the number of moves and possible literal loads. For example,
+ // Original moves:
+ // 1234.5678 -> D0
+ // 1234.5678 -> D1
+ // Updated moves:
+ // 1234.5678 -> D0
+ // D0 -> D1
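+      // Similarly, a rewritten move with a stack destination, e.g.
+      // (1234.5678 -> [sp, #16]) becoming (D0 -> [sp, #16]), is emitted by
+      // the final pass below as a plain store, avoiding a second literal load.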
+ UpdateMoveSource(source, destination);
+ }
+ }
+
+ // Perform the rest of the moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (!move->IsEliminated()) {
+ EmitMove(i);
+ move->Eliminate();
+ }
+ }
+
+  // All pending moves that we added to resolve cycles should have been performed.
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+
+ // Backend dependent cleanup.
+ FinishEmitNativeCode();
+
+ moves_.Reset();
+ scratches_.Reset();
+}
+
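+// Pick a location of the given kind that is safe to write: either an
+// already-allocated scratch location, or the destination of a move that has
+// not yet been performed. Either qualifies only when no remaining or pending
+// move still reads from it (see IsBlockedByMoves() below).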
+Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location loc = scratches_.Get(i);
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ Location loc = moves_.Get(i)->GetDestination();
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ return Location::NoLocation();
+}
+
+void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
+ if (kIsDebugBuild) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ DCHECK(!loc.Equals(scratches_.Get(i)));
+ }
+ }
+ scratches_.Add(loc);
+}
+
+void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
+ DCHECK(!IsBlockedByMoves(loc));
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ if (loc.Equals(scratches_.Get(i))) {
+ scratches_.DeleteAt(i);
+ break;
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
+ // Each call to this function performs a move and deletes it from the move
+ // graph. We first recursively perform any move blocking this one. We mark
+ // a move as "pending" on entry to PerformMove in order to detect cycles
+  // in the move graph. We use scratch locations to resolve cycles, which may
+  // add additional pending moves. After a move has been performed, we update
+  // the source operands of the remaining moves in the graph to reduce
+  // dependencies.
+
+ MoveOperands* move = moves_.Get(index);
+ DCHECK(!move->IsPending());
+ DCHECK(!move->IsEliminated());
+ if (move->IsRedundant()) {
+ // Previous operations on the list of moves have caused this particular move
+ // to become a no-op, so we can safely eliminate it. Consider for example
+ // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
+ // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
+ // used as the scratch location, the move (1 -> 2) will occur while resolving
+ // the cycle. When that move is emitted, the code will update moves with a '1'
+    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
+ // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
+ // eliminated here.
+ move->Eliminate();
+ return;
+ }
+
+ // Clear this move's destination to indicate a pending move. The actual
+ // destination is saved in a stack-allocated local. Recursion may allow
+ // multiple moves to be pending.
+ DCHECK(!move->GetSource().IsInvalid());
+ Location destination = move->MarkPending();
+
+ // Perform a depth-first traversal of the move graph to resolve
+ // dependencies. Any unperformed, unpending move with a source the same
+ // as this one's destination blocks this one so recursively perform all
+ // such moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& other_move = *moves_.Get(i);
+ if (other_move.Blocks(destination) && !other_move.IsPending()) {
+ PerformMove(i);
+ }
+ }
+
+ // We are about to resolve this move and don't need it marked as
+ // pending, so restore its destination.
+ move->ClearPending(destination);
+
+  // No one else should write to the move destination while it is pending.
+ DCHECK(!move->IsRedundant());
+
+ Location source = move->GetSource();
+  // The move may be blocked on several pending moves if there is a cycle.
+ if (IsBlockedByMoves(destination)) {
+    // For a cycle like (A -> B) (B -> C) (C -> A), we change it to the following
+ // sequence:
+ // (C -> scratch) # Emit right now.
+ // (A -> B) (B -> C) # Unblocked.
+ // (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
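+    // The emitted order is then (C -> scratch), (B -> C), (A -> B) and, once
+    // (A -> B) has unblocked it, (scratch -> A).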
+ Location::Kind kind = source.GetKind();
+ DCHECK_NE(kind, Location::kConstant);
+ Location scratch = AllocateScratchLocationFor(kind);
+ // We only care about the move size.
+ Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
+ // Perform (C -> scratch)
+ move->SetDestination(scratch);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, scratch);
+ // Add (scratch -> A).
+ AddPendingMove(scratch, destination, type);
+ } else {
+ // This move is not blocked.
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, destination);
+ }
+
+ // Moves in the pending list should not block any other moves. But performing
+ // unblocked moves in the pending list can free scratch registers, so we do this
+ // as early as possible.
+ MoveOperands* pending_move;
+ while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
+ Location pending_source = pending_move->GetSource();
+ Location pending_destination = pending_move->GetDestination();
+    // We do not depend on the pending move index, so we just delete the move
+    // instead of eliminating it, to keep the pending list clean.
+ DeletePendingMove(pending_move);
+ move->SetSource(pending_source);
+ move->SetDestination(pending_destination);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(pending_source, pending_destination);
+ // Free any unblocked locations in the scratch location list.
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location scratch = scratches_.Get(i);
+      // Only a scratch location overlapping with the performed move's source
+      // can become unblocked.
+ if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
+ FreeScratchLocation(pending_source);
+ }
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
+ // This function is used to reduce the dependencies in the graph after
+ // (from -> to) has been performed. Since we ensure there is no move with the same
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
+ // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
+ // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
+ // a dependency between the two. If we update the source location from 1 to 2, we
+ // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
+ //
+ // This is not something we must do, but we can use fewer scratch locations with
+ // this trick. For example, we can avoid using additional scratch locations for
+ // moves (0 -> 1), (1 -> 2), (1 -> 0).
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (move->GetSource().Equals(from)) {
+ move->SetSource(to);
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
+ Location destination, Primitive::Type type) {
+ pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+}
+
+void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
+ pending_moves_.Delete(move);
+}
+
+MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ MoveOperands* move = pending_moves_.Get(i);
+ Location destination = move->GetDestination();
+    // Only a move whose destination overlaps with the input loc can be unblocked.
+ if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
+ return move;
+ }
+ }
+ return nullptr;
+}
+
+bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ if (pending_moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ if (moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// So far it is only used for debugging purposes to make sure all pending moves
+// have been performed.
+size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
+ return pending_moves_.Size();
+}
+
} // namespace art
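
Note: the no-swap strategy is easier to see in isolation than inside the
resolver machinery above. The following minimal sketch restates the same idea
over plain integer register indices: emit any move whose destination is no
longer read by a remaining move, retarget the remaining readers of a performed
source (the UpdateMoveSource() trick), and break cycles through a scratch
register rather than a swap. It is an illustrative worklist reformulation, not
the recursive implementation in this change; the Move struct, register
numbering, and scratch register r12 are invented for the example.

#include <cstdio>
#include <vector>

// One pending copy between two abstract register indices.
struct Move { int src; int dst; };

// Returns true if any remaining move still reads `loc`.
static bool IsRead(const std::vector<Move>& moves, int loc) {
  for (const Move& m : moves) {
    if (m.src == loc) return true;
  }
  return false;
}

// Emits a parallel move set sequentially, breaking cycles through `scratch`.
// Assumes every destination is written at most once and that redundant
// (src == dst) moves were filtered out, as BuildInitialMoveList() does.
static void Resolve(std::vector<Move> moves, int scratch) {
  while (!moves.empty()) {
    bool progress = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      if (!IsRead(moves, moves[i].dst)) {
        // The destination holds no live value, so this move is safe to emit.
        std::printf("  r%d -> r%d\n", moves[i].src, moves[i].dst);
        int from = moves[i].src;
        int to = moves[i].dst;
        moves.erase(moves.begin() + i);
        // Retarget remaining readers of `from`, like UpdateMoveSource().
        for (Move& m : moves) {
          if (m.src == from) m.src = to;
        }
        progress = true;
        break;
      }
    }
    if (!progress) {
      // Every destination is still read, so we are inside a cycle. Park one
      // source in the scratch register and retarget its readers, mirroring
      // the (C -> scratch) step in PerformMove().
      std::printf("  r%d -> r%d (scratch)\n", moves[0].src, scratch);
      int saved = moves[0].src;
      for (Move& m : moves) {
        if (m.src == saved) m.src = scratch;
      }
    }
  }
}

int main() {
  std::printf("cycle (r0 -> r1) (r1 -> r2) (r2 -> r0):\n");
  Resolve({{0, 1}, {1, 2}, {2, 0}}, 12);
  return 0;
}

Resolving the cycle (r0 -> r1) (r1 -> r2) (r2 -> r0) prints r0 -> r12,
r2 -> r0, r1 -> r2, r12 -> r1: the same (C -> scratch), unblocked moves,
(scratch -> A) shape documented in PerformMove().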