author | Nicolas Geoffray <ngeoffray@google.com> | 2015-04-20 15:20:15 +0000
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-04-20 15:20:15 +0000
commit | 27eac12a66a73eb38b5ccb45b62350cf341299d0 (patch)
tree | 7cc7eea6dd543720da2921c11f68296f5df37b07 /compiler/optimizing
parent | e40d82ffe388458c2674ec051f1dd897362692eb (diff)
parent | ad4450e5c3ffaa9566216cc6fafbf5c11186c467 (diff)
Merge "Opt compiler: Implement parallel move resolver without using swap."
Diffstat (limited to 'compiler/optimizing')

-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 4
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 122
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 14
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 4
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 4
-rw-r--r-- | compiler/optimizing/locations.h | 23
-rw-r--r-- | compiler/optimizing/nodes.h | 8
-rw-r--r-- | compiler/optimizing/parallel_move_resolver.cc | 309
-rw-r--r-- | compiler/optimizing/parallel_move_resolver.h | 120
-rw-r--r-- | compiler/optimizing/parallel_move_test.cc | 344

10 files changed, 756 insertions(+), 196 deletions(-)
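One prerequisite for the new resolver, visible in the locations.h and nodes.h hunks below, is replacing the asymmetric Contains() checks with a symmetric Location::OverlapsWith(), so that a register pair and one of its halves are treated as conflicting in either direction. A simplified model of that predicate (a bare struct with explicit low/high fields, not ART's packed Location encoding) looks roughly like this:

```cpp
#include <cassert>

struct Loc {
  int low;   // Register index of the low half (or the only register).
  int high;  // Register index of the high half, or -1 for a single register.

  bool IsPair() const { return high >= 0; }

  bool Equals(const Loc& other) const {
    return low == other.low && high == other.high;
  }

  // A pair contains a single register that is either of its halves.
  bool Contains(const Loc& other) const {
    if (Equals(other)) return true;
    if (IsPair() && !other.IsPair()) {
      return other.low == low || other.low == high;
    }
    return false;
  }

  // Symmetric overlap test, as used by the parallel move resolver.
  bool OverlapsWith(const Loc& other) const {
    return Contains(other) || other.Contains(*this);
  }
};

int main() {
  Loc pair{0, 1};    // Register pair (0,1).
  Loc reg1{1, -1};   // Single register 1: the pair's high half.
  Loc reg2{2, -1};   // Single register 2: disjoint.
  assert(pair.OverlapsWith(reg1));
  assert(reg1.OverlapsWith(pair));   // Symmetric, unlike Contains().
  assert(!pair.OverlapsWith(reg2));
  return 0;
}
```

In HParallelMove::AddMove() the DCHECK now uses OverlapsWith() as well, so a pair destination that shares a half with an earlier single-register destination is rejected, not just an exactly equal one.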
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 06f425ea21..600903621d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -96,10 +96,10 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; -class ParallelMoveResolverARM : public ParallelMoveResolver { +class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index cfc8f9c2b1..76e2ec6f42 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -425,22 +425,57 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } -void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->MoveLocation(move->GetDestination(), move->GetSource()); -} - -void ParallelMoveResolverARM64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->SwapLocations(move->GetDestination(), move->GetSource()); +void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { + // Note: There are 6 kinds of moves: + // 1. constant -> GPR/FPR (non-cycle) + // 2. constant -> stack (non-cycle) + // 3. GPR/FPR -> GPR/FPR + // 4. GPR/FPR -> stack + // 5. stack -> GPR/FPR + // 6. stack -> stack (non-cycle) + // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 + // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting + // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the + // dependency. + vixl_temps_.Open(GetVIXLAssembler()); +} + +void ParallelMoveResolverARM64::FinishEmitNativeCode() { + vixl_temps_.Close(); +} + +Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { + DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister || + kind == Location::kStackSlot || kind == Location::kDoubleStackSlot); + kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister; + Location scratch = GetScratchLocation(kind); + if (!scratch.Equals(Location::NoLocation())) { + return scratch; + } + // Allocate from VIXL temp registers. 
+ if (kind == Location::kRegister) { + scratch = LocationFrom(vixl_temps_.AcquireX()); + } else { + DCHECK(kind == Location::kFpuRegister); + scratch = LocationFrom(vixl_temps_.AcquireD()); + } + AddScratchLocation(scratch); + return scratch; } -void ParallelMoveResolverARM64::RestoreScratch(int reg) { - __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize)); +void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { + if (loc.IsRegister()) { + vixl_temps_.Release(XRegisterFrom(loc)); + } else { + DCHECK(loc.IsFpuRegister()); + vixl_temps_.Release(DRegisterFrom(loc)); + } + RemoveScratchLocation(loc); } -void ParallelMoveResolverARM64::SpillScratch(int reg) { - __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize)); +void ParallelMoveResolverARM64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->MoveLocation(move->GetDestination(), move->GetSource()); } void CodeGeneratorARM64::GenerateFrameEntry() { @@ -729,10 +764,10 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (destination.IsRegister()) { __ Mov(Register(dst), RegisterFrom(source, type)); } else { + DCHECK(destination.IsFpuRegister()); __ Fmov(FPRegister(dst), FPRegisterFrom(source, type)); } } - } else { // The destination is not a register. It must be a stack slot. DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); if (source.IsRegister() || source.IsFpuRegister()) { @@ -775,67 +810,6 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri } } -void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) { - DCHECK(!loc1.IsConstant()); - DCHECK(!loc2.IsConstant()); - - if (loc1.Equals(loc2)) { - return; - } - - UseScratchRegisterScope temps(GetAssembler()->vixl_masm_); - - bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); - bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); - bool is_fp_reg1 = loc1.IsFpuRegister(); - bool is_fp_reg2 = loc2.IsFpuRegister(); - - if (loc2.IsRegister() && loc1.IsRegister()) { - Register r1 = XRegisterFrom(loc1); - Register r2 = XRegisterFrom(loc2); - Register tmp = temps.AcquireSameSizeAs(r1); - __ Mov(tmp, r2); - __ Mov(r2, r1); - __ Mov(r1, tmp); - } else if (is_fp_reg2 && is_fp_reg1) { - FPRegister r1 = DRegisterFrom(loc1); - FPRegister r2 = DRegisterFrom(loc2); - FPRegister tmp = temps.AcquireSameSizeAs(r1); - __ Fmov(tmp, r2); - __ Fmov(r2, r1); - __ Fmov(r1, tmp); - } else if (is_slot1 != is_slot2) { - MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2); - Location reg_loc = is_slot1 ? loc2 : loc1; - CPURegister reg, tmp; - if (reg_loc.IsFpuRegister()) { - reg = DRegisterFrom(reg_loc); - tmp = temps.AcquireD(); - } else { - reg = XRegisterFrom(reg_loc); - tmp = temps.AcquireX(); - } - __ Ldr(tmp, mem); - __ Str(reg, mem); - if (reg_loc.IsFpuRegister()) { - __ Fmov(FPRegister(reg), FPRegister(tmp)); - } else { - __ Mov(Register(reg), Register(tmp)); - } - } else if (is_slot1 && is_slot2) { - MemOperand mem1 = StackOperandFrom(loc1); - MemOperand mem2 = StackOperandFrom(loc2); - Register tmp1 = loc1.IsStackSlot() ? 
temps.AcquireW() : temps.AcquireX(); - Register tmp2 = temps.AcquireSameSizeAs(tmp1); - __ Ldr(tmp1, mem1); - __ Ldr(tmp2, mem2); - __ Str(tmp1, mem2); - __ Str(tmp2, mem1); - } else { - LOG(FATAL) << "Unimplemented"; - } -} - void CodeGeneratorARM64::Load(Primitive::Type type, CPURegister dst, const MemOperand& src) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9fe60b8adc..5a358671cc 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -198,15 +198,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64); }; -class ParallelMoveResolverARM64 : public ParallelMoveResolver { +class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { public: ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {} + protected: + void PrepareForEmitNativeCode() OVERRIDE; + void FinishEmitNativeCode() OVERRIDE; + Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE; + void FreeScratchLocation(Location loc) OVERRIDE; void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; private: Arm64Assembler* GetAssembler() const; @@ -215,6 +217,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver { } CodeGeneratorARM64* const codegen_; + vixl::UseScratchRegisterScope vixl_temps_; DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64); }; @@ -322,7 +325,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // locations, and is used for optimisation and debugging. 
void MoveLocation(Location destination, Location source, Primitive::Type type = Primitive::kPrimVoid); - void SwapLocations(Location loc_1, Location loc_2); void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 368ae0fb0e..07476c6850 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -93,10 +93,10 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; -class ParallelMoveResolverX86 : public ParallelMoveResolver { +class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index b4876ef161..6cdc82262c 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -102,10 +102,10 @@ class SlowPathCodeX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); }; -class ParallelMoveResolverX86_64 : public ParallelMoveResolver { +class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index de876be9ab..c3a99150c4 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -285,17 +285,26 @@ class Location : public ValueObject { bool Contains(Location other) const { if (Equals(other)) { return true; - } else if (IsFpuRegisterPair() && other.IsFpuRegister()) { - return other.reg() == low() || other.reg() == high(); - } else if (IsRegisterPair() && other.IsRegister()) { - return other.reg() == low() || other.reg() == high(); - } else if (IsDoubleStackSlot() && other.IsStackSlot()) { - return (GetStackIndex() == other.GetStackIndex()) - || (GetStackIndex() + 4 == other.GetStackIndex()); + } else if (IsPair() || IsDoubleStackSlot()) { + return ToLow().Equals(other) || ToHigh().Equals(other); } return false; } + bool OverlapsWith(Location other) const { + // Only check the overlapping case that can happen with our register allocation algorithm. + bool overlap = Contains(other) || other.Contains(*this); + if (kIsDebugBuild && !overlap) { + // Note: These are also overlapping cases. But we are not able to handle them in + // ParallelMoveResolverWithSwap. Make sure that we do not meet such case with our compiler. 
+ if ((IsPair() && other.IsPair()) || (IsDoubleStackSlot() && other.IsDoubleStackSlot())) { + DCHECK(!Contains(other.ToLow())); + DCHECK(!Contains(other.ToHigh())); + } + } + return overlap; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c3c35acb7a..d9d15c4b18 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2113,7 +2113,7 @@ class HIntConstant : public HConstant { friend class HGraph; ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); - ART_FRIEND_TEST(ParallelMoveTest, ConstantLast); + ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; @@ -3526,7 +3526,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_)); + return !IsEliminated() && source_.OverlapsWith(loc); } // A move is redundant if it's been eliminated, if its source and @@ -3595,8 +3595,8 @@ class HParallelMove : public HTemplateInstruction<0> { } } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) - << "Same destination for two moves in a parallel move."; + DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + << "Overlapped destination for two moves in a parallel move."; } } moves_.Add(MoveOperands(source, destination, type, instruction)); diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index ad92ca59a1..54ea6f19d4 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -17,11 +17,23 @@ #include "parallel_move_resolver.h" #include "nodes.h" -#include "locations.h" namespace art { -void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { +void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { + // Perform a linear sweep of the moves to add them to the initial list of + // moves to perform, ignoring any move that is redundant (the source is + // the same as the destination, the destination is ignored and + // unallocated, or the move was already eliminated). + for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { + MoveOperands* move = parallel_move->MoveOperandsAt(i); + if (!move->IsRedundant()) { + moves_.Add(move); + } + } +} + +void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK(moves_.IsEmpty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); @@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { moves_.Reset(); } - -void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { - // Perform a linear sweep of the moves to add them to the initial list of - // moves to perform, ignoring any move that is redundant (the source is - // the same as the destination, the destination is ignored and - // unallocated, or the move was already eliminated). 
- for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { - MoveOperands* move = parallel_move->MoveOperandsAt(i); - if (!move->IsRedundant()) { - moves_.Add(move); - } - } -} - Location LowOf(Location location) { if (location.IsRegisterPair()) { return Location::RegisterLocation(location.low()); @@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati } } -MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { +MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. We // mark a move as "pending" on entry to PerformMove in order to detect @@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { } } -bool ParallelMoveResolver::IsScratchLocation(Location loc) { +bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { for (size_t i = 0; i < moves_.Size(); ++i) { if (moves_.Get(i)->Blocks(loc)) { return false; @@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) { return false; } -int ParallelMoveResolver::AllocateScratchRegister(int blocked, - int register_count, - int if_scratch, - bool* spilled) { +int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked, + int register_count, + int if_scratch, + bool* spilled) { DCHECK_NE(blocked, if_scratch); int scratch = -1; for (int reg = 0; reg < register_count; ++reg) { @@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } -ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( - ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) +ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), reg_(kNoRegister), spilled_(false) { @@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } -ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { +ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); } } +void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { + DCHECK_EQ(GetNumberOfPendingMoves(), 0u); + DCHECK(moves_.IsEmpty()); + DCHECK(scratches_.IsEmpty()); + + // Backend dependent initialization. + PrepareForEmitNativeCode(); + + // Build up a worklist of moves. + BuildInitialMoveList(parallel_move); + + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& move = *moves_.Get(i); + // Skip constants to perform them last. They don't block other moves and + // skipping such moves with register destinations keeps those registers + // free for the whole algorithm. + if (!move.IsEliminated() && !move.GetSource().IsConstant()) { + PerformMove(i); + } + } + + // Perform the moves with constant sources and register destinations with UpdateMoveSource() + // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit + // from changing the constant sources to stack locations. 
+ for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + Location destination = move->GetDestination(); + if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { + Location source = move->GetSource(); + EmitMove(i); + move->Eliminate(); + // This may introduce additional instruction dependency, but reduce number + // of moves and possible literal loads. For example, + // Original moves: + // 1234.5678 -> D0 + // 1234.5678 -> D1 + // Updated moves: + // 1234.5678 -> D0 + // D0 -> D1 + UpdateMoveSource(source, destination); + } + } + + // Perform the rest of the moves. + for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + if (!move->IsEliminated()) { + EmitMove(i); + move->Eliminate(); + } + } + + // All pending moves that we have added for resolve cycles should be performed. + DCHECK_EQ(GetNumberOfPendingMoves(), 0u); + + // Backend dependent cleanup. + FinishEmitNativeCode(); + + moves_.Reset(); + scratches_.Reset(); +} + +Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { + for (size_t i = 0; i < scratches_.Size(); ++i) { + Location loc = scratches_.Get(i); + if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { + return loc; + } + } + for (size_t i = 0; i < moves_.Size(); ++i) { + Location loc = moves_.Get(i)->GetDestination(); + if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { + return loc; + } + } + return Location::NoLocation(); +} + +void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) { + if (kIsDebugBuild) { + for (size_t i = 0; i < scratches_.Size(); ++i) { + DCHECK(!loc.Equals(scratches_.Get(i))); + } + } + scratches_.Add(loc); +} + +void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) { + DCHECK(!IsBlockedByMoves(loc)); + for (size_t i = 0; i < scratches_.Size(); ++i) { + if (loc.Equals(scratches_.Get(i))) { + scratches_.DeleteAt(i); + break; + } + } +} + +void ParallelMoveResolverNoSwap::PerformMove(size_t index) { + // Each call to this function performs a move and deletes it from the move + // graph. We first recursively perform any move blocking this one. We mark + // a move as "pending" on entry to PerformMove in order to detect cycles + // in the move graph. We use scratch location to resolve cycles, also + // additional pending moves might be added. After move has been performed, + // we will update source operand in the move graph to reduce dependencies in + // the graph. + + MoveOperands* move = moves_.Get(index); + DCHECK(!move->IsPending()); + DCHECK(!move->IsEliminated()); + if (move->IsRedundant()) { + // Previous operations on the list of moves have caused this particular move + // to become a no-op, so we can safely eliminate it. Consider for example + // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will + // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is + // used as the scratch location, the move (1 -> 2) will occur while resolving + // the cycle. When that move is emitted, the code will update moves with a '1' + // as their source to use '2' instead (see `UpdateMoveSource()`. In our example + // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be + // eliminated here. + move->Eliminate(); + return; + } + + // Clear this move's destination to indicate a pending move. The actual + // destination is saved in a stack-allocated local. Recursion may allow + // multiple moves to be pending. 
+ DCHECK(!move->GetSource().IsInvalid()); + Location destination = move->MarkPending(); + + // Perform a depth-first traversal of the move graph to resolve + // dependencies. Any unperformed, unpending move with a source the same + // as this one's destination blocks this one so recursively perform all + // such moves. + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination) && !other_move.IsPending()) { + PerformMove(i); + } + } + + // We are about to resolve this move and don't need it marked as + // pending, so restore its destination. + move->ClearPending(destination); + + // No one else should write to the move destination when the it is pending. + DCHECK(!move->IsRedundant()); + + Location source = move->GetSource(); + // The move may be blocked on several pending moves, in case we have a cycle. + if (IsBlockedByMoves(destination)) { + // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following + // sequence: + // (C -> scratch) # Emit right now. + // (A -> B) (B -> C) # Unblocked. + // (scratch -> A) # Add to pending_moves_, blocked by (A -> B). + Location::Kind kind = source.GetKind(); + DCHECK_NE(kind, Location::kConstant); + Location scratch = AllocateScratchLocationFor(kind); + // We only care about the move size. + Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt; + // Perform (C -> scratch) + move->SetDestination(scratch); + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(source, scratch); + // Add (scratch -> A). + AddPendingMove(scratch, destination, type); + } else { + // This move is not blocked. + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(source, destination); + } + + // Moves in the pending list should not block any other moves. But performing + // unblocked moves in the pending list can free scratch registers, so we do this + // as early as possible. + MoveOperands* pending_move; + while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) { + Location pending_source = pending_move->GetSource(); + Location pending_destination = pending_move->GetDestination(); + // We do not depend on the pending move index. So just delete the move instead + // of eliminating it to make the pending list cleaner. + DeletePendingMove(pending_move); + move->SetSource(pending_source); + move->SetDestination(pending_destination); + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(pending_source, pending_destination); + // Free any unblocked locations in the scratch location list. + for (size_t i = 0; i < scratches_.Size(); ++i) { + Location scratch = scratches_.Get(i); + // Only scratch overlapping with performed move source can be unblocked. + if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { + FreeScratchLocation(pending_source); + } + } + } +} + +void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { + // This function is used to reduce the dependencies in the graph after + // (from -> to) has been performed. Since we ensure there is no move with the same + // destination, (to -> X) can not be blocked while (from -> X) might still be + // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After + // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is + // a dependency between the two. If we update the source location from 1 to 2, we + // will get (0 -> 1) and (2 -> 3). There is no dependency between the two. 
+ // + // This is not something we must do, but we can use fewer scratch locations with + // this trick. For example, we can avoid using additional scratch locations for + // moves (0 -> 1), (1 -> 2), (1 -> 0). + for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + if (move->GetSource().Equals(from)) { + move->SetSource(to); + } + } +} + +void ParallelMoveResolverNoSwap::AddPendingMove(Location source, + Location destination, Primitive::Type type) { + pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); +} + +void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { + pending_moves_.Delete(move); +} + +MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { + for (size_t i = 0; i < pending_moves_.Size(); ++i) { + MoveOperands* move = pending_moves_.Get(i); + Location destination = move->GetDestination(); + // Only moves with destination overlapping with input loc can be unblocked. + if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { + return move; + } + } + return nullptr; +} + +bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { + for (size_t i = 0; i < pending_moves_.Size(); ++i) { + if (pending_moves_.Get(i)->Blocks(loc)) { + return true; + } + } + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->Blocks(loc)) { + return true; + } + } + return false; +} + +// So far it is only used for debugging purposes to make sure all pending moves +// have been performed. +size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { + return pending_moves_.Size(); +} + } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 95f8ad5b74..e89417df7d 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -19,30 +19,47 @@ #include "base/value_object.h" #include "utils/growable_array.h" +#include "locations.h" namespace art { class HParallelMove; -class Location; class MoveOperands; -/** - * Helper class to resolve a set of parallel moves. Architecture dependent code - * generator must have their own subclass that implements the `EmitMove` and `EmitSwap` - * operations. - */ +// Helper classes to resolve a set of parallel moves. Architecture dependent code generator must +// have their own subclass that implements corresponding virtual functions. class ParallelMoveResolver : public ValueObject { public: explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move); + virtual void EmitNativeCode(HParallelMove* parallel_move) = 0; + + protected: + // Build the initial list of moves. + void BuildInitialMoveList(HParallelMove* parallel_move); + + GrowableArray<MoveOperands*> moves_; + + private: + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); +}; + +// This helper class uses swap to resolve dependencies and may emit swap. +class ParallelMoveResolverWithSwap : public ParallelMoveResolver { + public: + explicit ParallelMoveResolverWithSwap(ArenaAllocator* allocator) + : ParallelMoveResolver(allocator) {} + virtual ~ParallelMoveResolverWithSwap() {} + + // Resolve a set of parallel moves, emitting assembler instructions. 
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; protected: class ScratchRegisterScope : public ValueObject { public: - ScratchRegisterScope(ParallelMoveResolver* resolver, + ScratchRegisterScope(ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers); @@ -52,11 +69,12 @@ class ParallelMoveResolver : public ValueObject { bool IsSpilled() const { return spilled_; } private: - ParallelMoveResolver* resolver_; + ParallelMoveResolverWithSwap* resolver_; int reg_; bool spilled_; }; + // Return true if the location can be scratched. bool IsScratchLocation(Location loc); // Allocate a scratch register for performing a move. The method will try to use @@ -72,15 +90,9 @@ class ParallelMoveResolver : public ValueObject { virtual void SpillScratch(int reg) = 0; virtual void RestoreScratch(int reg) = 0; - // List of moves not yet resolved. - GrowableArray<MoveOperands*> moves_; - static constexpr int kNoRegister = -1; private: - // Build the initial list of moves. - void BuildInitialMoveList(HParallelMove* parallel_move); - // Perform the move at the moves_ index in question (possibly requiring // other moves to satisfy dependencies). // @@ -99,7 +111,83 @@ class ParallelMoveResolver : public ValueObject { // the right value. MoveOperands* PerformMove(size_t index); - DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverWithSwap); +}; + +// This helper class uses additional scratch registers to resolve dependencies. It supports all kind +// of dependency cycles and does not care about the register layout. +class ParallelMoveResolverNoSwap : public ParallelMoveResolver { + public: + explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) + : ParallelMoveResolver(allocator), scratches_(allocator, 32), + pending_moves_(allocator, 8), allocator_(allocator) {} + virtual ~ParallelMoveResolverNoSwap() {} + + // Resolve a set of parallel moves, emitting assembler instructions. + void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + + protected: + // Called at the beginning of EmitNativeCode(). A subclass may put some architecture dependent + // initialization here. + virtual void PrepareForEmitNativeCode() = 0; + + // Called at the end of EmitNativeCode(). A subclass may put some architecture dependent cleanup + // here. All scratch locations will be removed after this call. + virtual void FinishEmitNativeCode() = 0; + + // Allocate a scratch location to perform a move from input kind of location. A subclass should + // implement this to get the best fit location. If there is no suitable physical register, it can + // also return a stack slot. + virtual Location AllocateScratchLocationFor(Location::Kind kind) = 0; + + // Called after a move which takes a scratch location as source. A subclass can defer the cleanup + // to FinishEmitNativeCode(). + virtual void FreeScratchLocation(Location loc) = 0; + + // Emit a move. + virtual void EmitMove(size_t index) = 0; + + // Return a scratch location from the moves which exactly matches the kind. + // Return Location::NoLocation() if no matching scratch location can be found. + Location GetScratchLocation(Location::Kind kind); + + // Add a location to the scratch list which can be returned from GetScratchLocation() to resolve + // dependency cycles. + void AddScratchLocation(Location loc); + + // Remove a location from the scratch list. + void RemoveScratchLocation(Location loc); + + // List of scratch locations. 
+ GrowableArray<Location> scratches_; + + private: + // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy + // dependencies). + void PerformMove(size_t index); + + void UpdateMoveSource(Location from, Location to); + + void AddPendingMove(Location source, Location destination, Primitive::Type type); + + void DeletePendingMove(MoveOperands* move); + + // Find a move that may be unblocked after (loc -> XXX) is performed. + MoveOperands* GetUnblockedPendingMove(Location loc); + + // Return true if the location is blocked by outstanding moves. + bool IsBlockedByMoves(Location loc); + + // Return the number of pending moves. + size_t GetNumberOfPendingMoves(); + + // Additional pending moves which might be added to resolve dependency cycle. + GrowableArray<MoveOperands*> pending_moves_; + + // Used to allocate pending MoveOperands. + ArenaAllocator* const allocator_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverNoSwap); }; } // namespace art diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 95cca5172b..f8f70105cf 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -19,27 +19,41 @@ #include "parallel_move_resolver.h" #include "gtest/gtest.h" +#include "gtest/gtest-typed-test.h" namespace art { -class TestParallelMoveResolver : public ParallelMoveResolver { - public: - explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {} - - void Dump(Location location) { - if (location.IsConstant()) { - message_ << "C"; - } else if (location.IsPair()) { - message_ << location.low() << "," << location.high(); - } else if (location.IsRegister()) { - message_ << location.reg(); - } else if (location.IsStackSlot()) { - message_ << location.GetStackIndex() << "(sp)"; - } else { - message_ << "2x" << location.GetStackIndex() << "(sp)"; - DCHECK(location.IsDoubleStackSlot()) << location; - } +constexpr int kScratchRegisterStartIndexForTest = 100; + +static void DumpRegisterForTest(std::ostream& os, int reg) { + if (reg >= kScratchRegisterStartIndexForTest) { + os << "T" << reg - kScratchRegisterStartIndexForTest; + } else { + os << reg; } +} + +static void DumpLocationForTest(std::ostream& os, Location location) { + if (location.IsConstant()) { + os << "C"; + } else if (location.IsPair()) { + DumpRegisterForTest(os, location.low()); + os << ","; + DumpRegisterForTest(os, location.high()); + } else if (location.IsRegister()) { + DumpRegisterForTest(os, location.reg()); + } else if (location.IsStackSlot()) { + os << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsDoubleStackSlot())<< location; + os << "2x" << location.GetStackIndex() << "(sp)"; + } +} + +class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { + public: + explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator) + : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { MoveOperands* move = moves_.Get(index); @@ -47,9 +61,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << " "; } message_ << "("; - Dump(move->GetSource()); + DumpLocationForTest(message_, move->GetSource()); message_ << " -> "; - Dump(move->GetDestination()); + DumpLocationForTest(message_, move->GetDestination()); message_ << ")"; } @@ -59,9 +73,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << " "; } message_ << "("; - Dump(move->GetSource()); + 
DumpLocationForTest(message_, move->GetSource()); message_ << " <-> "; - Dump(move->GetDestination()); + DumpLocationForTest(message_, move->GetDestination()); message_ << ")"; } @@ -76,7 +90,64 @@ class TestParallelMoveResolver : public ParallelMoveResolver { std::ostringstream message_; - DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver); + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverWithSwap); +}; + +class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { + public: + explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator) + : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {} + + void PrepareForEmitNativeCode() OVERRIDE { + scratch_index_ = kScratchRegisterStartIndexForTest; + } + + void FinishEmitNativeCode() OVERRIDE {} + + Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE { + if (kind == Location::kStackSlot || kind == Location::kFpuRegister || + kind == Location::kRegister) { + kind = Location::kRegister; + } else { + // Allocate register pair for double stack slot which simulates 32-bit backend's behavior. + kind = Location::kRegisterPair; + } + Location scratch = GetScratchLocation(kind); + if (scratch.Equals(Location::NoLocation())) { + AddScratchLocation(Location::RegisterLocation(scratch_index_)); + AddScratchLocation(Location::RegisterLocation(scratch_index_ + 1)); + AddScratchLocation(Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1)); + scratch = (kind == Location::kRegister) ? Location::RegisterLocation(scratch_index_) + : Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1); + scratch_index_ += 2; + } + return scratch; + } + + void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} + + void EmitMove(size_t index) OVERRIDE { + MoveOperands* move = moves_.Get(index); + if (!message_.str().empty()) { + message_ << " "; + } + message_ << "("; + DumpLocationForTest(message_, move->GetSource()); + message_ << " -> "; + DumpLocationForTest(message_, move->GetDestination()); + message_ << ")"; + } + + std::string GetMessage() const { + return message_.str(); + } + + private: + std::ostringstream message_; + + int scratch_index_; + + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverNoSwap); }; static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, @@ -93,55 +164,102 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, return moves; } -TEST(ParallelMoveTest, Dependency) { +template <typename T> +class ParallelMoveTest : public ::testing::Test { + public: + static const bool has_swap; +}; + +template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true; +template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; + +typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> + ParallelMoveResolverTestTypes; + +TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); + + +TYPED_TEST(ParallelMoveTest, Dependency) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } } 
{ - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> 3) (1 -> 2) (0 -> 1) (2 -> 4)", resolver.GetMessage().c_str()); + } } } -TEST(ParallelMoveTest, Swap) { +TYPED_TEST(ParallelMoveTest, Cycle) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> 2) (0 -> 1) (2 -> 0)", resolver.GetMessage().c_str()); + } + } + + { + TypeParam resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {0, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0 -> 2) (1 -> 0) (2 -> 1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(4 -> T0) (3 -> 4) (2 -> 3) (1 -> 2) (0 -> 1) (T0 -> 0)", + resolver.GetMessage().c_str()); + } } } -TEST(ParallelMoveTest, ConstantLast) { +TYPED_TEST(ParallelMoveTest, ConstantLast) { ArenaPool pool; ArenaAllocator allocator(&pool); - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::ConstantLocation(new (&allocator) HIntConstant(0)), @@ -157,12 +275,12 @@ TEST(ParallelMoveTest, ConstantLast) { ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str()); } -TEST(ParallelMoveTest, Pairs) { +TYPED_TEST(ParallelMoveTest, Pairs) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -179,7 +297,7 @@ TEST(ParallelMoveTest, Pairs) { } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); 
HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -196,7 +314,7 @@ TEST(ParallelMoveTest, Pairs) { } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -209,10 +327,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> T0) (0,1 -> 2,3) (T0 -> 0)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -230,10 +352,15 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -251,10 +378,15 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -272,10 +404,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(7 -> T0) (2 -> 7) (0,1 -> 2,3) (T0 -> 1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -288,10 +424,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2,3 -> T0,T1) (0,1 -> 2,3) (T0,T1 -> 0,1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(2, 3), @@ -304,12 +444,85 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); 
resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (2,3 -> 0,1) (T0,T1 -> 2,3)", resolver.GetMessage().c_str()); + } + } +} + +TYPED_TEST(ParallelMoveTest, MultiCycles) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TypeParam resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {2, 3}, {3, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 <-> 0) (3 <-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0) (3 -> T0) (2 -> 3) (T0 -> 2)", + resolver.GetMessage().c_str()); + } + } + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterLocation(3), + Location::RegisterLocation(1), + Primitive::kPrimInt, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> T0) (3 -> T1) (0,1 -> 2,3) (T0 -> 0) (T1 -> 1)", + resolver.GetMessage().c_str()); + } + } + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterLocation(3), + Location::RegisterLocation(1), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(3 -> T0) (0,1 -> T2,T3) (T0 -> 1) (2 -> 0) (T2,T3 -> 2,3)", + resolver.GetMessage().c_str()); + } } { // Test involving registers used in single context and pair context. - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(10), @@ -327,17 +540,22 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2x32(sp) -> T0,T1) (4,5 -> 2x32(sp)) (10 -> 5) (T0,T1 -> 10,11)", + resolver.GetMessage().c_str()); + } } } // Test that we do 64bits moves before 32bits moves. 
-TEST(ParallelMoveTest, CyclesWith64BitsMoves) { +TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(0), @@ -355,11 +573,16 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(48(sp) -> T0) (1 -> 48(sp)) (0 -> 1) (T0 -> 0)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -377,7 +600,12 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2x32(sp) -> T0,T1) (2,3 -> 2x32(sp)) (0,1 -> 2,3) (T0,T1 -> 0,1)", + resolver.GetMessage().c_str()); + } } } |
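A second detail of ParallelMoveResolverNoSwap::EmitNativeCode() worth isolating is the UpdateMoveSource() pass: constant-source moves into registers are emitted in their own pass, and later moves of the same constant are rewritten to copy the register that already holds it (the `1234.5678 -> D0`, then `D0 -> D1` example in the comments). The sketch below models only that rewriting, with a toy `Mv` struct and string-based locations that are illustrative, not the ART API.

```cpp
#include <cstdio>
#include <string>
#include <vector>

struct Mv {
  std::string src;  // A constant literal such as "1234.5678", or a register name.
  std::string dst;
  bool is_constant;
};

// After (from -> to) has been performed, redirect the remaining reads of
// `from` to `to`, mirroring what UpdateMoveSource() does in the resolver.
static void UpdateMoveSource(std::vector<Mv>& moves, size_t start,
                             const std::string& from, const std::string& to) {
  for (size_t i = start; i < moves.size(); ++i) {
    if (moves[i].src == from) {
      moves[i].src = to;
      moves[i].is_constant = false;
    }
  }
}

int main() {
  // Two moves of the same literal into different FP registers.
  std::vector<Mv> moves = {{"1234.5678", "D0", true}, {"1234.5678", "D1", true}};
  for (size_t i = 0; i < moves.size(); ++i) {
    std::printf("(%s -> %s) ", moves[i].src.c_str(), moves[i].dst.c_str());
    if (moves[i].is_constant) {
      // The literal now lives in moves[i].dst; later moves can copy it from
      // there instead of loading the literal again.
      UpdateMoveSource(moves, i + 1, moves[i].src, moves[i].dst);
    }
  }
  std::printf("\n");  // Prints: (1234.5678 -> D0) (D0 -> D1)
  return 0;
}
```

The real pass skips stack-slot destinations, since redirecting later sources to a stack slot would not save anything, and the comment in the commit notes the trade-off: a possible extra dependency between the two emitted moves in exchange for fewer moves and literal loads.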