author    Nicolas Geoffray <ngeoffray@google.com>    2015-04-20 15:20:15 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com>    2015-04-20 15:20:15 +0000
commit    27eac12a66a73eb38b5ccb45b62350cf341299d0 (patch)
tree      7cc7eea6dd543720da2921c11f68296f5df37b07 /compiler/optimizing
parent    e40d82ffe388458c2674ec051f1dd897362692eb (diff)
parent    ad4450e5c3ffaa9566216cc6fafbf5c11186c467 (diff)
Merge "Opt compiler: Implement parallel move resolver without using swap."
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/code_generator_arm.h4
-rw-r--r--compiler/optimizing/code_generator_arm64.cc122
-rw-r--r--compiler/optimizing/code_generator_arm64.h14
-rw-r--r--compiler/optimizing/code_generator_x86.h4
-rw-r--r--compiler/optimizing/code_generator_x86_64.h4
-rw-r--r--compiler/optimizing/locations.h23
-rw-r--r--compiler/optimizing/nodes.h8
-rw-r--r--compiler/optimizing/parallel_move_resolver.cc309
-rw-r--r--compiler/optimizing/parallel_move_resolver.h120
-rw-r--r--compiler/optimizing/parallel_move_test.cc344
10 files changed, 756 insertions, 196 deletions
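
The merge replaces the single swap-based resolver with a small hierarchy in which ARM64 breaks dependency cycles with scratch locations instead of swaps. Below is a minimal standalone C++ sketch of that no-swap strategy, under simplifying assumptions: locations are plain ints, scratch locations are ints from 100 up (printed as T0, T1, ...), redundant moves are assumed to be filtered out already, and all the register-kind and pair handling of the real resolver is omitted. It illustrates the technique; it is not ART code.

#include <cstddef>
#include <cstdio>
#include <vector>

struct Move { int src, dst; };

static void Emit(int src, int dst) {
  if (src >= 100) std::printf("(T%d -> %d) ", src - 100, dst);
  else if (dst >= 100) std::printf("(%d -> T%d) ", src, dst - 100);
  else std::printf("(%d -> %d) ", src, dst);
}

static void Resolve(std::vector<Move> moves) {
  int next_scratch = 100;
  while (!moves.empty()) {
    bool progress = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      // A move is blocked while another pending move still reads its destination.
      bool blocked = false;
      for (size_t j = 0; j < moves.size(); ++j) {
        if (j != i && moves[j].src == moves[i].dst) blocked = true;
      }
      if (blocked) continue;
      Move m = moves[i];
      moves.erase(moves.begin() + i);
      Emit(m.src, m.dst);
      // The UpdateMoveSource() trick: later readers of m.src may read m.dst
      // instead, which shortens dependency chains and saves scratch locations.
      for (Move& other : moves) {
        if (other.src == m.src) other.src = m.dst;
      }
      progress = true;
      break;
    }
    if (!progress) {
      // Only cycles remain: park one source in a scratch location to break one.
      int scratch = next_scratch++;
      int old_src = moves[0].src;
      Emit(old_src, scratch);
      for (Move& other : moves) {
        if (other.src == old_src) other.src = scratch;
      }
    }
  }
  std::printf("\n");
}

int main() {
  Resolve({{0, 1}, {1, 2}, {1, 0}});  // prints: (1 -> 2) (0 -> 1) (2 -> 0)
  Resolve({{0, 1}, {1, 0}});          // prints: (0 -> T0) (1 -> 0) (T0 -> 1)
}

The first call happens to reproduce the serialization the new parallel_move_test.cc expects for {0->1, 1->2, 1->0}; for the pure 2-cycle the sketch parks the opposite operand from the real resolver, an equally valid ordering.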
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 06f425ea21..600903621d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -96,10 +96,10 @@ class InvokeDexCallingConventionVisitor {
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
-class ParallelMoveResolverARM : public ParallelMoveResolver {
+class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
public:
ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
void EmitMove(size_t index) OVERRIDE;
void EmitSwap(size_t index) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cfc8f9c2b1..76e2ec6f42 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -425,22 +425,57 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
CodeGenerator::Finalize(allocator);
}
-void ParallelMoveResolverARM64::EmitMove(size_t index) {
- MoveOperands* move = moves_.Get(index);
- codegen_->MoveLocation(move->GetDestination(), move->GetSource());
-}
-
-void ParallelMoveResolverARM64::EmitSwap(size_t index) {
- MoveOperands* move = moves_.Get(index);
- codegen_->SwapLocations(move->GetDestination(), move->GetSource());
+void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
+ // Note: There are 6 kinds of moves:
+ // 1. constant -> GPR/FPR (non-cycle)
+ // 2. constant -> stack (non-cycle)
+ // 3. GPR/FPR -> GPR/FPR
+ // 4. GPR/FPR -> stack
+ // 5. stack -> GPR/FPR
+ // 6. stack -> stack (non-cycle)
+ // Cases 1, 2, and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4,
+ // and 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
+ // intersecting cycles on ARM64, so we always have 1 GPR and 1 FPR available as VIXL temps to
+ // resolve the dependency.
+ vixl_temps_.Open(GetVIXLAssembler());
+}
+
+void ParallelMoveResolverARM64::FinishEmitNativeCode() {
+ vixl_temps_.Close();
+}
+
+Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
+ DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister ||
+ kind == Location::kStackSlot || kind == Location::kDoubleStackSlot);
+ kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister;
+ Location scratch = GetScratchLocation(kind);
+ if (!scratch.Equals(Location::NoLocation())) {
+ return scratch;
+ }
+ // Allocate from VIXL temp registers.
+ if (kind == Location::kRegister) {
+ scratch = LocationFrom(vixl_temps_.AcquireX());
+ } else {
+ DCHECK(kind == Location::kFpuRegister);
+ scratch = LocationFrom(vixl_temps_.AcquireD());
+ }
+ AddScratchLocation(scratch);
+ return scratch;
}
-void ParallelMoveResolverARM64::RestoreScratch(int reg) {
- __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize));
+void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
+ if (loc.IsRegister()) {
+ vixl_temps_.Release(XRegisterFrom(loc));
+ } else {
+ DCHECK(loc.IsFpuRegister());
+ vixl_temps_.Release(DRegisterFrom(loc));
+ }
+ RemoveScratchLocation(loc);
}
-void ParallelMoveResolverARM64::SpillScratch(int reg) {
- __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize));
+void ParallelMoveResolverARM64::EmitMove(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ codegen_->MoveLocation(move->GetDestination(), move->GetSource());
}
void CodeGeneratorARM64::GenerateFrameEntry() {
@@ -729,10 +764,10 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
if (destination.IsRegister()) {
__ Mov(Register(dst), RegisterFrom(source, type));
} else {
+ DCHECK(destination.IsFpuRegister());
__ Fmov(FPRegister(dst), FPRegisterFrom(source, type));
}
}
-
} else { // The destination is not a register. It must be a stack slot.
DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
if (source.IsRegister() || source.IsFpuRegister()) {
@@ -775,67 +810,6 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
}
}
-void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) {
- DCHECK(!loc1.IsConstant());
- DCHECK(!loc2.IsConstant());
-
- if (loc1.Equals(loc2)) {
- return;
- }
-
- UseScratchRegisterScope temps(GetAssembler()->vixl_masm_);
-
- bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
- bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
- bool is_fp_reg1 = loc1.IsFpuRegister();
- bool is_fp_reg2 = loc2.IsFpuRegister();
-
- if (loc2.IsRegister() && loc1.IsRegister()) {
- Register r1 = XRegisterFrom(loc1);
- Register r2 = XRegisterFrom(loc2);
- Register tmp = temps.AcquireSameSizeAs(r1);
- __ Mov(tmp, r2);
- __ Mov(r2, r1);
- __ Mov(r1, tmp);
- } else if (is_fp_reg2 && is_fp_reg1) {
- FPRegister r1 = DRegisterFrom(loc1);
- FPRegister r2 = DRegisterFrom(loc2);
- FPRegister tmp = temps.AcquireSameSizeAs(r1);
- __ Fmov(tmp, r2);
- __ Fmov(r2, r1);
- __ Fmov(r1, tmp);
- } else if (is_slot1 != is_slot2) {
- MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2);
- Location reg_loc = is_slot1 ? loc2 : loc1;
- CPURegister reg, tmp;
- if (reg_loc.IsFpuRegister()) {
- reg = DRegisterFrom(reg_loc);
- tmp = temps.AcquireD();
- } else {
- reg = XRegisterFrom(reg_loc);
- tmp = temps.AcquireX();
- }
- __ Ldr(tmp, mem);
- __ Str(reg, mem);
- if (reg_loc.IsFpuRegister()) {
- __ Fmov(FPRegister(reg), FPRegister(tmp));
- } else {
- __ Mov(Register(reg), Register(tmp));
- }
- } else if (is_slot1 && is_slot2) {
- MemOperand mem1 = StackOperandFrom(loc1);
- MemOperand mem2 = StackOperandFrom(loc2);
- Register tmp1 = loc1.IsStackSlot() ? temps.AcquireW() : temps.AcquireX();
- Register tmp2 = temps.AcquireSameSizeAs(tmp1);
- __ Ldr(tmp1, mem1);
- __ Ldr(tmp2, mem2);
- __ Str(tmp1, mem2);
- __ Str(tmp2, mem1);
- } else {
- LOG(FATAL) << "Unimplemented";
- }
-}
-
void CodeGeneratorARM64::Load(Primitive::Type type,
CPURegister dst,
const MemOperand& src) {
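
The deleted SpillScratch/RestoreScratch pair shows the old protocol: a scratch register might have to be pushed and popped around its use. The new ARM64 resolver instead acquires temps from VIXL's scratch pool (the vixl_temps_.Open()/AcquireX()/AcquireD()/Release() calls above), relying on the invariant argued in PrepareForEmitNativeCode() that a temp is always free. A standalone sketch of that acquire/release discipline, with a hypothetical TempPool standing in for vixl::UseScratchRegisterScope:

#include <bitset>
#include <cassert>

class TempPool {
 public:
  int Acquire() {
    for (int i = 0; i < kNumTemps; ++i) {
      if (!in_use_[i]) {
        in_use_[i] = true;
        return i;
      }
    }
    assert(false && "no free temp: the old code would spill here");
    return -1;
  }
  void Release(int reg) {
    assert(in_use_[reg]);
    in_use_[reg] = false;
  }

 private:
  static constexpr int kNumTemps = 2;  // VIXL on ARM64 has 2 GPR temps (+1 FPR).
  std::bitset<kNumTemps> in_use_;
};

int main() {
  TempPool temps;           // cf. vixl_temps_.Open(...) in PrepareForEmitNativeCode()
  int t = temps.Acquire();  // cf. AllocateScratchLocationFor()
  temps.Release(t);         // cf. FreeScratchLocation()
}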
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 9fe60b8adc..5a358671cc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -198,15 +198,17 @@ class LocationsBuilderARM64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};
-class ParallelMoveResolverARM64 : public ParallelMoveResolver {
+class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
public:
ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}
+ protected:
+ void PrepareForEmitNativeCode() OVERRIDE;
+ void FinishEmitNativeCode() OVERRIDE;
+ Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE;
+ void FreeScratchLocation(Location loc) OVERRIDE;
void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
private:
Arm64Assembler* GetAssembler() const;
@@ -215,6 +217,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {
}
CodeGeneratorARM64* const codegen_;
+ vixl::UseScratchRegisterScope vixl_temps_;
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
};
@@ -322,7 +325,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
// locations, and is used for optimisation and debugging.
void MoveLocation(Location destination, Location source,
Primitive::Type type = Primitive::kPrimVoid);
- void SwapLocations(Location loc_1, Location loc_2);
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
void LoadCurrentMethod(vixl::Register current_method);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 368ae0fb0e..07476c6850 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -93,10 +93,10 @@ class InvokeDexCallingConventionVisitor {
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
-class ParallelMoveResolverX86 : public ParallelMoveResolver {
+class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
public:
ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
void EmitMove(size_t index) OVERRIDE;
void EmitSwap(size_t index) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index b4876ef161..6cdc82262c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -102,10 +102,10 @@ class SlowPathCodeX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
};
-class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
+class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
public:
ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
void EmitMove(size_t index) OVERRIDE;
void EmitSwap(size_t index) OVERRIDE;
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index de876be9ab..c3a99150c4 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -285,17 +285,26 @@ class Location : public ValueObject {
bool Contains(Location other) const {
if (Equals(other)) {
return true;
- } else if (IsFpuRegisterPair() && other.IsFpuRegister()) {
- return other.reg() == low() || other.reg() == high();
- } else if (IsRegisterPair() && other.IsRegister()) {
- return other.reg() == low() || other.reg() == high();
- } else if (IsDoubleStackSlot() && other.IsStackSlot()) {
- return (GetStackIndex() == other.GetStackIndex())
- || (GetStackIndex() + 4 == other.GetStackIndex());
+ } else if (IsPair() || IsDoubleStackSlot()) {
+ return ToLow().Equals(other) || ToHigh().Equals(other);
}
return false;
}
+ bool OverlapsWith(Location other) const {
+ // Only check the overlapping cases that can happen with our register allocation algorithm.
+ bool overlap = Contains(other) || other.Contains(*this);
+ if (kIsDebugBuild && !overlap) {
+ // Note: These are also overlapping cases. But we are not able to handle them in
+ // ParallelMoveResolverWithSwap. Make sure that we do not hit such cases with our compiler.
+ if ((IsPair() && other.IsPair()) || (IsDoubleStackSlot() && other.IsDoubleStackSlot())) {
+ DCHECK(!Contains(other.ToLow()));
+ DCHECK(!Contains(other.ToHigh()));
+ }
+ }
+ return overlap;
+ }
+
const char* DebugString() const {
switch (GetKind()) {
case kInvalid: return "I";
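
The rewritten Contains() plus the new OverlapsWith() express one idea: a pair (or double stack slot) covers its low and high halves, and two locations interfere when either contains the other. A standalone sketch on a simplified Loc model (register pairs only; the real code additionally treats a double stack slot at index i as covering the single slots at i and i + 4):

#include <cassert>

struct Loc {
  int low, high;  // high == -1 for a single register
  bool IsPair() const { return high >= 0; }
  bool Equals(Loc o) const { return low == o.low && high == o.high; }
  Loc ToLow() const { return Loc{low, -1}; }
  Loc ToHigh() const { return Loc{high, -1}; }
  bool Contains(Loc o) const {
    if (Equals(o)) return true;
    if (IsPair()) return ToLow().Equals(o) || ToHigh().Equals(o);
    return false;
  }
  // Two locations interfere when either contains the other.
  bool OverlapsWith(Loc o) const { return Contains(o) || o.Contains(*this); }
};

int main() {
  Loc pair{0, 1}, r0{0, -1}, r2{2, -1};
  assert(pair.OverlapsWith(r0));  // R0 is the low half of the pair (R0,R1).
  assert(r0.OverlapsWith(pair));  // Symmetric, which plain Contains() is not.
  assert(!pair.OverlapsWith(r2));
  return 0;
}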
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c3c35acb7a..d9d15c4b18 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2113,7 +2113,7 @@ class HIntConstant : public HConstant {
friend class HGraph;
ART_FRIEND_TEST(GraphTest, InsertInstructionBefore);
- ART_FRIEND_TEST(ParallelMoveTest, ConstantLast);
+ ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast);
DISALLOW_COPY_AND_ASSIGN(HIntConstant);
};
@@ -3526,7 +3526,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> {
// True if this blocks a move from the given location.
bool Blocks(Location loc) const {
- return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_));
+ return !IsEliminated() && source_.OverlapsWith(loc);
}
// A move is redundant if it's been eliminated, if its source and
@@ -3595,8 +3595,8 @@ class HParallelMove : public HTemplateInstruction<0> {
}
}
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
- << "Same destination for two moves in a parallel move.";
+ DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination()))
+ << "Overlapped destination for two moves in a parallel move.";
}
}
moves_.Add(MoveOperands(source, destination, type, instruction));
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index ad92ca59a1..54ea6f19d4 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -17,11 +17,23 @@
#include "parallel_move_resolver.h"
#include "nodes.h"
-#include "locations.h"
namespace art {
-void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
+void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
+ // Perform a linear sweep of the moves to add them to the initial list of
+ // moves to perform, ignoring any move that is redundant (the source is
+ // the same as the destination, the destination is ignored and
+ // unallocated, or the move was already eliminated).
+ for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
+ MoveOperands* move = parallel_move->MoveOperandsAt(i);
+ if (!move->IsRedundant()) {
+ moves_.Add(move);
+ }
+ }
+}
+
+void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
DCHECK(moves_.IsEmpty());
// Build up a worklist of moves.
BuildInitialMoveList(parallel_move);
@@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
moves_.Reset();
}
-
-void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
- // Perform a linear sweep of the moves to add them to the initial list of
- // moves to perform, ignoring any move that is redundant (the source is
- // the same as the destination, the destination is ignored and
- // unallocated, or the move was already eliminated).
- for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
- MoveOperands* move = parallel_move->MoveOperandsAt(i);
- if (!move->IsRedundant()) {
- moves_.Add(move);
- }
- }
-}
-
Location LowOf(Location location) {
if (location.IsRegisterPair()) {
return Location::RegisterLocation(location.low());
@@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati
}
}
-MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We
// mark a move as "pending" on entry to PerformMove in order to detect
@@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
}
}
-bool ParallelMoveResolver::IsScratchLocation(Location loc) {
+bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
for (size_t i = 0; i < moves_.Size(); ++i) {
if (moves_.Get(i)->Blocks(loc)) {
return false;
@@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) {
return false;
}
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
- int register_count,
- int if_scratch,
- bool* spilled) {
+int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
+ int register_count,
+ int if_scratch,
+ bool* spilled) {
DCHECK_NE(blocked, if_scratch);
int scratch = -1;
for (int reg = 0; reg < register_count; ++reg) {
@@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
- ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
+ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
reg_(kNoRegister),
spilled_(false) {
@@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
-ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
}
}
+void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+ DCHECK(moves_.IsEmpty());
+ DCHECK(scratches_.IsEmpty());
+
+ // Backend dependent initialization.
+ PrepareForEmitNativeCode();
+
+ // Build up a worklist of moves.
+ BuildInitialMoveList(parallel_move);
+
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& move = *moves_.Get(i);
+ // Skip constants to perform them last. They don't block other moves and
+ // skipping such moves with register destinations keeps those registers
+ // free for the whole algorithm.
+ if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
+ PerformMove(i);
+ }
+ }
+
+ // Perform the moves with constant sources and register destinations with UpdateMoveSource()
+ // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
+ // from changing the constant sources to stack locations.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ Location destination = move->GetDestination();
+ if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
+ Location source = move->GetSource();
+ EmitMove(i);
+ move->Eliminate();
+ // This may introduce an additional instruction dependency, but it reduces the
+ // number of moves and possible literal loads. For example,
+ // Original moves:
+ // 1234.5678 -> D0
+ // 1234.5678 -> D1
+ // Updated moves:
+ // 1234.5678 -> D0
+ // D0 -> D1
+ UpdateMoveSource(source, destination);
+ }
+ }
+
+ // Perform the rest of the moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (!move->IsEliminated()) {
+ EmitMove(i);
+ move->Eliminate();
+ }
+ }
+
+ // All pending moves that we have added to resolve cycles should have been performed.
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+
+ // Backend dependent cleanup.
+ FinishEmitNativeCode();
+
+ moves_.Reset();
+ scratches_.Reset();
+}
+
+Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location loc = scratches_.Get(i);
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ Location loc = moves_.Get(i)->GetDestination();
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ return Location::NoLocation();
+}
+
+void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
+ if (kIsDebugBuild) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ DCHECK(!loc.Equals(scratches_.Get(i)));
+ }
+ }
+ scratches_.Add(loc);
+}
+
+void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
+ DCHECK(!IsBlockedByMoves(loc));
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ if (loc.Equals(scratches_.Get(i))) {
+ scratches_.DeleteAt(i);
+ break;
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
+ // Each call to this function performs a move and deletes it from the move
+ // graph. We first recursively perform any move blocking this one. We mark
+ // a move as "pending" on entry to PerformMove in order to detect cycles
+ // in the move graph. We use scratch locations to resolve cycles; additional
+ // pending moves might also be added. After a move has been performed, we
+ // update the source operands in the move graph to reduce dependencies in
+ // the graph.
+
+ MoveOperands* move = moves_.Get(index);
+ DCHECK(!move->IsPending());
+ DCHECK(!move->IsEliminated());
+ if (move->IsRedundant()) {
+ // Previous operations on the list of moves have caused this particular move
+ // to become a no-op, so we can safely eliminate it. Consider for example
+ // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
+ // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
+ // used as the scratch location, the move (1 -> 2) will occur while resolving
+ // the cycle. When that move is emitted, the code will update moves with a '1'
+ // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
+ // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
+ // eliminated here.
+ move->Eliminate();
+ return;
+ }
+
+ // Clear this move's destination to indicate a pending move. The actual
+ // destination is saved in a stack-allocated local. Recursion may allow
+ // multiple moves to be pending.
+ DCHECK(!move->GetSource().IsInvalid());
+ Location destination = move->MarkPending();
+
+ // Perform a depth-first traversal of the move graph to resolve
+ // dependencies. Any unperformed, unpending move with a source the same
+ // as this one's destination blocks this one so recursively perform all
+ // such moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& other_move = *moves_.Get(i);
+ if (other_move.Blocks(destination) && !other_move.IsPending()) {
+ PerformMove(i);
+ }
+ }
+
+ // We are about to resolve this move and don't need it marked as
+ // pending, so restore its destination.
+ move->ClearPending(destination);
+
+ // No one else should write to the move destination when it is pending.
+ DCHECK(!move->IsRedundant());
+
+ Location source = move->GetSource();
+ // The move may be blocked on several pending moves, in case we have a cycle.
+ if (IsBlockedByMoves(destination)) {
+ // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
+ // sequence:
+ // (C -> scratch) # Emit right now.
+ // (A -> B) (B -> C) # Unblocked.
+ // (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
+ Location::Kind kind = source.GetKind();
+ DCHECK_NE(kind, Location::kConstant);
+ Location scratch = AllocateScratchLocationFor(kind);
+ // We only care about the move size.
+ Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
+ // Perform (C -> scratch)
+ move->SetDestination(scratch);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, scratch);
+ // Add (scratch -> A).
+ AddPendingMove(scratch, destination, type);
+ } else {
+ // This move is not blocked.
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, destination);
+ }
+
+ // Moves in the pending list should not block any other moves. But performing
+ // unblocked moves in the pending list can free scratch registers, so we do this
+ // as early as possible.
+ MoveOperands* pending_move;
+ while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
+ Location pending_source = pending_move->GetSource();
+ Location pending_destination = pending_move->GetDestination();
+ // We do not depend on the pending move index, so just delete the move instead
+ // of eliminating it to keep the pending list clean.
+ DeletePendingMove(pending_move);
+ move->SetSource(pending_source);
+ move->SetDestination(pending_destination);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(pending_source, pending_destination);
+ // Free any unblocked locations in the scratch location list.
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location scratch = scratches_.Get(i);
+ // Only a scratch location overlapping with the performed move's source can be unblocked.
+ if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
+ FreeScratchLocation(pending_source);
+ }
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
+ // This function is used to reduce the dependencies in the graph after
+ // (from -> to) has been performed. Since we ensure there is no move with the same
+ // destination, (to -> X) can not be blocked while (from -> X) might still be
+ // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
+ // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
+ // a dependency between the two. If we update the source location from 1 to 2, we
+ // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
+ //
+ // This is not something we must do, but we can use fewer scratch locations with
+ // this trick. For example, we can avoid using additional scratch locations for
+ // moves (0 -> 1), (1 -> 2), (1 -> 0).
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (move->GetSource().Equals(from)) {
+ move->SetSource(to);
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
+ Location destination, Primitive::Type type) {
+ pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+}
+
+void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
+ pending_moves_.Delete(move);
+}
+
+MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ MoveOperands* move = pending_moves_.Get(i);
+ Location destination = move->GetDestination();
+ // Only moves whose destination overlaps with the input `loc` can be unblocked.
+ if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
+ return move;
+ }
+ }
+ return nullptr;
+}
+
+bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ if (pending_moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ if (moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// So far it is only used for debugging purposes to make sure all pending moves
+// have been performed.
+size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
+ return pending_moves_.Size();
+}
+
} // namespace art
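
The constant-forwarding pass in ParallelMoveResolverNoSwap::EmitNativeCode() (the '1234.5678 -> D0; D0 -> D1' comment above) is easiest to see in isolation. A standalone sketch under a hypothetical model, where a constant source is a tagged int and registers print as r0, r1, ...:

#include <cstddef>
#include <cstdio>
#include <vector>

struct Move { int src, dst; bool src_is_constant; };

int main() {
  // Two loads of the same literal, as in the D0/D1 example.
  std::vector<Move> moves = {{42, 0, true}, {42, 1, true}};
  for (size_t i = 0; i < moves.size(); ++i) {
    const Move& m = moves[i];
    std::printf(m.src_is_constant ? "(C%d -> r%d) " : "(r%d -> r%d) ", m.src, m.dst);
    // UpdateMoveSource(): later uses of this constant read the register instead,
    // turning a second literal load into a cheap register copy.
    for (size_t j = i + 1; j < moves.size(); ++j) {
      if (moves[j].src_is_constant && moves[j].src == m.src) {
        moves[j].src = m.dst;
        moves[j].src_is_constant = false;
      }
    }
  }
  std::printf("\n");  // prints: (C42 -> r0) (r0 -> r1)
}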
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 95f8ad5b74..e89417df7d 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -19,30 +19,47 @@
#include "base/value_object.h"
#include "utils/growable_array.h"
+#include "locations.h"
namespace art {
class HParallelMove;
-class Location;
class MoveOperands;
-/**
- * Helper class to resolve a set of parallel moves. Architecture dependent code
- * generator must have their own subclass that implements the `EmitMove` and `EmitSwap`
- * operations.
- */
+// Helper classes to resolve a set of parallel moves. Architecture-dependent code generators must
+// have their own subclass that implements the corresponding virtual functions.
class ParallelMoveResolver : public ValueObject {
public:
explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {}
virtual ~ParallelMoveResolver() {}
// Resolve a set of parallel moves, emitting assembler instructions.
- void EmitNativeCode(HParallelMove* parallel_move);
+ virtual void EmitNativeCode(HParallelMove* parallel_move) = 0;
+
+ protected:
+ // Build the initial list of moves.
+ void BuildInitialMoveList(HParallelMove* parallel_move);
+
+ GrowableArray<MoveOperands*> moves_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
+};
+
+// This helper class uses swaps to resolve dependencies and may emit swap operations.
+class ParallelMoveResolverWithSwap : public ParallelMoveResolver {
+ public:
+ explicit ParallelMoveResolverWithSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolver(allocator) {}
+ virtual ~ParallelMoveResolverWithSwap() {}
+
+ // Resolve a set of parallel moves, emitting assembler instructions.
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE;
protected:
class ScratchRegisterScope : public ValueObject {
public:
- ScratchRegisterScope(ParallelMoveResolver* resolver,
+ ScratchRegisterScope(ParallelMoveResolverWithSwap* resolver,
int blocked,
int if_scratch,
int number_of_registers);
@@ -52,11 +69,12 @@ class ParallelMoveResolver : public ValueObject {
bool IsSpilled() const { return spilled_; }
private:
- ParallelMoveResolver* resolver_;
+ ParallelMoveResolverWithSwap* resolver_;
int reg_;
bool spilled_;
};
+ // Return true if the location can be scratched.
bool IsScratchLocation(Location loc);
// Allocate a scratch register for performing a move. The method will try to use
@@ -72,15 +90,9 @@ class ParallelMoveResolver : public ValueObject {
virtual void SpillScratch(int reg) = 0;
virtual void RestoreScratch(int reg) = 0;
- // List of moves not yet resolved.
- GrowableArray<MoveOperands*> moves_;
-
static constexpr int kNoRegister = -1;
private:
- // Build the initial list of moves.
- void BuildInitialMoveList(HParallelMove* parallel_move);
-
// Perform the move at the moves_ index in question (possibly requiring
// other moves to satisfy dependencies).
//
@@ -99,7 +111,83 @@ class ParallelMoveResolver : public ValueObject {
// the right value.
MoveOperands* PerformMove(size_t index);
- DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverWithSwap);
+};
+
+// This helper class uses additional scratch registers to resolve dependencies. It supports all
+// kinds of dependency cycles and does not care about the register layout.
+class ParallelMoveResolverNoSwap : public ParallelMoveResolver {
+ public:
+ explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolver(allocator), scratches_(allocator, 32),
+ pending_moves_(allocator, 8), allocator_(allocator) {}
+ virtual ~ParallelMoveResolverNoSwap() {}
+
+ // Resolve a set of parallel moves, emitting assembler instructions.
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE;
+
+ protected:
+ // Called at the beginning of EmitNativeCode(). A subclass may put some architecture dependent
+ // initialization here.
+ virtual void PrepareForEmitNativeCode() = 0;
+
+ // Called at the end of EmitNativeCode(). A subclass may put some architecture dependent cleanup
+ // here. All scratch locations will be removed after this call.
+ virtual void FinishEmitNativeCode() = 0;
+
+ // Allocate a scratch location to perform a move of the given kind of location. A subclass
+ // should implement this to return the best-fit location. If there is no suitable physical
+ // register, it can also return a stack slot.
+ virtual Location AllocateScratchLocationFor(Location::Kind kind) = 0;
+
+ // Called after a move which takes a scratch location as source. A subclass can defer the cleanup
+ // to FinishEmitNativeCode().
+ virtual void FreeScratchLocation(Location loc) = 0;
+
+ // Emit a move.
+ virtual void EmitMove(size_t index) = 0;
+
+ // Return a scratch location from the moves which exactly matches the kind.
+ // Return Location::NoLocation() if no matching scratch location can be found.
+ Location GetScratchLocation(Location::Kind kind);
+
+ // Add a location to the scratch list which can be returned from GetScratchLocation() to resolve
+ // dependency cycles.
+ void AddScratchLocation(Location loc);
+
+ // Remove a location from the scratch list.
+ void RemoveScratchLocation(Location loc);
+
+ // List of scratch locations.
+ GrowableArray<Location> scratches_;
+
+ private:
+ // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy
+ // dependencies).
+ void PerformMove(size_t index);
+
+ void UpdateMoveSource(Location from, Location to);
+
+ void AddPendingMove(Location source, Location destination, Primitive::Type type);
+
+ void DeletePendingMove(MoveOperands* move);
+
+ // Find a move that may be unblocked after (loc -> XXX) is performed.
+ MoveOperands* GetUnblockedPendingMove(Location loc);
+
+ // Return true if the location is blocked by outstanding moves.
+ bool IsBlockedByMoves(Location loc);
+
+ // Return the number of pending moves.
+ size_t GetNumberOfPendingMoves();
+
+ // Additional pending moves which might be added to resolve dependency cycles.
+ GrowableArray<MoveOperands*> pending_moves_;
+
+ // Used to allocate pending MoveOperands.
+ ArenaAllocator* const allocator_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverNoSwap);
};
} // namespace art
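
The header now splits the algorithm from the backend hooks: EmitNativeCode() drives the resolution while PrepareForEmitNativeCode(), FinishEmitNativeCode(), AllocateScratchLocationFor(), FreeScratchLocation() and EmitMove() are supplied per architecture. A standalone sketch of that template-method shape, with simplified stand-in names rather than the real ART classes (and, unlike ART, a non-virtual skeleton; ART keeps EmitNativeCode() itself virtual on the common ParallelMoveResolver base):

#include <cstddef>
#include <cstdio>

class ResolverBase {
 public:
  virtual ~ResolverBase() {}
  void EmitNativeCode() {
    PrepareForEmitNativeCode();  // backend init, e.g. open a scratch pool
    EmitMove(0);                 // ...the cycle-resolution logic goes here...
    FinishEmitNativeCode();      // backend cleanup
  }

 protected:
  virtual void PrepareForEmitNativeCode() = 0;
  virtual void FinishEmitNativeCode() = 0;
  virtual void EmitMove(size_t index) = 0;
};

class MyBackendResolver : public ResolverBase {
 protected:
  void PrepareForEmitNativeCode() override { std::printf("open temps\n"); }
  void FinishEmitNativeCode() override { std::printf("close temps\n"); }
  void EmitMove(size_t index) override { std::printf("move #%zu\n", index); }
};

int main() {
  MyBackendResolver resolver;
  resolver.EmitNativeCode();
}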
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 95cca5172b..f8f70105cf 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -19,27 +19,41 @@
#include "parallel_move_resolver.h"
#include "gtest/gtest.h"
+#include "gtest/gtest-typed-test.h"
namespace art {
-class TestParallelMoveResolver : public ParallelMoveResolver {
- public:
- explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {}
-
- void Dump(Location location) {
- if (location.IsConstant()) {
- message_ << "C";
- } else if (location.IsPair()) {
- message_ << location.low() << "," << location.high();
- } else if (location.IsRegister()) {
- message_ << location.reg();
- } else if (location.IsStackSlot()) {
- message_ << location.GetStackIndex() << "(sp)";
- } else {
- message_ << "2x" << location.GetStackIndex() << "(sp)";
- DCHECK(location.IsDoubleStackSlot()) << location;
- }
+constexpr int kScratchRegisterStartIndexForTest = 100;
+
+static void DumpRegisterForTest(std::ostream& os, int reg) {
+ if (reg >= kScratchRegisterStartIndexForTest) {
+ os << "T" << reg - kScratchRegisterStartIndexForTest;
+ } else {
+ os << reg;
}
+}
+
+static void DumpLocationForTest(std::ostream& os, Location location) {
+ if (location.IsConstant()) {
+ os << "C";
+ } else if (location.IsPair()) {
+ DumpRegisterForTest(os, location.low());
+ os << ",";
+ DumpRegisterForTest(os, location.high());
+ } else if (location.IsRegister()) {
+ DumpRegisterForTest(os, location.reg());
+ } else if (location.IsStackSlot()) {
+ os << location.GetStackIndex() << "(sp)";
+ } else {
+ DCHECK(location.IsDoubleStackSlot()) << location;
+ os << "2x" << location.GetStackIndex() << "(sp)";
+ }
+}
+
+class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
+ public:
+ explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolverWithSwap(allocator) {}
void EmitMove(size_t index) OVERRIDE {
MoveOperands* move = moves_.Get(index);
@@ -47,9 +61,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
message_ << " ";
}
message_ << "(";
- Dump(move->GetSource());
+ DumpLocationForTest(message_, move->GetSource());
message_ << " -> ";
- Dump(move->GetDestination());
+ DumpLocationForTest(message_, move->GetDestination());
message_ << ")";
}
@@ -59,9 +73,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
message_ << " ";
}
message_ << "(";
- Dump(move->GetSource());
+ DumpLocationForTest(message_, move->GetSource());
message_ << " <-> ";
- Dump(move->GetDestination());
+ DumpLocationForTest(message_, move->GetDestination());
message_ << ")";
}
@@ -76,7 +90,64 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
std::ostringstream message_;
- DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver);
+ DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverWithSwap);
+};
+
+class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap {
+ public:
+ explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {}
+
+ void PrepareForEmitNativeCode() OVERRIDE {
+ scratch_index_ = kScratchRegisterStartIndexForTest;
+ }
+
+ void FinishEmitNativeCode() OVERRIDE {}
+
+ Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE {
+ if (kind == Location::kStackSlot || kind == Location::kFpuRegister ||
+ kind == Location::kRegister) {
+ kind = Location::kRegister;
+ } else {
+ // Allocate a register pair for a double stack slot, which simulates a 32-bit backend's behavior.
+ kind = Location::kRegisterPair;
+ }
+ Location scratch = GetScratchLocation(kind);
+ if (scratch.Equals(Location::NoLocation())) {
+ AddScratchLocation(Location::RegisterLocation(scratch_index_));
+ AddScratchLocation(Location::RegisterLocation(scratch_index_ + 1));
+ AddScratchLocation(Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1));
+ scratch = (kind == Location::kRegister) ? Location::RegisterLocation(scratch_index_)
+ : Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1);
+ scratch_index_ += 2;
+ }
+ return scratch;
+ }
+
+ void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
+
+ void EmitMove(size_t index) OVERRIDE {
+ MoveOperands* move = moves_.Get(index);
+ if (!message_.str().empty()) {
+ message_ << " ";
+ }
+ message_ << "(";
+ DumpLocationForTest(message_, move->GetSource());
+ message_ << " -> ";
+ DumpLocationForTest(message_, move->GetDestination());
+ message_ << ")";
+ }
+
+ std::string GetMessage() const {
+ return message_.str();
+ }
+
+ private:
+ std::ostringstream message_;
+
+ int scratch_index_;
+
+ DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverNoSwap);
};
static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
@@ -93,55 +164,102 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
return moves;
}
-TEST(ParallelMoveTest, Dependency) {
+template <typename T>
+class ParallelMoveTest : public ::testing::Test {
+ public:
+ static const bool has_swap;
+};
+
+template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true;
+template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false;
+
+typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>
+ ParallelMoveResolverTestTypes;
+
+TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes);
+
+
+TYPED_TEST(ParallelMoveTest, Dependency) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2 -> 3) (1 -> 2) (0 -> 1) (2 -> 4)", resolver.GetMessage().c_str());
+ }
}
}
-TEST(ParallelMoveTest, Swap) {
+TYPED_TEST(ParallelMoveTest, Cycle) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> 2) (0 -> 1) (2 -> 0)", resolver.GetMessage().c_str());
+ }
+ }
+
+ {
+ TypeParam resolver(&allocator);
+ static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {0, 2}};
+ resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0 -> 2) (1 -> 0) (2 -> 1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(4 -> T0) (3 -> 4) (2 -> 3) (1 -> 2) (0 -> 1) (T0 -> 0)",
+ resolver.GetMessage().c_str());
+ }
}
}
-TEST(ParallelMoveTest, ConstantLast) {
+TYPED_TEST(ParallelMoveTest, ConstantLast) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::ConstantLocation(new (&allocator) HIntConstant(0)),
@@ -157,12 +275,12 @@ TEST(ParallelMoveTest, ConstantLast) {
ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str());
}
-TEST(ParallelMoveTest, Pairs) {
+TYPED_TEST(ParallelMoveTest, Pairs) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(2),
@@ -179,7 +297,7 @@ TEST(ParallelMoveTest, Pairs) {
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -196,7 +314,7 @@ TEST(ParallelMoveTest, Pairs) {
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -209,10 +327,14 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2 -> T0) (0,1 -> 2,3) (T0 -> 0)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(2),
@@ -230,10 +352,15 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)",
+ resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(2),
@@ -251,10 +378,15 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)",
+ resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -272,10 +404,14 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(7 -> T0) (2 -> 7) (0,1 -> 2,3) (T0 -> 1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -288,10 +424,14 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2,3 -> T0,T1) (0,1 -> 2,3) (T0,T1 -> 0,1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(2, 3),
@@ -304,12 +444,85 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0,1 -> T0,T1) (2,3 -> 0,1) (T0,T1 -> 2,3)", resolver.GetMessage().c_str());
+ }
+ }
+}
+
+TYPED_TEST(ParallelMoveTest, MultiCycles) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ {
+ TypeParam resolver(&allocator);
+ static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {2, 3}, {3, 2}};
+ resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 <-> 0) (3 <-> 2)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0) (3 -> T0) (2 -> 3) (T0 -> 2)",
+ resolver.GetMessage().c_str());
+ }
+ }
+ {
+ TypeParam resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(0),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(3),
+ Location::RegisterLocation(1),
+ Primitive::kPrimInt,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2 -> T0) (3 -> T1) (0,1 -> 2,3) (T0 -> 0) (T1 -> 1)",
+ resolver.GetMessage().c_str());
+ }
+ }
+ {
+ TypeParam resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(0),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(3),
+ Location::RegisterLocation(1),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(3 -> T0) (0,1 -> T2,T3) (T0 -> 1) (2 -> 0) (T2,T3 -> 2,3)",
+ resolver.GetMessage().c_str());
+ }
}
{
// Test involving registers used in single context and pair context.
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(10),
@@ -327,17 +540,22 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2x32(sp) -> T0,T1) (4,5 -> 2x32(sp)) (10 -> 5) (T0,T1 -> 10,11)",
+ resolver.GetMessage().c_str());
+ }
}
}
// Test that we do 64bits moves before 32bits moves.
-TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(0),
@@ -355,11 +573,16 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(48(sp) -> T0) (1 -> 48(sp)) (0 -> 1) (T0 -> 0)",
+ resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -377,7 +600,12 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2x32(sp) -> T0,T1) (2,3 -> 2x32(sp)) (0,1 -> 2,3) (T0,T1 -> 0,1)",
+ resolver.GetMessage().c_str());
+ }
}
}
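
The test refactoring relies on gtest's typed tests: each TYPED_TEST body is instantiated once per resolver type, with TypeParam naming the current type and TestFixture exposing static members such as has_swap. A minimal self-contained example of the mechanism (TYPED_TEST_CASE was the macro name current when this change landed; newer gtest spells it TYPED_TEST_SUITE; link against gtest_main):

#include "gtest/gtest.h"

template <typename T>
class AddTest : public ::testing::Test {
 public:
  static const bool is_int;
};

template <> const bool AddTest<int>::is_int = true;
template <> const bool AddTest<long>::is_int = false;

typedef ::testing::Types<int, long> AddTestTypes;
TYPED_TEST_CASE(AddTest, AddTestTypes);

// The body is instantiated once per type in AddTestTypes; static fixture
// members are reached through TestFixture, exactly as the resolver tests
// reach has_swap.
TYPED_TEST(AddTest, Commutes) {
  TypeParam a = 2, b = 3;
  EXPECT_EQ(a + b, b + a);
  if (TestFixture::is_int) {
    EXPECT_EQ(sizeof(TypeParam), sizeof(int));
  }
}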