author | Nicolas Geoffray <ngeoffray@google.com> | 2015-04-20 15:20:15 +0000
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-04-20 15:20:15 +0000
commit | 27eac12a66a73eb38b5ccb45b62350cf341299d0 (patch)
tree | 7cc7eea6dd543720da2921c11f68296f5df37b07 /compiler/optimizing
parent | e40d82ffe388458c2674ec051f1dd897362692eb (diff)
parent | ad4450e5c3ffaa9566216cc6fafbf5c11186c467 (diff)
Merge "Opt compiler: Implement parallel move resolver without using swap."
Diffstat (limited to 'compiler/optimizing')

-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 4
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 122
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 14
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 4
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 4
-rw-r--r-- | compiler/optimizing/locations.h | 23
-rw-r--r-- | compiler/optimizing/nodes.h | 8
-rw-r--r-- | compiler/optimizing/parallel_move_resolver.cc | 309
-rw-r--r-- | compiler/optimizing/parallel_move_resolver.h | 120
-rw-r--r-- | compiler/optimizing/parallel_move_test.cc | 344

10 files changed, 756 insertions(+), 196 deletions(-)
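One prerequisite for the new resolver, visible in the locations.h and nodes.h hunks below, is replacing the asymmetric Contains() checks with a symmetric Location::OverlapsWith(), so that a register pair and one of its halves are treated as conflicting in either direction. A simplified model of that predicate (a bare struct with explicit low/high fields, not ART's packed Location encoding) looks roughly like this:

```cpp
#include <cassert>

struct Loc {
  int low;   // Register index of the low half (or the only register).
  int high;  // Register index of the high half, or -1 for a single register.

  bool IsPair() const { return high >= 0; }

  bool Equals(const Loc& other) const {
    return low == other.low && high == other.high;
  }

  // A pair contains a single register that is either of its halves.
  bool Contains(const Loc& other) const {
    if (Equals(other)) return true;
    if (IsPair() && !other.IsPair()) {
      return other.low == low || other.low == high;
    }
    return false;
  }

  // Symmetric overlap test, as used by the parallel move resolver.
  bool OverlapsWith(const Loc& other) const {
    return Contains(other) || other.Contains(*this);
  }
};

int main() {
  Loc pair{0, 1};    // Register pair (0,1).
  Loc reg1{1, -1};   // Single register 1: the pair's high half.
  Loc reg2{2, -1};   // Single register 2: disjoint.
  assert(pair.OverlapsWith(reg1));
  assert(reg1.OverlapsWith(pair));   // Symmetric, unlike Contains().
  assert(!pair.OverlapsWith(reg2));
  return 0;
}
```

In HParallelMove::AddMove() the DCHECK now uses OverlapsWith() as well, so a pair destination that shares a half with an earlier single-register destination is rejected, not just an exactly equal one.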
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 06f425ea21..600903621d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -96,10 +96,10 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; -class ParallelMoveResolverARM : public ParallelMoveResolver { +class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index cfc8f9c2b1..76e2ec6f42 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -425,22 +425,57 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } -void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->MoveLocation(move->GetDestination(), move->GetSource()); -} - -void ParallelMoveResolverARM64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->SwapLocations(move->GetDestination(), move->GetSource()); +void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { + // Note: There are 6 kinds of moves: + // 1. constant -> GPR/FPR (non-cycle) + // 2. constant -> stack (non-cycle) + // 3. GPR/FPR -> GPR/FPR + // 4. GPR/FPR -> stack + // 5. stack -> GPR/FPR + // 6. stack -> stack (non-cycle) + // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 + // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting + // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the + // dependency. + vixl_temps_.Open(GetVIXLAssembler()); +} + +void ParallelMoveResolverARM64::FinishEmitNativeCode() { + vixl_temps_.Close(); +} + +Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { + DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister || + kind == Location::kStackSlot || kind == Location::kDoubleStackSlot); + kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister; + Location scratch = GetScratchLocation(kind); + if (!scratch.Equals(Location::NoLocation())) { + return scratch; + } + // Allocate from VIXL temp registers. 
+ if (kind == Location::kRegister) { + scratch = LocationFrom(vixl_temps_.AcquireX()); + } else { + DCHECK(kind == Location::kFpuRegister); + scratch = LocationFrom(vixl_temps_.AcquireD()); + } + AddScratchLocation(scratch); + return scratch; } -void ParallelMoveResolverARM64::RestoreScratch(int reg) { - __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize)); +void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { + if (loc.IsRegister()) { + vixl_temps_.Release(XRegisterFrom(loc)); + } else { + DCHECK(loc.IsFpuRegister()); + vixl_temps_.Release(DRegisterFrom(loc)); + } + RemoveScratchLocation(loc); } -void ParallelMoveResolverARM64::SpillScratch(int reg) { - __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize)); +void ParallelMoveResolverARM64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->MoveLocation(move->GetDestination(), move->GetSource()); } void CodeGeneratorARM64::GenerateFrameEntry() { @@ -729,10 +764,10 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (destination.IsRegister()) { __ Mov(Register(dst), RegisterFrom(source, type)); } else { + DCHECK(destination.IsFpuRegister()); __ Fmov(FPRegister(dst), FPRegisterFrom(source, type)); } } - } else { // The destination is not a register. It must be a stack slot. DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); if (source.IsRegister() || source.IsFpuRegister()) { @@ -775,67 +810,6 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri } } -void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) { - DCHECK(!loc1.IsConstant()); - DCHECK(!loc2.IsConstant()); - - if (loc1.Equals(loc2)) { - return; - } - - UseScratchRegisterScope temps(GetAssembler()->vixl_masm_); - - bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); - bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); - bool is_fp_reg1 = loc1.IsFpuRegister(); - bool is_fp_reg2 = loc2.IsFpuRegister(); - - if (loc2.IsRegister() && loc1.IsRegister()) { - Register r1 = XRegisterFrom(loc1); - Register r2 = XRegisterFrom(loc2); - Register tmp = temps.AcquireSameSizeAs(r1); - __ Mov(tmp, r2); - __ Mov(r2, r1); - __ Mov(r1, tmp); - } else if (is_fp_reg2 && is_fp_reg1) { - FPRegister r1 = DRegisterFrom(loc1); - FPRegister r2 = DRegisterFrom(loc2); - FPRegister tmp = temps.AcquireSameSizeAs(r1); - __ Fmov(tmp, r2); - __ Fmov(r2, r1); - __ Fmov(r1, tmp); - } else if (is_slot1 != is_slot2) { - MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2); - Location reg_loc = is_slot1 ? loc2 : loc1; - CPURegister reg, tmp; - if (reg_loc.IsFpuRegister()) { - reg = DRegisterFrom(reg_loc); - tmp = temps.AcquireD(); - } else { - reg = XRegisterFrom(reg_loc); - tmp = temps.AcquireX(); - } - __ Ldr(tmp, mem); - __ Str(reg, mem); - if (reg_loc.IsFpuRegister()) { - __ Fmov(FPRegister(reg), FPRegister(tmp)); - } else { - __ Mov(Register(reg), Register(tmp)); - } - } else if (is_slot1 && is_slot2) { - MemOperand mem1 = StackOperandFrom(loc1); - MemOperand mem2 = StackOperandFrom(loc2); - Register tmp1 = loc1.IsStackSlot() ? 
temps.AcquireW() : temps.AcquireX(); - Register tmp2 = temps.AcquireSameSizeAs(tmp1); - __ Ldr(tmp1, mem1); - __ Ldr(tmp2, mem2); - __ Str(tmp1, mem2); - __ Str(tmp2, mem1); - } else { - LOG(FATAL) << "Unimplemented"; - } -} - void CodeGeneratorARM64::Load(Primitive::Type type, CPURegister dst, const MemOperand& src) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9fe60b8adc..5a358671cc 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -198,15 +198,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64); }; -class ParallelMoveResolverARM64 : public ParallelMoveResolver { +class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { public: ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {} + protected: + void PrepareForEmitNativeCode() OVERRIDE; + void FinishEmitNativeCode() OVERRIDE; + Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE; + void FreeScratchLocation(Location loc) OVERRIDE; void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; private: Arm64Assembler* GetAssembler() const; @@ -215,6 +217,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver { } CodeGeneratorARM64* const codegen_; + vixl::UseScratchRegisterScope vixl_temps_; DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64); }; @@ -322,7 +325,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // locations, and is used for optimisation and debugging. 
void MoveLocation(Location destination, Location source, Primitive::Type type = Primitive::kPrimVoid); - void SwapLocations(Location loc_1, Location loc_2); void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 368ae0fb0e..07476c6850 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -93,10 +93,10 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; -class ParallelMoveResolverX86 : public ParallelMoveResolver { +class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index b4876ef161..6cdc82262c 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -102,10 +102,10 @@ class SlowPathCodeX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); }; -class ParallelMoveResolverX86_64 : public ParallelMoveResolver { +class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index de876be9ab..c3a99150c4 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -285,17 +285,26 @@ class Location : public ValueObject { bool Contains(Location other) const { if (Equals(other)) { return true; - } else if (IsFpuRegisterPair() && other.IsFpuRegister()) { - return other.reg() == low() || other.reg() == high(); - } else if (IsRegisterPair() && other.IsRegister()) { - return other.reg() == low() || other.reg() == high(); - } else if (IsDoubleStackSlot() && other.IsStackSlot()) { - return (GetStackIndex() == other.GetStackIndex()) - || (GetStackIndex() + 4 == other.GetStackIndex()); + } else if (IsPair() || IsDoubleStackSlot()) { + return ToLow().Equals(other) || ToHigh().Equals(other); } return false; } + bool OverlapsWith(Location other) const { + // Only check the overlapping case that can happen with our register allocation algorithm. + bool overlap = Contains(other) || other.Contains(*this); + if (kIsDebugBuild && !overlap) { + // Note: These are also overlapping cases. But we are not able to handle them in + // ParallelMoveResolverWithSwap. Make sure that we do not meet such case with our compiler. 
+ if ((IsPair() && other.IsPair()) || (IsDoubleStackSlot() && other.IsDoubleStackSlot())) { + DCHECK(!Contains(other.ToLow())); + DCHECK(!Contains(other.ToHigh())); + } + } + return overlap; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c3c35acb7a..d9d15c4b18 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2113,7 +2113,7 @@ class HIntConstant : public HConstant { friend class HGraph; ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); - ART_FRIEND_TEST(ParallelMoveTest, ConstantLast); + ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; @@ -3526,7 +3526,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_)); + return !IsEliminated() && source_.OverlapsWith(loc); } // A move is redundant if it's been eliminated, if its source and @@ -3595,8 +3595,8 @@ class HParallelMove : public HTemplateInstruction<0> { } } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) - << "Same destination for two moves in a parallel move."; + DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + << "Overlapped destination for two moves in a parallel move."; } } moves_.Add(MoveOperands(source, destination, type, instruction)); diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index ad92ca59a1..54ea6f19d4 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -17,11 +17,23 @@ #include "parallel_move_resolver.h" #include "nodes.h" -#include "locations.h" namespace art { -void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { +void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { + // Perform a linear sweep of the moves to add them to the initial list of + // moves to perform, ignoring any move that is redundant (the source is + // the same as the destination, the destination is ignored and + // unallocated, or the move was already eliminated). + for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { + MoveOperands* move = parallel_move->MoveOperandsAt(i); + if (!move->IsRedundant()) { + moves_.Add(move); + } + } +} + +void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK(moves_.IsEmpty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); @@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { moves_.Reset(); } - -void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { - // Perform a linear sweep of the moves to add them to the initial list of - // moves to perform, ignoring any move that is redundant (the source is - // the same as the destination, the destination is ignored and - // unallocated, or the move was already eliminated). 
- for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { - MoveOperands* move = parallel_move->MoveOperandsAt(i); - if (!move->IsRedundant()) { - moves_.Add(move); - } - } -} - Location LowOf(Location location) { if (location.IsRegisterPair()) { return Location::RegisterLocation(location.low()); @@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati } } -MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { +MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. We // mark a move as "pending" on entry to PerformMove in order to detect @@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { } } -bool ParallelMoveResolver::IsScratchLocation(Location loc) { +bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { for (size_t i = 0; i < moves_.Size(); ++i) { if (moves_.Get(i)->Blocks(loc)) { return false; @@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) { return false; } -int ParallelMoveResolver::AllocateScratchRegister(int blocked, - int register_count, - int if_scratch, - bool* spilled) { +int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked, + int register_count, + int if_scratch, + bool* spilled) { DCHECK_NE(blocked, if_scratch); int scratch = -1; for (int reg = 0; reg < register_count; ++reg) { @@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } -ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( - ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) +ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), reg_(kNoRegister), spilled_(false) { @@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } -ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { +ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); } } +void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { + DCHECK_EQ(GetNumberOfPendingMoves(), 0u); + DCHECK(moves_.IsEmpty()); + DCHECK(scratches_.IsEmpty()); + + // Backend dependent initialization. + PrepareForEmitNativeCode(); + + // Build up a worklist of moves. + BuildInitialMoveList(parallel_move); + + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& move = *moves_.Get(i); + // Skip constants to perform them last. They don't block other moves and + // skipping such moves with register destinations keeps those registers + // free for the whole algorithm. + if (!move.IsEliminated() && !move.GetSource().IsConstant()) { + PerformMove(i); + } + } + + // Perform the moves with constant sources and register destinations with UpdateMoveSource() + // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit + // from changing the constant sources to stack locations. 
+ for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + Location destination = move->GetDestination(); + if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { + Location source = move->GetSource(); + EmitMove(i); + move->Eliminate(); + // This may introduce additional instruction dependency, but reduce number + // of moves and possible literal loads. For example, + // Original moves: + // 1234.5678 -> D0 + // 1234.5678 -> D1 + // Updated moves: + // 1234.5678 -> D0 + // D0 -> D1 + UpdateMoveSource(source, destination); + } + } + + // Perform the rest of the moves. + for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + if (!move->IsEliminated()) { + EmitMove(i); + move->Eliminate(); + } + } + + // All pending moves that we have added for resolve cycles should be performed. + DCHECK_EQ(GetNumberOfPendingMoves(), 0u); + + // Backend dependent cleanup. + FinishEmitNativeCode(); + + moves_.Reset(); + scratches_.Reset(); +} + +Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { + for (size_t i = 0; i < scratches_.Size(); ++i) { + Location loc = scratches_.Get(i); + if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { + return loc; + } + } + for (size_t i = 0; i < moves_.Size(); ++i) { + Location loc = moves_.Get(i)->GetDestination(); + if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { + return loc; + } + } + return Location::NoLocation(); +} + +void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) { + if (kIsDebugBuild) { + for (size_t i = 0; i < scratches_.Size(); ++i) { + DCHECK(!loc.Equals(scratches_.Get(i))); + } + } + scratches_.Add(loc); +} + +void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) { + DCHECK(!IsBlockedByMoves(loc)); + for (size_t i = 0; i < scratches_.Size(); ++i) { + if (loc.Equals(scratches_.Get(i))) { + scratches_.DeleteAt(i); + break; + } + } +} + +void ParallelMoveResolverNoSwap::PerformMove(size_t index) { + // Each call to this function performs a move and deletes it from the move + // graph. We first recursively perform any move blocking this one. We mark + // a move as "pending" on entry to PerformMove in order to detect cycles + // in the move graph. We use scratch location to resolve cycles, also + // additional pending moves might be added. After move has been performed, + // we will update source operand in the move graph to reduce dependencies in + // the graph. + + MoveOperands* move = moves_.Get(index); + DCHECK(!move->IsPending()); + DCHECK(!move->IsEliminated()); + if (move->IsRedundant()) { + // Previous operations on the list of moves have caused this particular move + // to become a no-op, so we can safely eliminate it. Consider for example + // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will + // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is + // used as the scratch location, the move (1 -> 2) will occur while resolving + // the cycle. When that move is emitted, the code will update moves with a '1' + // as their source to use '2' instead (see `UpdateMoveSource()`. In our example + // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be + // eliminated here. + move->Eliminate(); + return; + } + + // Clear this move's destination to indicate a pending move. The actual + // destination is saved in a stack-allocated local. Recursion may allow + // multiple moves to be pending. 
+ DCHECK(!move->GetSource().IsInvalid()); + Location destination = move->MarkPending(); + + // Perform a depth-first traversal of the move graph to resolve + // dependencies. Any unperformed, unpending move with a source the same + // as this one's destination blocks this one so recursively perform all + // such moves. + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination) && !other_move.IsPending()) { + PerformMove(i); + } + } + + // We are about to resolve this move and don't need it marked as + // pending, so restore its destination. + move->ClearPending(destination); + + // No one else should write to the move destination when the it is pending. + DCHECK(!move->IsRedundant()); + + Location source = move->GetSource(); + // The move may be blocked on several pending moves, in case we have a cycle. + if (IsBlockedByMoves(destination)) { + // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following + // sequence: + // (C -> scratch) # Emit right now. + // (A -> B) (B -> C) # Unblocked. + // (scratch -> A) # Add to pending_moves_, blocked by (A -> B). + Location::Kind kind = source.GetKind(); + DCHECK_NE(kind, Location::kConstant); + Location scratch = AllocateScratchLocationFor(kind); + // We only care about the move size. + Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt; + // Perform (C -> scratch) + move->SetDestination(scratch); + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(source, scratch); + // Add (scratch -> A). + AddPendingMove(scratch, destination, type); + } else { + // This move is not blocked. + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(source, destination); + } + + // Moves in the pending list should not block any other moves. But performing + // unblocked moves in the pending list can free scratch registers, so we do this + // as early as possible. + MoveOperands* pending_move; + while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) { + Location pending_source = pending_move->GetSource(); + Location pending_destination = pending_move->GetDestination(); + // We do not depend on the pending move index. So just delete the move instead + // of eliminating it to make the pending list cleaner. + DeletePendingMove(pending_move); + move->SetSource(pending_source); + move->SetDestination(pending_destination); + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(pending_source, pending_destination); + // Free any unblocked locations in the scratch location list. + for (size_t i = 0; i < scratches_.Size(); ++i) { + Location scratch = scratches_.Get(i); + // Only scratch overlapping with performed move source can be unblocked. + if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { + FreeScratchLocation(pending_source); + } + } + } +} + +void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { + // This function is used to reduce the dependencies in the graph after + // (from -> to) has been performed. Since we ensure there is no move with the same + // destination, (to -> X) can not be blocked while (from -> X) might still be + // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After + // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is + // a dependency between the two. If we update the source location from 1 to 2, we + // will get (0 -> 1) and (2 -> 3). There is no dependency between the two. 
+ // + // This is not something we must do, but we can use fewer scratch locations with + // this trick. For example, we can avoid using additional scratch locations for + // moves (0 -> 1), (1 -> 2), (1 -> 0). + for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + if (move->GetSource().Equals(from)) { + move->SetSource(to); + } + } +} + +void ParallelMoveResolverNoSwap::AddPendingMove(Location source, + Location destination, Primitive::Type type) { + pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); +} + +void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { + pending_moves_.Delete(move); +} + +MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { + for (size_t i = 0; i < pending_moves_.Size(); ++i) { + MoveOperands* move = pending_moves_.Get(i); + Location destination = move->GetDestination(); + // Only moves with destination overlapping with input loc can be unblocked. + if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { + return move; + } + } + return nullptr; +} + +bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { + for (size_t i = 0; i < pending_moves_.Size(); ++i) { + if (pending_moves_.Get(i)->Blocks(loc)) { + return true; + } + } + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->Blocks(loc)) { + return true; + } + } + return false; +} + +// So far it is only used for debugging purposes to make sure all pending moves +// have been performed. +size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { + return pending_moves_.Size(); +} + } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 95f8ad5b74..e89417df7d 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -19,30 +19,47 @@ #include "base/value_object.h" #include "utils/growable_array.h" +#include "locations.h" namespace art { class HParallelMove; -class Location; class MoveOperands; -/** - * Helper class to resolve a set of parallel moves. Architecture dependent code - * generator must have their own subclass that implements the `EmitMove` and `EmitSwap` - * operations. - */ +// Helper classes to resolve a set of parallel moves. Architecture dependent code generator must +// have their own subclass that implements corresponding virtual functions. class ParallelMoveResolver : public ValueObject { public: explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move); + virtual void EmitNativeCode(HParallelMove* parallel_move) = 0; + + protected: + // Build the initial list of moves. + void BuildInitialMoveList(HParallelMove* parallel_move); + + GrowableArray<MoveOperands*> moves_; + + private: + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); +}; + +// This helper class uses swap to resolve dependencies and may emit swap. +class ParallelMoveResolverWithSwap : public ParallelMoveResolver { + public: + explicit ParallelMoveResolverWithSwap(ArenaAllocator* allocator) + : ParallelMoveResolver(allocator) {} + virtual ~ParallelMoveResolverWithSwap() {} + + // Resolve a set of parallel moves, emitting assembler instructions. 
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; protected: class ScratchRegisterScope : public ValueObject { public: - ScratchRegisterScope(ParallelMoveResolver* resolver, + ScratchRegisterScope(ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers); @@ -52,11 +69,12 @@ class ParallelMoveResolver : public ValueObject { bool IsSpilled() const { return spilled_; } private: - ParallelMoveResolver* resolver_; + ParallelMoveResolverWithSwap* resolver_; int reg_; bool spilled_; }; + // Return true if the location can be scratched. bool IsScratchLocation(Location loc); // Allocate a scratch register for performing a move. The method will try to use @@ -72,15 +90,9 @@ class ParallelMoveResolver : public ValueObject { virtual void SpillScratch(int reg) = 0; virtual void RestoreScratch(int reg) = 0; - // List of moves not yet resolved. - GrowableArray<MoveOperands*> moves_; - static constexpr int kNoRegister = -1; private: - // Build the initial list of moves. - void BuildInitialMoveList(HParallelMove* parallel_move); - // Perform the move at the moves_ index in question (possibly requiring // other moves to satisfy dependencies). // @@ -99,7 +111,83 @@ class ParallelMoveResolver : public ValueObject { // the right value. MoveOperands* PerformMove(size_t index); - DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverWithSwap); +}; + +// This helper class uses additional scratch registers to resolve dependencies. It supports all kind +// of dependency cycles and does not care about the register layout. +class ParallelMoveResolverNoSwap : public ParallelMoveResolver { + public: + explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) + : ParallelMoveResolver(allocator), scratches_(allocator, 32), + pending_moves_(allocator, 8), allocator_(allocator) {} + virtual ~ParallelMoveResolverNoSwap() {} + + // Resolve a set of parallel moves, emitting assembler instructions. + void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + + protected: + // Called at the beginning of EmitNativeCode(). A subclass may put some architecture dependent + // initialization here. + virtual void PrepareForEmitNativeCode() = 0; + + // Called at the end of EmitNativeCode(). A subclass may put some architecture dependent cleanup + // here. All scratch locations will be removed after this call. + virtual void FinishEmitNativeCode() = 0; + + // Allocate a scratch location to perform a move from input kind of location. A subclass should + // implement this to get the best fit location. If there is no suitable physical register, it can + // also return a stack slot. + virtual Location AllocateScratchLocationFor(Location::Kind kind) = 0; + + // Called after a move which takes a scratch location as source. A subclass can defer the cleanup + // to FinishEmitNativeCode(). + virtual void FreeScratchLocation(Location loc) = 0; + + // Emit a move. + virtual void EmitMove(size_t index) = 0; + + // Return a scratch location from the moves which exactly matches the kind. + // Return Location::NoLocation() if no matching scratch location can be found. + Location GetScratchLocation(Location::Kind kind); + + // Add a location to the scratch list which can be returned from GetScratchLocation() to resolve + // dependency cycles. + void AddScratchLocation(Location loc); + + // Remove a location from the scratch list. + void RemoveScratchLocation(Location loc); + + // List of scratch locations. 
+ GrowableArray<Location> scratches_; + + private: + // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy + // dependencies). + void PerformMove(size_t index); + + void UpdateMoveSource(Location from, Location to); + + void AddPendingMove(Location source, Location destination, Primitive::Type type); + + void DeletePendingMove(MoveOperands* move); + + // Find a move that may be unblocked after (loc -> XXX) is performed. + MoveOperands* GetUnblockedPendingMove(Location loc); + + // Return true if the location is blocked by outstanding moves. + bool IsBlockedByMoves(Location loc); + + // Return the number of pending moves. + size_t GetNumberOfPendingMoves(); + + // Additional pending moves which might be added to resolve dependency cycle. + GrowableArray<MoveOperands*> pending_moves_; + + // Used to allocate pending MoveOperands. + ArenaAllocator* const allocator_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverNoSwap); }; } // namespace art diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 95cca5172b..f8f70105cf 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -19,27 +19,41 @@ #include "parallel_move_resolver.h" #include "gtest/gtest.h" +#include "gtest/gtest-typed-test.h" namespace art { -class TestParallelMoveResolver : public ParallelMoveResolver { - public: - explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {} - - void Dump(Location location) { - if (location.IsConstant()) { - message_ << "C"; - } else if (location.IsPair()) { - message_ << location.low() << "," << location.high(); - } else if (location.IsRegister()) { - message_ << location.reg(); - } else if (location.IsStackSlot()) { - message_ << location.GetStackIndex() << "(sp)"; - } else { - message_ << "2x" << location.GetStackIndex() << "(sp)"; - DCHECK(location.IsDoubleStackSlot()) << location; - } +constexpr int kScratchRegisterStartIndexForTest = 100; + +static void DumpRegisterForTest(std::ostream& os, int reg) { + if (reg >= kScratchRegisterStartIndexForTest) { + os << "T" << reg - kScratchRegisterStartIndexForTest; + } else { + os << reg; } +} + +static void DumpLocationForTest(std::ostream& os, Location location) { + if (location.IsConstant()) { + os << "C"; + } else if (location.IsPair()) { + DumpRegisterForTest(os, location.low()); + os << ","; + DumpRegisterForTest(os, location.high()); + } else if (location.IsRegister()) { + DumpRegisterForTest(os, location.reg()); + } else if (location.IsStackSlot()) { + os << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsDoubleStackSlot())<< location; + os << "2x" << location.GetStackIndex() << "(sp)"; + } +} + +class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { + public: + explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator) + : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { MoveOperands* move = moves_.Get(index); @@ -47,9 +61,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << " "; } message_ << "("; - Dump(move->GetSource()); + DumpLocationForTest(message_, move->GetSource()); message_ << " -> "; - Dump(move->GetDestination()); + DumpLocationForTest(message_, move->GetDestination()); message_ << ")"; } @@ -59,9 +73,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << " "; } message_ << "("; - Dump(move->GetSource()); + 
DumpLocationForTest(message_, move->GetSource()); message_ << " <-> "; - Dump(move->GetDestination()); + DumpLocationForTest(message_, move->GetDestination()); message_ << ")"; } @@ -76,7 +90,64 @@ class TestParallelMoveResolver : public ParallelMoveResolver { std::ostringstream message_; - DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver); + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverWithSwap); +}; + +class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { + public: + explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator) + : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {} + + void PrepareForEmitNativeCode() OVERRIDE { + scratch_index_ = kScratchRegisterStartIndexForTest; + } + + void FinishEmitNativeCode() OVERRIDE {} + + Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE { + if (kind == Location::kStackSlot || kind == Location::kFpuRegister || + kind == Location::kRegister) { + kind = Location::kRegister; + } else { + // Allocate register pair for double stack slot which simulates 32-bit backend's behavior. + kind = Location::kRegisterPair; + } + Location scratch = GetScratchLocation(kind); + if (scratch.Equals(Location::NoLocation())) { + AddScratchLocation(Location::RegisterLocation(scratch_index_)); + AddScratchLocation(Location::RegisterLocation(scratch_index_ + 1)); + AddScratchLocation(Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1)); + scratch = (kind == Location::kRegister) ? Location::RegisterLocation(scratch_index_) + : Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1); + scratch_index_ += 2; + } + return scratch; + } + + void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} + + void EmitMove(size_t index) OVERRIDE { + MoveOperands* move = moves_.Get(index); + if (!message_.str().empty()) { + message_ << " "; + } + message_ << "("; + DumpLocationForTest(message_, move->GetSource()); + message_ << " -> "; + DumpLocationForTest(message_, move->GetDestination()); + message_ << ")"; + } + + std::string GetMessage() const { + return message_.str(); + } + + private: + std::ostringstream message_; + + int scratch_index_; + + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverNoSwap); }; static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, @@ -93,55 +164,102 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, return moves; } -TEST(ParallelMoveTest, Dependency) { +template <typename T> +class ParallelMoveTest : public ::testing::Test { + public: + static const bool has_swap; +}; + +template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true; +template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; + +typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> + ParallelMoveResolverTestTypes; + +TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); + + +TYPED_TEST(ParallelMoveTest, Dependency) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } } 
{ - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> 3) (1 -> 2) (0 -> 1) (2 -> 4)", resolver.GetMessage().c_str()); + } } } -TEST(ParallelMoveTest, Swap) { +TYPED_TEST(ParallelMoveTest, Cycle) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> 2) (0 -> 1) (2 -> 0)", resolver.GetMessage().c_str()); + } + } + + { + TypeParam resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {0, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0 -> 2) (1 -> 0) (2 -> 1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(4 -> T0) (3 -> 4) (2 -> 3) (1 -> 2) (0 -> 1) (T0 -> 0)", + resolver.GetMessage().c_str()); + } } } -TEST(ParallelMoveTest, ConstantLast) { +TYPED_TEST(ParallelMoveTest, ConstantLast) { ArenaPool pool; ArenaAllocator allocator(&pool); - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::ConstantLocation(new (&allocator) HIntConstant(0)), @@ -157,12 +275,12 @@ TEST(ParallelMoveTest, ConstantLast) { ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str()); } -TEST(ParallelMoveTest, Pairs) { +TYPED_TEST(ParallelMoveTest, Pairs) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -179,7 +297,7 @@ TEST(ParallelMoveTest, Pairs) { } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); 
HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -196,7 +314,7 @@ TEST(ParallelMoveTest, Pairs) { } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -209,10 +327,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> T0) (0,1 -> 2,3) (T0 -> 0)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -230,10 +352,15 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -251,10 +378,15 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -272,10 +404,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(7 -> T0) (2 -> 7) (0,1 -> 2,3) (T0 -> 1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -288,10 +424,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2,3 -> T0,T1) (0,1 -> 2,3) (T0,T1 -> 0,1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(2, 3), @@ -304,12 +444,85 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); 
resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (2,3 -> 0,1) (T0,T1 -> 2,3)", resolver.GetMessage().c_str()); + } + } +} + +TYPED_TEST(ParallelMoveTest, MultiCycles) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TypeParam resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {2, 3}, {3, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 <-> 0) (3 <-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0) (3 -> T0) (2 -> 3) (T0 -> 2)", + resolver.GetMessage().c_str()); + } + } + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterLocation(3), + Location::RegisterLocation(1), + Primitive::kPrimInt, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> T0) (3 -> T1) (0,1 -> 2,3) (T0 -> 0) (T1 -> 1)", + resolver.GetMessage().c_str()); + } + } + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterLocation(3), + Location::RegisterLocation(1), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(3 -> T0) (0,1 -> T2,T3) (T0 -> 1) (2 -> 0) (T2,T3 -> 2,3)", + resolver.GetMessage().c_str()); + } } { // Test involving registers used in single context and pair context. - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(10), @@ -327,17 +540,22 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2x32(sp) -> T0,T1) (4,5 -> 2x32(sp)) (10 -> 5) (T0,T1 -> 10,11)", + resolver.GetMessage().c_str()); + } } } // Test that we do 64bits moves before 32bits moves. 
-TEST(ParallelMoveTest, CyclesWith64BitsMoves) { +TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(0), @@ -355,11 +573,16 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(48(sp) -> T0) (1 -> 48(sp)) (0 -> 1) (T0 -> 0)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -377,7 +600,12 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2x32(sp) -> T0,T1) (2,3 -> 2x32(sp)) (0,1 -> 2,3) (T0,T1 -> 0,1)", + resolver.GetMessage().c_str()); + } } } |
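A second detail of ParallelMoveResolverNoSwap::EmitNativeCode() worth isolating is the UpdateMoveSource() pass: constant-source moves into registers are emitted in their own pass, and later moves of the same constant are rewritten to copy the register that already holds it (the `1234.5678 -> D0`, then `D0 -> D1` example in the comments). The sketch below models only that rewriting, with a toy `Mv` struct and string-based locations that are illustrative, not the ART API.

```cpp
#include <cstdio>
#include <string>
#include <vector>

struct Mv {
  std::string src;  // A constant literal such as "1234.5678", or a register name.
  std::string dst;
  bool is_constant;
};

// After (from -> to) has been performed, redirect the remaining reads of
// `from` to `to`, mirroring what UpdateMoveSource() does in the resolver.
static void UpdateMoveSource(std::vector<Mv>& moves, size_t start,
                             const std::string& from, const std::string& to) {
  for (size_t i = start; i < moves.size(); ++i) {
    if (moves[i].src == from) {
      moves[i].src = to;
      moves[i].is_constant = false;
    }
  }
}

int main() {
  // Two moves of the same literal into different FP registers.
  std::vector<Mv> moves = {{"1234.5678", "D0", true}, {"1234.5678", "D1", true}};
  for (size_t i = 0; i < moves.size(); ++i) {
    std::printf("(%s -> %s) ", moves[i].src.c_str(), moves[i].dst.c_str());
    if (moves[i].is_constant) {
      // The literal now lives in moves[i].dst; later moves can copy it from
      // there instead of loading the literal again.
      UpdateMoveSource(moves, i + 1, moves[i].src, moves[i].dst);
    }
  }
  std::printf("\n");  // Prints: (1234.5678 -> D0) (D0 -> D1)
  return 0;
}
```

The real pass skips stack-slot destinations, since redirecting later sources to a stack slot would not save anything, and the comment in the commit notes the trade-off: a possible extra dependency between the two emitted moves in exchange for fewer moves and literal loads.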