diff options
author | Nicolas Geoffray <ngeoffray@google.com> | 2015-01-14 10:49:16 +0000 |
---|---|---|
committer | Nicolas Geoffray <ngeoffray@google.com> | 2015-01-21 11:27:57 +0000 |
commit | 41aedbb684ccef76ff8373f39aba606ce4cb3194 (patch) | |
tree | 94929237a0fe9b24dda7409d9433f07e82af4461 | |
parent | 97c89e4c081dcf4bacbde70b6609e366c9da417e (diff) | |
download | android_art-41aedbb684ccef76ff8373f39aba606ce4cb3194.tar.gz android_art-41aedbb684ccef76ff8373f39aba606ce4cb3194.tar.bz2 android_art-41aedbb684ccef76ff8373f39aba606ce4cb3194.zip |
Fully support pairs in the register allocator.
Enabled on ARM for longs and doubles.
Change-Id: Id8792d08bd7ca9fb049c5db8a40ae694bafc2d8b
-rw-r--r-- | compiler/optimizing/code_generator.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 131 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 3 | ||||
-rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/locations.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/locations.h | 11 | ||||
-rw-r--r-- | compiler/optimizing/nodes.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 25 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.cc | 142 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.h | 6 |
10 files changed, 247 insertions, 87 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index cdfd989bb8..fe7bdc0cdf 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -632,6 +632,14 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kRegisterPair : { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high()); + ++i; + DCHECK_LT(i, environment_size); + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 07c84bcc01..9a32ce76fa 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -37,6 +37,11 @@ static DRegister FromLowSToD(SRegister reg) { return static_cast<DRegister>(reg / 2); } +static bool ExpectedPairLayout(Location location) { + // We expected this for both core and fpu register pairs. + return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); +} + static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7 static constexpr int kCurrentMethodStackOffset = 0; @@ -625,12 +630,11 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) { uint32_t index = double_index_; double_index_ += 2; - DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) + 1, - calling_convention.GetFpuRegisterAt(index + 1)); - DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) & 1, 0); - return Location::FpuRegisterPairLocation( + Location result = Location::FpuRegisterPairLocation( calling_convention.GetFpuRegisterAt(index), calling_convention.GetFpuRegisterAt(index + 1)); + DCHECK(ExpectedPairLayout(result)); + return result; } else { return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); } @@ -721,16 +725,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); } else { - // No conflict possible, so just do the moves. DCHECK(source.IsDoubleStackSlot()); - if (destination.AsRegisterPairLow<Register>() == R1) { - DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2); - __ LoadFromOffset(kLoadWord, R1, SP, source.GetStackIndex()); - __ LoadFromOffset(kLoadWord, R2, SP, source.GetHighStackIndex(kArmWordSize)); - } else { - __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(), - SP, source.GetStackIndex()); - } + DCHECK(ExpectedPairLayout(destination)); + __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(), + SP, source.GetStackIndex()); } } else if (destination.IsFpuRegisterPair()) { if (source.IsDoubleStackSlot()) { @@ -937,6 +935,7 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. LocationSummary* locations = cond->GetLocations(); + DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0); Register left = locations->InAt(0).AsRegister<Register>(); if (locations->InAt(1).IsRegister()) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); @@ -1282,7 +1281,9 @@ void LocationsBuilderARM::VisitNeg(HNeg* neg) { switch (neg->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - bool output_overlaps = (neg->GetResultType() == Primitive::kPrimLong); + Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap; locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), output_overlaps); break; @@ -1809,12 +1810,17 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong); + case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } @@ -1849,7 +1855,8 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { } break; - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); __ adds(out.AsRegisterPairLow<Register>(), first.AsRegisterPairLow<Register>(), ShifterOperand(second.AsRegisterPairLow<Register>())); @@ -1857,6 +1864,7 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { first.AsRegisterPairHigh<Register>(), ShifterOperand(second.AsRegisterPairHigh<Register>())); break; + } case Primitive::kPrimFloat: __ vadds(out.AsFpuRegister<SRegister>(), @@ -1879,12 +1887,17 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong); + case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } case Primitive::kPrimFloat: @@ -1919,6 +1932,7 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); __ subs(out.AsRegisterPairLow<Register>(), first.AsRegisterPairLow<Register>(), ShifterOperand(second.AsRegisterPairLow<Register>())); @@ -2054,8 +2068,7 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); locations->SetInAt(1, Location::RegisterPairLocation( calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - // The runtime helper puts the output in R0,R2. - locations->SetOut(Location::RegisterPairLocation(R0, R2)); + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; } case Primitive::kPrimFloat: @@ -2092,7 +2105,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>()); DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc()); break; @@ -2275,8 +2288,8 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { locations->SetInAt(0, Location::RegisterPairLocation( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - // The runtime helper puts the output in R0,R2. - locations->SetOut(Location::RegisterPairLocation(R0, R2)); + // The runtime helper puts the output in R0,R1. + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; } default: @@ -2330,7 +2343,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>()); DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); int32_t entry_point_offset; if (op->IsShl()) { @@ -3312,16 +3325,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); } } else if (source.IsDoubleStackSlot()) { - if (destination.IsFpuRegisterPair()) { - __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), - SP, source.GetStackIndex()); - } else { - DCHECK(destination.IsDoubleStackSlot()) << destination; - __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); - __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); - } + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); } else { DCHECK(source.IsConstant()) << source; HInstruction* constant = source.GetConstant(); @@ -3334,8 +3342,47 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ LoadImmediate(IP, value); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + if (destination.IsRegister()) { + // In the presence of long or double constants, the parallel move resolver will + // split the move into two, but keeps the same constant for both moves. Here, + // we use the low or high part depending on which register this move goes to. + if (destination.reg() % 2 == 0) { + __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value)); + } else { + __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadImmediate(IP, High32Bits(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } + } else if (constant->IsDoubleConstant()) { + double value = constant->AsDoubleConstant()->GetValue(); + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (destination.IsFpuRegister()) { + // In the presence of long or double constants, the parallel move resolver will + // split the move into two, but keeps the same constant for both moves. Here, + // we use the low or high part depending on which register this move goes to. + if (destination.reg() % 2 == 0) { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), + bit_cast<float, uint32_t>(Low32Bits(int_value))); + } else { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), + bit_cast<float, uint32_t>(High32Bits(int_value))); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(int_value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadImmediate(IP, High32Bits(int_value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } } else { - DCHECK(constant->IsFloatConstant()); + DCHECK(constant->IsFloatConstant()) << constant->DebugName(); float value = constant->AsFloatConstant()->GetValue(); if (destination.IsFpuRegister()) { __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value); @@ -3626,7 +3673,9 @@ void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) || instruction->GetResultType() == Primitive::kPrimLong); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong); + Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), output_overlaps); } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 306845beb8..7dfb69f297 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1863,7 +1863,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), true); // The output does overlap inputs. + // The output does overlap inputs. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 9e0a5b89e9..df21c8e9c3 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -142,6 +142,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { codegen_.DumpFloatingPointRegister(output_, location.low()); output_ << " and "; codegen_.DumpFloatingPointRegister(output_, location.high()); + } else if (location.IsRegisterPair()) { + codegen_.DumpCoreRegister(output_, location.low()); + output_ << " and "; + codegen_.DumpCoreRegister(output_, location.high()); } else { DCHECK(location.IsDoubleStackSlot()); output_ << "2x" << location.GetStackIndex() << "(sp)"; diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 9f2f9ece85..990d662d86 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -27,7 +27,7 @@ LocationSummary::LocationSummary(HInstruction* instruction, temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), environment_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->EnvironmentSize()), - output_overlaps_(true), + output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 68d605957f..dda6c94a3d 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -37,7 +37,10 @@ std::ostream& operator<<(std::ostream& os, const Location& location); */ class Location : public ValueObject { public: - static constexpr bool kNoOutputOverlap = false; + enum OutputOverlap { + kOutputOverlap, + kNoOutputOverlap + }; enum Kind { kInvalid = 0, @@ -468,7 +471,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return inputs_.Size(); } - void SetOut(Location location, bool overlaps = true) { + void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { DCHECK(output_.IsUnallocated() || output_.IsInvalid()); output_overlaps_ = overlaps; output_ = location; @@ -561,7 +564,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } bool OutputOverlapsWithInputs() const { - return output_overlaps_; + return output_overlaps_ == Location::kOutputOverlap; } bool Intrinsified() const { @@ -574,7 +577,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. - bool output_overlaps_; + Location::OutputOverlap output_overlaps_; Location output_; const CallKind call_kind_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ade31380ec..800af409a2 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -646,7 +646,7 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { if (GetResultType() == Primitive::kPrimLong) { return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value); } else { - DCHECK(GetResultType() == Primitive::kPrimInt); + DCHECK_EQ(GetResultType(), Primitive::kPrimInt); return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fa51f27f0a..dc8a7ee7cd 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2802,18 +2802,25 @@ class HParallelMove : public HTemplateInstruction<0> { AddMove(source.ToLow(), destination.ToLow(), instruction); AddMove(source.ToHigh(), destination.ToHigh(), nullptr); } else if (source.IsPair()) { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction); AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr); } else if (destination.IsPair()) { - DCHECK(source.IsDoubleStackSlot()); - AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); - // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to - // always be 4. - static constexpr int kHighOffset = 4; - AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), - destination.ToHigh(), - nullptr); + if (source.IsConstant()) { + // We put the same constant in the move. The code generator will handle which + // low or high part to use. + AddMove(source, destination.ToLow(), instruction); + AddMove(source, destination.ToHigh(), nullptr); + } else { + DCHECK(source.IsDoubleStackSlot()); + AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); + // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to + // always be 4. + static constexpr int kHighOffset = 4; + AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), + destination.ToHigh(), + nullptr); + } } else { if (kIsDebugBuild) { if (instruction != nullptr) { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1b42e94960..6cd20249c1 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -71,7 +71,10 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, if (!Supports(instruction_set)) { return false; } - if (instruction_set == kArm64 || instruction_set == kX86_64) { + if (instruction_set == kArm64 + || instruction_set == kX86_64 + || instruction_set == kArm + || instruction_set == kThumb2) { return true; } for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { @@ -85,10 +88,6 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, current->GetType() == Primitive::kPrimDouble) { return false; } - } else if (instruction_set == kArm || instruction_set == kThumb2) { - if (current->GetType() == Primitive::kPrimLong) { - return false; - } } } } @@ -680,7 +679,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } - int reg = -1; + int reg = kNoRegister; if (current->HasRegister()) { // Some instructions have a fixed register output. reg = current->GetRegister(); @@ -696,13 +695,13 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { DCHECK(!IsBlocked(hint)); reg = hint; } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(free_until); + reg = FindAvailableRegisterPair(free_until, current->GetStart()); } else { reg = FindAvailableRegister(free_until); } } - DCHECK_NE(reg, -1); + DCHECK_NE(reg, kNoRegister); // If we could not find a register, we need to spill. if (free_until[reg] == 0) { return false; @@ -730,8 +729,8 @@ bool RegisterAllocator::IsBlocked(int reg) const { : blocked_fp_registers_[reg]; } -int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const { - int reg = -1; +int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const { + int reg = kNoRegister; // Pick the register pair that is used the last. for (size_t i = 0; i < number_of_registers_; ++i) { if (IsBlocked(i)) continue; @@ -739,24 +738,28 @@ int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const { int high_register = GetHighForLowRegister(i); if (IsBlocked(high_register)) continue; int existing_high_register = GetHighForLowRegister(reg); - if ((reg == -1) || (next_use[i] >= next_use[reg] + if ((reg == kNoRegister) || (next_use[i] >= next_use[reg] && next_use[high_register] >= next_use[existing_high_register])) { reg = i; if (next_use[i] == kMaxLifetimePosition && next_use[high_register] == kMaxLifetimePosition) { break; } + } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) { + // If one of the current register is known to be unavailable, just inconditionally + // try a new one. + reg = i; } } return reg; } int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { - int reg = -1; + int reg = kNoRegister; // Pick the register that is used the last. for (size_t i = 0; i < number_of_registers_; ++i) { if (IsBlocked(i)) continue; - if (reg == -1 || next_use[i] > next_use[reg]) { + if (reg == kNoRegister || next_use[i] > next_use[reg]) { reg = i; if (next_use[i] == kMaxLifetimePosition) break; } @@ -764,6 +767,28 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { return reg; } +bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { + for (size_t i = 0, e = active_.Size(); i < e; ++i) { + LiveInterval* active = active_.Get(i); + DCHECK(active->HasRegister()); + // Split the first interval found. + if (first_register_use <= next_use[active->GetRegister()] + && !active->IsLowInterval() + && !active->IsHighInterval()) { + LiveInterval* split = Split(active, position); + active_.DeleteAt(i); + if (split != active) { + handled_.Add(active); + } + AddSorted(unhandled_, split); + return true; + } + } + return false; +} + // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -824,27 +849,46 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } } - int reg = -1; + int reg = kNoRegister; + bool should_spill = false; if (current->HasRegister()) { DCHECK(current->IsHighInterval()); reg = current->GetRegister(); + // When allocating the low part, we made sure the high register was available. + DCHECK_LT(first_register_use, next_use[reg]); } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use); + reg = FindAvailableRegisterPair(next_use, current->GetStart()); + // We should spill if both registers are not available. + should_spill = (first_register_use >= next_use[reg]) + || (first_register_use >= next_use[GetHighForLowRegister(reg)]); } else { DCHECK(!current->IsHighInterval()); reg = FindAvailableRegister(next_use); + should_spill = (first_register_use >= next_use[reg]); } - if ((first_register_use >= next_use[reg]) - || (current->IsLowInterval() && first_register_use >= next_use[GetHighForLowRegister(reg)])) { + DCHECK_NE(reg, kNoRegister); + if (should_spill) { DCHECK(!current->IsHighInterval()); - // If the first use of that instruction is after the last use of the found - // register, we split this interval just before its first register use. - AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use - 1); - DCHECK_NE(current, split) << "There is not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName(); - AddSorted(unhandled_, split); + bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1)); + if (is_allocation_at_use_site + && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) { + // If we're allocating a register for `current` because the instruction at + // that position requires it, but we think we should spill, then there are + // non-pair intervals blocking the allocation. We split the first + // interval found, and put ourselves first in the `unhandled_` list. + AddSorted(unhandled_, current); + } else { + // If the first use of that instruction is after the last use of the found + // register, we split this interval just before its first register use. + AllocateSpillSlotFor(current); + LiveInterval* split = Split(current, first_register_use - 1); + DCHECK_NE(current, split) << "There is not enough registers available for " + << split->GetParent()->GetDefinedBy()->DebugName() << " " + << split->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1; + AddSorted(unhandled_, split); + } return false; } else { // Use this register and spill the active and inactives interval that @@ -861,6 +905,23 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { handled_.Add(active); } AddSorted(unhandled_, split); + + if (active->IsLowInterval() || active->IsHighInterval()) { + LiveInterval* other_half = active->IsLowInterval() + ? active->GetHighInterval() + : active->GetLowInterval(); + // We also need to remove the other half from the list of actives. + bool found = false; + for (size_t j = 0; j < active_.Size(); ++j) { + if (active_.Get(j) == other_half) { + found = true; + active_.DeleteAt(j); + handled_.Add(other_half); + break; + } + } + DCHECK(found); + } break; } } @@ -893,6 +954,25 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { --e; handled_.Add(inactive); AddSorted(unhandled_, split); + + if (inactive->IsLowInterval() || inactive->IsHighInterval()) { + LiveInterval* other_half = inactive->IsLowInterval() + ? inactive->GetHighInterval() + : inactive->GetLowInterval(); + + // We also need to remove the other half from the list of inactives. + bool found = false; + for (size_t j = 0; j < inactive_.Size(); ++j) { + if (inactive_.Get(j) == other_half) { + found = true; + inactive_.DeleteAt(j); + --e; + handled_.Add(other_half); + break; + } + } + DCHECK(found); + } } } } @@ -907,7 +987,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter size_t insert_at = 0; for (size_t i = array->Size(); i > 0; --i) { LiveInterval* current = array->Get(i - 1); - if (current->StartsAfter(interval)) { + // High intervals must be processed right after their low equivalent. + if (current->StartsAfter(interval) && !current->IsHighInterval()) { insert_at = i; break; } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { @@ -1026,6 +1107,7 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { static bool IsValidDestination(Location destination) { return destination.IsRegister() + || destination.IsRegisterPair() || destination.IsFpuRegister() || destination.IsFpuRegisterPair() || destination.IsStackSlot() @@ -1066,7 +1148,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); @@ -1130,7 +1212,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; DCHECK_EQ(block->GetSuccessors().Size(), 1u); @@ -1160,7 +1242,7 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; HInstruction* first = block->GetFirstInstruction(); @@ -1178,7 +1260,7 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; if (instruction->IsPhi()) { @@ -1283,6 +1365,8 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->AddLiveRegister(source); break; } + + case Location::kRegisterPair: case Location::kFpuRegisterPair: { locations->AddLiveRegister(source.ToLow()); locations->AddLiveRegister(source.ToHigh()); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index ec46a776b5..b8f70bdc18 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -128,9 +128,13 @@ class RegisterAllocator { bool ValidateInternal(bool log_fatal_on_failure) const; void DumpInterval(std::ostream& stream, LiveInterval* interval) const; void DumpAllIntervals(std::ostream& stream) const; - int FindAvailableRegisterPair(size_t* next_use) const; + int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; int FindAvailableRegister(size_t* next_use) const; + // Try splitting an active non-pair interval at the given `position`. + // Returns whether it was successful at finding such an interval. + bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use); + ArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; |