diff options
author | Nicolas Geoffray <ngeoffray@google.com> | 2015-03-05 11:22:00 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-03-05 11:22:01 +0000 |
commit | af8db2ea18135588b267fe9a0b2f7af734b906cc (patch) | |
tree | b8bdb820be33317f23ef1d3e43d13b2b6bfb3ba5 /compiler | |
parent | 65b50272a15c52d753f68df2468fe1792f2516ea (diff) | |
parent | 5f8741860d465410bfed495dbb5f794590d338da (diff) | |
download | android_art-af8db2ea18135588b267fe9a0b2f7af734b906cc.tar.gz android_art-af8db2ea18135588b267fe9a0b2f7af734b906cc.tar.bz2 android_art-af8db2ea18135588b267fe9a0b2f7af734b906cc.zip |
Merge "[optimizing] Use callee-save registers for x86"
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 65 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 74 |
3 files changed, 82 insertions, 58 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index ba5f7d8fab..ed3f949afe 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -216,6 +216,29 @@ int32_t CodeGenerator::GetStackSlot(HLocal* local) const { } } +void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { + // The DCHECKS below check that a register is not specified twice in + // the summary. The out location can overlap with an input, so we need + // to special case it. + if (location.IsRegister()) { + DCHECK(is_out || !blocked_core_registers_[location.reg()]); + blocked_core_registers_[location.reg()] = true; + } else if (location.IsFpuRegister()) { + DCHECK(is_out || !blocked_fpu_registers_[location.reg()]); + blocked_fpu_registers_[location.reg()] = true; + } else if (location.IsFpuRegisterPair()) { + DCHECK(is_out || !blocked_fpu_registers_[location.AsFpuRegisterPairLow<int>()]); + blocked_fpu_registers_[location.AsFpuRegisterPairLow<int>()] = true; + DCHECK(is_out || !blocked_fpu_registers_[location.AsFpuRegisterPairHigh<int>()]); + blocked_fpu_registers_[location.AsFpuRegisterPairHigh<int>()] = true; + } else if (location.IsRegisterPair()) { + DCHECK(is_out || !blocked_core_registers_[location.AsRegisterPairLow<int>()]); + blocked_core_registers_[location.AsRegisterPairLow<int>()] = true; + DCHECK(is_out || !blocked_core_registers_[location.AsRegisterPairHigh<int>()]); + blocked_core_registers_[location.AsRegisterPairHigh<int>()] = true; + } +} + void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { LocationSummary* locations = instruction->GetLocations(); if (locations == nullptr) return; @@ -234,46 +257,19 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { // Mark all fixed input, temp and output registers as used. for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { - Location loc = locations->InAt(i); - // The DCHECKS below check that a register is not specified twice in - // the summary. - if (loc.IsRegister()) { - DCHECK(!blocked_core_registers_[loc.reg()]); - blocked_core_registers_[loc.reg()] = true; - } else if (loc.IsFpuRegister()) { - DCHECK(!blocked_fpu_registers_[loc.reg()]); - blocked_fpu_registers_[loc.reg()] = true; - } else if (loc.IsFpuRegisterPair()) { - DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()]); - blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()] = true; - DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()]); - blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()] = true; - } else if (loc.IsRegisterPair()) { - DCHECK(!blocked_core_registers_[loc.AsRegisterPairLow<int>()]); - blocked_core_registers_[loc.AsRegisterPairLow<int>()] = true; - DCHECK(!blocked_core_registers_[loc.AsRegisterPairHigh<int>()]); - blocked_core_registers_[loc.AsRegisterPairHigh<int>()] = true; - } + BlockIfInRegister(locations->InAt(i)); } for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) { Location loc = locations->GetTemp(i); - // The DCHECKS below check that a register is not specified twice in - // the summary. - if (loc.IsRegister()) { - DCHECK(!blocked_core_registers_[loc.reg()]); - blocked_core_registers_[loc.reg()] = true; - } else if (loc.IsFpuRegister()) { - DCHECK(!blocked_fpu_registers_[loc.reg()]); - blocked_fpu_registers_[loc.reg()] = true; - } else { - DCHECK(loc.GetPolicy() == Location::kRequiresRegister - || loc.GetPolicy() == Location::kRequiresFpuRegister); - } + BlockIfInRegister(loc); + } + Location result_location = locations->Out(); + if (locations->OutputCanOverlapWithInputs()) { + BlockIfInRegister(result_location, /* is_out */ true); } - static constexpr bool kBaseline = true; - SetupBlockedRegisters(kBaseline); + SetupBlockedRegisters(/* is_baseline */ true); // Allocate all unallocated input locations. for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { @@ -318,7 +314,6 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { locations->SetTempAt(i, loc); } } - Location result_location = locations->Out(); if (result_location.IsUnallocated()) { switch (result_location.GetPolicy()) { case Location::kAny: diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f46a36d02f..5146afad8d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -378,6 +378,7 @@ class CodeGenerator { void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); + void BlockIfInRegister(Location location, bool is_out = false) const; HGraph* const graph_; const CompilerOptions& compiler_options_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 2e897f1ef5..4b8adddfe6 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -37,14 +37,13 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); +static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI }; static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); static constexpr int kC2ConditionMask = 0x400; -// Marker for places that can be updated once we don't follow the quick ABI. -static constexpr bool kFollowsQuickABI = true; static constexpr int kFakeReturnRegister = Register(8); class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { @@ -371,8 +370,15 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) - : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, - kNumberOfRegisterPairs, (1 << kFakeReturnRegister), 0, compiler_options), + : CodeGenerator(graph, + kNumberOfCpuRegisters, + kNumberOfXmmRegisters, + kNumberOfRegisterPairs, + ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), + arraysize(kCoreCalleeSaves)) + | (1 << kFakeReturnRegister), + 0, + compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -427,18 +433,18 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { return Location(); } -void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { +void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const { // Don't allocate the dalvik style register pair passing. blocked_register_pairs_[ECX_EDX] = true; // Stack register is always reserved. blocked_core_registers_[ESP] = true; - // TODO: We currently don't use Quick's callee saved registers. - DCHECK(kFollowsQuickABI); - blocked_core_registers_[EBP] = true; - blocked_core_registers_[ESI] = true; - blocked_core_registers_[EDI] = true; + if (is_baseline) { + blocked_core_registers_[EBP] = true; + blocked_core_registers_[ESI] = true; + blocked_core_registers_[EDI] = true; + } UpdateBlockedPairRegisters(); } @@ -470,15 +476,33 @@ void CodeGeneratorX86::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - if (!HasEmptyFrame()) { - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); - __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + if (HasEmptyFrame()) { + return; } + + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ pushl(reg); + } + } + + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } void CodeGeneratorX86::GenerateFrameExit() { - if (!HasEmptyFrame()) { - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + if (HasEmptyFrame()) { + return; + } + + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + + for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ popl(reg); + } } } @@ -907,7 +931,8 @@ void LocationsBuilderX86::VisitCondition(HCondition* comp) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); if (comp->NeedsMaterialization()) { - locations->SetOut(Location::RequiresRegister()); + // We need a byte register. + locations->SetOut(Location::RegisterLocation(ECX)); } } @@ -1345,8 +1370,10 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-byte' instruction. - locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0))); + // Make the output overlap to please the register allocator. This greatly simplifies + // the validation of the linear scan implementation + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; default: @@ -3161,15 +3188,16 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { + // This location builder might end up asking to up to four registers, which is + // not currently possible for baseline. The situation in which we need four + // registers cannot be met by baseline though, because it has not run any + // optimization. + Primitive::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - DCHECK(kFollowsQuickABI); - bool not_enough_registers = needs_write_barrier - && !instruction->GetValue()->IsConstant() - && !instruction->GetIndex()->IsConstant(); - bool needs_runtime_call = instruction->NeedsTypeCheck() || not_enough_registers; + bool needs_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, |