diff options
author | Mark Mendell <mark.p.mendell@intel.com> | 2015-01-13 09:20:58 -0500 |
---|---|---|
committer | Mark Mendell <mark.p.mendell@intel.com> | 2015-01-15 11:21:37 -0500 |
commit | f85a9ca9859ad843dc03d3a2b600afbaf2e9bbdd (patch) | |
tree | a802042fa7a3a8cb820916d558e630596daaa9b4 /compiler | |
parent | 8fccea249b1a6f1469eeea42c2b2cca06ce1c70d (diff) | |
download | art-f85a9ca9859ad843dc03d3a2b600afbaf2e9bbdd.tar.gz art-f85a9ca9859ad843dc03d3a2b600afbaf2e9bbdd.tar.bz2 art-f85a9ca9859ad843dc03d3a2b600afbaf2e9bbdd.zip |
[optimizing compiler] Compute live spill size
The current stack frame calculation assumes that each live register to
be saved/restored has the word size of the machine. This fails for X86,
where a double in an XMM register takes up 8 bytes. Change the
calculation to keep track of the number of core registers and number of
fp registers to handle this distinction.
This is slightly pessimal, as the core and fp registers may not all be
live at the same safepoint, but the only ways to compute an exact size
would be to allocate both classes of registers simultaneously, or to
remember all the active intervals, match them up, and compute the spill
size needed at each safepoint interval.
Change-Id: If7860aa319b625c214775347728cdf49a56946eb
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 7 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 5 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 5 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 5 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.cc | 24 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.h | 7 |
8 files changed, 46 insertions, 15 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0c1ff9bff5..9e8907078b 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { + GetGraph()->GetTemporariesVRegSlots() + 1 /* filler */, 0, /* the baseline compiler does not have live registers at slow path */ + 0, /* the baseline compiler does not have live registers at slow path */ GetGraph()->GetMaximumNumberOfOutVRegs() + 1 /* current method */); GenerateFrameEntry(); @@ -136,14 +137,16 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l } void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_registers, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots) { first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; SetFrameSize(RoundUp( number_of_spill_slots * kVRegSize + number_of_out_slots * kVRegSize - + maximum_number_of_live_registers * GetWordSize() + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + FrameEntrySpillSize(), kStackAlignment)); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 8d28f3da25..88e50b6c88 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -109,9 +109,11 @@ class CodeGenerator { virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; + virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_registers, + size_t 
maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots); virtual size_t FrameEntrySpillSize() const = 0; int32_t GetStackSlot(HLocal* local) const; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index c1b4eda3a4..a753ec3a3c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -175,6 +175,11 @@ class CodeGeneratorARM : public CodeGenerator { return kArmWordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // Allocated in S registers, which are word sized. + return kArmWordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index e4da07be43..590bc1d778 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -191,6 +191,11 @@ class CodeGeneratorARM64 : public CodeGenerator { return kArm64WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // Allocated in D registers, which are word sized. + return kArm64WordSize; + } + uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { vixl::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index acde122917..2d8adb2cf1 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -166,6 +166,11 @@ class CodeGeneratorX86 : public CodeGenerator { return kX86WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // 8 bytes == 2 words for each spill. 
+ return 2 * kX86WordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 87f6b0f779..343fba3017 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -169,6 +169,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { return kX86_64WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + return kX86_64WordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1efc52b9ec..d2f4f9b7dc 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -56,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, blocked_core_registers_(codegen->GetBlockedCoreRegisters()), blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), reserved_out_slots_(0), - maximum_number_of_live_registers_(0) { + maximum_number_of_live_core_registers_(0), + maximum_number_of_live_fp_registers_(0) { codegen->SetupBlockedRegisters(); physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); @@ -185,9 +186,6 @@ void RegisterAllocator::AllocateRegistersInternal() { } LinearScan(); - size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_; - maximum_number_of_live_registers_ = 0; - inactive_.Reset(); active_.Reset(); handled_.Reset(); @@ -207,7 +205,6 @@ void RegisterAllocator::AllocateRegistersInternal() { } } LinearScan(); - maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers; } void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { @@ -602,8 +599,13 @@ void 
RegisterAllocator::LinearScan() { if (current->IsSlowPathSafepoint()) { // Synthesized interval to record the maximum number of live registers // at safepoints. No need to allocate a register for it. - maximum_number_of_live_registers_ = - std::max(maximum_number_of_live_registers_, active_.Size()); + if (processing_core_registers_) { + maximum_number_of_live_core_registers_ = + std::max(maximum_number_of_live_core_registers_, active_.Size()); + } else { + maximum_number_of_live_fp_registers_ = + std::max(maximum_number_of_live_fp_registers_, active_.Size()); + } DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart()); continue; } @@ -1255,8 +1257,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { switch (source.GetKind()) { case Location::kRegister: { locations->AddLiveRegister(source); - DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_); - + DCHECK_LE(locations->GetNumberOfLiveRegisters(), + maximum_number_of_live_core_registers_ + + maximum_number_of_live_fp_registers_); if (current->GetType() == Primitive::kPrimNot) { locations->SetRegisterBit(source.reg()); } @@ -1349,7 +1352,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, void RegisterAllocator::Resolve() { codegen_->ComputeFrameSize( - spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_); + spill_slots_.Size(), maximum_number_of_live_core_registers_, + maximum_number_of_live_fp_registers_, reserved_out_slots_); // Adjust the Out Location of instructions. // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index c152a8bf67..70841b8e1f 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -190,8 +190,11 @@ class RegisterAllocator { // Slots reserved for out arguments. 
size_t reserved_out_slots_; - // The maximum live registers at safepoints. - size_t maximum_number_of_live_registers_; + // The maximum live core registers at safepoints. + size_t maximum_number_of_live_core_registers_; + + // The maximum live FP registers at safepoints. + size_t maximum_number_of_live_fp_registers_; ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil); |