-rw-r--r--  compiler/optimizing/code_generator.cc        | 53
-rw-r--r--  compiler/optimizing/code_generator.h         | 23
-rw-r--r--  compiler/optimizing/code_generator_arm.cc    |  4
-rw-r--r--  compiler/optimizing/code_generator_arm.h     |  2
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  |  4
-rw-r--r--  compiler/optimizing/code_generator_arm64.h   |  2
-rw-r--r--  compiler/optimizing/code_generator_x86.cc    |  4
-rw-r--r--  compiler/optimizing/code_generator_x86.h     |  2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 62
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  |  2
-rw-r--r--  compiler/optimizing/locations.h              | 21
-rw-r--r--  compiler/optimizing/register_allocator.cc    | 30
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h     |  6
-rw-r--r--  runtime/check_reference_map_visitor.h        |  2
-rw-r--r--  runtime/thread.cc                            | 11
15 files changed, 168 insertions(+), 60 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bc9649fe3b..0af70f9b90 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -236,7 +236,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
     }
   }
 
-  SetupBlockedRegisters();
+  static constexpr bool kBaseline = true;
+  SetupBlockedRegisters(kBaseline);
 
   // Allocate all unallocated input locations.
   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
@@ -547,8 +548,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
   size_t environment_size = instruction->EnvironmentSize();
 
-  size_t register_mask = 0;
   size_t inlining_depth = 0;
+  uint32_t register_mask = locations->GetRegisterMask();
+  if (locations->OnlyCallsOnSlowPath()) {
+    // In case of slow path, we currently set the location of caller-save registers
+    // to register (instead of their stack location when pushed before the slow-path
+    // call). Therefore register_mask contains both callee-save and caller-save
+    // registers that hold objects. We must remove the caller-save from the mask, since
+    // they will be overwritten by the callee.
+    register_mask &= core_callee_save_mask_;
+  }
+  // The register mask must be a subset of callee-save registers.
+  DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   stack_map_stream_.AddStackMapEntry(
       dex_pc, pc_info.native_pc, register_mask,
       locations->GetStackMask(), environment_size,
       inlining_depth);
@@ -684,20 +695,24 @@ void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
   size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    if (register_set->ContainsCoreRegister(i)) {
-      // If the register holds an object, update the stack mask.
-      if (locations->RegisterContainsObject(i)) {
-        locations->SetStackBit(stack_offset / kVRegSize);
+    if (!IsCoreCalleeSaveRegister(i)) {
+      if (register_set->ContainsCoreRegister(i)) {
+        // If the register holds an object, update the stack mask.
+        if (locations->RegisterContainsObject(i)) {
+          locations->SetStackBit(stack_offset / kVRegSize);
+        }
+        DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
+        stack_offset += SaveCoreRegister(stack_offset, i);
       }
-      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
-      stack_offset += SaveCoreRegister(stack_offset, i);
     }
   }
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    if (register_set->ContainsFloatingPointRegister(i)) {
-      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
-      stack_offset += SaveFloatingPointRegister(stack_offset, i);
+    if (!IsFloatingPointCalleeSaveRegister(i)) {
+      if (register_set->ContainsFloatingPointRegister(i)) {
+        DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
+        stack_offset += SaveFloatingPointRegister(stack_offset, i);
+      }
     }
   }
 }
@@ -706,16 +721,20 @@ void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
   size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    if (register_set->ContainsCoreRegister(i)) {
-      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
-      stack_offset += RestoreCoreRegister(stack_offset, i);
+    if (!IsCoreCalleeSaveRegister(i)) {
+      if (register_set->ContainsCoreRegister(i)) {
+        DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
+        stack_offset += RestoreCoreRegister(stack_offset, i);
+      }
     }
   }
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    if (register_set->ContainsFloatingPointRegister(i)) {
-      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
-      stack_offset += RestoreFloatingPointRegister(stack_offset, i);
+    if (!IsFloatingPointCalleeSaveRegister(i)) {
+      if (register_set->ContainsFloatingPointRegister(i)) {
+        DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
+        stack_offset += RestoreFloatingPointRegister(stack_offset, i);
+      }
    }
  }
}
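The RecordPcInfo change above enforces that only callee-save registers appear in a stack map's register mask, since caller-save registers are clobbered by any call. Reduced to a standalone sketch, with illustrative names rather than ART's:

    #include <cassert>
    #include <cstdint>

    // Drop caller-save registers from the mask of registers holding objects.
    // `only_calls_on_slow_path` mirrors LocationSummary::OnlyCallsOnSlowPath().
    uint32_t FilterRegisterMaskForStackMap(uint32_t register_mask,
                                           uint32_t core_callee_save_mask,
                                           bool only_calls_on_slow_path) {
      if (only_calls_on_slow_path) {
        // Caller-saves were spilled to the stack before the slow-path call;
        // the stack mask, not the register mask, describes them.
        register_mask &= core_callee_save_mask;
      }
      // Same invariant as the DCHECK_EQ in the hunk above.
      assert((register_mask & core_callee_save_mask) == register_mask);
      return register_mask;
    }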
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index f66aed912a..16080a47eb 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -127,7 +127,7 @@ class CodeGenerator {
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
 
-  virtual void SetupBlockedRegisters() const = 0;
+  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
 
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
@@ -151,6 +151,14 @@ class CodeGenerator {
   }
   virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
 
+  bool IsCoreCalleeSaveRegister(int reg) const {
+    return (core_callee_save_mask_ & (1 << reg)) != 0;
+  }
+
+  bool IsFloatingPointCalleeSaveRegister(int reg) const {
+    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
+  }
+
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
@@ -203,11 +211,17 @@ class CodeGenerator {
     return type == Primitive::kPrimNot && !value->IsIntConstant();
   }
 
+  void AddAllocatedRegister(Location location) {
+    allocated_registers_.Add(location);
+  }
+
  protected:
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
                 size_t number_of_register_pairs,
+                uint32_t core_callee_save_mask,
+                uint32_t fpu_callee_save_mask,
                 const CompilerOptions& compiler_options)
       : frame_size_(kUninitializedFrameSize),
        core_spill_mask_(0),
@@ -218,6 +232,8 @@ class CodeGenerator {
         number_of_core_registers_(number_of_core_registers),
         number_of_fpu_registers_(number_of_fpu_registers),
         number_of_register_pairs_(number_of_register_pairs),
+        core_callee_save_mask_(core_callee_save_mask),
+        fpu_callee_save_mask_(fpu_callee_save_mask),
         graph_(graph),
         compiler_options_(compiler_options),
         pc_infos_(graph->GetArena(), 32),
@@ -243,6 +259,9 @@ class CodeGenerator {
   uint32_t core_spill_mask_;
   uint32_t first_register_slot_in_slow_path_;
 
+  // Registers that were allocated during linear scan.
+  RegisterSet allocated_registers_;
+
   // Arrays used when doing register allocation to know which
   // registers we can allocate. `SetupBlockedRegisters` updates the
   // arrays.
@@ -252,6 +271,8 @@ class CodeGenerator {
   size_t number_of_core_registers_;
   size_t number_of_fpu_registers_;
   size_t number_of_register_pairs_;
+  const uint32_t core_callee_save_mask_;
+  const uint32_t fpu_callee_save_mask_;
 
  private:
   void InitLocations(HInstruction* instruction);
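The two new predicates are plain bit tests against the per-backend masks. A quick sketch of how they behave, assuming x86-64's callee-save set (RBX, RBP, R12-R15, i.e. bits 3, 5 and 12-15; the mask constant below is derived from the standard register numbering, not copied from ART):

    #include <cstdint>

    constexpr uint32_t kCoreCalleeSaveMask =
        (1u << 3) | (1u << 5) | (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);

    constexpr bool IsCoreCalleeSaveRegister(int reg) {
      return (kCoreCalleeSaveMask & (1u << reg)) != 0;
    }

    static_assert(IsCoreCalleeSaveRegister(3), "RBX (3) is callee-save");
    static_assert(!IsCoreCalleeSaveRegister(0), "RAX (0) is caller-save");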
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c6a6974792..bc8858bc8c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -390,7 +390,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
                                    const ArmInstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options)
     : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters,
-                    kNumberOfRegisterPairs, compiler_options),
+                    kNumberOfRegisterPairs, 0, 0, compiler_options),
       block_labels_(graph->GetArena(), 0),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
@@ -453,7 +453,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
   return Location();
 }
 
-void CodeGeneratorARM::SetupBlockedRegisters() const {
+void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 0de6669aa7..f3b1ff5edc 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -197,7 +197,7 @@ class CodeGeneratorARM : public CodeGenerator {
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters() const OVERRIDE;
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
 
   Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 760d2bed32..21c1e9c5fc 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -567,6 +567,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
+                    0,
+                    0,
                     compiler_options),
       block_labels_(nullptr),
       location_builder_(graph, this),
@@ -729,7 +731,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
   __ Bind(&done);
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters() const {
+void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
   // Block reserved registers:
   //   ip0 (VIXL temporary)
   //   ip1 (VIXL temporary)
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 27c6fbdbf4..d81e481c4e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -217,7 +217,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters() const OVERRIDE;
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
   // AllocateFreeRegister() is only used when allocating registers locally
   // during CompileBaseline().
   Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2d304122fa..9e26ddd80d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -375,7 +375,7 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
     : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters,
-                    kNumberOfRegisterPairs, compiler_options),
+                    kNumberOfRegisterPairs, 0, 0, compiler_options),
       block_labels_(graph->GetArena(), 0),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
@@ -431,7 +431,7 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
   return Location();
 }
 
-void CodeGeneratorX86::SetupBlockedRegisters() const {
+void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index a9086f8876..dcfeb2fb99 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -196,7 +196,7 @@ class CodeGeneratorX86 : public CodeGenerator {
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters() const OVERRIDE;
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
 
   Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
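Across all four back ends the new is_baseline flag encodes one policy: the baseline allocator has no prologue support for spilling callee-saves, so they must stay blocked; the optimizing allocator may hand them out, because the frame entry (see the x86-64 changes below) saves whichever ones were actually allocated. ARM, ARM64 and x86 pass empty callee-save masks for now, so only x86-64 changes behavior. A sketch of the policy under those assumptions, in illustrative code rather than ART's:

    #include <cstddef>

    // blocked[] is indexed by register number; callee_saves lists the
    // registers the ABI requires a callee to preserve.
    void SetupBlockedRegistersSketch(bool is_baseline,
                                     bool blocked[],
                                     const int callee_saves[],
                                     size_t num_callee_saves) {
      if (is_baseline) {
        // Baseline: no prologue spills, so callee-saves are off limits.
        for (size_t i = 0; i < num_callee_saves; ++i) {
          blocked[callee_saves[i]] = true;
        }
      }
      // Optimizing: leave them allocatable; the frame entry pushes the ones
      // linear scan actually used.
    }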
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index da83b76bbb..285003de58 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -46,6 +46,7 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength =
 static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
+static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 
 static constexpr int kC2ConditionMask = 0x400;
 
@@ -416,17 +417,27 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin
   return kX86_64WordSize;
 }
 
+static uint32_t ComputeCoreCalleeSaveMask() {
+  uint32_t mask = 0;
+  for (size_t i = 0, e = arraysize(kCoreCalleeSaves); i < e; ++i) {
+    mask |= (1 << kCoreCalleeSaves[i]);
+  }
+  return mask;
+}
+
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
-    : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0, compiler_options),
+    : CodeGenerator(graph,
+                    kNumberOfCpuRegisters,
+                    kNumberOfFloatRegisters,
+                    0,
+                    ComputeCoreCalleeSaveMask(),
+                    0,
+                    compiler_options),
       block_labels_(graph->GetArena(), 0),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this) {}
 
-size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
-  return kNumberOfPushedRegistersAtEntry * kX86_64WordSize;
-}
-
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                                CodeGeneratorX86_64* codegen)
       : HGraphVisitor(graph),
@@ -459,21 +470,26 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
   return Location();
 }
 
-void CodeGeneratorX86_64::SetupBlockedRegisters() const {
+size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
+  uint32_t mask = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+  return kNumberOfPushedRegistersAtEntry * kX86_64WordSize
+      + __builtin_popcount(mask) * kX86_64WordSize;
+}
+
+void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
   // Stack register is always reserved.
   blocked_core_registers_[RSP] = true;
 
   // Block the register used as TMP.
   blocked_core_registers_[TMP] = true;
 
-  // TODO: We currently don't use Quick's callee saved registers.
-  blocked_core_registers_[RBX] = true;
-  blocked_core_registers_[RBP] = true;
-  blocked_core_registers_[R12] = true;
-  blocked_core_registers_[R13] = true;
-  blocked_core_registers_[R14] = true;
-  blocked_core_registers_[R15] = true;
+  if (is_baseline) {
+    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
+    }
+  }
 
+  // TODO: We currently don't use Quick's FP callee saved registers.
   blocked_fpu_registers_[XMM12] = true;
   blocked_fpu_registers_[XMM13] = true;
   blocked_fpu_registers_[XMM14] = true;
@@ -484,6 +500,7 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
   // Create a fake register to mimic Quick.
   static const int kFakeReturnRegister = 16;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
+  core_spill_mask_ |= (allocated_registers_.GetCoreRegisters() & core_callee_save_mask_);
 
   bool skip_overflow_check = IsLeafMethod()
       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -494,10 +511,14 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
         CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
     RecordPcInfo(nullptr, 0);
   }
+
+  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+    if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
+      __ pushq(CpuRegister(kCoreCalleeSaves[i]));
+    }
+  }
 
-  // The return PC has already been pushed on the stack.
-  __ subq(CpuRegister(RSP),
-          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
 
   if (!skip_overflow_check && !implicitStackOverflowChecks) {
     SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
@@ -512,8 +533,13 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
 }
 
 void CodeGeneratorX86_64::GenerateFrameExit() {
-  __ addq(CpuRegister(RSP),
-          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+
+  for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+    if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
+      __ popq(CpuRegister(kCoreCalleeSaves[i]));
+    }
+  }
 }
 
 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
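To make the new frame arithmetic concrete: FrameEntrySpillSize grows by one word per allocated callee-save, counted with popcount. A sketch with a worked example, treating kNumberOfPushedRegistersAtEntry as 1 for the fake return-PC slot (an assumption for the example, not a value quoted from ART):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kWordSize = 8;                        // kX86_64WordSize
    constexpr size_t kNumberOfPushedRegistersAtEntry = 1;  // assumed: return PC only

    size_t FrameEntrySpillSizeSketch(uint32_t allocated_core_registers,
                                     uint32_t core_callee_save_mask) {
      uint32_t mask = allocated_core_registers & core_callee_save_mask;
      return kNumberOfPushedRegistersAtEntry * kWordSize +
             __builtin_popcount(mask) * kWordSize;
    }

    // Example: if linear scan allocated RBX (bit 3) and R12 (bit 12), both
    // callee-save, the entry spill area is 8 + 2 * 8 = 24 bytes, and the
    // prologue pushes R12 then RBX before the subq.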
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ead771a1f2..645fb17b6c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -218,7 +218,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
-  void SetupBlockedRegisters() const OVERRIDE;
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
   Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index dda6c94a3d..6bf8f776fd 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -431,6 +431,14 @@ class RegisterSet : public ValueObject {
     return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_);
   }
 
+  uint32_t GetCoreRegisters() const {
+    return core_registers_;
+  }
+
+  uint32_t GetFloatingPointRegisters() const {
+    return floating_point_registers_;
+  }
+
  private:
   uint32_t core_registers_;
   uint32_t floating_point_registers_;
@@ -529,6 +537,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
     register_mask_ |= (1 << reg_id);
   }
 
+  uint32_t GetRegisterMask() const {
+    return register_mask_;
+  }
+
   bool RegisterContainsObject(uint32_t reg_id) {
     return RegisterSet::Contains(register_mask_, reg_id);
   }
@@ -557,7 +569,14 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
       return false;
     }
     Location input = inputs_.Get(input_index);
-    if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) {
+    if (input.IsRegister()
+        || input.IsFpuRegister()
+        || input.IsPair()
+        || input.IsStackSlot()
+        || input.IsDoubleStackSlot()) {
+      // For fixed locations, the register allocator requires to have inputs die before
+      // the instruction, so that input moves use the location of the input just
+      // before that instruction (and not potential moves due to splitting).
       return false;
     }
     return true;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index e120bc681e..260076acbf 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -58,7 +58,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
       reserved_out_slots_(0),
       maximum_number_of_live_core_registers_(0),
       maximum_number_of_live_fp_registers_(0) {
-  codegen->SetupBlockedRegisters();
+  static constexpr bool kIsBaseline = false;
+  codegen->SetupBlockedRegisters(kIsBaseline);
   physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
   physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
   // Always reserve for the current method and the graph's max out registers.
@@ -278,14 +279,18 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
   if (locations->WillCall()) {
     // Block all registers.
     for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
-      BlockRegister(Location::RegisterLocation(i),
-                    position,
-                    position + 1);
+      if (!codegen_->IsCoreCalleeSaveRegister(i)) {
+        BlockRegister(Location::RegisterLocation(i),
+                      position,
+                      position + 1);
+      }
     }
     for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
-      BlockRegister(Location::FpuRegisterLocation(i),
-                    position,
-                    position + 1);
+      if (!codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+        BlockRegister(Location::FpuRegisterLocation(i),
+                      position,
+                      position + 1);
+      }
     }
   }
 
@@ -627,6 +632,9 @@ void RegisterAllocator::LinearScan() {
     // (6) If the interval had a register allocated, add it to the list of active
     //     intervals.
     if (success) {
+      codegen_->AddAllocatedRegister(processing_core_registers_
+          ? Location::RegisterLocation(current->GetRegister())
+          : Location::FpuRegisterLocation(current->GetRegister()));
       active_.Add(current);
       if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
         current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
@@ -1357,9 +1365,11 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
       switch (source.GetKind()) {
         case Location::kRegister: {
           locations->AddLiveRegister(source);
-          DCHECK_LE(locations->GetNumberOfLiveRegisters(),
-                    maximum_number_of_live_core_registers_ +
-                    maximum_number_of_live_fp_registers_);
+          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
+            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                      maximum_number_of_live_core_registers_ +
+                      maximum_number_of_live_fp_registers_);
+          }
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg());
           }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index e24fa1b9e4..2fc251b07a 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -180,18 +180,18 @@ class Address : public Operand {
   void Init(CpuRegister base_in, int32_t disp) {
     if (disp == 0 && base_in.LowBits() != RBP) {
       SetModRM(0, base_in);
-      if (base_in.AsRegister() == RSP) {
+      if (base_in.LowBits() == RSP) {
         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
       }
     } else if (disp >= -128 && disp <= 127) {
       SetModRM(1, base_in);
-      if (base_in.AsRegister() == RSP) {
+      if (base_in.LowBits() == RSP) {
         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
       }
       SetDisp8(disp);
     } else {
       SetModRM(2, base_in);
-      if (base_in.AsRegister() == RSP) {
+      if (base_in.LowBits() == RSP) {
         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
       }
       SetDisp32(disp);
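The assembler change is easy to miss but matters once R12 becomes allocatable: in x86-64 ModRM encoding only the low three bits of the base register are significant (the fourth bit lives in the REX prefix), so R12 encodes like RSP and needs a SIB byte. Comparing the full register number against RSP, as the old code did, silently mis-encoded R12-based addresses. The rule, reduced to a sketch:

    // Register numbers follow the x86-64 encoding: RSP = 4, RBP = 5,
    // R12 = 12, R13 = 13.
    constexpr int LowBits(int reg) { return reg & 7; }

    // A base whose low bits are 100b needs a SIB byte; one whose low bits are
    // 101b needs an explicit displacement (mod 00 with rm/base 101b selects
    // RIP-relative or disp32 addressing instead).
    constexpr bool BaseNeedsSib(int reg) { return LowBits(reg) == 4; }
    constexpr bool BaseNeedsDisp(int reg) { return LowBits(reg) == 5; }

    static_assert(BaseNeedsSib(4) && BaseNeedsSib(12), "RSP and R12 need a SIB byte");
    static_assert(BaseNeedsDisp(5) && BaseNeedsDisp(13), "RBP and R13 need a displacement");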
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 4fe3852cbc..93062a7c4b 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -82,7 +82,7 @@ class CheckReferenceMapVisitor : public StackVisitor {
           CHECK(stack_mask.LoadBit(dex_register_map.GetValue(reg) >> 2));
           break;
         case DexRegisterMap::kInRegister:
-          CHECK_NE(register_mask & dex_register_map.GetValue(reg), 0u);
+          CHECK_NE(register_mask & (1 << dex_register_map.GetValue(reg)), 0u);
           break;
         case DexRegisterMap::kInFpuRegister:
           // In Fpu register, should not be a reference.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 78a8bf81d7..5690d51b58 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2137,6 +2137,7 @@ class ReferenceMapVisitor : public StackVisitor {
         uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
         StackMap map = m->GetStackMap(native_pc_offset);
         MemoryRegion mask = map.GetStackMask();
+        // Visit stack entries that hold pointers.
         for (size_t i = 0; i < mask.size_in_bits(); ++i) {
           if (mask.LoadBit(i)) {
             StackReference<mirror::Object>* ref_addr =
@@ -2151,6 +2152,16 @@ class ReferenceMapVisitor : public StackVisitor {
             }
           }
         }
+        // Visit callee-save registers that hold pointers.
+        uint32_t register_mask = map.GetRegisterMask();
+        for (size_t i = 0; i < BitSizeOf<uint32_t>(); ++i) {
+          if (register_mask & (1 << i)) {
+            mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(i));
+            if (*ref_addr != nullptr) {
+              visitor_(ref_addr, -1, this);
+            }
+          }
+        }
       } else {
         const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
         CHECK(native_gc_map != nullptr) << PrettyMethod(m);
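On the runtime side, a stack map's register mask is a bit set indexed by register number, which is why the CheckReferenceMapVisitor fix tests (1 << value) rather than and-ing the raw register number, and why thread.cc can walk the mask bit by bit. A condensed sketch of that walk; the register-file array is a stand-in for StackVisitor::GetGPRAddress(), not ART's real interface:

    #include <cstdint>

    struct Object {};  // stand-in for mirror::Object

    // Bit i of `register_mask` set means register i holds a reference.
    // `gpr_addresses[i]` stands in for GetGPRAddress(i): the location where
    // register i's value lives for the frame being visited.
    void VisitRegisterRoots(uint32_t register_mask,
                            Object** gpr_addresses[32],
                            void (*visit)(Object**)) {
      for (uint32_t i = 0; i < 32; ++i) {
        if (register_mask & (1u << i)) {
          Object** ref_addr = gpr_addresses[i];
          if (ref_addr != nullptr && *ref_addr != nullptr) {
            visit(ref_addr);
          }
        }
      }
    }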