Diffstat (limited to 'compiler/optimizing')
32 files changed, 1989 insertions, 668 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index ed3f949afe..561dcb7315 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -547,7 +547,9 @@ void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) { stack_map_stream_.FillIn(region); } -void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { +void CodeGenerator::RecordPcInfo(HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { if (instruction != nullptr) { // The code generated for some type conversions may call the // runtime, thus normally requiring a subsequent call to this @@ -577,20 +579,17 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { pc_info.native_pc = GetAssembler()->CodeSize(); pc_infos_.Add(pc_info); - // Populate stack map information. + uint32_t inlining_depth = 0; if (instruction == nullptr) { // For stack overflow checks. - stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, 0); + stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth); return; } - LocationSummary* locations = instruction->GetLocations(); HEnvironment* environment = instruction->GetEnvironment(); - size_t environment_size = instruction->EnvironmentSize(); - size_t inlining_depth = 0; uint32_t register_mask = locations->GetRegisterMask(); if (locations->OnlyCallsOnSlowPath()) { // In case of slow path, we currently set the location of caller-save registers @@ -602,15 +601,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { } // The register mask must be a subset of callee-save registers. DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.AddStackMapEntry( - dex_pc, pc_info.native_pc, register_mask, - locations->GetStackMask(), environment_size, inlining_depth); + stack_map_stream_.AddStackMapEntry(dex_pc, + pc_info.native_pc, + register_mask, + locations->GetStackMask(), + environment_size, + inlining_depth); // Walk over the environment, and record the location of dex registers. 
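To keep this hunk readable: each safepoint now gets one stack map entry, followed by one dex-register entry per vreg in the environment, and the vreg index `i` is passed explicitly instead of being implied by call order. A minimal sketch of the recording model, with names simplified from the real StackMapStream API:

    #include <cstdint>
    #include <map>

    // Simplified stand-in for DexRegisterLocation::Kind.
    enum class Kind { kNone, kInStack, kInRegister, kInFpuRegister, kConstant };

    struct DexRegisterEntry {
      Kind kind;
      int32_t value;  // stack offset, register id, or constant bits
    };

    struct StackMapEntry {
      uint32_t dex_pc;
      uint32_t native_pc;
      uint32_t register_mask;
      std::map<uint16_t, DexRegisterEntry> vregs;  // keyed by vreg index
    };

    // Wide values (long/double) occupy two consecutive vregs, which is why
    // the hunk bumps `i` with `++i` when recording the high half.
    void AddWideConstant(StackMapEntry* map, uint16_t i, int64_t value) {
      map->vregs[i] = {Kind::kConstant, static_cast<int32_t>(value)};            // low half
      map->vregs[i + 1] = {Kind::kConstant, static_cast<int32_t>(value >> 32)};  // high half
    }

    int main() {
      StackMapEntry map{0, 0, 0, {}};
      AddWideConstant(&map, 4, 0x0000000100000002LL);  // fills vregs 4 and 5
    }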
for (size_t i = 0; i < environment_size; ++i) { HInstruction* current = environment->GetInstructionAt(i); if (current == nullptr) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kNone, 0); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0); continue; } @@ -620,81 +622,132 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value)); - ++i; + stack_map_stream_.AddDexRegisterEntry( + i, DexRegisterLocation::Kind::kConstant, Low32Bits(value)); + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value)); DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value)); - ++i; + stack_map_stream_.AddDexRegisterEntry( + i, DexRegisterLocation::Kind::kConstant, Low32Bits(value)); + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value)); DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); } else { - DCHECK(current->IsFloatConstant()); + DCHECK(current->IsFloatConstant()) << current->DebugName(); int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value); } break; } case Location::kStackSlot: { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex()); + stack_map_stream_.AddDexRegisterEntry( + i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); break; } case Location::kDoubleStackSlot: { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, - location.GetHighStackIndex(kVRegSize)); - ++i; + stack_map_stream_.AddDexRegisterEntry( + i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); DCHECK_LT(i, environment_size); break; } case Location::kRegister : { int id = location.reg(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); - if (current->GetType() == Primitive::kPrimLong) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); - ++i; - DCHECK_LT(i, environment_size); + if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { + uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); + stack_map_stream_.AddDexRegisterEntry(i, 
DexRegisterLocation::Kind::kInStack, offset); + if (current->GetType() == Primitive::kPrimLong) { + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + DCHECK_LT(i, environment_size); + } + } else { + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, id); + if (current->GetType() == Primitive::kPrimLong) { + stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInRegister, id); + DCHECK_LT(i, environment_size); + } } break; } case Location::kFpuRegister : { int id = location.reg(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); - if (current->GetType() == Primitive::kPrimDouble) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); - ++i; - DCHECK_LT(i, environment_size); + if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { + uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + if (current->GetType() == Primitive::kPrimDouble) { + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + DCHECK_LT(i, environment_size); + } + } else { + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, id); + if (current->GetType() == Primitive::kPrimDouble) { + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kInFpuRegister, id); + DCHECK_LT(i, environment_size); + } } break; } case Location::kFpuRegisterPair : { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high()); - ++i; + int low = location.low(); + int high = location.high(); + if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { + uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + } else { + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, low); + } + if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) { + uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high); + stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset); + } else { + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kInFpuRegister, high); + } DCHECK_LT(i, environment_size); break; } case Location::kRegisterPair : { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high()); - ++i; + int low = location.low(); + int high = location.high(); + if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { + uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low); + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + } else { + stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, low); + } + if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) { + uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high); + stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset); + } else { + stack_map_stream_.AddDexRegisterEntry( + ++i, DexRegisterLocation::Kind::kInRegister, high); + } DCHECK_LT(i, environment_size); break; } + case Location::kInvalid: { + 
stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0); + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } @@ -735,78 +788,86 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { } } -void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { +void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const { + LocationSummary* locations = suspend_check->GetLocations(); + HBasicBlock* block = suspend_check->GetBlock(); + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check); + DCHECK(block->IsLoopHeader()); + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + LiveInterval* interval = current->GetLiveInterval(); + // We only need to clear bits of loop phis containing objects and allocated in register. + // Loop phis allocated on stack already have the object in the stack. + if (current->GetType() == Primitive::kPrimNot + && interval->HasRegister() + && interval->HasSpillSlot()) { + locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize); + } + } +} + +void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) { + HParallelMove parallel_move(GetGraph()->GetArena()); + parallel_move.AddMove(from1, to1, nullptr); + parallel_move.AddMove(from2, to2, nullptr); + GetMoveResolver()->EmitNativeCode(&parallel_move); +} + +void SlowPathCode::RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc) { + codegen->RecordPcInfo(instruction, dex_pc, this); +} + +void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); - size_t stack_offset = first_register_slot_in_slow_path_; - for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { - if (!IsCoreCalleeSaveRegister(i)) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (!codegen->IsCoreCalleeSaveRegister(i)) { if (register_set->ContainsCoreRegister(i)) { // If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); } - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += SaveCoreRegister(stack_offset, i); + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += codegen->SaveCoreRegister(stack_offset, i); } } } - for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!IsFloatingPointCalleeSaveRegister(i)) { + for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { + if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { if (register_set->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += SaveFloatingPointRegister(stack_offset, i); + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i); } } } } -void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { +void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); - size_t stack_offset = first_register_slot_in_slow_path_; - for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { - if (!IsCoreCalleeSaveRegister(i)) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (!codegen->IsCoreCalleeSaveRegister(i)) { if (register_set->ContainsCoreRegister(i)) { - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += RestoreCoreRegister(stack_offset, i); + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } } } - for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!IsFloatingPointCalleeSaveRegister(i)) { + for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { + if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { if (register_set->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); - stack_offset += RestoreFloatingPointRegister(stack_offset, i); + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); } } } } -void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const { - LocationSummary* locations = suspend_check->GetLocations(); - HBasicBlock* block = suspend_check->GetBlock(); - DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check); - DCHECK(block->IsLoopHeader()); - - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HInstruction* current = it.Current(); - LiveInterval* interval = current->GetLiveInterval(); - // We only need to clear bits of loop phis containing objects and allocated in register. - // Loop phis allocated on stack already have the object in the stack. 
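The data flow that makes the new `slow_path` argument useful: while spilling caller-save registers at a slow-path entry, SaveLiveRegisters now records the stack offset chosen for each register, and RecordPcInfo later queries those offsets so a spilled register is reported as kInStack instead of kInRegister. A compilable sketch of just that bookkeeping (the widths and the -1 sentinel mirror the header hunk below; the class itself is illustrative, not ART's):

    #include <cstddef>
    #include <cstdint>

    class SlowPathSketch {
     public:
      static constexpr size_t kMaxRegs = 32;
      static constexpr uint32_t kNotSaved = static_cast<uint32_t>(-1);

      SlowPathSketch() {
        for (size_t i = 0; i < kMaxRegs; ++i) saved_core_[i] = kNotSaved;
      }

      // Called while spilling caller-save registers at the slow-path entry.
      void MarkCoreSaved(int reg, uint32_t stack_offset) {
        saved_core_[reg] = stack_offset;
      }

      // Queried by RecordPcInfo: if saved, report kInStack at this offset,
      // since the register itself is clobbered across the runtime call.
      bool IsCoreRegisterSaved(int reg) const { return saved_core_[reg] != kNotSaved; }
      uint32_t GetStackOffsetOfCoreRegister(int reg) const { return saved_core_[reg]; }

     private:
      uint32_t saved_core_[kMaxRegs];
    };

    int main() {
      SlowPathSketch sp;
      sp.MarkCoreSaved(/*reg=*/1, /*stack_offset=*/16);
      return sp.IsCoreRegisterSaved(1) ? 0 : 1;
    }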
- if (current->GetType() == Primitive::kPrimNot - && interval->HasRegister() - && interval->HasSpillSlot()) { - locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize); - } - } -} - -void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) { - HParallelMove parallel_move(GetGraph()->GetArena()); - parallel_move.AddMove(from1, to1, nullptr); - parallel_move.AddMove(from2, to2, nullptr); - GetMoveResolver()->EmitNativeCode(&parallel_move); -} - } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 5146afad8d..667f686059 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -29,8 +29,6 @@ namespace art { -static size_t constexpr kVRegSize = 4; - // Binary encoding of 2^32 for type double. static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); // Binary encoding of 2^31 for type double. @@ -68,12 +66,42 @@ struct PcInfo { class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { public: - SlowPathCode() {} + SlowPathCode() { + for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { + saved_core_stack_offsets_[i] = kRegisterNotSaved; + saved_fpu_stack_offsets_[i] = kRegisterNotSaved; + } + } + virtual ~SlowPathCode() {} virtual void EmitNativeCode(CodeGenerator* codegen) = 0; + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); + void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc); + + bool IsCoreRegisterSaved(int reg) const { + return saved_core_stack_offsets_[reg] != kRegisterNotSaved; + } + + bool IsFpuRegisterSaved(int reg) const { + return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved; + } + + uint32_t GetStackOffsetOfCoreRegister(int reg) const { + return saved_core_stack_offsets_[reg]; + } + + uint32_t GetStackOffsetOfFpuRegister(int reg) const { + return saved_fpu_stack_offsets_[reg]; + } + private: + static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; + static constexpr uint32_t kRegisterNotSaved = -1; + uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; + uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; DISALLOW_COPY_AND_ASSIGN(SlowPathCode); }; @@ -153,17 +181,13 @@ class CodeGenerator { virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; - virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index, reg_id); - UNIMPLEMENTED(FATAL); - UNREACHABLE(); - } - virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index, reg_id); - UNIMPLEMENTED(FATAL); - UNREACHABLE(); - } + + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; + // Returns whether we should split long moves in parallel moves.
+ virtual bool ShouldSplitLongMoves() const { return false; } bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; @@ -173,7 +197,7 @@ class CodeGenerator { return (fpu_callee_save_mask_ & (1 << reg)) != 0; } - void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); + void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -186,8 +210,6 @@ class CodeGenerator { void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; void BuildStackMaps(std::vector<uint8_t>* vector); - void SaveLiveRegisters(LocationSummary* locations); - void RestoreLiveRegisters(LocationSummary* locations); bool IsLeafMethod() const { return is_leaf_; @@ -271,6 +293,15 @@ class CodeGenerator { } } + size_t GetFirstRegisterSlotInSlowPath() const { + return first_register_slot_in_slow_path_; + } + + uint32_t FrameEntrySpillSize() const { + return GetFpuSpillSize() + GetCoreSpillSize(); + } + + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -330,10 +361,6 @@ class CodeGenerator { return POPCOUNT(core_spill_mask_) * GetWordSize(); } - uint32_t FrameEntrySpillSize() const { - return GetFpuSpillSize() + GetCoreSpillSize(); - } - bool HasAllocatedCalleeSaveRegisters() const { // We check the core registers against 1 because it always comprises the return PC. return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 0403af1eaf..689f5357f4 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -82,7 +82,7 @@ class NullCheckSlowPathARM : public SlowPathCodeARM { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); } private: @@ -98,7 +98,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCodeARM { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); } private: @@ -114,10 +114,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(instruction_->GetLocations()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc()); - codegen->RestoreLiveRegisters(instruction_->GetLocations()); + QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); } else { @@ -162,7 +162,7 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowArrayBounds), 
instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); } private: @@ -188,7 +188,7 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); @@ -196,7 +196,7 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); - arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_); + arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); // Move the class to the desired location. Location out = locations->Out(); @@ -204,7 +204,7 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -235,16 +235,16 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -272,7 +272,7 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
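The recurring "we need a parallel move resolver" comment in these slow paths is worth unpacking: the two argument moves must behave as if they happened simultaneously, because one move's destination may be the other's source. A toy two-move resolver showing the hazard (real resolvers, like ART's ParallelMoveResolver, handle arbitrary move graphs and cycles):

    #include <utility>

    // Locations modeled as plain ints; "move" means copy the source value
    // into the destination.
    void EmitTwoMoves(int* from1, int* to1, int* from2, int* to2) {
      if (to1 == from2 && to2 == from1) {
        // A swap cycle: either sequential order would clobber one source.
        std::swap(*from1, *from2);
      } else if (to1 == from2) {
        // Emit the second move first, while its source is still intact.
        *to2 = *from2;
        *to1 = *from1;
      } else {
        // Safe in program order (covers to2 == from1 as well).
        *to1 = *from1;
        *to2 = *from2;
      }
    }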
@@ -284,14 +284,15 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { Location::RegisterLocation(calling_convention.GetRegisterAt(1))); if (instruction_->IsInstanceOf()) { - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_); + arm_codegen->InvokeRuntime( + QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -857,10 +858,11 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, - uint32_t dex_pc) { + uint32_t dex_pc, + SlowPathCode* slow_path) { __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset); __ blx(LR); - RecordPcInfo(instruction, dex_pc); + RecordPcInfo(instruction, dex_pc, slow_path); DCHECK(instruction->IsSuspendCheck() || instruction->IsBoundsCheck() || instruction->IsNullCheck() @@ -901,10 +903,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - __ Comment("Unreachable"); - __ bkpt(0); - } } void LocationsBuilderARM::VisitIf(HIf* if_instr) { @@ -1205,6 +1203,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); codegen_->GenerateStaticOrDirectCall(invoke, temp); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -1673,14 +1672,16 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `float-to-long' instruction. codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l), conversion, - conversion->GetDexPc()); + conversion->GetDexPc(), + nullptr); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-long' instruction. 
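`pF2l` and `pD2l` are runtime helpers because the hardware truncating conversion does not match Java semantics. What those entry points must compute, per JLS 5.1.3 (a reference version for illustration, not ART's code):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Java double-to-long: NaN maps to 0, out-of-range values clamp to
    // Long.MIN_VALUE / Long.MAX_VALUE, in-range values truncate toward zero.
    int64_t D2l(double v) {
      if (std::isnan(v)) return 0;
      if (v >= 9223372036854775808.0) return INT64_MAX;   // >= 2^63 clamps
      if (v < -9223372036854775808.0) return INT64_MIN;   // < -2^63 clamps
      return static_cast<int64_t>(v);
    }

    int main() {
      assert(D2l(std::nan("")) == 0);
      assert(D2l(1e30) == INT64_MAX);
      assert(D2l(-2.5) == -2);
    }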
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l), conversion, - conversion->GetDexPc()); + conversion->GetDexPc(), + nullptr); break; default: @@ -2134,7 +2135,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); break; } @@ -2228,17 +2229,17 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { } case Primitive::kPrimLong: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); break; } case Primitive::kPrimFloat: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); break; } case Primitive::kPrimDouble: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); break; } @@ -2455,7 +2456,8 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); } void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { @@ -2474,7 +2476,8 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { @@ -3204,7 +3207,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { DCHECK_EQ(value_type, Primitive::kPrimNot); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); } break; } @@ -3691,7 +3695,7 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( - QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc()); + QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); } void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -3784,7 +3788,8 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr codegen_->InvokeRuntime(instruction->IsEnter() ? 
QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); } void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -3888,7 +3893,6 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ bl(GetFrameEntryLabel()); } - RecordPcInfo(invoke, invoke->GetDexPc()); DCHECK(!IsLeafMethod()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index f1a3729c13..57e1d2f2f5 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -246,7 +246,8 @@ class CodeGeneratorARM : public CodeGenerator { void LoadCurrentMethod(Register reg); // Generate code to invoke a runtime entry point. - void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc); + void InvokeRuntime( + int32_t offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path); // Emit a write barrier. void MarkGCCard(Register temp, Register card, Register object, Register value); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index c21084a6fe..aeec5dd1c4 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -147,7 +147,7 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), length_location_, LocationFrom(calling_convention.GetRegisterAt(1))); arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -167,7 +167,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -191,14 +191,14 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex()); arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); int32_t entry_point_offset = do_clinit_ ? 
QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); - arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_); + arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>(); } else { @@ -213,7 +213,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } @@ -244,18 +244,18 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>(); Primitive::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } @@ -273,7 +273,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } @@ -292,11 +292,11 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(instruction_->GetLocations()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc()); + QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - codegen->RestoreLiveRegisters(instruction_->GetLocations()); + RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); } else { @@ -338,7 +338,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
@@ -348,7 +348,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { object_class_, LocationFrom(calling_convention.GetRegisterAt(1))); if (instruction_->IsInstanceOf()) { - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_); + arm64_codegen->InvokeRuntime( + QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); @@ -356,11 +357,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } @@ -984,11 +985,12 @@ void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, - uint32_t dex_pc) { + uint32_t dex_pc, + SlowPathCode* slow_path) { __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); if (instruction != nullptr) { - RecordPcInfo(instruction, dex_pc); + RecordPcInfo(instruction, dex_pc, slow_path); DCHECK(instruction->IsSuspendCheck() || instruction->IsBoundsCheck() || instruction->IsNullCheck() @@ -1298,7 +1300,8 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); if (value_type == Primitive::kPrimNot) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime( + QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } else { LocationSummary* locations = instruction->GetLocations(); @@ -1593,10 +1596,6 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM64::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - down_cast<Arm64Assembler*>(GetAssembler())->Comment("Unreachable"); - __ Brk(__LINE__); // TODO: Introduce special markers for such code locations. - } } void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { @@ -1920,7 +1919,6 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Bl(&frame_entry_label_); } - RecordPcInfo(invoke, invoke->GetDexPc()); DCHECK(!IsLeafMethod()); } @@ -1931,6 +1929,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0)); codegen_->GenerateStaticOrDirectCall(invoke, temp); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -2066,7 +2065,8 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins codegen_->InvokeRuntime(instruction->IsEnter() ? 
QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } @@ -2172,7 +2172,8 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, mirror::ArtMethod*>(); } @@ -2198,7 +2199,8 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, - instruction->GetDexPc()); + instruction->GetDexPc(), + nullptr); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); } @@ -2352,7 +2354,7 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { case Primitive::kPrimDouble: { int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); - codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc()); + codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); break; } @@ -2527,7 +2529,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( - QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc()); + QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 48961d68e9..cbb2e5c749 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -300,7 +300,10 @@ class CodeGeneratorARM64 : public CodeGenerator { void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. 
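The shape shared by all back ends after this refactor: the runtime-call helper threads the active slow path (or nullptr from straight-line code) into RecordPcInfo, which is how the stack map learns which live registers are currently parked on the stack. A schematic with simplified, stubbed-out types (the real helpers also emit the blx/blr/call and DCHECK the instruction kind):

    #include <cstdint>

    class SlowPathCode;  // records per-register spill offsets (see the header hunk above)
    class HInstruction;

    class CodeGen {
     public:
      void InvokeRuntime(int32_t entry_point_offset, HInstruction* instr,
                         uint32_t dex_pc, SlowPathCode* slow_path) {
        EmitCallTo(entry_point_offset);          // back-end specific emission
        RecordPcInfo(instr, dex_pc, slow_path);  // nullptr outside slow paths
      }

     private:
      void EmitCallTo(int32_t entry_point_offset) { (void)entry_point_offset; }
      void RecordPcInfo(HInstruction* instr, uint32_t dex_pc, SlowPathCode* sp) {
        // In ART this appends a stack map entry; stubbed here.
        (void)instr; (void)dex_pc; (void)sp;
      }
    };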
- void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc); + void InvokeRuntime(int32_t offset, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path); ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 07d88deffa..754dd1088d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -158,16 +158,16 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { public: - explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) + SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(instruction_->GetLocations()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); - codegen->RestoreLiveRegisters(instruction_->GetLocations()); + RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -198,15 +198,15 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -231,7 +231,7 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { LocationSummary* locations = at_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex())); @@ -239,7 +239,7 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { __ fs()->call(Address::Absolute(do_clinit_ ? QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage) : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType))); - codegen->RecordPcInfo(at_, dex_pc_); + RecordPcInfo(codegen, at_, dex_pc_); // Move the class to the desired location. 
Location out = locations->Out(); @@ -248,7 +248,7 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { x86_codegen->Move32(out, Location::RegisterLocation(EAX)); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -287,7 +287,7 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. @@ -306,11 +306,11 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pCheckCast))); } - codegen->RecordPcInfo(instruction_, dex_pc_); + RecordPcInfo(codegen, instruction_, dex_pc_); if (instruction_->IsInstanceOf()) { x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -673,8 +673,19 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { source.AsRegisterPairHigh<Register>()); } else if (source.IsFpuRegister()) { __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value; + if (constant->IsLongConstant()) { + value = constant->AsLongConstant()->GetValue(); + } else { + DCHECK(constant->IsDoubleConstant()); + value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } + __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value))); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value))); } else { - DCHECK(source.IsDoubleStackSlot()); + DCHECK(source.IsDoubleStackSlot()) << source; EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), Location::StackSlot(destination.GetStackIndex()), @@ -801,10 +812,6 @@ void LocationsBuilderX86::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - __ Comment("Unreachable"); - __ int3(); - } } void LocationsBuilderX86::VisitIf(HIf* if_instr) { @@ -1555,8 +1562,6 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `int-to-byte' instruction. if (in.IsRegister()) { __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); - } else if (in.IsStackSlot()) { - __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); @@ -1760,6 +1765,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ addsd(result, temp); // result = double-to-float(result) __ cvtsd2ss(result, result); + // Restore low. + __ addl(low, Immediate(0x80000000)); break; } @@ -1807,6 +1814,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ addsd(result, constant); // result = result + temp __ addsd(result, temp); + // Restore low. 
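The "Restore low" additions matter because the input register pair stays live after the conversion: the long-to-float/double sequence biases `low` by 0x80000000 in place so it can be read as a signed 32-bit value, and previously left it mutated. The arithmetic being compensated for, checkable in portable C++ (assumes two's-complement wrap, as on all ART targets):

    #include <cassert>
    #include <cstdint>

    // long-to-double via 32-bit halves, mirroring the x86 sequence: bias the
    // low word, convert it as a signed int32, then add 2^31 and high * 2^32
    // back as double constants (k2Pow31/k2Pow32 encodings in the header).
    double LongToDouble(uint32_t low, int32_t high) {
      int32_t biased = static_cast<int32_t>(low + 0x80000000u);  // == low - 2^31
      return static_cast<double>(biased)
          + 2147483648.0                                  // + 2^31, recovers low
          + static_cast<double>(high) * 4294967296.0;     // + high * 2^32
      // Codegen then re-adds 0x80000000 to the register to undo the bias.
    }

    int main() {
      int64_t v = -123456789012345;  // |v| < 2^53, so the double is exact
      uint32_t low = static_cast<uint32_t>(v);
      int32_t high = static_cast<int32_t>(v >> 32);
      assert(LongToDouble(low, high) == static_cast<double>(v));
    }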
+ __ addl(low, Immediate(0x80000000)); break; } @@ -1892,10 +1901,15 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { if (second.IsRegisterPair()) { __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); - } else { + } else if (second.IsDoubleStackSlot()) { __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); __ adcl(first.AsRegisterPairHigh<Register>(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value))); + __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); } break; } @@ -1965,10 +1979,15 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { if (second.IsRegisterPair()) { __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); - } else { + } else if (second.IsDoubleStackSlot()) { __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); __ sbbl(first.AsRegisterPairHigh<Register>(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value))); + __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); } break; } @@ -1999,12 +2018,6 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { break; case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // TODO: Currently this handles only stack operands: - // - we don't have enough registers because we currently use Quick ABI. - // - by the time we have a working register allocator we will probably change the ABI - // and fix the above. - // - we don't have a way yet to request operands on stack but the base line compiler - // will leave the operands on the stack with Any(). locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); // Needed for imul on 32bits with 64bits output. @@ -2046,39 +2059,83 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } case Primitive::kPrimLong: { - DCHECK(second.IsDoubleStackSlot()); - Register in1_hi = first.AsRegisterPairHigh<Register>(); Register in1_lo = first.AsRegisterPairLow<Register>(); - Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); - Address in2_lo(ESP, second.GetStackIndex()); Register eax = locations->GetTemp(0).AsRegister<Register>(); Register edx = locations->GetTemp(1).AsRegister<Register>(); DCHECK_EQ(EAX, eax); DCHECK_EQ(EDX, edx); - // input: in1 - 64 bits, in2 - 64 bits + // input: in1 - 64 bits, in2 - 64 bits. 
// output: in1 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] // parts: in1.lo = (in1.lo * in2.lo)[31:0] - - __ movl(eax, in2_hi); - // eax <- in1.lo * in2.hi - __ imull(eax, in1_lo); - // in1.hi <- in1.hi * in2.lo - __ imull(in1_hi, in2_lo); - // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo - __ addl(in1_hi, eax); - // move in1_lo to eax to prepare for double precision - __ movl(eax, in1_lo); - // edx:eax <- in1.lo * in2.lo - __ mull(in2_lo); - // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] - __ addl(in1_hi, edx); - // in1.lo <- (in1.lo * in2.lo)[31:0]; - __ movl(in1_lo, eax); + if (second.IsConstant()) { + DCHECK(second.GetConstant()->IsLongConstant()); + + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + + __ movl(eax, high); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, low); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in2_lo to eax to prepare for double precision + __ movl(eax, low); + // edx:eax <- in1.lo * in2.lo + __ mull(in1_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } else if (second.IsRegisterPair()) { + Register in2_hi = second.AsRegisterPairHigh<Register>(); + Register in2_lo = second.AsRegisterPairLow<Register>(); + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * in2.lo + __ mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } else { + DCHECK(second.IsDoubleStackSlot()) << second; + Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); + Address in2_lo(ESP, second.GetStackIndex()); + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * in2.lo + __ mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } break; } @@ -2237,7 +2294,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr } void LocationsBuilderX86::VisitDiv(HDiv* div) { - LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong + LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong) ? 
LocationSummary::kCall : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); @@ -2306,8 +2363,10 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); + LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) + ? LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); switch (type) { case Primitive::kPrimInt: { @@ -2646,7 +2705,6 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) { switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // TODO: we set any here but we don't handle constants locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -2674,18 +2732,24 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { case Primitive::kPrimLong: { if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>()); - } else { - DCHECK(right.IsDoubleStackSlot()); + } else if (right.IsDoubleStackSlot()) { __ cmpl(left.AsRegisterPairHigh<Register>(), Address(ESP, right.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(right.IsConstant()) << right; + __ cmpl(left.AsRegisterPairHigh<Register>(), + Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); } __ j(kLess, &less); // Signed compare. __ j(kGreater, &greater); // Signed compare. if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>()); - } else { - DCHECK(right.IsDoubleStackSlot()); + } else if (right.IsDoubleStackSlot()) { __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex())); + } else { + DCHECK(right.IsConstant()) << right; + __ cmpl(left.AsRegisterPairLow<Register>(), + Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); } break; } @@ -2770,7 +2834,12 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + // The output overlaps in case of long: we don't want the low move to overwrite + // the object's location. + locations->SetOut(Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap + : Location::kNoOutputOverlap); if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { // Long values can be loaded atomically into an XMM using movsd. 
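Circling back to the long-multiply lowering a few hunks up: the constant, register-pair, and stack-slot variants all emit the same schoolbook decomposition, in1.hi:in1.lo = (in1.lo*in2.hi + in1.hi*in2.lo)*2^32 + in1.lo*in2.lo, truncated to 64 bits. The identity the comments state is easy to sanity-check in portable C++:

    #include <cassert>
    #include <cstdint>

    // Low 64 bits of a 64x64 product built from 32-bit halves, exactly as the
    // imull/imull/addl/mull/addl sequence computes it.
    uint64_t Mul64Via32(uint32_t in1_lo, uint32_t in1_hi,
                        uint32_t in2_lo, uint32_t in2_hi) {
      uint32_t hi = in1_lo * in2_hi;                               // imull (low 32 bits)
      hi += in1_hi * in2_lo;                                       // imull + addl
      uint64_t lo_full = static_cast<uint64_t>(in1_lo) * in2_lo;   // mull -> edx:eax
      hi += static_cast<uint32_t>(lo_full >> 32);                  // addl in1_hi, edx
      uint32_t lo = static_cast<uint32_t>(lo_full);                // movl in1_lo, eax
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }

    int main() {
      uint64_t a = 0x123456789abcdef0ULL, b = 0xfedcba9876543210ULL;
      assert(Mul64Via32(static_cast<uint32_t>(a), static_cast<uint32_t>(a >> 32),
                        static_cast<uint32_t>(b), static_cast<uint32_t>(b >> 32))
             == a * b);
    }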
@@ -2827,6 +2896,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, __ psrlq(temp, Immediate(32)); __ movd(out.AsRegisterPairHigh<Register>(), temp); } else { + DCHECK_NE(base, out.AsRegisterPairLow<Register>()); __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); @@ -3064,7 +3134,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move to overwrite + // the array's location. + locations->SetOut(Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap + : Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -3138,6 +3212,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Location out = locations->Out(); + DCHECK_NE(obj, out.AsRegisterPairLow<Register>()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset)); @@ -3569,8 +3644,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } - } else { - DCHECK(constant->IsFloatConstant()); + } else if (constant->IsFloatConstant()) { float value = constant->AsFloatConstant()->GetValue(); Immediate imm(bit_cast<float, int32_t>(value)); if (destination.IsFpuRegister()) { @@ -3583,6 +3657,43 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), imm); } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + if (destination.IsDoubleStackSlot()) { + __ movl(Address(ESP, destination.GetStackIndex()), low); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); + } else { + __ movl(destination.AsRegisterPairLow<Register>(), low); + __ movl(destination.AsRegisterPairHigh<Register>(), high); + } + } else { + DCHECK(constant->IsDoubleConstant()); + double dbl_value = constant->AsDoubleConstant()->GetValue(); + int64_t value = bit_cast<double, int64_t>(dbl_value); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + if (destination.IsFpuRegister()) { + XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); + if (value == 0) { + // Easy handling of 0.0. 
+ __ xorpd(dest, dest); + } else { + __ pushl(high); + __ pushl(low); + __ movsd(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(8)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ movl(Address(ESP, destination.GetStackIndex()), low); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); + } } } else { LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source; @@ -3650,6 +3761,33 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); } else if (destination.IsFpuRegister() && source.IsStackSlot()) { Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { + // Take advantage of the 16 bytes in the XMM register. + XmmRegister reg = source.AsFpuRegister<XmmRegister>(); + Address stack(ESP, destination.GetStackIndex()); + // Load the double into the high doubleword. + __ movhpd(reg, stack); + + // Store the low double into the destination. + __ movsd(stack, reg); + + // Move the high double to the low double. + __ psrldq(reg, Immediate(8)); + } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) { + // Take advantage of the 16 bytes in the XMM register. + XmmRegister reg = destination.AsFpuRegister<XmmRegister>(); + Address stack(ESP, source.GetStackIndex()); + // Load the double into the high doubleword. + __ movhpd(reg, stack); + + // Store the low double into the destination. + __ movsd(stack, reg); + + // Move the high double to the low double. + __ psrldq(reg, Immediate(8)); + } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) { + Exchange(destination.GetStackIndex(), source.GetStackIndex()); + Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize)); } else { LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination; } @@ -3951,7 +4089,7 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); } - } else { + } else if (second.IsDoubleStackSlot()) { if (instruction->IsAnd()) { __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); __ andl(first.AsRegisterPairHigh<Register>(), @@ -3966,6 +4104,22 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr __ xorl(first.AsRegisterPairHigh<Register>(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); } + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + Immediate low(Low32Bits(value)); + Immediate high(High32Bits(value)); + if (instruction->IsAnd()) { + __ andl(first.AsRegisterPairLow<Register>(), low); + __ andl(first.AsRegisterPairHigh<Register>(), high); + } else if (instruction->IsOr()) { + __ orl(first.AsRegisterPairLow<Register>(), low); + __ orl(first.AsRegisterPairHigh<Register>(), high); + } else { + DCHECK(instruction->IsXor()); + __ xorl(first.AsRegisterPairLow<Register>(), low); + __ xorl(first.AsRegisterPairHigh<Register>(), high); + } } } } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f5a9b7d1f7..c5763de05e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h 
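The EmitSwap cases added above exchange an XMM register with a double stack slot in place by parking the slot's value in the register's upper 64 bits and then shifting it down, so no scratch register or temporary slot is needed. A rough equivalent using SSE2 intrinsics (an editorial sketch; _mm_loadh_pd corresponds to movhpd, _mm_storel_pd to movsd, and _mm_srli_si128 to psrldq):

#include <emmintrin.h>

// Swaps the double in the low lane of *reg with the double at *slot.
void SwapDoubleWithSlot(__m128d* reg, double* slot) {
  *reg = _mm_loadh_pd(*reg, slot);  // Load the slot into the high lane.
  _mm_storel_pd(slot, *reg);        // Store the old low lane into the slot.
  *reg = _mm_castsi128_pd(          // Shift the high lane down to the low lane.
      _mm_srli_si128(_mm_castpd_si128(*reg), 8));
}

This is exactly the "16 bytes in the XMM register" the comments refer to: the register holds two doubles, and only the low one is architecturally live here.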
@@ -245,6 +245,8 @@ class CodeGeneratorX86 : public CodeGenerator { return type == Primitive::kPrimLong; } + bool ShouldSplitLongMoves() const OVERRIDE { return true; } + Label* GetFrameEntryLabel() { return &frame_entry_label_; } private: diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 07ba95dcfb..dbd7c9e8ad 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -72,7 +72,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 { __ Bind(GetEntryLabel()); __ gs()->call( Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true)); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -88,7 +88,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCodeX86_64 { __ Bind(GetEntryLabel()); __ gs()->call( Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowDivZero), true)); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -136,10 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(instruction_->GetLocations()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true)); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); - codegen->RestoreLiveRegisters(instruction_->GetLocations()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -181,7 +181,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 { Location::RegisterLocation(calling_convention.GetRegisterAt(1))); __ gs()->call(Address::Absolute( QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true)); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -207,7 +207,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); @@ -215,7 +215,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { __ gs()->call(Address::Absolute((do_clinit_ ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage) : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true)); - codegen->RecordPcInfo(at_, dex_pc_); + RecordPcInfo(codegen, at_, dex_pc_); Location out = locations->Out(); // Move the class to the desired location. 
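These x86-64 slow-path hunks all repeat one mechanical substitution: SaveLiveRegisters, RestoreLiveRegisters and RecordPcInfo become SlowPathCode-level helpers that take the codegen as an explicit argument, presumably so a slow path can pass itself through to the new RecordPcInfo overload introduced in code_generator.cc. The resulting skeleton, as an editorial sketch (placeholder class name, runtime call elided; `__` abbreviates the codegen's assembler as in the real files):

class ExampleSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());      // Spill live registers.
    // ... move arguments and call the runtime entrypoint ...
    RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());  // Map native pc to dex pc.
    RestoreLiveRegisters(codegen, instruction_->GetLocations());
    __ jmp(GetExitLabel());                                         // Resume the fast path.
  }

 private:
  HInstruction* const instruction_;
};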
@@ -224,7 +224,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { x64_codegen->Move(out, Location::RegisterLocation(RAX)); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -255,7 +255,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); @@ -263,9 +263,9 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { Immediate(instruction_->GetStringIndex())); __ gs()->call(Address::Absolute( QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pResolveString), true)); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -293,7 +293,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(locations); + SaveLiveRegisters(codegen, locations); // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. @@ -312,13 +312,13 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { __ gs()->call( Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pCheckCast), true)); } - codegen->RecordPcInfo(instruction_, dex_pc_); + RecordPcInfo(codegen, instruction_, dex_pc_); if (instruction_->IsInstanceOf()) { x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); } - codegen->RestoreLiveRegisters(locations); + RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -374,7 +374,6 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo } DCHECK(!IsLeafMethod()); - RecordPcInfo(invoke, invoke->GetDexPc()); } void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -750,10 +749,6 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - __ Comment("Unreachable"); - __ int3(); - } } void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { @@ -1216,6 +1211,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi codegen_->GenerateStaticOrDirectCall( invoke, invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index fca9933872..ec0cc3e98b 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -18,7 +18,28 @@ namespace art { +// This visitor tries to simplify operations that yield a constant. For example +// `input * 0` is replaced by the constant 0.
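// (Editorial summary of the rewrites the visitors below perform: x * 0 -> 0,
// x & 0 -> 0, 0 << x -> 0 (likewise for shr/ushr), x | ~0 -> ~0, 0 % x -> 0,
// x % 1 -> 0, x % -1 -> 0, x % x -> 0, x - x -> 0, and x ^ x -> 0, with
// floating-point inputs excluded wherever the identity fails for NaN or
// infinities.)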
+class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { + public: + explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {} + + private: + void VisitShift(HBinaryOperation* shift); + + void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitRem(HRem* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitSub(HSub* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; +}; + void HConstantFolding::Run() { + InstructionWithAbsorbingInputSimplifier simplifier(graph_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second @@ -38,6 +59,8 @@ void HConstantFolding::Run() { inst->AsBinaryOperation()->TryStaticEvaluation(); if (constant != nullptr) { inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); + } else { + inst->Accept(&simplifier); } } else if (inst->IsUnaryOperation()) { // Constant folding: replace `op(a)' with a constant at compile @@ -47,9 +70,166 @@ void HConstantFolding::Run() { if (constant != nullptr) { inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); } + } else if (inst->IsDivZeroCheck()) { + // We can safely remove the check if the input is a non-zero constant. + HDivZeroCheck* check = inst->AsDivZeroCheck(); + HInstruction* check_input = check->InputAt(0); + if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) { + check->ReplaceWith(check_input); + check->GetBlock()->RemoveInstruction(check); + } } } } } +void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); + HInstruction* left = instruction->GetLeft(); + if (left->IsConstant() && left->AsConstant()->IsZero()) { + // Replace code looking like + // SHL dst, 0, shift_amount + // with + // CONSTANT 0 + instruction->ReplaceWith(left); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // AND dst, src, 0 + // with + // CONSTANT 0 + instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + Primitive::Type type = instruction->GetType(); + if (Primitive::IsIntOrLongType(type) && + (input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // MUL dst, src, 0 + // with + // CONSTANT 0 + // Integral multiplication by zero always yields zero, but floating-point + // multiplication by zero does not always do so. For example `Infinity * 0.0` + // should yield a NaN.
+ instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitOr(HOr* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + + if (input_cst == nullptr) { + return; + } + + if (Int64FromConstant(input_cst) == -1) { + // Replace code looking like + // OR dst, src, 0xFFF...FF + // with + // CONSTANT 0xFFF...FF + instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { + Primitive::Type type = instruction->GetType(); + + if (!Primitive::IsIntegralType(type)) { + return; + } + + HBasicBlock* block = instruction->GetBlock(); + + if (instruction->GetLeft()->IsConstant() && + instruction->GetLeft()->AsConstant()->IsZero()) { + // Replace code looking like + // REM dst, 0, src + // with + // CONSTANT 0 + instruction->ReplaceWith(instruction->GetLeft()); + block->RemoveInstruction(instruction); + } + + HConstant* cst_right = instruction->GetRight()->AsConstant(); + if (((cst_right != nullptr) && + (cst_right->IsOne() || cst_right->IsMinusOne())) || + (instruction->GetLeft() == instruction->GetRight())) { + // Replace code looking like + // REM dst, src, 1 + // or + // REM dst, src, -1 + // or + // REM dst, src, src + // with + // CONSTANT 0 + ArenaAllocator* allocator = GetGraph()->GetArena(); + block->ReplaceAndRemoveInstructionWith(instruction, + HConstant::NewConstant(allocator, type, 0)); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitShl(HShl* instruction) { + VisitShift(instruction); +} + +void InstructionWithAbsorbingInputSimplifier::VisitShr(HShr* instruction) { + VisitShift(instruction); +} + +void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) { + Primitive::Type type = instruction->GetType(); + + if (!Primitive::IsIntegralType(type)) { + return; + } + + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + // We assume that GVN has run before, so we only perform a pointer + // comparison. If for some reason the values are equal but the pointers are + // different, we are still correct and only miss an optimisation + // opportunity. + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // SUB dst, src, src + // with + // CONSTANT 0 + // Note that we cannot optimise `x - x` to `0` for floating-point. It does + // not work when `x` is an infinity. 
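The floating-point exclusions in the two comments above (and in the matching `0.0 - x` note that appears later in instruction_simplifier.cc) are easy to demonstrate; a standalone C++ aside, separate from the diff and assuming IEEE-754 arithmetic:

#include <cstdio>
#include <limits>

int main() {
  double inf = std::numeric_limits<double>::infinity();
  std::printf("inf * 0.0 = %f\n", inf * 0.0);  // NaN, so x * 0 -> 0 is integral-only.
  std::printf("inf - inf = %f\n", inf - inf);  // NaN, so x - x -> 0 is integral-only.
  std::printf("0.0 - 0.0 = %f, -(0.0) = %f\n",
              0.0 - 0.0, -0.0);                // +0.0 vs -0.0, so 0 - x -> -x is integral-only.
  return 0;
}

All three identities do hold for two's-complement integers, which is why the visitors guard on IsIntOrLongType and IsIntegralType.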
+ block->ReplaceAndRemoveInstructionWith(instruction, + HConstant::NewConstant(allocator, type, 0)); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitUShr(HUShr* instruction) { + VisitShift(instruction); +} + +void InstructionWithAbsorbingInputSimplifier::VisitXor(HXor* instruction) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // XOR dst, src, src + // with + // CONSTANT 0 + Primitive::Type type = instruction->GetType(); + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + block->ReplaceAndRemoveInstructionWith(instruction, + HConstant::NewConstant(allocator, type, 0)); + } +} + } // namespace art diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index a7f1f74e27..76b9f4fe7e 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -362,6 +362,12 @@ void SSAChecker::VisitPhi(HPhi* phi) { Primitive::PrettyDescriptor(phi->GetType()))); } } + if (phi->GetType() != HPhi::ToPhiType(phi->GetType())) { + AddError(StringPrintf("Phi %d in block %d does not have an expected phi type: %s", + phi->GetId(), + phi->GetBlock()->GetBlockId(), + Primitive::PrettyDescriptor(phi->GetType()))); + } } void SSAChecker::VisitIf(HIf* instruction) { diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index ea65dc0780..74848d5d96 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -16,30 +16,12 @@ #include "gvn.h" #include "side_effects_analysis.h" +#include "utils.h" -namespace art { - -/** - * A node in the collision list of a ValueSet. Encodes the instruction, - * the hash code, and the next node in the collision list. - */ -class ValueSetNode : public ArenaObject<kArenaAllocMisc> { - public: - ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next) - : instruction_(instruction), hash_code_(hash_code), next_(next) {} +#include "utils/arena_bit_vector.h" +#include "base/bit_vector-inl.h" - size_t GetHashCode() const { return hash_code_; } - HInstruction* GetInstruction() const { return instruction_; } - ValueSetNode* GetNext() const { return next_; } - void SetNext(ValueSetNode* node) { next_ = node; } - - private: - HInstruction* const instruction_; - const size_t hash_code_; - ValueSetNode* next_; - - DISALLOW_COPY_AND_ASSIGN(ValueSetNode); -}; +namespace art { /** * A ValueSet holds instructions that can replace other instructions. It is updated @@ -52,39 +34,68 @@ class ValueSetNode : public ArenaObject<kArenaAllocMisc> { */ class ValueSet : public ArenaObject<kArenaAllocMisc> { public: + // Constructs an empty ValueSet which owns all its buckets. explicit ValueSet(ArenaAllocator* allocator) - : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - table_[i] = nullptr; + : allocator_(allocator), + num_buckets_(kMinimumNumberOfBuckets), + buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_owned_(allocator, num_buckets_, false), + num_entries_(0) { + // ArenaAllocator returns zeroed memory, so no need to set buckets to null. + DCHECK(IsPowerOfTwo(num_buckets_)); + buckets_owned_.SetInitialBits(num_buckets_); + } + + // Copy constructor. Depending on the load factor, it will either make a deep + // copy (all buckets owned) or a shallow one (buckets pointing to the parent). 
+ ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy) + : allocator_(allocator), + num_buckets_(to_copy.IdealBucketCount()), + buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_owned_(allocator, num_buckets_, false), + num_entries_(to_copy.num_entries_) { + // ArenaAllocator returns zeroed memory, so entries of buckets_ and + // buckets_owned_ are initialized to nullptr and false, respectively. + DCHECK(IsPowerOfTwo(num_buckets_)); + if (num_buckets_ == to_copy.num_buckets_) { + // Hash table remains the same size. We copy the bucket pointers and leave + // all buckets_owned_ bits false. + memcpy(buckets_, to_copy.buckets_, num_buckets_ * sizeof(Node*)); + } else { + // Hash table size changes. We copy and rehash all entries, and set all + // buckets_owned_ bits to true. + for (size_t i = 0; i < to_copy.num_buckets_; ++i) { + for (Node* node = to_copy.buckets_[i]; node != nullptr; node = node->GetNext()) { + size_t new_index = BucketIndex(node->GetHashCode()); + buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]); + } + } + buckets_owned_.SetInitialBits(num_buckets_); } } // Adds an instruction in the set. void Add(HInstruction* instruction) { DCHECK(Lookup(instruction) == nullptr); - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - if (table_[index] == nullptr) { - table_[index] = instruction; - } else { - collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_); + size_t hash_code = HashCode(instruction); + size_t index = BucketIndex(hash_code); + + if (!buckets_owned_.IsBitSet(index)) { + CloneBucket(index); } - ++number_of_entries_; + buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]); + ++num_entries_; } - // If in the set, returns an equivalent instruction to the given instruction. Returns - // null otherwise. + // If in the set, returns an equivalent instruction to the given instruction. + // Returns null otherwise. HInstruction* Lookup(HInstruction* instruction) const { - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - HInstruction* existing = table_[index]; - if (existing != nullptr && existing->Equals(instruction)) { - return existing; - } + size_t hash_code = HashCode(instruction); + size_t index = BucketIndex(hash_code); - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { if (node->GetHashCode() == hash_code) { - existing = node->GetInstruction(); + HInstruction* existing = node->GetInstruction(); if (existing->Equals(instruction)) { return existing; } @@ -93,126 +104,193 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { return nullptr; } - // Returns whether `instruction` is in the set. - HInstruction* IdentityLookup(HInstruction* instruction) const { - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - HInstruction* existing = table_[index]; - if (existing != nullptr && existing == instruction) { - return existing; - } + // Returns whether instruction is in the set. 
+ bool Contains(HInstruction* instruction) const { + size_t hash_code = HashCode(instruction); + size_t index = BucketIndex(hash_code); - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - if (node->GetHashCode() == hash_code) { - existing = node->GetInstruction(); - if (existing == instruction) { - return existing; - } + for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { + if (node->GetInstruction() == instruction) { + return true; } } - return nullptr; + return false; } - // Removes all instructions in the set that are affected by the given side effects. + // Removes all instructions in the set affected by the given side effects. void Kill(SideEffects side_effects) { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - HInstruction* instruction = table_[i]; - if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) { - table_[i] = nullptr; - --number_of_entries_; - } - } + DeleteAllImpureWhich([side_effects](Node* node) { + return node->GetInstruction()->GetSideEffects().DependsOn(side_effects); + }); + } - for (ValueSetNode* current = collisions_, *previous = nullptr; - current != nullptr; - current = current->GetNext()) { - HInstruction* instruction = current->GetInstruction(); - if (instruction->GetSideEffects().DependsOn(side_effects)) { - if (previous == nullptr) { - collisions_ = current->GetNext(); - } else { - previous->SetNext(current->GetNext()); - } - --number_of_entries_; - } else { - previous = current; - } + // Updates this set by intersecting with instructions in a predecessor's set. + void IntersectWith(ValueSet* predecessor) { + if (IsEmpty()) { + return; + } else if (predecessor->IsEmpty()) { + Clear(); + } else { + // Pure instructions do not need to be tested because only impure + // instructions can be killed. + DeleteAllImpureWhich([predecessor](Node* node) { + return !predecessor->Contains(node->GetInstruction()); + }); } } - // Returns a copy of this set. - ValueSet* Copy() const { - ValueSet* copy = new (allocator_) ValueSet(allocator_); + bool IsEmpty() const { return num_entries_ == 0; } + size_t GetNumberOfEntries() const { return num_entries_; } - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - copy->table_[i] = table_[i]; + private: + class Node : public ArenaObject<kArenaAllocMisc> { + public: + Node(HInstruction* instruction, size_t hash_code, Node* next) + : instruction_(instruction), hash_code_(hash_code), next_(next) {} + + size_t GetHashCode() const { return hash_code_; } + HInstruction* GetInstruction() const { return instruction_; } + Node* GetNext() const { return next_; } + void SetNext(Node* node) { next_ = node; } + + Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) { + return new (allocator) Node(instruction_, hash_code_, new_next); } - // Note that the order will be inverted in the copy. This is fine, as the order is not - // relevant for a ValueSet. - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - copy->collisions_ = new (allocator_) ValueSetNode( - node->GetInstruction(), node->GetHashCode(), copy->collisions_); + private: + HInstruction* const instruction_; + const size_t hash_code_; + Node* next_; + + DISALLOW_COPY_AND_ASSIGN(Node); + }; + + // Creates our own copy of a bucket that is currently pointing to a parent. 
+ // This algorithm can be called while iterating over the bucket because it + // preserves the order of entries in the bucket and will return the clone of + // the given 'iterator'. + Node* CloneBucket(size_t index, Node* iterator = nullptr) { + DCHECK(!buckets_owned_.IsBitSet(index)); + Node* clone_current = nullptr; + Node* clone_previous = nullptr; + Node* clone_iterator = nullptr; + for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { + clone_current = node->Dup(allocator_, nullptr); + if (node == iterator) { + clone_iterator = clone_current; + } + if (clone_previous == nullptr) { + buckets_[index] = clone_current; + } else { + clone_previous->SetNext(clone_current); + } + clone_previous = clone_current; } - - copy->number_of_entries_ = number_of_entries_; - return copy; + buckets_owned_.SetBit(index); + return clone_iterator; } void Clear() { - number_of_entries_ = 0; - collisions_ = nullptr; - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - table_[i] = nullptr; + num_entries_ = 0; + for (size_t i = 0; i < num_buckets_; ++i) { + buckets_[i] = nullptr; } + buckets_owned_.SetInitialBits(num_buckets_); } - // Update this `ValueSet` by intersecting with instructions in `other`. - void IntersectionWith(ValueSet* other) { - if (IsEmpty()) { - return; - } else if (other->IsEmpty()) { - Clear(); - } else { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) { - --number_of_entries_; - table_[i] = nullptr; + // Iterates over buckets with impure instructions (even indices) and deletes + // the ones on which 'cond' returns true. + template<typename Functor> + void DeleteAllImpureWhich(Functor cond) { + for (size_t i = 0; i < num_buckets_; i += 2) { + Node* node = buckets_[i]; + Node* previous = nullptr; + + if (node == nullptr) { + continue; + } + + if (!buckets_owned_.IsBitSet(i)) { + // Bucket is not owned but maybe we won't need to change it at all. + // Iterate as long as the entries don't satisfy 'cond'. + while (node != nullptr) { + if (cond(node)) { + // We do need to delete an entry but we do not own the bucket. + // Clone the bucket, make sure 'previous' and 'node' point to + // the cloned entries and break. + previous = CloneBucket(i, previous); + node = (previous == nullptr) ? buckets_[i] : previous->GetNext(); + break; + } + previous = node; + node = node->GetNext(); } } - for (ValueSetNode* current = collisions_, *previous = nullptr; - current != nullptr; - current = current->GetNext()) { - if (other->IdentityLookup(current->GetInstruction()) == nullptr) { + + // By this point we either own the bucket and can start deleting entries, + // or we do not own it but no entries matched 'cond'. + DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr); + + // We iterate over the remainder of entries and delete those that match + // the given condition. + while (node != nullptr) { + Node* next = node->GetNext(); + if (cond(node)) { if (previous == nullptr) { - collisions_ = current->GetNext(); + buckets_[i] = next; } else { - previous->SetNext(current->GetNext()); + previous->SetNext(next); } - --number_of_entries_; } else { - previous = current; + previous = node; } + node = next; } } } - bool IsEmpty() const { return number_of_entries_ == 0; } - size_t GetNumberOfEntries() const { return number_of_entries_; } + // Computes a bucket count such that the load factor is reasonable. + // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2. 
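  // (Editorial example: with num_entries_ == 12 the estimate is
  // RoundUpToPowerOfTwo(12 + 6) == 32 buckets, i.e. a load factor of
  // 12/32 = 0.375; anything at or below kMinimumNumberOfBuckets stays at 8.)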
+ size_t IdealBucketCount() const { + size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1)); + if (bucket_count > kMinimumNumberOfBuckets) { + return bucket_count; + } else { + return kMinimumNumberOfBuckets; + } + } - private: - static constexpr size_t kDefaultNumberOfEntries = 8; + // Generates a hash code for an instruction. Pure instructions are put into + // odd buckets to speed up deletion. + size_t HashCode(HInstruction* instruction) const { + size_t hash_code = instruction->ComputeHashCode(); + if (instruction->GetSideEffects().HasDependencies()) { + return (hash_code << 1) | 0; + } else { + return (hash_code << 1) | 1; + } + } + + // Converts a hash code to a bucket index. + size_t BucketIndex(size_t hash_code) const { + return hash_code & (num_buckets_ - 1); + } ArenaAllocator* const allocator_; + // The internal bucket implementation of the set. + size_t const num_buckets_; + Node** const buckets_; + + // Flags specifying which buckets were copied into the set from its parent. + // If a flag is not set, the corresponding bucket points to entries in the + // parent and must be cloned prior to making changes. + ArenaBitVector buckets_owned_; + // The number of entries in the set. - size_t number_of_entries_; + size_t num_entries_; - // The internal implementation of the set. It uses a combination of a hash code based - // fixed-size list, and a linked list to handle hash code collisions. - // TODO: Tune the fixed size list original size, and support growing it. - ValueSetNode* collisions_; - HInstruction* table_[kDefaultNumberOfEntries]; + static constexpr size_t kMinimumNumberOfBuckets = 8; DISALLOW_COPY_AND_ASSIGN(ValueSet); }; @@ -270,11 +348,14 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - set = sets_.Get(dominator->GetBlockId()); - if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) { + ValueSet* dominator_set = sets_.Get(dominator->GetBlockId()); + if (dominator->GetSuccessors().Size() == 1) { + DCHECK_EQ(dominator->GetSuccessors().Get(0), block); + set = dominator_set; + } else { // We have to copy if the dominator has other successors, or `block` is not a successor // of the dominator. - set = set->Copy(); + set = new (allocator_) ValueSet(allocator_, *dominator_set); } if (!set->IsEmpty()) { if (block->IsLoopHeader()) { @@ -282,7 +363,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set->Kill(side_effects_.GetLoopEffects(block)); } else if (predecessors.Size() > 1) { for (size_t i = 0, e = predecessors.Size(); i < e; ++i) { - set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId())); + set->IntersectWith(sets_.Get(predecessors.Get(i)->GetBlockId())); if (set->IsEmpty()) { break; } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index b34957a17e..bd9267c4db 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -38,6 +38,11 @@ static constexpr int kMaxInlineCodeUnits = 100; static constexpr int kDepthLimit = 5; void HInliner::Run() { + if (graph_->IsDebuggable()) { + // For simplicity, we currently never inline when the graph is debuggable. This avoids + // doing some logic in the runtime to discover if a method could have been inlined. 
+ return; + } const GrowableArray<HBasicBlock*>& blocks = graph_->GetReversePostOrder(); for (size_t i = 0; i < blocks.Size(); ++i) { HBasicBlock* block = blocks.Get(i); @@ -124,8 +129,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, resolved_method->GetAccessFlags(), nullptr); - HGraph* callee_graph = - new (graph_->GetArena()) HGraph(graph_->GetArena(), graph_->GetCurrentInstructionId()); + HGraph* callee_graph = new (graph_->GetArena()) HGraph( + graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); OptimizingCompilerStats inline_stats; HGraphBuilder builder(callee_graph, @@ -155,15 +160,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, } // Run simple optimizations on the graph. - SsaRedundantPhiElimination redundant_phi(callee_graph); - SsaDeadPhiElimination dead_phi(callee_graph); HDeadCodeElimination dce(callee_graph); HConstantFolding fold(callee_graph); InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { - &redundant_phi, - &dead_phi, &dce, &fold, &simplify, diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fd99070780..2ef19b92a1 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -27,6 +27,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { : HGraphVisitor(graph), stats_(stats) {} private: + void VisitShift(HBinaryOperation* shift); + void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; void VisitEqual(HEqual* equal) OVERRIDE; void VisitArraySet(HArraySet* equal) OVERRIDE; @@ -34,6 +36,16 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitNullCheck(HNullCheck* instruction) OVERRIDE; void VisitArrayLength(HArrayLength* instruction) OVERRIDE; void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + void VisitAdd(HAdd* instruction) OVERRIDE; + void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitDiv(HDiv* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitSub(HSub* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; @@ -43,6 +55,29 @@ void InstructionSimplifier::Run() { visitor.VisitInsertionOrder(); } +namespace { + +bool AreAllBitsSet(HConstant* constant) { + return Int64FromConstant(constant) == -1; +} + +} // namespace + +void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // SHL dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { HInstruction* obj = null_check->InputAt(0); if (!obj->CanBeNull()) { @@ -137,4 +172,234 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + if 
((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // ADD dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { + // Replace code looking like + // AND dst, src, 0xFFF...FF + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + // We assume that GVN has run before, so we only perform a pointer comparison. + // If for some reason the values are equal but the pointers are different, we + // are still correct and only miss an optimisation opportunity. + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // AND dst, src, src + // with + // src + instruction->ReplaceWith(instruction->GetLeft()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + Primitive::Type type = instruction->GetType(); + + if ((input_cst != nullptr) && input_cst->IsOne()) { + // Replace code looking like + // DIV dst, src, 1 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + if ((input_cst != nullptr) && input_cst->IsMinusOne() && + (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + // Replace code looking like + // DIV dst, src, -1 + // with + // NEG dst, src + instruction->GetBlock()->ReplaceAndRemoveInstructionWith( + instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other))); + } +} + +void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + Primitive::Type type = instruction->GetType(); + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + if (input_cst == nullptr) { + return; + } + + if (input_cst->IsOne()) { + // Replace code looking like + // MUL dst, src, 1 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + if (input_cst->IsMinusOne() && + (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + // Replace code looking like + // MUL dst, src, -1 + // with + // NEG dst, src + HNeg* neg = new (allocator) HNeg(type, input_other); + block->ReplaceAndRemoveInstructionWith(instruction, neg); + return; + } + + if (Primitive::IsFloatingPointType(type) && + ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->GetValue() == 2.0f) || + (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->GetValue() == 2.0))) { + // Replace code looking like + // FP_MUL dst, src, 2.0 + // with + // FP_ADD dst, src, src + // The 'int' and 'long' cases are handled below. 
+ block->ReplaceAndRemoveInstructionWith(instruction, + new (allocator) HAdd(type, input_other, input_other)); + return; + } + + if (Primitive::IsIntOrLongType(type)) { + int64_t factor = Int64FromConstant(input_cst); + // We expect the `0` case to have been handled in the constant folding pass. + DCHECK_NE(factor, 0); + if (IsPowerOfTwo(factor)) { + // Replace code looking like + // MUL dst, src, pow_of_2 + // with + // SHL dst, src, log2(pow_of_2) + HIntConstant* shift = new (allocator) HIntConstant(WhichPowerOf2(factor)); + block->InsertInstructionBefore(shift, instruction); + HShl* shl = new (allocator) HShl(type, input_other, shift); + block->ReplaceAndRemoveInstructionWith(instruction, shl); + } + } +} + +void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // OR dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + // We assume that GVN has run before, so we only perform a pointer comparison. + // If for some reason the values are equal but the pointers are different, we + // are still correct and only miss an optimisation opportunity. + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // OR dst, src, src + // with + // src + instruction->ReplaceWith(instruction->GetLeft()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { + VisitShift(instruction); +} + +void InstructionSimplifierVisitor::VisitShr(HShr* instruction) { + VisitShift(instruction); +} + +void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // SUB dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntegralType(type)) { + return; + } + + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + if (instruction->GetLeft()->IsConstant()) { + int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant()); + if (left == 0) { + // Replace code looking like + // SUB dst, 0, src + // with + // NEG dst, src + // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When + // `x` is `0.0`, the former expression yields `0.0`, while the latter + // yields `-0.0`.
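Looking back at the VisitMul hunk above: the power-of-two case is the classic strength reduction from multiply to shift. A standalone sketch of the arithmetic it relies on (editorial; MulByPowerOfTwo is a hypothetical helper, and WhichPowerOf2 in the hunk is assumed to compute the same log2):

#include <cstdint>

// For a power-of-two factor, x * factor equals x << log2(factor).
int64_t MulByPowerOfTwo(int64_t x, int64_t factor) {
  int shift = __builtin_ctzll(static_cast<uint64_t>(factor));  // log2 of a power of two.
  return x << shift;
}

For example, x * 8 rewrites to x << 3; the DCHECK_NE(factor, 0) documents that the zero case has already been folded away by the constant folding pass.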
+ HNeg* neg = new (allocator) HNeg(type, instruction->GetRight()); + block->ReplaceAndRemoveInstructionWith(instruction, neg); + } + } +} + +void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) { + VisitShift(instruction); +} + +void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // XOR dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { + // Replace code looking like + // XOR dst, src, 0xFFF...FF + // with + // NOT dst, src + HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not); + return; + } +} + } // namespace art diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index a82d80af13..0c9eb94172 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -114,12 +114,13 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(invoke_->GetLocations()); + SaveLiveRegisters(codegen, invoke_->GetLocations()); MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; UNREACHABLE(); @@ -133,7 +134,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { MoveFromReturnRegister(out, invoke_->GetType(), codegen); } - codegen->RestoreLiveRegisters(invoke_->GetLocations()); + RestoreLiveRegisters(codegen, invoke_->GetLocations()); __ b(GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 1ddff8a125..19b04ae094 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -122,12 +122,13 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(invoke_->GetLocations()); + SaveLiveRegisters(codegen, invoke_->GetLocations()); MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; UNREACHABLE(); @@ -141,7 +142,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { MoveFromReturnRegister(out, invoke_->GetType(), codegen); } - codegen->RestoreLiveRegisters(invoke_->GetLocations()); + RestoreLiveRegisters(codegen, invoke_->GetLocations()); __ B(GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index c73f092a61..2064b18138 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -134,12 
+134,13 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in); __ Bind(GetEntryLabel()); - codegen->SaveLiveRegisters(invoke_->GetLocations()); + SaveLiveRegisters(codegen, invoke_->GetLocations()); MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; UNREACHABLE(); @@ -153,7 +154,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { MoveFromReturnRegister(out, invoke_->GetType(), codegen); } - codegen->RestoreLiveRegisters(invoke_->GetLocations()); + RestoreLiveRegisters(codegen, invoke_->GetLocations()); __ jmp(GetExitLabel()); } diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 907eff162f..0b0cfde0cf 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -388,44 +388,44 @@ TEST(LivenessTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 8, phi in block 4) + // (constant0, constant4, constant5, phi in block 8) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" - " live in: (01000)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0100)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 3\n" - " live in: (00100)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0010)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 4\n" // loop header - " live in: (00000)\n" - " live out: (00001)\n" - " kill: (00001)\n" + " live in: (0001)\n" + " live out: (0001)\n" + " kill: (0000)\n" "Block 5\n" // back edge - " live in: (00001)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0001)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00001)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 8\n" // synthesized pre header - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00010)\n"; + " live in: (0000)\n" + " live out: (0001)\n" + " kill: (0001)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 198cc15cce..566c0daf1e 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -211,15 +211,25 @@ class Location : public ValueObject { } Location ToLow() const { - return IsRegisterPair() - ? 
Location::RegisterLocation(low()) - : Location::FpuRegisterLocation(low()); + if (IsRegisterPair()) { + return Location::RegisterLocation(low()); + } else if (IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(low()); + } else { + DCHECK(IsDoubleStackSlot()); + return Location::StackSlot(GetStackIndex()); + } } Location ToHigh() const { - return IsRegisterPair() - ? Location::RegisterLocation(high()) - : Location::FpuRegisterLocation(high()); + if (IsRegisterPair()) { + return Location::RegisterLocation(high()); + } else if (IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(high()); + } else { + DCHECK(IsDoubleStackSlot()); + return Location::StackSlot(GetHighStackIndex(4)); + } } static uintptr_t EncodeStackIndex(intptr_t stack_index) { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index e51bbc330a..a90ebced69 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -673,10 +673,43 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { return nullptr; } +HConstant* HBinaryOperation::GetConstantRight() const { + if (GetRight()->IsConstant()) { + return GetRight()->AsConstant(); + } else if (IsCommutative() && GetLeft()->IsConstant()) { + return GetLeft()->AsConstant(); + } else { + return nullptr; + } +} + +// If `GetConstantRight()` returns one of the inputs, this returns the other +// one. Otherwise it returns nullptr. +HInstruction* HBinaryOperation::GetLeastConstantLeft() const { + HInstruction* most_constant_right = GetConstantRight(); + if (most_constant_right == nullptr) { + return nullptr; + } else if (most_constant_right == GetLeft()) { + return GetRight(); + } else { + return GetLeft(); + } +} + bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { return this == if_->GetPreviousDisregardingMoves(); } +HConstant* HConstant::NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val) { + if (type == Primitive::kPrimInt) { + DCHECK(IsInt<32>(val)); + return new (allocator) HIntConstant(val); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + return new (allocator) HLongConstant(val); + } +} + bool HInstruction::Equals(HInstruction* other) const { if (!InstructionTypeEquals(other)) return false; DCHECK_EQ(GetKind(), other->GetKind()); @@ -907,7 +940,8 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } else { if (!returns_void) { // There will be multiple returns. - return_value = new (allocator) HPhi(allocator, kNoRegNumber, 0, invoke->GetType()); + return_value = new (allocator) HPhi( + allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke->GetType())); to->AddPhi(return_value->AsPhi()); } for (size_t i = 0, e = to->GetPredecessors().Size(); i < e; ++i) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 8b56166610..a35fa1d8c3 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -103,7 +103,7 @@ class HInstructionList { // Control-flow graph of a method. Contains a list of basic blocks.
class HGraph : public ArenaObject<kArenaAllocMisc> { public: - HGraph(ArenaAllocator* arena, int start_instruction_id = 0) + HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0) : arena_(arena), blocks_(arena, kDefaultNumberOfBlocks), reverse_post_order_(arena, kDefaultNumberOfBlocks), @@ -114,6 +114,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { number_of_in_vregs_(0), temporaries_vreg_slots_(0), has_array_accesses_(false), + debuggable_(debuggable), current_instruction_id_(start_instruction_id) {} ArenaAllocator* GetArena() const { return arena_; } @@ -132,8 +133,13 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // recognition. Returns whether it was successful in doing all these steps. bool TryBuildingSsa() { BuildDominatorTree(); + // The SSA builder requires all loops to be natural. Specifically, the dead phi + // elimination phase checks the consistency of the graph when doing a post-order + // visit for eliminating dead phis: a dead phi can only have loop header phi + // users remaining when being visited. + if (!AnalyzeNaturalLoops()) return false; TransformToSsa(); - return AnalyzeNaturalLoops(); + return true; } void BuildDominatorTree(); @@ -208,6 +214,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { has_array_accesses_ = value; } + bool IsDebuggable() const { return debuggable_; } + HNullConstant* GetNullConstant(); private: @@ -248,6 +256,11 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Has array accesses. We can totally skip BCE if it's false. bool has_array_accesses_; + // Indicates whether the graph should be compiled in a way that + // ensures full debuggability. If false, we can apply more + // aggressive optimizations that may limit the level of debugging. + const bool debuggable_; + // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; @@ -1096,6 +1109,7 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); } bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); } + bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); } // Does this instruction strictly dominate `other_instruction`? // Returns false if this instruction and `other_instruction` are the same. @@ -1561,6 +1575,14 @@ class HBinaryOperation : public HExpression<2> { virtual int32_t Evaluate(int32_t x, int32_t y) const = 0; virtual int64_t Evaluate(int64_t x, int64_t y) const = 0; + // Returns an input that can legally be used as the right input and is + // constant, or nullptr. + HConstant* GetConstantRight() const; + + // If `GetConstantRight()` returns one of the inputs, this returns the other + // one. Otherwise it returns nullptr.
+ HInstruction* GetLeastConstantLeft() const; + DECLARE_INSTRUCTION(BinaryOperation); private: @@ -1832,6 +1854,12 @@ class HConstant : public HExpression<0> { bool CanBeMoved() const OVERRIDE { return true; } + virtual bool IsMinusOne() const { return false; } + virtual bool IsZero() const { return false; } + virtual bool IsOne() const { return false; } + + static HConstant* NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val); + DECLARE_INSTRUCTION(Constant); private: @@ -1851,6 +1879,16 @@ class HFloatConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + bool IsMinusOne() const OVERRIDE { + return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f)); + } + bool IsZero() const OVERRIDE { + return AsFloatConstant()->GetValue() == 0.0f; + } + bool IsOne() const OVERRIDE { + return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f); + } + DECLARE_INSTRUCTION(FloatConstant); private: @@ -1872,6 +1910,16 @@ class HDoubleConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + bool IsMinusOne() const OVERRIDE { + return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0)); + } + bool IsZero() const OVERRIDE { + return AsDoubleConstant()->GetValue() == 0.0; + } + bool IsOne() const OVERRIDE { + return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0); + } + DECLARE_INSTRUCTION(DoubleConstant); private: @@ -1917,6 +1965,10 @@ class HIntConstant : public HConstant { // method is a workaround until we fix the above. bool ActAsNullConstant() const OVERRIDE { return value_ == 0; } + bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } + bool IsZero() const OVERRIDE { return GetValue() == 0; } + bool IsOne() const OVERRIDE { return GetValue() == 1; } + DECLARE_INSTRUCTION(IntConstant); private: @@ -1937,6 +1989,10 @@ class HLongConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } + bool IsZero() const OVERRIDE { return GetValue() == 0; } + bool IsOne() const OVERRIDE { return GetValue() == 1; } + DECLARE_INSTRUCTION(LongConstant); private: @@ -2498,6 +2554,19 @@ class HPhi : public HInstruction { inputs_.SetSize(number_of_inputs); } + // Returns a type equivalent to the given `type`, but that an `HPhi` can hold. + static Primitive::Type ToPhiType(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + return Primitive::kPrimInt; + default: + return type; + } + } + size_t InputCount() const OVERRIDE { return inputs_.Size(); } void AddInput(HInstruction* input); @@ -3289,8 +3358,19 @@ class HParallelMove : public HTemplateInstruction<0> { if (kIsDebugBuild) { if (instruction != nullptr) { for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) - << "Doing parallel moves for the same instruction."; + if (moves_.Get(i).GetInstruction() == instruction) { + // Special case the situation where the move is for the spill slot + // of the instruction.
+ if ((GetPrevious() == instruction) + || ((GetPrevious() == nullptr) + && instruction->IsPhi() + && instruction->GetBlock() == GetBlock())) { + DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + << "Doing parallel moves for the same instruction."; + } else { + DCHECK(false) << "Doing parallel moves for the same instruction."; + } + } } } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { @@ -3441,6 +3521,12 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +inline int64_t Int64FromConstant(HConstant* constant) { + DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); + return constant->IsIntConstant() ? constant->AsIntConstant()->GetValue() + : constant->AsLongConstant()->GetValue(); +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index eb984248a9..b70f9252ae 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -173,24 +173,40 @@ class OptimizingCompiler FINAL : public Compiler { jobject class_loader, const DexFile& dex_file) const OVERRIDE; + CompiledMethod* TryCompile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const; + CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, - const DexFile& dex_file) const OVERRIDE; + const DexFile& dex_file) const OVERRIDE { + return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file); + } uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( + InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); + } bool WriteElf(art::File* file, OatWriter* oat_writer, const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, - bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } - void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {} + void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; void Init() OVERRIDE; - void UnInit() const OVERRIDE {} + void UnInit() const OVERRIDE; private: // Whether we should run any optimization or register allocation. If false, will @@ -214,6 +230,9 @@ class OptimizingCompiler FINAL : public Compiler { std::unique_ptr<std::ostream> visualizer_output_; + // Delegate to Quick in case the optimizing compiler cannot compile a method. 
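The `delegate_` member declared just below enables a simple fallback chain: the optimizing path gets the first shot at a method, and anything it declines is handed to Quick. A minimal sketch of that TryCompile/Compile control flow, with invented Compiler and CompiledMethod stand-ins for the ART types:

#include <cstdio>
#include <memory>
#include <string>

struct CompiledMethod { std::string name; };

struct Compiler {
  virtual ~Compiler() = default;
  virtual std::unique_ptr<CompiledMethod> Compile(const std::string& name) = 0;
};

// Stand-in for Quick: in this sketch the fallback always succeeds.
struct QuickLike : Compiler {
  std::unique_ptr<CompiledMethod> Compile(const std::string& name) override {
    return std::unique_ptr<CompiledMethod>(new CompiledMethod{name});
  }
};

// Mirrors the TryCompile/Compile split: a null result from the primary
// path routes the method to the delegate.
struct OptimizingLike : Compiler {
  explicit OptimizingLike(std::unique_ptr<Compiler> delegate)
      : delegate_(std::move(delegate)) {}

  std::unique_ptr<CompiledMethod> TryCompile(const std::string& name) {
    // Pretend only specially marked methods are supported here.
    if (name.find("$opt$") == std::string::npos) return nullptr;
    return std::unique_ptr<CompiledMethod>(new CompiledMethod{name});
  }

  std::unique_ptr<CompiledMethod> Compile(const std::string& name) override {
    std::unique_ptr<CompiledMethod> method = TryCompile(name);
    return method != nullptr ? std::move(method) : delegate_->Compile(name);
  }

  std::unique_ptr<Compiler> delegate_;
};

int main() {
  OptimizingLike compiler(std::unique_ptr<Compiler>(new QuickLike));
  std::printf("%s\n", compiler.Compile("foo")->name.c_str());  // Delegated.
  return 0;
}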
+ std::unique_ptr<Compiler> delegate_; + DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler); }; @@ -222,10 +241,13 @@ static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : Compiler(driver, kMaximumCompilationTimeBeforeWarning), run_optimizations_( - driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime), - compilation_stats_() {} + (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) + && !driver->GetCompilerOptions().GetDebuggable()), + compilation_stats_(), + delegate_(Create(driver, Compiler::Kind::kQuick)) {} void OptimizingCompiler::Init() { + delegate_->Init(); // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. CompilerDriver* driver = GetCompilerDriver(); @@ -238,34 +260,24 @@ void OptimizingCompiler::Init() { } } +void OptimizingCompiler::UnInit() const { + delegate_->UnInit(); +} + OptimizingCompiler::~OptimizingCompiler() { compilation_stats_.Log(); } +void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const { + delegate_->InitCompilationUnit(cu); +} + bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, const DexFile& dex_file ATTRIBUTE_UNUSED, CompilationUnit* cu ATTRIBUTE_UNUSED) const { return true; } -CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, - uint32_t method_idx, - const DexFile& dex_file) const { - return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file); -} - -uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const { - return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( - InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); -} - -bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer, - const std::vector<const art::DexFile*>& dex_files, - const std::string& android_root, bool is_host) const { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); -} - static bool IsInstructionSetSupported(InstructionSet instruction_set) { return instruction_set == kArm64 || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat) @@ -298,8 +310,6 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - SsaRedundantPhiElimination redundant_phi(graph); - SsaDeadPhiElimination dead_phi(graph); HDeadCodeElimination dce(graph); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); @@ -317,8 +327,6 @@ static void RunOptimizations(HGraph* graph, IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); HOptimization* optimizations[] = { - &redundant_phi, - &dead_phi, &intrinsics, &dce, &fold1, @@ -425,13 +433,13 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( ArrayRef<const uint8_t>()); } -CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file) const { +CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& 
dex_file) const { UNUSED(invoke_type); std::string method_name = PrettyMethod(method_idx, dex_file); compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); @@ -461,12 +469,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = new (&arena) HGraph( + &arena, compiler_driver->GetCompilerOptions().GetDebuggable()); // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. bool shouldCompile = method_name.find("$opt$") != std::string::npos; - bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos; + bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_; std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, @@ -504,6 +513,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, bool can_optimize = CanOptimize(*code_item); bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); + + // `run_optimizations_` is only disabled explicitly (either through a compiler filter + // or the debuggable flag), in which case baseline is the expected output. Otherwise, + // when a method cannot be optimized, we fall back to Quick. + bool can_use_baseline = !run_optimizations_; if (run_optimizations_ && can_optimize && can_allocate_registers) { VLOG(compiler) << "Optimizing " << method_name; @@ -523,10 +537,10 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_file, dex_compilation_unit, &pass_info_printer); - } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { + } else if (shouldOptimize && can_allocate_registers) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); - } else { + } else if (can_use_baseline) { VLOG(compiler) << "Compile baseline " << method_name; if (!run_optimizations_) { @@ -538,7 +552,25 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); + } else { + return nullptr; + } +} + +CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const { + CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, + method_idx, class_loader, dex_file); + if (method != nullptr) { + return method; } + return delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, + class_loader, dex_file); } Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index fe23fcf326..c20c8a172d 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -33,7 +33,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_ // to merge with a void type, we should use the existing one. return new_type == Primitive::kPrimVoid ?
existing - : new_type; + : HPhi::ToPhiType(new_type); } } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 748ab2259e..cecc210cbf 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -16,6 +16,7 @@ #include "register_allocator.h" +#include <iostream> #include <sstream> #include "base/bit_vector-inl.h" @@ -32,6 +33,9 @@ static constexpr size_t kDefaultNumberOfSpillSlots = 4; // allocate SRegister. static int GetHighForLowRegister(int reg) { return reg + 1; } static bool IsLowRegister(int reg) { return (reg & 1) == 0; } +static bool IsLowOfUnalignedPairInterval(LiveInterval* low) { + return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister(); +} RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, CodeGenerator* codegen, @@ -70,28 +74,13 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); } -bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, +bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, InstructionSet instruction_set) { - if (!Supports(instruction_set)) { - return false; - } - if (instruction_set == kArm64 + return instruction_set == kArm64 || instruction_set == kX86_64 || instruction_set == kArm - || instruction_set == kThumb2) { - return true; - } - for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { - for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions()); - !it.Done(); - it.Advance()) { - HInstruction* current = it.Current(); - if (instruction_set == kX86 && current->GetType() == Primitive::kPrimLong) { - return false; - } - } - } - return true; + || instruction_set == kX86 + || instruction_set == kThumb2; } static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { @@ -771,8 +760,15 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { return false; } - if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) { - return false; + if (current->IsLowInterval()) { + // If the high register of this interval is not available, we need to spill. + int high_reg = current->GetHighInterval()->GetRegister(); + if (high_reg == kNoRegister) { + high_reg = GetHighForLowRegister(reg); + } + if (free_until[high_reg] == 0) { + return false; + } } current->SetRegister(reg); @@ -831,16 +827,18 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { return reg; } -bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position, - size_t first_register_use, - size_t* next_use) { +bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); DCHECK(active->HasRegister()); + if (active->IsFixed()) continue; + if (active->IsHighInterval()) continue; + if (first_register_use > next_use[active->GetRegister()]) continue; + // Split the first interval found. 
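IsLowOfUnalignedPairInterval above encodes the constraint that a register pair should occupy (low, low + 1); a pair whose high half landed elsewhere is unaligned and becomes a candidate for splitting. A self-contained restatement of that check, reusing the even-low convention from GetHighForLowRegister; PairInterval is an invented stand-in for a low LiveInterval and its high sibling:

#include <cassert>

// Same conventions as the allocator: the high half of a pair lives in the
// register right after the low half, and an aligned pair starts on an even
// register.
static int GetHighForLowRegister(int reg) { return reg + 1; }
static bool IsLowRegister(int reg) { return (reg & 1) == 0; }

// Invented stand-in for a low LiveInterval together with its high sibling.
struct PairInterval {
  int low_reg;
  int high_reg;
};

// Mirrors IsLowOfUnalignedPairInterval: the pair is unaligned whenever the
// high half did not land on low + 1.
static bool IsUnalignedPair(const PairInterval& pair) {
  return GetHighForLowRegister(pair.low_reg) != pair.high_reg;
}

int main() {
  PairInterval aligned{2, 3};    // r2/r3: a well-formed pair.
  PairInterval unaligned{1, 3};  // r1/r3: blocks allocation, worth splitting.
  assert(!IsUnalignedPair(aligned));
  assert(IsUnalignedPair(unaligned));
  assert(IsLowRegister(2) && !IsLowRegister(1));
  return 0;
}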
- if (first_register_use <= next_use[active->GetRegister()] - && !active->IsLowInterval() - && !active->IsHighInterval()) { + if (!active->IsLowInterval() || IsLowOfUnalignedPairInterval(active)) { LiveInterval* split = Split(active, position); active_.DeleteAt(i); if (split != active) { @@ -921,7 +919,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // When allocating the low part, we made sure the high register was available. DCHECK_LT(first_register_use, next_use[reg]); } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use, current->GetStart()); + reg = FindAvailableRegisterPair(next_use, first_register_use); // We should spill if both registers are not available. should_spill = (first_register_use >= next_use[reg]) || (first_register_use >= next_use[GetHighForLowRegister(reg)]); @@ -934,14 +932,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK_NE(reg, kNoRegister); if (should_spill) { DCHECK(!current->IsHighInterval()); - bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1)); + bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); if (current->IsLowInterval() && is_allocation_at_use_site - && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) { + && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), + first_register_use, + next_use)) { // If we're allocating a register for `current` because the instruction at // that position requires it, but we think we should spill, then there are - // non-pair intervals blocking the allocation. We split the first - // interval found, and put ourselves first in the `unhandled_` list. + // non-pair intervals or unaligned pair intervals blocking the allocation. + // We split the first interval found, and put ourselves first in the + // `unhandled_` list. LiveInterval* existing = unhandled_->Peek(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); @@ -951,10 +952,15 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); LiveInterval* split = Split(current, first_register_use - 1); - DCHECK_NE(current, split) << "There is not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName() << " " - << split->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1; + if (current == split) { + DumpInterval(std::cerr, current); + DumpAllIntervals(std::cerr); + // This situation can lead to an infinite loop, so we make it a non-debug CHECK. + CHECK(false) << "There are not enough registers available for " + << split->GetParent()->GetDefinedBy()->DebugName() << " " + << split->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1; + } AddSorted(unhandled_, split); } return false; @@ -1203,7 +1209,24 @@ static bool IsValidDestination(Location destination) { || destination.IsDoubleStackSlot(); } -void RegisterAllocator::AddInputMoveFor(HInstruction* user, +void RegisterAllocator::AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const { + if (type == Primitive::kPrimLong + && codegen_->ShouldSplitLongMoves() + // The parallel move resolver knows how to deal with long constants.
+ && !source.IsConstant()) { + move->AddMove(source.ToLow(), destination.ToLow(), instruction); + move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr); + } else { + move->AddMove(source, destination, instruction); + } +} + +void RegisterAllocator::AddInputMoveFor(HInstruction* input, + HInstruction* user, Location source, Location destination) const { if (source.Equals(destination)) return; @@ -1222,7 +1245,7 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user, move = previous->AsParallelMove(); } DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); - move->AddMove(source, destination, nullptr); + AddMove(move, source, destination, nullptr, input->GetType()); } static bool IsInstructionStart(size_t position) { @@ -1251,8 +1274,16 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, at = liveness_.GetInstructionFromPosition((position + 1) / 2); // Note that parallel moves may have already been inserted, so we explicitly // ask for the first instruction of the block: `GetInstructionFromPosition` does - // not contain the moves. + // not contain the `HParallelMove` instructions. at = at->GetBlock()->GetFirstInstruction(); + + if (at->GetLifetimePosition() < position) { + // We may insert moves for split siblings and phi spills at the beginning of the block. + // Since this is a different lifetime position, we need to go to the next instruction. + DCHECK(at->IsParallelMove()); + at = at->GetNext(); + } + if (at->GetLifetimePosition() != position) { DCHECK_GT(at->GetLifetimePosition(), position); move = new (allocator_) HParallelMove(allocator_); @@ -1294,7 +1325,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, } } DCHECK_EQ(move->GetLifetimePosition(), position); - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, @@ -1324,7 +1355,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, } else { move = previous->AsParallelMove(); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, @@ -1336,14 +1367,15 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, HInstruction* first = block->GetFirstInstruction(); HParallelMove* move = first->AsParallelMove(); + size_t position = block->GetLifetimeStart(); // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and input moves. 
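The AddMove helper above splits a 64-bit move into two 32-bit parallel moves when the code generator requests it (ShouldSplitLongMoves), while constants are left whole for the parallel move resolver. A standalone sketch of that low/high decomposition on plain integers; MoveRec and the boolean flags are illustrative stand-ins, not the ART API:

#include <cstdint>
#include <cstdio>
#include <vector>

// One record per parallel move; `half` tags which part of the value moved.
struct MoveRec {
  const char* half;  // "low", "high", or "full" when the move is not split.
  uint64_t payload;
};

static uint32_t Low32Bits(uint64_t value) { return static_cast<uint32_t>(value); }
static uint32_t High32Bits(uint64_t value) { return static_cast<uint32_t>(value >> 32); }

// Mirrors the shape of AddMove: a long move becomes two 32-bit records when
// the (hypothetical) target cannot move 64 bits at once; constants stay whole.
static void AddMove(std::vector<MoveRec>* moves, uint64_t value,
                    bool should_split_long_moves, bool is_constant) {
  if (should_split_long_moves && !is_constant) {
    moves->push_back({"low", Low32Bits(value)});
    moves->push_back({"high", High32Bits(value)});
  } else {
    moves->push_back({"full", value});
  }
}

int main() {
  std::vector<MoveRec> moves;
  AddMove(&moves, 0x1122334455667788ULL, /*should_split_long_moves=*/true,
          /*is_constant=*/false);
  for (const MoveRec& m : moves) {
    std::printf("%s: %llx\n", m.half, static_cast<unsigned long long>(m.payload));
  }
  return 0;
}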
- if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) { + if (move == nullptr || move->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(block->GetLifetimeStart()); + move->SetLifetimePosition(position); block->InsertInstructionBefore(move, first); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, @@ -1367,7 +1399,7 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, move->SetLifetimePosition(position); instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { @@ -1402,7 +1434,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (expected_location.IsUnallocated()) { locations->SetInAt(use->GetInputIndex(), source); } else if (!expected_location.IsConstant()) { - AddInputMoveFor(use->GetUser(), source, expected_location); + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); } } else { DCHECK(use->GetUser()->IsInvoke()); @@ -1657,7 +1689,7 @@ void RegisterAllocator::Resolve() { Location source = input->GetLiveInterval()->GetLocationAt( predecessor->GetLifetimeEnd() - 1); Location destination = phi->GetLiveInterval()->ToLocation(); - InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination); + InsertParallelMoveAtExitOf(predecessor, phi, source, destination); } } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 579f069f5e..fcc61128a6 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -66,13 +66,6 @@ class RegisterAllocator { bool log_fatal_on_failure); static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); - static bool Supports(InstructionSet instruction_set) { - return instruction_set == kArm - || instruction_set == kArm64 - || instruction_set == kThumb2 - || instruction_set == kX86 - || instruction_set == kX86_64; - } size_t GetNumberOfSpillSlots() const { return int_spill_slots_.Size() @@ -121,12 +114,21 @@ class RegisterAllocator { Location source, Location destination) const; void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; - void AddInputMoveFor(HInstruction* user, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* input, + HInstruction* user, + Location source, + Location destination) const; void InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, Location destination) const; + void AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const; + // Helper methods. void AllocateRegistersInternal(); void ProcessInstruction(HInstruction* instruction); @@ -136,9 +138,11 @@ class RegisterAllocator { int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; int FindAvailableRegister(size_t* next_use) const; - // Try splitting an active non-pair interval at the given `position`. + // Try splitting an active non-pair or unaligned pair interval at the given `position`. 
// Returns whether it was successful at finding such an interval. - bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use); + bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use); ArenaAllocator* const allocator_; CodeGenerator* const codegen_; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 3dc75059b2..ba11e90d9c 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,158 @@ namespace art { +/** + * A debuggable application may require reviving phis, to ensure their + * associated DEX register is available to a debugger. This class implements + * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It + * also makes sure that phis with incompatible input types are not revived + * (statement (b) of the SsaBuilder). + * + * This phase must be run after detecting dead phis through the + * DeadPhiElimination phase, and before deleting the dead phis. + */ +class DeadPhiHandling : public ValueObject { + public: + explicit DeadPhiHandling(HGraph* graph) + : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + + void Run(); + + private: + void VisitBasicBlock(HBasicBlock* block); + void ProcessWorklist(); + void AddToWorklist(HPhi* phi); + void AddDependentInstructionsToWorklist(HPhi* phi); + bool UpdateType(HPhi* phi); + + HGraph* const graph_; + GrowableArray<HPhi*> worklist_; + + static constexpr size_t kDefaultWorklistSize = 8; + + DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); +}; + +bool DeadPhiHandling::UpdateType(HPhi* phi) { + Primitive::Type existing = phi->GetType(); + DCHECK(phi->IsLive()); + + bool conflict = false; + Primitive::Type new_type = existing; + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (input->IsPhi() && input->AsPhi()->IsDead()) { + // We are doing a reverse post order visit of the graph, reviving + // phis that have environment uses and updating their types. If an + // input is a phi, and it is dead (because its input types are + // conflicting), this phi must be marked dead as well. + conflict = true; + break; + } + Primitive::Type input_type = HPhi::ToPhiType(input->GetType()); + + // The only acceptable transitions are: + // - From void to typed: first time we update the type of this phi. + // - From int to reference (or reference to int): the phi has to change + // to reference type. If the integer input cannot be converted to a + // reference input, the phi will remain dead. + if (new_type == Primitive::kPrimVoid) { + new_type = input_type; + } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { + HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); + if (equivalent == nullptr) { + conflict = true; + break; + } else { + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); + // We created a new phi, but that phi has the same inputs as the old phi. We + // add it to the worklist to ensure its inputs can also be converted to reference. + // If not, it will remain dead, and the algorithm will make the current phi dead + // as well.
+ equivalent->AsPhi()->SetLive(); + AddToWorklist(equivalent->AsPhi()); + } + } + } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { + new_type = Primitive::kPrimNot; + // Start over, we may request reference equivalents for the inputs of the phi. + i = -1; + } else if (new_type != input_type) { + conflict = true; + break; + } + } + + if (conflict) { + phi->SetType(Primitive::kPrimVoid); + phi->SetDead(); + return true; + } else { + DCHECK(phi->IsLive()); + phi->SetType(new_type); + return existing != new_type; + } +} + +void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + if (phi->IsDead() && phi->HasEnvironmentUses()) { + phi->SetLive(); + if (block->IsLoopHeader()) { + // Give a type to the loop phi, to guarantee convergence of the algorithm. + phi->SetType(phi->InputAt(0)->GetType()); + AddToWorklist(phi); + } else { + // Because we are doing a reverse post order visit, all inputs of + // this phi have been visited and therefore had their (initial) type set. + UpdateType(phi); + } + } + } +} + +void DeadPhiHandling::ProcessWorklist() { + while (!worklist_.IsEmpty()) { + HPhi* instruction = worklist_.Pop(); + // Note that the same equivalent phi can be added multiple times in the work list, if + // used by multiple phis. The first call to `UpdateType` will know whether the phi is + // dead or live. + if (instruction->IsLive() && UpdateType(instruction)) { + AddDependentInstructionsToWorklist(instruction); + } + } +} + +void DeadPhiHandling::AddToWorklist(HPhi* instruction) { + DCHECK(instruction->IsLive()); + worklist_.Add(instruction); +} + +void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) { + for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->GetUser()->AsPhi(); + if (phi != nullptr && !phi->IsDead()) { + AddToWorklist(phi); + } + } +} + +void DeadPhiHandling::Run() { + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } + ProcessWorklist(); +} + +static bool IsPhiEquivalentOf(HInstruction* instruction, HPhi* phi) { + return instruction != nullptr + && instruction->IsPhi() + && instruction->AsPhi()->GetRegNumber() == phi->GetRegNumber(); +} + void SsaBuilder::BuildSsa() { // 1) Visit in reverse post order. We need to have all predecessors of a block visited // (with the exception of loops) in order to create the right environment for that @@ -47,11 +199,9 @@ void SsaBuilder::BuildSsa() { // our code generator will complain if the inputs of a phi do not have the same // type. The marking allows the type propagation to know which phis it needs // to handle. We mark but do not eliminate: the elimination will be done in - // step 5). - { - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.MarkDeadPhis(); - } + // step 9). + SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph()); + dead_phis_for_type_propagation.MarkDeadPhis(); // 4) Propagate types of phis. At this point, phis are typed void in the general // case, or float/double/reference when we created an equivalent phi. So we @@ -59,17 +209,62 @@ void SsaBuilder::BuildSsa() { PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Step 4) changes inputs of phis which may lead to dead phis again. We re-run - // the algorithm and this time elimimates them. 
- // TODO: Make this work with debug info and reference liveness. We currently - // eagerly remove phis used in environments. - { - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.Run(); + // 5) Mark dead phis again. Step 4) may have introduced new phis. + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.MarkDeadPhis(); + + // 6) Now that the graph is correctly typed, we can get rid of redundant phis. + // Note that we cannot do this phase before type propagation, otherwise + // we could get rid of phi equivalents, whose presence is a requirement for the + // type propagation phase. Note that this is to satisfy statement (a) of the + // SsaBuilder (see ssa_builder.h). + SsaRedundantPhiElimination redundant_phi(GetGraph()); + redundant_phi.Run(); + + // 7) Make sure environments use the right phi "equivalent": a phi marked dead + // can have a phi equivalent that is not dead. We must therefore update + // all environment uses of the dead phi to use its equivalent. Note that there + // can be multiple phis for the same Dex register that are live (for example + // when merging constants), in which case it is OK for the environments + // to just reference one. + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) { + HPhi* phi = it_phis.Current()->AsPhi(); + // If the phi is not dead, or has no environment uses, there is nothing to do. + if (!phi->IsDead() || !phi->HasEnvironmentUses()) continue; + HInstruction* next = phi->GetNext(); + if (!IsPhiEquivalentOf(next, phi)) continue; + if (next->AsPhi()->IsDead()) { + // If the phi equivalent is dead, check if there is another one. + next = next->GetNext(); + if (!IsPhiEquivalentOf(next, phi)) continue; + // There can be at most two phi equivalents. + DCHECK(!IsPhiEquivalentOf(next->GetNext(), phi)); + if (next->AsPhi()->IsDead()) continue; + } + // We found a live phi equivalent. Update the environment uses of `phi` with it. + phi->ReplaceWith(next); + } } - // 6) Clear locals. - // TODO: Move this to a dead code eliminator phase. + // 8) Deal with phis to guarantee liveness of phis in case of a debuggable + // application. This is for satisfying statement (c) of the SsaBuilder + // (see ssa_builder.h). + if (GetGraph()->IsDebuggable()) { + DeadPhiHandling dead_phi_handler(GetGraph()); + dead_phi_handler.Run(); + } + + // 9) Now that the right phis are used for the environments, and we + // have potentially revived dead phis in case of a debuggable application, + // we can eliminate phis we do not need. Regardless of the debuggable status, + // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h), + // as well as for the code generation, which does not deal with phis of conflicting + // input types. + dead_phis.EliminateDeadPhis(); + + // 10) Clear locals.
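The DeadPhiHandling pass wired in above (steps 5 through 9) is a classic worklist fixed point: a reverse post order pass seeds phi types, and whenever UpdateType changes a phi, its dependent phis are re-enqueued until nothing changes. A compact standalone model of that convergence loop over a toy phi graph; the Phi and Type definitions are invented for illustration, only the re-enqueue-on-change discipline mirrors the pass:

#include <cstdio>
#include <vector>

// Toy phi type lattice: Void below Int and Ref; a mismatch is a conflict.
enum class Type { kVoid, kInt, kRef, kConflict };

struct Phi {
  Type type = Type::kVoid;
  std::vector<int> inputs;  // Indices of phis feeding this one.
  std::vector<int> users;   // Indices of phis consuming this one.
};

static Type Merge(Type a, Type b) {
  if (a == Type::kVoid) return b;
  if (b == Type::kVoid || a == b) return a;
  return Type::kConflict;
}

// Returns true when the phi's type changed, mirroring UpdateType().
static bool UpdateType(std::vector<Phi>& phis, int index) {
  Type merged = Type::kVoid;
  for (int input : phis[index].inputs) merged = Merge(merged, phis[input].type);
  if (merged == phis[index].type) return false;
  phis[index].type = merged;
  return true;
}

int main() {
  // phi0 and phi1 feed phi2; phi2 feeds phi3.
  std::vector<Phi> phis(4);
  phis[0].type = Type::kInt;
  phis[1].type = Type::kInt;
  phis[2].inputs = {0, 1};
  phis[2].users = {3};
  phis[3].inputs = {2};

  std::vector<int> worklist = {2, 3};
  while (!worklist.empty()) {
    int index = worklist.back();
    worklist.pop_back();
    if (UpdateType(phis, index)) {
      // Re-enqueue dependent phis, like AddDependentInstructionsToWorklist().
      for (int user : phis[index].users) worklist.push_back(user);
    }
  }
  std::printf("phi3 type: %d\n", static_cast<int>(phis[3].type));  // kInt.
  return 0;
}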
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); it.Advance()) { @@ -257,12 +452,12 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { - if (value->IsIntConstant()) { - DCHECK_EQ(value->AsIntConstant()->GetValue(), 0); + if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) { return value->GetBlock()->GetGraph()->GetNullConstant(); - } else { - DCHECK(value->IsPhi()); + } else if (value->IsPhi()) { return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot); + } else { + return nullptr; } } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index f50da46040..24dc449513 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -24,6 +24,28 @@ namespace art { static constexpr int kDefaultNumberOfLoops = 2; +/** + * Transforms a graph into SSA form. The liveness guarantees of + * this transformation are listed below. A DEX register + * being killed means its value at a given position in the code + * will not be available to its environment uses. A merge in the + * following text is materialized as a `HPhi`. + * + * (a) Dex registers that do not require merging (that is, they do not + * have different values at a join block) are available to all their + * environment uses. Note that it does not imply the instruction will + * have a physical location after register allocation. See the + * SsaLivenessAnalysis phase. + * + * (b) Dex registers that require merging, and the merging gives + * incompatible types, will be killed for environment uses of that merge. + * + * (c) When the `debuggable` flag is passed to the compiler, Dex registers + * that require merging and have a proper type after the merge, are + * available to all their environment uses. If the `debuggable` flag + * is not set, values of Dex registers only used by environments + * are killed. + */ class SsaBuilder : public HGraphVisitor { public: explicit SsaBuilder(HGraph* graph) diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index d009390a0f..c0d6f42ca5 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -230,11 +230,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (current->HasEnvironment()) { - // All instructions in the environment must be live. + // Handle environment uses. See statements (b) and (c) of the + // SsaLivenessAnalysis. HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); - if (instruction != nullptr) { + if (ShouldBeLiveForEnvironment(instruction)) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); instruction->GetLiveInterval()->AddUse(current, i, true); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 9ff2f205d8..b57029d1a7 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -302,7 +302,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { first_range_->start_ = from; } else { // Instruction without uses. 
- DCHECK(!defined_by_->HasUses()); + DCHECK(!defined_by_->HasNonEnvironmentUses()); DCHECK(from == defined_by_->GetLifetimePosition()); first_range_ = last_range_ = new (allocator_) LiveRange(from, from + 2, nullptr); } @@ -373,13 +373,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (location.IsUnallocated()) { if ((location.GetPolicy() == Location::kRequiresRegister) || (location.GetPolicy() == Location::kSameAsFirstInput - && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) { + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { return position; } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) || (location.GetPolicy() == Location::kSameAsFirstInput && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { return position; } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return position; } } @@ -794,6 +798,22 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { DISALLOW_COPY_AND_ASSIGN(LiveInterval); }; +/** + * Analysis that computes the liveness of instructions: + * + * (a) Non-environment uses of an instruction always make + * the instruction live. + * (b) Environment uses of an instruction whose type is + * object (that is, non-primitive), make the instruction live. + * This is due to having to keep alive objects that have + * finalizers deleting native objects. + * (c) When the graph has the debuggable property, environment uses + * of an instruction that has a primitive type make the instruction live. + * If the graph does not have the debuggable property, the environment + * use has no effect, and may get a 'none' value after register allocation. + * + * (b) and (c) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. + */ class SsaLivenessAnalysis : public ValueObject { public: SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen) @@ -878,6 +898,12 @@ class SsaLivenessAnalysis : public ValueObject { // Update the live_out set of the block and returns whether it has changed. 
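The liveness statements (a) through (c) above reduce to a single predicate per environment use, implemented by ShouldBeLiveForEnvironment just below. An isolated restatement of the rule as a pure function over a two-field stand-in type (Instr is invented, not the ART class):

#include <cassert>

// Invented stand-in carrying just the two facts the rule needs.
struct Instr {
  bool is_reference;      // Type is an object (kPrimNot)?
  bool graph_debuggable;  // Graph compiled with the debuggable flag?
};

// Statement (b): object values stay live for their environment uses, since
// objects with finalizers must be kept alive. Statement (c): with the
// debuggable flag, primitive values stay live too; otherwise an environment
// use alone does not keep the value alive.
static bool ShouldBeLiveForEnvironment(const Instr* instr) {
  if (instr == nullptr) return false;
  if (instr->graph_debuggable) return true;
  return instr->is_reference;
}

int main() {
  Instr prim_release{false, false};
  Instr prim_debug{false, true};
  Instr ref_release{true, false};
  assert(!ShouldBeLiveForEnvironment(&prim_release));
  assert(ShouldBeLiveForEnvironment(&prim_debug));
  assert(ShouldBeLiveForEnvironment(&ref_release));
  assert(!ShouldBeLiveForEnvironment(nullptr));
  return 0;
}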
bool UpdateLiveOut(const HBasicBlock& block); + static bool ShouldBeLiveForEnvironment(HInstruction* instruction) { + if (instruction == nullptr) return false; + if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true; + return instruction->GetType() == Primitive::kPrimNot; + } + const HGraph& graph_; CodeGenerator* const codegen_; GrowableArray<HBasicBlock*> linear_order_; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index a05b38c9eb..00c241b85a 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -332,8 +332,8 @@ TEST(SsaTest, Loop5) { const char* expected = "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [4, 4]\n" - " 1: IntConstant 4 [14]\n" - " 2: IntConstant 5 [14]\n" + " 1: IntConstant 4 [13]\n" + " 2: IntConstant 5 [13]\n" " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 3, 2\n" " 4: Equal(0, 0) [5]\n" @@ -343,18 +343,17 @@ TEST(SsaTest, Loop5) { "BasicBlock 3, pred: 1, succ: 8\n" " 7: Goto\n" "BasicBlock 4, pred: 8, 5, succ: 6, 5\n" - " 8: Phi(14, 8) [8, 12, 9, 9]\n" - " 9: Equal(8, 8) [10]\n" - " 10: If(9)\n" + " 8: Equal(13, 13) [9]\n" + " 9: If(8)\n" "BasicBlock 5, pred: 4, succ: 4\n" - " 11: Goto\n" + " 10: Goto\n" "BasicBlock 6, pred: 4, succ: 7\n" - " 12: Return(8)\n" + " 11: Return(13)\n" "BasicBlock 7, pred: 6\n" - " 13: Exit\n" + " 12: Exit\n" "BasicBlock 8, pred: 2, 3, succ: 4\n" - " 14: Phi(1, 2) [8]\n" - " 15: Goto\n"; + " 13: Phi(1, 2) [8, 8, 11]\n" + " 14: Goto\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 5283d5dcca..63a02862b4 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -20,6 +20,7 @@ #include "base/bit_vector.h" #include "base/value_object.h" #include "memory_region.h" +#include "nodes.h" #include "stack_map.h" #include "utils/growable_array.h" @@ -32,8 +33,9 @@ namespace art { class StackMapStream : public ValueObject { public: explicit StackMapStream(ArenaAllocator* allocator) - : stack_maps_(allocator, 10), - dex_register_maps_(allocator, 10 * 4), + : allocator_(allocator), + stack_maps_(allocator, 10), + dex_register_locations_(allocator, 10 * 4), inline_infos_(allocator, 2), stack_mask_max_(-1), number_of_stack_maps_with_inline_info_(0) {} @@ -52,13 +54,9 @@ class StackMapStream : public ValueObject { BitVector* sp_mask; uint32_t num_dex_registers; uint8_t inlining_depth; - size_t dex_register_maps_start_index; + size_t dex_register_locations_start_index; size_t inline_infos_start_index; - }; - - struct DexRegisterEntry { - DexRegisterMap::LocationKind kind; - int32_t value; + BitVector* live_dex_registers_mask; }; struct InlineInfoEntry { @@ -78,8 +76,14 @@ class StackMapStream : public ValueObject { entry.sp_mask = sp_mask; entry.num_dex_registers = num_dex_registers; entry.inlining_depth = inlining_depth; - entry.dex_register_maps_start_index = dex_register_maps_.Size(); + entry.dex_register_locations_start_index = dex_register_locations_.Size(); entry.inline_infos_start_index = inline_infos_.Size(); + if (num_dex_registers != 0) { + entry.live_dex_registers_mask = + new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); + } else { + entry.live_dex_registers_mask = nullptr; + } stack_maps_.Add(entry); if (sp_mask != nullptr) { @@ -90,13 +94,6 @@ class StackMapStream : public ValueObject { } } - void AddDexRegisterEntry(DexRegisterMap::LocationKind kind, int32_t value) { - 
DexRegisterEntry entry; - entry.kind = kind; - entry.value = value; - dex_register_maps_.Add(entry); - } - void AddInlineInfoEntry(uint32_t method_index) { InlineInfoEntry entry; entry.method_index = method_index; @@ -104,47 +101,75 @@ class StackMapStream : public ValueObject { } size_t ComputeNeededSize() const { - return CodeInfo::kFixedSize - + ComputeStackMapSize() - + ComputeDexRegisterMapSize() + size_t size = CodeInfo::kFixedSize + + ComputeStackMapsSize() + + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); + // On ARM, CodeInfo data must be 4-byte aligned. + return RoundUp(size, kWordAlignment); + } + + size_t ComputeStackMaskSize() const { + return StackMaskEncodingSize(stack_mask_max_); + } + + size_t ComputeStackMapsSize() const { + return stack_maps_.Size() * StackMap::ComputeStackMapSize(ComputeStackMaskSize()); } - size_t ComputeStackMapSize() const { - return stack_maps_.Size() * StackMap::ComputeAlignedStackMapSize(stack_mask_max_); + // Compute the size of the Dex register map of `entry`. + size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + size_t size = DexRegisterMap::kFixedSize; + // Add the bit mask for the dex register liveness. + size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); + for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + dex_register_number < entry.num_dex_registers; + ++dex_register_number) { + if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + DexRegisterLocation dex_register_location = dex_register_locations_.Get( + entry.dex_register_locations_start_index + index_in_dex_register_locations); + size += DexRegisterMap::EntrySize(dex_register_location); + index_in_dex_register_locations++; + } + } + return size; } - size_t ComputeDexRegisterMapSize() const { - // We currently encode all dex register information per stack map. - return stack_maps_.Size() * DexRegisterMap::kFixedSize - // For each dex register entry. - + (dex_register_maps_.Size() * DexRegisterMap::SingleEntrySize()); + // Compute the size of all the Dex register maps. + size_t ComputeDexRegisterMapsSize() const { + size_t size = 0; + for (size_t i = 0; i < stack_maps_.Size(); ++i) { + size += ComputeDexRegisterMapSize(stack_maps_.Get(i)); + } + return size; } + // Compute the size of all the inline information pieces. size_t ComputeInlineInfoSize() const { return inline_infos_.Size() * InlineInfo::SingleEntrySize() // For encoding the depth. 
+ (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } - size_t ComputeInlineInfoStart() const { - return ComputeDexRegisterMapStart() + ComputeDexRegisterMapSize(); + size_t ComputeDexRegisterMapsStart() const { + return CodeInfo::kFixedSize + ComputeStackMapsSize(); } - size_t ComputeDexRegisterMapStart() const { - return CodeInfo::kFixedSize + ComputeStackMapSize(); + size_t ComputeInlineInfoStart() const { + return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize(); } void FillIn(MemoryRegion region) { CodeInfo code_info(region); + DCHECK_EQ(region.size(), ComputeNeededSize()); code_info.SetOverallSize(region.size()); - size_t stack_mask_size = StackMaskEncodingSize(stack_mask_max_); + size_t stack_mask_size = ComputeStackMaskSize(); uint8_t* memory_start = region.start(); - MemoryRegion dex_register_maps_region = region.Subregion( - ComputeDexRegisterMapStart(), - ComputeDexRegisterMapSize()); + MemoryRegion dex_register_locations_region = region.Subregion( + ComputeDexRegisterMapsStart(), + ComputeDexRegisterMapsSize()); MemoryRegion inline_infos_region = region.Subregion( ComputeInlineInfoStart(), @@ -152,6 +177,7 @@ class StackMapStream : public ValueObject { code_info.SetNumberOfStackMaps(stack_maps_.Size()); code_info.SetStackMaskSize(stack_mask_size); + DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize()); uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; @@ -167,20 +193,34 @@ class StackMapStream : public ValueObject { } if (entry.num_dex_registers != 0) { - // Set the register map. - MemoryRegion register_region = dex_register_maps_region.Subregion( - next_dex_register_map_offset, - DexRegisterMap::kFixedSize - + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); + // Set the Dex register map. + MemoryRegion register_region = + dex_register_locations_region.Subregion( + next_dex_register_map_offset, + ComputeDexRegisterMapSize(entry)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); - for (size_t j = 0; j < entry.num_dex_registers; ++j) { - DexRegisterEntry register_entry = - dex_register_maps_.Get(j + entry.dex_register_maps_start_index); - dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + // Offset in `dex_register_map` where to store the next register entry. + size_t offset = DexRegisterMap::kFixedSize; + dex_register_map.SetLiveBitMask(offset, + entry.num_dex_registers, + *entry.live_dex_registers_mask); + offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); + for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + dex_register_number < entry.num_dex_registers; + ++dex_register_number) { + if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + DexRegisterLocation dex_register_location = dex_register_locations_.Get( + entry.dex_register_locations_start_index + index_in_dex_register_locations); + dex_register_map.SetRegisterInfo(offset, dex_register_location); + offset += DexRegisterMap::EntrySize(dex_register_location); + ++index_in_dex_register_locations; + } } + // Ensure we reached the end of the Dex registers region. 
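The per-entry sizing above works because each Dex register map now stores a liveness bit mask followed by one variable-size entry per live register, so a dead (kNone) register costs only its mask bit. A standalone sketch of that size computation; the entry widths (1 byte short, 5 bytes large constant) are assumptions for illustration, and only the mask-plus-live-entries layout mirrors ComputeDexRegisterMapSize:

#include <cstddef>
#include <cstdio>
#include <vector>

enum class Kind { kNone, kInStack, kInRegister, kConstant };

struct Location {
  Kind kind;
  int value;
};

// Invented sizing: 1 byte for short entries, 5 for large constants, in the
// spirit of the short/large split exercised by the stack map tests.
static size_t EntrySize(const Location& location) {
  if (location.kind == Kind::kConstant && (location.value < 0 || location.value > 15)) {
    return 5;  // "Large" constant: tag byte + 4 value bytes.
  }
  return 1;
}

// Mask bits (one per Dex register, rounded up to whole bytes) plus one entry
// per *live* register only: kNone registers are encoded purely in the mask.
static size_t ComputeMapSize(const std::vector<Location>& locations) {
  size_t size = (locations.size() + 7) / 8;
  for (const Location& location : locations) {
    if (location.kind != Kind::kNone) size += EntrySize(location);
  }
  return size;
}

int main() {
  // Register 0 dead, register 1 holds the constant -2 (large encoding).
  std::vector<Location> locations = {{Kind::kNone, 0}, {Kind::kConstant, -2}};
  std::printf("map size: %zu bytes\n", ComputeMapSize(locations));  // 1 + 5.
  return 0;
}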
+ DCHECK_EQ(offset, register_region.size()); } else { stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap); } @@ -206,13 +246,28 @@ class StackMapStream : public ValueObject { } } + void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { + if (kind != DexRegisterLocation::Kind::kNone) { + // Ensure we only use non-compressed location kind at this stage. + DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) + << DexRegisterLocation::PrettyDescriptor(kind); + dex_register_locations_.Add(DexRegisterLocation(kind, value)); + stack_maps_.Get(stack_maps_.Size() - 1).live_dex_registers_mask->SetBit(dex_register); + } + } + private: + ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; - GrowableArray<DexRegisterEntry> dex_register_maps_; + GrowableArray<DexRegisterLocation> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; size_t number_of_stack_maps_with_inline_info_; + ART_FRIEND_TEST(StackMapTest, Test1); + ART_FRIEND_TEST(StackMapTest, Test2); + ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters); + DISALLOW_COPY_AND_ASSIGN(StackMapStream); }; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 5b025106ac..87ac2e79e9 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -22,7 +22,7 @@ namespace art { -bool SameBits(MemoryRegion region, const BitVector& bit_vector) { +static bool SameBits(MemoryRegion region, const BitVector& bit_vector) { for (size_t i = 0; i < region.size_in_bits(); ++i) { if (region.LoadBit(i) != bit_vector.IsBitSet(i)) { return false; @@ -31,11 +31,6 @@ bool SameBits(MemoryRegion region, const BitVector& bit_vector) { return true; } -size_t ComputeDexRegisterMapSize(size_t number_of_dex_registers) { - return DexRegisterMap::kFixedSize - + number_of_dex_registers * DexRegisterMap::SingleEntrySize(); -} - TEST(StackMapTest, Test1) { ArenaPool pool; ArenaAllocator arena(&pool); @@ -44,8 +39,8 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2); + stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); + stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -67,14 +62,16 @@ TEST(StackMapTest, Test1) { ASSERT_TRUE(SameBits(stack_mask, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_registers = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(16u, dex_registers.Size()); - ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers)); - ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); - ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); - ASSERT_EQ(0, dex_registers.GetValue(0)); - ASSERT_EQ(-2, dex_registers.GetValue(1)); + DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_EQ(7u, dex_registers.Size()); + DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); + DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); + 
ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); ASSERT_FALSE(stack_map.HasInlineInfo()); } @@ -89,8 +86,8 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); - stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2); + stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); + stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); @@ -98,8 +95,8 @@ TEST(StackMapTest, Test2) { sp_mask2.SetBit(3); sp_mask1.SetBit(8); stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kInRegister, 18); - stream.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, 3); + stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18); + stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3); size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -111,53 +108,95 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); // First stack map. - StackMap stack_map = code_info.GetStackMapAt(0); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); - ASSERT_EQ(0u, stack_map.GetDexPc()); - ASSERT_EQ(64u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + { + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + + MemoryRegion stack_mask = stack_map.GetStackMask(); + ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_registers = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_EQ(7u, dex_registers.Size()); + DexRegisterLocation location0 = + dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); + DexRegisterLocation location1 = + dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); + + ASSERT_TRUE(stack_map.HasInlineInfo()); + InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map); + ASSERT_EQ(2u, inline_info.GetDepth()); + ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); + ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1)); + } - MemoryRegion stack_mask = stack_map.GetStackMask(); - ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + // Second stack 
map. + { + StackMap stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + ASSERT_EQ(128u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0xFFu, stack_map.GetRegisterMask()); + + MemoryRegion stack_mask = stack_map.GetStackMask(); + ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_registers = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_EQ(3u, dex_registers.Size()); + DexRegisterLocation location0 = + dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); + DexRegisterLocation location1 = + dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); + ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind()); + ASSERT_EQ(18, location0.GetValue()); + ASSERT_EQ(3, location1.GetValue()); + + ASSERT_FALSE(stack_map.HasInlineInfo()); + } +} - ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_registers = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(16u, dex_registers.Size()); - ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers)); - ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); - ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); - ASSERT_EQ(0, dex_registers.GetValue(0)); - ASSERT_EQ(-2, dex_registers.GetValue(1)); - - ASSERT_TRUE(stack_map.HasInlineInfo()); - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map); - ASSERT_EQ(2u, inline_info.GetDepth()); - ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); - ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1)); +TEST(StackMapTest, TestNonLiveDexRegisters) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena); - // Second stack map. 
- stack_map = code_info.GetStackMapAt(1); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u))); - ASSERT_EQ(1u, stack_map.GetDexPc()); - ASSERT_EQ(128u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0xFFu, stack_map.GetRegisterMask()); + ArenaBitVector sp_mask(&arena, 0, false); + uint32_t number_of_dex_registers = 2; + stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0); + stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); - stack_mask = stack_map.GetStackMask(); - ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); + size_t size = stream.ComputeNeededSize(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + CodeInfo code_info(region); + StackMap stack_map = code_info.GetStackMapAt(0); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - dex_registers = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(16u, dex_registers.Size()); - ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers)); - ASSERT_EQ(DexRegisterMap::kInRegister, dex_registers.GetLocationKind(0)); - ASSERT_EQ(DexRegisterMap::kInFpuRegister, dex_registers.GetLocationKind(1)); - ASSERT_EQ(18, dex_registers.GetValue(0)); - ASSERT_EQ(3, dex_registers.GetValue(1)); - + DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); + ASSERT_EQ(DexRegisterLocation::Kind::kNone, + dex_registers.GetLocationKind(0, number_of_dex_registers)); + ASSERT_EQ(DexRegisterLocation::Kind::kConstant, + dex_registers.GetLocationKind(1, number_of_dex_registers)); + ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers)); ASSERT_FALSE(stack_map.HasInlineInfo()); }
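The assertions in these tests distinguish GetKind() from GetInternalKind(): a constant such as -2 that does not fit the short form is stored internally as kConstantLargeValue but still reported to clients as kConstant. A toy model of that canonicalization; the enum and the widths it implies are assumptions for illustration, not the real encoding:

#include <cassert>

enum class Kind { kNone, kInStack, kConstant, kConstantLargeValue };

// Internal kinds record how the value was physically stored; the public kind
// hides the short/large distinction from callers.
static Kind ToPublicKind(Kind internal_kind) {
  return internal_kind == Kind::kConstantLargeValue ? Kind::kConstant
                                                    : internal_kind;
}

int main() {
  // -2 would not fit an (assumed) unsigned short form, so it would be
  // recorded with the large internal kind yet read back as a plain constant.
  assert(ToPublicKind(Kind::kConstantLargeValue) == Kind::kConstant);
  assert(ToPublicKind(Kind::kInStack) == Kind::kInStack);
  return 0;
}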