Diffstat (limited to 'compiler/optimizing')
43 files changed, 4532 insertions, 1294 deletions
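The ARM hunks below replace the builder's old "reject volatile fields" path with real code generation: HandleFieldGet/HandleFieldSet read FieldInfo::IsVolatile(), emit DMB barriers through GenerateMemoryBarrier, and use an ldrexd/strexd retry loop (GenerateWideAtomicLoad/GenerateWideAtomicStore) for 64-bit fields when the core lacks atomic ldrd/strd. As a rough orientation only — this is not ART code, and the names here are illustrative — a plain C++ analogue of the barrier placement around a volatile 64-bit field access:

    #include <atomic>
    #include <cstdint>

    // Stand-in for the Java field's storage; on ARMv7 std::atomic<int64_t>
    // is typically lock-free via the same ldrexd/strexd sequences.
    std::atomic<int64_t> g_field{0};

    // Analogue of HandleFieldSet for a volatile long/double:
    // kAnyStore barrier, 64-bit atomic store, kAnyAny barrier.
    void VolatileStore(int64_t value) {
      std::atomic_thread_fence(std::memory_order_release);  // kAnyStore
      g_field.store(value, std::memory_order_relaxed);      // wide atomic store
      std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny
    }

    // Analogue of HandleFieldGet for a volatile long/double:
    // 64-bit atomic load, then kLoadAny barrier.
    int64_t VolatileLoad() {
      int64_t value = g_field.load(std::memory_order_relaxed);  // wide atomic load
      std::atomic_thread_fence(std::memory_order_acquire);      // kLoadAny
      return value;
    }

In the generated ARM code, GenerateMemoryBarrier lowers kAnyStore, kLoadAny and kAnyAny to dmb ish; only kStoreStore uses dmb ishst.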
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f9054e0133..9c2facb75e 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, HInvoke* invoke = nullptr; if (optimized_invoke_type == kVirtual) { invoke = new (arena_) HInvokeVirtual( - arena_, number_of_arguments, return_type, dex_pc, table_index); + arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); } else if (optimized_invoke_type == kInterface) { invoke = new (arena_) HInvokeInterface( arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); @@ -670,10 +670,6 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField); return false; } - if (resolved_field->IsVolatile()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); - return false; - } Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); @@ -689,20 +685,20 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, null_check, value, field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction(new (arena_) HInstanceFieldGet( current_block_->GetLastInstruction(), field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; } - - bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { @@ -723,11 +719,6 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, return false; } - if (resolved_field->IsVolatile()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); - return false; - } - Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass( soa, dex_cache, class_loader, outer_compilation_unit_))); @@ -763,10 +754,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, HInstruction* value = LoadLocal(source_or_dest_reg, field_type); DCHECK_EQ(value->GetType(), field_type); current_block_->AddInstruction( - new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction( - new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6f424ce11d..9665b0e2ae 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { + GetGraph()->GetTemporariesVRegSlots() + 1 /* filler */, 0, /* the baseline compiler does not have live registers at slow path */ + 0, /* the baseline compiler does not have live registers at slow path */ GetGraph()->GetMaximumNumberOfOutVRegs() + 1 /* current method */); GenerateFrameEntry(); @@ -136,14 +137,16 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* 
array, size_t l } void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_registers, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots) { first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; SetFrameSize(RoundUp( number_of_spill_slots * kVRegSize + number_of_out_slots * kVRegSize - + maximum_number_of_live_registers * GetWordSize() + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + FrameEntrySpillSize(), kStackAlignment)); } @@ -325,24 +328,27 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con return current->GetBlockId() + 1 == next->GetBlockId(); } -CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, - HGraph* graph, - InstructionSet instruction_set) { +CodeGenerator* CodeGenerator::Create(HGraph* graph, + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) { switch (instruction_set) { case kArm: case kThumb2: { - return new (allocator) arm::CodeGeneratorARM(graph); + return new arm::CodeGeneratorARM(graph, + *isa_features.AsArmInstructionSetFeatures(), + compiler_options); } case kArm64: { - return new (allocator) arm64::CodeGeneratorARM64(graph); + return new arm64::CodeGeneratorARM64(graph, compiler_options); } case kMips: return nullptr; case kX86: { - return new (allocator) x86::CodeGeneratorX86(graph); + return new x86::CodeGeneratorX86(graph, compiler_options); } case kX86_64: { - return new (allocator) x86_64::CodeGeneratorX86_64(graph); + return new x86_64::CodeGeneratorX86_64(graph, compiler_options); } default: return nullptr; @@ -374,7 +380,7 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, SrcMap* src_map) const { +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const { uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; @@ -618,12 +624,62 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kFpuRegisterPair : { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high()); + ++i; + DCHECK_LT(i, environment_size); + break; + } + + case Location::kRegisterPair : { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high()); + ++i; + DCHECK_LT(i, environment_size); + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } } +bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { + HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); + return (first_next_not_move != nullptr) && first_next_not_move->CanDoImplicitNullCheck(); +} + +void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { + // If we are from a static path don't record the pc as we can't throw NPE. + // NB: having the checks here makes the code much less verbose in the arch + // specific code generators. 
+ if (instr->IsStaticFieldSet() || instr->IsStaticFieldGet()) { + return; + } + + if (!compiler_options_.GetImplicitNullChecks()) { + return; + } + + if (!instr->CanDoImplicitNullCheck()) { + return; + } + + // Find the first previous instruction which is not a move. + HInstruction* first_prev_not_move = instr->GetPreviousDisregardingMoves(); + + // If the instruction is a null check it means that `instr` is the first user + // and needs to record the pc. + if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) { + HNullCheck* null_check = first_prev_not_move->AsNullCheck(); + // TODO: The parallel moves modify the environment. Their changes need to be reverted + // otherwise the stack maps at the throw point will not be correct. + RecordPcInfo(null_check, null_check->GetDexPc()); + } +} + void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); size_t stack_offset = first_register_slot_in_slow_path_; @@ -684,11 +740,9 @@ void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend } void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) { - MoveOperands move1(from1, to1, nullptr); - MoveOperands move2(from2, to2, nullptr); HParallelMove parallel_move(GetGraph()->GetArena()); - parallel_move.AddMove(&move1); - parallel_move.AddMove(&move2); + parallel_move.AddMove(from1, to1, nullptr); + parallel_move.AddMove(from2, to2, nullptr); GetMoveResolver()->EmitNativeCode(¶llel_move); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 1d42c47d56..f66aed912a 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -18,7 +18,9 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/bit_field.h" +#include "driver/compiler_options.h" #include "globals.h" #include "locations.h" #include "memory_region.h" @@ -44,7 +46,10 @@ class Assembler; class CodeGenerator; class DexCompilationUnit; class ParallelMoveResolver; +class SrcMapElem; +template <class Alloc> class SrcMap; +using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>; class CodeAllocator { public: @@ -73,15 +78,17 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { DISALLOW_COPY_AND_ASSIGN(SlowPathCode); }; -class CodeGenerator : public ArenaObject<kArenaAllocMisc> { +class CodeGenerator { public: // Compiles the graph to executable instructions. Returns whether the compilation // succeeded. 
void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false); void CompileOptimized(CodeAllocator* allocator); - static CodeGenerator* Create(ArenaAllocator* allocator, - HGraph* graph, - InstructionSet instruction_set); + static CodeGenerator* Create(HGraph* graph, + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); + virtual ~CodeGenerator() {} HGraph* GetGraph() const { return graph_; } @@ -104,9 +111,11 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; + virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_registers, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots); virtual size_t FrameEntrySpillSize() const = 0; int32_t GetStackSlot(HLocal* local) const; @@ -123,6 +132,9 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; + + const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } + // Saves the register in the stack. Returns the size taken on stack. virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. @@ -137,8 +149,11 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { UNIMPLEMENTED(FATAL); UNREACHABLE(); } + virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); + bool CanMoveNullCheckToUser(HNullCheck* null_check); + void MaybeRecordImplicitNullCheck(HInstruction* instruction); void AddSlowPath(SlowPathCode* slow_path) { slow_paths_.Add(slow_path); @@ -146,7 +161,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { void GenerateSlowPaths(); - void BuildMappingTable(std::vector<uint8_t>* vector, SrcMap* src_map) const; + void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; @@ -192,7 +207,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { CodeGenerator(HGraph* graph, size_t number_of_core_registers, size_t number_of_fpu_registers, - size_t number_of_register_pairs) + size_t number_of_register_pairs, + const CompilerOptions& compiler_options) : frame_size_(kUninitializedFrameSize), core_spill_mask_(0), first_register_slot_in_slow_path_(0), @@ -203,11 +219,11 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { number_of_fpu_registers_(number_of_fpu_registers), number_of_register_pairs_(number_of_register_pairs), graph_(graph), + compiler_options_(compiler_options), pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), is_leaf_(true), stack_map_stream_(graph->GetArena()) {} - ~CodeGenerator() {} // Register allocation logic. 
void AllocateRegistersLocally(HInstruction* instruction) const; @@ -242,6 +258,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { size_t GetStackOffsetOfSavedRegister(size_t index); HGraph* const graph_; + const CompilerOptions& compiler_options_; GrowableArray<PcInfo> pc_infos_; GrowableArray<SlowPathCode*> slow_paths_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 002d9d4449..8c07b46173 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" @@ -36,7 +37,10 @@ static DRegister FromLowSToD(SRegister reg) { return static_cast<DRegister>(reg / 2); } -static constexpr bool kExplicitStackOverflowCheck = false; +static bool ExpectedPairLayout(Location location) { + // We expected this for both core and fpu register pairs. + return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); +} static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7 static constexpr int kCurrentMethodStackOffset = 0; @@ -255,8 +259,8 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0)); - __ LoadImmediate(calling_convention.GetRegisterAt(1), instruction_->GetStringIndex()); + arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); @@ -372,13 +376,27 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } -CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) - : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), +size_t CodeGeneratorARM::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ StoreSToOffset(static_cast<SRegister>(reg_id), SP, stack_index); + return kArmWordSize; +} + +size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ LoadSFromOffset(static_cast<SRegister>(reg_id), SP, stack_index); + return kArmWordSize; +} + +CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) + : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, + kNumberOfRegisterPairs, compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(true) {} + assembler_(true), + isa_features_(isa_features) {} size_t CodeGeneratorARM::FrameEntrySpillSize() const { return kNumberOfPushedRegistersAtEntry * kArmWordSize; @@ -499,17 +517,17 @@ void CodeGeneratorARM::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); if (!skip_overflow_check) { - if (kExplicitStackOverflowCheck) { + if (GetCompilerOptions().GetImplicitStackOverflowChecks()) { + __ AddConstant(IP, SP, 
-static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); + __ LoadFromOffset(kLoadWord, IP, IP, 0); + RecordPcInfo(nullptr, 0); + } else { SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM(); AddSlowPath(slow_path); __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value()); __ cmp(SP, ShifterOperand(IP)); __ b(slow_path->GetEntryLabel(), CC); - } else { - __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); - __ LoadFromOffset(kLoadWord, IP, IP, 0); - RecordPcInfo(nullptr, 0); } } @@ -577,11 +595,17 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type gp_index_ += 2; stack_index_ += 2; if (index + 1 < calling_convention.GetNumberOfRegisters()) { - ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair( - calling_convention.GetRegisterPairAt(index)); - return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); - } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { - return Location::QuickParameter(index, stack_index); + if (calling_convention.GetRegisterAt(index) == R1) { + // Skip R1, and use R2_R3 instead. + gp_index_++; + index++; + } + } + if (index + 1 < calling_convention.GetNumberOfRegisters()) { + DCHECK_EQ(calling_convention.GetRegisterAt(index) + 1, + calling_convention.GetRegisterAt(index + 1)); + return Location::RegisterPairLocation(calling_convention.GetRegisterAt(index), + calling_convention.GetRegisterAt(index + 1)); } else { return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); } @@ -606,9 +630,11 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) { uint32_t index = double_index_; double_index_ += 2; - return Location::FpuRegisterPairLocation( + Location result = Location::FpuRegisterPairLocation( calling_convention.GetFpuRegisterAt(index), calling_convention.GetFpuRegisterAt(index + 1)); + DCHECK(ExpectedPairLayout(result)); + return result; } else { return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); } @@ -698,27 +724,11 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { Location::RegisterLocation(destination.AsRegisterPairLow<Register>())); } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); - } else if (source.IsQuickParameter()) { - uint16_t register_index = source.GetQuickParameterRegisterIndex(); - uint16_t stack_index = source.GetQuickParameterStackIndex(); - InvokeDexCallingConvention calling_convention; - EmitParallelMoves( - Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), - Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), - Location::StackSlot( - calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()), - Location::RegisterLocation(destination.AsRegisterPairHigh<Register>())); } else { - // No conflict possible, so just do the moves. 
DCHECK(source.IsDoubleStackSlot()); - if (destination.AsRegisterPairLow<Register>() == R1) { - DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2); - __ LoadFromOffset(kLoadWord, R1, SP, source.GetStackIndex()); - __ LoadFromOffset(kLoadWord, R2, SP, source.GetHighStackIndex(kArmWordSize)); - } else { - __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(), - SP, source.GetStackIndex()); - } + DCHECK(ExpectedPairLayout(destination)); + __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(), + SP, source.GetStackIndex()); } } else if (destination.IsFpuRegisterPair()) { if (source.IsDoubleStackSlot()) { @@ -728,22 +738,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { } else { UNIMPLEMENTED(FATAL); } - } else if (destination.IsQuickParameter()) { - InvokeDexCallingConvention calling_convention; - uint16_t register_index = destination.GetQuickParameterRegisterIndex(); - uint16_t stack_index = destination.GetQuickParameterStackIndex(); - if (source.IsRegisterPair()) { - UNIMPLEMENTED(FATAL); - } else if (source.IsFpuRegister()) { - UNIMPLEMENTED(FATAL); - } else { - DCHECK(source.IsDoubleStackSlot()); - EmitParallelMoves( - Location::StackSlot(source.GetStackIndex()), - Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), - Location::StackSlot(source.GetHighStackIndex(kArmWordSize)), - Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1))); - } } else { DCHECK(destination.IsDoubleStackSlot()); if (source.IsRegisterPair()) { @@ -756,17 +750,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { __ StoreToOffset(kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex()); } - } else if (source.IsQuickParameter()) { - InvokeDexCallingConvention calling_convention; - uint16_t register_index = source.GetQuickParameterRegisterIndex(); - uint16_t stack_index = source.GetQuickParameterStackIndex(); - // Just move the low part. The only time a source is a quick parameter is - // when moving the parameter to its stack locations. And the (Java) caller - // of this method has already done that. - __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index), - SP, destination.GetStackIndex()); - DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(), - static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize))); } else if (source.IsFpuRegisterPair()) { __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), SP, @@ -799,7 +782,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr __ LoadImmediate(IP, value); __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex()); } - } else if (const_to_move->IsLongConstant()) { + } else { + DCHECK(const_to_move->IsLongConstant()) << const_to_move->DebugName(); int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegisterPair()) { __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value)); @@ -951,6 +935,7 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. 
LocationSummary* locations = cond->GetLocations(); + DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0); Register left = locations->InAt(0).AsRegister<Register>(); if (locations->InAt(1).IsRegister()) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); @@ -1196,7 +1181,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); // temp = temp[index_in_cache] __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())); + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); // LR = temp[offset_of_quick_compiled_code] __ LoadFromOffset(kLoadWord, LR, temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( @@ -1240,6 +1225,7 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { } else { __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value(); @@ -1278,6 +1264,7 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) } else { __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value(); @@ -1296,7 +1283,9 @@ void LocationsBuilderARM::VisitNeg(HNeg* neg) { switch (neg->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - bool output_overlaps = (neg->GetResultType() == Primitive::kPrimLong); + Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong) + ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap; locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), output_overlaps); break; @@ -1823,12 +1812,17 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong); + case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } @@ -1863,7 +1857,8 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { } break; - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); __ adds(out.AsRegisterPairLow<Register>(), first.AsRegisterPairLow<Register>(), ShifterOperand(second.AsRegisterPairLow<Register>())); @@ -1871,6 +1866,7 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { first.AsRegisterPairHigh<Register>(), ShifterOperand(second.AsRegisterPairHigh<Register>())); break; + } case Primitive::kPrimFloat: __ vadds(out.AsFpuRegister<SRegister>(), @@ -1893,12 +1889,17 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong); + case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } case Primitive::kPrimFloat: @@ -1933,6 +1934,7 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); __ subs(out.AsRegisterPairLow<Register>(), first.AsRegisterPairLow<Register>(), ShifterOperand(second.AsRegisterPairLow<Register>())); @@ -2068,8 +2070,7 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); locations->SetInAt(1, Location::RegisterPairLocation( calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - // The runtime helper puts the output in R0,R2. 
- locations->SetOut(Location::RegisterPairLocation(R0, R2)); + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; } case Primitive::kPrimFloat: @@ -2106,7 +2107,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>()); DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc()); break; @@ -2289,8 +2290,8 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { locations->SetInAt(0, Location::RegisterPairLocation( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - // The runtime helper puts the output in R0,R2. - locations->SetOut(Location::RegisterPairLocation(R0, R2)); + // The runtime helper puts the output in R0,R1. + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; } default: @@ -2344,7 +2345,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>()); DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); int32_t entry_point_offset; if (op->IsShl()) { @@ -2409,14 +2410,14 @@ void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(R0)); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc()); @@ -2556,68 +2557,172 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit Arm barrier kinds + DmbOptions flavour = DmbOptions::ISH; // quiet c++ warnings + switch (kind) { + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kAnyAny: { + flavour = DmbOptions::ISH; + break; + } + case MemBarrierKind::kStoreStore: { + flavour = DmbOptions::ISHST; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ dmb(flavour); 
+} + +void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, + uint32_t offset, + Register out_lo, + Register out_hi) { + if (offset != 0) { + __ LoadImmediate(out_lo, offset); + __ add(IP, addr, ShifterOperand(out_lo)); + addr = IP; + } + __ ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, + uint32_t offset, + Register value_lo, + Register value_hi, + Register temp1, + Register temp2, + HInstruction* instruction) { + Label fail; + if (offset != 0) { + __ LoadImmediate(temp1, offset); + __ add(IP, addr, ShifterOperand(temp1)); + addr = IP; + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) + __ ldrexd(temp1, temp2, addr); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ strexd(temp1, value_lo, value_hi, addr); + __ cmp(temp1, ShifterOperand(0)); + __ b(&fail, NE); +} + +void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + + + Primitive::Type field_type = field_info.GetFieldType(); + bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. - if (needs_write_barrier) { + // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + } else if (generate_volatile) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + if (field_type == Primitive::kPrimDouble) { + // For doubles we need two more registers to copy the value. 
+ locations->AddTemp(Location::RegisterLocation(R2)); + locations->AddTemp(Location::RegisterLocation(R3)); + } } } -void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, obj, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ StoreToOffset(kStoreWord, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicStore(base, offset, + value.AsRegisterPairLow<Register>(), + value.AsRegisterPairHigh<Register>(), + locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + instruction); + } else { + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } break; } case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, obj, offset); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, obj, offset); + DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile && !atomic_ldrd_strd) { + Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); + Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); + + __ vmovrrd(value_reg_lo, value_reg_hi, value_reg); + + GenerateWideAtomicStore(base, offset, + value_reg_lo, + value_reg_hi, + locations->GetTemp(2).AsRegister<Register>(), + 
locations->GetTemp(3).AsRegister<Register>(), + instruction); + } else { + __ StoreDToOffset(value_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } break; } @@ -2625,75 +2730,160 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + // Longs and doubles are handled in the switch. + if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } -void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + bool generate_volatile = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + if (generate_volatile) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. 
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { + switch (field_type) { case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimLong: { - // TODO: support volatile. 
- Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicLoad(base, offset, + out.AsRegisterPairLow<Register>(), + out.AsRegisterPairHigh<Register>()); + } else { + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, obj, offset); + DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile && !atomic_ldrd_strd) { + Register lo = locations->GetTemp(0).AsRegister<Register>(); + Register hi = locations->GetTemp(1).AsRegister<Register>(); + GenerateWideAtomicLoad(base, offset, lo, hi); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ vmovdrr(out_reg, lo, hi); + } else { + __ LoadDFromOffset(out_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + // Doubles are handled in the switch. + if (field_type != Primitive::kPrimDouble) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } +} + +void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { @@ -2705,20 +2895,32 @@ void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { } } -void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ LoadFromOffset(kLoadWord, IP, obj.AsRegister<Register>(), 0); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeARM* slow_path = new 
(GetGraph()->GetArena()) NullCheckSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location obj = locations->InAt(0); - if (obj.IsRegister()) { - __ cmp(obj.AsRegister<Register>(), ShifterOperand(0)); - __ b(slow_path->GetEntryLabel(), EQ); + __ cmp(obj.AsRegister<Register>(), ShifterOperand(0)); + __ b(slow_path->GetEntryLabel(), EQ); +} + +void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); } else { - DCHECK(obj.IsConstant()) << obj; - DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0); - __ b(slow_path->GetEntryLabel()); + GenerateExplicitNullCheck(instruction); } } @@ -2822,14 +3024,39 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Location out = locations->Out(); + DCHECK(out.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Location out = locations->Out(); + DCHECK(out.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); + __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { @@ -2913,6 +3140,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); Register temp = locations->GetTemp(0).AsRegister<Register>(); @@ -2942,14 +3170,44 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, 
TIMES_4)); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + } + + break; + } + case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); } + + // Ints and objects are handled in the switch. + if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) { @@ -2965,6 +3223,7 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -3075,21 +3334,87 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { if (destination.IsRegister()) { __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex()); + } else if (destination.IsFpuRegister()) { + __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex()); } else { DCHECK(destination.IsStackSlot()); __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); } - } else { - DCHECK(source.IsConstant()); - DCHECK(source.GetConstant()->IsIntConstant()); - int32_t value = source.GetConstant()->AsIntConstant()->GetValue(); - if (destination.IsRegister()) { - __ LoadImmediate(destination.AsRegister<Register>(), value); + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>()); } else { DCHECK(destination.IsStackSlot()); - __ LoadImmediate(IP, value); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); + } + } else if (source.IsDoubleStackSlot()) { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } else { + DCHECK(source.IsConstant()) << source; + HInstruction* constant = source.GetConstant(); + if (constant->IsIntConstant()) { + int32_t value = constant->AsIntConstant()->GetValue(); + if (destination.IsRegister()) { + __ LoadImmediate(destination.AsRegister<Register>(), value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, value); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } + } else if 
(constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + if (destination.IsRegister()) { + // In the presence of long or double constants, the parallel move resolver will + // split the move into two, but keeps the same constant for both moves. Here, + // we use the low or high part depending on which register this move goes to. + if (destination.reg() % 2 == 0) { + __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value)); + } else { + __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadImmediate(IP, High32Bits(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } + } else if (constant->IsDoubleConstant()) { + double value = constant->AsDoubleConstant()->GetValue(); + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (destination.IsFpuRegister()) { + // In the presence of long or double constants, the parallel move resolver will + // split the move into two, but keeps the same constant for both moves. Here, + // we use the low or high part depending on which register this move goes to. + if (destination.reg() % 2 == 0) { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), + bit_cast<float, uint32_t>(Low32Bits(int_value))); + } else { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), + bit_cast<float, uint32_t>(High32Bits(int_value))); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(int_value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadImmediate(IP, High32Bits(int_value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } + } else { + DCHECK(constant->IsFloatConstant()) << constant->DebugName(); + float value = constant->AsFloatConstant()->GetValue(); + if (destination.IsFpuRegister()) { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, bit_cast<int32_t, float>(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } } } } @@ -3128,8 +3453,25 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { + __ vmovrs(IP, source.AsFpuRegister<SRegister>()); + __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>()); + __ vmovsr(destination.AsFpuRegister<SRegister>(), IP); + } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { + SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>() + : destination.AsFpuRegister<SRegister>(); + int mem = source.IsFpuRegister() + ? 
destination.GetStackIndex() + : source.GetStackIndex(); + + __ vmovrs(IP, reg); + __ LoadFromOffset(kLoadWord, IP, SP, mem); + __ StoreToOffset(kStoreWord, IP, SP, mem); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + Exchange(source.GetStackIndex(), destination.GetStackIndex()); + Exchange(source.GetHighStackIndex(kArmWordSize), destination.GetHighStackIndex(kArmWordSize)); } else { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unimplemented" << source << " <-> " << destination; } } @@ -3206,146 +3548,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, cls, offset); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, cls, offset); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, cls, offset); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Temporary registers for the write barrier. 
- if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, cls, offset); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, cls, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); @@ -3495,7 +3697,9 @@ void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) || instruction->GetResultType() == Primitive::kPrimLong); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong); + Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong) + ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), output_overlaps); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 226e635d05..0de6669aa7 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -18,6 +18,8 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm/assembler_thumb2.h" @@ -32,7 +34,6 @@ class SlowPathCodeARM; static constexpr size_t kArmWordSize = kArmPointerSize; static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 }; -static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 }; @@ -46,11 +47,6 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> kParameterFpuRegisters, kParameterFpuRegistersLength) {} - RegisterPair GetRegisterPairAt(size_t argument_index) { - DCHECK_LT(argument_index + 1, GetNumberOfRegisters()); - return kParameterCorePairRegisters[argument_index]; - } - private: DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; @@ -110,6 +106,8 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +136,17 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, + Register value_lo, Register value_hi, + Register temp1, Register temp2, + HInstruction* instruction); + void GenerateWideAtomicLoad(Register addr, uint32_t offset, + Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; @@ -147,7 +156,9 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - explicit CodeGeneratorARM(HGraph* graph); + CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -156,11 +167,18 @@ class CodeGeneratorARM : public CodeGenerator { void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + 
size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t GetWordSize() const OVERRIDE { return kArmWordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // Allocated in S registers, which are word sized. + return kArmWordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { @@ -221,6 +239,14 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { + return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -228,6 +254,7 @@ class CodeGeneratorARM : public CodeGenerator { InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; Thumb2Assembler assembler_; + const ArmInstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index c7517d3abc..271eb82ee6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -17,10 +17,12 @@ #include "code_generator_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" +#include "offsets.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" @@ -38,7 +40,9 @@ namespace art { namespace arm64 { -static constexpr bool kExplicitStackOverflowCheck = false; +// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers. +// For now we prefer the use of load-acquire, store-release over explicit memory barriers. 
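// As a rough sketch of the trade-off (illustrative only, not the exact code below): a volatile
// load maps either to
//   __ Ldar(Register(dst), MemOperand(base));              // acquire-load, ordering is implicit
// or to
//   __ Ldr(Register(dst), MemOperand(base, offset));
//   __ Dmb(InnerShareable, BarrierAll);                     // explicit barrier after the load
// and a volatile store to Stlr versus Dmb + Str + Dmb.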
+static constexpr bool kUseAcquireRelease = true; static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>); static constexpr int kCurrentMethodStackOffset = 0; @@ -233,8 +237,9 @@ Location ARM64ReturnLocation(Primitive::Type return_type) { static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static const FPRegister kRuntimeParameterFpuRegisters[] = { }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> { public: @@ -294,6 +299,7 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { length_location_, LocationFrom(calling_convention.GetRegisterAt(1))); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } private: @@ -313,6 +319,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } private: @@ -343,6 +350,11 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>(); + } // Move the class to the desired location. 
Location out = locations->Out(); @@ -386,10 +398,11 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0).W()); - __ Mov(calling_convention.GetRegisterAt(1).W(), instruction_->GetStringIndex()); + arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); + __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>(); Primitive::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); @@ -412,6 +425,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } private: @@ -428,6 +442,7 @@ class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0); + CheckEntrypointTypes<kQuickThrowStackOverflow, void, void*>(); } private: @@ -446,6 +461,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { codegen->SaveLiveRegisters(instruction_->GetLocations()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); codegen->RestoreLiveRegisters(instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); @@ -502,9 +518,12 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } codegen->RestoreLiveRegisters(locations); @@ -543,11 +562,12 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type return next_location; } -CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph) +CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, - kNumberOfAllocatableRegisterPairs), + kNumberOfAllocatableRegisterPairs, + compiler_options), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -585,17 +605,17 @@ void CodeGeneratorARM64::GenerateFrameEntry() { if (do_overflow_check) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireX(); - if (kExplicitStackOverflowCheck) { + if (GetCompilerOptions().GetImplicitStackOverflowChecks()) { + __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Ldr(wzr, MemOperand(temp, 0)); + RecordPcInfo(nullptr, 0); + } else { 
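      // Explicit check: load the stack limit from the Thread and take the slow path once sp has
      // crossed it.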
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64(); AddSlowPath(slow_path); __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value())); __ Cmp(sp, temp); __ B(lo, slow_path->GetEntryLabel()); - } else { - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); - __ Ldr(wzr, MemOperand(temp, 0)); - RecordPcInfo(nullptr, 0); } } @@ -949,8 +969,8 @@ void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) { } void CodeGeneratorARM64::Load(Primitive::Type type, - vixl::CPURegister dst, - const vixl::MemOperand& src) { + CPURegister dst, + const MemOperand& src) { switch (type) { case Primitive::kPrimBoolean: __ Ldrb(Register(dst), src); @@ -969,7 +989,7 @@ void CodeGeneratorARM64::Load(Primitive::Type type, case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(dst.Is64Bits() == Is64BitType(type)); + DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); __ Ldr(dst, src); break; case Primitive::kPrimVoid: @@ -977,31 +997,130 @@ void CodeGeneratorARM64::Load(Primitive::Type type, } } +void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, + CPURegister dst, + const MemOperand& src) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp_base = temps.AcquireX(); + Primitive::Type type = instruction->GetType(); + + DCHECK(!src.IsRegisterOffset()); + DCHECK(!src.IsPreIndex()); + DCHECK(!src.IsPostIndex()); + + // TODO(vixl): Let the MacroAssembler handle MemOperand. + __ Add(temp_base, src.base(), src.offset()); + MemOperand base = MemOperand(temp_base); + switch (type) { + case Primitive::kPrimBoolean: + __ Ldarb(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); + break; + case Primitive::kPrimByte: + __ Ldarb(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); + __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); + break; + case Primitive::kPrimChar: + __ Ldarh(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); + break; + case Primitive::kPrimShort: + __ Ldarh(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); + __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); + break; + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); + __ Ldar(Register(dst), base); + MaybeRecordImplicitNullCheck(instruction); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + DCHECK(dst.IsFPRegister()); + DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); + + Register temp = dst.Is64Bits() ? 
temps.AcquireX() : temps.AcquireW(); + __ Ldar(temp, base); + MaybeRecordImplicitNullCheck(instruction); + __ Fmov(FPRegister(dst), temp); + break; + } + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + } +} + void CodeGeneratorARM64::Store(Primitive::Type type, - vixl::CPURegister rt, - const vixl::MemOperand& dst) { + CPURegister src, + const MemOperand& dst) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - __ Strb(Register(rt), dst); + __ Strb(Register(src), dst); break; case Primitive::kPrimChar: case Primitive::kPrimShort: - __ Strh(Register(rt), dst); + __ Strh(Register(src), dst); break; case Primitive::kPrimInt: case Primitive::kPrimNot: case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(rt.Is64Bits() == Is64BitType(type)); - __ Str(rt, dst); + DCHECK_EQ(src.Is64Bits(), Is64BitType(type)); + __ Str(src, dst); break; case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << type; } } +void CodeGeneratorARM64::StoreRelease(Primitive::Type type, + CPURegister src, + const MemOperand& dst) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp_base = temps.AcquireX(); + + DCHECK(!dst.IsRegisterOffset()); + DCHECK(!dst.IsPreIndex()); + DCHECK(!dst.IsPostIndex()); + + // TODO(vixl): Let the MacroAssembler handle this. + __ Add(temp_base, dst.base(), dst.offset()); + MemOperand base = MemOperand(temp_base); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + __ Stlrb(Register(src), base); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + __ Stlrh(Register(src), base); + break; + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + DCHECK_EQ(src.Is64Bits(), Is64BitType(type)); + __ Stlr(Register(src), base); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + DCHECK(src.IsFPRegister()); + DCHECK_EQ(src.Is64Bits(), Is64BitType(type)); + + Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); + __ Fmov(temp, FPRegister(src)); + __ Stlr(temp, base); + break; + } + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + } +} + void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); @@ -1026,14 +1145,47 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod vixl::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset())); - __ Cmp(temp, mirror::Class::kStatusInitialized); - __ B(lt, slow_path->GetEntryLabel()); + size_t status_offset = mirror::Class::StatusOffset().SizeValue(); + // Even if the initialized flag is set, we need to ensure consistent memory ordering. - __ Dmb(InnerShareable, BarrierReads); + if (kUseAcquireRelease) { + // TODO(vixl): Let the MacroAssembler handle MemOperand. 
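    // LDAR only accepts a plain base register with no offset, so the offset is first folded into
    // the scratch register and the acquire-load then reads from [temp].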
+ __ Add(temp, class_reg, status_offset); + __ Ldar(temp, HeapOperand(temp)); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + } else { + __ Ldr(temp, HeapOperand(class_reg, status_offset)); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + __ Dmb(InnerShareable, BarrierReads); + } __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { + BarrierType type = BarrierAll; + + switch (kind) { + case MemBarrierKind::kAnyAny: + case MemBarrierKind::kAnyStore: { + type = BarrierAll; + break; + } + case MemBarrierKind::kLoadAny: { + type = BarrierReads; + break; + } + case MemBarrierKind::kStoreStore: { + type = BarrierWrites; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ Dmb(InnerShareable, type); +} + void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = @@ -1254,6 +1406,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { @@ -1265,6 +1418,7 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset())); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { @@ -1288,7 +1442,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); if (value_type == Primitive::kPrimNot) { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc()); - + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } else { LocationSummary* locations = instruction->GetLocations(); Register obj = InputRegisterAt(instruction, 0); @@ -1309,6 +1463,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } codegen_->Store(value_type, value, destination); + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -1660,28 +1815,60 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + // NB: LoadAcquire will record the pc info if needed. 
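      // (With implicit null checks the faulting instruction is the Ldar emitted inside
      // LoadAcquire, so the pc has to be recorded there rather than after this call returns.)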
+ codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // For IRIW sequential consistency kLoadAny is not sufficient. + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); } void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - Primitive::Type field_type = instruction->GetFieldType(); - CPURegister value = InputCPURegisterAt(instruction, 1); Register obj = InputRegisterAt(instruction, 0); - codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset())); - if (field_type == Primitive::kPrimNot) { + CPURegister value = InputCPURegisterAt(instruction, 1); + Offset offset = instruction->GetFieldOffset(); + Primitive::Type field_type = instruction->GetFieldType(); + + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } else { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->Store(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Store(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { codegen_->MarkGCCard(obj, Register(value)); } } @@ -1692,7 +1879,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), true); // The output does overlap inputs. + // The output does overlap inputs. 
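  // (The instanceof sequence writes the result register before it is done reading the class
  // inputs, so the output must not be allocated to either input.)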
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { @@ -1782,6 +1970,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok } else { __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, HeapOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -1805,7 +1994,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir // Make sure that ArtMethod* is passed in W0 as per the calling convention DCHECK(temp.Is(w0)); size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() + - invoke->GetIndexInDexCache() * kHeapRefSize; + invoke->GetDexMethodIndex() * kHeapRefSize; // TODO: Implement all kinds of calls: // 1) boot -> boot @@ -1847,6 +2036,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { DCHECK(receiver.IsRegister()); __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, HeapOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -1959,6 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins ? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -2044,9 +2235,11 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); locations->SetOut(LocationFrom(x0)); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, + void*, uint32_t, int32_t, mirror::ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -2055,11 +2248,13 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot); - DCHECK(current_method.Is(w1)); + DCHECK(current_method.Is(w2)); codegen_->LoadCurrentMethod(current_method); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, + void*, uint32_t, int32_t, mirror::ArtMethod*>(); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { @@ -2069,6 +2264,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + 
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { @@ -2081,6 +2277,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAllocObjectWithAccessCheck), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { @@ -2114,18 +2311,31 @@ void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { } } -void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location obj = locations->InAt(0); - if (obj.IsRegister()) { - __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); + + __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); +} + +void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); } else { - DCHECK(obj.IsConstant()) << obj; - DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0); - __ B(slow_path->GetEntryLabel()); + GenerateExplicitNullCheck(instruction); } } @@ -2175,9 +2385,12 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) { } void LocationsBuilderARM64::VisitRem(HRem* rem) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); - switch (rem->GetResultType()) { + Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = IsFPType(type) ? 
LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + + switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); @@ -2185,13 +2398,24 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + + break; + } + default: - LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + LOG(FATAL) << "Unexpected rem type " << type; } } void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); + switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: { @@ -2206,6 +2430,14 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { break; } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) + : QUICK_ENTRY_POINT(pFmod); + codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc()); + break; + } + default: LOG(FATAL) << "Unexpected rem type " << type; } @@ -2294,7 +2526,19 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + // For IRIW sequential consistency kLoadAny is not sufficient. 
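      // (IRIW litmus: with x = y = 0, one thread stores x = 1, another stores y = 1; readers must
      // not disagree on the order of those two stores. A load-load barrier alone does not forbid
      // that, hence the full any-any barrier -- or an acquire-load -- after the volatile load.)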
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + } } void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { @@ -2305,13 +2549,24 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - CPURegister value = InputCPURegisterAt(instruction, 1); Register cls = InputRegisterAt(instruction, 0); + CPURegister value = InputCPURegisterAt(instruction, 1); Offset offset = instruction->GetFieldOffset(); Primitive::Type field_type = instruction->GetFieldType(); - codegen_->Store(field_type, value, HeapOperand(cls, offset)); - if (field_type == Primitive::kPrimNot) { + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset)); + } else { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->Store(field_type, value, HeapOperand(cls, offset)); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Store(field_type, value, HeapOperand(cls, offset)); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { codegen_->MarkGCCard(cls, Register(value)); } } @@ -2353,6 +2608,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 1d5bfb734e..27c6fbdbf4 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,8 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" @@ -108,9 +110,12 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg); + void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); void HandleShift(HBinaryOperation* instr); + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -162,7 +167,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver { class CodeGeneratorARM64 : public CodeGenerator { public: - explicit CodeGeneratorARM64(HGraph* graph); + CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options); virtual ~CodeGeneratorARM64() {} void GenerateFrameEntry() OVERRIDE; @@ -189,6 +194,11 @@ class CodeGeneratorARM64 : public CodeGenerator { return kArm64WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // Allocated in D registers, which are word sized. 
+ return kArm64WordSize; + } + uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { vixl::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); @@ -257,12 +267,18 @@ class CodeGeneratorARM64 : public CodeGenerator { void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); + void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); + void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc); ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; } + bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return false; + } + private: // Labels for each block that will be compiled. vixl::Label* block_labels_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e7edd8a805..ac6fdbcfe9 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -31,8 +31,6 @@ namespace art { namespace x86 { -static constexpr bool kExplicitStackOverflowCheck = false; - static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; @@ -42,6 +40,8 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength = static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { }; static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static constexpr int kC2ConditionMask = 0x400; + // Marker for places that can be updated once we don't follow the quick ABI. 
static constexpr bool kFollowsQuickABI = true; @@ -215,8 +215,8 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0)); - __ movl(calling_convention.GetRegisterAt(1), Immediate(instruction_->GetStringIndex())); + x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString))); codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); @@ -373,8 +373,9 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kX86WordSize; } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph) - : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, kNumberOfRegisterPairs), +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) + : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, + kNumberOfRegisterPairs, compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -469,7 +470,9 @@ void CodeGeneratorX86::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); - if (!skip_overflow_check && !kExplicitStackOverflowCheck) { + bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks(); + + if (!skip_overflow_check && implicitStackOverflowChecks) { __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86)))); RecordPcInfo(nullptr, 0); } @@ -477,7 +480,7 @@ void CodeGeneratorX86::GenerateFrameEntry() { // The return PC has already been pushed on the stack. 
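  // GetFrameSize() already includes that slot, hence the kNumberOfPushedRegistersAtEntry
  // adjustment below.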
__ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); - if (!skip_overflow_check && kExplicitStackOverflowCheck) { + if (!skip_overflow_check && !implicitStackOverflowChecks) { SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86(); AddSlowPath(slow_path); @@ -643,9 +646,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { DCHECK(source.IsDoubleStackSlot()); EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), - Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), + Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)), Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1))); + __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex())); } } else if (destination.IsFpuRegister()) { if (source.IsDoubleStackSlot()) { @@ -1134,7 +1138,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec // temp = temp->dex_cache_resolved_methods_; __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); // (temp + offset_of_quick_compiled_code)() __ call(Address( temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); @@ -1198,6 +1202,7 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { } else { __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -1234,6 +1239,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) } else { __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -2074,6 +2080,81 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float) { + if (source.IsStackSlot()) { + DCHECK(is_float); + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else if (source.IsDoubleStackSlot()) { + DCHECK(!is_float); + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + // Write the value to the temporary location on the stack and load to FP stack. 
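    // The x87 fld instructions can only read from memory (or another FP-stack slot), so a value
    // currently held in a core register pair or an XMM register is spilled to the scratch slot
    // first and then pushed onto the FP stack from there.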
+ if (is_float) { + Location stack_temp = Location::StackSlot(temp_offset); + codegen_->Move32(stack_temp, source); + __ flds(Address(ESP, temp_offset)); + } else { + Location stack_temp = Location::DoubleStackSlot(temp_offset); + codegen_->Move64(stack_temp, source); + __ fldl(Address(ESP, temp_offset)); + } + } +} + +void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { + Primitive::Type type = rem->GetResultType(); + bool is_float = type == Primitive::kPrimFloat; + size_t elem_size = Primitive::ComponentSize(type); + LocationSummary* locations = rem->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + // Create stack space for 2 elements. + // TODO: enhance register allocator to ask for stack temporaries. + __ subl(ESP, Immediate(2 * elem_size)); + + // Load the values to the FP stack in reverse order, using temporaries if needed. + PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); + PushOntoFPStack(first, 0, 2 * elem_size, is_float); + + // Loop doing FPREM until we stabilize. + Label retry; + __ Bind(&retry); + __ fprem(); + + // Move FP status to AX. + __ fstsw(); + + // And see if the argument reduction is complete. This is signaled by the + // C2 FPU flag bit set to 0. + __ andl(EAX, Immediate(kC2ConditionMask)); + __ j(kNotEqual, &retry); + + // We have settled on the final value. Retrieve it into an XMM register. + // Store FP top of stack to real stack. + if (is_float) { + __ fsts(Address(ESP, 0)); + } else { + __ fstl(Address(ESP, 0)); + } + + // Pop the 2 items from the FP stack. + __ fucompp(); + + // Load the value from the stack into an XMM register. + DCHECK(out.IsFpuRegister()) << out; + if (is_float) { + __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); + } else { + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); + } + + // And remove the temporary stack space we allocated. + __ addl(ESP, Immediate(2 * elem_size)); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2207,10 +2288,8 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = type == Primitive::kPrimInt - ? LocationSummary::kNoCall - : LocationSummary::kCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { case Primitive::kPrimInt: { @@ -2229,24 +2308,12 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; } + case Primitive::kPrimDouble: case Primitive::kPrimFloat: { - InvokeRuntimeCallingConvention calling_convention; - // x86 floating-point parameters are passed through core registers (EAX, ECX). - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - // The runtime helper puts the result in XMM0. - locations->SetOut(Location::FpuRegisterLocation(XMM0)); - break; - } - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX). 
- locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - // The runtime helper puts the result in XMM0. - locations->SetOut(Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::Any()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(EAX)); break; } @@ -2263,14 +2330,9 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf))); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); - break; - } + case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod))); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); + GenerateRemFP(rem); break; } default: @@ -2503,13 +2565,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); __ fs()->call( @@ -2656,82 +2718,117 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. 
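   * Concretely, in the switch below only kAnyAny turns into an mfence (it is the barrier issued
   * after a volatile store), while kAnyStore, kLoadAny and kStoreStore compile to nothing.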
+ */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } +} + + +void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { + Label is_null; + __ testl(value, value); + __ j(kEqual, &is_null); + __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); + __ movl(temp, object); + __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + __ movb(Address(temp, card, TIMES_1, 0), + X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); + __ Bind(&is_null); +} + +void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { + // Long values can be loaded atomically into an XMM using movsd. + // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM + // and then copy the XMM into the output 32bits at a time). 
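    // A sketch of the resulting load sequence (register names illustrative):
    //   __ movsd(xmm_temp, Address(base, offset));   // one atomic 8-byte load
    //   __ movd(out_lo, xmm_temp);                    // low 32 bits
    //   __ psrlq(xmm_temp, Immediate(32));
    //   __ movd(out_hi, xmm_temp);                    // high 32 bits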
+ locations->AddTemp(Location::RequiresFpuRegister()); } } -void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(obj, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + if (is_volatile) { + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(temp, Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movd(out.AsRegisterPairLow<Register>(), temp); + __ psrlq(temp, Immediate(32)); + __ movd(out.AsRegisterPairHigh<Register>(), temp); + } else { + __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); + } break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2739,99 +2836,190 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } -} -void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, 
Register value) { - Label is_null; - __ testl(value, value); - __ j(kEqual, &is_null); - __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); - __ movl(temp, object); - __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); - __ movb(Address(temp, card, TIMES_1, 0), - X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - __ Bind(&is_null); + // Longs are handled in the switch. + if (field_type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool is_byte_type = (field_type == Primitive::kPrimBoolean) + || (field_type == Primitive::kPrimByte); + + // The register allocator does not support multiple + // inputs that die at entry with one in a specific register. + if (is_byte_type) { + // Ensure the value is in a byte register. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // Temporary registers for the write barrier. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // 64bits value can be atomically written to an address with movsd and an XMM register. + // We need two XMM registers because there's no easier way to (bit) copy a register pair + // into a single XMM register (we copy each pair part into the XMMs and then interleave them). + // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the + // isolated cases when we need this it isn't worth adding the extra complexity. 
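    // A sketch of the resulting store sequence (register names illustrative):
    //   __ movd(xmm_lo, value_lo);
    //   __ movd(xmm_hi, value_hi);
    //   __ punpckldq(xmm_lo, xmm_hi);                 // xmm_lo now holds value_hi:value_lo
    //   __ movsd(Address(base, offset), xmm_lo);      // one atomic 8-byte store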
+ locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } } -void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<ByteRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<Register>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<Register>()); break; } case Primitive::kPrimLong: { - // TODO: support volatile. - __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(obj, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(obj, kX86WordSize + offset)); + if (is_volatile) { + XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + __ movd(temp1, value.AsRegisterPairLow<Register>()); + __ movd(temp2, value.AsRegisterPairHigh<Register>()); + __ punpckldq(temp1, temp2); + __ movsd(Address(base, offset), temp1); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } else { + __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + } break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + // Longs are handled in the switch. 
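The is_volatile barriers wrapped around these accesses follow the JSR-133 cookbook mapping used elsewhere in this change; on x86 only the kAnyAny barrier after a volatile store costs an mfence, the others only constrain scheduling. Roughly the same guarantees expressed with C++11 atomics, as an editorial analogy rather than ART code:

#include <atomic>
#include <cstdint>

std::atomic<int64_t> field;  // stand-in for a volatile Java field

// kLoadAny after the load ~ acquire load (no fence needed on x86).
int64_t VolatileGet() { return field.load(std::memory_order_acquire); }

// kAnyStore before + kAnyAny after the store ~ sequentially consistent store
// (the kAnyAny part is what becomes the mfence).
void VolatileSet(int64_t value) { field.store(value, std::memory_order_seq_cst); }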
+ if (field_type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::Any()); + Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() + ? 
Location::RequiresRegister() + : Location::Any(); + locations->SetInAt(0, loc); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } } -void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + + __ testl(EAX, Address(obj.AsRegister<Register>(), 0)); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); @@ -2851,6 +3039,14 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { __ j(kEqual, slow_path->GetEntryLabel()); } +void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); + } else { + GenerateExplicitNullCheck(instruction); + } +} + void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2864,7 +3060,8 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); - switch (instruction->GetType()) { + Primitive::Type type = instruction->GetType(); + switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); @@ -2932,10 +3129,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize)); } else { __ movl(out.AsRegisterPairLow<Register>(), Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize)); } @@ -2944,12 +3143,16 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + LOG(FATAL) << "Unimplemented register type " << type; UNREACHABLE(); case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } + + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { @@ -3026,6 +3229,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -3049,6 +3253,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { 
Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -3077,6 +3282,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { Register temp = locations->GetTemp(0).AsRegister<Register>(); @@ -3098,17 +3304,20 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; if (value.IsRegisterPair()) { __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { DCHECK(value.IsConstant()); int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); __ movl(Address(obj, offset), Immediate(Low32Bits(val))); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val))); } } else { if (value.IsRegisterPair()) { __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { @@ -3116,6 +3325,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), Immediate(Low32Bits(val))); + codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), Immediate(High32Bits(val))); } @@ -3146,6 +3356,7 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); __ movl(out, Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -3264,7 +3475,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movl(Address(ESP, destination.GetStackIndex()), imm); } } else { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source; } } @@ -3383,159 +3594,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. 
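The MaybeRecordImplicitNullCheck calls threaded through these accesses exist so that the first real load/store through the object can double as the null check: a null object makes that access fault, and the recorded PC lets the runtime turn the SIGSEGV into a NullPointerException at the right dex PC. A simplified sketch of that bookkeeping, with hypothetical types and names rather than ART's real API:

#include <cstdint>
#include <map>

// Hypothetical per-method table filled in where the codegen calls
// RecordPcInfo for an implicit null check.
struct ImplicitNullCheckTable {
  std::map<uintptr_t, uint32_t> native_pc_to_dex_pc;

  void Record(uintptr_t native_pc, uint32_t dex_pc) {
    native_pc_to_dex_pc[native_pc] = dex_pc;
  }

  // Consulted from the fault handler: a known faulting PC means the fault was
  // an implicit null check and should raise NullPointerException at dex_pc.
  bool Lookup(uintptr_t faulting_pc, uint32_t* dex_pc) const {
    auto it = native_pc_to_dex_pc.find(faulting_pc);
    if (it == native_pc_to_dex_pc.end()) return false;
    *dex_pc = it->second;
    return true;
  }
};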
} -void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(cls, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(cls, kX86WordSize + offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. 
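The write-barrier temps set up here (and in the shared HandleFieldSet) feed MarkGCCard, which dirties one byte in the card table for the object that just received a reference. What that address arithmetic computes, in a simplified C++ sketch where the shift and the dirty value are illustrative constants, not necessarily ART's exact ones:

#include <cstdint>

// One card covers a fixed-size chunk of the heap; storing a non-zero byte at
// cards[object_address >> kCardShift] marks that chunk as possibly holding
// references that the GC must re-scan.
static constexpr unsigned kCardShiftSketch = 7;  // illustrative: 128-byte cards

static void MarkCard(uint8_t* card_table, const void* holder_object) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(holder_object);
  card_table[addr >> kCardShiftSketch] = 0x70;  // any agreed-upon "dirty" value
}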
- locations->AddTemp(Location::RegisterLocation(ECX)); - } -} - -void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(cls, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(cls, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(cls, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index aed06c04df..a9086f8876 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -18,6 +18,8 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" @@ -105,6 +107,8 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); void HandleShift(HBinaryOperation* instruction); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -133,10 +137,19 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& 
loc, Register shifter); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float); + + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -146,7 +159,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - explicit CodeGeneratorX86(HGraph* graph); + CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -160,6 +173,11 @@ class CodeGeneratorX86 : public CodeGenerator { return kX86WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // 8 bytes == 2 words for each spill. + return 2 * kX86WordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { @@ -216,6 +234,10 @@ class CodeGeneratorX86 : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { + return type == Primitive::kPrimLong; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ff7fcdcbac..350392fbf4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -18,6 +18,8 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_x86_64.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -32,8 +34,6 @@ namespace art { namespace x86_64 { -static constexpr bool kExplicitStackOverflowCheck = false; - // Some x86_64 instructions require a register to be available as temp. 
static constexpr Register TMP = R11; @@ -47,6 +47,8 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +static constexpr int kC2ConditionMask = 0x400; + class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeRuntimeCallingConvention() @@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> -class SlowPathCodeX86_64 : public SlowPathCode { - public: - SlowPathCodeX86_64() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); -}; - class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} @@ -285,8 +273,8 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(0))); - __ movl(CpuRegister(calling_convention.GetRegisterAt(1)), + x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); __ gs()->call(Address::Absolute( QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pResolveString), true)); @@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) { return kEqual; } +void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + CpuRegister temp) { + // All registers are assumed to be correctly set up. + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
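The call sequence that follows is three dependent loads. As a rough C++ rendering of the pointer chasing, with a struct layout that is purely illustrative and not ART's real ArtMethod layout:

#include <cstdint>

struct ArtMethodSketch {
  ArtMethodSketch** dex_cache_resolved_methods;  // method index -> resolved method
  void (*entry_point_from_quick_compiled_code)();
};

static void InvokeStaticOrDirect(ArtMethodSketch* current_method,
                                 uint32_t dex_method_index) {
  // temp = temp->dex_cache_resolved_methods_
  ArtMethodSketch** cache = current_method->dex_cache_resolved_methods;
  // temp = temp[index_in_cache]
  ArtMethodSketch* callee = cache[dex_method_index];
  // (temp + offset_of_quick_compiled_code)()
  callee->entry_point_from_quick_compiled_code();
}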
+ + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); + + DCHECK(!IsLeafMethod()); + RecordPcInfo(invoke, invoke->GetDexPc()); +} + void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { stream << X86_64ManagedRegister::FromCpuRegister(Register(reg)); } @@ -403,8 +416,8 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin return kX86_64WordSize; } -CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) - : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0), +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options) + : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0, compiler_options), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -474,8 +487,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); + bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks(); - if (!skip_overflow_check && !kExplicitStackOverflowCheck) { + if (!skip_overflow_check && implicitStackOverflowChecks) { __ testq(CpuRegister(RAX), Address( CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64)))); RecordPcInfo(nullptr, 0); @@ -485,7 +499,7 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); - if (!skip_overflow_check && kExplicitStackOverflowCheck) { + if (!skip_overflow_check && !implicitStackOverflowChecks) { SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64(); AddSlowPath(slow_path); @@ -571,8 +585,18 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } else if (source.IsFpuRegister()) { __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int32_t value; + if (constant->IsFloatConstant()) { + value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } else { + DCHECK(constant->IsIntConstant()); + value = constant->AsIntConstant()->GetValue(); + } + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } else { - DCHECK(source.IsStackSlot()); + DCHECK(source.IsStackSlot()) << source; __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } @@ -584,6 +608,17 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } else if (source.IsFpuRegister()) { __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value = constant->AsLongConstant()->GetValue(); + if (constant->IsDoubleConstant()) { + value = bit_cast<double, 
int64_t>(constant->AsDoubleConstant()->GetValue()); + } else { + DCHECK(constant->IsLongConstant()); + value = constant->AsLongConstant()->GetValue(); + } + __ movq(CpuRegister(TMP), Immediate(value)); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(source.IsDoubleStackSlot()); __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -1123,30 +1158,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } -void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorX86_64 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); +void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } - DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->GenerateStaticOrDirectCall( + invoke, + invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { @@ -1182,10 +1218,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); @@ -1199,6 +1244,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } else { __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } + codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -1235,6 +1281,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo } else { __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } + 
codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -1978,6 +2025,81 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float) { + if (source.IsStackSlot()) { + DCHECK(is_float); + __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); + } else if (source.IsDoubleStackSlot()) { + DCHECK(!is_float); + __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); + } else { + // Write the value to the temporary location on the stack and load to FP stack. + if (is_float) { + Location stack_temp = Location::StackSlot(temp_offset); + codegen_->Move(stack_temp, source); + __ flds(Address(CpuRegister(RSP), temp_offset)); + } else { + Location stack_temp = Location::DoubleStackSlot(temp_offset); + codegen_->Move(stack_temp, source); + __ fldl(Address(CpuRegister(RSP), temp_offset)); + } + } +} + +void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { + Primitive::Type type = rem->GetResultType(); + bool is_float = type == Primitive::kPrimFloat; + size_t elem_size = Primitive::ComponentSize(type); + LocationSummary* locations = rem->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + // Create stack space for 2 elements. + // TODO: enhance register allocator to ask for stack temporaries. + __ subq(CpuRegister(RSP), Immediate(2 * elem_size)); + + // Load the values to the FP stack in reverse order, using temporaries if needed. + PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); + PushOntoFPStack(first, 0, 2 * elem_size, is_float); + + // Loop doing FPREM until we stabilize. + Label retry; + __ Bind(&retry); + __ fprem(); + + // Move FP status to AX. + __ fstsw(); + + // And see if the argument reduction is complete. This is signaled by the + // C2 FPU flag bit set to 0. + __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask)); + __ j(kNotEqual, &retry); + + // We have settled on the final value. Retrieve it into an XMM register. + // Store FP top of stack to real stack. + if (is_float) { + __ fsts(Address(CpuRegister(RSP), 0)); + } else { + __ fstl(Address(CpuRegister(RSP), 0)); + } + + // Pop the 2 items from the FP stack. + __ fucompp(); + + // Load the value from the stack into an XMM register. + DCHECK(out.IsFpuRegister()) << out; + if (is_float) { + __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); + } else { + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); + } + + // And remove the temporary stack space we allocated. + __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2077,11 +2199,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { void LocationsBuilderX86_64::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = - (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong) - ? 
LocationSummary::kNoCall - : LocationSummary::kCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { case Primitive::kPrimInt: @@ -2095,11 +2214,10 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - // The runtime helper puts the result in XMM0. - locations->SetOut(Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::Any()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(RAX)); break; } @@ -2116,14 +2234,9 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: { - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true)); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); - break; - } + case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true)); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); + GenerateRemFP(rem); break; } default: @@ -2311,14 +2424,14 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(RAX)); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); + codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); __ gs()->call(Address::Absolute( @@ -2389,69 +2502,87 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } -void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. 
+ */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barier " << kind; + } +} + +void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(obj, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(obj, offset), value); + __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), 
Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2459,86 +2590,155 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + codegen_->MaybeRecordImplicitNullCheck(instruction); + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_info.GetFieldType(), instruction->InputAt(1)); + locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetInAt(1, Location::RequiresRegister()); + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(obj, offset)); + __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); break; } 
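Unlike the 32-bit path, the long case above needs no XMM detour: on x86-64 an aligned 8-byte mov is already atomic, so a volatile long differs from a plain one only by the surrounding barriers. The same fact stated via C++ atomics, as an editorial sketch:

#include <atomic>
#include <cstdint>

// Lock-free here means a single 8-byte mov underneath on x86-64.
static_assert(std::atomic<int64_t>::is_always_lock_free, "expected on x86-64");

int64_t LoadLong(const std::atomic<int64_t>& field) {
  return field.load(std::memory_order_relaxed);
}

void StoreLong(std::atomic<int64_t>& field, int64_t value) {
  field.store(value, std::memory_order_relaxed);
}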
case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + codegen_->MaybeRecordImplicitNullCheck(instruction); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>()); + } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::Any()); + Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() + ? 
Location::RequiresRegister() + : Location::Any(); + locations->SetInAt(0, loc); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } } -void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { +void InstructionCodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (codegen_->CanMoveNullCheckToUser(instruction)) { + return; + } + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + + __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0)); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) { SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); @@ -2558,6 +2758,14 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { __ j(kEqual, slow_path->GetEntryLabel()); } +void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { + if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) { + GenerateImplicitNullCheck(instruction); + } else { + GenerateExplicitNullCheck(instruction); + } +} + void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2675,6 +2883,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { @@ -2743,6 +2952,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2769,6 +2979,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2797,7 +3008,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } - + codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); @@ -2825,6 +3036,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), value.AsRegister<CpuRegister>()); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2839,6 +3051,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), value.AsFpuRegister<XmmRegister>()); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2853,6 +3066,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), value.AsFpuRegister<XmmRegister>()); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -2875,6 +3089,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) CpuRegister obj = 
locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -3222,146 +3437,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); } -void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. 
- locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(cls, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(cls, offset), value); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 794b81ffbc..ead771a1f2 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -18,6 +18,8 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" @@ -35,6 +37,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] = static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); +static constexpr bool kCoalescedImplicitNullCheck = false; + class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeDexCallingConvention() : CallingConvention( @@ -66,7 +70,20 @@ class InvokeDexCallingConventionVisitor { }; class CodeGeneratorX86_64; -class SlowPathCodeX86_64; + +class SlowPathCodeX86_64 : public SlowPathCode { + public: + SlowPathCodeX86_64() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); +}; class ParallelMoveResolverX86_64 : public 
ParallelMoveResolver { public: @@ -109,6 +126,8 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction); CodeGeneratorX86_64* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -136,8 +155,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); + void GenerateRemFP(HRem *rem); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateImplicitNullCheck(HNullCheck* instruction); + void GenerateExplicitNullCheck(HNullCheck* instruction); + void PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -147,7 +174,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { class CodeGeneratorX86_64 : public CodeGenerator { public: - explicit CodeGeneratorX86_64(HGraph* graph); + CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86_64() {} void GenerateFrameEntry() OVERRIDE; @@ -163,6 +190,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { return kX86_64WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + return kX86_64WordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { @@ -212,6 +243,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return false; + } + + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + private: // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 8b75cc7c65..aa4fc8f611 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -17,6 +17,7 @@ #include <functional> #include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -26,6 +27,7 @@ #include "common_compiler_test.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -79,7 +81,8 @@ template <typename Expected> static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { InternalCodeAllocator allocator; - x86::CodeGeneratorX86 codegenX86(graph); + CompilerOptions compiler_options; + x86::CodeGeneratorX86 codegenX86(graph, compiler_options); // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. codegenX86.CompileBaseline(&allocator, true); @@ -87,19 +90,21 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenX86, has_result, expected); } - arm::CodeGeneratorARM codegenARM(graph); + std::unique_ptr<const ArmInstructionSetFeatures> features( + ArmInstructionSetFeatures::FromCppDefines()); + arm::CodeGeneratorARM codegenARM(graph, *features.get(), compiler_options); codegenARM.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { Run(allocator, codegenARM, has_result, expected); } - x86_64::CodeGeneratorX86_64 codegenX86_64(graph); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); codegenX86_64.CompileBaseline(&allocator, true); if (kRuntimeISA == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); } - arm64::CodeGeneratorARM64 codegenARM64(graph); + arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options); codegenARM64.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm64) { Run(allocator, codegenARM64, has_result, expected); @@ -129,17 +134,20 @@ static void RunCodeOptimized(HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { + CompilerOptions compiler_options; if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, + *ArmInstructionSetFeatures::FromCppDefines(), + compiler_options); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kArm64) { - arm64::CodeGeneratorARM64 codegenARM64(graph); + arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph); + x86::CodeGeneratorX86 codegenX86(graph, compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { - x86_64::CodeGeneratorX86_64 codegenX86_64(graph); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } } diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index cad6683577..6ceccfbf0e 100644 --- 
a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" +#include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" @@ -45,11 +46,11 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); HConstantFolding(graph).Run(); - SSAChecker ssa_checker(&allocator, graph); - ssa_checker.Run(); - ASSERT_TRUE(ssa_checker.IsValid()); + SSAChecker ssa_checker_cf(&allocator, graph); + ssa_checker_cf.Run(); + ASSERT_TRUE(ssa_checker_cf.IsValid()); StringPrettyPrinter printer_after_cf(graph); printer_after_cf.VisitInsertionOrder(); @@ -59,8 +60,9 @@ static void TestCode(const uint16_t* data, check_after_cf(graph); HDeadCodeElimination(graph).Run(); - ssa_checker.Run(); - ASSERT_TRUE(ssa_checker.IsValid()); + SSAChecker ssa_checker_dce(&allocator, graph); + ssa_checker_dce.Run(); + ASSERT_TRUE(ssa_checker_dce.IsValid()); StringPrettyPrinter printer_after_dce(graph); printer_after_dce.VisitInsertionOrder(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 3dbd04e250..a644719622 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" #include "dead_code_elimination.h" +#include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" @@ -39,7 +40,7 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 5d712feb2b..291b14cb52 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -16,9 +16,9 @@ #include "graph_checker.h" -#include <string> #include <map> #include <sstream> +#include <string> #include "base/bit_vector-inl.h" @@ -123,6 +123,14 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } void GraphChecker::VisitInstruction(HInstruction* instruction) { + if (seen_ids_.IsBitSet(instruction->GetId())) { + std::stringstream error; + error << "Duplicate id in graph " << instruction->GetId() << "."; + errors_.push_back(error.str()); + } else { + seen_ids_.SetBit(instruction->GetId()); + } + // Ensure `instruction` is associated with `current_block_`. 
if (instruction->GetBlock() != current_block_) { std::stringstream error; @@ -355,9 +363,29 @@ static Primitive::Type PrimitiveKind(Primitive::Type type) { } } +void SSAChecker::VisitIf(HIf* instruction) { + VisitInstruction(instruction); + HInstruction* input = instruction->InputAt(0); + if (input->IsIntConstant()) { + int value = input->AsIntConstant()->GetValue(); + if (value != 0 && value != 1) { + std::stringstream error; + error << "If instruction " << instruction->GetId() + << " has a non-boolean constant input whose value is: " + << value << "."; + errors_.push_back(error.str()); + } + } else if (instruction->InputAt(0)->GetType() != Primitive::kPrimBoolean) { + std::stringstream error; + error << "If instruction " << instruction->GetId() + << " has a non-boolean input type: " + << instruction->InputAt(0)->GetType() << "."; + errors_.push_back(error.str()); + } +} + void SSAChecker::VisitCondition(HCondition* op) { VisitInstruction(op); - // TODO: check inputs types, and special case the `null` check. if (op->GetType() != Primitive::kPrimBoolean) { std::stringstream error; error << "Condition " << op->DebugName() << " " << op->GetId() @@ -365,6 +393,46 @@ void SSAChecker::VisitCondition(HCondition* op) { << op->GetType() << "."; errors_.push_back(error.str()); } + HInstruction* lhs = op->InputAt(0); + HInstruction* rhs = op->InputAt(1); + if (lhs->GetType() == Primitive::kPrimNot) { + if (!op->IsEqual() && !op->IsNotEqual()) { + std::stringstream error; + error << "Condition " << op->DebugName() << " " << op->GetId() + << " uses an object as left-hand side input."; + errors_.push_back(error.str()); + } + if (rhs->IsIntConstant() && rhs->AsIntConstant()->GetValue() != 0) { + std::stringstream error; + error << "Condition " << op->DebugName() << " " << op->GetId() + << " compares an object with a non-0 integer: " + << rhs->AsIntConstant()->GetValue() + << "."; + errors_.push_back(error.str()); + } + } else if (rhs->GetType() == Primitive::kPrimNot) { + if (!op->IsEqual() && !op->IsNotEqual()) { + std::stringstream error; + error << "Condition " << op->DebugName() << " " << op->GetId() + << " uses an object as right-hand side input."; + errors_.push_back(error.str()); + } + if (lhs->IsIntConstant() && lhs->AsIntConstant()->GetValue() != 0) { + std::stringstream error; + error << "Condition " << op->DebugName() << " " << op->GetId() + << " compares a non-0 integer with an object: " + << lhs->AsIntConstant()->GetValue() + << "."; + errors_.push_back(error.str()); + } + } else if (PrimitiveKind(lhs->GetType()) != PrimitiveKind(rhs->GetType())) { + std::stringstream error; + error << "Condition " << op->DebugName() << " " << op->GetId() + << " has inputs of different type: " + << lhs->GetType() << ", and " << rhs->GetType() + << "."; + errors_.push_back(error.str()); + } } void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index b6c9f1720c..ae1557b57c 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -30,7 +30,8 @@ class GraphChecker : public HGraphDelegateVisitor { const char* dump_prefix = "art::GraphChecker: ") : HGraphDelegateVisitor(graph), allocator_(allocator), - dump_prefix_(dump_prefix) {} + dump_prefix_(dump_prefix), + seen_ids_(allocator, graph->GetCurrentInstructionId(), false) {} // Check the whole graph (in insertion order). 
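Aside: the checks added above (VisitIf plus the extended VisitCondition) can be summarized as a small predicate. The sketch below is an illustrative restatement, not the ART API; note that the real PrimitiveKind collapses boolean/byte/char/short to int before comparing kinds.

#include <cstdint>

enum class Kind { kBool, kInt, kLong, kFloat, kDouble, kRef };

// An If input must be boolean-typed, or an int constant whose value is 0 or 1.
bool ValidIfInput(Kind kind, bool is_int_constant, int64_t value) {
  return kind == Kind::kBool || (is_int_constant && (value == 0 || value == 1));
}

// A reference may only be compared with == or != against another reference or
// the 0 (null) constant; otherwise both sides must have the same kind.
bool ValidConditionInputs(Kind lhs, Kind rhs, bool is_eq_or_ne, bool other_side_is_zero) {
  if (lhs == Kind::kRef || rhs == Kind::kRef) {
    return is_eq_or_ne && (lhs == rhs || other_side_is_zero);
  }
  return lhs == rhs;
}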
virtual void Run() { VisitInsertionOrder(); } @@ -68,6 +69,7 @@ class GraphChecker : public HGraphDelegateVisitor { private: // String displayed before dumped errors. const char* const dump_prefix_; + ArenaBitVector seen_ids_; DISALLOW_COPY_AND_ASSIGN(GraphChecker); }; @@ -99,6 +101,7 @@ class SSAChecker : public GraphChecker { void VisitPhi(HPhi* phi) OVERRIDE; void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; void VisitCondition(HCondition* op) OVERRIDE; + void VisitIf(HIf* instruction) OVERRIDE; private: DISALLOW_COPY_AND_ASSIGN(SSAChecker); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5d1703e237..df21c8e9c3 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -30,10 +30,12 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HGraphVisualizerPrinter(HGraph* graph, std::ostream& output, const char* pass_name, + bool is_after_pass, const CodeGenerator& codegen) : HGraphVisitor(graph), output_(output), pass_name_(pass_name), + is_after_pass_(is_after_pass), codegen_(codegen), indent_(0) {} @@ -136,14 +138,21 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << "invalid"; } else if (location.IsStackSlot()) { output_ << location.GetStackIndex() << "(sp)"; + } else if (location.IsFpuRegisterPair()) { + codegen_.DumpFloatingPointRegister(output_, location.low()); + output_ << " and "; + codegen_.DumpFloatingPointRegister(output_, location.high()); + } else if (location.IsRegisterPair()) { + codegen_.DumpCoreRegister(output_, location.low()); + output_ << " and "; + codegen_.DumpCoreRegister(output_, location.high()); } else { DCHECK(location.IsDoubleStackSlot()); output_ << "2x" << location.GetStackIndex() << "(sp)"; } } - void VisitParallelMove(HParallelMove* instruction) { - output_ << instruction->DebugName(); + void VisitParallelMove(HParallelMove* instruction) OVERRIDE { output_ << " ("; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { MoveOperands* move = instruction->MoveOperandsAt(i); @@ -158,8 +167,25 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; } - void VisitInstruction(HInstruction* instruction) { + void VisitIntConstant(HIntConstant* instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void VisitLongConstant(HLongConstant* instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); + instruction->Accept(this); if (instruction->InputCount() > 0) { output_ << " [ "; for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) { @@ -201,19 +227,20 @@ class HGraphVisualizerPrinter : public HGraphVisitor { int bci = 0; output_ << bci << " " << instruction->NumberOfUses() << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; - instruction->Accept(this); + PrintInstruction(instruction); output_ << kEndInstructionMarker << std::endl; } } void Run() { StartTag("cfg"); - PrintProperty("name", pass_name_); + std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? 
" (after)" : " (before)"); + PrintProperty("name", pass_desc.c_str()); VisitInsertionOrder(); EndTag("cfg"); } - void VisitBasicBlock(HBasicBlock* block) { + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { StartTag("block"); PrintProperty("name", "B", block->GetBlockId()); if (block->GetLifetimeStart() != kNoLifetime) { @@ -259,6 +286,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { private: std::ostream& output_; const char* pass_name_; + const bool is_after_pass_; const CodeGenerator& codegen_; size_t indent_; @@ -270,7 +298,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, const char* string_filter, const CodeGenerator& codegen, const char* method_name) - : output_(output), graph_(graph), codegen_(codegen), is_enabled_(false) { + : output_(output), graph_(graph), codegen_(codegen), is_enabled_(false) { if (output == nullptr) { return; } @@ -279,7 +307,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } is_enabled_ = true; - HGraphVisualizerPrinter printer(graph, *output_, "", codegen_); + HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); printer.PrintProperty("method", method_name); @@ -287,12 +315,11 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, printer.EndTag("compilation"); } -void HGraphVisualizer::DumpGraph(const char* pass_name) const { - if (!is_enabled_) { - return; +void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const { + if (is_enabled_) { + HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_); + printer.Run(); } - HGraphVisualizerPrinter printer(graph_, *output_, pass_name, codegen_); - printer.Run(); } } // namespace art diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index b5baed9c99..b90d15e1ff 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -32,28 +32,18 @@ static const char* kLivenessPassName = "liveness"; static const char* kRegisterAllocatorPassName = "register"; /** - * If enabled, emits compilation information suitable for the c1visualizer tool - * and IRHydra. - * Currently only works if the compiler is single threaded. + * This class outputs the HGraph in the C1visualizer format. + * Note: Currently only works if the compiler is single threaded. */ class HGraphVisualizer : public ValueObject { public: - /** - * If output is not null, and the method name of the dex compilation - * unit contains `string_filter`, the compilation information will be - * emitted. - */ HGraphVisualizer(std::ostream* output, HGraph* graph, const char* string_filter, const CodeGenerator& codegen, const char* method_name); - /** - * If this visualizer is enabled, emit the compilation information - * in `output_`. 
- */ - void DumpGraph(const char* pass_name) const; + void DumpGraph(const char* pass_name, bool is_after_pass = true) const; private: std::ostream* const output_; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 94ff192264..48f1ea9e15 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -40,18 +40,22 @@ TEST(GVNTest, LocalFieldElimination) { entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* to_remove = block->GetLastInstruction(); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(43), false)); HInstruction* different_offset = block->GetLastInstruction(); // Kill the value. block->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* use_after_kill = block->GetLastInstruction(); block->AddInstruction(new (&allocator) HExit()); @@ -82,7 +86,8 @@ TEST(GVNTest, GlobalFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); HBasicBlock* then = new (&allocator) HBasicBlock(graph); @@ -98,13 +103,16 @@ TEST(GVNTest, GlobalFieldElimination) { else_->AddSuccessor(join); then->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); then->AddInstruction(new (&allocator) HGoto()); else_->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); else_->AddInstruction(new (&allocator) HGoto()); join->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); join->AddInstruction(new (&allocator) HExit()); graph->TryBuildingSsa(); @@ -132,7 +140,8 @@ TEST(GVNTest, LoopFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HGoto()); HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph); @@ -148,22 +157,25 @@ 
TEST(GVNTest, LoopFieldElimination) { loop_body->AddSuccessor(loop_header); loop_header->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction(); loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); // Kill inside the loop body to prevent field gets inside the loop header // and the body to be GVN'ed. loop_body->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); HInstruction* field_set = loop_body->GetLastInstruction(); loop_body->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction(); loop_body->AddInstruction(new (&allocator) HGoto()); exit->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_exit = exit->GetLastInstruction(); exit->AddInstruction(new (&allocator) HExit()); @@ -242,7 +254,7 @@ TEST(GVNTest, LoopSideEffects) { { // Make one block with a side effect. entry->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); GlobalValueNumberer gvn(&allocator, graph); gvn.Run(); @@ -256,7 +268,7 @@ TEST(GVNTest, LoopSideEffects) { { outer_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), outer_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); @@ -273,7 +285,7 @@ TEST(GVNTest, LoopSideEffects) { outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction()); inner_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), inner_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 1de5b78121..532167c179 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -27,6 +27,7 @@ #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" +#include "register_allocator.h" #include "ssa_phi_elimination.h" #include "scoped_thread_state_change.h" #include "thread.h" @@ -43,10 +44,10 @@ void HInliner::Run() { instr_it.Advance()) { HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect(); if (current != nullptr) { - if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) { + if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) { if (kIsDebugBuild) { std::string callee_name = - PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile()); + PrettyMethod(current->GetDexMethodIndex(), 
*outer_compilation_unit_.GetDexFile()); bool should_inline = callee_name.find("$inline$") != std::string::npos; CHECK(!should_inline) << "Could not inline " << callee_name; } @@ -143,6 +144,13 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } + if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, + compiler_driver_->GetInstructionSet())) { + VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) + << " cannot be inlined because of the register allocator"; + return false; + } + if (!callee_graph->TryBuildingSsa()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) << " could not be transformed to SSA"; @@ -200,6 +208,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, } callee_graph->InlineInto(graph_, invoke_instruction); + + // Now that we have inlined the callee, we need to update the next + // instruction id of the caller, so that new instructions added + // after optimizations get a unique id. + graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId()); VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file); outer_stats_->RecordStat(kInlinedInvoke); return true; diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc new file mode 100644 index 0000000000..fe0e7f2eb2 --- /dev/null +++ b/compiler/optimizing/intrinsics.cc @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics.h" + +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "driver/compiler_driver.h" +#include "invoke_type.h" +#include "nodes.h" +#include "quick/inline_method_analyser.h" + +namespace art { + +// Function that returns whether an intrinsic is static/direct or virtual. +static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kInterface; // Non-sensical for intrinsic. 
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + return IsStatic; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kInterface; +} + + + +static Primitive::Type GetType(uint64_t data, bool is_op_size) { + if (is_op_size) { + switch (static_cast<OpSize>(data)) { + case kSignedByte: + return Primitive::Type::kPrimByte; + case kSignedHalf: + return Primitive::Type::kPrimShort; + case k32: + return Primitive::Type::kPrimInt; + case k64: + return Primitive::Type::kPrimLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << data; + UNREACHABLE(); + } + } else { + if ((data & kIntrinsicFlagIsLong) != 0) { + return Primitive::Type::kPrimLong; + } + if ((data & kIntrinsicFlagIsObject) != 0) { + return Primitive::Type::kPrimNot; + } + return Primitive::Type::kPrimInt; + } +} + +static Intrinsics GetIntrinsic(InlineMethod method) { + switch (method.opcode) { + // Floating-point conversions. + case kIntrinsicDoubleCvt: + return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? + Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble; + case kIntrinsicFloatCvt: + return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? + Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat; + + // Bit manipulations. + case kIntrinsicReverseBits: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimInt: + return Intrinsics::kIntegerReverse; + case Primitive::Type::kPrimLong: + return Intrinsics::kLongReverse; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + case kIntrinsicReverseBytes: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimShort: + return Intrinsics::kShortReverseBytes; + case Primitive::Type::kPrimInt: + return Intrinsics::kIntegerReverseBytes; + case Primitive::Type::kPrimLong: + return Intrinsics::kLongReverseBytes; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // Abs. + case kIntrinsicAbsDouble: + return Intrinsics::kMathAbsDouble; + case kIntrinsicAbsFloat: + return Intrinsics::kMathAbsFloat; + case kIntrinsicAbsInt: + return Intrinsics::kMathAbsInt; + case kIntrinsicAbsLong: + return Intrinsics::kMathAbsLong; + + // Min/max. + case kIntrinsicMinMaxDouble: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble; + case kIntrinsicMinMaxFloat: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat; + case kIntrinsicMinMaxInt: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt; + case kIntrinsicMinMaxLong: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; + + // Misc math. + case kIntrinsicSqrt: + return Intrinsics::kMathSqrt; + case kIntrinsicCeil: + return Intrinsics::kMathCeil; + case kIntrinsicFloor: + return Intrinsics::kMathFloor; + case kIntrinsicRint: + return Intrinsics::kMathRint; + case kIntrinsicRoundDouble: + return Intrinsics::kMathRoundDouble; + case kIntrinsicRoundFloat: + return Intrinsics::kMathRoundFloat; + + // System.arraycopy. + case kIntrinsicSystemArrayCopyCharArray: + return Intrinsics::kSystemArrayCopyChar; + + // Thread.currentThread. 
+ case kIntrinsicCurrentThread: + return Intrinsics::kThreadCurrentThread; + + // Memory.peek. + case kIntrinsicPeek: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimByte: + return Intrinsics::kMemoryPeekByte; + case Primitive::Type::kPrimShort: + return Intrinsics::kMemoryPeekShortNative; + case Primitive::Type::kPrimInt: + return Intrinsics::kMemoryPeekIntNative; + case Primitive::Type::kPrimLong: + return Intrinsics::kMemoryPeekLongNative; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // Memory.poke. + case kIntrinsicPoke: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimByte: + return Intrinsics::kMemoryPokeByte; + case Primitive::Type::kPrimShort: + return Intrinsics::kMemoryPokeShortNative; + case Primitive::Type::kPrimInt: + return Intrinsics::kMemoryPokeIntNative; + case Primitive::Type::kPrimLong: + return Intrinsics::kMemoryPokeLongNative; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // String. + case kIntrinsicCharAt: + return Intrinsics::kStringCharAt; + case kIntrinsicCompareTo: + return Intrinsics::kStringCompareTo; + case kIntrinsicIsEmptyOrLength: + return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ? + Intrinsics::kStringLength : Intrinsics::kStringIsEmpty; + case kIntrinsicIndexOf: + return ((method.d.data & kIntrinsicFlagBase0) == 0) ? + Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; + + case kIntrinsicCas: + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimNot: + return Intrinsics::kUnsafeCASObject; + case Primitive::Type::kPrimInt: + return Intrinsics::kUnsafeCASInt; + case Primitive::Type::kPrimLong: + return Intrinsics::kUnsafeCASLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + case kIntrinsicUnsafeGet: { + const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimInt: + return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet; + case Primitive::Type::kPrimLong: + return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + } + case kIntrinsicUnsafePut: { + enum Sync { kNoSync, kVolatile, kOrdered }; + const Sync sync = + ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile : + ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? kOrdered : + kNoSync; + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimInt: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePut; + case kVolatile: + return Intrinsics::kUnsafePutVolatile; + case kOrdered: + return Intrinsics::kUnsafePutOrdered; + } + break; + case Primitive::Type::kPrimLong: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePutLong; + case kVolatile: + return Intrinsics::kUnsafePutLongVolatile; + case kOrdered: + return Intrinsics::kUnsafePutLongOrdered; + } + break; + case Primitive::Type::kPrimNot: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePutObject; + case kVolatile: + return Intrinsics::kUnsafePutObjectVolatile; + case kOrdered: + return Intrinsics::kUnsafePutObjectOrdered; + } + break; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + } + + // Virtual cases. 
+ + case kIntrinsicReferenceGetReferent: + return Intrinsics::kReferenceGetReferent; + + // Quick inliner cases. Remove after refactoring. They are here so that we can use the + // compiler to warn on missing cases. + + case kInlineOpNop: + case kInlineOpReturnArg: + case kInlineOpNonWideConst: + case kInlineOpIGet: + case kInlineOpIPut: + return Intrinsics::kNone; + + // No default case to make the compiler warn on missing cases. + } + return Intrinsics::kNone; +} + +static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { + // The DexFileMethodInliner should have checked whether the methods are agreeing with + // what we expect, i.e., static methods are called as such. Add another check here for + // our expectations: + // Whenever the intrinsic is marked as static-or-direct, report an error if we find an + // InvokeVirtual. The other direction is not possible: we have intrinsics for virtual + // functions that will perform a check inline. If the precise type is known, however, + // the instruction will be sharpened to an InvokeStaticOrDirect. + InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); + InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ? + invoke->AsInvokeStaticOrDirect()->GetInvokeType() : + invoke->IsInvokeVirtual() ? kVirtual : kSuper; + switch (intrinsic_type) { + case kStatic: + return (invoke_type == kStatic); + case kDirect: + return (invoke_type == kDirect); + case kVirtual: + // Call might be devirtualized. + return (invoke_type == kVirtual || invoke_type == kDirect); + + default: + return false; + } +} + +// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod. +void IntrinsicsRecognizer::Run() { + DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_); + DCHECK(inliner != nullptr); + + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); + inst_it.Advance()) { + HInstruction* inst = inst_it.Current(); + if (inst->IsInvoke()) { + HInvoke* invoke = inst->AsInvoke(); + InlineMethod method; + if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { + Intrinsics intrinsic = GetIntrinsic(method); + + if (intrinsic != Intrinsics::kNone) { + if (!CheckInvokeType(intrinsic, invoke)) { + LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " + << intrinsic << " for " + << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_); + } else { + invoke->SetIntrinsic(intrinsic); + } + } + } + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { + switch (intrinsic) { + case Intrinsics::kNone: + os << "No intrinsic."; + break; +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + os << # Name; \ + break; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef STATIC_INTRINSICS_LIST +#undef VIRTUAL_INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return os; +} + +} // namespace art + diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h new file mode 100644 index 0000000000..29cc8efcc3 --- /dev/null +++ b/compiler/optimizing/intrinsics.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CompilerDriver; +class DexFile; + +// Recognize intrinsics from HInvoke nodes. +class IntrinsicsRecognizer : public HOptimization { + public: + IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver) + : HOptimization(graph, true, "intrinsics_recognition"), + dex_file_(dex_file), driver_(driver) {} + + void Run() OVERRIDE; + + private: + const DexFile* dex_file_; + CompilerDriver* driver_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); +}; + +class IntrinsicVisitor : public ValueObject { + public: + virtual ~IntrinsicVisitor() {} + + // Dispatch logic. + + void Dispatch(HInvoke* invoke) { + switch (invoke->GetIntrinsic()) { + case Intrinsics::kNone: + return; +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + Visit ## Name(invoke); \ + return; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Do not put a default case. That way the compiler will complain if we missed a case. + } + } + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ + } +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + protected: + IntrinsicVisitor() {} + + private: + DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_ diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h new file mode 100644 index 0000000000..29ca20cca0 --- /dev/null +++ b/compiler/optimizing/intrinsics_list.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ + +// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected +// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual). 
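Aside: the list that follows is consumed through an X-macro: each user defines OPTIMIZING_INTRINSICS(Name, IsStatic), expands INTRINSICS_LIST over it, and undefines both again, as the switch statements in intrinsics.cc and the visitor in intrinsics.h do. A minimal standalone sketch of the pattern, with made-up names:

#include <iostream>

#define SHAPE_LIST(V) \
  V(Circle, 0)        \
  V(Square, 4)        \
  V(Triangle, 3)

enum class Shape {
#define DEFINE_ENUM(Name, Corners) k##Name,
  SHAPE_LIST(DEFINE_ENUM)
#undef DEFINE_ENUM
};

int Corners(Shape s) {
  switch (s) {
#define DEFINE_CASE(Name, Corners) case Shape::k##Name: return Corners;
    SHAPE_LIST(DEFINE_CASE)
#undef DEFINE_CASE
  }
  return -1;
}

int main() { std::cout << Corners(Shape::kTriangle) << "\n"; }  // prints 3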
+ +#define INTRINSICS_LIST(V) \ + V(DoubleDoubleToRawLongBits, kStatic) \ + V(DoubleLongBitsToDouble, kStatic) \ + V(FloatFloatToRawIntBits, kStatic) \ + V(FloatIntBitsToFloat, kStatic) \ + V(IntegerReverse, kStatic) \ + V(IntegerReverseBytes, kStatic) \ + V(LongReverse, kStatic) \ + V(LongReverseBytes, kStatic) \ + V(ShortReverseBytes, kStatic) \ + V(MathAbsDouble, kStatic) \ + V(MathAbsFloat, kStatic) \ + V(MathAbsLong, kStatic) \ + V(MathAbsInt, kStatic) \ + V(MathMinDoubleDouble, kStatic) \ + V(MathMinFloatFloat, kStatic) \ + V(MathMinLongLong, kStatic) \ + V(MathMinIntInt, kStatic) \ + V(MathMaxDoubleDouble, kStatic) \ + V(MathMaxFloatFloat, kStatic) \ + V(MathMaxLongLong, kStatic) \ + V(MathMaxIntInt, kStatic) \ + V(MathSqrt, kStatic) \ + V(MathCeil, kStatic) \ + V(MathFloor, kStatic) \ + V(MathRint, kStatic) \ + V(MathRoundDouble, kStatic) \ + V(MathRoundFloat, kStatic) \ + V(SystemArrayCopyChar, kStatic) \ + V(ThreadCurrentThread, kStatic) \ + V(MemoryPeekByte, kStatic) \ + V(MemoryPeekIntNative, kStatic) \ + V(MemoryPeekLongNative, kStatic) \ + V(MemoryPeekShortNative, kStatic) \ + V(MemoryPokeByte, kStatic) \ + V(MemoryPokeIntNative, kStatic) \ + V(MemoryPokeLongNative, kStatic) \ + V(MemoryPokeShortNative, kStatic) \ + V(StringCharAt, kDirect) \ + V(StringCompareTo, kDirect) \ + V(StringIsEmpty, kDirect) \ + V(StringIndexOf, kDirect) \ + V(StringIndexOfAfter, kDirect) \ + V(StringLength, kDirect) \ + V(UnsafeCASInt, kDirect) \ + V(UnsafeCASLong, kDirect) \ + V(UnsafeCASObject, kDirect) \ + V(UnsafeGet, kDirect) \ + V(UnsafeGetVolatile, kDirect) \ + V(UnsafeGetLong, kDirect) \ + V(UnsafeGetLongVolatile, kDirect) \ + V(UnsafePut, kDirect) \ + V(UnsafePutOrdered, kDirect) \ + V(UnsafePutVolatile, kDirect) \ + V(UnsafePutObject, kDirect) \ + V(UnsafePutObjectOrdered, kDirect) \ + V(UnsafePutObjectVolatile, kDirect) \ + V(UnsafePutLong, kDirect) \ + V(UnsafePutLongOrdered, kDirect) \ + V(UnsafePutLongVolatile, kDirect) \ + \ + V(ReferenceGetReferent, kVirtual) + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ +#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint. diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc new file mode 100644 index 0000000000..2c239458f1 --- /dev/null +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -0,0 +1,984 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics_x86_64.h" + +#include "code_generator_x86_64.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/art_method.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/x86_64/assembler_x86_64.h" +#include "utils/x86_64/constants_x86_64.h" + +namespace art { + +namespace x86_64 { + +static constexpr bool kIntrinsified = true; + +X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { + return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); +} + +ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() { + return codegen_->GetGraph()->GetArena(); +} + +bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + const LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> + +// TODO: trg as memory. +static void MoveFromReturnRegister(Location trg, + Primitive::Type type, + CodeGeneratorX86_64* codegen) { + if (!trg.IsValid()) { + DCHECK(type == Primitive::kPrimVoid); + return; + } + + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + CpuRegister trg_reg = trg.AsRegister<CpuRegister>(); + if (trg_reg.AsRegister() != RAX) { + __ movl(trg_reg, CpuRegister(RAX)); + } + break; + } + case Primitive::kPrimLong: { + CpuRegister trg_reg = trg.AsRegister<CpuRegister>(); + if (trg_reg.AsRegister() != RAX) { + __ movq(trg_reg, CpuRegister(RAX)); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected void type for valid location " << trg; + UNREACHABLE(); + + case Primitive::kPrimDouble: { + XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>(); + if (trg_reg.AsFloatRegister() != XMM0) { + __ movsd(trg_reg, XmmRegister(XMM0)); + } + break; + } + case Primitive::kPrimFloat: { + XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>(); + if (trg_reg.AsFloatRegister() != XMM0) { + __ movss(trg_reg, XmmRegister(XMM0)); + } + break; + } + } +} + +static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { + if (invoke->InputCount() == 0) { + return; + } + + LocationSummary* locations = invoke->GetLocations(); + InvokeDexCallingConventionVisitor calling_convention_visitor; + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(arena); + + for (size_t i = 0; i < invoke->InputCount(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); +} + +// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified +// call. This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored!
+class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { + public: + explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in); + __ Bind(GetEntryLabel()); + + codegen->SaveLiveRegisters(invoke_->GetLocations()); + + MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + codegen->RestoreLiveRegisters(invoke_->GetLocations()); + __ jmp(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64); +}; + +#undef __ +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit); +} + +void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + 
MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type size, + X86_64Assembler* assembler) { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + switch (size) { + case Primitive::kPrimShort: + // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. + __ bswapl(out); + __ sarl(out, Immediate(16)); + break; + case Primitive::kPrimInt: + __ bswapl(out); + break; + case Primitive::kPrimLong: + __ bswapq(out); + break; + default: + LOG(FATAL) << "Unexpected size for reverse-bytes: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + + +// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we +// need is 64b. + +static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { + // TODO: Enable memory operations when the assembler supports them. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + // TODO: Allow x86 to work with memory. This requires assembler support, see below. + // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); // Immediate constant. + locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location output = locations->Out(); + CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (output.IsFpuRegister()) { + // In-register + XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + + if (is64bit) { + __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ movd(xmm_temp, cpu_temp); + __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); + } else { + __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); + __ movd(xmm_temp, cpu_temp); + __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); + } + } else { + // TODO: update when assember support is available. 
+ UNIMPLEMENTED(FATAL) << "Needs assembler support."; +// Once assembler support is available, in-memory operations look like this: +// if (is64bit) { +// DCHECK(output.IsDoubleStackSlot()); +// // No 64b and with literal. +// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); +// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp); +// } else { +// DCHECK(output.IsStackSlot()); +// // Can use and with a literal directly. +// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF))); +// } + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { + CreateFloatToFloatPlusTemps(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { + CreateFloatToFloatPlusTemps(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location output = locations->Out(); + CpuRegister out = output.AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (is64bit) { + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. + __ addq(out, mask); + __ xorq(out, mask); + } else { + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, + X86_64Assembler* assembler) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 
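Aside: two of the tricks in this file, restated in portable C++ for reference (illustrative sketches, not ART code): the sign-mask absolute value that GenAbsInteger above emits with sar/add/xor, and the Java Math.min semantics (NaN-propagating, with -0.0 smaller than +0.0) that the pseudo-code comment above spells out.

#include <cmath>
#include <cstdint>

// Branch-free abs, as emitted by GenAbsInteger: mask is 0 for non-negative
// inputs and -1 for negative ones, so (x + mask) ^ mask negates exactly the
// negative case. Assumes an arithmetic right shift and two's-complement wrap,
// which is what the generated sarl/addl/xorl sequence relies on.
int32_t AbsViaSignMask(int32_t x) {
  int32_t mask = x >> 31;
  return (x + mask) ^ mask;
}

// Reference semantics for Math.min on doubles: any NaN input yields NaN, and
// -0.0 is smaller than +0.0 (which is what OR-ing the two values achieves when
// both are zero; Math.max uses AND instead).
double JavaMathMin(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return std::nan("");
  if (a == 0.0 && b == 0.0) return std::signbit(a) ? a : b;
  return a < b ? a : b;
}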
+ + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + Label nan, done, op2_label; + if (is_double) { + __ ucomisd(out, op2); + } else { + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (is_double) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (is_double) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. + if (is_double) { + __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + } else { + __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + } + __ movd(out, cpu_temp, is_double); + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (is_double) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); // Immediate constant. +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, + X86_64Assembler* assembler) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? 
op2 + // if out is min jmp done + // out := op2 + // done: + + if (is_long) { + __ cmpq(out, op2); + } else { + __ cmpl(out, op2); + } + + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, true, GetAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + + GetAssembler()->sqrtsd(out, in); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { + // The inputs plus one temp. 
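For orientation, the String.charAt code generation further below boils down to an unsigned bounds check against String.count followed by a 16-bit load from the backing char array. A simplified sketch, not part of this change; the real layout goes through a separate char[] object whose payload starts at Array::DataOffset, which the flat struct here elides.

// Editorial sketch, not part of this change: what the charAt intrinsic emits,
// with field names mirroring the offsets used in VisitStringCharAt below.
#include <cstdint>

struct StringSketch {                // hypothetical flat view of mirror::String
  int32_t count;                     // CountOffset()
  int32_t offset;                    // OffsetOffset()
  const uint16_t* value;             // ValueOffset(): backing chars (array header elided)
};

static uint16_t CharAtSketch(const StringSketch& s, uint32_t idx) {
  if (idx >= static_cast<uint32_t>(s.count)) {   // cmpl idx, [obj + count]; jae slow_path
    return 0;  // the real code jumps to IntrinsicSlowPathX86_64 and throws
  }
  return s.value[s.offset + idx];                // movzxw out, [array + 2 * (offset + idx) + data]
}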
+ LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + // Starting offset within data array + const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value(); + // Start of char data with array_ + const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value(); + + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location temp_loc = locations->GetTemp(0); + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + + // Note: Nullcheck has been done before in a HNullCheck before the HInvokeVirtual. If/when we + // move to (coalesced) implicit checks, we have to do a null check below. + DCHECK(!kCoalescedImplicitNullCheck); + + // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth + // the cost. + // TODO: For simplicity, the index parameter is requested in a register, so different from Quick + // we will not optimize the code for constants (which would save a register). + + SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + + X86_64Assembler* assembler = GetAssembler(); + + __ cmpl(idx, Address(obj, count_offset)); + __ j(kAboveEqual, slow_path->GetEntryLabel()); + + // Get the actual element. + __ movl(temp, idx); // temp := idx. + __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx. + __ movl(out, Address(obj, value_offset)); // obj := obj.array. + // out = out[2*temp]. + __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset)); + + __ Bind(slow_path->GetExitLabel()); +} + +static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { + CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. + // x86 allows unaligned access. We do not have to check the input or use specific instructions + // to avoid a SIGBUS. 
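The switch that follows selects a sign-extending load (movsxb/movsxw) for the sub-word sizes, so the output register holds the peeked byte or short sign-extended to register width. A sketch of the equivalent behaviour, not part of this change:

// Editorial sketch, not part of this change: the sign-extension behaviour the
// peek intrinsics below implement. memcpy also sidesteps alignment, matching
// the comment above about x86 tolerating unaligned access.
#include <cstdint>
#include <cstring>

static int32_t PeekByteSketch(int64_t address) {
  int8_t v;
  std::memcpy(&v, reinterpret_cast<const void*>(static_cast<uintptr_t>(address)), sizeof(v));
  return v;                          // sign-extended, like movsxb
}

static int64_t PeekLongSketch(int64_t address) {
  int64_t v;                         // plain movq; no alignment check needed on x86
  std::memcpy(&v, reinterpret_cast<const void*>(static_cast<uintptr_t>(address)), sizeof(v));
  return v;
}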
+ switch (size) { + case Primitive::kPrimByte: + __ movsxb(out, Address(address, 0)); + break; + case Primitive::kPrimShort: + __ movsxw(out, Address(address, 0)); + break; + case Primitive::kPrimInt: + __ movl(out, Address(address, 0)); + break; + case Primitive::kPrimLong: + __ movq(out, Address(address, 0)); + break; + default: + LOG(FATAL) << "Type not recognized for peek: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { + CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); + // x86 allows unaligned access. We do not have to check the input or use specific instructions + // to avoid a SIGBUS. 
+ switch (size) { + case Primitive::kPrimByte: + __ movb(Address(address, 0), value); + break; + case Primitive::kPrimShort: + __ movw(Address(address, 0), value); + break; + case Primitive::kPrimInt: + __ movl(Address(address, 0), value); + break; + case Primitive::kPrimLong: + __ movq(Address(address, 0), value); + break; + default: + LOG(FATAL) << "Type not recognized for poke: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { + CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); + GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); +} + +static void GenUnsafeGet(LocationSummary* locations, bool is_long, + bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) { + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister trg = locations->Out().AsRegister<CpuRegister>(); + + if (is_long) { + __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + } else { + // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object + // pointer will entail an unpack operation. 
+ __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler()); +} + +static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, + Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
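The two temporaries reserved here feed MarkGCCard in GenUnsafePut further below, which also issues an mfence for volatile stores. As a reading aid, a sketch of what those two extra steps amount to; the card shift and dirty value are illustrative placeholders, not ART's exact constants, and the sketch is not part of this change.

// Editorial sketch, not part of this change: the barrier and card-marking steps
// GenUnsafePut below performs after the plain store (null checks elided).
#include <atomic>
#include <cstdint>

static constexpr size_t kCardShiftSketch = 10;   // assumed card granularity
static constexpr uint8_t kDirtySketch = 0x70;    // illustrative dirty marker

static void UnsafePutObjectSketch(uint8_t* card_table_base,
                                  uint8_t* obj, int64_t offset,
                                  uint8_t* value, bool is_volatile) {
  *reinterpret_cast<uint8_t**>(obj + offset) = value;   // the movl/movq emitted below
  if (is_volatile) {
    // x86-64 stores already have release semantics; the single mfence supplies the
    // remaining StoreLoad ordering a Java volatile store requires.
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }
  // MarkGCCard: dirty the card covering `obj` so the GC rescans it.
  card_table_base[reinterpret_cast<uintptr_t>(obj) >> kCardShiftSketch] = kDirtySketch;
}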
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} + +// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 +// memory model. +static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, + CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); + } else { + __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value); + } + + if (is_volatile) { + __ mfence(); + } + + if (type == Primitive::kPrimNot) { + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_); +} +void 
IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); +} + +// Unimplemented intrinsics. + +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(MathFloor) +UNIMPLEMENTED_INTRINSIC(MathCeil) +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) +UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should +UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h new file mode 100644 index 0000000000..c1fa99c2dc --- /dev/null +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace x86_64 { + +class CodeGeneratorX86_64; +class X86_64Assembler; + +class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. 
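TryDispatch, declared just below, is the hook a backend calls while building locations for an invoke; the call site is not part of this excerpt, so the following usage is an assumption rather than code from the change.

// Editorial sketch, assumed usage (not shown in this diff): hand the invoke to
// the intrinsics builder first, and fall back to the generic call setup.
void BuildInvokeLocationsSketch(IntrinsicLocationsBuilderX86_64* intrinsics,
                                HInvoke* invoke) {
  if (intrinsics->TryDispatch(invoke)) {
    return;  // An intrinsified LocationSummary is now attached to the invoke.
  }
  // ... otherwise set up the regular calling-convention locations ...
}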
+ bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); +}; + +class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + X86_64Assembler* GetAssembler(); + + ArenaAllocator* GetArena(); + + CodeGeneratorX86_64* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64); +}; + +} // namespace x86_64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 59404dcb14..2ab9b571ff 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -22,6 +22,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "graph_visualizer.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -44,7 +45,7 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 007c43e218..ff23eda21e 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -63,7 +64,7 @@ TEST(LiveRangesTest, CFG1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -109,7 +110,7 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -158,7 +159,7 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -235,7 +236,7 @@ TEST(LiveRangesTest, Loop1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); RemoveSuspendChecks(graph); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -313,7 +314,7 @@ TEST(LiveRangesTest, Loop2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis 
liveness(*graph, &codegen); liveness.Analyze(); @@ -389,7 +390,7 @@ TEST(LiveRangesTest, CFG4) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 6f706c391d..f2d49ac397 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -51,7 +52,7 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->TryBuildingSsa(); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index ed5e260a5b..990d662d86 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -20,16 +20,19 @@ namespace art { -LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) +LocationSummary::LocationSummary(HInstruction* instruction, + CallKind call_kind, + bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), environment_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->EnvironmentSize()), - output_overlaps_(true), + output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), - live_registers_() { + live_registers_(), + intrinsified_(intrinsified) { inputs_.SetSize(instruction->InputCount()); for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 1ff26d914c..dda6c94a3d 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -37,7 +37,10 @@ std::ostream& operator<<(std::ostream& os, const Location& location); */ class Location : public ValueObject { public: - static constexpr bool kNoOutputOverlap = false; + enum OutputOverlap { + kOutputOverlap, + kNoOutputOverlap + }; enum Kind { kInvalid = 0, @@ -160,6 +163,16 @@ class Location : public ValueObject { return GetPayload(); } + int low() const { + DCHECK(IsPair()); + return GetPayload() >> 16; + } + + int high() const { + DCHECK(IsPair()); + return GetPayload() & 0xFFFF; + } + template <typename T> T AsRegister() const { DCHECK(IsRegister()); @@ -175,25 +188,41 @@ class Location : public ValueObject { template <typename T> T AsRegisterPairLow() const { DCHECK(IsRegisterPair()); - return static_cast<T>(GetPayload() >> 16); + return static_cast<T>(low()); } template <typename T> T AsRegisterPairHigh() const { DCHECK(IsRegisterPair()); - return static_cast<T>(GetPayload() & 0xFFFF); + return static_cast<T>(high()); } template <typename T> T AsFpuRegisterPairLow() const { DCHECK(IsFpuRegisterPair()); - return static_cast<T>(GetPayload() >> 16); + return static_cast<T>(low()); } template <typename T> T AsFpuRegisterPairHigh() const { 
DCHECK(IsFpuRegisterPair()); - return static_cast<T>(GetPayload() & 0xFFFF); + return static_cast<T>(high()); + } + + bool IsPair() const { + return IsRegisterPair() || IsFpuRegisterPair(); + } + + Location ToLow() const { + return IsRegisterPair() + ? Location::RegisterLocation(low()) + : Location::FpuRegisterLocation(low()); + } + + Location ToHigh() const { + return IsRegisterPair() + ? Location::RegisterLocation(high()) + : Location::FpuRegisterLocation(high()); } static uintptr_t EncodeStackIndex(intptr_t stack_index) { @@ -425,7 +454,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { kCall }; - LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); + LocationSummary(HInstruction* instruction, + CallKind call_kind = kNoCall, + bool intrinsified = false); void SetInAt(uint32_t at, Location location) { DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid()); @@ -440,7 +471,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return inputs_.Size(); } - void SetOut(Location location, bool overlaps = true) { + void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { DCHECK(output_.IsUnallocated() || output_.IsInvalid()); output_overlaps_ = overlaps; output_ = location; @@ -525,14 +556,19 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { && (output_.GetPolicy() == Location::kSameAsFirstInput)) { return false; } - if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) { + Location input = inputs_.Get(input_index); + if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) { return false; } return true; } bool OutputOverlapsWithInputs() const { - return output_overlaps_; + return output_overlaps_ == Location::kOutputOverlap; + } + + bool Intrinsified() const { + return intrinsified_; } private: @@ -541,7 +577,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. - bool output_overlaps_; + Location::OutputOverlap output_overlaps_; Location output_; const CallKind call_kind_; @@ -554,6 +590,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { // Registers that are in use at this position. RegisterSet live_registers_; + // Whether these are locations for an intrinsified call. 
+ const bool intrinsified_; + ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); DISALLOW_COPY_AND_ASSIGN(LocationSummary); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 2b3ac1ac4a..2c84df4d7f 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -15,6 +15,7 @@ */ #include "nodes.h" + #include "ssa_builder.h" #include "utils/growable_array.h" @@ -459,6 +460,22 @@ static void RemoveFromUseList(T* user, } } +HInstruction* HInstruction::GetNextDisregardingMoves() const { + HInstruction* next = GetNext(); + while (next != nullptr && next->IsParallelMove()) { + next = next->GetNext(); + } + return next; +} + +HInstruction* HInstruction::GetPreviousDisregardingMoves() const { + HInstruction* previous = GetPrevious(); + while (previous != nullptr && previous->IsParallelMove()) { + previous = previous->GetPrevious(); + } + return previous; +} + void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { RemoveFromUseList(user, input_index, &uses_); } @@ -646,17 +663,18 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) { int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(), GetRight()->AsLongConstant()->GetValue()); - return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value); + if (GetResultType() == Primitive::kPrimLong) { + return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value); + } else { + DCHECK_EQ(GetResultType(), Primitive::kPrimInt); + return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); + } } return nullptr; } bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { - HInstruction* previous = if_->GetPrevious(); - while (previous != nullptr && previous->IsParallelMove()) { - previous = previous->GetPrevious(); - } - return previous == this; + return this == if_->GetPreviousDisregardingMoves(); } bool HInstruction::Equals(HInstruction* other) const { @@ -753,13 +771,16 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } } - // Finally, replace the invoke with the return value of the inlined graph. + // Replace the invoke with the return value of the inlined graph. if (last->IsReturn()) { invoke->ReplaceWith(last->InputAt(0)); body->RemoveInstruction(last); } else { DCHECK(last->IsReturnVoid()); } + + // Finally remove the invoke from the caller. 
+ invoke->GetBlock()->RemoveInstruction(invoke); } } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 601d45e56f..e19bfce9de 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -697,6 +697,9 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { return previous_; } + HInstruction* GetNextDisregardingMoves() const; + HInstruction* GetPreviousDisregardingMoves() const; + HBasicBlock* GetBlock() const { return block_; } void SetBlock(HBasicBlock* block) { block_ = block; } bool IsInBlock() const { return block_ != nullptr; } @@ -717,6 +720,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } + virtual bool CanDoImplicitNullCheck() const { return false; } + void AddUseAt(HInstruction* user, size_t index) { uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_); } @@ -1581,25 +1586,24 @@ class HLongConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HLongConstant); }; +enum class Intrinsics { +#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name, +#include "intrinsics_list.h" + kNone, + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS +}; +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); + class HInvoke : public HInstruction { public: - HInvoke(ArenaAllocator* arena, - uint32_t number_of_arguments, - Primitive::Type return_type, - uint32_t dex_pc) - : HInstruction(SideEffects::All()), - inputs_(arena, number_of_arguments), - return_type_(return_type), - dex_pc_(dex_pc) { - inputs_.SetSize(number_of_arguments); - } - virtual size_t InputCount() const { return inputs_.Size(); } virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); } // Runtime needs to walk the stack, so Dex -> Dex calls need to // know their environment. 
- virtual bool NeedsEnvironment() const { return true; } + bool NeedsEnvironment() const OVERRIDE { return true; } void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); @@ -1613,12 +1617,38 @@ class HInvoke : public HInstruction { uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexMethodIndex() const { return dex_method_index_; } + + Intrinsics GetIntrinsic() { + return intrinsic_; + } + + void SetIntrinsic(Intrinsics intrinsic) { + intrinsic_ = intrinsic; + } + DECLARE_INSTRUCTION(Invoke); protected: + HInvoke(ArenaAllocator* arena, + uint32_t number_of_arguments, + Primitive::Type return_type, + uint32_t dex_pc, + uint32_t dex_method_index) + : HInstruction(SideEffects::All()), + inputs_(arena, number_of_arguments), + return_type_(return_type), + dex_pc_(dex_pc), + dex_method_index_(dex_method_index), + intrinsic_(Intrinsics::kNone) { + inputs_.SetSize(number_of_arguments); + } + GrowableArray<HInstruction*> inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; + const uint32_t dex_method_index_; + Intrinsics intrinsic_; private: DISALLOW_COPY_AND_ASSIGN(HInvoke); @@ -1630,19 +1660,22 @@ class HInvokeStaticOrDirect : public HInvoke { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, - uint32_t index_in_dex_cache, + uint32_t dex_method_index, InvokeType invoke_type) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), - index_in_dex_cache_(index_in_dex_cache), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), invoke_type_(invoke_type) {} - uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; } + bool CanDoImplicitNullCheck() const OVERRIDE { + // We access the method via the dex cache so we can't do an implicit null check. + // TODO: for intrinsics we can generate implicit null checks. + return false; + } + InvokeType GetInvokeType() const { return invoke_type_; } DECLARE_INSTRUCTION(InvokeStaticOrDirect); private: - const uint32_t index_in_dex_cache_; const InvokeType invoke_type_; DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); @@ -1654,10 +1687,16 @@ class HInvokeVirtual : public HInvoke { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, + uint32_t dex_method_index, uint32_t vtable_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: Add implicit null checks in intrinsics. + return !GetLocations()->Intrinsified(); + } + uint32_t GetVTableIndex() const { return vtable_index_; } DECLARE_INSTRUCTION(InvokeVirtual); @@ -1676,17 +1715,20 @@ class HInvokeInterface : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, uint32_t imt_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), - dex_method_index_(dex_method_index), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: Add implicit null checks in intrinsics. 
+ return !GetLocations()->Intrinsified(); + } + uint32_t GetImtIndex() const { return imt_index_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } DECLARE_INSTRUCTION(InvokeInterface); private: - const uint32_t dex_method_index_; const uint32_t imt_index_; DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); @@ -2129,39 +2171,49 @@ class HNullCheck : public HExpression<1> { class FieldInfo : public ValueObject { public: - FieldInfo(MemberOffset field_offset, Primitive::Type field_type) - : field_offset_(field_offset), field_type_(field_type) {} + FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile) + : field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile) {} MemberOffset GetFieldOffset() const { return field_offset_; } Primitive::Type GetFieldType() const { return field_type_; } + bool IsVolatile() const { return is_volatile_; } private: const MemberOffset field_offset_; const Primitive::Type field_type_; + const bool is_volatile_; }; class HInstanceFieldGet : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, value); } - virtual bool CanBeMoved() const { return true; } - virtual bool InstructionDataEquals(HInstruction* other) const { - size_t other_offset = other->AsInstanceFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - virtual size_t ComputeHashCode() const { + bool CanDoImplicitNullCheck() const OVERRIDE { + return GetFieldOffset().Uint32Value() < kPageSize; + } + + size_t ComputeHashCode() const OVERRIDE { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(InstanceFieldGet); @@ -2176,16 +2228,22 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { HInstanceFieldSet(HInstruction* object, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, object); SetRawInputAt(1, value); } + bool CanDoImplicitNullCheck() const OVERRIDE { + return GetFieldOffset().Uint32Value() < kPageSize; + } + + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } - + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } DECLARE_INSTRUCTION(InstanceFieldSet); @@ -2209,6 +2267,15 @@ class HArrayGet : public HExpression<2> { UNUSED(other); return 
true; } + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: We can be smarter here. + // Currently, the array access is always preceded by an ArrayLength or a NullCheck + // which generates the implicit null check. There are cases when these can be removed + // to produce better code. If we ever add optimizations to do so we should allow an + // implicit check here (as long as the address falls in the first page). + return false; + } + void SetType(Primitive::Type type) { type_ = type; } HInstruction* GetArray() const { return InputAt(0); } @@ -2236,12 +2303,17 @@ class HArraySet : public HTemplateInstruction<3> { SetRawInputAt(2, value); } - bool NeedsEnvironment() const { + bool NeedsEnvironment() const OVERRIDE { // We currently always call a runtime method to catch array store // exceptions. return needs_type_check_; } + bool CanDoImplicitNullCheck() const OVERRIDE { + // TODO: Same as for ArrayGet. + return false; + } + void ClearNeedsTypeCheck() { needs_type_check_ = false; } @@ -2284,11 +2356,12 @@ class HArrayLength : public HExpression<1> { SetRawInputAt(0, array); } - virtual bool CanBeMoved() const { return true; } - virtual bool InstructionDataEquals(HInstruction* other) const { + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { UNUSED(other); return true; } + bool CanDoImplicitNullCheck() const OVERRIDE { return true; } DECLARE_INSTRUCTION(ArrayLength); @@ -2497,24 +2570,29 @@ class HStaticFieldGet : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); } - bool CanBeMoved() const OVERRIDE { return true; } + + bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - size_t other_offset = other->AsStaticFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + HStaticFieldGet* other_get = other->AsStaticFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } size_t ComputeHashCode() const OVERRIDE { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(StaticFieldGet); @@ -2529,15 +2607,18 @@ class HStaticFieldSet : public HTemplateInstruction<2> { HStaticFieldSet(HInstruction* cls, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ 
-2678,7 +2759,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(MonitorOperation); - protected: + private: const OperationKind kind_; const uint32_t dex_pc_; @@ -2686,7 +2767,6 @@ class HMonitorOperation : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; - class MoveOperands : public ArenaObject<kArenaAllocMisc> { public: MoveOperands(Location source, Location destination, HInstruction* instruction) @@ -2748,8 +2828,6 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // This is only used in debug mode, to ensure we do not connect interval siblings // in the same parallel move. HInstruction* instruction_; - - DISALLOW_COPY_AND_ASSIGN(MoveOperands); }; static constexpr size_t kDefaultNumberOfMoves = 4; @@ -2759,18 +2837,53 @@ class HParallelMove : public HTemplateInstruction<0> { explicit HParallelMove(ArenaAllocator* arena) : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {} - void AddMove(MoveOperands* move) { - if (kIsDebugBuild && move->GetInstruction() != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction()) - << "Doing parallel moves for the same instruction."; + void AddMove(Location source, Location destination, HInstruction* instruction) { + DCHECK(source.IsValid()); + DCHECK(destination.IsValid()); + // The parallel move resolver does not handle pairs. So we decompose the + // pair locations into two moves. + if (source.IsPair() && destination.IsPair()) { + AddMove(source.ToLow(), destination.ToLow(), instruction); + AddMove(source.ToHigh(), destination.ToHigh(), nullptr); + } else if (source.IsPair()) { + DCHECK(destination.IsDoubleStackSlot()) << destination; + AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction); + AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr); + } else if (destination.IsPair()) { + if (source.IsConstant()) { + // We put the same constant in the move. The code generator will handle which + // low or high part to use. + AddMove(source, destination.ToLow(), instruction); + AddMove(source, destination.ToHigh(), nullptr); + } else { + DCHECK(source.IsDoubleStackSlot()); + AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); + // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to + // always be 4. 
+ static constexpr int kHighOffset = 4; + AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), + destination.ToHigh(), + nullptr); + } + } else { + if (kIsDebugBuild) { + if (instruction != nullptr) { + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) + << "Doing parallel moves for the same instruction."; + } + } + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) + << "Same destination for two moves in a parallel move."; + } } + moves_.Add(MoveOperands(source, destination, instruction)); } - moves_.Add(move); } MoveOperands* MoveOperandsAt(size_t index) const { - return moves_.Get(index); + return moves_.GetRawStorage() + index; } size_t NumMoves() const { return moves_.Size(); } @@ -2778,7 +2891,7 @@ class HParallelMove : public HTemplateInstruction<0> { DECLARE_INSTRUCTION(ParallelMove); private: - GrowableArray<MoveOperands*> moves_; + GrowableArray<MoveOperands> moves_; DISALLOW_COPY_AND_ASSIGN(HParallelMove); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index deebaf7414..1e0d65a945 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -25,6 +25,7 @@ #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" @@ -32,6 +33,7 @@ #include "gvn.h" #include "inliner.h" #include "instruction_simplifier.h" +#include "intrinsics.h" #include "jni/quick/jni_compiler.h" #include "mirror/art_method-inl.h" #include "nodes.h" @@ -68,13 +70,8 @@ class CodeVectorAllocator FINAL : public CodeAllocator { }; /** - * If set to true, generates a file suitable for the c1visualizer tool and IRHydra. - */ -static bool kIsVisualizerEnabled = false; - -/** * Filter to apply to the visualizer. Methods whose name contain that filter will - * be in the file. + * be dumped. */ static const char* kStringFilter = ""; @@ -114,7 +111,7 @@ class OptimizingCompiler FINAL : public Compiler { void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {} - void Init() const OVERRIDE {} + void Init() OVERRIDE; void UnInit() const OVERRIDE {} @@ -123,6 +120,18 @@ class OptimizingCompiler FINAL : public Compiler { // just run the code generation after the graph was built. const bool run_optimizations_; + // Optimize and compile `graph`. + CompiledMethod* CompileOptimized(HGraph* graph, + CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit, + const HGraphVisualizer& visualizer) const; + + // Just compile without doing optimizations. + CompiledMethod* CompileBaseline(CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit) const; + mutable OptimizingCompilerStats compilation_stats_; std::unique_ptr<std::ostream> visualizer_output_; @@ -136,9 +145,18 @@ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : Compiler(driver, kMaximumCompilationTimeBeforeWarning), run_optimizations_( driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime), - compilation_stats_() { - if (kIsVisualizerEnabled) { - visualizer_output_.reset(new std::ofstream("art.cfg")); + compilation_stats_() {} + +void OptimizingCompiler::Init() { + // Enable C1visualizer output. 
Must be done in Init() because the compiler + // driver is not fully initialized when passed to the compiler's constructor. + CompilerDriver* driver = GetCompilerDriver(); + const std::string cfg_file_name = driver->GetDumpCfgFileName(); + if (!cfg_file_name.empty()) { + CHECK_EQ(driver->GetThreadCount(), 1U) + << "Graph visualizer requires the compiler to run single-threaded. " + << "Invoke the compiler with '-j1'."; + visualizer_output_.reset(new std::ofstream(cfg_file_name)); } } @@ -190,22 +208,27 @@ static void RunOptimizations(HGraph* graph, SsaRedundantPhiElimination redundant_phi(graph); SsaDeadPhiElimination dead_phi(graph); HDeadCodeElimination dce(graph); - HConstantFolding fold(graph); + HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph); HInliner inliner(graph, dex_compilation_unit, driver, stats); + HConstantFolding fold2(graph); GVNOptimization gvn(graph); BoundsCheckElimination bce(graph); InstructionSimplifier simplify2(graph); + IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); + HOptimization* optimizations[] = { &redundant_phi, &dead_phi, + &intrinsics, &dce, - &fold, + &fold1, &simplify1, &inliner, + &fold2, &gvn, &bce, &simplify2 @@ -213,21 +236,89 @@ static void RunOptimizations(HGraph* graph, for (size_t i = 0; i < arraysize(optimizations); ++i) { HOptimization* optimization = optimizations[i]; + visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/false); optimization->Run(); - visualizer.DumpGraph(optimization->GetPassName()); + visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/true); optimization->Check(); } } // The stack map we generate must be 4-byte aligned on ARM. Since existing // maps are generated alongside these stack maps, we must also align them. 
-static std::vector<uint8_t>& AlignVectorSize(std::vector<uint8_t>& vector) { +static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { size_t size = vector.size(); size_t aligned_size = RoundUp(size, 4); for (; size < aligned_size; ++size) { vector.push_back(0); } - return vector; + return ArrayRef<const uint8_t>(vector); +} + + +CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + const HGraphVisualizer& visualizer) const { + RunOptimizations( + graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer); + + PrepareForRegisterAllocation(graph).Run(); + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); + visualizer.DumpGraph(kLivenessPassName); + + RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); + register_allocator.AllocateRegisters(); + visualizer.DumpGraph(kRegisterAllocatorPassName); + + CodeVectorAllocator allocator; + codegen->CompileOptimized(&allocator); + + std::vector<uint8_t> stack_map; + codegen->BuildStackMaps(&stack_map); + + compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); + + return CompiledMethod::SwapAllocCompiledMethodStackMap( + compiler_driver, + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(allocator.GetMemory()), + codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + 0, /* FPR spill mask, unused */ + ArrayRef<const uint8_t>(stack_map)); +} + + +CompiledMethod* OptimizingCompiler::CompileBaseline( + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit) const { + CodeVectorAllocator allocator; + codegen->CompileBaseline(&allocator); + + std::vector<uint8_t> mapping_table; + DefaultSrcMap src_mapping_table; + bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); + codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr); + std::vector<uint8_t> vmap_table; + codegen->BuildVMapTable(&vmap_table); + std::vector<uint8_t> gc_map; + codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); + + compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); + return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(allocator.GetMemory()), + codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + 0, /* FPR spill mask, unused */ + &src_mapping_table, + AlignVectorSize(mapping_table), + AlignVectorSize(vmap_table), + AlignVectorSize(gc_map), + ArrayRef<const uint8_t>()); } CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, @@ -239,7 +330,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, const DexFile& dex_file) const { UNUSED(invoke_type); compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); - InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet(); + CompilerDriver* compiler_driver = GetCompilerDriver(); + InstructionSet instruction_set = compiler_driver->GetInstructionSet(); // Always use the thumb2 assembler: some runtime functionality (like implicit stack // overflow checks) assume thumb2. 
if (instruction_set == kArm) { @@ -260,7 +352,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, DexCompilationUnit dex_compilation_unit( nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, - GetCompilerDriver()->GetVerifiedMethod(&dex_file, method_idx)); + compiler_driver->GetVerifiedMethod(&dex_file, method_idx)); std::string method_name = PrettyMethod(method_idx, dex_file); @@ -275,7 +367,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, &dex_compilation_unit, &dex_compilation_unit, &dex_file, - GetCompilerDriver(), + compiler_driver, &compilation_stats_); VLOG(compiler) << "Building " << PrettyMethod(method_idx, dex_file); @@ -285,21 +377,24 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); - if (codegen == nullptr) { + std::unique_ptr<CodeGenerator> codegen( + CodeGenerator::Create(graph, + instruction_set, + *compiler_driver->GetInstructionSetFeatures(), + compiler_driver->GetCompilerOptions())); + if (codegen.get() == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } HGraphVisualizer visualizer( - visualizer_output_.get(), graph, kStringFilter, *codegen, method_name.c_str()); + visualizer_output_.get(), graph, kStringFilter, *codegen.get(), method_name.c_str()); visualizer.DumpGraph("builder"); - CodeVectorAllocator allocator; - bool can_optimize = CanOptimize(*code_item); bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); + CompiledMethod* result = nullptr; if (run_optimizations_ && can_optimize && can_allocate_registers) { VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file); if (!graph->TryBuildingSsa()) { @@ -308,33 +403,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, << ": it contains a non natural loop"; // We could not transform the graph to SSA, bailout. 
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); - return nullptr; + } else { + result = CompileOptimized(graph, codegen.get(), compiler_driver, dex_compilation_unit, visualizer); } - RunOptimizations( - graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer); - - PrepareForRegisterAllocation(graph).Run(); - SsaLivenessAnalysis liveness(*graph, codegen); - liveness.Analyze(); - visualizer.DumpGraph(kLivenessPassName); - - RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); - register_allocator.AllocateRegisters(); - - visualizer.DumpGraph(kRegisterAllocatorPassName); - codegen->CompileOptimized(&allocator); - - std::vector<uint8_t> stack_map; - codegen->BuildStackMaps(&stack_map); - - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); - return new CompiledMethod(GetCompilerDriver(), - instruction_set, - allocator.GetMemory(), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ - stack_map); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); @@ -349,31 +420,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); } - codegen->CompileBaseline(&allocator); - - std::vector<uint8_t> mapping_table; - SrcMap src_mapping_table; - codegen->BuildMappingTable(&mapping_table, - GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? - &src_mapping_table : nullptr); - std::vector<uint8_t> vmap_table; - codegen->BuildVMapTable(&vmap_table); - std::vector<uint8_t> gc_map; - codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); - - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return new CompiledMethod(GetCompilerDriver(), - instruction_set, - allocator.GetMemory(), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ - &src_mapping_table, - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - nullptr); + result = CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); } + return result; } Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 7993b19850..cc2723df99 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -38,7 +38,6 @@ enum MethodCompilationStat { kNotCompiledUnresolvedMethod, kNotCompiledUnresolvedField, kNotCompiledNonSequentialRegPair, - kNotCompiledVolatile, kNotOptimizedTryCatch, kNotOptimizedDisabled, kNotCompiledCantAccesType, @@ -92,7 +91,6 @@ class OptimizingCompilerStats { case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; - case kNotCompiledVolatile : return "kNotCompiledVolatile"; case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 04b56345c4..b3eb1e2d51 100644 --- 
a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -45,8 +45,12 @@ namespace art { LiveInterval* BuildInterval(const size_t ranges[][2], size_t number_of_ranges, ArenaAllocator* allocator, - int reg = -1) { - LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt); + int reg = -1, + HInstruction* defined_by = nullptr) { + LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt, defined_by); + if (defined_by != nullptr) { + defined_by->SetLiveInterval(interval); + } for (size_t i = number_of_ranges; i > 0; --i) { interval->AddRange(ranges[i - 1][0], ranges[i - 1][1]); } diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 1e93ece2ef..debe466560 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -37,10 +37,12 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { // Perform the moves with constant sources. for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); - if (!move.IsEliminated()) { - DCHECK(move.GetSource().IsConstant()); + MoveOperands* move = moves_.Get(i); + if (!move->IsEliminated()) { + DCHECK(move->GetSource().IsConstant()); EmitMove(i); + // Eliminate the move, in case following moves need a scratch register. + move->Eliminate(); } } @@ -55,6 +57,9 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { // unallocated, or the move was already eliminated). for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); + // The parallel move resolver algorithm does not work with register pairs. + DCHECK(!move->GetSource().IsPair()); + DCHECK(!move->GetDestination().IsPair()); if (!move->IsRedundant()) { moves_.Add(move); } diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 309425ef4d..7ec1dd2deb 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -58,6 +58,9 @@ class ParallelMoveResolver : public ValueObject { }; bool IsScratchLocation(Location loc); + + // Allocate a scratch register for performing a move. The method will try to use + // a register that is the destination of a move, but that move has not been emitted yet. int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled); // Emit a move. 
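// ---- Editor's note: illustration only, not part of the patch; it refers to the new
// move->Eliminate() call and the AllocateScratchRegister comment above. ----
// Both changes express the same rule: a register may be borrowed as scratch only while the
// move that writes it has not been emitted yet. A stripped-down model of that check (the
// Move struct and IsScratchRegister are hypothetical, loosely following the spirit of the
// existing ParallelMoveResolver::IsScratchLocation logic):

#include <vector>

struct Move {
  int src;
  int dst;
  bool eliminated;
};

static bool IsScratchRegister(const std::vector<Move>& moves, int reg) {
  // Unusable if any remaining move still reads `reg`.
  for (const Move& m : moves) {
    if (!m.eliminated && m.src == reg) {
      return false;
    }
  }
  // Usable if a remaining move is going to overwrite `reg` anyway. This is why the
  // constant-move loop above now marks each move as eliminated right after emitting it:
  // otherwise the freshly written destination would still look like free scratch space
  // to a later constant move and could be clobbered.
  for (const Move& m : moves) {
    if (!m.eliminated && m.dst == reg) {
      return true;
    }
  }
  return false;
}
// ---- End of editor's note. ----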
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 62629bcd0c..28b5697bbd 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -26,16 +26,26 @@ class TestParallelMoveResolver : public ParallelMoveResolver { public: explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {} + void Dump(Location location) { + if (location.IsConstant()) { + message_ << "C"; + } else if (location.IsPair()) { + message_ << location.low() << "," << location.high(); + } else { + message_ << location.reg(); + } + } + virtual void EmitMove(size_t index) { MoveOperands* move = moves_.Get(index); if (!message_.str().empty()) { message_ << " "; } - message_ << "(" - << move->GetSource().reg() - << " -> " - << move->GetDestination().reg() - << ")"; + message_ << "("; + Dump(move->GetSource()); + message_ << " -> "; + Dump(move->GetDestination()); + message_ << ")"; } virtual void EmitSwap(size_t index) { @@ -43,11 +53,11 @@ class TestParallelMoveResolver : public ParallelMoveResolver { if (!message_.str().empty()) { message_ << " "; } - message_ << "(" - << move->GetSource().reg() - << " <-> " - << move->GetDestination().reg() - << ")"; + message_ << "("; + Dump(move->GetSource()); + message_ << " <-> "; + Dump(move->GetDestination()); + message_ << ")"; } virtual void SpillScratch(int reg ATTRIBUTE_UNUSED) {} @@ -69,10 +79,10 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, size_t number_of_moves) { HParallelMove* moves = new (allocator) HParallelMove(allocator); for (size_t i = 0; i < number_of_moves; ++i) { - moves->AddMove(new (allocator) MoveOperands( + moves->AddMove( Location::RegisterLocation(operands[i][0]), Location::RegisterLocation(operands[i][1]), - nullptr)); + nullptr); } return moves; } @@ -116,16 +126,76 @@ TEST(ParallelMoveTest, Swap) { { TestParallelMoveResolver resolver(&allocator); - static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}}; + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + } +} + +TEST(ParallelMoveTest, ConstantLast) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::ConstantLocation(new (&allocator) HIntConstant(0)), + Location::RegisterLocation(0), + nullptr); + moves->AddMove( + Location::RegisterLocation(1), + Location::RegisterLocation(2), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str()); +} + +TEST(ParallelMoveTest, Pairs) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(4), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); } { TestParallelMoveResolver resolver(&allocator); - static constexpr size_t 
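// ---- Editor's note: illustration only, not part of the patch; it refers to the expected
// strings asserted by the Swap and Pairs tests in this file (continued just below). ----
// Those expectations come from the resolver's cycle handling: perform a depth-first walk
// over the move graph and, when a move is blocked by a pending move (a cycle), emit a swap
// and redirect the remaining sources. The sketch below is a hypothetical register-only
// re-implementation of that idea (no constants, pairs or scratch registers); on the cyclic
// input from ParallelMoveTest.Swap it reproduces the sequence the test expects.

#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace {
constexpr int kCleared = -1;  // A pending move has its destination temporarily cleared.

struct Move {
  Move(int s, int d) : src(s), dst(d), eliminated(false) {}
  int src;
  int dst;
  bool eliminated;
  bool Blocks(int location) const { return !eliminated && src == location; }
  bool IsPending() const { return !eliminated && dst == kCleared; }
};

class TinyParallelMoveResolver {
 public:
  explicit TinyParallelMoveResolver(std::vector<Move> moves) : moves_(std::move(moves)) {}

  std::string Resolve() {
    for (size_t i = 0; i < moves_.size(); ++i) {
      if (!moves_[i].eliminated && moves_[i].src != moves_[i].dst) {
        PerformMove(i);
      }
    }
    return out_.str();
  }

 private:
  void PerformMove(size_t index) {
    // Mark the move as pending, then resolve every move that reads its destination first.
    const int destination = moves_[index].dst;
    moves_[index].dst = kCleared;
    for (size_t i = 0; i < moves_.size(); ++i) {
      if (i != index && moves_[i].Blocks(destination) && !moves_[i].IsPending()) {
        PerformMove(i);
      }
    }
    moves_[index].dst = destination;
    // Swaps emitted below may have rerouted our source; if it now equals the destination,
    // this move was the last link of a cycle and is already satisfied.
    if (moves_[index].src == destination) {
      moves_[index].eliminated = true;
      return;
    }
    // If a (necessarily pending) move still reads our destination, we found a cycle:
    // break it with a swap and redirect moves reading either swapped register.
    for (size_t i = 0; i < moves_.size(); ++i) {
      if (i != index && moves_[i].Blocks(destination)) {
        Emit(moves_[index].src, destination, /* is_swap= */ true);
        const int source = moves_[index].src;
        moves_[index].eliminated = true;
        for (size_t j = 0; j < moves_.size(); ++j) {
          if (moves_[j].Blocks(source)) {
            moves_[j].src = destination;
          } else if (moves_[j].Blocks(destination)) {
            moves_[j].src = source;
          }
        }
        return;
      }
    }
    // Not blocked: a plain move.
    Emit(moves_[index].src, destination, /* is_swap= */ false);
    moves_[index].eliminated = true;
  }

  void Emit(int src, int dst, bool is_swap) {
    if (!out_.str().empty()) out_ << " ";
    out_ << "(" << src << (is_swap ? " <-> " : " -> ") << dst << ")";
  }

  std::vector<Move> moves_;
  std::ostringstream out_;
};
}  // namespace

int main() {
  // The cyclic case from ParallelMoveTest.Swap: {0->1, 1->2, 2->3, 3->4, 4->0}.
  TinyParallelMoveResolver resolver({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}});
  std::cout << resolver.Resolve() << std::endl;  // (4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)
  return 0;
}
// ---- End of editor's note. ----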
moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}}; - resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str()); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(4), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + } + + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str()); } } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index c1c805dc56..e120bc681e 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -27,6 +27,12 @@ namespace art { static constexpr size_t kMaxLifetimePosition = -1; static constexpr size_t kDefaultNumberOfSpillSlots = 4; +// For simplicity, we implement register pairs as (reg, reg + 1). +// Note that this is a requirement for double registers on ARM, since we +// allocate SRegister. +static int GetHighForLowRegister(int reg) { return reg + 1; } +static bool IsLowRegister(int reg) { return (reg & 1) == 0; } + RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) @@ -50,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, blocked_core_registers_(codegen->GetBlockedCoreRegisters()), blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), reserved_out_slots_(0), - maximum_number_of_live_registers_(0) { + maximum_number_of_live_core_registers_(0), + maximum_number_of_live_fp_registers_(0) { codegen->SetupBlockedRegisters(); physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); @@ -64,7 +71,10 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, if (!Supports(instruction_set)) { return false; } - if (instruction_set == kArm64 || instruction_set == kX86_64) { + if (instruction_set == kArm64 + || instruction_set == kX86_64 + || instruction_set == kArm + || instruction_set == kThumb2) { return true; } for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { @@ -72,10 +82,12 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->GetType() == Primitive::kPrimLong || - current->GetType() == Primitive::kPrimFloat || - current->GetType() == Primitive::kPrimDouble) { - return false; + if (instruction_set == kX86) { + if (current->GetType() == Primitive::kPrimLong || + current->GetType() == Primitive::kPrimFloat || + current->GetType() == Primitive::kPrimDouble) { + return false; + } } } } @@ -130,7 +142,7 @@ void RegisterAllocator::BlockRegister(Location location, : physical_fp_register_intervals_.Get(reg); Primitive::Type type = location.IsRegister() ? 
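// ---- Editor's note: illustration only, not part of the patch; it refers to the
// GetHighForLowRegister/IsLowRegister helpers introduced above. ----
// The (reg, reg + 1) convention with an even low half matches how ARM overlays its
// single-precision registers: two consecutive SRegisters starting at an even index form
// one DRegister (S0/S1 -> D0, S2/S3 -> D1, ...). A tiny standalone sketch
// (DRegisterFromSRegisterPair is a hypothetical name, not an ART function):

#include <cassert>
#include <iostream>

static int GetHighForLowRegister(int reg) { return reg + 1; }
static bool IsLowRegister(int reg) { return (reg & 1) == 0; }

// Index of the double register covered by the pair whose low half is `low`.
static int DRegisterFromSRegisterPair(int low) {
  assert(IsLowRegister(low));
  return low / 2;
}

int main() {
  const int low = 2;
  std::cout << "pair (S" << low << ", S" << GetHighForLowRegister(low) << ") maps to D"
            << DRegisterFromSRegisterPair(low) << std::endl;  // pair (S2, S3) maps to D1
  return 0;
}
// ---- End of editor's note. ----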
Primitive::kPrimInt - : Primitive::kPrimDouble; + : Primitive::kPrimFloat; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); if (location.IsRegister()) { @@ -173,9 +185,6 @@ void RegisterAllocator::AllocateRegistersInternal() { } LinearScan(); - size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_; - maximum_number_of_live_registers_ = 0; - inactive_.Reset(); active_.Reset(); handled_.Reset(); @@ -195,7 +204,6 @@ void RegisterAllocator::AllocateRegistersInternal() { } } LinearScan(); - maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers; } void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { @@ -226,6 +234,12 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); temp_intervals_.Add(interval); interval->AddRange(position, position + 1); + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + interval->AddHighInterval(true); + LiveInterval* high = interval->GetHighInterval(); + temp_intervals_.Add(high); + unhandled_fp_intervals_.Add(high); + } unhandled_fp_intervals_.Add(interval); break; } @@ -279,6 +293,9 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { Location input = locations->InAt(i); if (input.IsRegister() || input.IsFpuRegister()) { BlockRegister(input, position, position + 1); + } else if (input.IsPair()) { + BlockRegister(input.ToLow(), position, position + 1); + BlockRegister(input.ToHigh(), position, position + 1); } } @@ -291,6 +308,10 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + if (codegen_->NeedsTwoRegisters(current->GetType())) { + current->AddHighInterval(); + } + // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a // given register, we create an interval that covers these locations. The register @@ -304,14 +325,30 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { if (first.IsRegister() || first.IsFpuRegister()) { current->SetFrom(position + 1); current->SetRegister(first.reg()); + } else if (first.IsPair()) { + current->SetFrom(position + 1); + current->SetRegister(first.low()); + LiveInterval* high = current->GetHighInterval(); + high->SetRegister(first.high()); + high->SetFrom(position + 1); } } else if (output.IsRegister() || output.IsFpuRegister()) { // Shift the interval's start by one to account for the blocked register. current->SetFrom(position + 1); current->SetRegister(output.reg()); BlockRegister(output, position, position + 1); + } else if (output.IsPair()) { + current->SetFrom(position + 1); + current->SetRegister(output.low()); + LiveInterval* high = current->GetHighInterval(); + high->SetRegister(output.high()); + high->SetFrom(position + 1); + BlockRegister(output.ToLow(), position, position + 1); + BlockRegister(output.ToHigh(), position, position + 1); } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { current->SetSpillSlot(output.GetStackIndex()); + } else { + DCHECK(output.IsUnallocated() || output.IsConstant()); } // If needed, add interval to the list of unhandled intervals. 
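// ---- Editor's note: illustration only, not part of the patch; it refers to the
// codegen_->NeedsTwoRegisters(...) checks and AddHighInterval() calls above. ----
// A high sibling interval is only synthesized for values that do not fit in a single
// register of the target: on a 32-bit ARM-like target that means longs (two core
// registers) and doubles (two SRegisters). A hypothetical sketch of that predicate,
// with a minimal local enum standing in for Primitive::Type:

#include <iostream>

enum class PrimType { kInt, kNot /* reference */, kFloat, kLong, kDouble };

// On a 32-bit target, 64-bit values occupy a register pair; everything else fits in one.
static bool NeedsTwoRegisters32Bit(PrimType type) {
  return type == PrimType::kLong || type == PrimType::kDouble;
}

int main() {
  std::cout << std::boolalpha
            << NeedsTwoRegisters32Bit(PrimType::kLong) << " "        // true
            << NeedsTwoRegisters32Bit(PrimType::kInt) << std::endl;  // false
  return 0;
}
// ---- End of editor's note. ----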
@@ -516,6 +553,7 @@ void RegisterAllocator::LinearScan() { LiveInterval* current = unhandled_->Pop(); DCHECK(!current->IsFixed() && !current->HasSpillSlot()); DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart()); + DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval()); size_t position = current->GetStart(); @@ -560,12 +598,24 @@ void RegisterAllocator::LinearScan() { if (current->IsSlowPathSafepoint()) { // Synthesized interval to record the maximum number of live registers // at safepoints. No need to allocate a register for it. - maximum_number_of_live_registers_ = - std::max(maximum_number_of_live_registers_, active_.Size()); + if (processing_core_registers_) { + maximum_number_of_live_core_registers_ = + std::max(maximum_number_of_live_core_registers_, active_.Size()); + } else { + maximum_number_of_live_fp_registers_ = + std::max(maximum_number_of_live_fp_registers_, active_.Size()); + } DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart()); continue; } + if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) { + DCHECK(!current->HasRegister()); + // Allocating the low part was unsucessful. The splitted interval for the high part + // will be handled next (it is in the `unhandled_` list). + continue; + } + // (4) Try to find an available register. bool success = TryAllocateFreeReg(current); @@ -578,6 +628,9 @@ void RegisterAllocator::LinearScan() { // intervals. if (success) { active_.Add(current); + if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { + current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); + } } } } @@ -626,30 +679,35 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } - int reg = -1; + int reg = kNoRegister; if (current->HasRegister()) { // Some instructions have a fixed register output. reg = current->GetRegister(); - DCHECK_NE(free_until[reg], 0u); + if (free_until[reg] == 0) { + DCHECK(current->IsHighInterval()); + // AllocateBlockedReg will spill the holder of the register. + return false; + } } else { + DCHECK(!current->IsHighInterval()); int hint = current->FindFirstRegisterHint(free_until); if (hint != kNoRegister) { DCHECK(!IsBlocked(hint)); reg = hint; + } else if (current->IsLowInterval()) { + reg = FindAvailableRegisterPair(free_until, current->GetStart()); } else { - // Pick the register that is free the longest. - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (reg == -1 || free_until[i] > free_until[reg]) { - reg = i; - if (free_until[i] == kMaxLifetimePosition) break; - } - } + reg = FindAvailableRegister(free_until); } } + DCHECK_NE(reg, kNoRegister); // If we could not find a register, we need to spill. - if (reg == -1 || free_until[reg] == 0) { + if (free_until[reg] == 0) { + return false; + } + + if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) { return false; } @@ -671,6 +729,66 @@ bool RegisterAllocator::IsBlocked(int reg) const { : blocked_fp_registers_[reg]; } +int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const { + int reg = kNoRegister; + // Pick the register pair that is used the last. 
+ for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) continue; + if (!IsLowRegister(i)) continue; + int high_register = GetHighForLowRegister(i); + if (IsBlocked(high_register)) continue; + int existing_high_register = GetHighForLowRegister(reg); + if ((reg == kNoRegister) || (next_use[i] >= next_use[reg] + && next_use[high_register] >= next_use[existing_high_register])) { + reg = i; + if (next_use[i] == kMaxLifetimePosition + && next_use[high_register] == kMaxLifetimePosition) { + break; + } + } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) { + // If one of the current register is known to be unavailable, just unconditionally + // try a new one. + reg = i; + } + } + return reg; +} + +int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { + int reg = kNoRegister; + // Pick the register that is used the last. + for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) continue; + if (reg == kNoRegister || next_use[i] > next_use[reg]) { + reg = i; + if (next_use[i] == kMaxLifetimePosition) break; + } + } + return reg; +} + +bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { + for (size_t i = 0, e = active_.Size(); i < e; ++i) { + LiveInterval* active = active_.Get(i); + DCHECK(active->HasRegister()); + // Split the first interval found. + if (first_register_use <= next_use[active->GetRegister()] + && !active->IsLowInterval() + && !active->IsHighInterval()) { + LiveInterval* split = Split(active, position); + active_.DeleteAt(i); + if (split != active) { + handled_.Add(active); + } + AddSorted(unhandled_, split); + return true; + } + } + return false; +} + // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -731,24 +849,50 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } } - // Pick the register that is used the last. - int reg = -1; - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (reg == -1 || next_use[i] > next_use[reg]) { - reg = i; - if (next_use[i] == kMaxLifetimePosition) break; - } + int reg = kNoRegister; + bool should_spill = false; + if (current->HasRegister()) { + DCHECK(current->IsHighInterval()); + reg = current->GetRegister(); + // When allocating the low part, we made sure the high register was available. + DCHECK_LT(first_register_use, next_use[reg]); + } else if (current->IsLowInterval()) { + reg = FindAvailableRegisterPair(next_use, current->GetStart()); + // We should spill if both registers are not available. + should_spill = (first_register_use >= next_use[reg]) + || (first_register_use >= next_use[GetHighForLowRegister(reg)]); + } else { + DCHECK(!current->IsHighInterval()); + reg = FindAvailableRegister(next_use); + should_spill = (first_register_use >= next_use[reg]); } - if (first_register_use >= next_use[reg]) { - // If the first use of that instruction is after the last use of the found - // register, we split this interval just before its first register use. 
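// ---- Editor's note: illustration only, not part of the patch; it refers to
// FindAvailableRegisterPair above. ----
// The pair search keeps the even/odd pair whose *both* halves are needed latest, so that
// giving it to `current` interferes as little as possible with already-allocated intervals.
// A small standalone demo of that choice on a concrete next_use table (blocked registers
// and the `starting_at` refinement are omitted for brevity):

#include <cstddef>
#include <iostream>

static constexpr size_t kMaxLifetimePosition = static_cast<size_t>(-1);

static int FindPair(const size_t* next_use, size_t number_of_registers) {
  int reg = -1;
  for (size_t low = 0; low + 1 < number_of_registers; low += 2) {
    const size_t high = low + 1;
    if (reg == -1
        || (next_use[low] >= next_use[reg] && next_use[high] >= next_use[reg + 1])) {
      reg = static_cast<int>(low);
      if (next_use[low] == kMaxLifetimePosition && next_use[high] == kMaxLifetimePosition) {
        break;  // Nothing can beat a pair that is never needed again.
      }
    }
  }
  return reg;
}

int main() {
  // R0/R1 are needed again at positions 10/12, R2/R3 only at 40/never: pair (R2, R3) wins.
  const size_t next_use[] = {10, 12, 40, kMaxLifetimePosition};
  std::cout << "best pair starts at R" << FindPair(next_use, 4) << std::endl;  // R2
  return 0;
}
// ---- End of editor's note. ----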
- AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use - 1); - DCHECK_NE(current, split) << "There is not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName(); - AddSorted(unhandled_, split); + DCHECK_NE(reg, kNoRegister); + if (should_spill) { + DCHECK(!current->IsHighInterval()); + bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1)); + if (current->IsLowInterval() + && is_allocation_at_use_site + && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) { + // If we're allocating a register for `current` because the instruction at + // that position requires it, but we think we should spill, then there are + // non-pair intervals blocking the allocation. We split the first + // interval found, and put ourselves first in the `unhandled_` list. + LiveInterval* existing = unhandled_->Peek(); + DCHECK(existing->IsHighInterval()); + DCHECK_EQ(existing->GetLowInterval(), current); + unhandled_->Add(current); + } else { + // If the first use of that instruction is after the last use of the found + // register, we split this interval just before its first register use. + AllocateSpillSlotFor(current); + LiveInterval* split = Split(current, first_register_use - 1); + DCHECK_NE(current, split) << "There is not enough registers available for " + << split->GetParent()->GetDefinedBy()->DebugName() << " " + << split->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1; + AddSorted(unhandled_, split); + } return false; } else { // Use this register and spill the active and inactives interval that @@ -761,8 +905,27 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); active_.DeleteAt(i); - handled_.Add(active); + if (split != active) { + handled_.Add(active); + } AddSorted(unhandled_, split); + + if (active->IsLowInterval() || active->IsHighInterval()) { + LiveInterval* other_half = active->IsLowInterval() + ? active->GetHighInterval() + : active->GetLowInterval(); + // We also need to remove the other half from the list of actives. + bool found = false; + for (size_t j = 0; j < active_.Size(); ++j) { + if (active_.Get(j) == other_half) { + found = true; + active_.DeleteAt(j); + handled_.Add(other_half); + break; + } + } + DCHECK(found); + } break; } } @@ -782,14 +945,38 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (next_intersection != kNoLifetime) { if (inactive->IsFixed()) { LiveInterval* split = Split(current, next_intersection); + DCHECK_NE(split, current); AddSorted(unhandled_, split); } else { - LiveInterval* split = Split(inactive, next_intersection); + // Split at the start of `current`, which will lead to splitting + // at the end of the lifetime hole of `inactive`. + LiveInterval* split = Split(inactive, current->GetStart()); + // If it's inactive, it must start before the current interval. + DCHECK_NE(split, inactive); inactive_.DeleteAt(i); --i; --e; handled_.Add(inactive); AddSorted(unhandled_, split); + + if (inactive->IsLowInterval() || inactive->IsHighInterval()) { + LiveInterval* other_half = inactive->IsLowInterval() + ? inactive->GetHighInterval() + : inactive->GetLowInterval(); + + // We also need to remove the other half from the list of inactives. 
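// ---- Editor's note: illustration only, not part of the patch; it refers to the
// `should_spill` computation a few lines above. ----
// The decision compares the first position where `current` genuinely needs a register with
// the next use of the best candidate: if the candidate is needed again sooner, spilling
// `current` is the cheaper option, and for a pair either half being needed too soon forces
// the spill. A minimal sketch of just that comparison (hypothetical helper names;
// next_use[r] is the next lifetime position at which already-allocated register r is used):

#include <cstddef>
#include <iostream>

static int GetHighForLowRegister(int reg) { return reg + 1; }

static bool ShouldSpillForSingle(size_t first_register_use, const size_t* next_use, int reg) {
  return first_register_use >= next_use[reg];
}

static bool ShouldSpillForPair(size_t first_register_use, const size_t* next_use, int low_reg) {
  // Taking the pair only pays off if *both* halves stay free until `current` needs them.
  return first_register_use >= next_use[low_reg]
      || first_register_use >= next_use[GetHighForLowRegister(low_reg)];
}

int main() {
  const size_t next_use[] = {25, 12};
  // `current` first needs a register at position 20. R0 alone (next used at 25) would do,
  // but the pair (R0, R1) would not, because R1 is needed again at position 12.
  std::cout << std::boolalpha
            << ShouldSpillForSingle(20, next_use, 0) << " "         // false
            << ShouldSpillForPair(20, next_use, 0) << std::endl;    // true
  return 0;
}
// ---- End of editor's note. ----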
+ bool found = false; + for (size_t j = 0; j < inactive_.Size(); ++j) { + if (inactive_.Get(j) == other_half) { + found = true; + inactive_.DeleteAt(j); + --e; + handled_.Add(other_half); + break; + } + } + DCHECK(found); + } } } } @@ -804,7 +991,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter size_t insert_at = 0; for (size_t i = array->Size(); i > 0; --i) { LiveInterval* current = array->Get(i - 1); - if (current->StartsAfter(interval)) { + // High intervals must be processed right after their low equivalent. + if (current->StartsAfter(interval) && !current->IsHighInterval()) { insert_at = i; break; } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { @@ -815,23 +1003,49 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter break; } } + array->InsertAt(insert_at, interval); + // Insert the high interval before the low, to ensure the low is processed before. + if (interval->HasHighInterval()) { + array->InsertAt(insert_at, interval->GetHighInterval()); + } else if (interval->HasLowInterval()) { + array->InsertAt(insert_at + 1, interval->GetLowInterval()); + } } LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { - DCHECK(position >= interval->GetStart()); + DCHECK_GE(position, interval->GetStart()); DCHECK(!interval->IsDeadAt(position)); if (position == interval->GetStart()) { // Spill slot will be allocated when handling `interval` again. interval->ClearRegister(); + if (interval->HasHighInterval()) { + interval->GetHighInterval()->ClearRegister(); + } else if (interval->HasLowInterval()) { + interval->GetLowInterval()->ClearRegister(); + } return interval; } else { LiveInterval* new_interval = interval->SplitAt(position); + if (interval->HasHighInterval()) { + LiveInterval* high = interval->GetHighInterval()->SplitAt(position); + new_interval->SetHighInterval(high); + high->SetLowInterval(new_interval); + } else if (interval->HasLowInterval()) { + LiveInterval* low = interval->GetLowInterval()->SplitAt(position); + new_interval->SetLowInterval(low); + low->SetHighInterval(new_interval); + } return new_interval; } } void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { + if (interval->IsHighInterval()) { + // The low interval will contain the spill slot. + return; + } + LiveInterval* parent = interval->GetParent(); // An instruction gets a spill slot for its entire lifetime. 
If the parent @@ -897,7 +1111,9 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { static bool IsValidDestination(Location destination) { return destination.IsRegister() + || destination.IsRegisterPair() || destination.IsFpuRegister() + || destination.IsFpuRegisterPair() || destination.IsStackSlot() || destination.IsDoubleStackSlot(); } @@ -905,7 +1121,6 @@ static bool IsValidDestination(Location destination) { void RegisterAllocator::AddInputMoveFor(HInstruction* user, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); if (source.Equals(destination)) return; DCHECK(!user->IsPhi()); @@ -922,7 +1137,7 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user, move = previous->AsParallelMove(); } DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); - move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr)); + move->AddMove(source, destination, nullptr); } static bool IsInstructionStart(size_t position) { @@ -937,7 +1152,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); @@ -994,14 +1209,14 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, } } DCHECK_EQ(move->GetLifetimePosition(), position); - move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); + move->AddMove(source, destination, instruction); } void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; DCHECK_EQ(block->GetSuccessors().Size(), 1u); @@ -1024,14 +1239,14 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, } else { move = previous->AsParallelMove(); } - move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); + move->AddMove(source, destination, instruction); } void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; HInstruction* first = block->GetFirstInstruction(); @@ -1043,13 +1258,13 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, move->SetLifetimePosition(block->GetLifetimeStart()); block->InsertInstructionBefore(move, first); } - move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); + move->AddMove(source, destination, instruction); } void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); + DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; if (instruction->IsPhi()) { @@ -1067,7 +1282,7 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, move->SetLifetimePosition(position); instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); } - move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); + move->AddMove(source, destination, 
instruction); } void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { @@ -1075,9 +1290,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (current->HasSpillSlot() && current->HasRegister()) { // We spill eagerly, so move must be at definition. InsertMoveAfter(interval->GetDefinedBy(), - interval->IsFloatingPoint() - ? Location::FpuRegisterLocation(interval->GetRegister()) - : Location::RegisterLocation(interval->GetRegister()), + interval->ToLocation(), interval->NeedsTwoSpillSlots() ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) : Location::StackSlot(interval->GetParent()->GetSpillSlot())); @@ -1097,10 +1310,17 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { Location expected_location = locations->InAt(use->GetInputIndex()); - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(use->GetUser(), source, expected_location); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. + if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } } use = use->GetNext(); @@ -1137,8 +1357,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { switch (source.GetKind()) { case Location::kRegister: { locations->AddLiveRegister(source); - DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_); - + DCHECK_LE(locations->GetNumberOfLiveRegisters(), + maximum_number_of_live_core_registers_ + + maximum_number_of_live_fp_registers_); if (current->GetType() == Primitive::kPrimNot) { locations->SetRegisterBit(source.reg()); } @@ -1148,6 +1369,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->AddLiveRegister(source); break; } + + case Location::kRegisterPair: + case Location::kFpuRegisterPair: { + locations->AddLiveRegister(source.ToLow()); + locations->AddLiveRegister(source.ToHigh()); + break; + } case Location::kStackSlot: // Fall-through case Location::kDoubleStackSlot: // Fall-through case Location::kConstant: { @@ -1226,7 +1454,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, void RegisterAllocator::Resolve() { codegen_->ComputeFrameSize( - spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_); + spill_slots_.Size(), maximum_number_of_live_core_registers_, + maximum_number_of_live_fp_registers_, reserved_out_slots_); // Adjust the Out Location of instructions. // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. @@ -1307,6 +1536,10 @@ void RegisterAllocator::Resolve() { size_t temp_index = 0; for (size_t i = 0; i < temp_intervals_.Size(); ++i) { LiveInterval* temp = temp_intervals_.Get(i); + if (temp->IsHighInterval()) { + // High intervals can be skipped, they are already handled by the low interval. 
+ continue; + } HInstruction* at = liveness_.GetTempUser(temp); if (at != current) { temp_index = 0; @@ -1320,14 +1553,14 @@ void RegisterAllocator::Resolve() { break; case Primitive::kPrimDouble: - // TODO: Support the case of ARM, where a double value - // requires an FPU register pair (note that the ARM back end - // does not yet use this register allocator when a method uses - // floats or doubles). - DCHECK(codegen_->GetInstructionSet() != kArm - && codegen_->GetInstructionSet() != kThumb2); - locations->SetTempAt( - temp_index++, Location::FpuRegisterLocation(temp->GetRegister())); + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + Location location = Location::FpuRegisterPairLocation( + temp->GetRegister(), temp->GetHighInterval()->GetRegister()); + locations->SetTempAt(temp_index++, location); + } else { + locations->SetTempAt( + temp_index++, Location::FpuRegisterLocation(temp->GetRegister())); + } break; default: diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index cbe741c2b3..b8f70bdc18 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -128,6 +128,12 @@ class RegisterAllocator { bool ValidateInternal(bool log_fatal_on_failure) const; void DumpInterval(std::ostream& stream, LiveInterval* interval) const; void DumpAllIntervals(std::ostream& stream) const; + int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; + int FindAvailableRegister(size_t* next_use) const; + + // Try splitting an active non-pair interval at the given `position`. + // Returns whether it was successful at finding such an interval. + bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use); ArenaAllocator* const allocator_; CodeGenerator* const codegen_; @@ -188,10 +194,14 @@ class RegisterAllocator { // Slots reserved for out arguments. size_t reserved_out_slots_; - // The maximum live registers at safepoints. - size_t maximum_number_of_live_registers_; + // The maximum live core registers at safepoints. + size_t maximum_number_of_live_core_registers_; + + // The maximum live FP registers at safepoints. 
+ size_t maximum_number_of_live_fp_registers_; ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil); + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); DISALLOW_COPY_AND_ASSIGN(RegisterAllocator); }; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index f677e840ef..cb5010afcd 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -19,6 +19,7 @@ #include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator.h" @@ -40,7 +41,7 @@ static bool Check(const uint16_t* data) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); HGraph* graph = builder.BuildGraph(*item); graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -56,7 +57,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. @@ -295,7 +296,7 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -327,7 +328,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -380,7 +381,7 @@ TEST(RegisterAllocatorTest, DeadPhi) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -402,7 +403,7 @@ TEST(RegisterAllocatorTest, FreeUntil) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -462,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, entry->AddSuccessor(block); HInstruction* test = new (allocator) HInstanceFieldGet( - parameter, Primitive::kPrimBoolean, MemberOffset(22)); + parameter, Primitive::kPrimBoolean, MemberOffset(22), false); block->AddInstruction(test); block->AddInstruction(new (allocator) HIf(test)); HBasicBlock* then = new (allocator) HBasicBlock(graph); @@ -481,8 +482,10 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, 
*phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); join->AddPhi(*phi); - *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); - *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); + *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); then->AddInstruction(*input1); else_->AddInstruction(*input2); join->AddInstruction(new (allocator) HExit()); @@ -502,7 +505,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -517,7 +520,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -534,7 +537,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -551,7 +554,7 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -581,7 +584,8 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, graph->AddBlock(block); entry->AddSuccessor(block); - *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); block->AddInstruction(*field); *ret = new (allocator) HReturn(*field); block->AddInstruction(*ret); @@ -600,7 +604,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -613,7 +617,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -662,7 +666,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -676,7 +680,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -723,7 +727,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { { 
HGraph* graph = BuildDiv(&allocator, &div); - x86::CodeGeneratorX86 codegen(graph); + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -735,4 +739,106 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { } } +// Test a bug in the register allocator, where allocating a blocked +// register would lead to spilling an inactive interval at the wrong +// position. +TEST(RegisterAllocatorTest, SpillInactive) { + ArenaPool pool; + + // Create a synthesized graph to please the register_allocator and + // ssa_liveness_analysis code. + ArenaAllocator allocator(&pool); + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + entry->AddInstruction(one); + entry->AddInstruction(two); + entry->AddInstruction(three); + entry->AddInstruction(four); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + block->AddInstruction(new (&allocator) HExit()); + + // We create a synthesized user requesting a register, to avoid just spilling the + // intervals. + HPhi* user = new (&allocator) HPhi(&allocator, 0, 1, Primitive::kPrimInt); + user->AddInput(one); + user->SetBlock(block); + LocationSummary* locations = new (&allocator) LocationSummary(user, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + static constexpr size_t phi_ranges[][2] = {{20, 30}}; + BuildInterval(phi_ranges, arraysize(phi_ranges), &allocator, -1, user); + + // Create an interval with lifetime holes. + static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; + LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one); + first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_); + + locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + first = first->SplitAt(1); + + // Create an interval that conflicts with the next interval, to force the next + // interval to call `AllocateBlockedReg`. + static constexpr size_t ranges2[][2] = {{2, 4}}; + LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), &allocator, -1, two); + locations = new (&allocator) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + + // Create an interval that will lead to splitting the first interval. The bug occured + // by splitting at a wrong position, in this case at the next intersection between + // this interval and the first interval. We would have then put the interval with ranges + // "[0, 2(, [4, 6(" in the list of handled intervals, even though we haven't processed intervals + // before lifetime position 6 yet. 
+ static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; + LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three); + third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_); + locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + third = third->SplitAt(3); + + // Because the first part of the split interval was considered handled, this interval + // was free to allocate the same register, even though it conflicts with it. + static constexpr size_t ranges4[][2] = {{4, 6}}; + LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), &allocator, -1, four); + locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + + x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + SsaLivenessAnalysis liveness(*graph, &codegen); + + RegisterAllocator register_allocator(&allocator, &codegen, liveness); + register_allocator.unhandled_core_intervals_.Add(fourth); + register_allocator.unhandled_core_intervals_.Add(third); + register_allocator.unhandled_core_intervals_.Add(second); + register_allocator.unhandled_core_intervals_.Add(first); + + // Set just one register available to make all intervals compete for the same. + register_allocator.number_of_registers_ = 1; + register_allocator.registers_array_ = allocator.AllocArray<size_t>(1); + register_allocator.processing_core_registers_ = true; + register_allocator.unhandled_ = ®ister_allocator.unhandled_core_intervals_; + register_allocator.LinearScan(); + + // Test that there is no conflicts between intervals. + GrowableArray<LiveInterval*> intervals(&allocator, 0); + intervals.Add(first); + intervals.Add(second); + intervals.Add(third); + intervals.Add(fourth); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); +} + } // namespace art diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 660a5c5f60..d41157b8d8 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -419,10 +419,21 @@ bool LiveInterval::NeedsTwoSpillSlots() const { } Location LiveInterval::ToLocation() const { + DCHECK(!IsHighInterval()); if (HasRegister()) { - return IsFloatingPoint() - ? 
Location::FpuRegisterLocation(GetRegister()) - : Location::RegisterLocation(GetRegister()); + if (IsFloatingPoint()) { + if (HasHighInterval()) { + return Location::FpuRegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister()); + } else { + return Location::FpuRegisterLocation(GetRegister()); + } + } else { + if (HasHighInterval()) { + return Location::RegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister()); + } else { + return Location::RegisterLocation(GetRegister()); + } + } } else { HInstruction* defined_by = GetParent()->GetDefinedBy(); if (defined_by->IsConstant()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 23123891ef..a123313426 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -77,6 +77,15 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { stream << "[" << start_ << ", " << end_ << ")"; } + LiveRange* Dup(ArenaAllocator* allocator) const { + return new (allocator) LiveRange( + start_, end_, next_ == nullptr ? nullptr : next_->Dup(allocator)); + } + + LiveRange* GetLastRange() { + return next_ == nullptr ? this : next_->GetLastRange(); + } + private: size_t start_; size_t end_; @@ -123,6 +132,12 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { stream << position_; } + UsePosition* Dup(ArenaAllocator* allocator) const { + return new (allocator) UsePosition( + user_, input_index_, is_environment_, position_, + next_ == nullptr ? nullptr : next_->Dup(allocator)); + } + private: HInstruction* const user_; const size_t input_index_; @@ -414,7 +429,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { LiveRange* current = first_range_; LiveRange* previous = nullptr; // Iterate over the ranges, and either find a range that covers this position, or - // a two ranges in between this position (that is, the position is in a lifetime hole). + // two ranges in between this position (that is, the position is in a lifetime hole). do { if (position >= current->GetEnd()) { // Move to next range. @@ -478,6 +493,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } stream << "}"; stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit(); + stream << " is_high: " << IsHighInterval(); + stream << " is_low: " << IsLowInterval(); } LiveInterval* GetNextSibling() const { return next_sibling_; } @@ -512,6 +529,58 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Returns whether `other` and `this` share the same kind of register. 
bool SameRegisterKind(Location other) const; + bool HasHighInterval() const { + return IsLowInterval(); + } + + bool HasLowInterval() const { + return IsHighInterval(); + } + + LiveInterval* GetLowInterval() const { + DCHECK(HasLowInterval()); + return high_or_low_interval_; + } + + LiveInterval* GetHighInterval() const { + DCHECK(HasHighInterval()); + return high_or_low_interval_; + } + + bool IsHighInterval() const { + return GetParent()->is_high_interval_; + } + + bool IsLowInterval() const { + return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr); + } + + void SetLowInterval(LiveInterval* low) { + DCHECK(IsHighInterval()); + high_or_low_interval_ = low; + } + + void SetHighInterval(LiveInterval* high) { + DCHECK(IsLowInterval()); + high_or_low_interval_ = high; + } + + void AddHighInterval(bool is_temp = false) { + DCHECK_EQ(GetParent(), this); + DCHECK(!HasHighInterval()); + DCHECK(!HasLowInterval()); + high_or_low_interval_ = new (allocator_) LiveInterval( + allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true); + high_or_low_interval_->high_or_low_interval_ = this; + if (first_range_ != nullptr) { + high_or_low_interval_->first_range_ = first_range_->Dup(allocator_); + high_or_low_interval_->last_range_ = first_range_->GetLastRange(); + } + if (first_use_ != nullptr) { + high_or_low_interval_->first_use_ = first_use_->Dup(allocator_); + } + } + private: LiveInterval(ArenaAllocator* allocator, Primitive::Type type, @@ -519,7 +588,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { bool is_fixed = false, int reg = kNoRegister, bool is_temp = false, - bool is_slow_path_safepoint = false) + bool is_slow_path_safepoint = false, + bool is_high_interval = false) : allocator_(allocator), first_range_(nullptr), last_range_(nullptr), @@ -532,6 +602,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { is_fixed_(is_fixed), is_temp_(is_temp), is_slow_path_safepoint_(is_slow_path_safepoint), + is_high_interval_(is_high_interval), + high_or_low_interval_(nullptr), defined_by_(defined_by) {} ArenaAllocator* const allocator_; @@ -568,12 +640,21 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Whether the interval is for a safepoint that calls on slow path. const bool is_slow_path_safepoint_; + // Whether this interval is a synthesized interval for register pair. + const bool is_high_interval_; + + // If this interval needs a register pair, the high or low equivalent. + // `is_high_interval_` tells whether this holds the low or the high. + LiveInterval* high_or_low_interval_; + // The instruction represented by this interval. HInstruction* const defined_by_; static constexpr int kNoRegister = -1; static constexpr int kNoSpillSlot = -1; + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); + DISALLOW_COPY_AND_ASSIGN(LiveInterval); }; |
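// ---- Editor's note: illustration only, not part of the patch; it refers to the
// high_or_low_interval_ field and the accessors added above. ----
// The single pointer encodes a symmetric pairing: a low interval points at its synthesized
// high sibling and vice versa, and only is_high_interval_ (on the parent) says which side a
// given interval is on. A stripped-down model of that invariant, with hypothetical names,
// plain ownership instead of the arena, and the ranges/uses duplication omitted:

#include <cassert>
#include <iostream>
#include <memory>

struct PairedInterval {
  bool is_high = false;
  PairedInterval* other_half = nullptr;  // High sibling for a low interval, low for a high.

  bool IsHighInterval() const { return is_high; }
  bool IsLowInterval() const { return !is_high && other_half != nullptr; }
  bool HasHighInterval() const { return IsLowInterval(); }
  bool HasLowInterval() const { return IsHighInterval(); }
  PairedInterval* GetHighInterval() const { assert(HasHighInterval()); return other_half; }
  PairedInterval* GetLowInterval() const { assert(HasLowInterval()); return other_half; }

  // Mirrors LiveInterval::AddHighInterval: synthesize the high sibling and link both ways.
  // The real code also copies the ranges and use positions into the new sibling.
  std::unique_ptr<PairedInterval> AddHighInterval() {
    assert(!HasHighInterval() && !HasLowInterval());
    std::unique_ptr<PairedInterval> high(new PairedInterval());
    high->is_high = true;
    high->other_half = this;
    other_half = high.get();
    return high;
  }
};

int main() {
  PairedInterval low;
  std::unique_ptr<PairedInterval> high = low.AddHighInterval();
  assert(low.IsLowInterval() && low.HasHighInterval());
  assert(high->IsHighInterval() && high->GetLowInterval() == &low);
  std::cout << std::boolalpha << low.IsLowInterval() << " "
            << high->IsHighInterval() << std::endl;  // true true
  return 0;
}
// ---- End of editor's note. ----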