author | Nicolas Geoffray <ngeoffray@google.com> | 2014-07-04 09:41:32 +0100
---|---|---
committer | Nicolas Geoffray <ngeoffray@google.com> | 2014-07-14 10:06:11 +0100
commit | e50383288a75244255d3ecedcc79ffe9caf774cb (patch) |
tree | 8858489463a57c7b50f7db4d972abec21302b7a7 | /compiler
parent | cf90ba7ebe00346651f3b7ce1e5b1f785f7caabd (diff) |
download | android_art-e50383288a75244255d3ecedcc79ffe9caf774cb.tar.gz android_art-e50383288a75244255d3ecedcc79ffe9caf774cb.tar.bz2 android_art-e50383288a75244255d3ecedcc79ffe9caf774cb.zip |
Support fields in optimizing compiler.
- Required support for temporaries, to be used only by the baseline compiler.
- Also fixed a few invalid assumptions around locations and instructions
  that don't need materialization: such instructions should not have an Out.
Change-Id: Idc4a30dd95dd18015137300d36bec55fc024cf62
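The materialization fix shows up in each backend's `VisitIf`/`VisitCondition` changes below: a condition whose only consumer is the branch immediately after it is folded into the compare-and-branch and gets no output location. A minimal stand-alone sketch of that rule, using simplified stand-in types rather than the actual ART `HInstruction` classes:

```cpp
#include <cassert>
#include <cstdio>

// Stand-ins for HInstruction/HCondition; illustrative only.
struct Instr {
  Instr* next = nullptr;  // Next instruction in the block.
  int num_uses = 0;       // How many instructions consume this value.
  bool is_if = false;     // Is this an HIf-style branch?
};

// A condition must be materialized into a register unless its single user
// is the branch that immediately follows it; only materialized conditions
// need an Out location.
bool NeedsMaterialization(const Instr& cond) {
  return cond.num_uses != 1 || cond.next == nullptr || !cond.next->is_if;
}

int main() {
  Instr branch{nullptr, 0, true};
  Instr cond{&branch, 1, false};
  assert(!NeedsMaterialization(cond));  // Folded into the branch: no Out.
  cond.num_uses = 2;                    // Also consumed elsewhere.
  assert(NeedsMaterialization(cond));
  std::puts("ok");
}
```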
Diffstat (limited to 'compiler')
compiler/dex/mir_field_info.cc                |   4
compiler/driver/compiler_driver-inl.h         |   7
compiler/driver/compiler_driver.cc            |  36
compiler/driver/compiler_driver.h             |  13
compiler/optimizing/builder.cc                | 114
compiler/optimizing/builder.h                 |  14
compiler/optimizing/code_generator.cc         |  23
compiler/optimizing/code_generator.h          |  31
compiler/optimizing/code_generator_arm.cc     | 228
compiler/optimizing/code_generator_arm.h      |   1
compiler/optimizing/code_generator_x86.cc     | 231
compiler/optimizing/code_generator_x86.h      |   1
compiler/optimizing/code_generator_x86_64.cc  | 184
compiler/optimizing/code_generator_x86_64.h   |   1
compiler/optimizing/live_ranges_test.cc       |   4
compiler/optimizing/liveness_test.cc          | 368
compiler/optimizing/nodes.h                   | 107
compiler/optimizing/optimizing_compiler.cc    |   2
compiler/optimizing/ssa_liveness_analysis.cc  |   9
19 files changed, 1115 insertions(+), 263 deletions(-)
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index 98866d9c2d..68247b7807 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -62,9 +62,9 @@ void MirIFieldLoweringInfo::Resolve(CompilerDriver* compiler_driver,
     compiler_driver->GetResolvedFieldDexFileLocation(resolved_field,
         &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_field_idx_);
     bool is_volatile = compiler_driver->IsFieldVolatile(resolved_field);
-
+    it->field_offset_ = resolved_field->GetOffset();
     std::pair<bool, bool> fast_path = compiler_driver->IsFastInstanceField(
-        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx, &it->field_offset_);
+        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx);
     it->flags_ = 0u |  // Without kFlagIsStatic.
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 99fcc26aa5..e175d37914 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -43,7 +43,7 @@ inline mirror::ClassLoader* CompilerDriver::GetClassLoader(ScopedObjectAccess& s
 }

 inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
-    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
@@ -60,7 +60,7 @@ inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
 }

 inline mirror::ArtField* CompilerDriver::ResolveField(
-    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t field_idx, bool is_static) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
@@ -96,14 +96,13 @@ inline bool CompilerDriver::IsFieldVolatile(mirror::ArtField* field) {

 inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    mirror::ArtField* resolved_field, uint16_t field_idx, MemberOffset* field_offset) {
+    mirror::ArtField* resolved_field, uint16_t field_idx) {
   DCHECK(!resolved_field->IsStatic());
   mirror::Class* fields_class = resolved_field->GetDeclaringClass();
   bool fast_get = referrer_class != nullptr &&
       referrer_class->CanAccessResolvedField(fields_class, resolved_field,
                                              dex_cache, field_idx);
   bool fast_put = fast_get && (!resolved_field->IsFinal() || fields_class == referrer_class);
-  *field_offset = fast_get ? resolved_field->GetOffset() : MemberOffset(0u);
   return std::make_pair(fast_get, fast_put);
 }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 4b4d0d0d25..7014c3b3f8 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -990,10 +990,10 @@ void CompilerDriver::ProcessedInvoke(InvokeType invoke_type, int flags) {
   stats_->ProcessedInvoke(invoke_type, flags);
 }

-bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                              bool is_put, MemberOffset* field_offset,
-                                              bool* is_volatile) {
-  ScopedObjectAccess soa(Thread::Current());
+mirror::ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
+                                                           const DexCompilationUnit* mUnit,
+                                                           bool is_put,
+                                                           const ScopedObjectAccess& soa) {
   // Try to resolve the field and compiling method's class.
   mirror::ArtField* resolved_field;
   mirror::Class* referrer_class;
@@ -1011,20 +1011,34 @@ bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompi
     resolved_field = resolved_field_handle.Get();
     dex_cache = dex_cache_handle.Get();
   }
-  bool result = false;
+  bool can_link = false;
   if (resolved_field != nullptr && referrer_class != nullptr) {
-    *is_volatile = IsFieldVolatile(resolved_field);
     std::pair<bool, bool> fast_path = IsFastInstanceField(
-        dex_cache, referrer_class, resolved_field, field_idx, field_offset);
-    result = is_put ? fast_path.second : fast_path.first;
+        dex_cache, referrer_class, resolved_field, field_idx);
+    can_link = is_put ? fast_path.second : fast_path.first;
   }
-  if (!result) {
+  ProcessedInstanceField(can_link);
+  return can_link ? resolved_field : nullptr;
+}
+
+bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
+                                              bool is_put, MemberOffset* field_offset,
+                                              bool* is_volatile) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ArtField> resolved_field =
+      hs.NewHandle(ComputeInstanceFieldInfo(field_idx, mUnit, is_put, soa));
+
+  if (resolved_field.Get() == nullptr) {
     // Conservative defaults.
     *is_volatile = true;
     *field_offset = MemberOffset(static_cast<size_t>(-1));
+    return false;
+  } else {
+    *is_volatile = resolved_field->IsVolatile();
+    *field_offset = resolved_field->GetOffset();
+    return true;
   }
-  ProcessedInstanceField(result);
-  return result;
 }

 bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
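The refactor above splits the query in two: the new overload resolves and returns the `mirror::ArtField*` (or null) under the mutator lock, and the legacy bool-returning entry point becomes a thin wrapper that derives offset and volatility from the result while keeping its conservative defaults. A hedged sketch of that wrapper shape with stand-in types (the real code uses `Handle<>`, `MemberOffset`, and lock annotations):

```cpp
#include <cstdint>
#include <cstdio>

// Stand-in for mirror::ArtField; illustrative only.
struct Field {
  uint32_t offset;
  bool is_volatile;
};

// Plays the role of the new overload: a resolved field, or null on failure.
Field* ResolveInstanceField(bool can_link, Field* candidate) {
  return can_link ? candidate : nullptr;
}

// Plays the role of the legacy wrapper: conservative defaults on failure.
bool ComputeInstanceFieldInfo(bool can_link, Field* candidate,
                              uint32_t* field_offset, bool* is_volatile) {
  Field* resolved = ResolveInstanceField(can_link, candidate);
  if (resolved == nullptr) {
    *is_volatile = true;  // Assume the worst.
    *field_offset = static_cast<uint32_t>(-1);
    return false;
  }
  *is_volatile = resolved->is_volatile;
  *field_offset = resolved->offset;
  return true;
}

int main() {
  Field f{8u, false};
  uint32_t offset = 0;
  bool is_volatile = false;
  if (ComputeInstanceFieldInfo(true, &f, &offset, &is_volatile)) {
    std::printf("offset=%u volatile=%d\n", static_cast<unsigned>(offset),
                static_cast<int>(is_volatile));
  }
}
```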
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ae709f8b95..6dae398372 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -222,14 +222,14 @@ class CompilerDriver {

   // Resolve compiling method's class. Returns nullptr on failure.
   mirror::Class* ResolveCompilingMethodsClass(
-      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
       Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

   // Resolve a field. Returns nullptr on failure, including incompatible class change.
   // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static.
   mirror::ArtField* ResolveField(
-      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
       Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t field_idx, bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -245,7 +245,7 @@ class CompilerDriver {
   // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset.
   std::pair<bool, bool> IsFastInstanceField(
       mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-      mirror::ArtField* resolved_field, uint16_t field_idx, MemberOffset* field_offset)
+      mirror::ArtField* resolved_field, uint16_t field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

   // Can we fast-path an SGET/SPUT access to a static field? If yes, compute the field offset,
@@ -298,6 +298,13 @@ class CompilerDriver {
                                 MemberOffset* field_offset, bool* is_volatile)
       LOCKS_EXCLUDED(Locks::mutator_lock_);

+  mirror::ArtField* ComputeInstanceFieldInfo(uint32_t field_idx,
+                                             const DexCompilationUnit* mUnit,
+                                             bool is_put,
+                                             const ScopedObjectAccess& soa)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Can we fastpath static field access? Computes field's offset, volatility and whether the
   // field is within the referrer (which can avoid checking class initialization).
   bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index cc995f72a1..c96792c903 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -15,13 +15,22 @@
  * limitations under the License.
  */

+#include "builder.h"
+
+#include "class_linker.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
-#include "builder.h"
+#include "driver/compiler_driver-inl.h"
+#include "mirror/art_field.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "primitive.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"

 namespace art {

@@ -93,7 +102,7 @@ static bool CanHandleCodeItem(const DexFile::CodeItem& code_item) {
 }

 template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset) {
+void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_offset) {
   HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(first, second);
@@ -110,7 +119,7 @@ void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset) {
 }

 template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, int32_t dex_offset) {
+void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_offset) {
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(value, GetIntConstant(0));
   current_block_->AddInstruction(comparison);
@@ -335,6 +344,79 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
   return true;
 }

+/**
+ * Helper class to add HTemporary instructions. This class is used when
+ * converting a DEX instruction to multiple HInstruction, and where those
+ * instructions do not die at the following instruction, but instead spans
+ * multiple instructions.
+ */
+class Temporaries : public ValueObject {
+ public:
+  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
+    graph_->UpdateNumberOfTemporaries(count_);
+  }
+
+  void Add(HInstruction* instruction) {
+    // We currently only support vreg size temps.
+    DCHECK(instruction->GetType() != Primitive::kPrimLong
+           && instruction->GetType() != Primitive::kPrimDouble);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    instruction->GetBlock()->AddInstruction(temp);
+    DCHECK(temp->GetPrevious() == instruction);
+  }
+
+ private:
+  HGraph* const graph_;
+
+  // The total number of temporaries that will be used.
+  const size_t count_;
+
+  // Current index in the temporary stack, updated by `Add`.
+  size_t index_;
+};
+
+bool HGraphBuilder::BuildFieldAccess(const Instruction& instruction,
+                                     uint32_t dex_offset,
+                                     bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_22c();
+  uint32_t obj_reg = instruction.VRegB_22c();
+  uint16_t field_index = instruction.VRegC_22c();
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ArtField> resolved_field(hs.NewHandle(
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa)));
+
+  if (resolved_field.Get() == nullptr) {
+    return false;
+  }
+  if (resolved_field->IsVolatile()) {
+    return false;
+  }
+
+  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
+  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_offset));
+  if (is_put) {
+    Temporaries temps(graph_, 1);
+    HInstruction* null_check = current_block_->GetLastInstruction();
+    // We need one temporary for the null check.
+    temps.Add(null_check);
+    HInstruction* value = LoadLocal(source_or_dest_reg, resolved_field->GetTypeAsPrimitiveType());
+    current_block_->AddInstruction(new (arena_) HInstanceFieldSet(
+        null_check,
+        value,
+        resolved_field->GetOffset()));
+  } else {
+    current_block_->AddInstruction(new (arena_) HInstanceFieldGet(
+        current_block_->GetLastInstruction(),
+        resolved_field->GetTypeAsPrimitiveType(),
+        resolved_field->GetOffset()));
+
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  return true;
+}
+
 bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset) {
   if (current_block_ == nullptr) {
     return true;  // Dead code
@@ -581,6 +663,32 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_
     case Instruction::NOP:
       break;

+    case Instruction::IGET:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_SHORT: {
+      if (!BuildFieldAccess(instruction, dex_offset, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::IPUT:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT: {
+      if (!BuildFieldAccess(instruction, dex_offset, true)) {
+        return false;
+      }
+      break;
+    }
+
     default:
       return false;
   }
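`BuildFieldAccess` above is the first user of `Temporaries`: an IPUT expands to a null check plus a store, and the null-checked object must stay live while the value is loaded, so one vreg-sized temp is reserved. A stripped-down model of the index hand-out (illustrative only; the real helper also emits `HTemporary` nodes into the block):

```cpp
#include <cassert>
#include <cstddef>

// Minimal stand-in mirroring the Temporaries helper above: it tracks how
// many temp slots a DEX instruction's expansion will use and hands out
// consecutive indices.
class Temporaries {
 public:
  explicit Temporaries(size_t count) : count_(count), index_(0) {}

  size_t Add() {
    assert(index_ < count_);  // Never hand out more temps than declared.
    return index_++;
  }

 private:
  const size_t count_;  // Total temporaries this expansion will use.
  size_t index_;        // Current index in the temporary stack.
};

int main() {
  Temporaries temps(1);       // One temp: keeps the null check's result
  size_t slot = temps.Add();  // alive while the stored value is loaded.
  assert(slot == 0);
}
```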
"utils/allocation.h" @@ -32,7 +33,8 @@ class HGraphBuilder : public ValueObject { public: HGraphBuilder(ArenaAllocator* arena, DexCompilationUnit* dex_compilation_unit = nullptr, - const DexFile* dex_file = nullptr) + const DexFile* dex_file = nullptr, + CompilerDriver* driver = nullptr) : arena_(arena), branch_targets_(arena, 0), locals_(arena, 0), @@ -43,7 +45,8 @@ class HGraphBuilder : public ValueObject { constant0_(nullptr), constant1_(nullptr), dex_file_(dex_file), - dex_compilation_unit_(dex_compilation_unit) { } + dex_compilation_unit_(dex_compilation_unit), + compiler_driver_(driver) {} HGraph* BuildGraph(const DexFile::CodeItem& code); @@ -84,11 +87,13 @@ class HGraphBuilder : public ValueObject { template<typename T> void Binop_22s(const Instruction& instruction, bool reverse); - template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset); - template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset); + template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_offset); + template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_offset); void BuildReturn(const Instruction& instruction, Primitive::Type type); + bool BuildFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_get); + // Builds an invocation node and returns whether the instruction is supported. bool BuildInvoke(const Instruction& instruction, uint32_t dex_offset, @@ -117,6 +122,7 @@ class HGraphBuilder : public ValueObject { const DexFile* const dex_file_; DexCompilationUnit* const dex_compilation_unit_; + CompilerDriver* const compiler_driver_; DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); }; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index b8332ad2a3..b0e6a75b3d 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -39,6 +39,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator) { DCHECK_EQ(frame_size_, kUninitializedFrameSize); ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs() + + GetGraph()->GetNumberOfTemporaries() + 1 /* filler */); GenerateFrameEntry(); @@ -54,6 +55,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator) { current->Accept(instruction_visitor); } } + GenerateSlowPaths(); size_t code_size = GetAssembler()->CodeSize(); uint8_t* buffer = allocator->Allocate(code_size); @@ -79,6 +81,7 @@ void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { current->Accept(instruction_visitor); } } + GenerateSlowPaths(); size_t code_size = GetAssembler()->CodeSize(); uint8_t* buffer = allocator->Allocate(code_size); @@ -86,6 +89,12 @@ void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { GetAssembler()->FinalizeInstructions(code); } +void CodeGenerator::GenerateSlowPaths() { + for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + slow_paths_.Get(i)->EmitNativeCode(this); + } +} + size_t CodeGenerator::AllocateFreeRegisterInternal( bool* blocked_registers, size_t number_of_registers) const { for (size_t regno = 0; regno < number_of_registers; regno++) { @@ -94,7 +103,6 @@ size_t CodeGenerator::AllocateFreeRegisterInternal( return regno; } } - LOG(FATAL) << "Unreachable"; return -1; } @@ -162,13 +170,6 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { locations->SetTempAt(i, loc); } } - - // Make all registers available for the return value. 
- for (size_t i = 0, e = GetNumberOfRegisters(); i < e; ++i) { - blocked_registers_[i] = false; - } - SetupBlockedRegisters(blocked_registers_); - Location result_location = locations->Out(); if (result_location.IsUnallocated()) { switch (result_location.GetPolicy()) { @@ -187,6 +188,12 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { void CodeGenerator::InitLocations(HInstruction* instruction) { if (instruction->GetLocations() == nullptr) { + if (instruction->IsTemporary()) { + HInstruction* previous = instruction->GetPrevious(); + Location temp_location = GetTemporaryLocation(instruction->AsTemporary()); + Move(previous, temp_location, instruction); + previous->GetLocations()->SetOut(temp_location); + } return; } AllocateRegistersLocally(instruction); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ae2f03080e..abfb790d8f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,12 +30,13 @@ namespace art { static size_t constexpr kVRegSize = 4; static size_t constexpr kUninitializedFrameSize = 0; +class CodeGenerator; class DexCompilationUnit; class CodeAllocator { public: - CodeAllocator() { } - virtual ~CodeAllocator() { } + CodeAllocator() {} + virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; @@ -48,6 +49,23 @@ struct PcInfo { uintptr_t native_pc; }; +class SlowPathCode : public ArenaObject { + public: + SlowPathCode() : entry_label_(), exit_label_() {} + virtual ~SlowPathCode() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + virtual void EmitNativeCode(CodeGenerator* codegen) = 0; + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCode); +}; + class CodeGenerator : public ArenaObject { public: // Compiles the graph to executable instructions. Returns whether the compilation @@ -99,6 +117,12 @@ class CodeGenerator : public ArenaObject { pc_infos_.Add(pc_info); } + void AddSlowPath(SlowPathCode* slow_path) { + slow_paths_.Add(slow_path); + } + + void GenerateSlowPaths(); + void BuildMappingTable(std::vector<uint8_t>* vector) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( @@ -110,6 +134,7 @@ class CodeGenerator : public ArenaObject { graph_(graph), block_labels_(graph->GetArena(), 0), pc_infos_(graph->GetArena(), 32), + slow_paths_(graph->GetArena(), 8), blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {} ~CodeGenerator() {} @@ -125,6 +150,7 @@ class CodeGenerator : public ArenaObject { size_t AllocateFreeRegisterInternal(bool* blocked_registers, size_t number_of_registers) const; virtual Location GetStackLocation(HLoadLocal* load) const = 0; + virtual Location GetTemporaryLocation(HTemporary* temp) const = 0; // Frame size required for this method. uint32_t frame_size_; @@ -138,6 +164,7 @@ class CodeGenerator : public ArenaObject { // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; GrowableArray<PcInfo> pc_infos_; + GrowableArray<SlowPathCode*> slow_paths_; // Temporary data structure used when doing register allocation. 
   bool* const blocked_registers_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d87c14b4db..e70240783a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -24,8 +24,6 @@
 #include "utils/arm/assembler_arm.h"
 #include "utils/arm/managed_register_arm.h"

-#define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
-
 namespace art {

 arm::ArmManagedRegister Location::AsArm() const {
@@ -34,6 +32,27 @@ arm::ArmManagedRegister Location::AsArm() const {

 namespace arm {

+#define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
+
+class NullCheckSlowPathARM : public SlowPathCode {
+ public:
+  explicit NullCheckSlowPathARM(uint32_t dex_pc) : dex_pc_(dex_pc) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowNullPointer).Int32Value();
+    __ ldr(LR, Address(TR, offset));
+    __ blx(LR);
+    codegen->RecordPcInfo(dex_pc_);
+  }
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
+};
+
+#undef __
+#define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->

 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
@@ -63,7 +82,7 @@ inline Condition ARMOppositeCondition(IfCondition cond) {
   return EQ;        // Unreachable.
 }

-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
 static constexpr int kCurrentMethodStackOffset = 0;

 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -88,12 +107,23 @@ ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type,
                                                        bool* blocked_registers) const {
   switch (type) {
     case Primitive::kPrimLong: {
-      size_t reg = AllocateFreeRegisterInternal(
-          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs);
       ArmManagedRegister pair =
           ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
       blocked_registers[pair.AsRegisterPairLow()] = true;
       blocked_registers[pair.AsRegisterPairHigh()] = true;
+      // Block all other register pairs that share a register with `pair`.
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        ArmManagedRegister current =
+            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairLow() == pair.AsRegisterPairHigh()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return pair;
     }

@@ -103,7 +133,16 @@ ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type,
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters);
+      int reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters);
+      // Block all register pairs that contain `reg`.
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        ArmManagedRegister current =
+            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return ArmManagedRegister::FromCoreRegister(static_cast<Register>(reg));
     }

@@ -140,13 +179,12 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool* blocked_registers) const {
   blocked_registers[IP] = true;

   // TODO: We currently don't use Quick's callee saved registers.
+  // We always save and restore R6 and R7 to make sure we can use three
+  // register pairs for long operations.
   blocked_registers[R5] = true;
-  blocked_registers[R6] = true;
-  blocked_registers[R7] = true;
   blocked_registers[R8] = true;
   blocked_registers[R10] = true;
   blocked_registers[R11] = true;
-  blocked_register_pairs[R6_R7] = true;
 }

 size_t CodeGeneratorARM::GetNumberOfRegisters() const {
@@ -171,8 +209,8 @@ void CodeGeneratorARM::ComputeFrameSize(size_t number_of_spill_slots) {
 }

 void CodeGeneratorARM::GenerateFrameEntry() {
-  core_spill_mask_ |= (1 << LR);
-  __ PushList((1 << LR));
+  core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
+  __ PushList(1 << LR | 1 << R6 | 1 << R7);

   // The return PC has already been pushed on the stack.
   __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
@@ -181,13 +219,23 @@ void CodeGeneratorARM::GenerateFrameEntry() {

 void CodeGeneratorARM::GenerateFrameExit() {
   __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize);
-  __ PopList((1 << PC));
+  __ PopList(1 << PC | 1 << R6 | 1 << R7);
 }

 void CodeGeneratorARM::Bind(Label* label) {
   __ Bind(label);
 }

+Location CodeGeneratorARM::GetTemporaryLocation(HTemporary* temp) const {
+  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
+  // Use the temporary region (right below the dex registers).
+  int32_t slot = GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kArmWordSize)
+                                - kVRegSize  // filler
+                                - (number_of_vregs * kVRegSize)
+                                - ((1 + temp->GetIndex()) * kVRegSize);
+  return Location::StackSlot(slot);
+}
+
 int32_t CodeGeneratorARM::GetStackSlot(HLocal* local) const {
   uint16_t reg_number = local->GetRegNumber();
   uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
@@ -399,9 +447,7 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr
       LOG(FATAL) << "Unimplemented type " << instruction->GetType();
     }
   } else {
-    // This can currently only happen when the instruction that requests the move
-    // is the next to be compiled.
-    DCHECK_EQ(instruction->GetNext(), move_for);
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -448,7 +494,12 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) {

 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::Any());
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any());
+  }
   if_instr->SetLocations(locations);
 }

@@ -482,7 +533,9 @@ void LocationsBuilderARM::VisitCondition(HCondition* comp) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  if (comp->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister());
+  }
   comp->SetLocations(locations);
 }

@@ -960,6 +1013,147 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
   LOG(FATAL) << "Unreachable";
 }

+void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsArm().AsCoreRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreByte, value, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreHalfword, value, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreWord, value, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      ArmManagedRegister value = locations->InAt(1).AsArm();
+      __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow(), obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsArm().AsCoreRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+
+  switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register out = locations->Out().AsArm().AsCoreRegister();
+      __ LoadFromOffset(kLoadWord, out, obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      // TODO: support volatile.
+      ArmManagedRegister out = locations->Out().AsArm();
+      __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow(), obj, offset);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+  }
+}
+
+void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  // TODO: Have a normalization phase that makes this instruction never used.
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  DCHECK(obj.Equals(locations->Out()));
+
+  if (obj.IsRegister()) {
+    __ cmp(obj.AsArm().AsCoreRegister(), ShifterOperand(0));
+  }
+  __ b(slow_path->GetEntryLabel(), EQ);
+}
+
+void LocationsBuilderARM::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
 void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
   LOG(FATAL) << "Unreachable";
 }
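All three backends follow the same two-phase pattern for the null check: the visitor registers a slow path and emits only a compare-and-branch inline, and `GenerateSlowPaths` later emits every registered path after the method body, keeping the fast path fall-through. A self-contained sketch of that flow, with stand-in types rather than the ART `SlowPathCode` API:

```cpp
#include <cstdio>
#include <vector>

// Stand-ins for SlowPathCode / CodeGenerator; illustrative only.
struct SlowPath {
  virtual ~SlowPath() = default;
  virtual void EmitNativeCode() = 0;
};

struct NullCheckSlowPath : SlowPath {
  void EmitNativeCode() override { std::puts("  call pThrowNullPointer"); }
};

struct CodeGen {
  std::vector<SlowPath*> slow_paths;
  void AddSlowPath(SlowPath* p) { slow_paths.push_back(p); }
  void GenerateSlowPaths() {  // Runs once, after the main body is emitted.
    for (SlowPath* p : slow_paths) p->EmitNativeCode();
  }
};

int main() {
  CodeGen codegen;
  NullCheckSlowPath path;
  std::puts("cmp obj, 0 ; beq slow_path");  // Fast path: compare and branch.
  codegen.AddSlowPath(&path);
  codegen.GenerateSlowPaths();  // Out-of-line throw code goes last.
}
```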
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c46c1b131c..b7322b271e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -155,6 +155,7 @@ class CodeGeneratorARM : public CodeGenerator {

   int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  virtual Location GetTemporaryLocation(HTemporary* temp) const OVERRIDE;

   virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
     return kNumberOfCoreRegisters;
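Every backend's `GetTemporaryLocation` computes the same layout: temporaries live right below the dex registers, which in turn sit below a one-vreg filler slot at the top of the frame. A worked instance of the arithmetic (the frame size, pushed-register size, and vreg count below are made-up example values):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the slot computation in the GetTemporaryLocation implementations.
int32_t TemporarySlot(int32_t frame_size, int32_t pushed_regs_size,
                      int32_t number_of_vregs, int32_t temp_index) {
  const int32_t kVRegSize = 4;
  return frame_size - pushed_regs_size
      - kVRegSize                      // filler
      - number_of_vregs * kVRegSize    // dex registers
      - (1 + temp_index) * kVRegSize;  // this temporary
}

int main() {
  // Assumed example: 64-byte frame, three pushed 4-byte registers
  // (LR, R6, R7 on ARM), four dex registers, first temporary.
  assert(TemporarySlot(64, 12, 4, 0) == 28);
}
```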
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 572d494719..52cb39dc7f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -24,8 +24,6 @@
 #include "mirror/art_method.h"
 #include "thread.h"

-#define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
-
 namespace art {

 x86::X86ManagedRegister Location::AsX86() const {
@@ -34,6 +32,26 @@ x86::X86ManagedRegister Location::AsX86() const {

 namespace x86 {

+#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
+
+class NullCheckSlowPathX86 : public SlowPathCode {
+ public:
+  explicit NullCheckSlowPathX86(uint32_t dex_pc) : dex_pc_(dex_pc) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer)));
+    codegen->RecordPcInfo(dex_pc_);
+  }
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
+};
+
+#undef __
+#define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
+
 inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
@@ -107,8 +125,18 @@ ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type,
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters);
-      return X86ManagedRegister::FromCpuRegister(static_cast<Register>(reg));
+      Register reg = static_cast<Register>(
+          AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters));
+      // Block all register pairs that contain `reg`.
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        X86ManagedRegister current =
+            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
+          blocked_register_pairs[i] = true;
+        }
+      }
+      return X86ManagedRegister::FromCpuRegister(reg);
     }

     case Primitive::kPrimFloat:
@@ -176,6 +204,16 @@ void InstructionCodeGeneratorX86::LoadCurrentMethod(Register reg) {
   __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
 }

+Location CodeGeneratorX86::GetTemporaryLocation(HTemporary* temp) const {
+  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
+  // Use the temporary region (right below the dex registers).
+  int32_t slot = GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86WordSize)
+                                - kVRegSize  // filler
+                                - (number_of_vregs * kVRegSize)
+                                - ((1 + temp->GetIndex()) * kVRegSize);
+  return Location::StackSlot(slot);
+}
+
 int32_t CodeGeneratorX86::GetStackSlot(HLocal* local) const {
   uint16_t reg_number = local->GetRegNumber();
   uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
@@ -392,9 +430,7 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr
       LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
     }
   } else {
-    // This can currently only happen when the instruction that requests the move
-    // is the next to be compiled.
-    DCHECK_EQ(instruction->GetNext(), move_for);
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -441,7 +477,12 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) {

 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::Any());
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any());
+  }
   if_instr->SetLocations(locations);
 }

@@ -520,7 +561,9 @@ void LocationsBuilderX86::VisitCondition(HCondition* comp) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
-  locations->SetOut(Location::SameAsFirstInput());
+  if (comp->NeedsMaterialization()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
   comp->SetLocations(locations);
 }

@@ -915,7 +958,7 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) {
 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
   locations->SetOut(Location::RequiresRegister());
   compare->SetLocations(locations);
 }
@@ -928,11 +971,21 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
       Label less, greater, done;
       Register output = locations->Out().AsX86().AsCpuRegister();
       X86ManagedRegister left = locations->InAt(0).AsX86();
-      X86ManagedRegister right = locations->InAt(1).AsX86();
-      __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh());
+      Location right = locations->InAt(1);
+      if (right.IsRegister()) {
+        __ cmpl(left.AsRegisterPairHigh(), right.AsX86().AsRegisterPairHigh());
+      } else {
+        DCHECK(right.IsDoubleStackSlot());
+        __ cmpl(left.AsRegisterPairHigh(), Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+      }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
-      __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow());
+      if (right.IsRegister()) {
+        __ cmpl(left.AsRegisterPairLow(), right.AsX86().AsRegisterPairLow());
+      } else {
+        DCHECK(right.IsDoubleStackSlot());
+        __ cmpl(left.AsRegisterPairLow(), Address(ESP, right.GetStackIndex()));
+      }
       __ movl(output, Immediate(0));
       __ j(kEqual, &done);
       __ j(kBelow, &less);  // Unsigned compare.
@@ -965,6 +1018,158 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) {
   LOG(FATAL) << "Unreachable";
 }

+void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+  if (field_type == Primitive::kPrimBoolean || field_type == Primitive::kPrimByte) {
+    // Ensure the value is in a byte register.
+    locations->SetInAt(1, X86CpuLocation(EAX));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsX86().AsCpuRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      ByteRegister value = locations->InAt(1).AsX86().AsByteRegister();
+      __ movb(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      Register value = locations->InAt(1).AsX86().AsCpuRegister();
+      __ movw(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register value = locations->InAt(1).AsX86().AsCpuRegister();
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      X86ManagedRegister value = locations->InAt(1).AsX86();
+      __ movl(Address(obj, offset), value.AsRegisterPairLow());
+      __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh());
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsX86().AsCpuRegister();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+
+  switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movzxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movsxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movsxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movzxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register out = locations->Out().AsX86().AsCpuRegister();
+      __ movl(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      // TODO: support volatile.
+      X86ManagedRegister out = locations->Out().AsX86();
+      __ movl(out.AsRegisterPairLow(), Address(obj, offset));
+      __ movl(out.AsRegisterPairHigh(), Address(obj, kX86WordSize + offset));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+  }
+}
+
+void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::Any());
+  // TODO: Have a normalization phase that makes this instruction never used.
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  DCHECK(obj.Equals(locations->Out()));
+
+  if (obj.IsRegister()) {
+    __ cmpl(obj.AsX86().AsCpuRegister(), Immediate(0));
+  } else {
+    DCHECK(locations->InAt(0).IsStackSlot());
+    __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
+  }
+  __ j(kEqual, slow_path->GetEntryLabel());
+}
+
+void LocationsBuilderX86::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) {
   LOG(FATAL) << "Unreachable";
 }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8a8216a56d..2a4595447d 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -157,6 +157,7 @@ class CodeGeneratorX86 : public CodeGenerator {

   int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  virtual Location GetTemporaryLocation(HTemporary* temp) const OVERRIDE;

   virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
     return kNumberOfCpuRegisters;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dc1d6164b1..93d74ee1a2 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -25,8 +25,6 @@
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/managed_register_x86_64.h"

-#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
-
 namespace art {

 x86_64::X86_64ManagedRegister Location::AsX86_64() const {
@@ -35,6 +33,26 @@ x86_64::X86_64ManagedRegister Location::AsX86_64() const {

 namespace x86_64 {

+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+class NullCheckSlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit NullCheckSlowPathX86_64(uint32_t dex_pc) : dex_pc_(dex_pc) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ gs()->call(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
+    codegen->RecordPcInfo(dex_pc_);
+  }
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
+};
+
+#undef __
+#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
+
 inline Condition X86_64Condition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
@@ -152,6 +170,16 @@ void InstructionCodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) {
   __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
 }

+Location CodeGeneratorX86_64::GetTemporaryLocation(HTemporary* temp) const {
+  uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
+  // Use the temporary region (right below the dex registers).
+  int32_t slot = GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86_64WordSize)
+                                - kVRegSize  // filler
+                                - (number_of_vregs * kVRegSize)
+                                - ((1 + temp->GetIndex()) * kVRegSize);
+  return Location::StackSlot(slot);
+}
+
 int32_t CodeGeneratorX86_64::GetStackSlot(HLocal* local) const {
   uint16_t reg_number = local->GetRegNumber();
   uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs();
@@ -163,7 +191,7 @@ int32_t CodeGeneratorX86_64::GetStackSlot(HLocal* local) const {
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
     return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86_64WordSize)
-                          - kVRegSize
+                          - kVRegSize  // filler
                           - (number_of_vregs * kVRegSize)
                           + (reg_number * kVRegSize);
   }
@@ -265,9 +293,7 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
       LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
     }
   } else {
-    // This can currently only happen when the instruction that requests the move
-    // is the next to be compiled.
-    DCHECK_EQ(instruction->GetNext(), move_for);
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -311,7 +337,12 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {

 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location::Any());
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  if (condition->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any());
+  }
   if_instr->SetLocations(locations);
 }

@@ -385,7 +416,9 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* comp) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  if (comp->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister());
+  }
   comp->SetLocations(locations);
 }

@@ -827,6 +860,141 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) {
   LOG(FATAL) << "Unimplemented";
 }

+void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister();
+  CpuRegister value = locations->InAt(1).AsX86_64().AsCpuRegister();
+  size_t offset = instruction->GetFieldOffset().SizeValue();
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      __ movb(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      __ movw(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      __ movq(Address(obj, offset), value);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister();
+  CpuRegister out = locations->Out().AsX86_64().AsCpuRegister();
+  size_t offset = instruction->GetFieldOffset().SizeValue();
+
+  switch (instruction->GetType()) {
+    case Primitive::kPrimBoolean: {
+      __ movzxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      __ movsxb(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      __ movsxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      __ movzxw(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      __ movl(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      __ movq(out, Address(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::Any());
+  // TODO: Have a normalization phase that makes this instruction never used.
+  locations->SetOut(Location::SameAsFirstInput());
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  DCHECK(obj.Equals(locations->Out()));
+
+  if (obj.IsRegister()) {
+    __ cmpl(obj.AsX86_64().AsCpuRegister(), Immediate(0));
+  } else {
+    DCHECK(locations->InAt(0).IsStackSlot());
+    __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
+  }
+  __ j(kEqual, slow_path->GetEntryLabel());
+}
+
+void LocationsBuilderX86_64::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) {
   LOG(FATAL) << "Unimplemented";
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d347a4f121..97a0b2e579 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -153,6 +153,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {

   int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+  virtual Location GetTemporaryLocation(HTemporary* temp) const OVERRIDE;

   virtual size_t GetNumberOfRegisters() const OVERRIDE {
     return kNumberOfRegIds;
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 987c5f27b7..a6e5ca9e46 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -182,9 +182,9 @@ TEST(LiveRangesTest, CFG3) {
   ASSERT_TRUE(range->GetNext() == nullptr);

   // Test for the phi.
-  interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
+  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
   range = interval->GetFirstRange();
-  ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(3)->GetLifetimePosition());
+  ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(2)->GetLifetimePosition());
   ASSERT_EQ(22u, range->GetStart());
   ASSERT_EQ(25u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 2d0bc39cd5..1a4d74536b 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -152,32 +152,32 @@ TEST(LivenessTest, CFG4) {
   // return a;
   //
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi, equal test)
+  // (constant0, constant4, constant5, phi)
   const char* expected =
     "Block 0\n"  // entry block
-    "  live in: (00000)\n"
-    "  live out: (11100)\n"
-    "  kill: (11100)\n"
+    "  live in: (0000)\n"
+    "  live out: (1110)\n"
+    "  kill: (1110)\n"
     "Block 1\n"  // block with if
-    "  live in: (11100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00010)\n"
+    "  live in: (1110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 2\n"  // else block
-    "  live in: (01000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0100)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
    "Block 3\n"  // then block
-    "  live in: (00100)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0010)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
    "Block 4\n"  // return block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00001)\n"
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0001)\n"
    "Block 5\n"  // exit block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n";
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";

   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -197,31 +197,34 @@ TEST(LivenessTest, CFG5) {
   //   a = 4;
   // }
   // return a;
+  //
+  // Bitsets are made of:
+  // (constant0, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
-    "  live in: (0000)\n"
-    "  live out: (1100)\n"
-    "  kill: (1100)\n"
+    "  live in: (000)\n"
+    "  live out: (110)\n"
+    "  kill: (110)\n"
    "Block 1\n"  // block with if
-    "  live in: (1100)\n"
-    "  live out: (1100)\n"
-    "  kill: (0010)\n"
+    "  live in: (110)\n"
+    "  live out: (110)\n"
+    "  kill: (000)\n"
    "Block 2\n"  // else block
-    "  live in: (0100)\n"
-    "  live out: (0000)\n"
-    "  kill: (0000)\n"
+    "  live in: (010)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
(010)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 3\n" // return block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0001)\n" + " live in: (000)\n" + " live out: (000)\n" + " kill: (001)\n" "Block 4\n" // exit block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 5\n" // block to avoid critical edge. Predecessor is 1, successor is 3. - " live in: (1000)\n" - " live out: (0000)\n" - " kill: (0000)\n"; + " live in: (100)\n" + " live out: (000)\n" + " kill: (000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -239,31 +242,33 @@ TEST(LivenessTest, Loop1) { // a = 4; // } // return; + // Bitsets are made of: + // (constant0, constant4, phi) const char* expected = "Block 0\n" // entry block - " live in: (0000)\n" - " live out: (1100)\n" - " kill: (1100)\n" + " live in: (000)\n" + " live out: (110)\n" + " kill: (110)\n" "Block 1\n" // pre header - " live in: (1100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (110)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 2\n" // loop header - " live in: (0100)\n" - " live out: (0100)\n" - " kill: (0011)\n" + " live in: (010)\n" + " live out: (010)\n" + " kill: (001)\n" "Block 3\n" // back edge - " live in: (0100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (010)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 4\n" // return block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 5\n" // exit block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n"; + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( @@ -283,31 +288,33 @@ TEST(LivenessTest, Loop3) { // a = 4; // } // return 5; + // Bitsets are made of: + // (constant0, constant4, constant5, phi) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00011)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0001)\n" "Block 3\n" // back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 4\n" // return block - " live in: (00100)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0010)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 5\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -330,36 +337,36 @@ TEST(LivenessTest, Loop4) { // } // return a; // Bitsets are made of: - // (constant0, constant4, phi, equal test) + // (constant0, constant4, phi) const char* expected = "Block 0\n" - " live in: (0000)\n" - " live out: (1100)\n" - " kill: (1100)\n" + " live in: (000)\n" + " live out: (110)\n" + " kill: (110)\n" "Block 1\n" - " live in: (1100)\n" - " live out: (1100)\n" - " kill: (0000)\n" + " live in: (110)\n" + " live out: (110)\n" + " kill: (000)\n" "Block 2\n" // loop header - " live in: (0100)\n" - " live 
out: (0110)\n" - " kill: (0011)\n" + " live in: (010)\n" + " live out: (011)\n" + " kill: (001)\n" "Block 3\n" // back edge - " live in: (0100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (010)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 4\n" // pre loop header - " live in: (1100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (110)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 5\n" // return block - " live in: (0010)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (001)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 6\n" // exit block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n"; + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -377,45 +384,44 @@ TEST(LivenessTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: - // (constant0, constant4, constant5, equal in block 1, phi in block 8, phi in block 4, - // equal in block 4) + // (constant0, constant4, constant5, phi in block 8, phi in block 4) const char* expected = "Block 0\n" - " live in: (0000000)\n" - " live out: (1110000)\n" - " kill: (1110000)\n" + " live in: (00000)\n" + " live out: (11100)\n" + " kill: (11100)\n" "Block 1\n" - " live in: (1110000)\n" - " live out: (0110000)\n" - " kill: (0001000)\n" + " live in: (11100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 2\n" - " live in: (0100000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (01000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 3\n" - " live in: (0010000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00100)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 4\n" // loop header - " live in: (0000000)\n" - " live out: (0000010)\n" - " kill: (0000011)\n" + " live in: (00000)\n" + " live out: (00001)\n" + " kill: (00001)\n" "Block 5\n" // back edge - " live in: (0000010)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00001)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 6\n" // return block - " live in: (0000010)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00001)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 7\n" // exit block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 8\n" // synthesized pre header - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000100)\n"; + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00010)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -432,45 +438,44 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3, - // phi in block 8) + // (constant0, constant4, constant5, phi in block 2, phi in block 8) const char* expected = "Block 0\n" - " live in: (0000000)\n" - " live out: (1110000)\n" - " kill: (1110000)\n" + " live in: (00000)\n" + " live out: (11100)\n" + " kill: (11100)\n" "Block 1\n" - " live in: (1110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (11100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 2\n" // loop header - " live in: (0110000)\n" - " live out: (0111000)\n" - " kill: (0001100)\n" + " live in: (01100)\n" + " live 
out: (01110)\n" + " kill: (00010)\n" "Block 3\n" - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000010)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 4\n" // original back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 5\n" // original back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 6\n" // return block - " live in: (0001000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00010)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 7\n" // exit block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 8\n" // synthesized back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000001)\n"; + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00001)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -488,45 +493,44 @@ TEST(LivenessTest, Loop6) { TEST(LivenessTest, Loop7) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3, - // phi in block 6) + // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = "Block 0\n" - " live in: (0000000)\n" - " live out: (1110000)\n" - " kill: (1110000)\n" + " live in: (00000)\n" + " live out: (11100)\n" + " kill: (11100)\n" "Block 1\n" - " live in: (1110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (11100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 2\n" // loop header - " live in: (0110000)\n" - " live out: (0111000)\n" - " kill: (0001100)\n" + " live in: (01100)\n" + " live out: (01110)\n" + " kill: (00010)\n" "Block 3\n" - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000010)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 4\n" // loop exit - " live in: (0010000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00100)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 5\n" // back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 6\n" // return block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000001)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00001)\n" "Block 7\n" // exit block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 8\n" // synthesized block to avoid critical edge. 
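Every expected bitset in these tests loses the positions that used to track the "equal" instructions. The reason is visible in the ssa_liveness_analysis.cc hunk at the bottom of this page: a condition whose only consumer is the If that follows it is no longer materialized, so it never receives an SSA index and therefore contributes no liveness bit. A minimal sketch of the kind of predicate that could drive such a decision; HasOnlyOneUse(), GetUses() and GetUser() are assumed helper names, not necessarily this patch's API:

  // Sketch: a condition used solely by the branch right after it can be
  // emitted as a bare compare-and-jump and needs no register or stack slot.
  static bool NeedsMaterialization(HCondition* condition) {
    if (!condition->HasUses()) return false;       // Dead: nothing to keep alive.
    if (!condition->HasOnlyOneUse()) return true;  // Shared: must be stored.
    HInstruction* user = condition->GetUses()->GetUser();
    return !user->IsIf();                          // Inline only into an If.
  }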
- " live in: (0001000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n"; + " live in: (00010)\n" + " live out: (00000)\n" + " kill: (00000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 92920845c3..4036a8d4ba 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_NODES_H_ #include "locations.h" +#include "offsets.h" #include "utils/allocation.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" @@ -75,6 +76,7 @@ class HGraph : public ArenaObject { maximum_number_of_out_vregs_(0), number_of_vregs_(0), number_of_in_vregs_(0), + number_of_temporaries_(0), current_instruction_id_(0) {} ArenaAllocator* GetArena() const { return arena_; } @@ -112,6 +114,14 @@ class HGraph : public ArenaObject { maximum_number_of_out_vregs_ = std::max(new_value, maximum_number_of_out_vregs_); } + void UpdateNumberOfTemporaries(size_t count) { + number_of_temporaries_ = std::max(count, number_of_temporaries_); + } + + size_t GetNumberOfTemporaries() const { + return number_of_temporaries_; + } + void SetNumberOfVRegs(uint16_t number_of_vregs) { number_of_vregs_ = number_of_vregs; } @@ -163,6 +173,9 @@ class HGraph : public ArenaObject { // The number of virtual registers used by parameters of this method. uint16_t number_of_in_vregs_; + // The number of temporaries that will be needed for the baseline compiler. + size_t number_of_temporaries_; + // The current id to assign to a newly added instruction. See HInstruction.id_. int current_instruction_id_; @@ -415,6 +428,10 @@ class HBasicBlock : public ArenaObject { M(StoreLocal) \ M(Sub) \ M(Compare) \ + M(InstanceFieldGet) \ + M(InstanceFieldSet) \ + M(NullCheck) \ + M(Temporary) \ #define FORWARD_DECLARATION(type) class H##type; @@ -1254,6 +1271,96 @@ class HPhi : public HInstruction { DISALLOW_COPY_AND_ASSIGN(HPhi); }; +class HNullCheck : public HExpression<1> { + public: + HNullCheck(HInstruction* value, uint32_t dex_pc) + : HExpression(value->GetType()), dex_pc_(dex_pc) { + SetRawInputAt(0, value); + } + + virtual bool NeedsEnvironment() const { return true; } + + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(NullCheck); + + private: + const uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HNullCheck); +}; + +class FieldInfo : public ValueObject { + public: + explicit FieldInfo(MemberOffset field_offset) + : field_offset_(field_offset) {} + + MemberOffset GetFieldOffset() const { return field_offset_; } + + private: + const MemberOffset field_offset_; +}; + +class HInstanceFieldGet : public HExpression<1> { + public: + HInstanceFieldGet(HInstruction* value, + Primitive::Type field_type, + MemberOffset field_offset) + : HExpression(field_type), field_info_(field_offset) { + SetRawInputAt(0, value); + } + + MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } + + DECLARE_INSTRUCTION(InstanceFieldGet); + + private: + const FieldInfo field_info_; + + DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet); +}; + +class HInstanceFieldSet : public HTemplateInstruction<2> { + public: + HInstanceFieldSet(HInstruction* object, + HInstruction* value, + MemberOffset field_offset) + : field_info_(field_offset) { + SetRawInputAt(0, object); + SetRawInputAt(1, value); + } + + MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } + + DECLARE_INSTRUCTION(InstanceFieldSet); + + private: + const FieldInfo field_info_; + 
+
+/**
+ * Some DEX instructions are folded into multiple HInstructions that need
+ * to stay live until the last HInstruction. This class is used as a marker
+ * for the baseline compiler to ensure its preceding HInstruction stays
+ * live. `index` is the temporary number, used to compute the stack offset
+ * at which the instruction's value is stored.
+ */
+class HTemporary : public HTemplateInstruction<0> {
+ public:
+  explicit HTemporary(size_t index) : index_(index) {}
+
+  size_t GetIndex() const { return index_; }
+
+  DECLARE_INSTRUCTION(Temporary);
+
+ private:
+  const size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HTemporary);
+};
+
 class MoveOperands : public ArenaObject {
  public:
   MoveOperands(Location source, Location destination)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b4d7fff178..b14753c580 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -101,7 +101,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file);
+  HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file, GetCompilerDriver());
 
   HGraph* graph = builder.BuildGraph(*code_item);
   if (graph == nullptr) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 50ea00f4cd..fbdc0b9593 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -204,9 +204,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
       // All inputs of an instruction must be live.
       for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
         HInstruction* input = current->InputAt(i);
-        DCHECK(input->HasSsaIndex());
-        live_in->SetBit(input->GetSsaIndex());
-        input->GetLiveInterval()->AddUse(current, i, false);
+        // Some instructions 'inline' their inputs, that is, they do not need
+        // to be materialized.
+        if (input->HasSsaIndex()) {
+          live_in->SetBit(input->GetSsaIndex());
+          input->GetLiveInterval()->AddUse(current, i, false);
+        }
       }
 
       if (current->HasEnvironment()) {
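HTemporary in practice: when lowering an IPUT, the null-checked receiver has to survive the load of the value being stored, and in the baseline compiler that load may clobber registers. A sketch of the intended usage, assuming the builder context (graph_, arena_, current_block_, LoadLocal, value_reg, field_type and field_offset are taken as given, not quoted from the patch):

  // Mark the null-checked receiver as temporary 0: the baseline compiler
  // will keep it live (spilled to a reserved stack slot) across the load.
  HInstruction* null_check = current_block_->GetLastInstruction();
  current_block_->AddInstruction(new (arena_) HTemporary(0));
  graph_->UpdateNumberOfTemporaries(1);
  HInstruction* value = LoadLocal(value_reg, field_type);
  current_block_->AddInstruction(
      new (arena_) HInstanceFieldSet(null_check, value, field_offset));

The codegen side pairs with this: VisitTemporary deliberately emits nothing (see the x86-64 visitor above), and the marker only influences where the code generator saves and reloads values, via the GetTemporaryLocation hook that each backend now implements to map a temporary index to a dedicated stack slot in the frame.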