diff options
Diffstat (limited to 'compiler')
45 files changed, 1327 insertions, 401 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index f41357604..25b23a2ee 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -90,7 +90,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_arm.cc \ optimizing/code_generator_x86.cc \ optimizing/code_generator_x86_64.cc \ - optimizing/constant_propagation.cc \ + optimizing/constant_folding.cc \ optimizing/dead_code_elimination.cc \ optimizing/graph_checker.cc \ optimizing/graph_visualizer.cc \ @@ -98,6 +98,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/instruction_simplifier.cc \ optimizing/locations.cc \ optimizing/nodes.cc \ + optimizing/optimization.cc \ optimizing/optimizing_compiler.cc \ optimizing/parallel_move_resolver.cc \ optimizing/prepare_for_register_allocation.cc \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index e1b5984c7..452868870 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1955,6 +1955,19 @@ void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_fil context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_); } +// Does the runtime for the InstructionSet provide an implementation returned by +// GetQuickGenericJniStub allowing down calls that aren't compiled using a JNI compiler? +static bool InstructionSetHasGenericJniStub(InstructionSet isa) { + switch (isa) { + case kArm: + case kArm64: + case kThumb2: + case kX86: + case kX86_64: return true; + default: return false; + } +} + void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, @@ -1966,13 +1979,14 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t if ((access_flags & kAccNative) != 0) { // Are we interpreting only and have support for generic JNI down calls? if (!compiler_options_->IsCompilationEnabled() && - (instruction_set_ == kX86_64 || instruction_set_ == kArm64)) { + InstructionSetHasGenericJniStub(instruction_set_)) { // Leaving this empty will trigger the generic JNI version } else { compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file); CHECK(compiled_method != nullptr); } } else if ((access_flags & kAccAbstract) != 0) { + // Abstract methods don't have code. } else { MethodReference method_ref(&dex_file, method_idx); bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index f6795ea28..3c3aa0250 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -176,12 +176,8 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // 4. Write out the end of the quick frames. if (is_64_bit_target) { __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<8>()); - __ StoreImmediateToThread64(Thread::TopOfManagedStackPcOffset<8>(), 0, - mr_conv->InterproceduralScratchRegister()); } else { __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<4>()); - __ StoreImmediateToThread32(Thread::TopOfManagedStackPcOffset<4>(), 0, - mr_conv->InterproceduralScratchRegister()); } // 5. Move frame down to allow space for out going args. diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2648d4d67..2f1a092ea 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -267,6 +267,13 @@ HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { } template<typename T> +void HGraphBuilder::Unop_12x(const Instruction& instruction, Primitive::Type type) { + HInstruction* first = LoadLocal(instruction.VRegB(), type); + current_block_->AddInstruction(new (arena_) T(type, first)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); +} + +template<typename T> void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); @@ -678,6 +685,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::NEG_INT: { + Unop_12x<HNeg>(instruction, Primitive::kPrimInt); + break; + } + case Instruction::ADD_INT: { Binop_23x<HAdd>(instruction, Primitive::kPrimInt); break; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index e68cdb0b1..90e50ad95 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -95,6 +95,9 @@ class HGraphBuilder : public ValueObject { bool InitializeParameters(uint16_t number_of_parameters); template<typename T> + void Unop_12x(const Instruction& instruction, Primitive::Type type); + + template<typename T> void Binop_23x(const Instruction& instruction, Primitive::Type type); template<typename T> diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 408e13e36..d5cd490d1 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -473,8 +473,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { case Location::kRegister : { int id = location.reg(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); - if (current->GetType() == Primitive::kPrimDouble - || current->GetType() == Primitive::kPrimLong) { + if (current->GetType() == Primitive::kPrimLong) { stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); ++i; DCHECK_LT(i, environment_size); @@ -482,52 +481,55 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kFpuRegister : { + int id = location.reg(); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); + if (current->GetType() == Primitive::kPrimDouble) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); + ++i; + DCHECK_LT(i, environment_size); + } + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } } -size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) { - return first_register_slot_in_slow_path_ + index * GetWordSize(); -} - void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); - uint32_t count = 0; + size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { if (register_set->ContainsCoreRegister(i)) { - size_t stack_offset = GetStackOffsetOfSavedRegister(count); - ++count; - SaveCoreRegister(Location::StackSlot(stack_offset), i); // If the register holds an object, update the stack mask. if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); } + stack_offset += SaveCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (register_set->ContainsFloatingPointRegister(i)) { - LOG(FATAL) << "Unimplemented"; + stack_offset += SaveFloatingPointRegister(stack_offset, i); } } } void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); - uint32_t count = 0; + size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { if (register_set->ContainsCoreRegister(i)) { - size_t stack_offset = GetStackOffsetOfSavedRegister(count); - ++count; - RestoreCoreRegister(Location::StackSlot(stack_offset), i); + stack_offset += RestoreCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (register_set->ContainsFloatingPointRegister(i)) { - LOG(FATAL) << "Unimplemented"; + stack_offset += RestoreFloatingPointRegister(stack_offset, i); } } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 7aaf99108..220d74556 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -110,8 +110,18 @@ class CodeGenerator : public ArenaObject { virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0; + // Saves the register in the stack. Returns the size taken on stack. + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; + // Restores the register from the stack. Returns the size taken on stack. + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + LOG(FATAL) << "Unimplemented"; + return 0u; + } + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + LOG(FATAL) << "Unimplemented"; + return 0u; + } void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); @@ -145,6 +155,7 @@ class CodeGenerator : public ArenaObject { void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } + bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } protected: CodeGenerator(HGraph* graph, diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index a2cf670b0..f07cb30a6 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -210,12 +210,14 @@ void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << ArmManagedRegister::FromDRegister(DRegister(reg)); } -void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) { - __ StoreToOffset(kStoreWord, static_cast<Register>(reg_id), SP, stack_location.GetStackIndex()); +size_t CodeGeneratorARM::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ StoreToOffset(kStoreWord, static_cast<Register>(reg_id), SP, stack_index); + return kArmWordSize; } -void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { - __ LoadFromOffset(kLoadWord, static_cast<Register>(reg_id), SP, stack_location.GetStackIndex()); +size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ LoadFromOffset(kLoadWord, static_cast<Register>(reg_id), SP, stack_index); + return kArmWordSize; } CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) @@ -656,7 +658,7 @@ void LocationsBuilderARM::VisitIf(HIf* if_instr) { new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); HInstruction* cond = if_instr->InputAt(0); if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); } } @@ -715,10 +717,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { void LocationsBuilderARM::VisitCondition(HCondition* comp) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1)), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1))); if (comp->NeedsMaterialization()) { - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } @@ -859,6 +861,26 @@ void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) { // Will be generated at use site. } +void LocationsBuilderARM::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitFloatConstant(HFloatConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderARM::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant) { + // Will be generated at use site. +} + void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -1016,16 +1038,57 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } +void LocationsBuilderARM::VisitNeg(HNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType(); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void InstructionCodeGeneratorARM::VisitNeg(HNeg* neg) { + LocationSummary* locations = neg->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + DCHECK(in.IsRegister()); + __ rsb(out.As<Register>(), in.As<Register>(), ShifterOperand(0)); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType(); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + void LocationsBuilderARM::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - bool dies_at_entry = add->GetResultType() != Primitive::kPrimLong; - locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry); - locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)), dies_at_entry); - locations->SetOut(Location::RequiresRegister()); + bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), output_overlaps); break; } @@ -1088,10 +1151,10 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { switch (sub->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - bool dies_at_entry = sub->GetResultType() != Primitive::kPrimLong; - locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry); - locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)), dies_at_entry); - locations->SetOut(Location::RequiresRegister()); + bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), output_overlaps); break; } @@ -1150,9 +1213,9 @@ void LocationsBuilderARM::VisitMul(HMul* mul) { switch (mul->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1261,8 +1324,8 @@ void InstructionCodeGeneratorARM::VisitParameterValue(HParameterValue* instructi void LocationsBuilderARM::VisitNot(HNot* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { @@ -1274,9 +1337,9 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { void LocationsBuilderARM::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { @@ -1332,9 +1395,8 @@ void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); bool is_object_type = instruction->GetFieldType() == Primitive::kPrimNot; - bool dies_at_entry = !is_object_type; - locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry); - locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); // Temporary registers for the write barrier. if (is_object_type) { locations->AddTemp(Location::RequiresRegister()); @@ -1394,8 +1456,8 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -1481,10 +1543,9 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { @@ -1594,10 +1655,9 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } else { - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry); - locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); } } @@ -1684,8 +1744,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 57b289c80..1fe8a7eac 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -142,8 +142,8 @@ class CodeGeneratorARM : public CodeGenerator { virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(HBasicBlock* block) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kArmWordSize; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 041acdf91..2550518db 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -182,12 +182,14 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg)); } -void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) { - __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id)); +size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id)); + return kX86WordSize; } -void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { - __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex())); +size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index)); + return kX86WordSize; } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph) @@ -588,7 +590,7 @@ void LocationsBuilderX86::VisitIf(HIf* if_instr) { new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); HInstruction* cond = if_instr->InputAt(0); if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::Any()); } } @@ -699,8 +701,8 @@ void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) { void LocationsBuilderX86::VisitCondition(HCondition* comp) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); if (comp->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister()); } @@ -795,6 +797,26 @@ void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) { // Will be generated at use site. } +void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant) { + // Will be generated at use site. +} + void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -957,6 +979,47 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } +void LocationsBuilderX86::VisitNeg(HNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType(); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { + LocationSummary* locations = neg->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + DCHECK(in.IsRegister()); + __ negl(out.As<Register>()); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType(); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + void LocationsBuilderX86::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); @@ -1279,9 +1342,9 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) { void LocationsBuilderX86::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { @@ -1350,12 +1413,11 @@ void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) || (field_type == Primitive::kPrimByte); // The register allocator does not support multiple // inputs that die at entry with one in a specific register. - bool dies_at_entry = !is_object_type && !is_byte_type; if (is_byte_type) { // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX), dies_at_entry); + locations->SetInAt(1, Location::RegisterLocation(EAX)); } else { - locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry); + locations->SetInAt(1, Location::RequiresRegister()); } // Temporary registers for the write barrier. if (is_object_type) { @@ -1431,8 +1493,8 @@ void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -1521,10 +1583,9 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -1637,16 +1698,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // We need the inputs to be different than the output in case of long operation. // In case of a byte operation, the register allocator does not support multiple // inputs that die at entry with one in a specific register. - bool dies_at_entry = value_type != Primitive::kPrimLong && !is_byte_type; - locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1)), dies_at_entry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_byte_type) { // Ensure the value is in a byte register. - locations->SetInAt(2, Location::ByteRegisterOrConstant( - EAX, instruction->InputAt(2)), dies_at_entry); + locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), dies_at_entry); + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } } } @@ -1776,8 +1834,8 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); instruction->SetLocations(locations); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index db8b9abd9..fff91d179 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -144,8 +144,8 @@ class CodeGeneratorX86 : public CodeGenerator { virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(HBasicBlock* block) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kX86WordSize; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5fa930512..6174ac6be 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -191,12 +191,24 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg)); } -void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) { - __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id)); +size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id)); + return kX86_64WordSize; } -void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { - __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex())); +size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + return kX86_64WordSize; +} + +size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + return kX86_64WordSize; +} + +size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + return kX86_64WordSize; } CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) @@ -489,7 +501,7 @@ void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); HInstruction* cond = if_instr->InputAt(0); if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::Any()); } } @@ -598,8 +610,8 @@ void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) { void LocationsBuilderX86_64::VisitCondition(HCondition* comp) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); if (comp->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister()); } @@ -676,9 +688,9 @@ void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { @@ -727,6 +739,26 @@ void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) // Will be generated at use site. } +void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* constant) { + // Will be generated at use site. +} + void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -933,6 +965,47 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } +void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType(); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { + LocationSummary* locations = neg->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + DCHECK(in.IsRegister()); + __ negl(out.As<CpuRegister>()); + break; + + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType(); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); @@ -954,7 +1027,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -991,21 +1064,12 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - if (second.IsFpuRegister()) { - __ addss(first.As<XmmRegister>(), second.As<XmmRegister>()); - } else { - __ addss(first.As<XmmRegister>(), - Address(CpuRegister(RSP), second.GetStackIndex())); - } + __ addss(first.As<XmmRegister>(), second.As<XmmRegister>()); break; } case Primitive::kPrimDouble: { - if (second.IsFpuRegister()) { - __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>()); - } else { - __ addsd(first.As<XmmRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); - } + __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>()); break; } @@ -1213,9 +1277,8 @@ void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); Primitive::Type field_type = instruction->GetFieldType(); bool is_object_type = field_type == Primitive::kPrimNot; - bool dies_at_entry = !is_object_type; - locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry); - locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); if (is_object_type) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); @@ -1272,8 +1335,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -1356,10 +1419,10 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + 1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { @@ -1442,10 +1505,30 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + XmmRegister out = locations->Out().As<XmmRegister>(); + if (index.IsConstant()) { + __ movss(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); + } else { + __ movss(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset)); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + XmmRegister out = locations->Out().As<XmmRegister>(); + if (index.IsConstant()) { + __ movsd(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); + } else { + __ movsd(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset)); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -1463,14 +1546,16 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } else { - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry); - locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry); + 1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetInAt(2, Location::RequiresRegister()); + } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { + locations->SetInAt(2, Location::RequiresFpuRegister()); } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), Location::kDiesAtEntry); + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } } } @@ -1541,6 +1626,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset), value.As<CpuRegister>()); } else { + DCHECK(value.IsConstant()) << value; __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } @@ -1569,10 +1655,34 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + DCHECK(value.IsFpuRegister()); + __ movss(Address(obj, offset), value.As<XmmRegister>()); + } else { + DCHECK(value.IsFpuRegister()); + __ movss(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset), + value.As<XmmRegister>()); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + DCHECK(value.IsFpuRegister()); + __ movsd(Address(obj, offset), value.As<XmmRegister>()); + } else { + DCHECK(value.IsFpuRegister()); + __ movsd(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset), + value.As<XmmRegister>()); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -1582,8 +1692,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); - locations->SetOut(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) { @@ -1706,6 +1816,9 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movl(destination.As<CpuRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); + } else if (destination.IsFpuRegister()) { + __ movss(destination.As<XmmRegister>(), + Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(destination.IsStackSlot()); __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -1715,8 +1828,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movq(destination.As<CpuRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); + } else if (destination.IsFpuRegister()) { + __ movsd(destination.As<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } @@ -1727,6 +1842,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movl(destination.As<CpuRegister>(), imm); } else { + DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else if (constant->IsLongConstant()) { @@ -1734,14 +1850,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movq(destination.As<CpuRegister>(), Immediate(value)); } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), Immediate(value)); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } + } else if (constant->IsFloatConstant()) { + Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue())); + if (destination.IsFpuRegister()) { + __ movl(CpuRegister(TMP), imm); + __ movd(destination.As<XmmRegister>(), CpuRegister(TMP)); + } else { + DCHECK(destination.IsStackSlot()) << destination; + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + } } else { - LOG(FATAL) << "Unimplemented constant type"; + DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); + Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue())); + if (destination.IsFpuRegister()) { + __ movq(CpuRegister(TMP), imm); + __ movd(destination.As<XmmRegister>(), CpuRegister(TMP)); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ movq(CpuRegister(TMP), imm); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>()); + } else if (destination.IsStackSlot()) { + __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.As<XmmRegister>()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.As<XmmRegister>()); } - } else { - LOG(FATAL) << "Unimplemented"; } } @@ -1783,6 +1927,18 @@ void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { CpuRegister(ensure_scratch.GetRegister())); } +void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movss(Address(CpuRegister(RSP), mem), reg); + __ movd(reg, CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movsd(Address(CpuRegister(RSP), mem), reg); + __ movd(reg, CpuRegister(TMP)); +} + void ParallelMoveResolverX86_64::EmitSwap(size_t index) { MoveOperands* move = moves_.Get(index); Location source = move->GetSource(); @@ -1802,8 +1958,20 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Exchange64(destination.As<CpuRegister>(), source.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { Exchange64(destination.GetStackIndex(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { + __ movd(CpuRegister(TMP), source.As<XmmRegister>()); + __ movaps(source.As<XmmRegister>(), destination.As<XmmRegister>()); + __ movd(destination.As<XmmRegister>(), CpuRegister(TMP)); + } else if (source.IsFpuRegister() && destination.IsStackSlot()) { + Exchange32(source.As<XmmRegister>(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsFpuRegister()) { + Exchange32(destination.As<XmmRegister>(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { + Exchange64(source.As<XmmRegister>(), destination.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { + Exchange64(destination.As<XmmRegister>(), source.GetStackIndex()); } else { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 5ac0189b5..e04a8d8ab 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -80,8 +80,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { private: void Exchange32(CpuRegister reg, int mem); + void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); void Exchange64(CpuRegister reg, int mem); + void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); CodeGeneratorX86_64* const codegen_; @@ -146,8 +148,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(HBasicBlock* block) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kX86_64WordSize; diff --git a/compiler/optimizing/constant_propagation.cc b/compiler/optimizing/constant_folding.cc index d675164fa..10a7e4629 100644 --- a/compiler/optimizing/constant_propagation.cc +++ b/compiler/optimizing/constant_folding.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "constant_propagation.h" +#include "constant_folding.h" namespace art { -void ConstantPropagation::Run() { +void HConstantFolding::Run() { // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second @@ -31,11 +31,19 @@ void ConstantPropagation::Run() { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* inst = it.Current(); - // Constant folding: replace `c <- a op b' with a compile-time - // evaluation of `a op b' if `a' and `b' are constant. if (inst->IsBinaryOperation()) { + // Constant folding: replace `op(a, b)' with a constant at + // compile time if `a' and `b' are both constants. HConstant* constant = - inst->AsBinaryOperation()->TryStaticEvaluation(graph_->GetArena()); + inst->AsBinaryOperation()->TryStaticEvaluation(); + if (constant != nullptr) { + inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); + } + } else if (inst->IsUnaryOperation()) { + // Constant folding: replace `op(a)' with a constant at compile + // time if `a' is a constant. + HConstant* constant = + inst->AsUnaryOperation()->TryStaticEvaluation(); if (constant != nullptr) { inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); } diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h new file mode 100644 index 000000000..d2acfa697 --- /dev/null +++ b/compiler/optimizing/constant_folding.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ +#define ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +/** + * Optimization pass performing a simple constant-expression + * evaluation on the SSA form. + * + * This class is named art::HConstantFolding to avoid name + * clashes with the art::ConstantPropagation class defined in + * compiler/dex/post_opt_passes.h. + */ +class HConstantFolding : public HOptimization { + public: + HConstantFolding(HGraph* graph, const HGraphVisualizer& visualizer) + : HOptimization(graph, true, kConstantFoldingPassName, visualizer) {} + + virtual void Run() OVERRIDE; + + static constexpr const char* kConstantFoldingPassName = "constant_folding"; + + private: + DISALLOW_COPY_AND_ASSIGN(HConstantFolding); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ diff --git a/compiler/optimizing/constant_propagation_test.cc b/compiler/optimizing/constant_folding_test.cc index ff44805ed..09bf2c8d7 100644 --- a/compiler/optimizing/constant_propagation_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,11 +16,12 @@ #include <functional> -#include "constant_propagation.h" +#include "code_generator_x86.h" +#include "constant_folding.h" #include "dead_code_elimination.h" -#include "pretty_printer.h" #include "graph_checker.h" #include "optimizing_unit_test.h" +#include "pretty_printer.h" #include "gtest/gtest.h" @@ -28,9 +29,9 @@ namespace art { static void TestCode(const uint16_t* data, const std::string& expected_before, - const std::string& expected_after_cp, + const std::string& expected_after_cf, const std::string& expected_after_dce, - std::function<void(HGraph*)> check_after_cp, + std::function<void(HGraph*)> check_after_cf, Primitive::Type return_type = Primitive::kPrimInt) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -45,29 +46,87 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - ConstantPropagation(graph).Run(); + x86::CodeGeneratorX86 codegen(graph); + HGraphVisualizer visualizer(nullptr, graph, codegen, ""); + HConstantFolding(graph, visualizer).Run(); + SSAChecker ssa_checker(&allocator, graph); + ssa_checker.Run(); + ASSERT_TRUE(ssa_checker.IsValid()); - StringPrettyPrinter printer_after_cp(graph); - printer_after_cp.VisitInsertionOrder(); - std::string actual_after_cp = printer_after_cp.str(); - ASSERT_EQ(expected_after_cp, actual_after_cp); + StringPrettyPrinter printer_after_cf(graph); + printer_after_cf.VisitInsertionOrder(); + std::string actual_after_cf = printer_after_cf.str(); + ASSERT_EQ(expected_after_cf, actual_after_cf); - check_after_cp(graph); + check_after_cf(graph); - DeadCodeElimination(graph).Run(); + HDeadCodeElimination(graph, visualizer).Run(); + ssa_checker.Run(); + ASSERT_TRUE(ssa_checker.IsValid()); StringPrettyPrinter printer_after_dce(graph); printer_after_dce.VisitInsertionOrder(); std::string actual_after_dce = printer_after_dce.str(); ASSERT_EQ(expected_after_dce, actual_after_dce); - - SSAChecker ssa_checker(&allocator, graph); - ssa_checker.Run(); - ASSERT_TRUE(ssa_checker.IsValid()); } /** + * Tiny three-register program exercising int constant folding on negation. + * + * 16-bit + * offset + * ------ + * v0 <- 1 0. const/4 v0, #+1 + * v1 <- -v0 1. neg-int v0, v1 + * return v1 2. return v1 + */ +TEST(ConstantFolding, IntConstantFoldingNegation) { + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 << 8 | 1 << 12, + Instruction::NEG_INT | 1 << 8 | 0 << 12, + Instruction::RETURN | 1 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 2: IntConstant [5]\n" + " 10: SuspendCheck\n" + " 11: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 5: Neg(2) [8]\n" + " 8: Return(5)\n" + "BasicBlock 2, pred: 1\n" + " 9: Exit\n"; + + // Expected difference after constant folding. + diff_t expected_cf_diff = { + { " 2: IntConstant [5]\n", " 2: IntConstant\n" }, + { " 5: Neg(2) [8]\n", " 12: IntConstant [8]\n" }, + { " 8: Return(5)\n", " 8: Return(12)\n" } + }; + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); + + // Check the value of the computed constant. + auto check_after_cf = [](HGraph* graph) { + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + ASSERT_TRUE(inst->IsIntConstant()); + ASSERT_EQ(inst->AsIntConstant()->GetValue(), -1); + }; + + // Expected difference after dead code elimination. + diff_t expected_dce_diff = { + { " 2: IntConstant\n", removed }, + }; + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); + + TestCode(data, + expected_before, + expected_after_cf, + expected_after_dce, + check_after_cf); +} + +/** * Tiny three-register program exercising int constant folding on addition. * * 16-bit @@ -78,7 +137,7 @@ static void TestCode(const uint16_t* data, * v2 <- v0 + v1 2. add-int v2, v0, v1 * return v2 4. return v2 */ -TEST(ConstantPropagation, IntConstantFoldingOnAddition1) { +TEST(ConstantFolding, IntConstantFoldingOnAddition1) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, @@ -97,17 +156,17 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition1) { "BasicBlock 2, pred: 1\n" " 13: Exit\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, { " 9: Add(3, 5) [12]\n", " 16: IntConstant [12]\n" }, { " 12: Return(9)\n", " 12: Return(16)\n" } }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), 3); @@ -118,13 +177,13 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition1) { { " 3: IntConstant\n", removed }, { " 5: IntConstant\n", removed } }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp); + check_after_cf); } /** @@ -142,7 +201,7 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition1) { * v2 <- v0 + v1 6. add-int v2, v0, v1 * return v2 8. return v2 */ -TEST(ConstantPropagation, IntConstantFoldingOnAddition2) { +TEST(ConstantFolding, IntConstantFoldingOnAddition2) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, @@ -169,8 +228,8 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition2) { "BasicBlock 2, pred: 1\n" " 25: Exit\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, { " 11: IntConstant [17]\n", " 11: IntConstant\n" }, @@ -180,10 +239,10 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition2) { { " 21: Add(9, 17) [24]\n", " 30: IntConstant [24]\n" }, { " 24: Return(21)\n", " 24: Return(30)\n" } }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the values of the computed constants. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst1->IsIntConstant()); ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 3); @@ -204,13 +263,13 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition2) { { " 28: IntConstant\n", removed }, { " 29: IntConstant\n", removed } }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp); + check_after_cf); } /** @@ -224,7 +283,7 @@ TEST(ConstantPropagation, IntConstantFoldingOnAddition2) { * v2 <- v0 - v1 2. sub-int v2, v0, v1 * return v2 4. return v2 */ -TEST(ConstantPropagation, IntConstantFoldingOnSubtraction) { +TEST(ConstantFolding, IntConstantFoldingOnSubtraction) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 3 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, @@ -243,17 +302,17 @@ TEST(ConstantPropagation, IntConstantFoldingOnSubtraction) { "BasicBlock 2, pred: 1\n" " 13: Exit\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, { " 9: Sub(3, 5) [12]\n", " 16: IntConstant [12]\n" }, { " 12: Return(9)\n", " 12: Return(16)\n" } }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1); @@ -264,13 +323,13 @@ TEST(ConstantPropagation, IntConstantFoldingOnSubtraction) { { " 3: IntConstant\n", removed }, { " 5: IntConstant\n", removed } }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp); + check_after_cf); } #define SIX_REGISTERS_CODE_ITEM(...) \ @@ -289,7 +348,7 @@ TEST(ConstantPropagation, IntConstantFoldingOnSubtraction) { * (v0, v1) + (v1, v2) 4. add-long v4, v0, v2 * return (v4, v5) 6. return-wide v4 */ -TEST(ConstantPropagation, LongConstantFoldingOnAddition) { +TEST(ConstantFolding, LongConstantFoldingOnAddition) { const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 1, Instruction::CONST_WIDE_16 | 2 << 8, 2, @@ -308,17 +367,17 @@ TEST(ConstantPropagation, LongConstantFoldingOnAddition) { "BasicBlock 2, pred: 1\n" " 16: Exit\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 6: LongConstant [12]\n", " 6: LongConstant\n" }, { " 8: LongConstant [12]\n", " 8: LongConstant\n" }, { " 12: Add(6, 8) [15]\n", " 19: LongConstant [15]\n" }, { " 15: Return(12)\n", " 15: Return(19)\n" } }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst->IsLongConstant()); ASSERT_EQ(inst->AsLongConstant()->GetValue(), 3); @@ -329,13 +388,13 @@ TEST(ConstantPropagation, LongConstantFoldingOnAddition) { { " 6: LongConstant\n", removed }, { " 8: LongConstant\n", removed } }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp, + check_after_cf, Primitive::kPrimLong); } @@ -352,7 +411,7 @@ TEST(ConstantPropagation, LongConstantFoldingOnAddition) { * (v0, v1) - (v1, v2) 4. sub-long v4, v0, v2 * return (v4, v5) 6. return-wide v4 */ -TEST(ConstantPropagation, LongConstantFoldingOnSubtraction) { +TEST(ConstantFolding, LongConstantFoldingOnSubtraction) { const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 3, Instruction::CONST_WIDE_16 | 2 << 8, 2, @@ -371,17 +430,17 @@ TEST(ConstantPropagation, LongConstantFoldingOnSubtraction) { "BasicBlock 2, pred: 1\n" " 16: Exit\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 6: LongConstant [12]\n", " 6: LongConstant\n" }, { " 8: LongConstant [12]\n", " 8: LongConstant\n" }, { " 12: Sub(6, 8) [15]\n", " 19: LongConstant [15]\n" }, { " 15: Return(12)\n", " 15: Return(19)\n" } }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst->IsLongConstant()); ASSERT_EQ(inst->AsLongConstant()->GetValue(), 1); @@ -392,13 +451,13 @@ TEST(ConstantPropagation, LongConstantFoldingOnSubtraction) { { " 6: LongConstant\n", removed }, { " 8: LongConstant\n", removed } }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp, + check_after_cf, Primitive::kPrimLong); } @@ -424,7 +483,7 @@ TEST(ConstantPropagation, LongConstantFoldingOnSubtraction) { * L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4 * return v2 13. return v2 */ -TEST(ConstantPropagation, IntConstantFoldingAndJumps) { +TEST(ConstantFolding, IntConstantFoldingAndJumps) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -462,8 +521,8 @@ TEST(ConstantPropagation, IntConstantFoldingAndJumps) { "BasicBlock 5, pred: 4\n" " 29: Exit\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, { " 5: IntConstant [9]\n", " 5: IntConstant []\n" }, { " 13: IntConstant [14]\n", " 13: IntConstant\n" }, @@ -475,10 +534,10 @@ TEST(ConstantPropagation, IntConstantFoldingAndJumps) { { " 25: Add(14, 24) [28]\n", " 35: IntConstant [28]\n" }, { " 28: Return(25)\n", " 28: Return(35)\n"} }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the values of the computed constants. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst1->IsIntConstant()); ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 1); @@ -501,13 +560,13 @@ TEST(ConstantPropagation, IntConstantFoldingAndJumps) { { " 24: IntConstant\n", removed }, { " 34: IntConstant\n", removed }, }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp); + check_after_cf); } @@ -524,7 +583,7 @@ TEST(ConstantPropagation, IntConstantFoldingAndJumps) { * L1: v2 <- v0 + v1 5. add-int v2, v0, v1 * return-void 7. return */ -TEST(ConstantPropagation, ConstantCondition) { +TEST(ConstantFolding, ConstantCondition) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, @@ -553,17 +612,17 @@ TEST(ConstantPropagation, ConstantCondition) { "BasicBlock 5, pred: 1, succ: 3\n" " 21: Goto 3\n"; - // Expected difference after constant propagation. - diff_t expected_cp_diff = { + // Expected difference after constant folding. + diff_t expected_cf_diff = { { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [15, 22]\n" }, { " 5: IntConstant [22, 8]\n", " 5: IntConstant [22]\n" }, { " 8: GreaterThanOrEqual(3, 5) [9]\n", " 23: IntConstant [9]\n" }, { " 9: If(8)\n", " 9: If(23)\n" } }; - std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the values of the computed constants. - auto check_after_cp = [](HGraph* graph) { + auto check_after_cf = [](HGraph* graph) { HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1); @@ -575,13 +634,13 @@ TEST(ConstantPropagation, ConstantCondition) { { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, { " 15: Add(22, 3)\n", removed } }; - std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); TestCode(data, expected_before, - expected_after_cp, + expected_after_cf, expected_after_dce, - check_after_cp); + check_after_cf); } } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 565554442..fc3dd01ef 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -20,7 +20,7 @@ namespace art { -void DeadCodeElimination::Run() { +void HDeadCodeElimination::Run() { // Process basic blocks in post-order in the dominator tree, so that // a dead instruction depending on another dead instruction is // removed. diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 48739be49..a4446ae04 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ #include "nodes.h" +#include "optimization.h" namespace art { @@ -25,17 +26,18 @@ namespace art { * Optimization pass performing dead code elimination (removal of * unused variables/instructions) on the SSA form. */ -class DeadCodeElimination : public ValueObject { +class HDeadCodeElimination : public HOptimization { public: - explicit DeadCodeElimination(HGraph* graph) - : graph_(graph) {} + HDeadCodeElimination(HGraph* graph, const HGraphVisualizer& visualizer) + : HOptimization(graph, true, kDeadCodeEliminationPassName, visualizer) {} - void Run(); + virtual void Run() OVERRIDE; - private: - HGraph* const graph_; + static constexpr const char* kDeadCodeEliminationPassName = + "dead_code_elimination"; - DISALLOW_COPY_AND_ASSIGN(DeadCodeElimination); + private: + DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination); }; } // namespace art diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 3e0ba3aee..0c6807482 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -14,10 +14,11 @@ * limitations under the License. */ +#include "code_generator_x86.h" #include "dead_code_elimination.h" -#include "pretty_printer.h" #include "graph_checker.h" #include "optimizing_unit_test.h" +#include "pretty_printer.h" #include "gtest/gtest.h" @@ -39,16 +40,17 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - DeadCodeElimination(graph).Run(); + x86::CodeGeneratorX86 codegen(graph); + HGraphVisualizer visualizer(nullptr, graph, codegen, ""); + HDeadCodeElimination(graph, visualizer).Run(); + SSAChecker ssa_checker(&allocator, graph); + ssa_checker.Run(); + ASSERT_TRUE(ssa_checker.IsValid()); StringPrettyPrinter printer_after(graph); printer_after.VisitInsertionOrder(); std::string actual_after = printer_after.str(); ASSERT_EQ(actual_after, expected_after); - - SSAChecker ssa_checker(&allocator, graph); - ssa_checker.Run(); - ASSERT_TRUE(ssa_checker.IsValid()); } @@ -94,6 +96,7 @@ TEST(DeadCodeElimination, AdditionAndConditionalJump) { "BasicBlock 5, pred: 1, succ: 3\n" " 21: Goto 3\n"; + // Expected difference after dead code elimination. diff_t expected_diff = { { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [22, 8]\n" }, { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, @@ -164,7 +167,7 @@ TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) { "BasicBlock 5, pred: 4\n" " 28: Exit\n"; - // Expected difference after constant propagation. + // Expected difference after dead code elimination. diff_t expected_diff = { { " 13: IntConstant [14]\n", removed }, { " 24: IntConstant [25]\n", removed }, diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 9f4029785..743ffc46b 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -268,7 +268,7 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { for (HUseIterator<HInstruction> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); - if (!use->IsPhi() && !instruction->Dominates(use)) { + if (!use->IsPhi() && !instruction->StrictlyDominates(use)) { std::stringstream error; error << "Instruction " << instruction->GetId() << " in block " << current_block_->GetBlockId() @@ -285,7 +285,7 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* env_instruction = environment->GetInstructionAt(i); if (env_instruction != nullptr - && !env_instruction->Dominates(instruction)) { + && !env_instruction->StrictlyDominates(instruction)) { std::stringstream error; error << "Instruction " << env_instruction->GetId() << " in environment of instruction " << instruction->GetId() diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 862f1b600..db3130678 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -19,15 +19,19 @@ #include "nodes.h" +#include <ostream> + namespace art { // A control-flow graph visitor performing various checks. class GraphChecker : public HGraphVisitor { public: - GraphChecker(ArenaAllocator* allocator, HGraph* graph) + GraphChecker(ArenaAllocator* allocator, HGraph* graph, + const char* dump_prefix = "art::GraphChecker: ") : HGraphVisitor(graph), allocator_(allocator), - errors_(allocator, 0) {} + errors_(allocator, 0), + dump_prefix_(dump_prefix) {} // Check the whole graph (in insertion order). virtual void Run() { VisitInsertionOrder(); } @@ -48,6 +52,13 @@ class GraphChecker : public HGraphVisitor { return errors_; } + // Print detected errors on output stream `os`. + void Dump(std::ostream& os) { + for (size_t i = 0, e = errors_.Size(); i < e; ++i) { + os << dump_prefix_ << errors_.Get(i) << std::endl; + } + } + protected: ArenaAllocator* const allocator_; // The block currently visited. @@ -56,6 +67,9 @@ class GraphChecker : public HGraphVisitor { GrowableArray<std::string> errors_; private: + // String displayed before dumped errors. + const char* dump_prefix_; + DISALLOW_COPY_AND_ASSIGN(GraphChecker); }; @@ -66,7 +80,7 @@ class SSAChecker : public GraphChecker { typedef GraphChecker super_type; SSAChecker(ArenaAllocator* allocator, HGraph* graph) - : GraphChecker(allocator, graph) {} + : GraphChecker(allocator, graph, "art::SSAChecker: ") {} // Check the whole graph (in reverse post-order). virtual void Run() { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index b4eb89d30..4ed215624 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -120,13 +120,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_<< std::endl; } - void DumpLocation(Location location, Primitive::Type type) { + void DumpLocation(Location location) { if (location.IsRegister()) { - if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) { - codegen_.DumpFloatingPointRegister(output_, location.reg()); - } else { - codegen_.DumpCoreRegister(output_, location.reg()); - } + codegen_.DumpCoreRegister(output_, location.reg()); + } else if (location.IsFpuRegister()) { + codegen_.DumpFloatingPointRegister(output_, location.reg()); } else if (location.IsConstant()) { output_ << "constant"; HConstant* constant = location.GetConstant(); @@ -150,9 +148,9 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " ("; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { MoveOperands* move = instruction->MoveOperandsAt(i); - DumpLocation(move->GetSource(), Primitive::kPrimInt); + DumpLocation(move->GetSource()); output_ << " -> "; - DumpLocation(move->GetDestination(), Primitive::kPrimInt); + DumpLocation(move->GetDestination()); if (i + 1 != e) { output_ << ", "; } @@ -183,13 +181,13 @@ class HGraphVisualizerPrinter : public HGraphVisitor { if (locations != nullptr) { output_ << " ( "; for (size_t i = 0; i < instruction->InputCount(); ++i) { - DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType()); + DumpLocation(locations->InAt(i)); output_ << " "; } output_ << ")"; if (locations->Out().IsValid()) { output_ << " -> "; - DumpLocation(locations->Out(), instruction->GetType()); + DumpLocation(locations->Out()); } } output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; @@ -309,7 +307,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, printer.EndTag("compilation"); } -void HGraphVisualizer::DumpGraph(const char* pass_name) { +void HGraphVisualizer::DumpGraph(const char* pass_name) const { if (!is_enabled_) { return; } diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index f17ba3bba..05984a080 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -61,7 +61,7 @@ class HGraphVisualizer : public ValueObject { * If this visualizer is enabled, emit the compilation information * in `output_`. */ - void DumpGraph(const char* pass_name); + void DumpGraph(const char* pass_name) const; private: std::ostream* const output_; diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index d5f4f902c..89c949563 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -73,7 +73,7 @@ TEST(LiveRangesTest, CFG1) { LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); // Last use is the return instruction. - ASSERT_EQ(9u, range->GetEnd()); + ASSERT_EQ(8u, range->GetEnd()); HBasicBlock* block = graph->GetBlocks().Get(1); ASSERT_TRUE(block->GetLastInstruction()->IsReturn()); ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition()); @@ -119,7 +119,7 @@ TEST(LiveRangesTest, CFG2) { LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); // Last use is the return instruction. - ASSERT_EQ(23u, range->GetEnd()); + ASSERT_EQ(22u, range->GetEnd()); HBasicBlock* block = graph->GetBlocks().Get(3); ASSERT_TRUE(block->GetLastInstruction()->IsReturn()); ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition()); @@ -193,7 +193,7 @@ TEST(LiveRangesTest, CFG3) { range = interval->GetFirstRange(); ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(2)->GetLifetimePosition()); ASSERT_EQ(22u, range->GetStart()); - ASSERT_EQ(25u, range->GetEnd()); + ASSERT_EQ(24u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } @@ -263,7 +263,7 @@ TEST(LiveRangesTest, Loop1) { range = interval->GetFirstRange(); // The instruction is live until the return instruction after the loop. ASSERT_EQ(6u, range->GetStart()); - ASSERT_EQ(27u, range->GetEnd()); + ASSERT_EQ(26u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); // Test for the phi. @@ -271,7 +271,7 @@ TEST(LiveRangesTest, Loop1) { range = interval->GetFirstRange(); // Instruction is consumed by the if. ASSERT_EQ(14u, range->GetStart()); - ASSERT_EQ(16u, range->GetEnd()); + ASSERT_EQ(17u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } @@ -338,7 +338,7 @@ TEST(LiveRangesTest, Loop2) { range = range->GetNext(); ASSERT_TRUE(range != nullptr); ASSERT_EQ(24u, range->GetStart()); - ASSERT_EQ(27u, range->GetEnd()); + ASSERT_EQ(26u, range->GetEnd()); // Test for the add instruction. HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd(); @@ -410,7 +410,7 @@ TEST(LiveRangesTest, CFG4) { interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); range = interval->GetFirstRange(); ASSERT_EQ(4u, range->GetStart()); - ASSERT_EQ(29u, range->GetEnd()); + ASSERT_EQ(28u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); // Test for the first add. diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 163748479..ed5e260a5 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -25,16 +25,14 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), environment_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->EnvironmentSize()), - dies_at_entry_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), + output_overlaps_(true), call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), live_registers_() { inputs_.SetSize(instruction->InputCount()); - dies_at_entry_.SetSize(instruction->InputCount()); for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); - dies_at_entry_.Put(i, false); } environment_.SetSize(instruction->EnvironmentSize()); for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index dcf70f27b..11bcd7852 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -34,7 +34,7 @@ class HInstruction; */ class Location : public ValueObject { public: - static constexpr bool kDiesAtEntry = true; + static constexpr bool kNoOutputOverlap = false; enum Kind { kInvalid = 0, @@ -373,8 +373,7 @@ class LocationSummary : public ArenaObject { LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); - void SetInAt(uint32_t at, Location location, bool dies_at_entry = false) { - dies_at_entry_.Put(at, dies_at_entry); + void SetInAt(uint32_t at, Location location) { inputs_.Put(at, location); } @@ -386,7 +385,8 @@ class LocationSummary : public ArenaObject { return inputs_.Size(); } - void SetOut(Location location) { + void SetOut(Location location, bool overlaps = true) { + output_overlaps_ = overlaps; output_ = Location(location); } @@ -449,23 +449,30 @@ class LocationSummary : public ArenaObject { return &live_registers_; } - bool InputOverlapsWithOutputOrTemp(uint32_t input, bool is_environment) const { + bool InputOverlapsWithOutputOrTemp(uint32_t input_index, bool is_environment) const { if (is_environment) return true; - Location location = Out(); - if (input == 0 && location.IsUnallocated() && location.GetPolicy() == Location::kSameAsFirstInput) { + if ((input_index == 0) + && output_.IsUnallocated() + && (output_.GetPolicy() == Location::kSameAsFirstInput)) { return false; } - if (dies_at_entry_.Get(input)) { + if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) { return false; } return true; } + bool OutputOverlapsWithInputs() const { + return output_overlaps_; + } + private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; GrowableArray<Location> environment_; - GrowableArray<bool> dies_at_entry_; + // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot + // share the same register as the inputs. + bool output_overlaps_; Location output_; const CallKind call_kind_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index aee21770b..d624ad5e5 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -363,6 +363,25 @@ void HBasicBlock::AddPhi(HPhi* phi) { Add(&phis_, this, phi); } +void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { + DCHECK_EQ(phi->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + if (cursor->next_ == nullptr) { + cursor->next_ = phi; + phi->previous_ = cursor; + DCHECK(phi->next_ == nullptr); + } else { + phi->next_ = cursor->next_; + phi->previous_ = cursor; + cursor->next_ = phi; + phi->next_->previous_ = phi; + } + phi->SetBlock(this); + phi->SetId(GetGraph()->GetNextInstructionId()); + UpdateInputsUsers(phi); +} + static void Remove(HInstructionList* instruction_list, HBasicBlock* block, HInstruction* instruction) { @@ -472,7 +491,11 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, return true; } -bool HInstruction::Dominates(HInstruction* other_instruction) const { +bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { + if (other_instruction == this) { + // An instruction does not strictly dominate itself. + return false; + } HBasicBlock* block = GetBlock(); HBasicBlock* other_block = other_instruction->GetBlock(); if (block != other_block) { @@ -527,6 +550,12 @@ void HInstruction::ReplaceWith(HInstruction* other) { env_uses_ = nullptr; } +void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { + InputAt(index)->RemoveUser(this, index); + SetRawInputAt(index, replacement); + replacement->AddUseAt(this, index); +} + size_t HInstruction::EnvironmentSize() const { return HasEnvironment() ? environment_->Size() : 0; } @@ -568,15 +597,30 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } -HConstant* HBinaryOperation::TryStaticEvaluation(ArenaAllocator* allocator) const { +HConstant* HUnaryOperation::TryStaticEvaluation() const { + if (GetInput()->IsIntConstant()) { + int32_t value = Evaluate(GetInput()->AsIntConstant()->GetValue()); + return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); + } else if (GetInput()->IsLongConstant()) { + // TODO: Implement static evaluation of long unary operations. + // + // Do not exit with a fatal condition here. Instead, simply + // return `nullptr' to notify the caller that this instruction + // cannot (yet) be statically evaluated. + return nullptr; + } + return nullptr; +} + +HConstant* HBinaryOperation::TryStaticEvaluation() const { if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { int32_t value = Evaluate(GetLeft()->AsIntConstant()->GetValue(), GetRight()->AsIntConstant()->GetValue()); - return new(allocator) HIntConstant(value); + return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) { int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(), GetRight()->AsLongConstant()->GetValue()); - return new(allocator) HLongConstant(value); + return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value); } return nullptr; } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index ec26c4a4d..7c933aa4f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -399,6 +399,7 @@ class HBasicBlock : public ArenaObject { void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); void AddPhi(HPhi* phi); + void InsertPhiAfter(HPhi* instruction, HPhi* cursor); void RemovePhi(HPhi* phi); bool IsLoopHeader() const { @@ -503,10 +504,14 @@ class HBasicBlock : public ArenaObject { M(Temporary, Instruction) \ M(SuspendCheck, Instruction) \ M(Mul, BinaryOperation) \ + M(Neg, UnaryOperation) \ + M(FloatConstant, Constant) \ + M(DoubleConstant, Constant) \ #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ + M(UnaryOperation, Instruction) \ M(BinaryOperation, Instruction) \ M(Invoke, Instruction) @@ -684,9 +689,10 @@ class HInstruction : public ArenaObject { return result; } - // Does this instruction dominate `other_instruction`? Aborts if - // this instruction and `other_instruction` are both phis. - bool Dominates(HInstruction* other_instruction) const; + // Does this instruction strictly dominate `other_instruction`? + // Returns false if this instruction and `other_instruction` are the same. + // Aborts if this instruction and `other_instruction` are both phis. + bool StrictlyDominates(HInstruction* other_instruction) const; int GetId() const { return id_; } void SetId(int id) { id_ = id; } @@ -707,6 +713,7 @@ class HInstruction : public ArenaObject { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); + void ReplaceInput(HInstruction* replacement, size_t index); bool HasOnlyOneUse() const { return uses_ != nullptr && uses_->GetTail() == nullptr; @@ -992,8 +999,8 @@ class HExpression : public HTemplateInstruction<N> { virtual Primitive::Type GetType() const { return type_; } - private: - const Primitive::Type type_; + protected: + Primitive::Type type_; }; // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow @@ -1085,6 +1092,34 @@ class HIf : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HIf); }; +class HUnaryOperation : public HExpression<1> { + public: + HUnaryOperation(Primitive::Type result_type, HInstruction* input) + : HExpression(result_type, SideEffects::None()) { + SetRawInputAt(0, input); + } + + HInstruction* GetInput() const { return InputAt(0); } + Primitive::Type GetResultType() const { return GetType(); } + + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + + // Try to statically evaluate `operation` and return a HConstant + // containing the result of this evaluation. If `operation` cannot + // be evaluated as a constant, return nullptr. + HConstant* TryStaticEvaluation() const; + + // Apply this operation to `x`. + virtual int32_t Evaluate(int32_t x) const = 0; + virtual int64_t Evaluate(int64_t x) const = 0; + + DECLARE_INSTRUCTION(UnaryOperation); + + private: + DISALLOW_COPY_AND_ASSIGN(HUnaryOperation); +}; + class HBinaryOperation : public HExpression<2> { public: HBinaryOperation(Primitive::Type result_type, @@ -1103,10 +1138,10 @@ class HBinaryOperation : public HExpression<2> { virtual bool CanBeMoved() const { return true; } virtual bool InstructionDataEquals(HInstruction* other) const { return true; } - // Try to statically evaluate `operation` and return an HConstant + // Try to statically evaluate `operation` and return a HConstant // containing the result of this evaluation. If `operation` cannot // be evaluated as a constant, return nullptr. - HConstant* TryStaticEvaluation(ArenaAllocator* allocator) const; + HConstant* TryStaticEvaluation() const; // Apply this operation to `x` and `y`. virtual int32_t Evaluate(int32_t x, int32_t y) const = 0; @@ -1370,6 +1405,48 @@ class HConstant : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HConstant); }; +class HFloatConstant : public HConstant { + public: + explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {} + + float GetValue() const { return value_; } + + virtual bool InstructionDataEquals(HInstruction* other) const { + return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) == + bit_cast<float, int32_t>(value_); + } + + virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); } + + DECLARE_INSTRUCTION(FloatConstant); + + private: + const float value_; + + DISALLOW_COPY_AND_ASSIGN(HFloatConstant); +}; + +class HDoubleConstant : public HConstant { + public: + explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {} + + double GetValue() const { return value_; } + + virtual bool InstructionDataEquals(HInstruction* other) const { + return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) == + bit_cast<double, int64_t>(value_); + } + + virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); } + + DECLARE_INSTRUCTION(DoubleConstant); + + private: + const double value_; + + DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); +}; + // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). class HIntConstant : public HConstant { @@ -1517,6 +1594,20 @@ class HNewInstance : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; +class HNeg : public HUnaryOperation { + public: + explicit HNeg(Primitive::Type result_type, HInstruction* input) + : HUnaryOperation(result_type, input) {} + + virtual int32_t Evaluate(int32_t x) const OVERRIDE { return -x; } + virtual int64_t Evaluate(int64_t x) const OVERRIDE { return -x; } + + DECLARE_INSTRUCTION(Neg); + + private: + DISALLOW_COPY_AND_ASSIGN(HNeg); +}; + class HAdd : public HBinaryOperation { public: HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right) @@ -1749,6 +1840,7 @@ class HArrayGet : public HExpression<2> { virtual bool CanBeMoved() const { return true; } virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + void SetType(Primitive::Type type) { type_ = type; } DECLARE_INSTRUCTION(ArrayGet); @@ -1761,11 +1853,11 @@ class HArraySet : public HTemplateInstruction<3> { HArraySet(HInstruction* array, HInstruction* index, HInstruction* value, - Primitive::Type component_type, + Primitive::Type expected_component_type, uint32_t dex_pc) : HTemplateInstruction(SideEffects::ChangesSomething()), dex_pc_(dex_pc), - component_type_(component_type) { + expected_component_type_(expected_component_type) { SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); @@ -1779,13 +1871,24 @@ class HArraySet : public HTemplateInstruction<3> { uint32_t GetDexPc() const { return dex_pc_; } - Primitive::Type GetComponentType() const { return component_type_; } + HInstruction* GetValue() const { return InputAt(2); } + + Primitive::Type GetComponentType() const { + // The Dex format does not type floating point index operations. Since the + // `expected_component_type_` is set during building and can therefore not + // be correct, we also check what is the value type. If it is a floating + // point type, we must use that type. + Primitive::Type value_type = GetValue()->GetType(); + return ((value_type == Primitive::kPrimFloat) || (value_type == Primitive::kPrimDouble)) + ? value_type + : expected_component_type_; + } DECLARE_INSTRUCTION(ArraySet); private: const uint32_t dex_pc_; - const Primitive::Type component_type_; + const Primitive::Type expected_component_type_; DISALLOW_COPY_AND_ASSIGN(HArraySet); }; diff --git a/compiler/optimizing/constant_propagation.h b/compiler/optimizing/optimization.cc index 072988188..33dc040e7 100644 --- a/compiler/optimizing/constant_propagation.h +++ b/compiler/optimizing/optimization.cc @@ -14,30 +14,34 @@ * limitations under the License. */ -#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_ -#define ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_ +#include "optimization.h" -#include "nodes.h" +#include "graph_checker.h" namespace art { -/** - * Optimization pass performing a simple constant propagation on the - * SSA form. - */ -class ConstantPropagation : public ValueObject { - public: - explicit ConstantPropagation(HGraph* graph) - : graph_(graph) {} - - void Run(); - - private: - HGraph* const graph_; - - DISALLOW_COPY_AND_ASSIGN(ConstantPropagation); -}; +void HOptimization::Execute() { + Run(); + visualizer_.DumpGraph(pass_name_); + Check(); +} + +void HOptimization::Check() { + if (kIsDebugBuild) { + if (is_in_ssa_form_) { + SSAChecker checker(graph_->GetArena(), graph_); + checker.Run(); + if (!checker.IsValid()) { + LOG(FATAL) << Dumpable<SSAChecker>(checker); + } + } else { + GraphChecker checker(graph_->GetArena(), graph_); + checker.Run(); + if (!checker.IsValid()) { + LOG(FATAL) << Dumpable<GraphChecker>(checker); + } + } + } +} } // namespace art - -#endif // ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_ diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h new file mode 100644 index 000000000..59683e207 --- /dev/null +++ b/compiler/optimizing/optimization.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ + +#include "graph_visualizer.h" +#include "nodes.h" + +namespace art { + +/** + * Abstraction to implement an optimization pass. + */ +class HOptimization : public ValueObject { + public: + HOptimization(HGraph* graph, + bool is_in_ssa_form, + const char* pass_name, + const HGraphVisualizer& visualizer) + : graph_(graph), + is_in_ssa_form_(is_in_ssa_form), + pass_name_(pass_name), + visualizer_(visualizer) {} + + virtual ~HOptimization() {} + + // Execute the optimization pass. + void Execute(); + + // Return the name of the pass. + const char* GetPassName() const { return pass_name_; } + + // Peform the analysis itself. + virtual void Run() = 0; + + private: + // Verify the graph; abort if it is not valid. + void Check(); + + protected: + HGraph* const graph_; + + private: + // Does the analyzed graph use the SSA form? + const bool is_in_ssa_form_; + // Optimization pass name. + const char* pass_name_; + // A graph visualiser invoked after the execution of the optimization + // pass if enabled. + const HGraphVisualizer& visualizer_; + + DISALLOW_COPY_AND_ASSIGN(HOptimization); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 3cf5a0b29..dce8e6d78 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -22,6 +22,8 @@ #include "builder.h" #include "code_generator.h" #include "compiler.h" +#include "constant_folding.h" +#include "dead_code_elimination.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "graph_visualizer.h" @@ -261,6 +263,9 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite visualizer.DumpGraph("ssa"); graph->FindNaturalLoops(); + HDeadCodeElimination(graph, visualizer).Execute(); + HConstantFolding(graph, visualizer).Execute(); + SsaRedundantPhiElimination(graph).Run(); SsaDeadPhiElimination(graph).Run(); InstructionSimplifier(graph).Run(); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 5055a7610..fc65f97f6 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -37,18 +37,21 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, handled_(allocator, 0), active_(allocator, 0), inactive_(allocator, 0), - physical_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), + physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), + physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), temp_intervals_(allocator, 4), spill_slots_(allocator, kDefaultNumberOfSpillSlots), safepoints_(allocator, 0), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), - blocked_registers_(codegen->GetBlockedCoreRegisters()), + blocked_core_registers_(codegen->GetBlockedCoreRegisters()), + blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), reserved_out_slots_(0), maximum_number_of_live_registers_(0) { codegen->SetupBlockedRegisters(); - physical_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); + physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); + physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); // Always reserve for the current method and the graph's max out registers. // TODO: compute it instead. reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); @@ -65,8 +68,10 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, it.Advance()) { HInstruction* current = it.Current(); if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; - if (current->GetType() == Primitive::kPrimFloat) return false; - if (current->GetType() == Primitive::kPrimDouble) return false; + if ((current->GetType() == Primitive::kPrimFloat || current->GetType() == Primitive::kPrimDouble) + && instruction_set != kX86_64) { + return false; + } } } return true; @@ -93,14 +98,22 @@ void RegisterAllocator::AllocateRegisters() { void RegisterAllocator::BlockRegister(Location location, size_t start, - size_t end, - Primitive::Type type) { + size_t end) { int reg = location.reg(); - LiveInterval* interval = physical_register_intervals_.Get(reg); + DCHECK(location.IsRegister() || location.IsFpuRegister()); + LiveInterval* interval = location.IsRegister() + ? physical_core_register_intervals_.Get(reg) + : physical_fp_register_intervals_.Get(reg); + Primitive::Type type = location.IsRegister() + ? Primitive::kPrimInt + : Primitive::kPrimDouble; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); - physical_register_intervals_.Put(reg, interval); - inactive_.Add(interval); + if (location.IsRegister()) { + physical_core_register_intervals_.Put(reg, interval); + } else { + physical_fp_register_intervals_.Put(reg, interval); + } } DCHECK(interval->GetRegister() == reg); interval->AddRange(start, end); @@ -123,8 +136,17 @@ void RegisterAllocator::AllocateRegistersInternal() { registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); processing_core_registers_ = true; unhandled_ = &unhandled_core_intervals_; + for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_core_register_intervals_.Get(i); + if (fixed != nullptr) { + inactive_.Add(fixed); + } + } LinearScan(); + size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_; + maximum_number_of_live_registers_ = 0; + inactive_.Reset(); active_.Reset(); handled_.Reset(); @@ -133,9 +155,14 @@ void RegisterAllocator::AllocateRegistersInternal() { registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); processing_core_registers_ = false; unhandled_ = &unhandled_fp_intervals_; - // TODO: Enable FP register allocation. - DCHECK(unhandled_->IsEmpty()); + for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_fp_register_intervals_.Get(i); + if (fixed != nullptr) { + inactive_.Add(fixed); + } + } LinearScan(); + maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers; } void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { @@ -148,8 +175,9 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { for (size_t i = 0; i < locations->GetTempCount(); ++i) { Location temp = locations->GetTemp(i); if (temp.IsRegister()) { - BlockRegister(temp, position, position + 1, Primitive::kPrimInt); + BlockRegister(temp, position, position + 1); } else { + DCHECK(temp.IsUnallocated()); LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); temp_intervals_.Add(interval); interval->AddRange(position, position + 1); @@ -160,10 +188,6 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { bool core_register = (instruction->GetType() != Primitive::kPrimDouble) && (instruction->GetType() != Primitive::kPrimFloat); - GrowableArray<LiveInterval*>& unhandled = core_register - ? unhandled_core_intervals_ - : unhandled_fp_intervals_; - if (locations->CanCall()) { if (!instruction->IsSuspendCheck()) { codegen_->MarkNotLeaf(); @@ -180,7 +204,8 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // maximum before updating locations. LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); interval->AddRange(position, position + 1); - unhandled.Add(interval); + unhandled_core_intervals_.Add(interval); + unhandled_fp_intervals_.Add(interval); } } @@ -189,21 +214,29 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { BlockRegister(Location::RegisterLocation(i), position, - position + 1, - Primitive::kPrimInt); + position + 1); + } + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { + BlockRegister(Location::FpuRegisterLocation(i), + position, + position + 1); } } for (size_t i = 0; i < instruction->InputCount(); ++i) { Location input = locations->InAt(i); - if (input.IsRegister()) { - BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType()); + if (input.IsRegister() || input.IsFpuRegister()) { + BlockRegister(input, position, position + 1); } } LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) return; + GrowableArray<LiveInterval*>& unhandled = core_register + ? unhandled_core_intervals_ + : unhandled_fp_intervals_; + DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a @@ -213,21 +246,24 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // // The backwards walking ensures the ranges are ordered on increasing start positions. Location output = locations->Out(); - if (output.IsRegister()) { + if (output.IsRegister() || output.IsFpuRegister()) { // Shift the interval's start by one to account for the blocked register. current->SetFrom(position + 1); current->SetRegister(output.reg()); - BlockRegister(output, position, position + 1, instruction->GetType()); + BlockRegister(output, position, position + 1); + } else if (!locations->OutputOverlapsWithInputs()) { + // Shift the interval's start by one to not interfere with the inputs. + current->SetFrom(position + 1); } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { current->SetSpillSlot(output.GetStackIndex()); } // If needed, add interval to the list of unhandled intervals. if (current->HasSpillSlot() || instruction->IsConstant()) { - // Split before first register use. + // Split just before first register use. size_t first_register_use = current->FirstRegisterUse(); if (first_register_use != kNoLifetime) { - LiveInterval* split = Split(current, first_register_use); + LiveInterval* split = Split(current, first_register_use - 1); // Don't add direclty to `unhandled`, it needs to be sorted and the start // of this new interval might be after intervals already in the list. AddSorted(&unhandled, split); @@ -278,10 +314,19 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { } } - for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_register_intervals_.Get(i); - if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) { - intervals.Add(fixed); + if (processing_core_registers_) { + for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_core_register_intervals_.Get(i); + if (fixed != nullptr) { + intervals.Add(fixed); + } + } + } else { + for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_fp_register_intervals_.Get(i); + if (fixed != nullptr) { + intervals.Add(fixed); + } } } @@ -374,10 +419,10 @@ void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interva interval->Dump(stream); stream << ": "; if (interval->HasRegister()) { - if (processing_core_registers_) { - codegen_->DumpCoreRegister(stream, interval->GetRegister()); - } else { + if (interval->IsFloatingPoint()) { codegen_->DumpFloatingPointRegister(stream, interval->GetRegister()); + } else { + codegen_->DumpCoreRegister(stream, interval->GetRegister()); } } else { stream << "spilled"; @@ -391,6 +436,7 @@ void RegisterAllocator::LinearScan() { // (1) Remove interval with the lowest start position from unhandled. LiveInterval* current = unhandled_->Pop(); DCHECK(!current->IsFixed() && !current->HasSpillSlot()); + DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart()); size_t position = current->GetStart(); // (2) Remove currently active intervals that are dead at this position. @@ -519,10 +565,9 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } bool RegisterAllocator::IsBlocked(int reg) const { - // TODO: This only works for core registers and needs to be adjusted for - // floating point registers. - DCHECK(processing_core_registers_); - return blocked_registers_[reg]; + return processing_core_registers_ + ? blocked_core_registers_[reg] + : blocked_fp_registers_[reg]; } // Find the register that is used the last, and spill the interval @@ -591,7 +636,9 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use); + LiveInterval* split = Split(current, first_register_use - 1); + DCHECK_NE(current, split) << "There is not enough registers available for " + << split->GetParent()->GetDefinedBy()->DebugName(); AddSorted(unhandled_, split); return false; } else { @@ -635,6 +682,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) { + DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); size_t insert_at = 0; for (size_t i = array->Size(); i > 0; --i) { LiveInterval* current = array->Get(i - 1); @@ -723,17 +771,11 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); } -// We create a special marker for inputs moves to differentiate them from -// moves created during resolution. They must be different instructions -// because the input moves work on the assumption that the interval moves -// have been executed. -static constexpr size_t kInputMoveLifetimePosition = 0; -static bool IsInputMove(HInstruction* instruction) { - return instruction->GetLifetimePosition() == kInputMoveLifetimePosition; -} - static bool IsValidDestination(Location destination) { - return destination.IsRegister() || destination.IsStackSlot() || destination.IsDoubleStackSlot(); + return destination.IsRegister() + || destination.IsFpuRegister() + || destination.IsStackSlot() + || destination.IsDoubleStackSlot(); } void RegisterAllocator::AddInputMoveFor(HInstruction* user, @@ -748,14 +790,14 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user, HParallelMove* move = nullptr; if (previous == nullptr || !previous->IsParallelMove() - || !IsInputMove(previous)) { + || previous->GetLifetimePosition() < user->GetLifetimePosition()) { move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(kInputMoveLifetimePosition); + move->SetLifetimePosition(user->GetLifetimePosition()); user->GetBlock()->InsertInstructionBefore(move, user); } else { move = previous->AsParallelMove(); } - DCHECK(IsInputMove(move)); + DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr)); } @@ -778,7 +820,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, move = at->GetNext()->AsParallelMove(); // This is a parallel move for connecting siblings in a same block. We need to // differentiate it with moves for connecting blocks, and input moves. - if (move == nullptr || IsInputMove(move) || move->GetLifetimePosition() > position) { + if (move == nullptr || move->GetLifetimePosition() > position) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); at->GetBlock()->InsertInstructionBefore(move, at->GetNext()); @@ -786,12 +828,6 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, } else { // Move must happen before the instruction. HInstruction* previous = at->GetPrevious(); - if (previous != nullptr && previous->IsParallelMove() && IsInputMove(previous)) { - // This is a parallel move for connecting siblings in a same block. We need to - // differentiate it with input moves. - at = previous; - previous = previous->GetPrevious(); - } if (previous == nullptr || !previous->IsParallelMove() || previous->GetLifetimePosition() != position) { @@ -889,7 +925,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (current->HasSpillSlot() && current->HasRegister()) { // We spill eagerly, so move must be at definition. InsertMoveAfter(interval->GetDefinedBy(), - Location::RegisterLocation(interval->GetRegister()), + interval->IsFloatingPoint() + ? Location::FpuRegisterLocation(interval->GetRegister()) + : Location::RegisterLocation(interval->GetRegister()), interval->NeedsTwoSpillSlots() ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) : Location::StackSlot(interval->GetParent()->GetSpillSlot())); @@ -947,6 +985,10 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } break; } + case Location::kFpuRegister: { + locations->AddLiveRegister(source); + break; + } case Location::kStackSlot: // Fall-through case Location::kDoubleStackSlot: // Fall-through case Location::kConstant: { @@ -1110,6 +1152,7 @@ void RegisterAllocator::Resolve() { current = at; } LocationSummary* locations = at->GetLocations(); + DCHECK(temp->GetType() == Primitive::kPrimInt); locations->SetTempAt( temp_index++, Location::RegisterLocation(temp->GetRegister())); } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 0c3a9b381..b88153969 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -94,7 +94,7 @@ class RegisterAllocator { bool IsBlocked(int reg) const; // Update the interval for the register in `location` to cover [start, end). - void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type); + void BlockRegister(Location location, size_t start, size_t end); // Allocate a spill slot for the given interval. void AllocateSpillSlotFor(LiveInterval* interval); @@ -156,7 +156,8 @@ class RegisterAllocator { // Fixed intervals for physical registers. Such intervals cover the positions // where an instruction requires a specific register. - GrowableArray<LiveInterval*> physical_register_intervals_; + GrowableArray<LiveInterval*> physical_core_register_intervals_; + GrowableArray<LiveInterval*> physical_fp_register_intervals_; // Intervals for temporaries. Such intervals cover the positions // where an instruction requires a temporary. @@ -179,7 +180,8 @@ class RegisterAllocator { size_t* registers_array_; // Blocked registers, as decided by the code generator. - bool* const blocked_registers_; + bool* const blocked_core_registers_; + bool* const blocked_fp_registers_; // Slots reserved for out arguments. size_t reserved_out_slots_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 7517a6b00..2d84a9d33 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -348,14 +348,14 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { // Split at the next instruction. interval = interval->SplitAt(first_add->GetLifetimePosition() + 2); // The user of the split is the last add. - ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1); + ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition()); // Split before the last add. LiveInterval* new_interval = interval->SplitAt(last_add->GetLifetimePosition() - 1); // Ensure the current interval has no register use... ASSERT_EQ(interval->FirstRegisterUse(), kNoLifetime); // And the new interval has it for the last add. - ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1); + ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition()); } TEST(RegisterAllocatorTest, DeadPhi) { diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index be2c03957..a0cc8a94e 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -129,8 +129,112 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } } +/** + * Constants in the Dex format are not typed. So the builder types them as + * integers, but when doing the SSA form, we might realize the constant + * is used for floating point operations. We create a floating-point equivalent + * constant to make the operations correctly typed. + */ +static HFloatConstant* GetFloatEquivalent(HIntConstant* constant) { + // We place the floating point constant next to this constant. + HFloatConstant* result = constant->GetNext()->AsFloatConstant(); + if (result == nullptr) { + HGraph* graph = constant->GetBlock()->GetGraph(); + ArenaAllocator* allocator = graph->GetArena(); + result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue())); + constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + } else { + // If there is already a constant with the expected type, we know it is + // the floating point equivalent of this constant. + DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue()); + } + return result; +} + +/** + * Wide constants in the Dex format are not typed. So the builder types them as + * longs, but when doing the SSA form, we might realize the constant + * is used for floating point operations. We create a floating-point equivalent + * constant to make the operations correctly typed. + */ +static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { + // We place the floating point constant next to this constant. + HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); + if (result == nullptr) { + HGraph* graph = constant->GetBlock()->GetGraph(); + ArenaAllocator* allocator = graph->GetArena(); + result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue())); + constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + } else { + // If there is already a constant with the expected type, we know it is + // the floating point equivalent of this constant. + DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue()); + } + return result; +} + +/** + * Because of Dex format, we might end up having the same phi being + * used for non floating point operations and floating point operations. Because + * we want the graph to be correctly typed (and thereafter avoid moves between + * floating point registers and core registers), we need to create a copy of the + * phi with a floating point type. + */ +static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) { + // We place the floating point phi next to this phi. + HInstruction* next = phi->GetNext(); + if (next == nullptr + || (next->GetType() != Primitive::kPrimDouble && next->GetType() != Primitive::kPrimFloat)) { + ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena(); + HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type); + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + // Copy the inputs. Note that the graph may not be correctly typed by doing this copy, + // but the type propagation phase will fix it. + new_phi->SetRawInputAt(i, phi->InputAt(i)); + } + phi->GetBlock()->InsertPhiAfter(new_phi, phi); + return new_phi; + } else { + // If there is already a phi with the expected type, we know it is the floating + // point equivalent of this phi. + DCHECK_EQ(next->AsPhi()->GetRegNumber(), phi->GetRegNumber()); + return next->AsPhi(); + } +} + +HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, + HInstruction* value, + Primitive::Type type) { + if (value->IsArrayGet()) { + // The verifier has checked that values in arrays cannot be used for both + // floating point and non-floating point operations. It is therefore safe to just + // change the type of the operation. + value->AsArrayGet()->SetType(type); + return value; + } else if (value->IsLongConstant()) { + return GetDoubleEquivalent(value->AsLongConstant()); + } else if (value->IsIntConstant()) { + return GetFloatEquivalent(value->AsIntConstant()); + } else if (value->IsPhi()) { + return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type); + } else { + // For other instructions, we assume the verifier has checked that the dex format is correctly + // typed and the value in a dex register will not be used for both floating point and + // non-floating point operations. So the only reason an instruction would want a floating + // point equivalent is for an unused phi that will be removed by the dead phi elimination phase. + DCHECK(user->IsPhi()); + return value; + } +} + void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - load->ReplaceWith(current_locals_->Get(load->GetLocal()->GetRegNumber())); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); + if (load->GetType() != value->GetType() + && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) { + // If the operation requests a specific type, we make sure its input is of that type. + value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + } + load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 9d8c0729a..24f5ac55f 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -52,6 +52,10 @@ class SsaBuilder : public HGraphVisitor { void VisitStoreLocal(HStoreLocal* store); void VisitInstruction(HInstruction* instruction); + static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user, + HInstruction* instruction, + Primitive::Type type); + private: // Locals for the current block being visited. GrowableArray<HInstruction*>* current_locals_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index f0edc6422..1e34670d7 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -319,7 +319,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { if (user->IsPhi()) { // If the phi has a register, try to use the same. Location phi_location = user->GetLiveInterval()->ToLocation(); - if (phi_location.IsRegister() && free_until[phi_location.reg()] >= use_position) { + if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) { return phi_location.reg(); } const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors(); @@ -345,7 +345,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { // We use the user's lifetime position - 1 (and not `use_position`) because the // register is blocked at the beginning of the user. size_t position = user->GetLifetimePosition() - 1; - if (expected.IsRegister() && free_until[expected.reg()] >= position) { + if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) { return expected.reg(); } } @@ -368,7 +368,7 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the end of the predecessor, we know its register can // be reused. Location input_location = input_interval.ToLocation(); - if (input_location.IsRegister()) { + if (SameRegisterKind(input_location)) { return input_location.reg(); } } @@ -384,7 +384,7 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the start of this instruction, we know its register can // be reused. Location location = input_interval.ToLocation(); - if (location.IsRegister()) { + if (SameRegisterKind(location)) { return location.reg(); } } @@ -393,13 +393,21 @@ int LiveInterval::FindHintAtDefinition() const { return kNoRegister; } +bool LiveInterval::SameRegisterKind(Location other) const { + return IsFloatingPoint() + ? other.IsFpuRegister() + : other.IsRegister(); +} + bool LiveInterval::NeedsTwoSpillSlots() const { return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble; } Location LiveInterval::ToLocation() const { if (HasRegister()) { - return Location::RegisterLocation(GetRegister()); + return IsFloatingPoint() + ? Location::FpuRegisterLocation(GetRegister()) + : Location::RegisterLocation(GetRegister()); } else { HInstruction* defined_by = GetParent()->GetDefinedBy(); if (defined_by->IsConstant()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index e9bd30338..8f718480b 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -188,10 +188,14 @@ class LiveInterval : public ArenaObject { && (first_use_->GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first. + UsePosition* cursor = first_use_; + while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) { + cursor = cursor->GetNext(); + } DCHECK(first_use_->GetPosition() + 1 == position); UsePosition* new_use = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_->GetNext()); - first_use_->SetNext(new_use); + instruction, input_index, is_environment, position, cursor->GetNext()); + cursor->SetNext(new_use); if (first_range_->GetEnd() == first_use_->GetPosition()) { first_range_->end_ = position; } @@ -354,6 +358,10 @@ class LiveInterval : public ArenaObject { || (location.GetPolicy() == Location::kSameAsFirstInput && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) { return position; + } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { + return position; } } } @@ -362,12 +370,12 @@ class LiveInterval : public ArenaObject { size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); - if (use_position >= position && !use->GetIsEnvironment()) { + if (use_position > position && !use->GetIsEnvironment()) { Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - // Return the lifetime just before the user, so that the interval has a register - // when entering the user. - return use->GetUser()->GetLifetimePosition() - 1; + if (location.IsUnallocated() + && (location.GetPolicy() == Location::kRequiresRegister + || location.GetPolicy() == Location::kRequiresFpuRegister)) { + return use_position; } } use = use->GetNext(); @@ -498,6 +506,10 @@ class LiveInterval : public ArenaObject { // slots for spilling. bool NeedsTwoSpillSlots() const; + bool IsFloatingPoint() const { + return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble; + } + // Converts the location of the interval to a `Location` object. Location ToLocation() const; @@ -509,6 +521,9 @@ class LiveInterval : public ArenaObject { bool IsTemp() const { return is_temp_; } + // Returns whether `other` and `this` share the same kind of register. + bool SameRegisterKind(Location other) const; + private: ArenaAllocator* const allocator_; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index e02a182ec..4eda0f375 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -24,18 +24,13 @@ void SsaDeadPhiElimination::Run() { HBasicBlock* block = it.Current(); for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - if (phi->HasEnvironmentUses()) { - // TODO: Do we want to keep that phi alive? - worklist_.Add(phi); - phi->SetLive(); - continue; - } for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) { HUseListNode<HInstruction>* current = it.Current(); HInstruction* user = current->GetUser(); if (!user->IsPhi()) { worklist_.Add(phi); phi->SetLive(); + break; } else { phi->SetDead(); } @@ -76,6 +71,14 @@ void SsaDeadPhiElimination::Run() { current->RemoveUser(user, user_node->GetIndex()); } } + if (current->HasEnvironmentUses()) { + for (HUseIterator<HEnvironment> it(current->GetEnvUses()); !it.Done(); it.Advance()) { + HUseListNode<HEnvironment>* user_node = it.Current(); + HEnvironment* user = user_node->GetUser(); + user->SetRawEnvAt(user_node->GetIndex(), nullptr); + current->RemoveEnvironmentUser(user, user_node->GetIndex()); + } + } block->RemovePhi(current->AsPhi()); } current = next; diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc index a860cb7cf..3828142ed 100644 --- a/compiler/optimizing/ssa_type_propagation.cc +++ b/compiler/optimizing/ssa_type_propagation.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "ssa_builder.h" #include "ssa_type_propagation.h" #include "nodes.h" @@ -38,15 +39,31 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_ // Re-compute and update the type of the instruction. Returns // whether or not the type was changed. -static bool UpdateType(HPhi* phi) { +bool SsaTypePropagation::UpdateType(HPhi* phi) { Primitive::Type existing = phi->GetType(); - Primitive::Type new_type = Primitive::kPrimVoid; + Primitive::Type new_type = existing; for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { Primitive::Type input_type = phi->InputAt(i)->GetType(); new_type = MergeTypes(new_type, input_type); } phi->SetType(new_type); + + if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) { + // If the phi is of floating point type, we need to update its inputs to that + // type. For inputs that are phis, we need to recompute their types. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (input->GetType() != new_type) { + HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + AddToWorklist(equivalent->AsPhi()); + } + } + } + } + return existing != new_type; } @@ -63,7 +80,12 @@ void SsaTypePropagation::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = it.Current()->AsPhi(); // Set the initial type for the phi. Use the non back edge input for reaching // a fixed point faster. - phi->SetType(phi->InputAt(0)->GetType()); + Primitive::Type phi_type = phi->GetType(); + // We merge with the existing type, that has been set by the SSA builder. + DCHECK(phi_type == Primitive::kPrimVoid + || phi_type == Primitive::kPrimFloat + || phi_type == Primitive::kPrimDouble); + phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType())); AddToWorklist(phi); } } else { diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h index 5f471a981..f4d3d6344 100644 --- a/compiler/optimizing/ssa_type_propagation.h +++ b/compiler/optimizing/ssa_type_propagation.h @@ -34,6 +34,7 @@ class SsaTypePropagation : public ValueObject { void ProcessWorklist(); void AddToWorklist(HPhi* phi); void AddDependentInstructionsToWorklist(HPhi* phi); + bool UpdateType(HPhi* phi); HGraph* const graph_; GrowableArray<HPhi*> worklist_; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 37429131e..5bfa462d7 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -373,7 +373,7 @@ class AssemblerTest : public testing::Test { } } else { // This will output the assembly. - EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical."; + EXPECT_EQ(*res.code, *data) << "Outputs (and disassembly) not identical."; } } } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index db7151c3c..f4c9862f9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -317,7 +317,7 @@ void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) { EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0x28); - EmitXmmRegisterOperand(src.LowBits(), dst); + EmitXmmRegisterOperand(dst.LowBits(), src); } @@ -354,7 +354,7 @@ void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) { void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitOptionalRex32(dst, src); + EmitRex64(dst, src); EmitUint8(0x0F); EmitUint8(0x6E); EmitOperand(dst.LowBits(), Operand(src)); @@ -364,7 +364,7 @@ void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) { void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitOptionalRex32(src, dst); + EmitRex64(src, dst); EmitUint8(0x0F); EmitUint8(0x7E); EmitOperand(src.LowBits(), Operand(dst)); @@ -1748,6 +1748,10 @@ void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) { EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex()); } +void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) { + EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex()); +} + void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) { uint8_t rex = 0x48 | operand.rex(); // REX.W000 if (dst.NeedsRex()) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 4ffb6b566..7e5859cc4 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -666,6 +666,7 @@ class X86_64Assembler FINAL : public Assembler { void EmitRex64(CpuRegister reg); void EmitRex64(CpuRegister dst, CpuRegister src); void EmitRex64(CpuRegister dst, const Operand& operand); + void EmitRex64(XmmRegister dst, CpuRegister src); // Emit a REX prefix to normalize byte registers plus necessary register bit encodings. void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); @@ -692,7 +693,7 @@ inline void X86_64Assembler::EmitInt64(int64_t value) { inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) { CHECK_GE(rm, 0); CHECK_LT(rm, 8); - buffer_.Emit<uint8_t>(0xC0 + (rm << 3) + reg); + buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3)); } inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) { diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 69a5fa0db..37a09328f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -134,6 +134,32 @@ TEST_F(AssemblerX86_64Test, XorqImm) { DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi"); } +TEST_F(AssemblerX86_64Test, Movaps) { + GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::XmmRegister(x86_64::XMM8)); + DriverStr("movaps %xmm8, %xmm0", "movaps"); +} + +TEST_F(AssemblerX86_64Test, Movd) { + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::R11)); + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::R11)); + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM0)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM0)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM8)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM8)); + const char* expected = + "movd %r11, %xmm0\n" + "movd %rax, %xmm0\n" + "movd %r11, %xmm8\n" + "movd %rax, %xmm8\n" + "movd %xmm0, %r11\n" + "movd %xmm0, %rax\n" + "movd %xmm8, %r11\n" + "movd %xmm8, %rax\n"; + DriverStr(expected, "movd"); +} + TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11)); GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11)); |