Diffstat (limited to 'compiler/optimizing')
48 files changed, 2941 insertions, 572 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index ab77505b6f..be432c5a20 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -59,7 +59,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { return new (allocator) HGreaterThan(lhs, rhs); } else if (cond->IsGreaterThan()) { return new (allocator) HLessThanOrEqual(lhs, rhs); - } else if (cond->IsGreaterThanOrEqual()) { + } else { + DCHECK(cond->IsGreaterThanOrEqual()); return new (allocator) HLessThan(lhs, rhs); } } else if (cond->IsIntConstant()) { @@ -70,10 +71,11 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { DCHECK(int_const->IsOne()); return graph->GetIntConstant(0); } + } else { + // General case when 'cond' is another instruction of type boolean. + // Negate with 'cond == 0'. + return new (allocator) HEqual(cond, graph->GetIntConstant(0)); } - - // TODO: b/19992954 - return nullptr; } void HBooleanSimplifier::Run() { @@ -105,10 +107,6 @@ void HBooleanSimplifier::Run() { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { replacement = GetOppositeCondition(if_condition); - if (replacement == nullptr) { - // Something we could not handle. - continue; - } if (replacement->GetBlock() == nullptr) { block->InsertInstructionBefore(replacement, if_instruction); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 1d167949f4..6511120794 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -239,7 +239,6 @@ class ValueBound : public ValueObject { *underflow = true; return Min(); } - return ValueBound(instruction_, new_constant); } private: @@ -443,9 +442,31 @@ class MonotonicValueRange : public ValueRange { class BCEVisitor : public HGraphVisitor { public: + // The least number of bounds checks that should be eliminated by triggering + // the deoptimization technique. + static constexpr size_t kThresholdForAddingDeoptimize = 2; + + // Very large constant index is considered as an anomaly. This is a threshold + // beyond which we don't bother to apply the deoptimization technique since + // it's likely some AIOOBE will be thrown. + static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + explicit BCEVisitor(HGraph* graph) : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()) {} + maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + first_constant_index_bounds_check_map_.clear(); + HGraphVisitor::VisitBasicBlock(block); + if (need_to_revisit_block_) { + AddComparesWithDeoptimization(block); + need_to_revisit_block_ = false; + first_constant_index_bounds_check_map_.clear(); + GetValueRangeMap(block)->clear(); + HGraphVisitor::VisitBasicBlock(block); + } + } private: // Return the map of proven value ranges at the beginning of a basic block. @@ -701,9 +722,26 @@ class BCEVisitor : public HGraphVisitor { } } + if (first_constant_index_bounds_check_map_.find(array_length->GetId()) == + first_constant_index_bounds_check_map_.end()) { + // Remember the first bounds check against array_length of a constant index. + // That bounds check instruction has an associated HEnvironment where we + // may add an HDeoptimize to eliminate bounds checks of constant indices + // against array_length. 
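Taken together, first_constant_index_bounds_check_map_ and need_to_revisit_block_ implement a two-pass scheme: remember the first constant-index bounds check per array length, and if a second one appears in the same block, revisit the block and insert a single compare-and-deoptimize guard. A minimal standalone model of that bookkeeping (the container and names here are illustrative, not ART's ArenaSafeMap):

    #include <cstddef>
    #include <map>
    #include <vector>

    struct Check { int array_length_id; int constant_index; };

    // Returns the ids of array lengths that are checked against a constant
    // index at least kThresholdForAddingDeoptimize times in one block;
    // those are the candidates for a compare-and-deoptimize guard.
    std::vector<int> FindDeoptCandidates(const std::vector<Check>& checks) {
      constexpr std::size_t kThresholdForAddingDeoptimize = 2;
      std::map<int, std::size_t> seen;  // array_length_id -> count so far
      std::vector<int> candidates;
      for (const Check& c : checks) {
        if (++seen[c.array_length_id] == kThresholdForAddingDeoptimize) {
          candidates.push_back(c.array_length_id);
        }
      }
      return candidates;
    }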
+ first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check); + } else { + // We've seen it at least twice. It's beneficial to introduce a compare with + // deoptimization fallback to eliminate the bounds checks. + need_to_revisit_block_ = true; + } + // Once we have an array access like 'array[5] = 1', we record array.length >= 6. // We currently don't do it for non-constant index since a valid array[i] can't prove // a valid array[i-1] yet due to the lower bound side. + if (constant == INT_MAX) { + // INT_MAX as an index will definitely throw AIOOBE. + return; + } ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) @@ -938,8 +976,90 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitDeoptimize(HDeoptimize* deoptimize) { + // Right now it's only HLessThanOrEqual. + DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual()); + HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual(); + HInstruction* instruction = less_than_or_equal->InputAt(0); + if (instruction->IsArrayLength()) { + HInstruction* constant = less_than_or_equal->InputAt(1); + DCHECK(constant->IsIntConstant()); + DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize); + ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max()); + GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void AddCompareWithDeoptimization(HInstruction* array_length, + HIntConstant* const_instr, + HBasicBlock* block) { + DCHECK(array_length->IsArrayLength()); + ValueRange* range = LookupValueRange(array_length, block); + ValueBound lower_bound = range->GetLower(); + DCHECK(lower_bound.IsConstant()); + DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize); + DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1); + + // If array_length is less than lower_const, deoptimize. + HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get( + array_length->GetId())->AsBoundsCheck(); + HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) + HDeoptimize(cond, bounds_check->GetDexPc()); + block->InsertInstructionBefore(cond, bounds_check); + block->InsertInstructionBefore(deoptimize, bounds_check); + deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); + } + + void AddComparesWithDeoptimization(HBasicBlock* block) { + for (ArenaSafeMap<int, HBoundsCheck*>::iterator it = + first_constant_index_bounds_check_map_.begin(); + it != first_constant_index_bounds_check_map_.end(); + ++it) { + HBoundsCheck* bounds_check = it->second; + HArrayLength* array_length = bounds_check->InputAt(1)->AsArrayLength(); + HIntConstant* lower_bound_const_instr = nullptr; + int32_t lower_bound_const = INT_MIN; + size_t counter = 0; + // Count the constant indexing for which bounds checks haven't + // been removed yet. 
+ for (HUseIterator<HInstruction*> it2(array_length->GetUses()); + !it2.Done(); + it2.Advance()) { + HInstruction* user = it2.Current()->GetUser(); + if (user->GetBlock() == block && + user->IsBoundsCheck() && + user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) { + DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1)); + HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant(); + if (const_instr->GetValue() > lower_bound_const) { + lower_bound_const = const_instr->GetValue(); + lower_bound_const_instr = const_instr; + } + counter++; + } + } + if (counter >= kThresholdForAddingDeoptimize && + lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) { + AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in + // a block that checks a constant index against that HArrayLength. + SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + + // For the block, there is at least one HArrayLength instruction for which there + // is more than one bounds check instruction with constant indexing. And it's + // beneficial to add a compare instruction that has deoptimization fallback and + // eliminate those bounds checks. + bool need_to_revisit_block_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index b3653fe903..75cf1cf063 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -284,9 +284,9 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ASSERT_FALSE(IsRemoved(bounds_check)); } -// array[5] = 1; // Can't eliminate. -// array[4] = 1; // Can eliminate. // array[6] = 1; // Can't eliminate. +// array[5] = 1; // Can eliminate. +// array[4] = 1; // Can eliminate. 
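The expectations are reordered because elimination is order-sensitive: once the check for array[6] has executed, the pass records array.length >= 7, which subsumes the later checks at indices 5 and 4; the first check itself has nothing earlier to prove it away. In range terms:

    // After bounds_check6 passes, the proven range of array.length is
    // [7, INT_MAX]. A later check of constant index c is redundant iff
    // c < proven_min_length.
    bool IsRedundant(int proven_min_length, int c) {
      return c < proven_min_length;
    }
    // IsRedundant(7, 5) and IsRedundant(7, 4) hold, so bounds_check5 and
    // bounds_check4 are removed; bounds_check6 established the range and
    // must stay.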
TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -311,35 +311,35 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check5 = new (&allocator) - HBoundsCheck(constant_5, array_length, 0); + HBoundsCheck* bounds_check6 = new (&allocator) + HBoundsCheck(constant_6, array_length, 0); HInstruction* array_set = new (&allocator) HArraySet( - null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check5); + block->AddInstruction(bounds_check6); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check4 = new (&allocator) - HBoundsCheck(constant_4, array_length, 0); + HBoundsCheck* bounds_check5 = new (&allocator) + HBoundsCheck(constant_5, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check4); + block->AddInstruction(bounds_check5); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check6 = new (&allocator) - HBoundsCheck(constant_6, array_length, 0); + HBoundsCheck* bounds_check4 = new (&allocator) + HBoundsCheck(constant_4, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check6); + block->AddInstruction(bounds_check4); block->AddInstruction(array_set); block->AddInstruction(new (&allocator) HGoto()); @@ -353,9 +353,9 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check5)); - ASSERT_TRUE(IsRemoved(bounds_check4)); ASSERT_FALSE(IsRemoved(bounds_check6)); + ASSERT_TRUE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check4)); } // for (int i=initial; i<array.length; i+=increment) { array[i] = 10; } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2cdd5af9f3..a912d4ccc4 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -23,6 +23,7 @@ #include "dex_instruction.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/art_field.h" #include "mirror/art_field-inl.h" #include "mirror/class_loader.h" @@ -230,8 +231,7 @@ void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) { } } -bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks ATTRIBUTE_UNUSED, +bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches) { const CompilerOptions& 
compiler_options = compiler_driver_->GetCompilerOptions(); CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter(); @@ -239,19 +239,20 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, return false; } - if (compiler_options.IsHugeMethod(number_of_dex_instructions)) { + if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) { VLOG(compiler) << "Skip compilation of huge method " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod); return true; } // If it's large and contains no branches, it's likely to be machine generated initialization. - if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) { + if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_) + && (number_of_branches == 0)) { VLOG(compiler) << "Skip compilation of large method with no branch " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches); return true; } @@ -278,18 +279,14 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Compute the number of dex instructions, blocks, and branches. We will // check these values against limits given to the compiler. - size_t number_of_dex_instructions = 0; - size_t number_of_blocks = 0; size_t number_of_branches = 0; // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets( - code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches); + ComputeBranchTargets(code_ptr, code_end, &number_of_branches); // Note that the compiler driver is null when unit testing. - if ((compiler_driver_ != nullptr) - && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) { + if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { return false; } @@ -355,8 +352,6 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, - size_t* number_of_dex_instructions, - size_t* number_of_blocks, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -369,7 +364,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // the locations these instructions branch to. 
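The SkipCompilation rewrite above sizes methods by code_item.insns_size_in_code_units_ rather than a separately counted instruction total, which is also what lets ComputeBranchTargets shed two of its out-parameters below. A sketch of the filter's shape (the threshold values are placeholders; the real ones come from CompilerOptions::IsHugeMethod and IsLargeMethod):

    #include <cstddef>
    #include <cstdint>

    // Placeholder thresholds, assumed for illustration only.
    constexpr uint32_t kHugeMethodCodeUnits = 10000;
    constexpr uint32_t kLargeMethodCodeUnits = 600;

    bool ShouldSkipCompilation(uint32_t insns_size_in_code_units,
                               std::size_t number_of_branches) {
      if (insns_size_in_code_units > kHugeMethodCodeUnits) {
        return true;  // huge method
      }
      // Large but branch-free: likely machine-generated initialization.
      return insns_size_in_code_units > kLargeMethodCodeUnits &&
             number_of_branches == 0;
    }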
uint32_t dex_pc = 0; while (code_ptr < code_end) { - (*number_of_dex_instructions)++; const Instruction& instruction = *Instruction::At(code_ptr); if (instruction.IsBranch()) { (*number_of_branches)++; @@ -378,14 +372,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else if (instruction.IsSwitch()) { SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); @@ -403,14 +395,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } // The next case gets its own block. if (i < num_entries) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(table.GetDexPcForIndex(i), block); - (*number_of_blocks)++; } } @@ -420,7 +410,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else { code_ptr += instruction.SizeInCodeUnits(); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 6a0738a7b9..dc6d97eb0c 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -90,8 +90,6 @@ class HGraphBuilder : public ValueObject { // branches. void ComputeBranchTargets(const uint16_t* start, const uint16_t* end, - size_t* number_of_dex_instructions, - size_t* number_of_block, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); HBasicBlock* FindBlockStartingAt(int32_t index) const; @@ -217,9 +215,7 @@ class HGraphBuilder : public ValueObject { HInstruction* value, int32_t case_value_int, int32_t target_offset, uint32_t dex_pc); - bool SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks, - size_t number_of_branches); + bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches); void MaybeRecordStat(MethodCompilationStat compilation_stat); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bd6e943bf0..8736374306 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); + DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); // Don't generate code for an empty block. 
Its predecessors will branch to its successor @@ -132,7 +133,6 @@ size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) { } LOG(FATAL) << "Could not find a register in baseline register allocator"; UNREACHABLE(); - return -1; } size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) { @@ -145,7 +145,6 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l } LOG(FATAL) << "Could not find a register in baseline register allocator"; UNREACHABLE(); - return -1; } void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, @@ -378,10 +377,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, case kMips: return nullptr; case kX86: { - return new x86::CodeGeneratorX86(graph, compiler_options); + return new x86::CodeGeneratorX86(graph, + *isa_features.AsX86InstructionSetFeatures(), + compiler_options); } case kX86_64: { - return new x86_64::CodeGeneratorX86_64(graph, compiler_options); + return new x86_64::CodeGeneratorX86_64(graph, + *isa_features.AsX86_64InstructionSetFeatures(), + compiler_options); } default: return nullptr; @@ -413,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const { +void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { + for (size_t i = 0; i < pc_infos_.Size(); i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + uint32_t pc2dex_offset = pc_info.native_pc; + int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); + } +} + +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; @@ -423,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* uint32_t dex2pc_offset = 0u; int32_t dex2pc_dalvik_offset = 0; - if (src_map != nullptr) { - src_map->reserve(pc2dex_entries); - } - for (size_t i = 0; i < pc2dex_entries; i++) { struct PcInfo pc_info = pc_infos_.Get(i); pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); pc2dex_offset = pc_info.native_pc; pc2dex_dalvik_offset = pc_info.dex_pc; - if (src_map != nullptr) { - src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); - } } // Walk over the blocks and find which ones correspond to catch block entries. 
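Splitting BuildSourceMap out of BuildMappingTable separates the debug-only src_map consumer from the LEB128-encoded mapping table proper. Both walk the same pc_infos_ entries; the table stores deltas against the previous entry, which is why the loop carries pc2dex_offset and pc2dex_dalvik_offset along. A rough model of the delta-size accounting (the SLEB128 size is stubbed with the unsigned variant here, so this is a sketch, not the exact encoder):

    #include <cstdint>
    #include <vector>

    struct PcInfo { uint32_t native_pc; int32_t dex_pc; };

    // One byte per 7 significant bits, as in ULEB128.
    uint32_t Uleb128Size(uint32_t v) {
      uint32_t n = 1;
      for (v >>= 7; v != 0; v >>= 7) ++n;
      return n;
    }

    uint32_t Pc2DexDataSize(const std::vector<PcInfo>& pc_infos) {
      uint32_t size = 0;
      uint32_t prev_pc = 0;
      int32_t prev_dex = 0;
      for (const PcInfo& info : pc_infos) {
        size += Uleb128Size(info.native_pc - prev_pc);
        size += Uleb128Size(static_cast<uint32_t>(info.dex_pc - prev_dex));
        prev_pc = info.native_pc;
        prev_dex = info.dex_pc;
      }
      return size;
    }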
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 07ca6b1ccf..b888aca264 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -205,7 +205,8 @@ class CodeGenerator { slow_paths_.Add(slow_path); } - void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; + void BuildSourceMap(DefaultSrcMap* src_map) const; + void BuildMappingTable(std::vector<uint8_t>* vector) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; @@ -425,6 +426,8 @@ class CodeGenerator { StackMapStream stack_map_stream_; + friend class OptimizingCFITest; + DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1f95041a92..a799a519c0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -287,6 +287,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); }; +class DeoptimizationSlowPathARM : public SlowPathCodeARM { + public: + explicit DeoptimizationSlowPathARM(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); +}; + #undef __ #undef __ @@ -493,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + void CodeGeneratorARM::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); @@ -511,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() { // PC is in the list of callee-save to mimic Quick, but we need to push // LR at entry instead. 
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR); + uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR; + __ PushList(push_mask); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask)); + __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpushs(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize); } - __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, -adjust); + __ cfi().AdjustCFAOffset(adjust); __ StoreToOffset(kStoreWord, R0, SP, 0); } @@ -525,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() { __ bx(LR); return; } - __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, adjust); + __ cfi().AdjustCFAOffset(-adjust); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpops(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_); } __ PopList(core_spill_mask_); } @@ -542,7 +581,6 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -555,10 +593,11 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -663,7 +702,6 @@ Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type ty return Location(); } UNREACHABLE(); - return Location(); } void CodeGeneratorARM::Move32(Location destination, Location source) { @@ -887,24 +925,17 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
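All of the cfi() calls threaded through the ARM prologue above maintain one invariant, now asserted right after GenerateFrameEntry in code_generator.cc: the tracked CFA offset must equal the method's frame size. A standalone model of that bookkeeping:

    // The CFA offset is the distance from SP back to the canonical frame
    // address; every SP decrement in the prologue must be mirrored.
    struct CfiState {
      int cfa_offset = 0;
      void AdjustCFAOffset(int delta) { cfa_offset += delta; }
    };

    int ModelArmPrologue(int num_pushed_core, int num_pushed_fp,
                         int remaining_adjust) {
      const int kWordSize = 4;
      CfiState cfi;
      cfi.AdjustCFAOffset(kWordSize * num_pushed_core);  // PushList
      cfi.AdjustCFAOffset(kWordSize * num_pushed_fp);    // vpushs
      cfi.AdjustCFAOffset(remaining_adjust);             // AddConstant(SP, -adjust)
      return cfi.cfa_offset;  // must equal GetFrameSize()
    }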
int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ b(always_true_target); } return; } else { @@ -913,10 +944,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { } else { if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { // Condition has been materialized, compare the output to 0 - DCHECK(if_instr->GetLocations()->InAt(0).IsRegister()); - __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(), + DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); + __ cmp(instruction->GetLocations()->InAt(0).AsRegister<Register>(), ShifterOperand(0)); - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE); + __ b(true_target, NE); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. @@ -938,16 +969,55 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(temp)); } } - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), - ARMCondition(cond->AsCondition()->GetCondition())); + __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ b(false_target); + } +} + +void LocationsBuilderARM::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); } } +void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} void LocationsBuilderARM::VisitCondition(HCondition* comp) { LocationSummary* locations = @@ -1139,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ 
cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitReturn(HReturn* ret) { @@ -1150,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) { void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index bcdea7a639..06f425ea21 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,6 +188,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 32ada3837e..5fe8adc86a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -352,6 +352,26 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); }; +class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -445,18 +465,65 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // ... : reserved frame space. // sp[0] : current method. 
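Each return visitor now brackets the epilogue with RememberState/RestoreState: the frame-exit CFA adjustments hold only along the return path, so the unwind state has to snap back to a full frame for any code emitted after the ret. Modeled:

    #include <cassert>

    struct CfiStream {
      int cfa = 0;
      int remembered = 0;
      void RememberState() { remembered = cfa; }
      void RestoreState() { cfa = remembered; }
      void AdjustCFAOffset(int d) { cfa += d; }
      void DefCFAOffset(int off) { cfa = off; }
    };

    void ModelVisitReturn(CfiStream& cfi, int frame_size) {
      cfi.RememberState();               // snapshot: frame still in place
      cfi.AdjustCFAOffset(-frame_size);  // GenerateFrameExit pops it
      // ... ret ...
      cfi.RestoreState();                // code after the ret has a frame
      cfi.DefCFAOffset(frame_size);
      assert(cfi.cfa == frame_size);
    }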
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + GetAssembler()->cfi().AdjustCFAOffset(frame_size); + SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); } } void CodeGeneratorARM64::GenerateFrameExit() { if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); __ Drop(frame_size); + GetAssembler()->cfi().AdjustCFAOffset(-frame_size); + } +} + +static inline dwarf::Reg DWARFReg(CPURegister reg) { + if (reg.IsFPRegister()) { + return dwarf::Reg::Arm64Fp(reg.code()); + } else { + DCHECK_LT(reg.code(), 31u); // X0 - X30. + return dwarf::Reg::Arm64Core(reg.code()); + } +} + +void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size); + offset += 2 * size; } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Str(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + } + DCHECK(registers.IsEmpty()); +} + +void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + GetAssembler()->cfi().Restore(DWARFReg(dst1)); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Ldr(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + } + DCHECK(registers.IsEmpty()); } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -1611,25 +1678,18 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { } } -void LocationsBuilderARM64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); HCondition* condition = cond->AsCondition(); - vixl::Label* true_target = 
codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); if (cond->IsIntConstant()) { int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ B(true_target); + if (always_true_target != nullptr) { + __ B(always_true_target); } return; } else { @@ -1637,31 +1697,87 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = if_instr->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(0); DCHECK(cond_val.IsRegister()); - __ Cbnz(InputRegisterAt(if_instr, 0), true_target); + __ Cbnz(InputRegisterAt(instruction, 0), true_target); } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - if (arm64_cond == eq) { - __ Cbz(lhs, true_target); - } else { - __ Cbnz(lhs, true_target); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. 
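The widened zero-compare lowering avoids a flags-setting Cmp whenever the right-hand side is the immediate 0 and the condition is not gt/le: eq and ne keep using Cbz/Cbnz, while lt and ge reduce to a test of the sign bit (bit 31 for W registers, bit 63 for X registers). In table form:

    enum class Cond { kEq, kNe, kLt, kGe, kGt, kLe };

    // For "lhs <cond> #0", the branch form the ARM64 backend now emits.
    const char* ZeroCompareBranch(Cond c) {
      switch (c) {
        case Cond::kEq: return "cbz  lhs, target";             // lhs == 0
        case Cond::kNe: return "cbnz lhs, target";             // lhs != 0
        case Cond::kLt: return "tbnz lhs, #sign_bit, target";  // sign set: < 0
        case Cond::kGe: return "tbz  lhs, #sign_bit, target";  // sign clear: >= 0
        default:        return "cmp lhs, #0; b.cond target";   // gt/le still compare
      }
    }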
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); } } else { __ Cmp(lhs, rhs); __ B(arm64_cond, true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + if (false_target != nullptr) { __ B(false_target); } } +void LocationsBuilderARM64::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { + vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + vixl::Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM64(deoptimize); + codegen_->AddSlowPath(slow_path); + vixl::Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2349,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { @@ -2359,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitShl(HShl* shl) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2c624d2926..9430e31037 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -23,8 +23,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" #include 
"arch/arm64/quick_method_frame_info_arm64.h" namespace art { @@ -165,6 +165,10 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -223,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; + void SpillRegisters(vixl::CPURegList registers, int offset); + void UnspillRegisters(vixl::CPURegList registers, int offset); vixl::CPURegList GetFramePreservedCoreRegisters() const { return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 007e25ab4a..a6fb07fa98 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -52,7 +52,7 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -67,7 +67,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -116,7 +116,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -137,7 +137,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -295,6 +295,27 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86); }; +class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { + public: + explicit DeoptimizationSlowPathX86(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize))); + // No need to restore live registers. 
+ DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); +}; + #undef __ #define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> @@ -340,7 +361,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 return GetFloatingPointSpillSlotSize(); } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -353,7 +376,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -436,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); +} + void CodeGeneratorX86::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); @@ -455,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushl(reg); + __ cfi().AdjustCFAOffset(kX86WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ subl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } @@ -467,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() { return; } - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ addl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popl(reg); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -491,7 +528,6 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -504,10 +540,11 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -785,24 +822,17 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) 
{ UNUSED(exit); } -void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -815,20 +845,19 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. - Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -847,14 +876,54 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; } + GenerateTestAndBranch(if_instr, true_target, 
false_target, always_true_target); +} + +void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -1047,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitReturn(HReturn* ret) { @@ -1106,12 +1178,15 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); } } + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -2637,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. + locations->SetInAt(0, Location::Any()); + // The shift count needs to be in CL or a constant. locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. 
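HandleShift now accepts a constant shift count for longs as well, and the new GenerateShlLong/GenerateShrLong/GenerateUShrLong overloads below split it into three cases: exactly 32, more than 32, and 1 to 31. Reference semantics for the shl case, with the value held in a (low, high) register pair as on x86-32:

    #include <cstdint>

    void ShlLong(uint32_t& low, uint32_t& high, int shift) {
      shift &= 63;                  // kMaxLongShiftValue
      if (shift == 0) return;       // input already equals output
      if (shift == 32) {            // high gets low, low gets 0
        high = low;
        low = 0;
      } else if (shift > 32) {      // movl high, low; shll high; xorl low
        high = low << (shift - 32);
        low = 0;
      } else {                      // shld spills low's top bits into high
        high = (high << shift) | (low >> (32 - shift));
        low <<= shift;
      }
    }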
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2665,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - Register first_reg = first.AsRegister<Register>(); - if (second.IsRegister()) { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - __ shll(first_reg, second_reg); - } else if (op->IsShr()) { - __ sarl(first_reg, second_reg); + if (first.IsRegister()) { + Register first_reg = first.AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } } else { - __ shrl(first_reg, second_reg); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (op->IsShl()) { - __ shll(first_reg, imm); - } else if (op->IsShr()) { - __ sarl(first_reg, imm); + DCHECK(first.IsStackSlot()) << first; + Address addr(ESP, first.GetStackIndex()); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(addr, second_reg); + } else if (op->IsShr()) { + __ sarl(addr, second_reg); + } else { + __ shrl(addr, second_reg); + } } else { - __ shrl(first_reg, imm); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(addr, imm); + } else if (op->IsShr()) { + __ sarl(addr, imm); + } else { + __ shrl(addr, imm); + } } } + break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2705,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. 
+ codegen_->EmitParallelMoves( + loc.ToLow(), loc.ToHigh(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow()); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2716,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted arithmetically by shift - 32 to keep the sign. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2727,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), loc.ToLow(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh()); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3301,7 +3486,7 @@ void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruct Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<Register>(), Immediate(0)); + __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0)); } else { @@ -3487,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register.
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + bool is_fp_type = (value_type == Primitive::kPrimFloat) + || (value_type == Primitive::kPrimDouble); + if (is_fp_type) { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + } } // Temporary registers for the write barrier. if (needs_write_barrier) { @@ -3766,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { } void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack. + __ pushl(Address(ESP, src)); + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + } } void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); - __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); - __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack instead. + // Push src low word. + __ pushl(Address(ESP, src)); + // Push src high word. Stack offset = 4. + __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */)); + + // Pop into dst high word. Stack offset = 8. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */)); + // Finally dst low word. Stack offset = 4. + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + __ movl(temp_reg, Address(ESP, src + kX86WordSize)); + __ movl(Address(ESP, dst + kX86WordSize), temp_reg); + } } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -3847,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ xorps(dest, dest); } else { ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp = static_cast<Register>(ensure_scratch.GetRegister()); - __ movl(temp, Immediate(value)); - __ movd(dest, temp); + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Avoid spilling/restoring a scratch register by using the stack. 
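+ // (movd needs a general-purpose source register; with none free, go through memory.)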
+ __ pushl(Immediate(value)); + __ movss(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(4)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Immediate(value)); + __ movd(dest, temp); + } } } else { DCHECK(destination.IsStackSlot()) << destination; @@ -3899,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } -void ParallelMoveResolverX86::Exchange(Register reg, int mem) { - Register suggested_scratch = reg == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch( - this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); +void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + ScratchRegisterScope possible_scratch( + this, reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister || temp_reg == reg2) { + __ pushl(reg1); + __ movl(reg1, reg2); + __ popl(reg2); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, reg1); + __ movl(reg1, reg2); + __ movl(reg2, temp); + } +} - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); - __ movl(Address(ESP, mem + stack_offset), reg); - __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); +void ParallelMoveResolverX86::Exchange(Register reg, int mem) { + ScratchRegisterScope possible_scratch( + this, reg, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movl(Address(ESP, mem + kX86WordSize), reg); + __ popl(reg); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movl(Address(ESP, mem), reg); + __ movl(reg, temp); + } } void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, mem + stack_offset)); - __ movss(Address(ESP, mem + stack_offset), reg); - __ movd(reg, temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movss(Address(ESP, mem + kX86WordSize), reg); + __ movss(reg, Address(ESP, 0)); + __ addl(ESP, Immediate(kX86WordSize)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movss(Address(ESP, mem), reg); + __ movd(reg, temp); + } } void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch1( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch2( - this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; - stack_offset += ensure_scratch2.IsSpilled() ? 
kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); - __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); - __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); - __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + ScratchRegisterScope possible_scratch1( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg1 = possible_scratch1.GetRegister(); + if (temp_reg1 == kNoRegister) { + // No free registers. Use the stack. + __ pushl(Address(ESP, mem1)); + __ pushl(Address(ESP, mem2 + kX86WordSize)); + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem1 + kX86WordSize)); + __ popl(Address(ESP, mem2)); + } else { + // Got the first one. Try for a second. + ScratchRegisterScope possible_scratch2( + this, temp_reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg2 = possible_scratch2.GetRegister(); + if (temp_reg2 == kNoRegister) { + Register temp = static_cast<Register>(temp_reg1); + // Bummer. Only have one free register to use. + // Save mem1 on the stack. + __ pushl(Address(ESP, mem1)); + + // Copy mem2 into mem1. + __ movl(temp, Address(ESP, mem2 + kX86WordSize)); + __ movl(Address(ESP, mem1 + kX86WordSize), temp); + + // Now pop mem1 into mem2. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem2)); + } else { + // Great. We have 2 registers to play with. + Register temp1 = static_cast<Register>(temp_reg1); + Register temp2 = static_cast<Register>(temp_reg2); + DCHECK_NE(temp1, temp2); + __ movl(temp1, Address(ESP, mem1)); + __ movl(temp2, Address(ESP, mem2)); + __ movl(Address(ESP, mem2), temp1); + __ movl(Address(ESP, mem1), temp2); + } + } } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -3943,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index a5489d2844..8c56e35329 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver { X86Assembler* GetAssembler() const; private: + void Exchange(Register reg1, Register reg2); void Exchange(Register reg, int mem); void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); @@ -171,6 +172,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void
HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -179,6 +183,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -188,7 +196,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -274,6 +284,10 @@ class CodeGeneratorX86 : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -282,6 +296,7 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; + const X86InstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); }; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2bb0349932..01b24ea33f 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -315,6 +315,27 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); }; +class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { + public: + explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ gs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true)); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); +}; + #undef __ #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())-> @@ -391,7 +412,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); -CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -405,7 +428,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features), + constant_area_start_(0) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -458,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); +} +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); +} + void CodeGeneratorX86_64::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); @@ -478,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ subq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)), - XmmRegister(kFpuCalleeSaves[i])); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); + __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); } } @@ -503,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() { size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(XmmRegister(kFpuCalleeSaves[i]), - Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i))); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); + __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); } } - __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ addq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -532,7 
+575,6 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -545,10 +587,11 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } void CodeGeneratorX86_64::Move(Location destination, Location source) { @@ -607,7 +650,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int64_t value = constant->AsLongConstant()->GetValue(); + int64_t value; if (constant->IsDoubleConstant()) { value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); } else { @@ -735,24 +778,17 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -765,21 +801,20 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. 
- Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -797,16 +832,56 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); } } +void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86_64(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderX86_64::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1068,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { @@ -1120,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); } } + __ 
cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -1181,7 +1262,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1242,7 +1323,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1896,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1960,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ addss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ addsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -1993,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2031,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ subss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ subsd(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ subsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2069,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2114,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: { DCHECK(first.Equals(locations->Out())); - __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ mulss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { DCHECK(first.Equals(locations->Out())); - __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2493,7 +2628,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2518,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ divss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + 
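// Only the spilled case remains; divide straight from the stack slot. +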
DCHECK(second.IsDoubleStackSlot()); + __ divsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -3668,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + this, TMP, codegen_->GetNumberOfCoreRegisters()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movq(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Use the stack as a temporary. + // Save mem1 on the stack. + __ pushq(Address(CpuRegister(RSP), mem1)); + + // Copy mem2 into mem1. + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize)); + __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP)); + + // Now pop mem1 into mem2. + __ popq(Address(CpuRegister(RSP), mem2)); + } else { + CpuRegister temp = CpuRegister(temp_reg); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1)); + __ movq(temp, Address(CpuRegister(RSP), mem2)); + __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1), temp); + } } void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { @@ -3685,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + __ movq(CpuRegister(TMP), reg1); + __ movq(reg1, reg2); + __ movq(reg2, CpuRegister(TMP)); +} + void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movsd(Address(CpuRegister(RSP), mem), reg); @@ -3697,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); + Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { @@ -4062,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { + // Generate the constant area if needed. + X86_64Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. If used for vectors at a later time, this will need to be + // updated to 16 bytes with the appropriate offset. + assembler->Align(4, 0); + constant_area_start_ = assembler->CodeSize(); + assembler->AddConstantArea(); + } + + // And finish up. 
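+ // (constant_area_start_ is now fixed, so the RIP-relative fixups below can resolve against it.)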
+ CodeGenerator::Finalize(allocator); +} + +/** + * Class to handle late fixup of offsets into constant area. + */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { + public: + RIPFixup(const CodeGeneratorX86_64& codegen, int offset) + : codegen_(codegen), offset_into_constant_area_(offset) {} + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; + int relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + const CodeGeneratorX86_64& codegen_; + + // Location in constant area that the fixup refers to. + int offset_into_constant_area_; +}; + +Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v)); + return Address::RIP(fixup); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index f6fbc2e6bc..61bf6ac71d 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); @@ -185,6 +186,10 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -194,7 +199,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { class CodeGeneratorX86_64 : public CodeGenerator { public: - CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86_64() {} void GenerateFrameEntry() OVERRIDE; @@ -240,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void Finalize(CodeAllocator* allocator) OVERRIDE; InstructionSet 
GetInstructionSet() const OVERRIDE { return InstructionSet::kX86_64; @@ -267,6 +275,19 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + + int ConstantAreaStart() const { + return constant_area_start_; + } + + Address LiteralDoubleAddress(double v); + Address LiteralFloatAddress(float v); + Address LiteralInt32Address(int32_t v); + Address LiteralInt64Address(int64_t v); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -275,6 +296,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; + const X86_64InstructionSetFeatures& isa_features_; + + // Offset to the start of the constant area in the assembled code. + // Used for fixups to the constant area. + int constant_area_start_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6053ad51f4..2be117bf38 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -19,6 +19,8 @@ #include "arch/instruction_set.h" #include "arch/arm/instruction_set_features_arm.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { InternalCodeAllocator allocator; CompilerOptions compiler_options; - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. 
codegenX86.CompileBaseline(&allocator, true); @@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenARM, has_result, expected); } - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); codegenX86_64.CompileBaseline(&allocator, true); if (kRuntimeISA == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); @@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph, compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index fd8c0c6242..966165bf4c 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -20,8 +20,8 @@ #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" namespace art { namespace arm64 { diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 6853d54c48..02ad675dc3 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,6 +16,7 @@ #include <functional> +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HConstantFolding(graph).Run(); SSAChecker ssa_checker_cf(&allocator, graph); ssa_checker_cf.Run(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index a644719622..98ae1ec5d3 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 49c0d3884f..4c283788b5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -337,13 +337,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HGraphVisualizer::HGraphVisualizer(std::ostream* output, HGraph* graph, - const CodeGenerator& codegen, - const char* method_name) - : output_(output), graph_(graph), codegen_(codegen) { - if (output == nullptr) { - return; - } + const CodeGenerator& codegen) + : output_(output), graph_(graph), codegen_(codegen) {} +void HGraphVisualizer::PrintHeader(const char* method_name) const { + DCHECK(output_ != nullptr); HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index bc553aed74..513bceb369 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -35,9 +35,9 @@ class HGraphVisualizer : public ValueObject { public: HGraphVisualizer(std::ostream* output, HGraph* graph, - const CodeGenerator& codegen, - const char* method_name); + const CodeGenerator& codegen); + void PrintHeader(const char* method_name) const; void DumpGraph(const char* pass_name, bool is_after_pass = true) const; private: diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 628a844cc7..20aa45f197 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -90,7 +90,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; case kIntrinsicReverseBytes: switch (GetType(method.d.data, true)) { case Primitive::kPrimShort: @@ -103,7 +102,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // Abs. case kIntrinsicAbsDouble: @@ -166,7 +164,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // Memory.poke. case kIntrinsicPoke: @@ -183,7 +180,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // String. 
case kIntrinsicCharAt: @@ -211,7 +207,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; case kIntrinsicUnsafeGet: { const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); switch (GetType(method.d.data, false)) { @@ -225,7 +220,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; } case kIntrinsicUnsafePut: { enum Sync { kNoSync, kVolatile, kOrdered }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 33176f009c..94e27e912e 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -776,10 +776,10 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ mov(out, ShifterOperand(0), CC); } -void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } -void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 72d303c870..d1176c460f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,8 +28,8 @@ #include "utils/arm64/assembler_arm64.h" #include "utils/arm64/constants_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" using namespace vixl; // NOLINT(build/namespaces) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 384737f55a..aec2d19b1d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86.h" +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000; static constexpr int kDoubleNaNLow = 0x00000000; static constexpr int kFloatNaN = 0x7FC00000; +IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); } @@ -152,6 +158,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); + RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; UNREACHABLE(); @@ -313,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); } +void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { + 
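// Reverse a 64-bit value by swapping the 32-bit halves and byte-swapping each half. +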
LocationSummary* locations = invoke->GetLocations(); + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + X86Assembler* assembler = GetAssembler(); + // Assign the inputs to the outputs, mixing low/high. + __ movl(output_lo, input_hi); + __ movl(output_hi, input_lo); + __ bswapl(output_lo); + __ bswapl(output_hi); +} + void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -719,6 +747,149 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. + Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, + HInvoke* invoke, + X86Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, +// as it needs 64 bit instructions. 
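+// Math.round(float) is computed as floor(input + 0.5f) converted to int, with NaN going +// to 0 and values at or above kPrimIntMax clamped to kPrimIntMax.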
+void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { + // Do we have instruction support? + if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(EAX)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + Register out = locations->Out().AsRegister<Register>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And round down (floor) to an integer. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input is NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(inPlusPointFive) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1180,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + // Offset is a long, but in 32 bit mode, we only need the low word. + // Can we update the invoke here to remove a TypeConvert to Long? + locations->SetInAt(2, Location::RequiresRegister()); + // Expected value must be in EAX or EDX:EAX. + // For long, new value must be in ECX:EBX.
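+ // (cmpxchg compares against EAX; cmpxchg8b compares against EDX:EAX with the new value in ECX:EBX.)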
+ if (type == Primitive::kPrimLong) { + locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); + locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); + } else { + locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetInAt(4, Location::RequiresRegister()); + } + + // Force a byte register for the output. + locations->SetOut(Location::RegisterLocation(EAX)); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + // Need a byte register for marking. + locations->AddTemp(Location::RegisterLocation(ECX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location out = locations->Out(); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + if (type == Primitive::kPrimLong) { + DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); + __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + } else { + // Integer or object. + DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); + Register value = locations->InAt(4).AsRegister<Register>(); + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
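// (setb/movzxb below is the standard flag-to-int idiom: setb writes 1 or 0
// into the low byte of EAX depending on ZF, and movzxb zero-extends it so
// the stale upper 24 bits are cleared, leaving
//   out = (old_value == expected) ? 1 : 0;
// as the Java-visible boolean result.)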
+ __ setb(kZero, out.AsRegister<Register>()); + __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, + X86Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // We want to swap high/low, then bswap each one, and then do the same + // as a 32 bit reverse. + // Exchange high and low. 
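// Illustrative note: the exchange plus per-half reversal computes, with
// Reverse32 standing for the bswap-plus-three-SwapBits sequence used by
// VisitIntegerReverse above:
//   uint64_t Reverse64(uint64_t x) {
//     uint32_t lo = static_cast<uint32_t>(x);
//     uint32_t hi = static_cast<uint32_t>(x >> 32);
//     return (static_cast<uint64_t>(Reverse32(lo)) << 32) | Reverse32(hi);
//   }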
+ __ movl(temp, reg_low); + __ movl(reg_low, reg_high); + __ movl(reg_high, temp); + + // bit-reverse low + __ bswapl(reg_low); + SwapBits(reg_low, temp, 1, 0x55555555, assembler); + SwapBits(reg_low, temp, 2, 0x33333333, assembler); + SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); + + // bit-reverse high + __ bswapl(reg_high); + SwapBits(reg_high, temp, 1, 0x55555555, assembler); + SwapBits(reg_high, temp, 2, 0x33333333, assembler); + SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1188,20 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86 diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index e1e8260a5f..4292ec7b99 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -32,7 +32,7 @@ class X86Assembler; class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 736cea88cb..cbf94f0f81 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86_64.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -30,6 +31,11 @@ namespace art { namespace x86_64 { +IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); } @@ -292,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) // TODO: Allow x86 to work with memory. This requires assembler support, see below. // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. - locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. + locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. 
} -static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { +static void MathAbsFP(LocationSummary* locations, + bool is64bit, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location output = locations->Out(); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (output.IsFpuRegister()) { // In-register - XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Can mask directly with constant area using pand if we can guarantee + // that the literal is aligned on a 16 byte boundary. This will avoid a + // temporary. if (is64bit) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); } } else { @@ -335,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { @@ -343,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -393,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, - X86_64Assembler* assembler) { +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location op1_loc = locations->InAt(0); Location op2_loc = locations->InAt(1); Location out_loc = locations->Out(); @@ -421,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // // This removes one jmp, but needs to copy one input (op1) to out. // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); @@ -455,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // NaN handling. __ Bind(&nan); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. 
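// (0x7FF8000000000000 and 0x7FC00000 below are the canonical quiet-NaN
// encodings -- sign 0, all-ones exponent, top fraction bit set -- exactly
// the values Double.doubleToLongBits(Double.NaN) and
// Float.floatToIntBits(Float.NaN) specify, so the intrinsic returns the
// same NaN the library code would.)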
if (is_double) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); } - __ movd(out, cpu_temp, is_double); __ jmp(&done); // out := op2; @@ -477,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, __ Bind(&done); } -static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -486,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo // The following is sub-optimal, but all we can do for now. It would be fine to also accept // the second input to be the output (we can simply swap inputs). locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. } void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, @@ -614,6 +621,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. 
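// (The fallback is an ordinary static/direct call, so the result arrives in
// the standard return register -- RAX for integral and reference values,
// XMM0 for floating point -- and is then copied into whatever output
// location the intrinsic's LocationSummary chose.)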
+ Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be RDI for the invoke. + locations->AddTemp(Location::RegisterLocation(RDI)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen, + HInvoke* invoke, + X86_64Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(RAX)); + // Needs to be RDI for the invoke.
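// (RDI is the register GenerateStaticOrDirectCall above is handed for the
// target method pointer, so reserving it as a temp keeps the register
// allocator from placing a live value there across the fallback call.)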
+ locations->AddTemp(Location::RegisterLocation(RDI)); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out, false); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And floor it to an integral value. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input is NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(inPlusPointFive) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movq(out, Immediate(bit_cast<int64_t, double>(0.5))); + __ movd(inPlusPointFive, out, true); + + // Add in the input. + __ addsd(inPlusPointFive, in); + + // And floor it to an integral value. + __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movq(out, Immediate(kPrimLongMax)); + // maxLong = long-to-double(out) + __ cvtsi2sd(maxLong, out, true); + + // if inPlusPointFive >= maxLong goto done + __ comisd(inPlusPointFive, maxLong); + __ j(kAboveEqual, &done); + + // if input is NaN goto nan + __ j(kUnordered, &nan); + + // output = double-to-long-truncate(inPlusPointFive) + __ cvttsd2si(out, inPlusPointFive, true); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorq(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { + // The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -999,6 +1203,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // expected value must be in EAX/RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); + DCHECK_EQ(expected.AsRegister(), RAX); + CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); + } else { + // Integer or object. + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
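// (Unlike the 32-bit code above, the output need not be pinned to EAX: with
// a REX prefix, setcc can address the low byte of any of the sixteen GPRs,
// so SetOut(Location::RequiresRegister()) suffices. The idiom is the same:
//   out = ZF ? 1 : 0;  // setcc writes the byte, movzxb zero-extends it.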
+ __ setcc(kZero, out); + __ movzxb(out, out); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, + X86_64Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, + int32_t shift, int64_t mask, X86_64Assembler* assembler) { + Immediate imm_shift(shift); + __ movq(temp_mask, Immediate(mask)); + __ movq(temp, reg); + __ shrq(reg, imm_shift); + __ andq(temp, temp_mask); + __ andq(reg, temp_mask); + __ shlq(temp, imm_shift); + __ orq(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a long number x. 
Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; + * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; + * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; + */ + __ bswapq(reg); + SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); + SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); + SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1007,19 +1380,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) -UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86_64 diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index dfae7fa90e..0e0e72c1fc 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -32,7 +32,7 @@ class X86_64Assembler; class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86_64* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index f22b7a7e82..28c5555d57 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index c102c4f02f..61d6593f2b 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); RemoveSuspendChecks(graph); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b0cfde0cf..81250ca133 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->TryBuildingSsa(); // `Inline` conditions into ifs. 
PrepareForRegisterAllocation(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index dca612e6b7..d8a8554610 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -752,8 +752,8 @@ HInstruction* HBinaryOperation::GetLeastConstantLeft() const { } } -bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { - return this == if_->GetPreviousDisregardingMoves(); +bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { + return this == instruction->GetPreviousDisregardingMoves(); } bool HInstruction::Equals(HInstruction* other) const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 21ed3504f1..f764eb421f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -682,6 +682,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ M(Condition, BinaryOperation) \ + M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ M(DivZeroCheck, Instruction) \ M(DoubleConstant, Constant) \ @@ -1191,7 +1192,17 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool HasEnvironment() const { return environment_ != nullptr; } HEnvironment* GetEnvironment() const { return environment_; } - void SetEnvironment(HEnvironment* environment) { environment_ = environment; } + // Set the `environment_` field. Raw because this method does not + // update the uses lists. + void SetRawEnvironment(HEnvironment* environment) { environment_ = environment; } + + // Set the environment of this instruction, copying it from `environment`. While + // copying, the uses lists are being updated. + void CopyEnvironmentFrom(HEnvironment* environment) { + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_->CopyFrom(environment); + } // Returns the number of entries in the environment. Typically, that is the // number of dex registers in a method. It could be more in case of inlining. @@ -1544,12 +1555,31 @@ class HIf : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); - virtual bool IsIfInstruction() const { return true; } - private: DISALLOW_COPY_AND_ASSIGN(HIf); }; +// Deoptimize to interpreter, upon checking a condition. +class HDeoptimize : public HTemplateInstruction<1> { + public: + HDeoptimize(HInstruction* cond, uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None()), + dex_pc_(dex_pc) { + SetRawInputAt(0, cond); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(Deoptimize); + + private: + uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); +}; + class HUnaryOperation : public HExpression<1> { public: HUnaryOperation(Primitive::Type result_type, HInstruction* input) @@ -1667,8 +1697,8 @@ class HCondition : public HBinaryOperation { void ClearNeedsMaterialization() { needs_materialization_ = false; } // For code generation purposes, returns whether this instruction is just before - // `if_`, and disregard moves in between. 
- bool IsBeforeWhenDisregardMoves(HIf* if_) const; + // `instruction`, and disregard moves in between. + bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; DECLARE_INSTRUCTION(Condition); @@ -2307,6 +2337,9 @@ class HNewArray : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } + // May throw NegativeArraySizeException, OutOfMemoryError, etc. + bool CanThrow() const OVERRIDE { return true; } + bool CanBeNull() const OVERRIDE { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4cf22d3b2e..4e83ce576c 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -50,7 +50,7 @@ TEST(Node, RemoveInstruction) { exit_block->AddInstruction(new (&allocator) HExit()); HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); - null_check->SetEnvironment(environment); + null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc new file mode 100644 index 0000000000..6d986ba7d3 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "optimizing/code_generator.h" +#include "utils/assembler.h" + +#include "optimizing/optimizing_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class OptimizingCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple context. + ArenaPool pool; + ArenaAllocator allocator(&pool); + CompilerOptions opts; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + HGraph graph(&allocator); + // Generate simple frame with some spills. + std::unique_ptr<CodeGenerator> code_gen( + CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + const int frame_size = 64; + int core_reg = 0; + int fp_reg = 0; + for (int i = 0; i < 2; i++) { // Two registers of each kind. 
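// (Each outer pass claims the next callee-save core register and the next
// callee-save FP register, so the generated frame entry spills two
// registers of each kind and the CFI for both register classes gets
// exercised.)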
+ for (; core_reg < 32; core_reg++) { + if (code_gen->IsCoreCalleeSaveRegister(core_reg)) { + auto location = Location::RegisterLocation(core_reg); + code_gen->AddAllocatedRegister(location); + core_reg++; + break; + } + } + for (; fp_reg < 32; fp_reg++) { + if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) { + auto location = Location::FpuRegisterLocation(fp_reg); + code_gen->AddAllocatedRegister(location); + fp_reg++; + break; + } + } + } + code_gen->ComputeSpillMask(); + code_gen->SetFrameSize(frame_size); + code_gen->GenerateFrameEntry(); + code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid()); + // Get the outputs. + InternalCodeAllocator code_allocator; + code_gen->Finalize(&code_allocator); + const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory(); + Assembler* opt_asm = code_gen->GetAssembler(); + const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } + + private: + class InternalCodeAllocator : public CodeAllocator { + public: + InternalCodeAllocator() {} + + virtual uint8_t* Allocate(size_t size) { + memory_.resize(size); + return memory_.data(); + } + + const std::vector<uint8_t>& GetMemory() { return memory_; } + + private: + std::vector<uint8_t> memory_; + + DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); + }; +}; + +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc new file mode 100644 index 0000000000..2125f6eb01 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -0,0 +1,141 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: .cfi_restore_state +// 0x00000012: .cfi_def_cfa_offset: 64 
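These expected_asm_*/expected_cfi_* tables are machine-generated rather than written by hand: with kGenerateExpected flipped to true in optimizing_cfi_test.cc above, the test prints fresh arrays together with the annotated disassembly comments to stdout for pasting back into this file. A sketch of that regeneration workflow:

    static constexpr bool kGenerateExpected = true;  // in OptimizingCFITest
    // rebuild, run the OptimizingCFITest cases, paste the printed tables here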
+ +static constexpr uint8_t expected_asm_kArm64[] = { + 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9, + 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, + 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: str w0, [sp, #-64]! +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp x19, x20, [sp, #40] +// 0x00000008: .cfi_offset: r19 at cfa-24 +// 0x00000008: .cfi_offset: r20 at cfa-16 +// 0x00000008: str lr, [sp, #56] +// 0x0000000c: .cfi_offset: r30 at cfa-8 +// 0x0000000c: stp d8, d9, [sp, #24] +// 0x00000010: .cfi_offset_extended: r72 at cfa-40 +// 0x00000010: .cfi_offset_extended: r73 at cfa-32 +// 0x00000010: .cfi_remember_state +// 0x00000010: ldp d8, d9, [sp, #24] +// 0x00000014: .cfi_restore_extended: r72 +// 0x00000014: .cfi_restore_extended: r73 +// 0x00000014: ldp x19, x20, [sp, #40] +// 0x00000018: .cfi_restore: r19 +// 0x00000018: .cfi_restore: r20 +// 0x00000018: ldr lr, [sp, #56] +// 0x0000001c: .cfi_restore: r30 +// 0x0000001c: add sp, sp, #0x40 (64) +// 0x00000020: .cfi_def_cfa_offset: 0 +// 0x00000020: ret +// 0x00000024: .cfi_restore_state +// 0x00000024: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D, + 0x5E, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E, + 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E, + 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push esi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r6 at cfa-8 +// 0x00000001: push ebp +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: sub esp, 52 +// 0x00000005: .cfi_def_cfa_offset: 64 +// 0x00000005: mov [esp], eax +// 0x00000008: .cfi_remember_state +// 0x00000008: add esp, 52 +// 0x0000000b: .cfi_def_cfa_offset: 12 +// 0x0000000b: pop ebp +// 0x0000000c: .cfi_def_cfa_offset: 8 +// 0x0000000c: .cfi_restore: r5 +// 0x0000000c: pop esi +// 0x0000000d: .cfi_def_cfa_offset: 4 +// 0x0000000d: .cfi_restore: r6 +// 0x0000000d: ret +// 0x0000000e: .cfi_restore_state +// 0x0000000e: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, + 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E, + 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6, + 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push rbp +// 0x00000001: .cfi_def_cfa_offset: 16 +// 0x00000001: .cfi_offset: r6 at cfa-16 +// 0x00000001: push rbx +// 0x00000002: .cfi_def_cfa_offset: 24 +// 0x00000002: .cfi_offset: r3 at cfa-24 +// 0x00000002: subq rsp, 40 +// 0x00000006: .cfi_def_cfa_offset: 64 +// 0x00000006: movsd [rsp + 32], xmm13 +// 0x0000000d: 
.cfi_offset: r30 at cfa-32 +// 0x0000000d: movsd [rsp + 24], xmm12 +// 0x00000014: .cfi_offset: r29 at cfa-40 +// 0x00000014: mov [rsp], edi +// 0x00000017: .cfi_remember_state +// 0x00000017: movsd xmm12, [rsp + 24] +// 0x0000001e: .cfi_restore: r29 +// 0x0000001e: movsd xmm13, [rsp + 32] +// 0x00000025: .cfi_restore: r30 +// 0x00000025: addq rsp, 40 +// 0x00000029: .cfi_def_cfa_offset: 24 +// 0x00000029: pop rbx +// 0x0000002a: .cfi_def_cfa_offset: 16 +// 0x0000002a: .cfi_restore: r3 +// 0x0000002a: pop rbp +// 0x0000002b: .cfi_def_cfa_offset: 8 +// 0x0000002b: .cfi_restore: r6 +// 0x0000002b: ret +// 0x0000002c: .cfi_restore_state +// 0x0000002c: .cfi_def_cfa_offset: 64 + diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b2f9c65153..0e02212867 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,11 +26,13 @@ #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" +#include "compiled_method.h" #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" #include "graph_visualizer.h" @@ -48,6 +50,7 @@ #include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" +#include "utils/assembler.h" #include "reference_type_propagation.h" namespace art { @@ -94,10 +97,13 @@ class PassInfoPrinter : public ValueObject { timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(method_name, true, true), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), - visualizer_(visualizer_output, graph, codegen, method_name_) { + visualizer_(visualizer_output, graph, codegen) { if (strstr(method_name, kStringFilter) == nullptr) { timing_logger_enabled_ = visualizer_enabled_ = false; } + if (visualizer_enabled_) { + visualizer_.PrintHeader(method_name_); + } } ~PassInfoPrinter() { @@ -199,8 +205,13 @@ class OptimizingCompiler FINAL : public Compiler { const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; @@ -360,6 +371,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } +// TODO: The function below uses too much stack space. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wframe-larger-than=" CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, @@ -385,12 +399,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeVectorAllocator allocator; codegen->CompileOptimized(&allocator); + DefaultSrcMap src_mapping_table; + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } + std::vector<uint8_t> stack_map; codegen->BuildStackMaps(&stack_map); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); - return CompiledMethod::SwapAllocCompiledMethodStackMap( + return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), @@ -400,9 +419,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const uint8_t>(stack_map)); + &src_mapping_table, + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } +#pragma GCC diagnostic pop CompiledMethod* OptimizingCompiler::CompileBaseline( CodeGenerator* codegen, @@ -412,9 +437,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->CompileBaseline(&allocator); std::vector<uint8_t> mapping_table; + codegen->BuildMappingTable(&mapping_table); DefaultSrcMap src_mapping_table; - bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); - codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr); + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; @@ -435,7 +462,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, @@ -501,6 +529,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } + codegen->GetAssembler()->cfi().SetEnabled( + compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()); PassInfoPrinter pass_info_printer(graph, method_name.c_str(), diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 7d0641ec13..4936685367 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include <iostream> #include "parallel_move_resolver.h" #include "nodes.h" @@ -63,39 +64,42 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { } } +Location LowOf(Location location) { + if (location.IsRegisterPair()) { + return Location::RegisterLocation(location.low()); + } else if (location.IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(location.low()); + } else if (location.IsDoubleStackSlot()) { + return Location::StackSlot(location.GetStackIndex()); + } else { + return Location::NoLocation(); + } +} + +Location HighOf(Location location) { + if (location.IsRegisterPair()) { + return Location::RegisterLocation(location.high()); + } else if (location.IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(location.high()); + } else if (location.IsDoubleStackSlot()) { + return Location::StackSlot(location.GetHighStackIndex(4)); + } else { + return Location::NoLocation(); + } +} + // Update the source of `move`, knowing that `updated_location` has been swapped // with `new_source`. Note that `updated_location` can be a pair, therefore if // `move` is non-pair, we need to extract which register to use. static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) { Location source = move->GetSource(); - if (new_source.GetKind() == source.GetKind()) { - DCHECK(updated_location.Equals(source)); - move->SetSource(new_source); - } else if (new_source.IsStackSlot() - || new_source.IsDoubleStackSlot() - || source.IsStackSlot() - || source.IsDoubleStackSlot()) { - // Stack slots never take part of a pair/non-pair swap. - DCHECK(updated_location.Equals(source)); + if (LowOf(updated_location).Equals(source)) { + move->SetSource(LowOf(new_source)); + } else if (HighOf(updated_location).Equals(source)) { + move->SetSource(HighOf(new_source)); + } else { + DCHECK(updated_location.Equals(source)) << updated_location << " " << source; move->SetSource(new_source); - } else if (source.IsRegister()) { - DCHECK(new_source.IsRegisterPair()) << new_source; - DCHECK(updated_location.IsRegisterPair()) << updated_location; - if (updated_location.low() == source.reg()) { - move->SetSource(Location::RegisterLocation(new_source.low())); - } else { - DCHECK_EQ(updated_location.high(), source.reg()); - move->SetSource(Location::RegisterLocation(new_source.high())); - } - } else if (source.IsFpuRegister()) { - DCHECK(new_source.IsFpuRegisterPair()) << new_source; - DCHECK(updated_location.IsFpuRegisterPair()) << updated_location; - if (updated_location.low() == source.reg()) { - move->SetSource(Location::FpuRegisterLocation(new_source.low())); - } else { - DCHECK_EQ(updated_location.high(), source.reg()); - move->SetSource(Location::FpuRegisterLocation(new_source.high())); - } } } @@ -265,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } +int ParallelMoveResolver::AllocateScratchRegister(int blocked, + int register_count) { + int scratch = -1; + for (int reg = 0; reg < register_count; ++reg) { + if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) { + scratch = reg; + break; + } + } + + return scratch; +} + + ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), @@ -278,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } +ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolver* resolver, int blocked, 
int number_of_registers) + : resolver_(resolver), + reg_(kNoRegister), + spilled_(false) { + // We don't want to spill a register if none are free. + reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers); +} + + ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 3fa1b37afd..173cffc71e 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject { protected: class ScratchRegisterScope : public ValueObject { public: + // Spill a scratch register if no regs are free. ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers); + // Grab a scratch register only if available. + ScratchRegisterScope(ParallelMoveResolver* resolver, + int blocked, + int number_of_registers); ~ScratchRegisterScope(); int GetRegister() const { return reg_; } @@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject { // Allocate a scratch register for performing a move. The method will try to use // a register that is the destination of a move, but that move has not been emitted yet. int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled); + // As above, but return -1 if no free register. + int AllocateScratchRegister(int blocked, int register_count); // Emit a move. virtual void EmitMove(size_t index) = 0; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 817a44b184..5c502f7ef4 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -31,8 +31,13 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << "C"; } else if (location.IsPair()) { message_ << location.low() << "," << location.high(); - } else { + } else if (location.IsRegister()) { message_ << location.reg(); + } else if (location.IsStackSlot()) { + message_ << location.GetStackIndex() << "(sp)"; + } else { + message_ << "2x" << location.GetStackIndex() << "(sp)"; + DCHECK(location.IsDoubleStackSlot()) << location; } } @@ -279,6 +284,26 @@ TEST(ParallelMoveTest, Pairs) { resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); } + + { + // Test involving registers used in single context and pair context. 
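+ // The pending single move (10 -> 5) has its source rewritten twice as the
+ // cycles are broken: first the pair (10,11) is swapped with the double
+ // stack slot, so UpdateSourceOf turns the source into the slot's low word
+ // via LowOf; then that slot is swapped with the pair (4,5), turning the
+ // source into register 4. Hence the final emitted move is (4 -> 5), as
+ // the expected string below asserts.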
+ TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(10), + Location::RegisterLocation(5), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(4, 5), + Location::DoubleStackSlot(32), + nullptr); + moves->AddMove( + Location::DoubleStackSlot(32), + Location::RegisterPairLocation(10, 11), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + } } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 2d9a2bf330..f5d8d82571 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -60,11 +60,11 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; - if (!condition->GetUses().HasOnlyOneUse()) { + if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { needs_materialization = true; } else { HInstruction* user = condition->GetUses().GetFirst()->GetUser(); - if (!user->IsIf()) { + if (!user->IsIf() && !user->IsDeoptimize()) { needs_materialization = true; } else { // TODO: if there is no intervening instructions with side-effect between this condition diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index cf38bd3f8c..4bca43499f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1408,26 +1408,36 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { // Walk over all uses covered by this interval, and update the location // information. - while (use != nullptr && use->GetPosition() <= current->GetEnd()) { - LocationSummary* locations = use->GetUser()->GetLocations(); - if (use->GetIsEnvironment()) { - locations->SetEnvironmentAt(use->GetInputIndex(), source); - } else { - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); - } + + LiveRange* range = current->GetFirstRange(); + while (range != nullptr) { + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(current->Covers(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); + LocationSummary* locations = use->GetUser()->GetLocations(); + if (use->GetIsEnvironment()) { + locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. 
+ if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + } } + use = use->GetNext(); } - use = use->GetNext(); + range = range->GetNext(); } // If the next interval starts just after this one, and has a register, @@ -1503,7 +1513,15 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } current = next_sibling; } while (current != nullptr); - DCHECK(use == nullptr); + + if (kIsDebugBuild) { + // Following uses can only be environment uses. The location for + // these environments will be none. + while (use != nullptr) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + } } void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 7c3a0357d6..3951439881 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. 
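The reworked loop in RegisterAllocator::ConnectSiblings above walks an interval's live ranges and its use list in lockstep: since environment uses no longer necessarily extend liveness (see the ssa_liveness_analysis changes later in this patch), a use may now sit in a hole between ranges or after the last range, and only environment uses may do so. A minimal standalone sketch of that traversal, using simplified stand-in types rather than ART's UsePosition/LiveRange classes:

    #include <cassert>
    #include <cstddef>

    // Stand-in types; ART uses UsePosition and LiveRange.
    struct UsePos { size_t position; bool is_environment; UsePos* next; };
    struct Range { size_t start; size_t end; Range* next; };

    void WalkUses(Range* range, UsePos* use) {
      for (; range != nullptr; range = range->next) {
        // Uses sitting in a hole before this range cannot extend liveness,
        // so they can only be environment uses.
        while (use != nullptr && use->position < range->start) {
          assert(use->is_environment);
          use = use->next;
        }
        // Uses covered by this range (or exactly at its end) get their
        // location updated, as ConnectSiblings does above.
        while (use != nullptr && use->position <= range->end) {
          // ... update the location recorded for this use ...
          use = use->next;
        }
      }
      // Anything left lies past the last range: environment uses only.
      for (; use != nullptr; use = use->next) assert(use->is_environment);
    }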
@@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -556,7 +575,9 @@ 
TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -734,7 +763,9 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { { HGraph* graph = BuildDiv(&allocator, &div); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) { locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); RegisterAllocator register_allocator(&allocator, &codegen, liveness); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index fcc4e69b37..e154ea4ee6 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -487,7 +487,7 @@ void 
SsaBuilder::VisitInstruction(HInstruction* instruction) { HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( GetGraph()->GetArena(), current_locals_->Size()); environment->CopyFrom(current_locals_); - instruction->SetEnvironment(environment); + instruction->SetRawEnvironment(environment); } void SsaBuilder::VisitTemporary(HTemporary* temp) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 0f3973e5fb..95da6ef551 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -218,28 +218,34 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { current->GetLiveInterval()->SetFrom(current->GetLifetimePosition()); } - // All inputs of an instruction must be live. - for (size_t i = 0, e = current->InputCount(); i < e; ++i) { - HInstruction* input = current->InputAt(i); - // Some instructions 'inline' their inputs, that is they do not need - // to be materialized. - if (input->HasSsaIndex()) { - live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, false); - } - } - + // Process the environment first, because we know their uses come after + // or at the same liveness position of inputs. if (current->HasEnvironment()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); - if (ShouldBeLiveForEnvironment(instruction)) { + bool should_be_live = ShouldBeLiveForEnvironment(instruction); + if (should_be_live) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); - instruction->GetLiveInterval()->AddUse(current, i, true); } + if (instruction != nullptr) { + instruction->GetLiveInterval()->AddUse( + current, i, /* is_environment */ true, should_be_live); + } + } + } + + // All inputs of an instruction must be live. + for (size_t i = 0, e = current->InputCount(); i < e; ++i) { + HInstruction* input = current->InputAt(i); + // Some instructions 'inline' their inputs, that is they do not need + // to be materialized. + if (input->HasSsaIndex()) { + live_in->SetBit(input->GetSsaIndex()); + input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index bc78dc2e76..d2da84c0c0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -189,7 +189,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { AddRange(position, position + 1); } - void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { + void AddUse(HInstruction* instruction, + size_t input_index, + bool is_environment, + bool keep_alive = false) { // Set the use within the instruction. size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); @@ -211,6 +214,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { && (first_use_->GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first. 
+ DCHECK(!is_environment); UsePosition* cursor = first_use_; while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) { cursor = cursor->GetNext(); @@ -225,6 +229,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return; } + first_use_ = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_); + + if (is_environment && !keep_alive) { + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction. + return; + } + size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -246,8 +259,6 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // and the check line 205 would succeed. first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } - first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); } void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { @@ -425,9 +436,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); - if (use_position > position) { - return use_position; + if (!use->GetIsEnvironment()) { + size_t use_position = use->GetPosition(); + if (use_position > position) { + return use_position; + } } use = use->GetNext(); } diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 5818a37a46..a73c8d77f3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -27,6 +27,32 @@ namespace art { +// Helper to build art::StackMapStream::LocationCatalogEntriesIndices. +class LocationCatalogEntriesIndicesEmptyFn { + public: + void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const { + item.first = DexRegisterLocation::None(); + } + bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const { + return item.first == DexRegisterLocation::None(); + } +}; + +// Hash function for art::StackMapStream::LocationCatalogEntriesIndices. +// This hash function does not create collisions. +class DexRegisterLocationHashFn { + public: + size_t operator()(DexRegisterLocation key) const { + // Concatenate `key`s fields to create a 64-bit value to be hashed. + int64_t kind_and_value = + (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_); + return inner_hash_fn_(kind_and_value); + } + private: + std::hash<int64_t> inner_hash_fn_; +}; + + /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. 
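A note on DexRegisterLocationHashFn above: it builds the hash input by concatenating kind_ and value_ into one 64-bit integer. Since value_ is a signed 32-bit field (constants such as -2 are legal locations), static_cast<int64_t>(key.value_) sign-extends negative values, and the resulting upper bits absorb kind_ in the bitwise OR, so the "does not create collisions" guarantee strictly holds only for non-negative values. A standalone sketch of the packing with the value zero-extended instead (a hypothetical simplified struct, not ART's DexRegisterLocation class):

    #include <cstddef>
    #include <cstdint>
    #include <functional>

    struct Loc {
      int32_t kind;   // stands in for DexRegisterLocation::Kind
      int32_t value;  // register number, stack offset, or constant
    };

    // Packing distinct (kind, value) pairs yields distinct 64-bit inputs;
    // zero-extending the value keeps the kind bits intact for negatives.
    inline size_t HashLoc(const Loc& loc) {
      uint64_t packed =
          (static_cast<uint64_t>(static_cast<uint32_t>(loc.kind)) << 32) |
          static_cast<uint64_t>(static_cast<uint32_t>(loc.value));
      return std::hash<uint64_t>()(packed);
    }

Either way the catalog's hash map stays correct, since the HashMap resolves colliding keys by comparison; only lookup performance is affected.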
@@ -36,11 +62,13 @@ class StackMapStream : public ValueObject { explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), stack_maps_(allocator, 10), + location_catalog_entries_(allocator, 4), dex_register_locations_(allocator, 10 * 4), inline_infos_(allocator, 2), stack_mask_max_(-1), dex_pc_max_(0), native_pc_offset_max_(0), + register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {} @@ -101,6 +129,7 @@ class StackMapStream : public ValueObject { dex_pc_max_ = std::max(dex_pc_max_, dex_pc); native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); + register_mask_max_ = std::max(register_mask_max_, register_mask); } void AddInlineInfoEntry(uint32_t method_index) { @@ -111,6 +140,7 @@ class StackMapStream : public ValueObject { size_t ComputeNeededSize() { size_t size = CodeInfo::kFixedSize + + ComputeDexRegisterLocationCatalogSize() + ComputeStackMapsSize() + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); @@ -128,24 +158,43 @@ class StackMapStream : public ValueObject { ComputeInlineInfoSize(), ComputeDexRegisterMapsSize(), dex_pc_max_, - native_pc_offset_max_); + native_pc_offset_max_, + register_mask_max_); + } + + // Compute the size of the Dex register location catalog of `entry`. + size_t ComputeDexRegisterLocationCatalogSize() const { + size_t size = DexRegisterLocationCatalog::kFixedSize; + for (size_t location_catalog_entry_index = 0; + location_catalog_entry_index < location_catalog_entries_.Size(); + ++location_catalog_entry_index) { + DexRegisterLocation dex_register_location = + location_catalog_entries_.Get(location_catalog_entry_index); + size += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + return size; } - // Compute the size of the Dex register map of `entry`. size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; - // Add the bit mask for the dex register liveness. - size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + // Add the live bit mask for the Dex register liveness. + size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + // Compute the size of the set of live Dex register entries. 
+ size_t number_of_live_dex_registers = 0; + for (size_t dex_register_number = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - size += DexRegisterMap::EntrySize(dex_register_location); - index_in_dex_register_locations++; + ++number_of_live_dex_registers; } } + size_t map_entries_size_in_bits = + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + * number_of_live_dex_registers; + size_t map_entries_size_in_bytes = + RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; + size += map_entries_size_in_bytes; return size; } @@ -168,8 +217,16 @@ class StackMapStream : public ValueObject { + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } + size_t ComputeDexRegisterLocationCatalogStart() const { + return CodeInfo::kFixedSize; + } + + size_t ComputeStackMapsStart() const { + return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); + } + size_t ComputeDexRegisterMapsStart() { - return CodeInfo::kFixedSize + ComputeStackMapsSize(); + return ComputeStackMapsStart() + ComputeStackMapsSize(); } size_t ComputeInlineInfoStart() { @@ -194,11 +251,32 @@ class StackMapStream : public ValueObject { ComputeInlineInfoStart(), inline_info_size); - code_info.SetEncoding( - inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_); + code_info.SetEncoding(inline_info_size, + dex_register_map_size, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); code_info.SetNumberOfStackMaps(stack_maps_.Size()); code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize()); + DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); + + // Set the Dex register location catalog. + code_info.SetNumberOfDexRegisterLocationCatalogEntries( + location_catalog_entries_.Size()); + MemoryRegion dex_register_location_catalog_region = region.Subregion( + ComputeDexRegisterLocationCatalogStart(), + ComputeDexRegisterLocationCatalogSize()); + DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); + // Offset in `dex_register_location_catalog` where to store the next + // register location. + size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; + for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { + DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); + location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + // Ensure we reached the end of the Dex registers location_catalog. + DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; @@ -234,25 +312,25 @@ class StackMapStream : public ValueObject { stack_map.SetDexRegisterMapOffset( code_info, register_region.start() - dex_register_locations_region.start()); - // Offset in `dex_register_map` where to store the next register entry. 
- size_t offset = DexRegisterMap::kFixedSize; - dex_register_map.SetLiveBitMask(offset, - entry.num_dex_registers, - *entry.live_dex_registers_mask); - offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); + // Set the live bit mask. + dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); + + // Set the dex register location mapping data. for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - dex_register_map.SetRegisterInfo(offset, dex_register_location); - offset += DexRegisterMap::EntrySize(dex_register_location); + size_t location_catalog_entry_index = + dex_register_locations_.Get(entry.dex_register_locations_start_index + + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + entry.num_dex_registers, + location_catalog_entries_.Size()); ++index_in_dex_register_locations; } } - // Ensure we reached the end of the Dex registers region. - DCHECK_EQ(offset, register_region.size()); } } @@ -282,12 +360,31 @@ class StackMapStream : public ValueObject { } void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { + StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DCHECK_LT(dex_register, entry.num_dex_registers); + if (kind != DexRegisterLocation::Kind::kNone) { // Ensure we only use non-compressed location kind at this stage. DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << DexRegisterLocation::PrettyDescriptor(kind); - dex_register_locations_.Add(DexRegisterLocation(kind, value)); - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DexRegisterLocation location(kind, value); + + // Look for Dex register `location` in the location catalog (using the + // companion hash map of locations to indices). Use its index if it + // is already in the location catalog. If not, insert it (in the + // location catalog and the hash map) and use the newly created index. + auto it = location_catalog_entries_indices_.Find(location); + if (it != location_catalog_entries_indices_.end()) { + // Retrieve the index from the hash map. + dex_register_locations_.Add(it->second); + } else { + // Create a new entry in the location catalog and the hash map. 
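+ // The entry's index is simply its position in location_catalog_entries_,
+ // so indices are dense and follow insertion order; the hash map exists
+ // only to make the duplicate lookup constant-time.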
+ size_t index = location_catalog_entries_.Size();
+ location_catalog_entries_.Add(location);
+ dex_register_locations_.Add(index);
+ location_catalog_entries_indices_.Insert(std::make_pair(location, index));
+ }
+
entry.live_dex_registers_mask->SetBit(dex_register);
entry.dex_register_map_hash += (1 << dex_register);
entry.dex_register_map_hash += static_cast<uint32_t>(value);
@@ -354,9 +451,9 @@
return false;
}
if (a.live_dex_registers_mask->IsBitSet(i)) {
- DexRegisterLocation a_loc = dex_register_locations_.Get(
+ size_t a_loc = dex_register_locations_.Get(
a.dex_register_locations_start_index + index_in_dex_register_locations);
- DexRegisterLocation b_loc = dex_register_locations_.Get(
+ size_t b_loc = dex_register_locations_.Get(
b.dex_register_locations_start_index + index_in_dex_register_locations);
if (a_loc != b_loc) {
return false;
@@ -369,21 +466,29 @@
ArenaAllocator* allocator_;
GrowableArray<StackMapEntry> stack_maps_;
- GrowableArray<DexRegisterLocation> dex_register_locations_;
+
+ // A catalog of unique [location_kind, register_value] pairs (per method).
+ GrowableArray<DexRegisterLocation> location_catalog_entries_;
+ // Map from Dex register location catalog entries to their indices in the
+ // location catalog.
+ typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
+ DexRegisterLocationHashFn> LocationCatalogEntriesIndices;
+ LocationCatalogEntriesIndices location_catalog_entries_indices_;
+
+ // A set of concatenated maps of Dex register location indices to
+ // `location_catalog_entries_`.
+ GrowableArray<size_t> dex_register_locations_;
GrowableArray<InlineInfoEntry> inline_infos_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t native_pc_offset_max_;
+ uint32_t register_mask_max_;
size_t number_of_stack_maps_with_inline_info_;
ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
static constexpr uint32_t kNoSameDexMapFound = -1;
- ART_FRIEND_TEST(StackMapTest, Test1);
- ART_FRIEND_TEST(StackMapTest, Test2);
- ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters);
-
DISALLOW_COPY_AND_ASSIGN(StackMapStream);
};
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index e5a9790254..8d160bc81e 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -31,6 +31,8 @@ static bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
return true;
}
+using Kind = DexRegisterLocation::Kind;
+
TEST(StackMapTest, Test1) {
ArenaPool pool;
ArenaAllocator arena(&pool);
@@ -39,8 +41,8 @@ TEST(StackMapTest, Test1) {
ArenaBitVector sp_mask(&arena, 0, false);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
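+ // (kInStack, 0) fits a 1-byte short catalog entry; the constant -2 falls
+ // outside the short value range and is stored as a 5-byte
+ // kConstantLargeValue entry, giving the 1u + 5u catalog size and the
+ // internal kinds asserted below.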
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
CodeInfo code_info(region);
@@ -51,6 +53,16 @@ TEST(StackMapTest, Test1) {
ASSERT_EQ(0u, code_info.GetStackMaskSize());
ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(2u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - one 1-byte short Dex register location, and
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 1u + 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
StackMap stack_map = code_info.GetStackMapAt(0);
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
@@ -62,14 +74,40 @@ TEST(StackMapTest, Test1) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(7u, dex_registers.Size());
- DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ DexRegisterMap dex_register_map =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+ // - one 1-byte set of location catalog entry indices composed of two 1-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -86,8 +124,8 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); @@ -95,8 +133,8 @@ TEST(StackMapTest, Test2) { sp_mask2.SetBit(3); sp_mask1.SetBit(8); stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. + stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -107,6 +145,16 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(1u, code_info.GetStackMaskSize()); ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(4u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - three 1-byte short Dex register locations, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 3u * 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + // First stack map. 
{ StackMap stack_map = code_info.GetStackMapAt(0); @@ -120,17 +168,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -154,17 +225,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(3u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind()); - 
ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(18, dex_register_map.GetMachineRegister(0, number_of_dex_registers, code_info)); + ASSERT_EQ(3, dex_register_map.GetMachineRegister(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(2u, index0); + ASSERT_EQ(3u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInRegister, location0.GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind()); + ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind()); ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); @@ -180,8 +274,8 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -189,14 +283,62 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { stream.FillIn(region); CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(1u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 5-byte large Dex register location. 
+ size_t expected_location_catalog_size = 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); - ASSERT_EQ(DexRegisterLocation::Kind::kNone, - dex_registers.GetLocationKind(0, number_of_dex_registers)); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, - dex_registers.GetLocationKind(1, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers)); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask. + // No space is allocated for the sole location catalog entry index, as it is useless. + size_t expected_dex_register_map_size = 1u + 0u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0); + ASSERT_EQ(0u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kNone, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kNone, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } @@ -209,14 +351,21 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { StackMapStream stream(&arena); ArenaBitVector sp_mask(&arena, 0, false); - uint32_t number_of_dex_registers = 0xEA; + uint32_t number_of_dex_registers = 1024; + // Create the first stack map (and its Dex register map). 
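+ // The sizing below makes this first map occupy exactly 255 bytes, so the
+ // second map's offset lands on 0xFF (kNoDexRegisterMapSmallEncoding), the
+ // boundary case this test exists to exercise.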
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) { - stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; + for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { + // Use two different Dex register locations to populate this map, + // as using a single value (in the whole CodeInfo object) would + // make this Dex register mapping data empty (see + // art::DexRegisterMap::SingleEntrySizeInBits). + stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location. } + // Create the second stack map (and its Dex register map). stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); for (uint32_t i = 0; i < number_of_dex_registers; ++i) { - stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location. } size_t size = stream.ComputeNeededSize(); @@ -225,10 +374,35 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { stream.FillIn(region); CodeInfo code_info(region); - StackMap stack_map = code_info.GetStackMapAt(1); - ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap); - ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding); + // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) + // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index + // has a size of 1 bit. + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(2u, number_of_location_catalog_entries); + ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries)); + + // The first Dex register map contains: + // - a live register bit mask for 1024 registers (that is, 128 bytes of + // data); and + // - Dex register mapping information for 1016 1-bit Dex (live) register + // locations (that is, 127 bytes of data). + // Hence it has a size of 255 bytes, and therefore... + ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers)); + StackMap stack_map0 = code_info.GetStackMapAt(0); + DexRegisterMap dex_register_map0 = + code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers); + ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers, + number_of_location_catalog_entries)); + ASSERT_EQ(255u, dex_register_map0.Size()); + + StackMap stack_map1 = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map1.HasDexRegisterMap(code_info)); + // ...the offset of the second Dex register map (relative to the + // beginning of the Dex register maps region) is 255 (i.e., + // kNoDexRegisterMapSmallEncoding). + ASSERT_NE(stack_map1.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap); + ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(code_info), 0xFFu); } TEST(StackMapTest, TestShareDexRegisterMap) { @@ -240,16 +414,16 @@ TEST(StackMapTest, TestShareDexRegisterMap) { uint32_t number_of_dex_registers = 2; // First stack map. 
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. // Second stack map, which should share the same dex register map. stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. // Third stack map (doesn't share the dex register map). stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -260,20 +434,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) { // Verify first stack map. StackMap sm0 = ci.GetStackMapAt(0); DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers); - ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci)); // Verify second stack map. StackMap sm1 = ci.GetStackMapAt(1); DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers); - ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci)); // Verify third stack map. StackMap sm2 = ci.GetStackMapAt(2); DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers); - ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci)); // Verify dex register map offsets. 
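// sm0 and sm1 were built from identical (register, location) sequences, so
// their dex_register_map_hash values match and the stream resolves them to a
// single shared map (via dex_map_hash_to_stack_map_indices_); sm2 differs in
// register 0's location and therefore gets its own map and offset.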
ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci));
@@ -281,4 +455,39 @@
ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci));
}
+TEST(StackMapTest, TestNoDexRegisterMap) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena);
+
+ ArenaBitVector sp_mask(&arena, 0, false);
+ uint32_t number_of_dex_registers = 0;
+ stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+
+ size_t size = stream.ComputeNeededSize();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ ASSERT_EQ(0u, code_info.GetStackMaskSize());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(0u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ ASSERT_EQ(0u, location_catalog.Size());
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+ ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+ ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
+
+ ASSERT_FALSE(stack_map.HasDexRegisterMap(code_info));
+ ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
+}
+
} // namespace art
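As a cross-check of the arithmetic asserted in DexRegisterMapOffsetOverflow above, the two size formulas are easy to reproduce in isolation. The sketch below reimplements the math in plain C++ rather than calling ART's DexRegisterMap helpers, and assumes the map's fixed header contributes no bytes, which is what the asserted 255-byte total implies:

    #include <cstddef>
    #include <cstdio>

    // Bits needed per catalog index: ceil(log2(entries)); 0 when the catalog
    // has at most one entry, since there is nothing to distinguish.
    size_t SingleEntrySizeInBits(size_t catalog_entries) {
      size_t bits = 0;
      while ((size_t{1} << bits) < catalog_entries) ++bits;
      return bits;
    }

    // One liveness bit per Dex register, rounded up to whole bytes.
    size_t LiveBitMaskSize(size_t dex_registers) {
      return (dex_registers + 7) / 8;
    }

    int main() {
      // First map in the test: 1024 registers, 1016 of them live, and a
      // 2-entry catalog (constants 0 and 1), i.e. 1 bit per index.
      size_t mask_bytes = LiveBitMaskSize(1024);                        // 128
      size_t data_bytes = (1016 * SingleEntrySizeInBits(2) + 7) / 8;    // 127
      std::printf("map0 size = %zu bytes\n", mask_bytes + data_bytes);  // 255
      // 255 == 0xFF is then the second map's offset, matching the
      // kNoDexRegisterMapSmallEncoding boundary the test checks.
      return 0;
    }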