Diffstat (limited to 'compiler')
52 files changed, 1135 insertions, 477 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 7edb490176..39725dee38 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -345,6 +345,7 @@ enum ExtendedMIROpcode { enum MIROptimizationFlagPositions { kMIRIgnoreNullCheck = 0, kMIRIgnoreRangeCheck, + kMIRIgnoreCheckCast, kMIRStoreNonNullValue, // Storing non-null value, always mark GC card. kMIRClassIsInitialized, kMIRClassIsInDexCache, diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index fcefb6fbfc..548b6f8b63 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -180,22 +180,21 @@ void DexCompiler::Compile() { } void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { - DCHECK(inst->Opcode() == Instruction::RETURN_VOID); - // Are we compiling a non-clinit constructor? - if (!unit_.IsConstructor() || unit_.IsStatic()) { - return; - } - // Do we need a constructor barrier ? - if (!driver_.RequiresConstructorBarrier(Thread::Current(), unit_.GetDexFile(), - unit_.GetClassDefIndex())) { - return; + DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID); + if (unit_.IsConstructor()) { + // Are we compiling a non clinit constructor which needs a barrier ? + if (!unit_.IsStatic() && + driver_.RequiresConstructorBarrier(Thread::Current(), unit_.GetDexFile(), + unit_.GetClassDefIndex())) { + return; + } } - // Replace RETURN_VOID by RETURN_VOID_BARRIER. + // Replace RETURN_VOID by RETURN_VOID_NO_BARRIER. VLOG(compiler) << "Replacing " << Instruction::Name(inst->Opcode()) - << " by " << Instruction::Name(Instruction::RETURN_VOID_BARRIER) + << " by " << Instruction::Name(Instruction::RETURN_VOID_NO_BARRIER) << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << PrettyMethod(unit_.GetDexMethodIndex(), GetDexFile(), true); - inst->SetOpcode(Instruction::RETURN_VOID_BARRIER); + inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER); } Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc index ab3c946897..30e3ce0354 100644 --- a/compiler/dex/global_value_numbering.cc +++ b/compiler/dex/global_value_numbering.cc @@ -16,6 +16,7 @@ #include "global_value_numbering.h" +#include "base/bit_vector-inl.h" #include "base/stl_util.h" #include "local_value_numbering.h" @@ -206,4 +207,41 @@ bool GlobalValueNumbering::DivZeroCheckedInAllPredecessors( return true; } +bool GlobalValueNumbering::IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_id) { + DCHECK_NE(cond, kNoValue); + BasicBlock* bb = mir_graph_->GetBasicBlock(bb_id); + if (bb->predecessors.size() == 1u) { + BasicBlockId pred_id = bb->predecessors[0]; + BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id); + if (pred_bb->last_mir_insn != nullptr) { + Instruction::Code opcode = pred_bb->last_mir_insn->dalvikInsn.opcode; + if ((opcode == Instruction::IF_NEZ && pred_bb->taken == bb_id) || + (opcode == Instruction::IF_EQZ && pred_bb->fall_through == bb_id)) { + DCHECK(lvns_[pred_id] != nullptr); + uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]); + if (operand == cond) { + return true; + } + } + } + } + return false; +} + +bool GlobalValueNumbering::IsTrueInBlock(uint16_t cond, BasicBlockId bb_id) { + // We're not doing proper value propagation, so just see if the condition is used + // with if-nez/if-eqz to branch/fall-through to this bb or one of its dominators. 
+ DCHECK_NE(cond, kNoValue); + if (IsBlockEnteredOnTrue(cond, bb_id)) { + return true; + } + BasicBlock* bb = mir_graph_->GetBasicBlock(bb_id); + for (uint32_t dom_id : bb->dominators->Indexes()) { + if (IsBlockEnteredOnTrue(cond, dom_id)) { + return true; + } + } + return false; +} + } // namespace art diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h index 6fa658c0cc..bd2f187d17 100644 --- a/compiler/dex/global_value_numbering.h +++ b/compiler/dex/global_value_numbering.h @@ -200,6 +200,9 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const; + bool IsBlockEnteredOnTrue(uint16_t cond, BasicBlockId bb_id); + bool IsTrueInBlock(uint16_t cond, BasicBlockId bb_id); + ScopedArenaAllocator* Allocator() const { return allocator_; } diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index b91c3cac8f..b4559ef375 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -136,6 +136,7 @@ class GlobalValueNumberingTest : public testing::Test { { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } } #define DEF_BINOP(bb, opcode, result, src1, src2) \ { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } +#define DEF_UNOP(bb, opcode, result, src) DEF_MOVE(bb, opcode, result, src) void DoPrepareIFields(const IFieldDef* defs, size_t count) { cu_.mir_graph->ifield_lowering_infos_.clear(); @@ -2315,4 +2316,95 @@ TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) { } } +TEST_F(GlobalValueNumberingTestDiamond, CheckCastDiamond) { + static const MIRDef mirs[] = { + DEF_UNOP(3u, Instruction::INSTANCE_OF, 0u, 100u), + DEF_UNOP(3u, Instruction::INSTANCE_OF, 1u, 200u), + DEF_IFZ(3u, Instruction::IF_NEZ, 0u), + DEF_INVOKE1(4u, Instruction::CHECK_CAST, 100u), + DEF_INVOKE1(5u, Instruction::CHECK_CAST, 100u), + DEF_INVOKE1(5u, Instruction::CHECK_CAST, 200u), + DEF_INVOKE1(5u, Instruction::CHECK_CAST, 100u), + DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u), + }; + + static const bool expected_ignore_check_cast[] = { + false, // instance-of + false, // instance-of + false, // if-nez + false, // Not eliminated, fall-through branch. + true, // Eliminated. + false, // Not eliminated, different value. + false, // Not eliminated, different type. + false, // Not eliminated, bottom block. + }; + + PrepareMIRs(mirs); + mirs_[0].dalvikInsn.vC = 1234; // type for instance-of + mirs_[1].dalvikInsn.vC = 1234; // type for instance-of + mirs_[3].dalvikInsn.vB = 1234; // type for check-cast + mirs_[4].dalvikInsn.vB = 1234; // type for check-cast + mirs_[5].dalvikInsn.vB = 1234; // type for check-cast + mirs_[6].dalvikInsn.vB = 4321; // type for check-cast + mirs_[7].dalvikInsn.vB = 1234; // type for check-cast + PerformGVN(); + PerformGVNCodeModifications(); + ASSERT_EQ(arraysize(expected_ignore_check_cast), mir_count_); + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = expected_ignore_check_cast[i] ? MIR_IGNORE_CHECK_CAST : 0u; + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + +TEST_F(GlobalValueNumberingTest, CheckCastDominators) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)), // Block #3, top of the diamond. 
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(3)), // Block #4, left side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #5, right side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(5)), // Block #6, right side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 6)), // Block #7, bottom. + }; + static const MIRDef mirs[] = { + DEF_UNOP(3u, Instruction::INSTANCE_OF, 0u, 100u), + DEF_UNOP(3u, Instruction::INSTANCE_OF, 1u, 200u), + DEF_IFZ(3u, Instruction::IF_NEZ, 0u), + DEF_INVOKE1(4u, Instruction::CHECK_CAST, 100u), + DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u), + DEF_INVOKE1(6u, Instruction::CHECK_CAST, 200u), + DEF_INVOKE1(6u, Instruction::CHECK_CAST, 100u), + DEF_INVOKE1(7u, Instruction::CHECK_CAST, 100u), + }; + + static const bool expected_ignore_check_cast[] = { + false, // instance-of + false, // instance-of + false, // if-nez + false, // Not eliminated, fall-through branch. + true, // Eliminated. + false, // Not eliminated, different value. + false, // Not eliminated, different type. + false, // Not eliminated, bottom block. + }; + + PrepareBasicBlocks(bbs); + PrepareMIRs(mirs); + mirs_[0].dalvikInsn.vC = 1234; // type for instance-of + mirs_[1].dalvikInsn.vC = 1234; // type for instance-of + mirs_[3].dalvikInsn.vB = 1234; // type for check-cast + mirs_[4].dalvikInsn.vB = 1234; // type for check-cast + mirs_[5].dalvikInsn.vB = 1234; // type for check-cast + mirs_[6].dalvikInsn.vB = 4321; // type for check-cast + mirs_[7].dalvikInsn.vB = 1234; // type for check-cast + PerformGVN(); + PerformGVNCodeModifications(); + ASSERT_EQ(arraysize(expected_ignore_check_cast), mir_count_); + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = expected_ignore_check_cast[i] ? MIR_IGNORE_CHECK_CAST : 0u; + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + } // namespace art diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index 2e7f0328d2..2d4c18ff49 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -1058,7 +1058,6 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { case Instruction::INVOKE_INTERFACE_RANGE: case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: - case Instruction::CHECK_CAST: case Instruction::THROW: case Instruction::FILLED_NEW_ARRAY: case Instruction::FILLED_NEW_ARRAY_RANGE: @@ -1073,6 +1072,12 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { uses_all_vregs = true; break; + case Instruction::CHECK_CAST: + DCHECK_EQ(mir->ssa_rep->num_uses, 1); + must_keep = true; // Keep for type information even if MIR_IGNORE_CHECK_CAST. 
+ uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) == 0; + break; + case kMirOpNullCheck: DCHECK_EQ(mir->ssa_rep->num_uses, 1); if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) { diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index 99b6683b26..dc222b5211 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -1520,7 +1520,6 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::GOTO: case Instruction::GOTO_16: case Instruction::GOTO_32: - case Instruction::CHECK_CAST: case Instruction::THROW: case Instruction::FILL_ARRAY_DATA: case Instruction::PACKED_SWITCH: @@ -1612,9 +1611,32 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { HandleInvokeOrClInitOrAcquireOp(mir); break; + case Instruction::INSTANCE_OF: { + uint16_t operand = GetOperandValue(mir->ssa_rep->uses[0]); + uint16_t type = mir->dalvikInsn.vC; + res = gvn_->LookupValue(Instruction::INSTANCE_OF, operand, type, kNoValue); + SetOperandValue(mir->ssa_rep->defs[0], res); + } + break; + case Instruction::CHECK_CAST: + if (gvn_->CanModify()) { + // Check if there was an instance-of operation on the same value and if we are + // in a block where its result is true. If so, we can eliminate the check-cast. + uint16_t operand = GetOperandValue(mir->ssa_rep->uses[0]); + uint16_t type = mir->dalvikInsn.vB; + uint16_t cond = gvn_->FindValue(Instruction::INSTANCE_OF, operand, type, kNoValue); + if (cond != kNoValue && gvn_->IsTrueInBlock(cond, Id())) { + if (gvn_->GetCompilationUnit()->verbose) { + LOG(INFO) << "Removing check-cast at 0x" << std::hex << mir->offset; + } + // Don't use kMirOpNop. Keep the check-cast as it defines the type of the register. + mir->optimization_flags |= MIR_IGNORE_CHECK_CAST; + } + } + break; + case Instruction::MOVE_RESULT: case Instruction::MOVE_RESULT_OBJECT: - case Instruction::INSTANCE_OF: // 1 result, treat as unique each time, use result s_reg - will be unique. res = GetOperandValue(mir->ssa_rep->defs[0]); SetOperandValue(mir->ssa_rep->defs[0], res); diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index a89b2508d4..3d7a640ce3 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -416,7 +416,7 @@ static const uint16_t kAnalysisAttributes[kMirOpLast] = { // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA} kAnInvoke | kAnHeavyWeight, - // 73 RETURN_VOID_BARRIER + // 73 RETURN_VOID_NO_BARRIER kAnBranch, // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN} diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index dfaff6ce3d..f638b0bf4d 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -374,7 +374,7 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { // 72 INVOKE_INTERFACE {vD, vE, vF, vG, vA} DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS, - // 73 RETURN_VOID_BARRIER + // 73 RETURN_VOID_NO_BARRIER DF_NOP, // 74 INVOKE_VIRTUAL_RANGE {vCCCC .. 
vNNNN} diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index f354a49111..3103f96e4e 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -2459,11 +2459,9 @@ BasicBlock* MIRGraph::CreateNewBB(BBType block_type) { return res; } -void MIRGraph::CalculateBasicBlockInformation() { - auto* quick_compiler = down_cast<QuickCompiler*>(cu_->compiler_driver->GetCompiler()); - DCHECK(quick_compiler != nullptr); +void MIRGraph::CalculateBasicBlockInformation(const PassManager* const post_opt_pass_manager) { /* Create the pass driver and launch it */ - PassDriverMEPostOpt driver(quick_compiler->GetPostOptPassManager(), cu_); + PassDriverMEPostOpt driver(post_opt_pass_manager, cu_); driver.Launch(); } diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3dae5b4fa9..3298af1162 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -38,6 +38,7 @@ class DexCompilationUnit; class DexFileMethodInliner; class GlobalValueNumbering; class GvnDeadCodeElimination; +class PassManager; // Forward declaration. class MIRGraph; @@ -149,6 +150,7 @@ enum OatMethodAttributes { #define MIR_IGNORE_NULL_CHECK (1 << kMIRIgnoreNullCheck) #define MIR_IGNORE_RANGE_CHECK (1 << kMIRIgnoreRangeCheck) +#define MIR_IGNORE_CHECK_CAST (1 << kMIRIgnoreCheckCast) #define MIR_STORE_NON_NULL_VALUE (1 << kMIRStoreNonNullValue) #define MIR_CLASS_IS_INITIALIZED (1 << kMIRClassIsInitialized) #define MIR_CLASS_IS_IN_DEX_CACHE (1 << kMIRClassIsInDexCache) @@ -1201,7 +1203,7 @@ class MIRGraph { void AllocateSSAUseData(MIR *mir, int num_uses); void AllocateSSADefData(MIR *mir, int num_defs); - void CalculateBasicBlockInformation(); + void CalculateBasicBlockInformation(const PassManager* const post_opt); void ComputeDFSOrders(); void ComputeDefBlockMatrix(); void ComputeDominators(); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 93749e4424..266b7c3064 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -1751,6 +1751,9 @@ bool MIRGraph::CanThrow(MIR* mir) const { DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0); // Non-throwing only if range check has been eliminated. return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0); + } else if (mir->dalvikInsn.opcode == Instruction::CHECK_CAST && + (opt_flags & MIR_IGNORE_CHECK_CAST) != 0) { + return false; } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) { // No more checks for these (null check was processed above). diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc index 320d06aa06..2e871dafef 100644 --- a/compiler/dex/pass_driver_me_opts.cc +++ b/compiler/dex/pass_driver_me_opts.cc @@ -66,7 +66,7 @@ void PassDriverMEOpts::ApplyPass(PassDataHolder* data, const Pass* pass) { // Is it dirty at least? 
if (pass_me_data_holder->dirty == true) { CompilationUnit* c_unit = pass_me_data_holder->c_unit; - c_unit->mir_graph.get()->CalculateBasicBlockInformation(); + c_unit->mir_graph.get()->CalculateBasicBlockInformation(post_opt_pass_manager_); } } } diff --git a/compiler/dex/pass_driver_me_opts.h b/compiler/dex/pass_driver_me_opts.h index b930d02d1e..e94c1894c9 100644 --- a/compiler/dex/pass_driver_me_opts.h +++ b/compiler/dex/pass_driver_me_opts.h @@ -29,8 +29,10 @@ class PassManager; class PassDriverMEOpts : public PassDriverME { public: - explicit PassDriverMEOpts(const PassManager* const manager, CompilationUnit* cu) - : PassDriverME(manager, cu) { + explicit PassDriverMEOpts(const PassManager* const manager, + const PassManager* const post_opt_pass_manager, + CompilationUnit* cu) + : PassDriverME(manager, cu), post_opt_pass_manager_(post_opt_pass_manager) { } ~PassDriverMEOpts() { @@ -45,6 +47,8 @@ class PassDriverMEOpts : public PassDriverME { * @brief Apply a patch: perform start/work/end functions. */ virtual void ApplyPass(PassDataHolder* data, const Pass* pass) OVERRIDE; + + const PassManager* const post_opt_pass_manager_; }; } // namespace art diff --git a/compiler/dex/pass_manager.cc b/compiler/dex/pass_manager.cc index 6d58f65b68..6377a6c07a 100644 --- a/compiler/dex/pass_manager.cc +++ b/compiler/dex/pass_manager.cc @@ -33,7 +33,7 @@ void PassManager::CreateDefaultPassList() { // Add each pass which isn't disabled into default_pass_list_. for (const auto* pass : passes_) { if (options_.GetDisablePassList().find(pass->GetName()) != std::string::npos) { - LOG(INFO) << "Skipping disabled pass " << pass->GetName(); + VLOG(compiler) << "Skipping disabled pass " << pass->GetName(); } else { default_pass_list_.push_back(pass); } diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 1a9dbeae0f..318292029d 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -433,7 +433,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { class StackOverflowSlowPath : public LIRSlowPath { public: StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr), + : LIRSlowPath(m2l, branch), restore_lr_(restore_lr), sp_displace_(sp_displace) { } void Compile() OVERRIDE { diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 82751626e3..1dcbe609b5 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -351,8 +351,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (generate_explicit_stack_overflow_check) { class StackOverflowSlowPath: public LIRSlowPath { public: - StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) : - LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), + StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) + : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) { } void Compile() OVERRIDE { diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index e57889aeb7..d613ccab66 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -86,7 +86,7 @@ void Mir2Lir::AddDivZeroCheckSlowPath(LIR* branch) { class DivZeroCheckSlowPath : public Mir2Lir::LIRSlowPath { public: DivZeroCheckSlowPath(Mir2Lir* m2l, LIR* branch_in) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), 
branch_in) { + : LIRSlowPath(m2l, branch_in) { } void Compile() OVERRIDE { @@ -105,7 +105,7 @@ void Mir2Lir::GenArrayBoundsCheck(RegStorage index, RegStorage length) { public: ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, RegStorage index_in, RegStorage length_in) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in), + : LIRSlowPath(m2l, branch_in), index_(index_in), length_(length_in) { } @@ -129,7 +129,7 @@ void Mir2Lir::GenArrayBoundsCheck(int index, RegStorage length) { class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { public: ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, int index_in, RegStorage length_in) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in), + : LIRSlowPath(m2l, branch_in), index_(index_in), length_(length_in) { } @@ -159,7 +159,7 @@ LIR* Mir2Lir::GenNullCheck(RegStorage reg) { class NullCheckSlowPath : public Mir2Lir::LIRSlowPath { public: NullCheckSlowPath(Mir2Lir* m2l, LIR* branch) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch) { + : LIRSlowPath(m2l, branch) { } void Compile() OVERRIDE { @@ -581,7 +581,7 @@ class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { // At least one will be non-null here, otherwise we wouldn't generate the slow path. StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, RegStorage r_base) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), unresolved != nullptr ? unresolved : uninit, cont), + : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), second_branch_(unresolved != nullptr ? uninit : nullptr), storage_index_(storage_index), r_base_(r_base) { } @@ -1052,9 +1052,9 @@ void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { class SlowPath : public LIRSlowPath { public: SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegLocation& rl_method_in, const RegLocation& rl_result_in) : - LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont_in), - type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) { + const RegLocation& rl_method_in, const RegLocation& rl_result_in) + : LIRSlowPath(m2l, fromfast, cont_in), + type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) { } void Compile() { @@ -1120,9 +1120,9 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { class SlowPath : public LIRSlowPath { public: SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in, - int32_t string_idx_in) : - LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast_in, cont_in), - r_method_(r_method_in), string_idx_(string_idx_in) { + int32_t string_idx_in) + : LIRSlowPath(m2l, fromfast_in, cont_in), + r_method_(r_method_in), string_idx_(string_idx_in) { } void Compile() { @@ -1304,7 +1304,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know public: InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in, RegLocation rl_src_in) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), type_idx_(type_idx_in), + : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in), rl_src_(rl_src_in) { } @@ -1403,7 +1403,12 @@ void Mir2Lir::GenInstanceof(uint32_t type_idx, RegLocation rl_dest, RegLocation } } -void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src) { +void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, + RegLocation rl_src) { + if ((opt_flags & MIR_IGNORE_CHECK_CAST) != 0) { + // Compiler analysis proved that this check-cast would 
never cause an exception. + return; + } bool type_known_final, type_known_abstract, use_declaring_class; bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, @@ -1448,9 +1453,9 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ class SlowPath : public LIRSlowPath { public: SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegStorage class_reg_in) : - LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont_in), - type_idx_(type_idx_in), class_reg_(class_reg_in) { + const RegStorage class_reg_in) + : LIRSlowPath(m2l, fromfast, cont_in), + type_idx_(type_idx_in), class_reg_(class_reg_in) { } void Compile() { @@ -1479,8 +1484,8 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ // to call a helper function to do the check. class SlowPath : public LIRSlowPath { public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load): - LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), load_(load) { + SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load) + : LIRSlowPath(m2l, fromfast, cont), load_(load) { } void Compile() { @@ -2174,7 +2179,7 @@ void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_d class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { public: SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) { + : LIRSlowPath(m2l, branch, cont) { } void Compile() OVERRIDE { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 6b553fd181..2d41ba1795 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -48,7 +48,8 @@ void Mir2Lir::AddIntrinsicSlowPath(CallInfo* info, LIR* branch, LIR* resume) { class IntrinsicSlowPathPath : public Mir2Lir::LIRSlowPath { public: IntrinsicSlowPathPath(Mir2Lir* m2l, CallInfo* info_in, LIR* branch_in, LIR* resume_in) - : LIRSlowPath(m2l, info_in->offset, branch_in, resume_in), info_(info_in) { + : LIRSlowPath(m2l, branch_in, resume_in), info_(info_in) { + DCHECK_EQ(info_in->offset, current_dex_pc_); } void Compile() { diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc index 5c98b10b58..0218dcdd24 100644 --- a/compiler/dex/quick/mips/assemble_mips.cc +++ b/compiler/dex/quick/mips/assemble_mips.cc @@ -393,6 +393,14 @@ const MipsEncodingMap MipsMir2Lir::EncodingMap[kMipsLast] = { kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1, "mtc1", "!0r,!1s", 4), + ENCODING_MAP(kMipsMfhc1, 0x44600000, + kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mfhc1", "!0r,!1s", 4), + ENCODING_MAP(kMipsMthc1, 0x44e00000, + kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1, + "mthc1", "!0r,!1s", 4), ENCODING_MAP(kMipsDelta, 0x27e00000, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, 15, 0, kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | REG_USE_LR | @@ -413,6 +421,21 @@ const MipsEncodingMap MipsMir2Lir::EncodingMap[kMipsLast] = { kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP, "sync", ";", 4), + + // The following are mips32r6 instructions. 
+ ENCODING_MAP(kMipsR6Div, 0x0000009a, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "div", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsR6Mod, 0x000000da, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mod", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsR6Mul, 0x00000098, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsUndefined, 0x64000000, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index d9471f6fd1..b067221c27 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -263,7 +263,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) class StackOverflowSlowPath : public LIRSlowPath { public: StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) { + : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) { } void Compile() OVERRIDE { m2l_->ResetRegPool(); diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index e1b43ca848..47837a659b 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -76,7 +76,9 @@ class MipsMir2Lir FINAL : public Mir2Lir { // Required for target - register utilities. RegStorage Solo64ToPair64(RegStorage reg); + RegStorage Fp64ToSolo32(RegStorage reg); RegStorage TargetReg(SpecialTargetRegister reg); + RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE; RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -232,6 +234,12 @@ class MipsMir2Lir FINAL : public Mir2Lir { return false; } + // True if isa is rev R6. + const bool isaIsR6_; + + // True if floating point unit is 32bits. + const bool fpuIs32Bit_; + private: void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc index d7ed7acf57..37bf1a6b9a 100644 --- a/compiler/dex/quick/mips/fp_mips.cc +++ b/compiler/dex/quick/mips/fp_mips.cc @@ -181,6 +181,30 @@ void MipsMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, } } +// Get the reg storage for a wide FP. Is either a solo or a pair. Base is Mips-counted, e.g., even +// values are valid (0, 2). +static RegStorage GetWideArgFP(bool fpuIs32Bit, size_t base) { + // Think about how to make this be able to be computed. E.g., rMIPS_FARG0 + base. Right now + // inlining should optimize everything. 
+ if (fpuIs32Bit) { + switch (base) { + case 0: + return RegStorage(RegStorage::k64BitPair, rMIPS_FARG0, rMIPS_FARG1); + case 2: + return RegStorage(RegStorage::k64BitPair, rMIPS_FARG2, rMIPS_FARG3); + } + } else { + switch (base) { + case 0: + return RegStorage(RegStorage::k64BitSolo, rMIPS_FARG0); + case 2: + return RegStorage(RegStorage::k64BitSolo, rMIPS_FARG2); + } + } + LOG(FATAL) << "Unsupported Mips.GetWideFP: " << fpuIs32Bit << " " << base; + UNREACHABLE(); +} + void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { bool wide = true; @@ -208,8 +232,8 @@ void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, FlushAllRegs(); LockCallTemps(); if (wide) { - RegStorage r_tmp1(RegStorage::k64BitPair, rMIPS_FARG0, rMIPS_FARG1); - RegStorage r_tmp2(RegStorage::k64BitPair, rMIPS_FARG2, rMIPS_FARG3); + RegStorage r_tmp1 = GetWideArgFP(fpuIs32Bit_, 0); + RegStorage r_tmp2 = GetWideArgFP(fpuIs32Bit_, 2); LoadValueDirectWideFixed(rl_src1, r_tmp1); LoadValueDirectWideFixed(rl_src2, r_tmp2); } else { diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 17ac62931d..8c9acf60f1 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -194,17 +194,34 @@ void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { bool src_fp = r_src.IsFloat(); if (dest_fp) { if (src_fp) { + // Here if both src and dest are fp registers. OpRegCopy will choose the right copy + // (solo or pair). OpRegCopy(r_dest, r_src); } else { - /* note the operands are swapped for the mtc1 instr */ - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); - NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); + // note the operands are swapped for the mtc1 and mthc1 instr. + // Here if dest is fp reg and src is core reg. + if (fpuIs32Bit_) { + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); + NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); + } else { + r_dest = Fp64ToSolo32(r_dest); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); + NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); + } } } else { if (src_fp) { - NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg()); - NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg()); + // Here if dest is core reg and src is fp reg. + if (fpuIs32Bit_) { + NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg()); + NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg()); + } else { + r_src = Fp64ToSolo32(r_src); + NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetReg()); + NewLIR2(kMipsMfhc1, r_dest.GetHighReg(), r_src.GetReg()); + } } else { + // Here if both src and dest are core registers. // Handle overlap if (r_src.GetHighReg() == r_dest.GetLowReg()) { OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); @@ -243,12 +260,14 @@ void MipsMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2, bool is_div) { - NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg()); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - if (is_div) { - NewLIR1(kMipsMflo, rl_result.reg.GetReg()); + + if (isaIsR6_) { + NewLIR3(is_div ? kMipsR6Div : kMipsR6Mod, + rl_result.reg.GetReg(), reg1.GetReg(), reg2.GetReg()); } else { - NewLIR1(kMipsMfhi, rl_result.reg.GetReg()); + NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg()); + NewLIR1(is_div ? 
kMipsMflo : kMipsMfhi, rl_result.reg.GetReg()); } return rl_result; } @@ -257,13 +276,7 @@ RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int bool is_div) { RegStorage t_reg = AllocTemp(); NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); - NewLIR2(kMipsDiv, reg1.GetReg(), t_reg.GetReg()); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - if (is_div) { - NewLIR1(kMipsMflo, rl_result.reg.GetReg()); - } else { - NewLIR1(kMipsMfhi, rl_result.reg.GetReg()); - } + RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div); FreeTemp(t_reg); return rl_result; } diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h index 66e3894204..70370559bc 100644 --- a/compiler/dex/quick/mips/mips_lir.h +++ b/compiler/dex/quick/mips/mips_lir.h @@ -236,22 +236,22 @@ enum MipsNativeRegisterPool { // private marker to avoid generate-operator-out. #endif // Double precision registers where the FPU is in 64-bit mode. rD0_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, - rD1_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1, - rD2_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2, - rD3_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3, - rD4_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4, - rD5_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, - rD6_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, - rD7_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, + rD1_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2, + rD2_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4, + rD3_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, + rD4_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, + rD5_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, + rD6_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, + rD7_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, #if 0 // TODO: expand resource mask to enable use of all MIPS fp registers. 
- rD8_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, - rD9_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, - rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, - rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11, - rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, - rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, - rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, - rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, + rD8_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16, + rD9_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18, + rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20, + rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22, + rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24, + rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26, + rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28, + rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30, #endif }; @@ -368,10 +368,12 @@ const RegLocation mips_loc_c_return_wide const RegLocation mips_loc_c_return_float {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, RegStorage(RegStorage::k32BitSolo, rF0), INVALID_SREG, INVALID_SREG}; -// FIXME: move MIPS to k64Bitsolo for doubles -const RegLocation mips_loc_c_return_double +const RegLocation mips_loc_c_return_double_fr0 {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, RegStorage(RegStorage::k64BitPair, rF0, rF1), INVALID_SREG, INVALID_SREG}; +const RegLocation mips_loc_c_return_double_fr1 + {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, + RegStorage(RegStorage::k64BitSolo, rF0), INVALID_SREG, INVALID_SREG}; enum MipsShiftEncodings { kMipsLsl = 0x0, @@ -476,13 +478,21 @@ enum MipsOpCode { kMipsFldc1, // ldc1 t,o(b) [110101] b[25..21] t[20..16] o[15..0]. kMipsFswc1, // swc1 t,o(b) [111001] b[25..21] t[20..16] o[15..0]. kMipsFsdc1, // sdc1 t,o(b) [111101] b[25..21] t[20..16] o[15..0]. - kMipsMfc1, // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000]. - kMipsMtc1, // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000]. + kMipsMfc1, // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000]. + kMipsMtc1, // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000]. + kMipsMfhc1, // mfhc1 t,s [01000100011] t[20..16] s[15..11] [00000000000]. + kMipsMthc1, // mthc1 t,s [01000100111] t[20..16] s[15..11] [00000000000]. kMipsDelta, // Psuedo for ori t, s, <label>-<label>. kMipsDeltaHi, // Pseudo for lui t, high16(<label>-<label>). kMipsDeltaLo, // Pseudo for ori t, s, low16(<label>-<label>). kMipsCurrPC, // jal to .+8 to materialize pc. kMipsSync, // sync kind [000000] [0000000000000000] s[10..6] [001111]. + + // The following are mips32r6 instructions. + kMipsR6Div, // div d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011010]. + kMipsR6Mod, // mod d,s,t [000000] s[25..21] t[20..16] d[15..11] [00011011010]. + kMipsR6Mul, // mul d,s,t [000000] s[25..21] t[20..16] d[15..11] [00010011000]. + kMipsUndefined, // undefined [011001xxxxxxxxxxxxxxxx]. 
kMipsLast }; diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 8574ffd541..830f63ac5f 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -86,16 +86,48 @@ RegLocation MipsMir2Lir::LocCReturnFloat() { } RegLocation MipsMir2Lir::LocCReturnDouble() { - return mips_loc_c_return_double; + if (fpuIs32Bit_) { + return mips_loc_c_return_double_fr0; + } else { + return mips_loc_c_return_double_fr1; + } } // Convert k64BitSolo into k64BitPair RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) { DCHECK(reg.IsDouble()); + DCHECK_EQ(reg.GetRegNum() & 1, 0); int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint; return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1); } +// Convert 64bit FP (k64BitSolo or k64BitPair) into k32BitSolo. +// This routine is only used to allow a 64bit FPU to access FP registers 32bits at a time. +RegStorage MipsMir2Lir::Fp64ToSolo32(RegStorage reg) { + DCHECK(!fpuIs32Bit_); + DCHECK(reg.IsDouble()); + DCHECK(!reg.IsPair()); + int reg_num = reg.GetRegNum() | RegStorage::kFloatingPoint; + return RegStorage(RegStorage::k32BitSolo, reg_num); +} + +// Return a target-dependent special register. +RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg, WideKind wide_kind) { + if (wide_kind == kWide) { + DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg)); + RegStorage ret_reg = RegStorage::MakeRegPair(TargetReg(reg), + TargetReg(static_cast<SpecialTargetRegister>(reg + 1))); + if (!fpuIs32Bit_ && ret_reg.IsFloat()) { + // convert 64BitPair to 64BitSolo for 64bit FPUs. + RegStorage low = ret_reg.GetLow(); + ret_reg = RegStorage::FloatSolo64(low.GetRegNum()); + } + return ret_reg; + } else { + return TargetReg(reg); + } +} + // Return a target-dependent special register. RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { RegStorage res_reg; @@ -145,12 +177,7 @@ RegStorage MipsMir2Lir::InToRegStorageMipsMapper::GetNextReg(ShortyArg arg) { */ ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const { if (reg.IsDouble()) { - if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures() - ->Is32BitFloatingPoint()) { - return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0); - } else { - return ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0); - } + return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0); } else if (reg.IsSingle()) { return ResourceMask::Bit(reg.GetRegNum() + kMipsFPReg0); } else { @@ -401,8 +428,7 @@ void MipsMir2Lir::ClobberCallerSave() { Clobber(rs_rF13); Clobber(rs_rF14); Clobber(rs_rF15); - if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures() - ->Is32BitFloatingPoint()) { + if (fpuIs32Bit_) { Clobber(rs_rD0_fr0); Clobber(rs_rD1_fr0); Clobber(rs_rD2_fr0); @@ -462,28 +488,20 @@ bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind ATTRIBUTE_UNUSED) { } void MipsMir2Lir::CompilerInitializeRegAlloc() { - const bool fpu_is_32bit = - cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures() - ->Is32BitFloatingPoint(); reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs, - fpu_is_32bit ? dp_fr0_regs : dp_fr1_regs, + fpuIs32Bit_ ? dp_fr0_regs : dp_fr1_regs, reserved_regs, empty_pool /* reserved64 */, core_temps, empty_pool /* core64_temps */, sp_temps, - fpu_is_32bit ? dp_fr0_temps : dp_fr1_temps)); + fpuIs32Bit_ ? 
dp_fr0_temps : dp_fr1_temps)); // Target-specific adjustments. // Alias single precision floats to appropriate half of overlapping double. for (RegisterInfo* info : reg_pool_->sp_regs_) { int sp_reg_num = info->GetReg().GetRegNum(); - int dp_reg_num; - if (fpu_is_32bit) { - dp_reg_num = sp_reg_num & ~1; - } else { - dp_reg_num = sp_reg_num >> 1; - } + int dp_reg_num = sp_reg_num & ~1; RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num); RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); // Double precision register's master storage should refer to itself. @@ -502,11 +520,7 @@ void MipsMir2Lir::CompilerInitializeRegAlloc() { // TODO: adjust when we roll to hard float calling convention. reg_pool_->next_core_reg_ = 2; reg_pool_->next_sp_reg_ = 2; - if (fpu_is_32bit) { - reg_pool_->next_dp_reg_ = 2; - } else { - reg_pool_->next_dp_reg_ = 1; - } + reg_pool_->next_dp_reg_ = 2; } /* @@ -610,7 +624,11 @@ RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volati } MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) - : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) { + : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this), + isaIsR6_(cu->compiler_driver->GetInstructionSetFeatures() + ->AsMipsInstructionSetFeatures()->IsR6()), + fpuIs32Bit_(cu->compiler_driver->GetInstructionSetFeatures() + ->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) { for (int i = 0; i < kMipsLast; i++) { DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i) << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 2d26922dca..3b7e0ed23b 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -182,7 +182,11 @@ LIR* MipsMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, R opcode = kMipsAnd; break; case kOpMul: - opcode = kMipsMul; + if (isaIsR6_) { + opcode = kMipsR6Mul; + } else { + opcode = kMipsMul; + } break; case kOpOr: opcode = kMipsOr; @@ -271,7 +275,11 @@ LIR* MipsMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, i break; case kOpMul: short_form = false; - opcode = kMipsMul; + if (isaIsR6_) { + opcode = kMipsR6Mul; + } else { + opcode = kMipsMul; + } break; default: LOG(FATAL) << "Bad case in OpRegRegImm"; @@ -359,12 +367,23 @@ LIR* MipsMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, R LIR* MipsMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { LIR *res; - if (!r_dest.IsPair()) { - // Form 64-bit pair - r_dest = Solo64ToPair64(r_dest); + if (fpuIs32Bit_ || !r_dest.IsFloat()) { + // 32bit FPU (pairs) or loading into GPR. + if (!r_dest.IsPair()) { + // Form 64-bit pair + r_dest = Solo64ToPair64(r_dest); + } + res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value)); + LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value)); + } else { + // Here if we have a 64bit FPU and loading into FPR. 
+ RegStorage r_temp = AllocTemp(); + r_dest = Fp64ToSolo32(r_dest); + res = LoadConstantNoClobber(r_dest, Low32Bits(value)); + LoadConstantNoClobber(r_temp, High32Bits(value)); + NewLIR2(kMipsMthc1, r_temp.GetReg(), r_dest.GetReg()); + FreeTemp(r_temp); } - res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value)); - LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value)); return res; } @@ -483,32 +502,29 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora LIR *load2 = NULL; MipsOpCode opcode = kMipsNop; bool short_form = IS_SIMM16(displacement); - bool pair = r_dest.IsPair(); + bool is64bit = false; switch (size) { case k64: case kDouble: - if (!pair) { + is64bit = true; + if (fpuIs32Bit_ && !r_dest.IsPair()) { // Form 64-bit pair r_dest = Solo64ToPair64(r_dest); - pair = 1; - } - if (r_dest.IsFloat()) { - DCHECK_EQ(r_dest.GetLowReg(), r_dest.GetHighReg() - 1); - opcode = kMipsFlwc1; - } else { - opcode = kMipsLw; } short_form = IS_SIMM16_2WORD(displacement); - DCHECK_EQ((displacement & 0x3), 0); - break; + FALLTHROUGH_INTENDED; case k32: case kSingle: case kReference: opcode = kMipsLw; if (r_dest.IsFloat()) { opcode = kMipsFlwc1; - DCHECK(r_dest.IsSingle()); + if (!is64bit) { + DCHECK(r_dest.IsSingle()); + } else { + DCHECK(r_dest.IsDouble()); + } } DCHECK_EQ((displacement & 0x3), 0); break; @@ -531,35 +547,56 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora } if (short_form) { - if (!pair) { + if (!is64bit) { load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg()); } else { - load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg()); - load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg()); + if (fpuIs32Bit_ || !r_dest.IsFloat()) { + DCHECK(r_dest.IsPair()); + load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg()); + load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg()); + } else { + // Here if 64bit fpu and r_dest is a 64bit fp register. + RegStorage r_tmp = AllocTemp(); + // FIXME: why is r_dest a 64BitPair here??? + r_dest = Fp64ToSolo32(r_dest); + load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg()); + load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg()); + NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg()); + FreeTemp(r_tmp); + } } } else { - if (pair) { - RegStorage r_tmp = AllocTemp(); - res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement); - load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg()); - load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg()); - FreeTemp(r_tmp); - } else { - RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest; + if (!is64bit) { + RegStorage r_tmp = (r_base == r_dest || r_dest.IsFloat()) ? 
AllocTemp() : r_dest; res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement); load = NewLIR3(opcode, r_dest.GetReg(), 0, r_tmp.GetReg()); if (r_tmp != r_dest) FreeTemp(r_tmp); + } else { + RegStorage r_tmp = AllocTemp(); + res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement); + if (fpuIs32Bit_ || !r_dest.IsFloat()) { + DCHECK(r_dest.IsPair()); + load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg()); + load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg()); + } else { + // Here if 64bit fpu and r_dest is a 64bit fp register + r_dest = Fp64ToSolo32(r_dest); + load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), LOWORD_OFFSET, r_tmp.GetReg()); + load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), HIWORD_OFFSET, r_tmp.GetReg()); + NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg()); + } + FreeTemp(r_tmp); } } if (mem_ref_type_ == ResourceMask::kDalvikReg) { DCHECK_EQ(r_base, rs_rMIPS_SP); - AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, - true /* is_load */, pair /* is64bit */); - if (pair) { + AnnotateDalvikRegAccess(load, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2, + true /* is_load */, is64bit /* is64bit */); + if (is64bit) { AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2, - true /* is_load */, pair /* is64bit */); + true /* is_load */, is64bit /* is64bit */); } } return load; @@ -594,32 +631,29 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, LIR *store2 = NULL; MipsOpCode opcode = kMipsNop; bool short_form = IS_SIMM16(displacement); - bool pair = r_src.IsPair(); + bool is64bit = false; switch (size) { case k64: case kDouble: - if (!pair) { + is64bit = true; + if (fpuIs32Bit_ && !r_src.IsPair()) { // Form 64-bit pair r_src = Solo64ToPair64(r_src); - pair = 1; - } - if (r_src.IsFloat()) { - DCHECK_EQ(r_src.GetLowReg(), r_src.GetHighReg() - 1); - opcode = kMipsFswc1; - } else { - opcode = kMipsSw; } short_form = IS_SIMM16_2WORD(displacement); - DCHECK_EQ((displacement & 0x3), 0); - break; + FALLTHROUGH_INTENDED; case k32: case kSingle: case kReference: opcode = kMipsSw; if (r_src.IsFloat()) { opcode = kMipsFswc1; - DCHECK(r_src.IsSingle()); + if (!is64bit) { + DCHECK(r_src.IsSingle()); + } else { + DCHECK(r_src.IsDouble()); + } } DCHECK_EQ((displacement & 0x3), 0); break; @@ -637,31 +671,53 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, } if (short_form) { - if (!pair) { + if (!is64bit) { store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg()); } else { - store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg()); - store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg()); + if (fpuIs32Bit_ || !r_src.IsFloat()) { + DCHECK(r_src.IsPair()); + store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg()); + store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg()); + } else { + // Here if 64bit fpu and r_src is a 64bit fp register + RegStorage r_tmp = AllocTemp(); + r_src = Fp64ToSolo32(r_src); + store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg()); + NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg()); + store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg()); + FreeTemp(r_tmp); + } } } else { RegStorage r_scratch = AllocTemp(); res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement); - if (!pair) { + if 
(!is64bit) { store = NewLIR3(opcode, r_src.GetReg(), 0, r_scratch.GetReg()); } else { - store = NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg()); - store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg()); + if (fpuIs32Bit_ || !r_src.IsFloat()) { + DCHECK(r_src.IsPair()); + store = NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg()); + store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg()); + } else { + // Here if 64bit fpu and r_src is a 64bit fp register + RegStorage r_tmp = AllocTemp(); + r_src = Fp64ToSolo32(r_src); + store = NewLIR3(kMipsFswc1, r_src.GetReg(), LOWORD_OFFSET, r_scratch.GetReg()); + NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg()); + store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), HIWORD_OFFSET, r_scratch.GetReg()); + FreeTemp(r_tmp); + } } FreeTemp(r_scratch); } if (mem_ref_type_ == ResourceMask::kDalvikReg) { DCHECK_EQ(r_base, rs_rMIPS_SP); - AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, - false /* is_load */, pair /* is64bit */); - if (pair) { + AnnotateDalvikRegAccess(store, (displacement + (is64bit ? LOWORD_OFFSET : 0)) >> 2, + false /* is_load */, is64bit /* is64bit */); + if (is64bit) { AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2, - false /* is_load */, pair /* is64bit */); + false /* is_load */, is64bit /* is64bit */); } } diff --git a/compiler/dex/quick/mips64/call_mips64.cc b/compiler/dex/quick/mips64/call_mips64.cc index 31be1c21a6..0e587706cc 100644 --- a/compiler/dex/quick/mips64/call_mips64.cc +++ b/compiler/dex/quick/mips64/call_mips64.cc @@ -268,7 +268,7 @@ void Mips64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method class StackOverflowSlowPath : public LIRSlowPath { public: StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) { + : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) { } void Compile() OVERRIDE { m2l_->ResetRegPool(); diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 83486265c4..afacee02d3 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -27,7 +27,7 @@ namespace art { class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { public: SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), + : LIRSlowPath(m2l, branch, cont), num_used_args_(0u) { } @@ -540,7 +540,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list GenMoveException(rl_dest); break; - case Instruction::RETURN_VOID_BARRIER: + case Instruction::RETURN_VOID_NO_BARRIER: case Instruction::RETURN_VOID: if (((cu_->access_flags & kAccConstructor) != 0) && cu_->compiler_driver->RequiresConstructorBarrier(Thread::Current(), cu_->dex_file, @@ -632,7 +632,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list break; case Instruction::CHECK_CAST: { - GenCheckCast(mir->offset, vB, rl_src[0]); + GenCheckCast(opt_flags, mir->offset, vB, rl_src[0]); break; } case Instruction::INSTANCE_OF: diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 6f3f057038..70785dceef 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -490,9 +490,9 @@ class Mir2Lir { class LIRSlowPath : public ArenaObject<kArenaAllocSlowPaths> { public: - LIRSlowPath(Mir2Lir* 
m2l, const DexOffset dexpc, LIR* fromfast, - LIR* cont = nullptr) : - m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) { + LIRSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont = nullptr) + : m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(m2l->current_dalvik_offset_), + fromfast_(fromfast), cont_(cont) { } virtual ~LIRSlowPath() {} virtual void Compile() = 0; @@ -826,7 +826,7 @@ class Mir2Lir { void GenNewInstance(uint32_t type_idx, RegLocation rl_dest); void GenThrow(RegLocation rl_src); void GenInstanceof(uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); - void GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src); + void GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src); void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); virtual void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 02d74a0691..1673312919 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -250,7 +250,7 @@ static int kAllOpcodes[] = { Instruction::INVOKE_DIRECT, Instruction::INVOKE_STATIC, Instruction::INVOKE_INTERFACE, - Instruction::RETURN_VOID_BARRIER, + Instruction::RETURN_VOID_NO_BARRIER, Instruction::INVOKE_VIRTUAL_RANGE, Instruction::INVOKE_SUPER_RANGE, Instruction::INVOKE_DIRECT_RANGE, @@ -708,7 +708,7 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, } /* Create the pass driver and launch it */ - PassDriverMEOpts pass_driver(GetPreOptPassManager(), &cu); + PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu); pass_driver.Launch(); /* For non-leaf methods check if we should skip compilation when the profiler is enabled. 
*/ diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 11c146549e..abee87254b 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -193,7 +193,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { class StackOverflowSlowPath : public LIRSlowPath { public: StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) { + : LIRSlowPath(m2l, branch), sp_displace_(sp_displace) { } void Compile() OVERRIDE { m2l_->ResetRegPool(); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 91168c78bd..25e34e3027 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -1412,7 +1412,7 @@ void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, public: ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in), + : LIRSlowPath(m2l, branch_in), index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) { } @@ -1460,7 +1460,7 @@ void X86Mir2Lir::GenArrayBoundsCheck(int32_t index, public: ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in, int32_t index_in, RegStorage array_base_in, int32_t len_offset_in) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch_in), + : LIRSlowPath(m2l, branch_in), index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) { } diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 3d3d5cbc2a..100d49a99e 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -491,11 +491,12 @@ void CompilerDriver::CompileAll(jobject class_loader, } } -static DexToDexCompilationLevel GetDexToDexCompilationlevel( +DexToDexCompilationLevel CompilerDriver::GetDexToDexCompilationlevel( Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const DexFile::ClassDef& class_def) { auto* const runtime = Runtime::Current(); - if (runtime->UseJit()) { + if (runtime->UseJit() || GetCompilerOptions().VerifyAtRuntime()) { + // Verify at runtime shouldn't dex to dex since we didn't resolve of verify. return kDontDexToDexCompile; } const char* descriptor = dex_file.GetClassDescriptor(class_def); @@ -605,12 +606,22 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De LoadImageClasses(timings); VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false); - Resolve(class_loader, dex_files, thread_pool, timings); - VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false); + const bool verification_enabled = compiler_options_->IsVerificationEnabled(); + const bool never_verify = compiler_options_->NeverVerify(); - if (!compiler_options_->IsVerificationEnabled()) { + // We need to resolve for never_verify since it needs to run dex to dex to add the + // RETURN_VOID_NO_BARRIER. 
+ if (never_verify || verification_enabled) { + Resolve(class_loader, dex_files, thread_pool, timings); + VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false); + } + + if (never_verify) { VLOG(compiler) << "Verify none mode specified, skipping verification."; SetVerified(class_loader, dex_files, thread_pool, timings); + } + + if (!verification_enabled) { return; } @@ -1387,8 +1398,11 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType } } else { bool method_in_image = heap->FindSpaceFromObject(method, false)->IsImageSpace(); - if (method_in_image || compiling_boot) { + if (method_in_image || compiling_boot || runtime->UseJit()) { // We know we must be able to get to the method in the image, so use that pointer. + // In the case where we are the JIT, we can always use direct pointers since we know where + // the method and its code are / will be. We don't sharpen to interpreter bridge since we + // check IsQuickToInterpreterBridge above. CHECK(!method->IsAbstract()); *type = sharp_type; *direct_method = force_relocations ? -1 : reinterpret_cast<uintptr_t>(method); @@ -1901,7 +1915,7 @@ static void SetVerifiedClass(const ParallelCompilationManager* manager, size_t c if (klass->IsResolved()) { if (klass->GetStatus() < mirror::Class::kStatusVerified) { ObjectLock<mirror::Class> lock(soa.Self(), klass); - klass->SetStatus(mirror::Class::kStatusVerified, soa.Self()); + mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, soa.Self()); } // Record the final class status if necessary. ClassReference ref(manager->GetDexFile(), class_def_index); @@ -2090,6 +2104,8 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz return; } + CompilerDriver* const driver = manager->GetCompiler(); + // Can we run DEX-to-DEX compiler on this class ? 
DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile; { @@ -2097,8 +2113,8 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz StackHandleScope<1> hs(soa.Self()); Handle<mirror::ClassLoader> class_loader( hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - dex_to_dex_compilation_level = GetDexToDexCompilationlevel(soa.Self(), class_loader, dex_file, - class_def); + dex_to_dex_compilation_level = driver->GetDexToDexCompilationlevel( + soa.Self(), class_loader, dex_file, class_def); } ClassDataItemIterator it(dex_file, class_data); // Skip fields @@ -2108,7 +2124,6 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz while (it.HasNextInstanceField()) { it.Next(); } - CompilerDriver* driver = manager->GetCompiler(); bool compilation_enabled = driver->IsClassToCompile( dex_file.StringByTypeIdx(class_def.class_idx_)); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 28a82457cc..9463c2c9bd 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -468,6 +468,10 @@ class CompilerDriver { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: + DexToDexCompilationLevel GetDexToDexCompilationlevel( + Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, + const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 5042c7594c..d06ec278ab 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -32,7 +32,8 @@ class CompilerOptions FINAL { public: enum CompilerFilter { kVerifyNone, // Skip verification and compile nothing except JNI stubs. - kInterpretOnly, // Compile nothing except JNI stubs. + kInterpretOnly, // Verify, and compile only JNI stubs. + kVerifyAtRuntime, // Only compile JNI stubs and verify at runtime. kSpace, // Maximize space savings. kBalanced, // Try to get the best performance return on compilation investment. kSpeed, // Maximize runtime performance. 
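An aside on the compiler-filter hunks here: the new kVerifyAtRuntime filter slots in between kInterpretOnly (verify ahead of time, compile only JNI stubs) and kVerifyNone (skip verification entirely). Below is a minimal standalone sketch of how the three predicates added in the next hunk partition these filters; the free functions are illustrative stand-ins for the CompilerOptions member functions in the diff, and only the filters listed above are modeled.

#include <cassert>

enum class CompilerFilter {
  kVerifyNone,       // skip verification, compile only JNI stubs
  kInterpretOnly,    // verify ahead of time, compile only JNI stubs
  kVerifyAtRuntime,  // defer verification to runtime, compile only JNI stubs
  kSpace, kBalanced, kSpeed  // verify and compile
};

bool NeverVerify(CompilerFilter f) {
  return f == CompilerFilter::kVerifyNone;
}
bool IsVerificationEnabled(CompilerFilter f) {
  return f != CompilerFilter::kVerifyNone && f != CompilerFilter::kVerifyAtRuntime;
}
bool IsCompilationEnabled(CompilerFilter f) {
  return f != CompilerFilter::kVerifyNone &&
         f != CompilerFilter::kInterpretOnly &&
         f != CompilerFilter::kVerifyAtRuntime;
}

int main() {
  // kVerifyAtRuntime: no ahead-of-time verification and no ahead-of-time compilation...
  assert(!NeverVerify(CompilerFilter::kVerifyAtRuntime));
  assert(!IsVerificationEnabled(CompilerFilter::kVerifyAtRuntime));
  assert(!IsCompilationEnabled(CompilerFilter::kVerifyAtRuntime));
  // ...unlike kInterpretOnly, which still verifies ahead of time.
  assert(IsVerificationEnabled(CompilerFilter::kInterpretOnly));
  return 0;
}

This lines up with the PreCompile changes earlier in the diff: kVerifyNone still runs Resolve so that dex-to-dex can rewrite RETURN_VOID into RETURN_VOID_NO_BARRIER, whereas kVerifyAtRuntime skips Resolve, skips verification, and is excluded from dex-to-dex via GetDexToDexCompilationlevel.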
@@ -81,13 +82,23 @@ class CompilerOptions FINAL { compiler_filter_ = compiler_filter; } + bool VerifyAtRuntime() const { + return compiler_filter_ == CompilerOptions::kVerifyAtRuntime; + } + bool IsCompilationEnabled() const { - return ((compiler_filter_ != CompilerOptions::kVerifyNone) && - (compiler_filter_ != CompilerOptions::kInterpretOnly)); + return compiler_filter_ != CompilerOptions::kVerifyNone && + compiler_filter_ != CompilerOptions::kInterpretOnly && + compiler_filter_ != CompilerOptions::kVerifyAtRuntime; } bool IsVerificationEnabled() const { - return (compiler_filter_ != CompilerOptions::kVerifyNone); + return compiler_filter_ != CompilerOptions::kVerifyNone && + compiler_filter_ != CompilerOptions::kVerifyAtRuntime; + } + + bool NeverVerify() const { + return compiler_filter_ == CompilerOptions::kVerifyNone; } size_t GetHugeMethodThreshold() const { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2cac93dd8c..ec7fd62975 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1024,8 +1024,6 @@ void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t d HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); uint16_t num_entries = table.GetNumEntries(); - // There should be at least one entry here. - DCHECK_GT(num_entries, 0U); for (size_t i = 0; i < num_entries; i++) { BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value, diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 667f686059..ecaa6f0123 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -246,12 +246,9 @@ class CodeGenerator { void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2); static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) { - if (kIsDebugBuild) { - if (type == Primitive::kPrimNot && value->IsIntConstant()) { - CHECK_EQ(value->AsIntConstant()->GetValue(), 0); - } - } - return type == Primitive::kPrimNot && !value->IsIntConstant(); + // Check that null value is not represented as an integer constant. + DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant()); + return type == Primitive::kPrimNot && !value->IsNullConstant(); } void AddAllocatedRegister(Location location) { diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 123f690763..0a069a75ef 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -903,10 +903,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - __ Comment("Unreachable"); - __ bkpt(0); - } } void LocationsBuilderARM::VisitIf(HIf* if_instr) { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 8ee150b999..99283a0056 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1597,10 +1597,6 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM64::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - down_cast<Arm64Assembler*>(GetAssembler())->Comment("Unreachable"); - __ Brk(__LINE__); // TODO: Introduce special markers for such code locations. 
- } } void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 1db16002c0..02b9b3294c 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -812,10 +812,6 @@ void LocationsBuilderX86::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - __ Comment("Unreachable"); - __ int3(); - } } void LocationsBuilderX86::VisitIf(HIf* if_instr) { @@ -2734,26 +2730,45 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { Label less, greater, done; switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { + Register left_low = left.AsRegisterPairLow<Register>(); + Register left_high = left.AsRegisterPairHigh<Register>(); + int32_t val_low = 0; + int32_t val_high = 0; + bool right_is_const = false; + + if (right.IsConstant()) { + DCHECK(right.GetConstant()->IsLongConstant()); + right_is_const = true; + int64_t val = right.GetConstant()->AsLongConstant()->GetValue(); + val_low = Low32Bits(val); + val_high = High32Bits(val); + } + if (right.IsRegisterPair()) { - __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>()); + __ cmpl(left_high, right.AsRegisterPairHigh<Register>()); } else if (right.IsDoubleStackSlot()) { - __ cmpl(left.AsRegisterPairHigh<Register>(), - Address(ESP, right.GetHighStackIndex(kX86WordSize))); + __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize))); } else { - DCHECK(right.IsConstant()) << right; - __ cmpl(left.AsRegisterPairHigh<Register>(), - Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); + DCHECK(right_is_const) << right; + if (val_high == 0) { + __ testl(left_high, left_high); + } else { + __ cmpl(left_high, Immediate(val_high)); + } } __ j(kLess, &less); // Signed compare. __ j(kGreater, &greater); // Signed compare. if (right.IsRegisterPair()) { - __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>()); + __ cmpl(left_low, right.AsRegisterPairLow<Register>()); } else if (right.IsDoubleStackSlot()) { - __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex())); + __ cmpl(left_low, Address(ESP, right.GetStackIndex())); } else { - DCHECK(right.IsConstant()) << right; - __ cmpl(left.AsRegisterPairLow<Register>(), - Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); + DCHECK(right_is_const) << right; + if (val_low == 0) { + __ testl(left_low, left_low); + } else { + __ cmpl(left_low, Immediate(val_low)); + } } break; } @@ -3649,14 +3664,21 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } } else if (constant->IsFloatConstant()) { - float value = constant->AsFloatConstant()->GetValue(); - Immediate imm(bit_cast<float, int32_t>(value)); + float fp_value = constant->AsFloatConstant()->GetValue(); + int32_t value = bit_cast<float, int32_t>(fp_value); + Immediate imm(value); if (destination.IsFpuRegister()) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp = static_cast<Register>(ensure_scratch.GetRegister()); - __ movl(temp, imm); - __ movd(destination.AsFpuRegister<XmmRegister>(), temp); + XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); + if (value == 0) { + // Easy handling of 0.0. 
+ __ xorps(dest, dest); + } else { + ScratchRegisterScope ensure_scratch( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + Register temp = static_cast<Register>(ensure_scratch.GetRegister()); + __ movl(temp, Immediate(value)); + __ movd(dest, temp); + } } else { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), imm); @@ -4111,18 +4133,38 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } else { DCHECK(second.IsConstant()) << second; int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); - Immediate low(Low32Bits(value)); - Immediate high(High32Bits(value)); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + Register first_low = first.AsRegisterPairLow<Register>(); + Register first_high = first.AsRegisterPairHigh<Register>(); if (instruction->IsAnd()) { - __ andl(first.AsRegisterPairLow<Register>(), low); - __ andl(first.AsRegisterPairHigh<Register>(), high); + if (low_value == 0) { + __ xorl(first_low, first_low); + } else if (low_value != -1) { + __ andl(first_low, low); + } + if (high_value == 0) { + __ xorl(first_high, first_high); + } else if (high_value != -1) { + __ andl(first_high, high); + } } else if (instruction->IsOr()) { - __ orl(first.AsRegisterPairLow<Register>(), low); - __ orl(first.AsRegisterPairHigh<Register>(), high); + if (low_value != 0) { + __ orl(first_low, low); + } + if (high_value != 0) { + __ orl(first_high, high); + } } else { DCHECK(instruction->IsXor()); - __ xorl(first.AsRegisterPairLow<Register>(), low); - __ xorl(first.AsRegisterPairHigh<Register>(), high); + if (low_value != 0) { + __ xorl(first_low, low); + } + if (high_value != 0) { + __ xorl(first_high, high); + } } } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 90d87d4b9f..d09c8f8e51 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -749,10 +749,6 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { UNUSED(exit); - if (kIsDebugBuild) { - __ Comment("Unreachable"); - __ int3(); - } } void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { @@ -960,7 +956,7 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -986,7 +982,18 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { - __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>()); + CpuRegister left_reg = left.AsRegister<CpuRegister>(); + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(value)); + if (value == 0) { + __ testq(left_reg, left_reg); + } else { + __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); + } + } else { + __ cmpq(left_reg, right.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimFloat: { @@ -1869,17 +1876,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { 
case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); // We can use a leaq or addq if the constant can fit in an immediate. - HInstruction* rhs = add->InputAt(1); - bool is_int32_constant = false; - if (rhs->IsLongConstant()) { - int64_t value = rhs->AsLongConstant()->GetValue(); - if (static_cast<int32_t>(value) == value) { - is_int32_constant = true; - } - } - locations->SetInAt(1, - is_int32_constant ? Location::RegisterOrConstant(rhs) : - Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1977,7 +1974,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2011,7 +2008,13 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { break; } case Primitive::kPrimLong: { - __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (second.IsConstant()) { + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(value)); + __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); + } else { + __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } break; } @@ -2042,8 +2045,13 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1))); + if (locations->InAt(1).IsConstant()) { + // Can use 3 operand multiply. 
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::SameAsFirstInput()); + } break; } case Primitive::kPrimFloat: @@ -2063,9 +2071,9 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { LocationSummary* locations = mul->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); switch (mul->GetResultType()) { case Primitive::kPrimInt: { + DCHECK(first.Equals(locations->Out())); if (second.IsRegister()) { __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (second.IsConstant()) { @@ -2079,16 +2087,27 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { break; } case Primitive::kPrimLong: { - __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (second.IsConstant()) { + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(value)); + __ imulq(locations->Out().AsRegister<CpuRegister>(), + first.AsRegister<CpuRegister>(), + Immediate(static_cast<int32_t>(value))); + } else { + DCHECK(first.Equals(locations->Out())); + __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimFloat: { + DCHECK(first.Equals(locations->Out())); __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { + DCHECK(first.Equals(locations->Out())); __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -3324,20 +3343,35 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else if (constant->IsFloatConstant()) { - Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue())); + float fp_value = constant->AsFloatConstant()->GetValue(); + int32_t value = bit_cast<float, int32_t>(fp_value); + Immediate imm(value); if (destination.IsFpuRegister()) { - __ movl(CpuRegister(TMP), imm); - __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); + XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); + if (value == 0) { + // easy FP 0.0. + __ xorps(dest, dest); + } else { + __ movl(CpuRegister(TMP), imm); + __ movd(dest, CpuRegister(TMP)); + } } else { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else { DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); - Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue())); + double fp_value = constant->AsDoubleConstant()->GetValue(); + int64_t value = bit_cast<double, int64_t>(fp_value); + Immediate imm(value); if (destination.IsFpuRegister()) { - __ movq(CpuRegister(TMP), imm); - __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); + XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); + if (value == 0) { + __ xorpd(dest, dest); + } else { + __ movq(CpuRegister(TMP), imm); + __ movd(dest, CpuRegister(TMP)); + } } else { DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), imm); @@ -3677,8 +3711,9 @@ void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instructio if (instruction->GetType() == Primitive::kPrimInt) { locations->SetInAt(1, Location::Any()); } else { - // Request a register to avoid loading a 64bits constant. + // We can handle 32 bit constants. 
locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); } locations->SetOut(Location::SameAsFirstInput()); } @@ -3734,13 +3769,34 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } else { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + CpuRegister first_reg = first.AsRegister<CpuRegister>(); + bool second_is_constant = false; + int64_t value = 0; + if (second.IsConstant()) { + second_is_constant = true; + value = second.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(value)); + } + if (instruction->IsAnd()) { - __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (second_is_constant) { + __ andq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ andq(first_reg, second.AsRegister<CpuRegister>()); + } } else if (instruction->IsOr()) { - __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (second_is_constant) { + __ orq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ orq(first_reg, second.AsRegister<CpuRegister>()); + } } else { DCHECK(instruction->IsXor()); - __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (second_is_constant) { + __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ xorq(first_reg, second.AsRegister<CpuRegister>()); + } } } } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index ea65dc0780..74848d5d96 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -16,30 +16,12 @@ #include "gvn.h" #include "side_effects_analysis.h" +#include "utils.h" -namespace art { - -/** - * A node in the collision list of a ValueSet. Encodes the instruction, - * the hash code, and the next node in the collision list. - */ -class ValueSetNode : public ArenaObject<kArenaAllocMisc> { - public: - ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next) - : instruction_(instruction), hash_code_(hash_code), next_(next) {} +#include "utils/arena_bit_vector.h" +#include "base/bit_vector-inl.h" - size_t GetHashCode() const { return hash_code_; } - HInstruction* GetInstruction() const { return instruction_; } - ValueSetNode* GetNext() const { return next_; } - void SetNext(ValueSetNode* node) { next_ = node; } - - private: - HInstruction* const instruction_; - const size_t hash_code_; - ValueSetNode* next_; - - DISALLOW_COPY_AND_ASSIGN(ValueSetNode); -}; +namespace art { /** * A ValueSet holds instructions that can replace other instructions. It is updated @@ -52,39 +34,68 @@ class ValueSetNode : public ArenaObject<kArenaAllocMisc> { */ class ValueSet : public ArenaObject<kArenaAllocMisc> { public: + // Constructs an empty ValueSet which owns all its buckets. explicit ValueSet(ArenaAllocator* allocator) - : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - table_[i] = nullptr; + : allocator_(allocator), + num_buckets_(kMinimumNumberOfBuckets), + buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_owned_(allocator, num_buckets_, false), + num_entries_(0) { + // ArenaAllocator returns zeroed memory, so no need to set buckets to null. + DCHECK(IsPowerOfTwo(num_buckets_)); + buckets_owned_.SetInitialBits(num_buckets_); + } + + // Copy constructor. 
Depending on the load factor, it will either make a deep + // copy (all buckets owned) or a shallow one (buckets pointing to the parent). + ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy) + : allocator_(allocator), + num_buckets_(to_copy.IdealBucketCount()), + buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_owned_(allocator, num_buckets_, false), + num_entries_(to_copy.num_entries_) { + // ArenaAllocator returns zeroed memory, so entries of buckets_ and + // buckets_owned_ are initialized to nullptr and false, respectively. + DCHECK(IsPowerOfTwo(num_buckets_)); + if (num_buckets_ == to_copy.num_buckets_) { + // Hash table remains the same size. We copy the bucket pointers and leave + // all buckets_owned_ bits false. + memcpy(buckets_, to_copy.buckets_, num_buckets_ * sizeof(Node*)); + } else { + // Hash table size changes. We copy and rehash all entries, and set all + // buckets_owned_ bits to true. + for (size_t i = 0; i < to_copy.num_buckets_; ++i) { + for (Node* node = to_copy.buckets_[i]; node != nullptr; node = node->GetNext()) { + size_t new_index = BucketIndex(node->GetHashCode()); + buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]); + } + } + buckets_owned_.SetInitialBits(num_buckets_); } } // Adds an instruction in the set. void Add(HInstruction* instruction) { DCHECK(Lookup(instruction) == nullptr); - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - if (table_[index] == nullptr) { - table_[index] = instruction; - } else { - collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_); + size_t hash_code = HashCode(instruction); + size_t index = BucketIndex(hash_code); + + if (!buckets_owned_.IsBitSet(index)) { + CloneBucket(index); } - ++number_of_entries_; + buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]); + ++num_entries_; } - // If in the set, returns an equivalent instruction to the given instruction. Returns - // null otherwise. + // If in the set, returns an equivalent instruction to the given instruction. + // Returns null otherwise. HInstruction* Lookup(HInstruction* instruction) const { - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - HInstruction* existing = table_[index]; - if (existing != nullptr && existing->Equals(instruction)) { - return existing; - } + size_t hash_code = HashCode(instruction); + size_t index = BucketIndex(hash_code); - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { if (node->GetHashCode() == hash_code) { - existing = node->GetInstruction(); + HInstruction* existing = node->GetInstruction(); if (existing->Equals(instruction)) { return existing; } @@ -93,126 +104,193 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { return nullptr; } - // Returns whether `instruction` is in the set. - HInstruction* IdentityLookup(HInstruction* instruction) const { - size_t hash_code = instruction->ComputeHashCode(); - size_t index = hash_code % kDefaultNumberOfEntries; - HInstruction* existing = table_[index]; - if (existing != nullptr && existing == instruction) { - return existing; - } + // Returns whether instruction is in the set. 
+ bool Contains(HInstruction* instruction) const { + size_t hash_code = HashCode(instruction); + size_t index = BucketIndex(hash_code); - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - if (node->GetHashCode() == hash_code) { - existing = node->GetInstruction(); - if (existing == instruction) { - return existing; - } + for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { + if (node->GetInstruction() == instruction) { + return true; } } - return nullptr; + return false; } - // Removes all instructions in the set that are affected by the given side effects. + // Removes all instructions in the set affected by the given side effects. void Kill(SideEffects side_effects) { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - HInstruction* instruction = table_[i]; - if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) { - table_[i] = nullptr; - --number_of_entries_; - } - } + DeleteAllImpureWhich([side_effects](Node* node) { + return node->GetInstruction()->GetSideEffects().DependsOn(side_effects); + }); + } - for (ValueSetNode* current = collisions_, *previous = nullptr; - current != nullptr; - current = current->GetNext()) { - HInstruction* instruction = current->GetInstruction(); - if (instruction->GetSideEffects().DependsOn(side_effects)) { - if (previous == nullptr) { - collisions_ = current->GetNext(); - } else { - previous->SetNext(current->GetNext()); - } - --number_of_entries_; - } else { - previous = current; - } + // Updates this set by intersecting with instructions in a predecessor's set. + void IntersectWith(ValueSet* predecessor) { + if (IsEmpty()) { + return; + } else if (predecessor->IsEmpty()) { + Clear(); + } else { + // Pure instructions do not need to be tested because only impure + // instructions can be killed. + DeleteAllImpureWhich([predecessor](Node* node) { + return !predecessor->Contains(node->GetInstruction()); + }); } } - // Returns a copy of this set. - ValueSet* Copy() const { - ValueSet* copy = new (allocator_) ValueSet(allocator_); + bool IsEmpty() const { return num_entries_ == 0; } + size_t GetNumberOfEntries() const { return num_entries_; } - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - copy->table_[i] = table_[i]; + private: + class Node : public ArenaObject<kArenaAllocMisc> { + public: + Node(HInstruction* instruction, size_t hash_code, Node* next) + : instruction_(instruction), hash_code_(hash_code), next_(next) {} + + size_t GetHashCode() const { return hash_code_; } + HInstruction* GetInstruction() const { return instruction_; } + Node* GetNext() const { return next_; } + void SetNext(Node* node) { next_ = node; } + + Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) { + return new (allocator) Node(instruction_, hash_code_, new_next); } - // Note that the order will be inverted in the copy. This is fine, as the order is not - // relevant for a ValueSet. - for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { - copy->collisions_ = new (allocator_) ValueSetNode( - node->GetInstruction(), node->GetHashCode(), copy->collisions_); + private: + HInstruction* const instruction_; + const size_t hash_code_; + Node* next_; + + DISALLOW_COPY_AND_ASSIGN(Node); + }; + + // Creates our own copy of a bucket that is currently pointing to a parent. 
+ // This algorithm can be called while iterating over the bucket because it + // preserves the order of entries in the bucket and will return the clone of + // the given 'iterator'. + Node* CloneBucket(size_t index, Node* iterator = nullptr) { + DCHECK(!buckets_owned_.IsBitSet(index)); + Node* clone_current = nullptr; + Node* clone_previous = nullptr; + Node* clone_iterator = nullptr; + for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { + clone_current = node->Dup(allocator_, nullptr); + if (node == iterator) { + clone_iterator = clone_current; + } + if (clone_previous == nullptr) { + buckets_[index] = clone_current; + } else { + clone_previous->SetNext(clone_current); + } + clone_previous = clone_current; } - - copy->number_of_entries_ = number_of_entries_; - return copy; + buckets_owned_.SetBit(index); + return clone_iterator; } void Clear() { - number_of_entries_ = 0; - collisions_ = nullptr; - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - table_[i] = nullptr; + num_entries_ = 0; + for (size_t i = 0; i < num_buckets_; ++i) { + buckets_[i] = nullptr; } + buckets_owned_.SetInitialBits(num_buckets_); } - // Update this `ValueSet` by intersecting with instructions in `other`. - void IntersectionWith(ValueSet* other) { - if (IsEmpty()) { - return; - } else if (other->IsEmpty()) { - Clear(); - } else { - for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { - if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) { - --number_of_entries_; - table_[i] = nullptr; + // Iterates over buckets with impure instructions (even indices) and deletes + // the ones on which 'cond' returns true. + template<typename Functor> + void DeleteAllImpureWhich(Functor cond) { + for (size_t i = 0; i < num_buckets_; i += 2) { + Node* node = buckets_[i]; + Node* previous = nullptr; + + if (node == nullptr) { + continue; + } + + if (!buckets_owned_.IsBitSet(i)) { + // Bucket is not owned but maybe we won't need to change it at all. + // Iterate as long as the entries don't satisfy 'cond'. + while (node != nullptr) { + if (cond(node)) { + // We do need to delete an entry but we do not own the bucket. + // Clone the bucket, make sure 'previous' and 'node' point to + // the cloned entries and break. + previous = CloneBucket(i, previous); + node = (previous == nullptr) ? buckets_[i] : previous->GetNext(); + break; + } + previous = node; + node = node->GetNext(); } } - for (ValueSetNode* current = collisions_, *previous = nullptr; - current != nullptr; - current = current->GetNext()) { - if (other->IdentityLookup(current->GetInstruction()) == nullptr) { + + // By this point we either own the bucket and can start deleting entries, + // or we do not own it but no entries matched 'cond'. + DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr); + + // We iterate over the remainder of entries and delete those that match + // the given condition. + while (node != nullptr) { + Node* next = node->GetNext(); + if (cond(node)) { if (previous == nullptr) { - collisions_ = current->GetNext(); + buckets_[i] = next; } else { - previous->SetNext(current->GetNext()); + previous->SetNext(next); } - --number_of_entries_; } else { - previous = current; + previous = node; } + node = next; } } } - bool IsEmpty() const { return number_of_entries_ == 0; } - size_t GetNumberOfEntries() const { return number_of_entries_; } + // Computes a bucket count such that the load factor is reasonable. + // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2. 
+ size_t IdealBucketCount() const { + size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1)); + if (bucket_count > kMinimumNumberOfBuckets) { + return bucket_count; + } else { + return kMinimumNumberOfBuckets; + } + } - private: - static constexpr size_t kDefaultNumberOfEntries = 8; + // Generates a hash code for an instruction. Pure instructions are put into + // odd buckets to speed up deletion. + size_t HashCode(HInstruction* instruction) const { + size_t hash_code = instruction->ComputeHashCode(); + if (instruction->GetSideEffects().HasDependencies()) { + return (hash_code << 1) | 0; + } else { + return (hash_code << 1) | 1; + } + } + + // Converts a hash code to a bucket index. + size_t BucketIndex(size_t hash_code) const { + return hash_code & (num_buckets_ - 1); + } ArenaAllocator* const allocator_; + // The internal bucket implementation of the set. + size_t const num_buckets_; + Node** const buckets_; + + // Flags specifying which buckets were copied into the set from its parent. + // If a flag is not set, the corresponding bucket points to entries in the + // parent and must be cloned prior to making changes. + ArenaBitVector buckets_owned_; + // The number of entries in the set. - size_t number_of_entries_; + size_t num_entries_; - // The internal implementation of the set. It uses a combination of a hash code based - // fixed-size list, and a linked list to handle hash code collisions. - // TODO: Tune the fixed size list original size, and support growing it. - ValueSetNode* collisions_; - HInstruction* table_[kDefaultNumberOfEntries]; + static constexpr size_t kMinimumNumberOfBuckets = 8; DISALLOW_COPY_AND_ASSIGN(ValueSet); }; @@ -270,11 +348,14 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - set = sets_.Get(dominator->GetBlockId()); - if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) { + ValueSet* dominator_set = sets_.Get(dominator->GetBlockId()); + if (dominator->GetSuccessors().Size() == 1) { + DCHECK_EQ(dominator->GetSuccessors().Get(0), block); + set = dominator_set; + } else { // We have to copy if the dominator has other successors, or `block` is not a successor // of the dominator. 
- set = set->Copy(); + set = new (allocator_) ValueSet(allocator_, *dominator_set); } if (!set->IsEmpty()) { if (block->IsLoopHeader()) { @@ -282,7 +363,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set->Kill(side_effects_.GetLoopEffects(block)); } else if (predecessors.Size() > 1) { for (size_t i = 0, e = predecessors.Size(); i < e; ++i) { - set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId())); + set->IntersectWith(sets_.Get(predecessors.Get(i)->GetBlockId())); if (set->IsEmpty()) { break; } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index bd9267c4db..968fe3e73c 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -118,6 +118,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } + if (resolved_method->ShouldNotInline()) { + VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) + << " was already flagged as non inlineable"; + return false; + } + + if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index)) { + resolved_method->SetShouldNotInline(); + return false; + } + + VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file); + MaybeRecordStat(kInlinedInvoke); + return true; +} + +bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, + HInvoke* invoke_instruction, + uint32_t method_index) const { + ScopedObjectAccess soa(Thread::Current()); + const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); + const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile(); + DexCompilationUnit dex_compilation_unit( nullptr, outer_compilation_unit_.GetClassLoader(), @@ -225,8 +248,6 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, // instruction id of the caller, so that new instructions added // after optimizations get a unique id. graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId()); - VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file); - MaybeRecordStat(kInlinedInvoke); return true; } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 2b08d3d91a..1251977138 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -46,6 +46,9 @@ class HInliner : public HOptimization { private: bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const; + bool TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, + HInvoke* invoke_instruction, + uint32_t method_index) const; const DexCompilationUnit& outer_compilation_unit_; CompilerDriver* const compiler_driver_; diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 4ac1fe8573..a1ae67009e 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -56,6 +56,19 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { : Location::RequiresRegister(); } +Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) { + if (!instruction->IsConstant() || !instruction->AsConstant()->IsLongConstant()) { + return Location::RequiresRegister(); + } + + // Does the long constant fit in a 32 bit int? + int64_t value = instruction->AsConstant()->AsLongConstant()->GetValue(); + + return IsInt<32>(value) + ? 
Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresRegister(); +} + Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { return instruction->IsConstant() ? Location::ConstantLocation(instruction->AsConstant()) diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 566c0daf1e..de876be9ab 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -345,6 +345,7 @@ class Location : public ValueObject { } static Location RegisterOrConstant(HInstruction* instruction); + static Location RegisterOrInt32LongConstant(HInstruction* instruction); static Location ByteRegisterOrConstant(int reg, HInstruction* instruction); // The location of the first input to the instruction will be diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e47b4f61b2..b70f9252ae 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -173,24 +173,40 @@ class OptimizingCompiler FINAL : public Compiler { jobject class_loader, const DexFile& dex_file) const OVERRIDE; + CompiledMethod* TryCompile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const; + CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, - const DexFile& dex_file) const OVERRIDE; + const DexFile& dex_file) const OVERRIDE { + return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file); + } uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( + InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); + } bool WriteElf(art::File* file, OatWriter* oat_writer, const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, - bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } - void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {} + void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; void Init() OVERRIDE; - void UnInit() const OVERRIDE {} + void UnInit() const OVERRIDE; private: // Whether we should run any optimization or register allocation. If false, will @@ -214,6 +230,9 @@ class OptimizingCompiler FINAL : public Compiler { std::unique_ptr<std::ostream> visualizer_output_; + // Delegate to Quick in case the optimizing compiler cannot compile a method. + std::unique_ptr<Compiler> delegate_; + DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler); }; @@ -224,9 +243,11 @@ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) run_optimizations_( (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) && !driver->GetCompilerOptions().GetDebuggable()), - compilation_stats_() {} + compilation_stats_(), + delegate_(Create(driver, Compiler::Kind::kQuick)) {} void OptimizingCompiler::Init() { + delegate_->Init(); // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. 
CompilerDriver* driver = GetCompilerDriver(); @@ -239,34 +260,24 @@ void OptimizingCompiler::Init() { } } +void OptimizingCompiler::UnInit() const { + delegate_->UnInit(); +} + OptimizingCompiler::~OptimizingCompiler() { compilation_stats_.Log(); } +void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const { + delegate_->InitCompilationUnit(cu); +} + bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, const DexFile& dex_file ATTRIBUTE_UNUSED, CompilationUnit* cu ATTRIBUTE_UNUSED) const { return true; } -CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, - uint32_t method_idx, - const DexFile& dex_file) const { - return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file); -} - -uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const { - return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( - InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); -} - -bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer, - const std::vector<const art::DexFile*>& dex_files, - const std::string& android_root, bool is_host) const { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); -} - static bool IsInstructionSetSupported(InstructionSet instruction_set) { return instruction_set == kArm64 || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat) @@ -422,13 +433,13 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( ArrayRef<const uint8_t>()); } -CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file) const { +CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const { UNUSED(invoke_type); std::string method_name = PrettyMethod(method_idx, dex_file); compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); @@ -502,6 +513,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, bool can_optimize = CanOptimize(*code_item); bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); + + // `run_optimizations_` is set explicitly (either through a compiler filter + // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back + // to Quick. 
+ bool can_use_baseline = !run_optimizations_; if (run_optimizations_ && can_optimize && can_allocate_registers) { VLOG(compiler) << "Optimizing " << method_name; @@ -524,7 +540,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } else if (shouldOptimize && can_allocate_registers) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); - } else { + } else if (can_use_baseline) { VLOG(compiler) << "Compile baseline " << method_name; if (!run_optimizations_) { @@ -536,7 +552,25 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); + } else { + return nullptr; + } +} + +CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const { + CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, + method_idx, class_loader, dex_file); + if (method != nullptr) { + return method; } + return delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, + class_loader, dex_file); } Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 63a02862b4..9914ef49c3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -38,6 +38,8 @@ class StackMapStream : public ValueObject { dex_register_locations_(allocator, 10 * 4), inline_infos_(allocator, 2), stack_mask_max_(-1), + dex_pc_max_(0), + native_pc_offset_max_(0), number_of_stack_maps_with_inline_info_(0) {} // Compute bytes needed to encode a mask with the given maximum element. @@ -92,6 +94,9 @@ class StackMapStream : public ValueObject { if (inlining_depth > 0) { number_of_stack_maps_with_inline_info_++; } + + dex_pc_max_ = std::max(dex_pc_max_, dex_pc); + native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); } void AddInlineInfoEntry(uint32_t method_index) { @@ -105,8 +110,8 @@ class StackMapStream : public ValueObject { + ComputeStackMapsSize() + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); - // On ARM, CodeInfo data must be 4-byte aligned. - return RoundUp(size, kWordAlignment); + // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. + return size; } size_t ComputeStackMaskSize() const { @@ -114,7 +119,12 @@ class StackMapStream : public ValueObject { } size_t ComputeStackMapsSize() const { - return stack_maps_.Size() * StackMap::ComputeStackMapSize(ComputeStackMaskSize()); + return stack_maps_.Size() * StackMap::ComputeStackMapSize( + ComputeStackMaskSize(), + ComputeInlineInfoSize(), + ComputeDexRegisterMapsSize(), + dex_pc_max_, + native_pc_offset_max_); } // Compute the size of the Dex register map of `entry`. 
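A note on the new dex_pc_max_ / native_pc_offset_max_ fields and the extra arguments to StackMap::ComputeStackMapSize above: the stream now remembers the largest dex pc and native pc offset it has recorded so that, together with the inline-info and dex-register-map sizes passed to CodeInfo::SetEncoding in the next hunk, each stack-map field can presumably be stored in as few bytes as its largest value requires rather than a fixed width. The real encoding lives in the runtime's stack_map.h, which is outside this diff; the sketch below only illustrates the sizing idea and its names are made up.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Bytes needed to store any value in [0, max_value].
size_t BytesNeeded(uint32_t max_value) {
  size_t bytes = 1;
  while (max_value >>= 8) {
    ++bytes;
  }
  return bytes;
}

int main() {
  uint32_t dex_pc_max = 0x7F;              // fits in one byte
  uint32_t native_pc_offset_max = 0x1234;  // needs two bytes
  size_t per_entry = BytesNeeded(dex_pc_max) + BytesNeeded(native_pc_offset_max);
  std::printf("bytes per (dex pc, native pc offset) pair: %zu\n", per_entry);
  return 0;
}

The DexRegisterMapOffsetOverflow test added further down points the same way: dex-register-map offsets apparently have a small and a wide form (kNoDexRegisterMapSmallEncoding), and the test checks that a large offset is not mistaken for "no map".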
@@ -165,16 +175,20 @@ class StackMapStream : public ValueObject { code_info.SetOverallSize(region.size()); size_t stack_mask_size = ComputeStackMaskSize(); - uint8_t* memory_start = region.start(); + + size_t dex_register_map_size = ComputeDexRegisterMapsSize(); + size_t inline_info_size = ComputeInlineInfoSize(); MemoryRegion dex_register_locations_region = region.Subregion( ComputeDexRegisterMapsStart(), - ComputeDexRegisterMapsSize()); + dex_register_map_size); MemoryRegion inline_infos_region = region.Subregion( ComputeInlineInfoStart(), - ComputeInlineInfoSize()); + inline_info_size); + code_info.SetEncoding( + inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_); code_info.SetNumberOfStackMaps(stack_maps_.Size()); code_info.SetStackMaskSize(stack_mask_size); DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize()); @@ -185,11 +199,11 @@ class StackMapStream : public ValueObject { StackMap stack_map = code_info.GetStackMapAt(i); StackMapEntry entry = stack_maps_.Get(i); - stack_map.SetDexPc(entry.dex_pc); - stack_map.SetNativePcOffset(entry.native_pc_offset); - stack_map.SetRegisterMask(entry.register_mask); + stack_map.SetDexPc(code_info, entry.dex_pc); + stack_map.SetNativePcOffset(code_info, entry.native_pc_offset); + stack_map.SetRegisterMask(code_info, entry.register_mask); if (entry.sp_mask != nullptr) { - stack_map.SetStackMask(*entry.sp_mask); + stack_map.SetStackMask(code_info, *entry.sp_mask); } if (entry.num_dex_registers != 0) { @@ -200,7 +214,8 @@ class StackMapStream : public ValueObject { ComputeDexRegisterMapSize(entry)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); - stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); + stack_map.SetDexRegisterMapOffset( + code_info, register_region.start() - dex_register_locations_region.start()); // Offset in `dex_register_map` where to store the next register entry. size_t offset = DexRegisterMap::kFixedSize; @@ -222,7 +237,7 @@ class StackMapStream : public ValueObject { // Ensure we reached the end of the Dex registers region. DCHECK_EQ(offset, register_region.size()); } else { - stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap); + stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap); } // Set the inlining info. @@ -233,7 +248,9 @@ class StackMapStream : public ValueObject { next_inline_info_offset += inline_region.size(); InlineInfo inline_info(inline_region); - stack_map.SetInlineDescriptorOffset(inline_region.start() - memory_start); + // Currently relative to the dex register map. 
+ stack_map.SetInlineDescriptorOffset( + code_info, inline_region.start() - dex_register_locations_region.start()); inline_info.SetDepth(entry.inlining_depth); for (size_t j = 0; j < entry.inlining_depth; ++j) { @@ -241,7 +258,9 @@ class StackMapStream : public ValueObject { inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); } } else { - stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo); + if (inline_info_size != 0) { + stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo); + } } } } @@ -262,6 +281,8 @@ class StackMapStream : public ValueObject { GrowableArray<DexRegisterLocation> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; + uint32_t dex_pc_max_; + uint32_t native_pc_offset_max_; size_t number_of_stack_maps_with_inline_info_; ART_FRIEND_TEST(StackMapTest, Test1); diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 87ac2e79e9..e7075c0aef 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -54,14 +54,14 @@ TEST(StackMapTest, Test1) { StackMap stack_map = code_info.GetStackMapAt(0); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); - ASSERT_EQ(0u, stack_map.GetDexPc()); - ASSERT_EQ(64u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); - MemoryRegion stack_mask = stack_map.GetStackMask(); + MemoryRegion stack_mask = stack_map.GetStackMask(code_info); ASSERT_TRUE(SameBits(stack_mask, sp_mask)); - ASSERT_TRUE(stack_map.HasDexRegisterMap()); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); ASSERT_EQ(7u, dex_registers.Size()); DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); @@ -73,7 +73,7 @@ TEST(StackMapTest, Test1) { ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } TEST(StackMapTest, Test2) { @@ -112,14 +112,14 @@ TEST(StackMapTest, Test2) { StackMap stack_map = code_info.GetStackMapAt(0); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); - ASSERT_EQ(0u, stack_map.GetDexPc()); - ASSERT_EQ(64u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); - MemoryRegion stack_mask = stack_map.GetStackMask(); + MemoryRegion stack_mask = stack_map.GetStackMask(code_info); ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); - ASSERT_TRUE(stack_map.HasDexRegisterMap()); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); ASSERT_EQ(7u, dex_registers.Size()); @@ -134,7 +134,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_TRUE(stack_map.HasInlineInfo()); + ASSERT_TRUE(stack_map.HasInlineInfo(code_info)); InlineInfo inline_info = 
code_info.GetInlineInfoOf(stack_map); ASSERT_EQ(2u, inline_info.GetDepth()); ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); @@ -146,14 +146,14 @@ TEST(StackMapTest, Test2) { StackMap stack_map = code_info.GetStackMapAt(1); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u))); - ASSERT_EQ(1u, stack_map.GetDexPc()); - ASSERT_EQ(128u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0xFFu, stack_map.GetRegisterMask()); + ASSERT_EQ(1u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(128u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(code_info)); - MemoryRegion stack_mask = stack_map.GetStackMask(); + MemoryRegion stack_mask = stack_map.GetStackMask(code_info); ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); - ASSERT_TRUE(stack_map.HasDexRegisterMap()); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); ASSERT_EQ(3u, dex_registers.Size()); @@ -168,7 +168,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } } @@ -190,14 +190,45 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { CodeInfo code_info(region); StackMap stack_map = code_info.GetStackMapAt(0); - ASSERT_TRUE(stack_map.HasDexRegisterMap()); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterLocation::Kind::kNone, dex_registers.GetLocationKind(0, number_of_dex_registers)); ASSERT_EQ(DexRegisterLocation::Kind::kConstant, dex_registers.GetLocationKind(1, number_of_dex_registers)); ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers)); - ASSERT_FALSE(stack_map.HasInlineInfo()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); +} + +// Generate a stack map whose dex register offset is +// StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do +// not treat it as kNoDexRegisterMap. 
+TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena);
+
+ ArenaBitVector sp_mask(&arena, 0, false);
+ uint32_t number_of_dex_registers = 0xEA;
+ stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) {
+ stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+ }
+ stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
+ stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+ }
+
+ size_t size = stream.ComputeNeededSize();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ StackMap stack_map = code_info.GetStackMapAt(1);
+ ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
+ ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
+ ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding);
}
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b72a4..bd155ed788 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@ void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
}
+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ CHECK(imm.is_int32());  // orq only supports 32b immediate.
+ EmitRex64(dst);
+ EmitComplex(1, Operand(dst), imm);
+}
+
+
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@ void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+ imulq(reg, reg, imm);
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
CHECK(imm.is_int32());  // imulq only supports 32b immediate.
- EmitRex64(reg, reg);
+ EmitRex64(dst, reg);
// See whether imm can be represented as a sign-extended 8bit value.
int64_t v64 = imm.value();
if (IsInt<8>(v64)) {
// Sign-extension works.
EmitUint8(0x6B);
- EmitOperand(reg.LowBits(), Operand(reg));
+ EmitOperand(dst.LowBits(), Operand(reg));
EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
} else {
// Not representable, use full immediate.
EmitUint8(0x69);
- EmitOperand(reg.LowBits(), Operand(reg));
+ EmitOperand(dst.LowBits(), Operand(reg));
EmitImmediate(imm);
}
}
-
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitRex64(reg, address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf4541b..495f74f498 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@ class X86_64Assembler FINAL : public Assembler {
void orl(CpuRegister dst, CpuRegister src);
void orl(CpuRegister reg, const Address& address);
void orq(CpuRegister dst, CpuRegister src);
+ void orq(CpuRegister dst, const Immediate& imm);
void xorl(CpuRegister dst, CpuRegister src);
void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@ class X86_64Assembler FINAL : public Assembler {
void imulq(CpuRegister dst, CpuRegister src);
void imulq(CpuRegister reg, const Immediate& imm);
void imulq(CpuRegister reg, const Address& address);
+ void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
void imull(CpuRegister reg);
void imull(const Address& address);
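
Note on the DexRegisterMapOffsetOverflow test added in stack_map_test.cc above: the first stack map is given enough dex register entries that the second map's dex register map offset lands exactly on StackMap::kNoDexRegisterMapSmallEncoding, and the assertions require that this offset still be recognized as a valid map, i.e. compared against the width-independent StackMap::kNoDexRegisterMap sentinel rather than against the narrow one. A minimal sketch of the hazard; the sentinel values below are assumptions for illustration only, not the exact constants from the stack map headers:

#include <cstdint>

// Assumed sentinels, for illustration only.
constexpr uint32_t kNoDexRegisterMap = 0xFFFFFFFFu;         // wide "no map" marker
constexpr uint32_t kNoDexRegisterMapSmallEncoding = 0xFFu;  // narrow (one-byte) marker

// Buggy check: a genuine offset that happens to equal the narrow marker is
// misread as "no dex register map" (the situation the test arranges).
bool HasDexRegisterMapNarrow(uint32_t offset) {
  return offset != kNoDexRegisterMapSmallEncoding;
}

// Intended check: comparing against the wide sentinel keeps such an offset
// valid, which is what the new ASSERT_NE/ASSERT_EQ pair pins down.
bool HasDexRegisterMapWide(uint32_t offset) {
  return offset != kNoDexRegisterMap;
}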
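
Note on the x86-64 assembler changes above: orq(reg, imm) CHECKs that the immediate is representable as int32, and the new three-operand imulq(dst, src, imm), to which the two-operand form now forwards, selects the sign-extended 8-bit encoding (opcode 0x6B, one immediate byte) when the value fits in int8 and the full 32-bit encoding (opcode 0x69) otherwise, matching the x86-64 ISA. A usage sketch under assumed conventions; the function name and include path are illustrative, and only the member signatures come from the header diff:

#include "utils/x86_64/assembler_x86_64.h"

namespace art {
namespace x86_64 {

void EmitImmediateExamples(X86_64Assembler* assembler) {
  CpuRegister rax(RAX);
  CpuRegister rbx(RBX);
  // Fits in a sign-extended int8: expected to take the 0x6B path above.
  assembler->imulq(rax, rbx, Immediate(7));
  // Needs the full 32 bits: expected to take the 0x69 path above.
  assembler->imulq(rax, rbx, Immediate(0x12345));
  // Two-operand form, now forwarding to imulq(reg, reg, imm).
  assembler->imulq(rax, Immediate(-8));
  // Immediate must be representable as int32 (CHECKed in the new overload).
  assembler->orq(rax, Immediate(0x0FF0));
}

}  // namespace x86_64
}  // namespace art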