50 files changed, 898 insertions, 357 deletions
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index 8dbc2bb9c3..c0068b2331 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -215,17 +215,13 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::CONST_STRING_JUMBO: case Instruction::CONST_CLASS: case Instruction::NEW_ARRAY: - if ((mir->optimization_flags & MIR_INLINED) == 0) { - // 1 result, treat as unique each time, use result s_reg - will be unique. - res = MarkNonAliasingNonNull(mir); - } + // 1 result, treat as unique each time, use result s_reg - will be unique. + res = MarkNonAliasingNonNull(mir); break; case Instruction::MOVE_RESULT_WIDE: - if ((mir->optimization_flags & MIR_INLINED) == 0) { - // 1 wide result, treat as unique each time, use result s_reg - will be unique. - res = GetOperandValueWide(mir->ssa_rep->defs[0]); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + // 1 wide result, treat as unique each time, use result s_reg - will be unique. + res = GetOperandValueWide(mir->ssa_rep->defs[0]); + SetOperandValueWide(mir->ssa_rep->defs[0], res); break; case kMirOpPhi: diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index 1784af3653..c9acd66bba 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -454,8 +454,6 @@ enum ArmOpcode { kThumb2Vcmps, // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0]. kThumb2LdrPcRel12, // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0]. kThumb2BCond, // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0]. - kThumb2Vmovd_RR, // vmov [111011101] D [110000] vd[15-12 [101101] M [0] vm[3-0]. - kThumb2Vmovs_RR, // vmov [111011101] D [110000] vd[15-12 [101001] M [0] vm[3-0]. kThumb2Fmrs, // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000]. kThumb2Fmsr, // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000]. kThumb2Fmrrd, // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. 
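The int_arm.cc hunks below extend ArmMir2Lir::SmallLiteralDivRem() so the "magic number" divide-by-small-constant path also handles remainders (rem = src - (src / lit) * lit), with the back-multiplication strength-reduced through GenEasyMultiplyTwoOps(), which is in turn rewritten to cope with overlapping source/destination registers. As an illustration of the underlying identity only (not ART code: div3/rem3 are made-up names, and 0x55555556 is the standard ceil(2^32/3) magic constant, which the diff itself only references indirectly via magic_table), here is a minimal C++ sketch:

    #include <cstdint>

    // Signed divide by 3 via multiply-high with a magic constant; this has
    // the same shape as the Divide3 pattern above: q = hi(x * M) - (x >> 31).
    static int32_t div3(int32_t x) {
      int64_t prod = static_cast<int64_t>(x) * 0x55555556;  // M = ceil(2^32 / 3)
      int32_t hi = static_cast<int32_t>(prod >> 32);        // SMULL-style high half
      return hi - (x >> 31);                                // +1 correction for negative x
    }

    // Remainder reconstructed from the quotient: x - q*3, with q*3 emitted
    // as a shift-and-add ((q << 1) + q) instead of a real multiply.
    static int32_t rem3(int32_t x) {
      int32_t q = div3(x);
      return x - ((q << 1) + q);
    }

    // e.g. div3(-7) == -2 and rem3(-7) == -1, matching Java/Dalvik
    // truncated-division semantics that the generated code must preserve.
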
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 1c35018be3..f77b0a6302 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -848,14 +848,6 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP, "b!1c", "!0t", 4, kFixupCondBranch), - ENCODING_MAP(kThumb2Vmovd_RR, 0xeeb00b40, - kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f64", "!0S, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vmovs_RR, 0xeeb00a40, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Fmrs, 0xee100a10, kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 1c563bb126..1abb91d23e 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -434,10 +434,6 @@ bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div if (pattern == DivideNone) { return false; } - // Tuning: add rem patterns - if (!is_div) { - return false; - } RegStorage r_magic = AllocTemp(); LoadConstant(r_magic, magic_table[lit].magic); @@ -445,25 +441,45 @@ bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage r_hi = AllocTemp(); RegStorage r_lo = AllocTemp(); + + // rl_dest and rl_src might overlap. + // Reuse r_hi to save the div result for reminder case. + RegStorage r_div_result = is_div ? 
rl_result.reg : r_hi; + NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg()); switch (pattern) { case Divide3: - OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31)); + OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31)); break; case Divide5: OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); - OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, + OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, EncodeShift(kArmAsr, magic_table[lit].shift)); break; case Divide7: OpRegReg(kOpAdd, r_hi, rl_src.reg); OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); - OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, + OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, EncodeShift(kArmAsr, magic_table[lit].shift)); break; default: LOG(FATAL) << "Unexpected pattern: " << pattern; } + + if (!is_div) { + // div_result = src / lit + // tmp1 = div_result * lit + // dest = src - tmp1 + RegStorage tmp1 = r_lo; + EasyMultiplyOp ops[2]; + + bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops); + DCHECK_NE(canEasyMultiply, false); + + GenEasyMultiplyTwoOps(tmp1, r_div_result, ops); + OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1); + } + StoreValue(rl_dest, rl_result); return true; } @@ -489,6 +505,7 @@ bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) { } op->op = kOpInvalid; + op->shift = 0; return false; } @@ -497,6 +514,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { GetEasyMultiplyOp(lit, &ops[0]); if (GetEasyMultiplyOp(lit, &ops[0])) { ops[1].op = kOpInvalid; + ops[1].shift = 0; return true; } @@ -527,31 +545,52 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { return false; } +// Generate instructions to do multiply. +// Additional temporary register is required, +// if it need to generate 2 instructions and src/dest overlap. 
void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) { - // dest = ( src << shift1) + [ src | -src | 0 ] - // dest = (dest << shift2) + [ src | -src | 0 ] - for (int i = 0; i < 2; i++) { - RegStorage r_src2; - if (i == 0) { - r_src2 = r_src; - } else { - r_src2 = r_dest; - } - switch (ops[i].op) { + // tmp1 = ( src << shift1) + [ src | -src | 0 ] + // dest = (tmp1 << shift2) + [ src | -src | 0 ] + + RegStorage r_tmp1; + if (ops[1].op == kOpInvalid) { + r_tmp1 = r_dest; + } else if (r_dest.GetReg() != r_src.GetReg()) { + r_tmp1 = r_dest; + } else { + r_tmp1 = AllocTemp(); + } + + switch (ops[0].op) { case kOpLsl: - OpRegRegImm(kOpLsl, r_dest, r_src2, ops[i].shift); + OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift); break; case kOpAdd: - OpRegRegRegShift(kOpAdd, r_dest, r_src, r_src2, EncodeShift(kArmLsl, ops[i].shift)); + OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); break; case kOpRsub: - OpRegRegRegShift(kOpRsub, r_dest, r_src, r_src2, EncodeShift(kArmLsl, ops[i].shift)); + OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); break; default: - DCHECK_NE(i, 0); - DCHECK_EQ(ops[i].op, kOpInvalid); + DCHECK_EQ(ops[0].op, kOpInvalid); + break; + } + + switch (ops[1].op) { + case kOpInvalid: + return; + case kOpLsl: + OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift); + break; + case kOpAdd: + OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); + break; + case kOpRsub: + OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); + break; + default: + LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps"; break; - } } } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 5e9a8b0b5c..1053a8fc41 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -562,7 +562,8 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() { // Keep special registers from being allocated // Don't reserve the r4 if we are doing implicit suspend checks. - bool no_suspend = NO_SUSPEND || !Runtime::Current()->ExplicitSuspendChecks(); + // TODO: re-enable this when we can safely save r4 over the suspension code path. + bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks(); for (int i = 0; i < num_reserved; i++) { if (no_suspend && (ReservedRegs[i] == rARM_SUSPEND)) { // Don't reserve the suspend register. diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index fa6de963a0..06eff4e170 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -47,6 +47,22 @@ MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { return insn; } +uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { + DCHECK_LT(arg, invoke->dalvikInsn.vA); + if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) { + return invoke->dalvikInsn.vC + arg; // Non-range invoke. + } else { + DCHECK_EQ(Instruction::FormatOf(invoke->dalvikInsn.opcode), Instruction::k35c); + return invoke->dalvikInsn.arg[arg]; // Range invoke. 
+ } +} + +bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) { + DCHECK_LT(arg + 1, invoke->dalvikInsn.vA); + return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc || + invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u; +} + } // anonymous namespace const uint32_t DexFileMethodInliner::kIndexUnresolved; @@ -396,7 +412,8 @@ bool DexFileMethodInliner::GenInline(MIRGraph* mir_graph, BasicBlock* bb, MIR* i result = GenInlineIGet(mir_graph, bb, invoke, move_result, method, method_idx); break; case kInlineOpIPut: - result = GenInlineIPut(mir_graph, bb, invoke, method, method_idx); + move_result = mir_graph->FindMoveResult(bb, invoke); + result = GenInlineIPut(mir_graph, bb, invoke, move_result, method, method_idx); break; default: LOG(FATAL) << "Unexpected inline op: " << method.opcode; @@ -578,25 +595,24 @@ bool DexFileMethodInliner::GenInlineReturnArg(MIRGraph* mir_graph, BasicBlock* b // Select opcode and argument. const InlineReturnArgData& data = method.d.return_data; Instruction::Code opcode = Instruction::MOVE_FROM16; + uint32_t arg = GetInvokeReg(invoke, data.arg); if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { DCHECK_EQ(data.is_object, 1u); + DCHECK_EQ(data.is_wide, 0u); opcode = Instruction::MOVE_OBJECT_FROM16; } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE) { DCHECK_EQ(data.is_wide, 1u); + DCHECK_EQ(data.is_object, 0u); opcode = Instruction::MOVE_WIDE_FROM16; + if (!WideArgIsInConsecutiveDalvikRegs(invoke, data.arg)) { + // The two halfs of the source value are not in consecutive dalvik registers in INVOKE. + return false; + } } else { DCHECK(move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT); DCHECK_EQ(data.is_wide, 0u); DCHECK_EQ(data.is_object, 0u); } - DCHECK_LT(data.is_wide ? data.arg + 1u : data.arg, invoke->dalvikInsn.vA); - int arg; - if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k35c) { - arg = invoke->dalvikInsn.arg[data.arg]; // Non-range invoke. - } else { - DCHECK_EQ(Instruction::FormatOf(invoke->dalvikInsn.opcode), Instruction::k3rc); - arg = invoke->dalvikInsn.vC + data.arg; // Range invoke. - } // Insert the move instruction MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result); @@ -616,33 +632,39 @@ bool DexFileMethodInliner::GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MI } const InlineIGetIPutData& data = method.d.ifield_data; - if (invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC || - invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE || - data.object_arg != 0) { - // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE). - return false; - } + Instruction::Code opcode = static_cast<Instruction::Code>(Instruction::IGET + data.op_variant); + DCHECK_EQ(InlineMethodAnalyser::IGetVariant(opcode), data.op_variant); + uint32_t object_reg = GetInvokeReg(invoke, data.object_arg); if (move_result == nullptr) { // Result is unused. If volatile, we still need to emit the IGET but we have no destination. 
return !data.is_volatile; } - Instruction::Code opcode = static_cast<Instruction::Code>(Instruction::IGET + data.op_variant); - DCHECK_EQ(InlineMethodAnalyser::IGetVariant(opcode), data.op_variant); + DCHECK_EQ(data.method_is_static != 0u, + invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC || + invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE); + bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u); + if (!object_is_this) { + // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE). + // Allow synthetic accessors. We don't care about losing their stack frame in NPE. + if (!InlineMethodAnalyser::IsSyntheticAccessor( + mir_graph->GetMethodLoweringInfo(invoke).GetTargetMethod())) { + return false; + } + } + + if (object_is_this) { + // Mark invoke as NOP, null-check is done on IGET. No aborts after this. + invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + } MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result); insn->width += insn->offset - invoke->offset; insn->offset = invoke->offset; insn->dalvikInsn.opcode = opcode; insn->dalvikInsn.vA = move_result->dalvikInsn.vA; - DCHECK_LT(data.object_arg, invoke->dalvikInsn.vA); - if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) { - insn->dalvikInsn.vB = invoke->dalvikInsn.vC + data.object_arg; - } else { - DCHECK_EQ(Instruction::FormatOf(invoke->dalvikInsn.opcode), Instruction::k35c); - insn->dalvikInsn.vB = invoke->dalvikInsn.arg[data.object_arg]; - } + insn->dalvikInsn.vB = object_reg; mir_graph->ComputeInlineIFieldLoweringInfo(data.field_idx, invoke, insn); DCHECK(mir_graph->GetIFieldLoweringInfo(insn).IsResolved()); @@ -655,32 +677,55 @@ bool DexFileMethodInliner::GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MI } bool DexFileMethodInliner::GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke, - const InlineMethod& method, uint32_t method_idx) { + MIR* move_result, const InlineMethod& method, + uint32_t method_idx) { CompilationUnit* cu = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit(); if (cu->enable_debug & (1 << kDebugSlowFieldPath)) { return false; } const InlineIGetIPutData& data = method.d.ifield_data; - if (invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC || - invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE || - data.object_arg != 0) { - // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE). + Instruction::Code opcode = static_cast<Instruction::Code>(Instruction::IPUT + data.op_variant); + DCHECK_EQ(InlineMethodAnalyser::IPutVariant(opcode), data.op_variant); + uint32_t object_reg = GetInvokeReg(invoke, data.object_arg); + uint32_t src_reg = GetInvokeReg(invoke, data.src_arg); + uint32_t return_reg = + data.return_arg_plus1 != 0u ? GetInvokeReg(invoke, data.return_arg_plus1 - 1u) : 0u; + + if (opcode == Instruction::IPUT_WIDE && !WideArgIsInConsecutiveDalvikRegs(invoke, data.src_arg)) { + // The two halfs of the source value are not in consecutive dalvik registers in INVOKE. 
return false; } - Instruction::Code opcode = static_cast<Instruction::Code>(Instruction::IPUT + data.op_variant); - DCHECK_EQ(InlineMethodAnalyser::IPutVariant(opcode), data.op_variant); + DCHECK(move_result == nullptr || data.return_arg_plus1 != 0u); + if (move_result != nullptr && move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE && + !WideArgIsInConsecutiveDalvikRegs(invoke, data.return_arg_plus1 - 1u)) { + // The two halfs of the return value are not in consecutive dalvik registers in INVOKE. + return false; + } - MIR* insn = AllocReplacementMIR(mir_graph, invoke, nullptr); - insn->dalvikInsn.opcode = opcode; - if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) { - insn->dalvikInsn.vA = invoke->dalvikInsn.vC + data.src_arg; - insn->dalvikInsn.vB = invoke->dalvikInsn.vC + data.object_arg; - } else { - insn->dalvikInsn.vA = invoke->dalvikInsn.arg[data.src_arg]; - insn->dalvikInsn.vB = invoke->dalvikInsn.arg[data.object_arg]; + DCHECK_EQ(data.method_is_static != 0u, + invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC || + invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE); + bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u); + if (!object_is_this) { + // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE). + // Allow synthetic accessors. We don't care about losing their stack frame in NPE. + if (!InlineMethodAnalyser::IsSyntheticAccessor( + mir_graph->GetMethodLoweringInfo(invoke).GetTargetMethod())) { + return false; + } + } + + if (object_is_this) { + // Mark invoke as NOP, null-check is done on IPUT. No aborts after this. + invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); } + + MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result); + insn->dalvikInsn.opcode = opcode; + insn->dalvikInsn.vA = src_reg; + insn->dalvikInsn.vB = object_reg; mir_graph->ComputeInlineIFieldLoweringInfo(data.field_idx, invoke, insn); DCHECK(mir_graph->GetIFieldLoweringInfo(insn).IsResolved()); @@ -689,6 +734,23 @@ bool DexFileMethodInliner::GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MI DCHECK_EQ(data.is_volatile, mir_graph->GetIFieldLoweringInfo(insn).IsVolatile() ? 
1u : 0u); bb->InsertMIRAfter(invoke, insn); + + if (move_result != nullptr) { + MIR* move = AllocReplacementMIR(mir_graph, invoke, move_result); + insn->width = invoke->width; + move->offset = move_result->offset; + move->width = move_result->width; + if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) { + move->dalvikInsn.opcode = Instruction::MOVE_FROM16; + } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { + move->dalvikInsn.opcode = Instruction::MOVE_OBJECT_FROM16; + } else { + DCHECK_EQ(move_result->dalvikInsn.opcode, Instruction::MOVE_RESULT_WIDE); + move->dalvikInsn.opcode = Instruction::MOVE_WIDE_FROM16; + } + move->dalvikInsn.vA = move_result->dalvikInsn.vA; + move->dalvikInsn.vB = return_reg; + } return true; } diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index b4e190a89e..c03f89c8fa 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -302,7 +302,7 @@ class DexFileMethodInliner { static bool GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke, MIR* move_result, const InlineMethod& method, uint32_t method_idx); static bool GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke, - const InlineMethod& method, uint32_t method_idx); + MIR* move_result, const InlineMethod& method, uint32_t method_idx); ReaderWriterMutex lock_; /* diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index bfa22dab93..a3fb4201db 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1654,9 +1654,8 @@ bool Mir2Lir::HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int li StoreValue(rl_dest, rl_result); return true; } - // There is RegRegRegShift on Arm, so check for more special cases. - // TODO: disabled, need to handle case of "dest == src" properly. - if (false && cu_->instruction_set == kThumb2) { + // There is RegRegRegShift on Arm, so check for more special cases + if (cu_->instruction_set == kThumb2) { return EasyMultiply(rl_src, rl_dest, lit); } // Can we simplify this multiplication? diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 897d86d09a..208eadde12 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -211,7 +211,12 @@ RegLocation Mir2Lir::LoadValueWide(RegLocation rl_src, RegisterClass op_kind) { LoadValueDirectWide(rl_src, rl_src.reg); rl_src.location = kLocPhysReg; MarkLive(rl_src.reg.GetLow(), rl_src.s_reg_low); - MarkLive(rl_src.reg.GetHigh(), GetSRegHi(rl_src.s_reg_low)); + if (rl_src.reg.GetLowReg() != rl_src.reg.GetHighReg()) { + MarkLive(rl_src.reg.GetHigh(), GetSRegHi(rl_src.s_reg_low)); + } else { + // This must be an x86 vector register value. + DCHECK(IsFpReg(rl_src.reg) && (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64)); + } } return rl_src; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 73fdc82854..6fcdf70b12 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -120,7 +120,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { // FastInstance() already checked by DexFileMethodInliner. 
const InlineIGetIPutData& data = special.d.ifield_data; - if (data.method_is_static || data.object_arg != 0) { + if (data.method_is_static != 0u || data.object_arg != 0u) { // The object is not "this" and has to be null-checked. return false; } @@ -151,10 +151,14 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { // FastInstance() already checked by DexFileMethodInliner. const InlineIGetIPutData& data = special.d.ifield_data; - if (data.method_is_static || data.object_arg != 0) { + if (data.method_is_static != 0u || data.object_arg != 0u) { // The object is not "this" and has to be null-checked. return false; } + if (data.return_arg_plus1 != 0u) { + // The setter returns a method argument which we don't support here. + return false; + } bool wide = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE)); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index b12b6a7291..a241d51468 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1898,8 +1898,9 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t compiled_method = compiler_->Compile( *this, code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file); - } else if (dex_to_dex_compilation_level != kDontDexToDexCompile) { - // TODO: add a mode to disable DEX-to-DEX compilation ? + } + if (compiled_method == nullptr && dex_to_dex_compilation_level != kDontDexToDexCompile) { + // TODO: add a command-line option to disable DEX-to-DEX compilation ? (*dex_to_dex_compiler_)(*this, code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file, diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 05548761e0..d90405a1a4 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -193,7 +193,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } - case Instruction::INVOKE_STATIC: { + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_DIRECT: { uint32_t method_idx = instruction.VRegB_35c(); const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx); uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_; @@ -204,6 +205,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ return false; } + // Treat invoke-direct like static calls for now. HInvokeStatic* invoke = new (arena_) HInvokeStatic( arena_, number_of_arguments, dex_offset, method_idx); @@ -221,7 +223,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } - case Instruction::INVOKE_STATIC_RANGE: { + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::INVOKE_DIRECT_RANGE: { uint32_t method_idx = instruction.VRegB_3rc(); const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx); uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_; @@ -232,6 +235,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ return false; } + // Treat invoke-direct like static calls for now. 
HInvokeStatic* invoke = new (arena_) HInvokeStatic( arena_, number_of_arguments, dex_offset, method_idx); int32_t register_index = instruction.VRegC(); @@ -277,6 +281,13 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } + case Instruction::NEW_INSTANCE: { + current_block_->AddInstruction( + new (arena_) HNewInstance(dex_offset, instruction.VRegB_21c())); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + break; + } + case Instruction::NOP: break; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 05e5d7b8ef..d6295dbf43 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -30,7 +30,7 @@ namespace art { void CodeGenerator::Compile(CodeAllocator* allocator) { - frame_size_ = GetGraph()->GetMaximumNumberOfOutVRegs() * kWordSize; + frame_size_ = GetGraph()->GetMaximumNumberOfOutVRegs() * GetWordSize(); const GrowableArray<HBasicBlock*>* blocks = GetGraph()->GetBlocks(); DCHECK(blocks->Get(0) == GetGraph()->GetEntryBlock()); DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks->Get(1))); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 01bbcc0bb6..e144733976 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -145,6 +145,7 @@ class CodeGenerator : public ArenaObject { virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; + virtual size_t GetWordSize() const = 0; uint32_t GetFrameSize() const { return frame_size_; } void SetFrameSize(uint32_t size) { frame_size_ = size; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 09d6f7b36a..cb77f57499 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -39,14 +39,14 @@ void CodeGeneratorARM::GenerateFrameEntry() { __ PushList((1 << LR)); // Add the current ART method to the frame size and the return PC. - SetFrameSize(RoundUp(GetFrameSize() + 2 * kWordSize, kStackAlignment)); + SetFrameSize(RoundUp(GetFrameSize() + 2 * kArmWordSize, kStackAlignment)); // The retrn PC has already been pushed on the stack. 
- __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize)); + __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize)); __ str(R0, Address(SP, 0)); } void CodeGeneratorARM::GenerateFrameExit() { - __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize); + __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize); __ PopList((1 << PC)); } @@ -55,7 +55,7 @@ void CodeGeneratorARM::Bind(Label* label) { } int32_t CodeGeneratorARM::GetStackSlot(HLocal* local) const { - return (GetGraph()->GetMaximumNumberOfOutVRegs() + local->GetRegNumber()) * kWordSize; + return (GetGraph()->GetMaximumNumberOfOutVRegs() + local->GetRegNumber()) * kArmWordSize; } void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) { @@ -134,7 +134,7 @@ void LocationsBuilderARM::VisitLocal(HLocal* local) { void InstructionCodeGeneratorARM::VisitLocal(HLocal* local) { DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock()); - codegen_->SetFrameSize(codegen_->GetFrameSize() + kWordSize); + codegen_->SetFrameSize(codegen_->GetFrameSize() + kArmWordSize); } void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) { @@ -185,7 +185,7 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 }; -static constexpr int kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); class InvokeStaticCallingConvention : public CallingConvention<Register> { public: @@ -235,7 +235,7 @@ void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) { void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) { Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>(); size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + - invoke->GetIndexInDexCache() * kWordSize; + invoke->GetIndexInDexCache() * kArmWordSize; // TODO: Implement all kinds of calls: // 1) boot -> boot @@ -287,5 +287,37 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { } } +static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1 }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetOut(Location(R0)); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { + InvokeRuntimeCallingConvention calling_convention; + LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); + + int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocObjectWithAccessCheck).Int32Value(); + __ ldr(LR, Address(TR, offset)); + __ blx(LR); + + codegen_->RecordPcInfo(instruction->GetDexPc()); +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm.h 
b/compiler/optimizing/code_generator_arm.h index 52d6b2e641..a51d85e40c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -22,12 +22,10 @@ #include "utils/arm/assembler_arm.h" namespace art { - -class Assembler; -class Label; - namespace arm { +static constexpr size_t kArmWordSize = 4; + class LocationsBuilderARM : public HGraphVisitor { public: explicit LocationsBuilderARM(HGraph* graph) : HGraphVisitor(graph) { } @@ -79,6 +77,10 @@ class CodeGeneratorARM : public CodeGenerator { virtual void Bind(Label* label) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; + virtual size_t GetWordSize() const OVERRIDE { + return kArmWordSize; + } + virtual HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 7b0a087356..c695e26fb4 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -40,14 +40,14 @@ void CodeGeneratorX86::GenerateFrameEntry() { core_spill_mask_ |= (1 << kFakeReturnRegister); // Add the current ART method to the frame size and the return PC. - SetFrameSize(RoundUp(GetFrameSize() + 2 * kWordSize, kStackAlignment)); + SetFrameSize(RoundUp(GetFrameSize() + 2 * kX86WordSize, kStackAlignment)); // The return PC has already been pushed on the stack. - __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize)); + __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); __ movl(Address(ESP, 0), EAX); } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize)); + __ addl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); } void CodeGeneratorX86::Bind(Label* label) { @@ -59,7 +59,7 @@ void InstructionCodeGeneratorX86::LoadCurrentMethod(Register reg) { } int32_t CodeGeneratorX86::GetStackSlot(HLocal* local) const { - return (GetGraph()->GetMaximumNumberOfOutVRegs() + local->GetRegNumber()) * kWordSize; + return (GetGraph()->GetMaximumNumberOfOutVRegs() + local->GetRegNumber()) * kX86WordSize; } void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) { @@ -122,7 +122,7 @@ void LocationsBuilderX86::VisitLocal(HLocal* local) { void InstructionCodeGeneratorX86::VisitLocal(HLocal* local) { DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock()); - codegen_->SetFrameSize(codegen_->GetFrameSize() + kWordSize); + codegen_->SetFrameSize(codegen_->GetFrameSize() + kX86WordSize); } void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) { @@ -188,7 +188,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX }; -static constexpr int kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); class InvokeStaticCallingConvention : public CallingConvention<Register> { public: @@ -199,6 +199,20 @@ class InvokeStaticCallingConvention : public CallingConvention<Register> { DISALLOW_COPY_AND_ASSIGN(InvokeStaticCallingConvention); }; +static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class 
InvokeRuntimeCallingConvention : public CallingConvention<Register> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + void LocationsBuilderX86::VisitPushArgument(HPushArgument* argument) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(argument); InvokeStaticCallingConvention calling_convention; @@ -236,7 +250,7 @@ void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) { void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) { Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>(); size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + - invoke->GetIndexInDexCache() * kWordSize; + invoke->GetIndexInDexCache() * kX86WordSize; // TODO: Implement all kinds of calls: // 1) boot -> boot @@ -284,5 +298,23 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { } } +void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetOut(Location(EAX)); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { + InvokeRuntimeCallingConvention calling_convention; + LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + __ movl(calling_convention.GetRegisterAt(0), + Immediate(instruction->GetTypeIndex())); + + __ fs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocObjectWithAccessCheck))); + + codegen_->RecordPcInfo(instruction->GetDexPc()); +} + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index dd5044f4dc..bba81c0894 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -22,9 +22,10 @@ #include "utils/x86/assembler_x86.h" namespace art { - namespace x86 { +static constexpr size_t kX86WordSize = 4; + class LocationsBuilderX86 : public HGraphVisitor { public: explicit LocationsBuilderX86(HGraph* graph) : HGraphVisitor(graph) { } @@ -77,6 +78,10 @@ class CodeGeneratorX86 : public CodeGenerator { virtual void Bind(Label* label) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; + virtual size_t GetWordSize() const OVERRIDE { + return kX86WordSize; + } + virtual HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 2b21905224..830d0c7846 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -201,6 +201,7 @@ class HBasicBlock : public ArenaObject { M(InvokeStatic) \ M(LoadLocal) \ M(Local) \ + M(NewInstance) \ M(PushArgument) \ M(Return) \ M(ReturnVoid) \ @@ -593,7 +594,7 @@ class HIntConstant : public HTemplateInstruction<0> { class HInvoke : public HInstruction { public: - HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, int32_t dex_pc) + HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, uint32_t dex_pc) : inputs_(arena, number_of_arguments), dex_pc_(dex_pc) { inputs_.SetSize(number_of_arguments); @@ -606,11 +607,11 @@ class HInvoke : public HInstruction { inputs_.Put(index, argument); } - int32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const { return dex_pc_; } 
protected: GrowableArray<HInstruction*> inputs_; - const int32_t dex_pc_; + const uint32_t dex_pc_; private: DISALLOW_COPY_AND_ASSIGN(HInvoke); @@ -620,8 +621,8 @@ class HInvokeStatic : public HInvoke { public: HInvokeStatic(ArenaAllocator* arena, uint32_t number_of_arguments, - int32_t dex_pc, - int32_t index_in_dex_cache) + uint32_t dex_pc, + uint32_t index_in_dex_cache) : HInvoke(arena, number_of_arguments, dex_pc), index_in_dex_cache_(index_in_dex_cache) {} uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; } @@ -634,6 +635,22 @@ class HInvokeStatic : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStatic); }; +class HNewInstance : public HTemplateInstruction<0> { + public: + HNewInstance(uint32_t dex_pc, uint16_t type_index) : dex_pc_(dex_pc), type_index_(type_index) {} + + uint32_t GetDexPc() const { return dex_pc_; } + uint16_t GetTypeIndex() const { return type_index_; } + + DECLARE_INSTRUCTION(NewInstance) + + private: + const uint32_t dex_pc_; + const uint16_t type_index_; + + DISALLOW_COPY_AND_ASSIGN(HNewInstance); +}; + // HPushArgument nodes are inserted after the evaluation of an argument // of a call. Their mere purpose is to ease the code generator's work. class HPushArgument : public HTemplateInstruction<1> { diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index f665f5c767..c6e448e6ab 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -743,19 +743,7 @@ static int dex2oat(int argc, char** argv) { InstructionSetFeatures instruction_set_features = ParseFeatureList(Runtime::GetDefaultInstructionSetFeatures()); -#if defined(__arm__) - InstructionSet instruction_set = kThumb2; -#elif defined(__aarch64__) - InstructionSet instruction_set = kArm64; -#elif defined(__i386__) - InstructionSet instruction_set = kX86; -#elif defined(__x86_64__) - InstructionSet instruction_set = kX86_64; -#elif defined(__mips__) - InstructionSet instruction_set = kMips; -#else - InstructionSet instruction_set = kNone; -#endif + InstructionSet instruction_set = kRuntimeISA; // Profile file to use std::string profile_file; diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 899aa78f6d..d6d20586b7 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -115,6 +115,10 @@ static const char* kThumbDataProcessingOperations[] = { "tst", "rsb", "cmp", "cmn", "orr", "mul", "bic", "mvn", }; +static const char* const kThumb2ShiftOperations[] = { + "lsl", "lsr", "asr", "ror" +}; + static const char* kThumbReverseOperations[] = { "rev", "rev16", "rbit", "revsh" }; @@ -359,6 +363,61 @@ int32_t ThumbExpand(int32_t imm12) { } } +uint32_t VFPExpand32(uint32_t imm8) { + CHECK_EQ(imm8 & 0xffu, imm8); + uint32_t bit_a = (imm8 >> 7) & 1; + uint32_t bit_b = (imm8 >> 6) & 1; + uint32_t slice = imm8 & 0x3f; + return (bit_a << 31) | ((1 << 30) - (bit_b << 25)) | (slice << 19); +} + +uint64_t VFPExpand64(uint32_t imm8) { + CHECK_EQ(imm8 & 0xffu, imm8); + uint64_t bit_a = (imm8 >> 7) & 1; + uint64_t bit_b = (imm8 >> 6) & 1; + uint64_t slice = imm8 & 0x3f; + return (bit_a << 31) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); +} + +uint64_t AdvSIMDExpand(uint32_t op, uint32_t cmode, uint32_t imm8) { + CHECK_EQ(op & 1, op); + CHECK_EQ(cmode & 0xf, cmode); + CHECK_EQ(imm8 & 0xff, imm8); + int32_t cmode321 = cmode >> 1; + if (imm8 == 0 && cmode321 != 0 && cmode321 != 4 && cmode321 != 7) { + return INT64_C(0x00000000deadbeef); // UNPREDICTABLE + } + uint64_t imm = imm8; + switch (cmode321) { + case 3: imm <<= 8; // Fall through. 
+ case 2: imm <<= 8; // Fall through. + case 1: imm <<= 8; // Fall through. + case 0: return static_cast<int64_t>((imm << 32) | imm); + case 5: imm <<= 8; // Fall through. + case 4: return static_cast<int64_t>((imm << 48) | (imm << 32) | (imm << 16) | imm); + case 6: + imm = ((imm + 1u) << ((cmode & 1) != 0 ? 16 : 8)) - 1u; // Add 8 or 16 ones. + return static_cast<int64_t>((imm << 32) | imm); + default: + CHECK_EQ(cmode321, 7); + if ((cmode & 1) == 0 && op == 0) { + imm = (imm << 8) | imm; + return static_cast<int64_t>((imm << 48) | (imm << 32) | (imm << 16) | imm); + } else if ((cmode & 1) == 0 && op != 0) { + for (int i = 1; i != 8; ++i) { + imm |= ((imm >> i) & UINT64_C(1)) << (i * 8); + } + imm = imm & ~UINT64_C(0xfe); + return static_cast<int64_t>((imm << 8) - imm); + } else if ((cmode & 1) != 0 && op == 0) { + imm = static_cast<uint32_t>(VFPExpand32(imm8)); + return static_cast<int64_t>((imm << 32) | imm); + } else { + return INT64_C(0xdeadbeef00000000); // UNDEFINED + } + } +} + size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) { uint32_t instr = (ReadU16(instr_ptr) << 16) | ReadU16(instr_ptr + 2); // |111|1 1|1000000|0000|1111110000000000| @@ -757,83 +816,136 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } } else if ((op3 >> 4) == 2 && op4 == 0) { // 10xxxx, op = 0 // fp data processing + // VMLA, VMLS, VMUL, VNMUL, VADD, VSUB, VDIV, VMOV, ... + // |1111|1100|0|0|00|0000|1111|110|0|0|0|0|0|0000| + // |5 2|1 8|7|6|54|3 0|5 2|1 9|8|7|6|5|4|3 0| + // |----|----|-|-|--|----|----|---|-|-|-|-|-|----| + // |3322|2222|2|2|22|1111|1111|110|0|0|0|0|0|0000| + // |1 8|7 4|3|2|10|9 6|5 2|1 9|8|7|6|5|4|3 0| + // |----|----|-|-|--|----|----|---|-|-|-|-|-|----| + // |1110|1110| op3 | Vn | Vd |101|S|N|Q|M|0| Vm | + // |1110|1110|0|D|00| Vn | Vd |101|S|N|0|M|0| Vm | VMLA + // |1110|1110|0|D|00| Vn | Vd |101|S|N|1|M|0| Vm | VMLS + // |1110|1110|0|D|10| Vn | Vd |101|S|N|0|M|0| Vm | VMUL + // |1110|1110|0|D|10| Vn | Vd |101|S|N|1|M|0| Vm | VNMUL + // |1110|1110|0|D|11| Vn | Vd |101|S|N|0|M|0| Vm | VADD + // |1110|1110|0|D|11| Vn | Vd |101|S|N|1|M|0| Vm | VSUB + // |1110|1110|1|D|00| Vn | Vd |101|S|N|0|M|0| Vm | VDIV + // |1110|1110|1|D|11| iH | Vd |101|S|0|0|0|0| iL | VMOV (imm) + // |1110|1110|1|D|11|op5 | Vd |101|S|.|1|M|0| Vm | ... (see below) + uint32_t S = (instr >> 8) & 1; + uint32_t Q = (instr >> 6) & 1; + FpRegister d(instr, 12, 22); + FpRegister n(instr, 16, 7); + FpRegister m(instr, 0, 5); if ((op3 & 0xB) == 0) { // 100x00 - // VMLA, VMLS - // |1111|1100|0|0|00|0000|1111|110|0|0|0 |0|0|0000| - // |5 2|1 8|7|6|54|3 0|5 2|1 9|8|7|6 |5|4|3 0| - // |----|----|-|-|--|----|----|---|-|-|- |-|-|----| - // |3322|2222|2|2|22|1111|1111|110|0|0|0 |0|0|0000| - // |1 8|7 4|3|2|10|9 6|5 2|1 9|8|7|6 |5|4|3 0| - // |----|----|-|-|--|----|----|---|-|-|- |-|-|----| - // |1110|1110|0|D|00| Vn | Vd |101|S|N|op|M|0| Vm | - uint32_t op = (instr >> 6) & 1; - FpRegister d(instr, 12, 22); - FpRegister n(instr, 16, 7); - FpRegister m(instr, 0, 5); - opcode << (op == 0 ? "vmla" : "vmls"); + opcode << (Q == 0 ? "vmla" : "vmls") << (S != 0 ? 
".f64" : ".f32"); args << d << ", " << n << ", " << m; - } else if ((op3 & 0xB) == 0xB) { // 101x11 - uint32_t Q = (instr >> 6) & 1; - if (Q == 1) { - // VCVT (floating-point conversion) - // |1111|1100|0|0|00|0000|1111|110|0|0 |0|0|0|0000| - // |5 2|1 8|7|6|54|3 0|5 2|1 9|8|7 |6|5|4|3 0| - // |----|----|-|-|--|----|----|---|-|- |-|-|-|----| - // |3322|2222|2|2|22|1111|1111|110|0|0 |0|0|0|0000| - // |1 8|7 4|3|2|10|9 6|5 2|1 9|8|7 |6|5|4|3 0| - // |----|----|-|-|--|----|----|---|-|- |-|-|-|----| - // |1110|1110|1|D|11|op5 | Vd |101|S|op|1|M|0| Vm | - uint32_t op5 = (instr >> 16) & 0xF; - uint32_t S = (instr >> 8) & 1; - uint32_t op = (instr >> 7) & 1; - // Register types in these instructions relies on the combination of op5 and S. - FpRegister Dd(instr, 12, 22, 1); - FpRegister Sd(instr, 12, 22, 0); - FpRegister Dm(instr, 0, 5, 1); - FpRegister Sm(instr, 0, 5, 0); - if (op5 == 0xD) { - if (S == 1) { - // vcvt{r}.s32.f64 - opcode << "vcvt" << (op == 0 ? "r" : "") << ".s32.f64"; - args << Sd << ", " << Dm; - } else { - // vcvt{r}.s32.f32 - opcode << "vcvt" << (op == 0 ? "r" : "") << ".s32.f32"; - args << Sd << ", " << Sm; - } - } else if (op5 == 0xC) { - if (S == 1) { - // vcvt{r}.u32.f64 - opcode << "vcvt" << (op == 0 ? "r" : "") << ".u32.f64"; - args << Sd << ", " << Dm; - } else { - // vcvt{r}.u32.f32 - opcode << "vcvt" << (op == 0 ? "r" : "") << ".u32.f32"; - args << Sd << ", " << Sm; - } - } else if (op5 == 0x8) { + } else if ((op3 & 0xB) == 0x2) { // 100x10 + opcode << (Q == 0 ? "vmul" : "vnmul") << (S != 0 ? ".f64" : ".f32"); + args << d << ", " << n << ", " << m; + } else if ((op3 & 0xB) == 0x3) { // 100x11 + opcode << (Q == 0 ? "vadd" : "vsub") << (S != 0 ? ".f64" : ".f32"); + args << d << ", " << n << ", " << m; + } else if ((op3 & 0xB) == 0x8 && Q == 0) { // 101x00, Q == 0 + opcode << "vdiv" << (S != 0 ? ".f64" : ".f32"); + args << d << ", " << n << ", " << m; + } else if ((op3 & 0xB) == 0xB && Q == 0) { // 101x11, Q == 0 + uint32_t imm8 = ((instr & 0xf0000u) >> 12) | (instr & 0xfu); + opcode << "vmov" << (S != 0 ? ".f64" : ".f32"); + args << d << ", " << (S != 0 ? StringPrintf("0x%016" PRIx64, VFPExpand64(imm8)) + : StringPrintf("0x%08x", VFPExpand32(imm8))); + if ((instr & 0xa0) != 0) { + args << " (UNPREDICTABLE)"; + } + } else if ((op3 & 0xB) == 0xB && Q == 1) { // 101x11, Q == 1 + // VNEG, VSQRT, VCMP, VCMPE, VCVT (floating-point conversion) + // |1111|1100|0|0|00|0000|1111|110|0|0 |0|0|0|0000| + // |5 2|1 8|7|6|54|3 0|5 2|1 9|8|7 |6|5|4|3 0| + // |----|----|-|-|--|----|----|---|-|- |-|-|-|----| + // |3322|2222|2|2|22|1111|1111|110|0|0 |0|0|0|0000| + // |1 8|7 4|3|2|10|9 6|5 2|1 9|8|7 |6|5|4|3 0| + // |----|----|-|-|--|----|----|---|-|- |-|-|-|----| + // |1110|1110|1|D|11|0000| Vd |101|S|0 |1|M|0| Vm | VMOV (reg) + // |1110|1110|1|D|11|0000| Vd |101|S|1 |1|M|0| Vm | VABS + // |1110|1110|1|D|11|0001| Vd |101|S|0 |1|M|0| Vm | VNEG + // |1110|1110|1|D|11|0001| Vd |101|S|1 |1|M|0| Vm | VSQRT + // |1110|1110|1|D|11|0100| Vd |101|S|op|1|M|0| Vm | VCMP + // |1110|1110|1|D|11|0101| Vd |101|S|op|1|0|0|0000| VCMPE + // |1110|1110|1|D|11|op5 | Vd |101|S|op|1|M|0| Vm | VCVT + uint32_t op5 = (instr >> 16) & 0xF; + uint32_t op = (instr >> 7) & 1; + // Register types in VCVT instructions rely on the combination of op5 and S. + FpRegister Dd(instr, 12, 22, 1); + FpRegister Sd(instr, 12, 22, 0); + FpRegister Dm(instr, 0, 5, 1); + FpRegister Sm(instr, 0, 5, 0); + if (op5 == 0) { + opcode << (op == 0 ? "vmov" : "vabs") << (S != 0 ? 
".f64" : ".f32"); + args << d << ", " << m; + } else if (op5 == 1) { + opcode << (op != 0 ? "vsqrt" : "vneg") << (S != 0 ? ".f64" : ".f32"); + args << d << ", " << m; + } else if (op5 == 4) { + opcode << "vcmp" << (S != 0 ? ".f64" : ".f32"); + args << d << ", " << m; + if (op != 0) { + args << " (quiet nan)"; + } + } else if (op5 == 5) { + opcode << "vcmpe" << (S != 0 ? ".f64" : ".f32"); + args << d << ", #0.0"; + if (op != 0) { + args << " (quiet nan)"; + } + if ((instr & 0x2f) != 0) { + args << " (UNPREDICTABLE)"; + } + } else if (op5 == 0xD) { + if (S == 1) { + // vcvt{r}.s32.f64 + opcode << "vcvt" << (op == 0 ? "r" : "") << ".s32.f64"; + args << Sd << ", " << Dm; + } else { + // vcvt{r}.s32.f32 + opcode << "vcvt" << (op == 0 ? "r" : "") << ".s32.f32"; + args << Sd << ", " << Sm; + } + } else if (op5 == 0xC) { + if (S == 1) { + // vcvt{r}.u32.f64 + opcode << "vcvt" << (op == 0 ? "r" : "") << ".u32.f64"; + args << Sd << ", " << Dm; + } else { + // vcvt{r}.u32.f32 + opcode << "vcvt" << (op == 0 ? "r" : "") << ".u32.f32"; + args << Sd << ", " << Sm; + } + } else if (op5 == 0x8) { + if (S == 1) { + // vcvt.f64.<Tm> + opcode << "vcvt.f64." << (op == 0 ? "u" : "s") << "32"; + args << Dd << ", " << Sm; + } else { + // vcvt.f32.<Tm> + opcode << "vcvt.f32." << (op == 0 ? "u" : "s") << "32"; + args << Sd << ", " << Sm; + } + } else if (op5 == 0x7) { + if (op == 1) { if (S == 1) { - // vcvt.f64.<Tm> - opcode << "vcvt.f64." << (op == 0 ? "u" : "s") << "32"; + // vcvt.f64.f32 + opcode << "vcvt.f64.f32"; args << Dd << ", " << Sm; } else { - // vcvt.f32.<Tm> - opcode << "vcvt.f32." << (op == 0 ? "u" : "s") << "32"; - args << Sd << ", " << Sm; - } - } else if (op5 == 0x7) { - if (op == 1) { - if (S == 1) { - // vcvt.f64.f32 - opcode << "vcvt.f64.f32"; - args << Dd << ", " << Sm; - } else { - // vcvt.f32.f64 - opcode << "vcvt.f32.f64"; - args << Sd << ", " << Dm; - } + // vcvt.f32.f64 + opcode << "vcvt.f32.f64"; + args << Sd << ", " << Dm; } } + } else if ((op5 & 0xa) == 0xa) { + opcode << "vcvt"; + args << "[undecoded: floating <-> fixed]"; } } } else if ((op3 >> 4) == 2 && op4 == 1) { // 10xxxx, op = 1 @@ -886,53 +998,6 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } } } - - if ((op3 & 0x30) == 0x20 && op4 == 0) { // 10 xxxx ... 0 - if ((coproc & 0xE) == 0xA) { - // VFP data-processing instructions - // |111|1|1100|0000|0000|1111|110|0|00 |0|0|0000| - // |5 3|2|1098|7654|3 0|54 2|10 |8|76 |5|4|3 0| - // |---|-|----|----|----|----|---|-|----|-|-|----| - // |332|2|2222|2222|1111|1111|110|0|00 |0|0|0000| - // |1 9|8|7654|3210|9 6|54 2|109|8|76 |5|4|3 0| - // |---|-|----|----|----|----|---|-|----|-|-|----| - // |111|T|1110|opc1|opc2| |101| |opc3| | | | - // 111 0 1110|1111 0100 1110 101 0 01 1 0 1001 - eef4ea69 - uint32_t opc1 = (instr >> 20) & 0xF; - uint32_t opc2 = (instr >> 16) & 0xF; - uint32_t opc3 = (instr >> 6) & 0x3; - if ((opc1 & 0xB) == 0xB) { // 1x11 - // Other VFP data-processing instructions. - uint32_t sz = (instr >> 8) & 1; - FpRegister d(instr, 12, 22); - FpRegister m(instr, 0, 5); - switch (opc2) { - case 0x1: // Vneg/Vsqrt - // 1110 11101 D 11 0001 dddd 101s o1M0 mmmm - opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (sz == 1 ? ".f64" : ".f32"); - args << d << ", " << m; - break; - case 0x4: case 0x5: { // Vector compare - // 1110 11101 D 11 0100 dddd 101 sE1M0 mmmm - opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (sz == 1 ? 
".f64" : ".f32"); - args << d << ", " << m; - break; - } - } - } - } - } else if ((op3 & 0x30) == 0x30) { // 11 xxxx - // Advanced SIMD - if ((instr & 0xFFBF0ED0) == 0xeeb10ac0) { // Vsqrt - // 1110 11101 D 11 0001 dddd 101S 11M0 mmmm - // 1110 11101 0 11 0001 1101 1011 1100 1000 - eeb1dbc8 - uint32_t sz = (instr >> 8) & 1; - FpRegister d(instr, 12, 22); - FpRegister m(instr, 0, 5); - opcode << "vsqrt" << (sz == 1 ? ".f64" : ".f32"); - args << d << ", " << m; - } - } } break; case 2: @@ -1388,6 +1453,16 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) default: // more formats if ((op2 >> 4) == 2) { // 010xxxx // data processing (register) + if ((instr & 0x0080f0f0) == 0x0000f000) { + // LSL, LSR, ASR, ROR + uint32_t shift_op = (instr >> 21) & 3; + uint32_t S = (instr >> 20) & 1; + ArmRegister Rd(instr, 8); + ArmRegister Rn(instr, 16); + ArmRegister Rm(instr, 0); + opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : ""); + args << Rd << ", " << Rn << ", " << Rm; + } } else if ((op2 >> 3) == 6) { // 0110xxx // Multiply, multiply accumulate, and absolute difference op1 = (instr >> 20) & 0x7; diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc index aaba5989f8..3bbec71a7c 100644 --- a/runtime/arch/arm/fault_handler_arm.cc +++ b/runtime/arch/arm/fault_handler_arm.cc @@ -35,7 +35,7 @@ namespace art { extern "C" void art_quick_throw_null_pointer_exception(); extern "C" void art_quick_throw_stack_overflow(void*); -extern "C" void art_quick_test_suspend(); +extern "C" void art_quick_implicit_suspend(); // Get the size of a thumb2 instruction in bytes. static uint32_t GetInstructionSize(uint8_t* pc) { @@ -142,7 +142,7 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) { if (found) { LOG(DEBUG) << "suspend check match"; // This is a suspend check. Arrange for the signal handler to return to - // art_quick_test_suspend. Also set LR so that after the suspend check it + // art_quick_implicit_suspend. Also set LR so that after the suspend check it // will resume the instruction (current PC + 2). PC points to the // ldr r0,[r0,#0] instruction (r0 will be 0, set by the trigger). @@ -151,7 +151,7 @@ bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) { LOG(DEBUG) << "arm lr: " << std::hex << sc->arm_lr; LOG(DEBUG) << "arm pc: " << std::hex << sc->arm_pc; sc->arm_lr = sc->arm_pc + 3; // +2 + 1 (for thumb) - sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_test_suspend); + sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend); // Now remove the suspend trigger that caused this fault. Thread::Current()->RemoveSuspendTrigger(); diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 71dcd7f7a6..4903732a7c 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -888,6 +888,14 @@ ENTRY art_quick_test_suspend RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN END art_quick_test_suspend +ENTRY art_quick_implicit_suspend + mov r0, rSELF + SETUP_REF_ONLY_CALLEE_SAVE_FRAME @ save callee saves for stack crawl + mov r1, sp + bl artTestSuspendFromCode @ (Thread*, SP) + RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN +END art_quick_implicit_suspend + /* * Called by managed code that is attempting to call a method on a proxy class. On entry * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. 
The diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 8ef407d0a7..62f359346e 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -17,7 +17,7 @@ #ifndef ART_RUNTIME_ASM_SUPPORT_H_ #define ART_RUNTIME_ASM_SUPPORT_H_ -#include "read_barrier.h" +#include "read_barrier_c.h" // Value loaded into rSUSPEND for quick. When this value is counted down to zero we do a suspend // check. diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 6c5406ec1b..78b7cc0c95 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -202,7 +202,7 @@ void ClassLinker::InitFromCompiler(const std::vector<const DexFile*>& boot_class // The GC can't handle an object with a null class since we can't get the size of this object. heap->IncrementDisableMovingGC(self); SirtRef<mirror::Class> java_lang_Class(self, down_cast<mirror::Class*>( - heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass)))); + heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass), VoidFunctor()))); CHECK(java_lang_Class.get() != NULL); mirror::Class::SetClassClass(java_lang_Class.get()); java_lang_Class->SetClass(java_lang_Class.get()); @@ -1180,7 +1180,8 @@ mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, const DexFile& dex_fi SirtRef<mirror::Class> dex_cache_class(self, GetClassRoot(kJavaLangDexCache)); SirtRef<mirror::DexCache> dex_cache( self, down_cast<mirror::DexCache*>( - heap->AllocObject<true>(self, dex_cache_class.get(), dex_cache_class->GetObjectSize()))); + heap->AllocObject<true>(self, dex_cache_class.get(), dex_cache_class->GetObjectSize(), + VoidFunctor()))); if (dex_cache.get() == NULL) { return NULL; } diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc index 0c8a4f044b..01ca60f6c0 100644 --- a/runtime/elf_file.cc +++ b/runtime/elf_file.cc @@ -22,6 +22,7 @@ #include "base/logging.h" #include "base/stl_util.h" #include "utils.h" +#include "instruction_set.h" namespace art { @@ -773,6 +774,40 @@ size_t ElfFile::GetLoadedSize() const { bool ElfFile::Load(bool executable, std::string* error_msg) { CHECK(program_header_only_) << file_->GetPath(); + + if (executable) { + InstructionSet elf_ISA = kNone; + switch (GetHeader().e_machine) { + case EM_ARM: { + elf_ISA = kArm; + break; + } + case EM_AARCH64: { + elf_ISA = kArm64; + break; + } + case EM_386: { + elf_ISA = kX86; + break; + } + case EM_X86_64: { + elf_ISA = kX86_64; + break; + } + case EM_MIPS: { + elf_ISA = kMips; + break; + } + } + + if (elf_ISA != kRuntimeISA) { + std::ostringstream oss; + oss << "Expected ISA " << kRuntimeISA << " but found " << elf_ISA; + *error_msg = oss.str(); + return false; + } + } + for (Elf32_Word i = 0; i < GetProgramHeaderNum(); i++) { Elf32_Phdr& program_header = GetProgramHeader(i); diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 9fc173ac2e..963c3d156d 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -1634,15 +1634,14 @@ extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, mirror::ArtMet } template<InvokeType type, bool access_check> -uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, mirror::ArtMethod** sp) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); +static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, + mirror::ArtMethod* caller_method, + 
Thread* self, mirror::ArtMethod** sp); template<InvokeType type, bool access_check> -uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, mirror::ArtMethod** sp) { +static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, mirror::ArtMethod** sp) { mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type); if (UNLIKELY(method == nullptr)) { @@ -1682,6 +1681,26 @@ uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, #endif } +// Explicit artInvokeCommon template function declarations to please analysis tool. +#define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check) \ + template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ + uint64_t artInvokeCommon<type, access_check>(uint32_t method_idx, \ + mirror::Object* this_object, \ + mirror::ArtMethod* caller_method, \ + Thread* self, mirror::ArtMethod** sp) \ + +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, false); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, true); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kInterface, false); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kInterface, true); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kDirect, false); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kDirect, true); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kStatic, false); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kStatic, true); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kSuper, false); +EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kSuper, true); +#undef EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL + // See comments in runtime_support_asm.S extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx, diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index f5f6f16686..920741f398 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -1997,6 +1997,8 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) { CHECK_LE(obj_size, kLargeSizeThreshold) << "A run slot contains a large object " << Dump(); CHECK_EQ(SizeToIndex(obj_size), idx) + << PrettyTypeOf(obj) << " " + << "obj_size=" << obj_size << ", idx=" << idx << " " << "A run slot contains an object with wrong size " << Dump(); } } diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index ca2d0bd6db..944ef8d889 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -76,7 +76,7 @@ static constexpr bool kCountMarkedObjects = false; // Turn off kCheckLocks when profiling the GC since it slows the GC down by up to 40%. static constexpr bool kCheckLocks = kDebugLocking; -static constexpr bool kVerifyRoots = kIsDebugBuild; +static constexpr bool kVerifyRootsMarked = kIsDebugBuild; // If true, revoke the rosalloc thread-local buffers at the // checkpoint, as opposed to during the pause. 
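The artInvokeCommon change above makes the template static, so the patch explicitly instantiates every <InvokeType, access_check> combination the trampolines need, via a macro, to keep the lock-analysis tool happy. A minimal standalone sketch of that pattern follows; the function body and names here are hypothetical, only the instantiation shape mirrors the patch.

#include <cstdint>
#include <iostream>

enum InvokeType { kStatic, kDirect, kVirtual, kSuper, kInterface };

// Template with internal linkage, defined in this translation unit only.
template <InvokeType type, bool access_check>
static uint64_t InvokeCommon(uint32_t method_idx) {
  // Toy body: pack the template arguments and the index into one value.
  return (static_cast<uint64_t>(type) << 33) |
         (static_cast<uint64_t>(access_check) << 32) | method_idx;
}

// Explicit instantiation definitions keep every needed combination visible
// to tools that cannot see through the template (mirrors the patch's macro).
#define EXPLICIT_INVOKE_COMMON(type, access_check) \
  template uint64_t InvokeCommon<type, access_check>(uint32_t method_idx)

EXPLICIT_INVOKE_COMMON(kVirtual, false);
EXPLICIT_INVOKE_COMMON(kVirtual, true);
EXPLICIT_INVOKE_COMMON(kStatic, false);
EXPLICIT_INVOKE_COMMON(kStatic, true);
#undef EXPLICIT_INVOKE_COMMON

int main() {
  std::cout << InvokeCommon<kVirtual, true>(42u) << std::endl;
  return 0;
}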
@@ -466,16 +466,17 @@ void MarkSweep::MarkRootCallback(Object** root, void* arg, uint32_t /*thread_id* } void MarkSweep::VerifyRootCallback(const Object* root, void* arg, size_t vreg, - const StackVisitor* visitor) { - reinterpret_cast<MarkSweep*>(arg)->VerifyRoot(root, vreg, visitor); + const StackVisitor* visitor, RootType root_type) { + reinterpret_cast<MarkSweep*>(arg)->VerifyRoot(root, vreg, visitor, root_type); } -void MarkSweep::VerifyRoot(const Object* root, size_t vreg, const StackVisitor* visitor) { +void MarkSweep::VerifyRoot(const Object* root, size_t vreg, const StackVisitor* visitor, + RootType root_type) { // See if the root is on any space bitmap. - if (GetHeap()->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == NULL) { + if (GetHeap()->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) { space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace(); if (!large_object_space->Contains(root)) { - LOG(ERROR) << "Found invalid root: " << root; + LOG(ERROR) << "Found invalid root: " << root << " with type " << root_type; if (visitor != NULL) { LOG(ERROR) << visitor->DescribeLocation() << " in VReg: " << vreg; } @@ -918,7 +919,7 @@ void MarkSweep::ReMarkRoots() { kVisitRootFlagStopLoggingNewRoots | kVisitRootFlagClearRootLog)); timings_.EndSplit(); - if (kVerifyRoots) { + if (kVerifyRootsMarked) { timings_.StartSplit("(Paused)VerifyRoots"); Runtime::Current()->VisitRoots(VerifyRootMarked, this); timings_.EndSplit(); diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index f1fd546f38..d49e427244 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -249,10 +249,10 @@ class MarkSweep : public GarbageCollector { size_t GetThreadCount(bool paused) const; static void VerifyRootCallback(const mirror::Object* root, void* arg, size_t vreg, - const StackVisitor *visitor); + const StackVisitor *visitor, RootType root_type); - void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor) - NO_THREAD_SAFETY_ANALYSIS; + void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor, + RootType root_type) NO_THREAD_SAFETY_ANALYSIS; // Push a single reference on a mark stack. void PushOnMarkStack(mirror::Object* obj); diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 25f20d621e..a06f272b55 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -65,7 +65,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas bool after_is_current_allocator = allocator == GetCurrentAllocator(); if (is_current_allocator && !after_is_current_allocator) { // If the allocator changed, we need to restart the allocation. - return AllocObject<kInstrumented>(self, klass, byte_count); + return AllocObject<kInstrumented>(self, klass, byte_count, pre_fence_visitor); } return nullptr; } @@ -111,7 +111,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas DCHECK(!Runtime::Current()->HasStatsEnabled()); } if (AllocatorHasAllocationStack(allocator)) { - PushOnAllocationStack(self, obj); + PushOnAllocationStack(self, &obj); } if (kInstrumented) { if (Dbg::IsAllocTrackingEnabled()) { @@ -135,28 +135,34 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas // The size of a thread-local allocation stack in the number of references. 
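Earlier in this diff, ElfFile::Load gains a check that the file's e_machine matches the runtime's instruction set before an executable mapping is attempted. A reduced standalone sketch of that mapping and comparison is below; the enum, the spec constants, and the hard-coded kRuntimeISA are simplified stand-ins for ART's headers.

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

enum InstructionSet { kNone, kArm, kArm64, kX86, kX86_64, kMips };

// e_machine values as defined by the ELF specification.
constexpr uint16_t EM_386 = 3, EM_MIPS = 8, EM_ARM = 40, EM_X86_64 = 62, EM_AARCH64 = 183;

// Assume a 32-bit x86 runtime for this sketch.
constexpr InstructionSet kRuntimeISA = kX86;

bool CheckElfIsa(uint16_t e_machine, std::string* error_msg) {
  InstructionSet elf_isa = kNone;
  switch (e_machine) {
    case EM_ARM:     elf_isa = kArm;    break;
    case EM_AARCH64: elf_isa = kArm64;  break;
    case EM_386:     elf_isa = kX86;    break;
    case EM_X86_64:  elf_isa = kX86_64; break;
    case EM_MIPS:    elf_isa = kMips;   break;
  }
  if (elf_isa != kRuntimeISA) {
    std::ostringstream oss;
    oss << "Expected ISA " << kRuntimeISA << " but found " << elf_isa;
    *error_msg = oss.str();
    return false;  // Refuse to load code built for another architecture.
  }
  return true;
}

int main() {
  std::string error;
  if (!CheckElfIsa(EM_ARM, &error)) std::cout << error << std::endl;
  return 0;
}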
static constexpr size_t kThreadLocalAllocationStackSize = 128; -inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object* obj) { +inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object** obj) { if (kUseThreadLocalAllocationStack) { - bool success = self->PushOnThreadLocalAllocationStack(obj); + bool success = self->PushOnThreadLocalAllocationStack(*obj); if (UNLIKELY(!success)) { // Slow path. Allocate a new thread-local allocation stack. mirror::Object** start_address; mirror::Object** end_address; while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize, &start_address, &end_address)) { + // Disable verify object in SirtRef as obj isn't on the alloc stack yet. + SirtRefNoVerify<mirror::Object> ref(self, *obj); CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false); + *obj = ref.get(); } self->SetThreadLocalAllocationStack(start_address, end_address); // Retry on the new thread-local allocation stack. - success = self->PushOnThreadLocalAllocationStack(obj); + success = self->PushOnThreadLocalAllocationStack(*obj); // Must succeed. CHECK(success); } } else { // This is safe to do since the GC will never free objects which are neither in the allocation // stack or the live bitmap. - while (!allocation_stack_->AtomicPushBack(obj)) { + while (!allocation_stack_->AtomicPushBack(*obj)) { + // Disable verify object in SirtRef as obj isn't on the alloc stack yet. + SirtRefNoVerify<mirror::Object> ref(self, *obj); CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false); + *obj = ref.get(); } } } diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 5879757559..a8989ecde7 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -115,6 +115,8 @@ enum ProcessState { }; std::ostream& operator<<(std::ostream& os, const ProcessState& process_state); +std::ostream& operator<<(std::ostream& os, const RootType& root_type); + class Heap { public: // If true, measure the total allocation time. @@ -158,28 +160,28 @@ class Heap { ~Heap(); // Allocates and initializes storage for an object instance. 
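PushOnAllocationStack above now takes the object by pointer-to-pointer: if the push must trigger a sticky GC, the freshly allocated object may move, so the callee keeps it alive in a handle and hands the possibly-new address back to the caller. A toy standalone model of that contract follows; the fake collector and handle are purely illustrative, not ART's types.

#include <cstdio>

struct Object { int payload; };

// Stand-in for a root handle the collector knows how to update (SirtRef in the patch).
struct Handle {
  explicit Handle(Object* obj) : obj_(obj) {}
  Object* get() const { return obj_; }
  Object* obj_;
};

Object moved_copy{0};

// Fake collector: "moves" the handled object and updates the handle.
void CollectGarbage(Handle* handle) {
  moved_copy = *handle->get();
  handle->obj_ = &moved_copy;
}

// Mirrors the shape of Heap::PushOnAllocationStack(Thread*, mirror::Object**):
// the double pointer lets the callee report the relocated address.
void PushOnAllocationStack(Object** obj, bool stack_full) {
  if (stack_full) {
    Handle ref(*obj);      // Keep the new object visible as a root.
    CollectGarbage(&ref);  // May relocate it.
    *obj = ref.get();      // Reload the (possibly moved) address for the caller.
  }
}

int main() {
  Object original{42};
  Object* obj = &original;
  PushOnAllocationStack(&obj, /*stack_full=*/true);
  std::printf("object now at %p (payload %d)\n", static_cast<void*>(obj), obj->payload);
  return 0;
}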
- template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor> + template <bool kInstrumented, typename PreFenceVisitor> mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes, - const PreFenceVisitor& pre_fence_visitor = VoidFunctor()) + const PreFenceVisitor& pre_fence_visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes, GetCurrentAllocator(), pre_fence_visitor); } - template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor> + template <bool kInstrumented, typename PreFenceVisitor> mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes, - const PreFenceVisitor& pre_fence_visitor = VoidFunctor()) + const PreFenceVisitor& pre_fence_visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes, GetCurrentNonMovingAllocator(), pre_fence_visitor); } - template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor = VoidFunctor> + template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor> ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator( Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator, - const PreFenceVisitor& pre_fence_visitor = VoidFunctor()) + const PreFenceVisitor& pre_fence_visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); AllocatorType GetCurrentAllocator() const { @@ -691,7 +693,8 @@ class Heap { void SignalHeapTrimDaemon(Thread* self); // Push an object onto the allocation stack. - void PushOnAllocationStack(Thread* self, mirror::Object* obj); + void PushOnAllocationStack(Thread* self, mirror::Object** obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark // sweep GC, false for other GC types. diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h index 5c735df457..9896a4833c 100644 --- a/runtime/gc/space/space_test.h +++ b/runtime/gc/space/space_test.h @@ -85,8 +85,13 @@ class SpaceTest : public CommonRuntimeTest { EXPECT_GE(size, SizeOfZeroLengthByteArray()); EXPECT_TRUE(byte_array_class != nullptr); o->SetClass(byte_array_class); - if (kUseBrooksReadBarrier) { - o->SetReadBarrierPointer(o); + if (kUseBakerOrBrooksReadBarrier) { + // Like the proper heap object allocation, install and verify + // the correct read barrier pointer. 
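With the default VoidFunctor arguments removed above, every AllocObject call site now passes a pre-fence visitor explicitly; the visitor initializes the new object before the publishing fence so no thread can observe it half-built. A reduced standalone sketch of that calling convention follows; the allocator and visitor names are assumptions, not ART's implementation.

#include <atomic>
#include <cstdio>
#include <cstdlib>

struct Object { int length; };

// No-op visitor, analogous to VoidFunctor in the patch.
struct VoidFunctor {
  void operator()(Object*, size_t) const {}
};

// Visitor that initializes the object, e.g. setting an array length.
struct SetLengthVisitor {
  explicit SetLengthVisitor(int length) : length_(length) {}
  void operator()(Object* obj, size_t /*usable_size*/) const { obj->length = length_; }
  int length_;
};

// Simplified allocation: run the visitor on the raw storage, then fence,
// so the object is fully initialized before it becomes visible.
template <typename PreFenceVisitor>
Object* AllocObject(size_t byte_count, const PreFenceVisitor& pre_fence_visitor) {
  Object* obj = static_cast<Object*>(std::malloc(byte_count));
  if (obj != nullptr) {
    pre_fence_visitor(obj, byte_count);
    std::atomic_thread_fence(std::memory_order_release);
  }
  return obj;
}

int main() {
  Object* plain = AllocObject(sizeof(Object), VoidFunctor());
  Object* array = AllocObject(sizeof(Object), SetLengthVisitor(16));
  if (array != nullptr) std::printf("array length %d\n", array->length);
  std::free(plain);
  std::free(array);
  return 0;
}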
+ if (kUseBrooksReadBarrier) { + o->SetReadBarrierPointer(o); + } + o->AssertReadBarrierPointer(); } mirror::Array* arr = o->AsArray<kVerifyNone>(); size_t header_size = SizeOfZeroLengthByteArray(); diff --git a/runtime/globals.h b/runtime/globals.h index ee8dc07c7a..7e85231d20 100644 --- a/runtime/globals.h +++ b/runtime/globals.h @@ -19,7 +19,7 @@ #include <stddef.h> #include <stdint.h> -#include "read_barrier.h" +#include "read_barrier_c.h" namespace art { diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h index c5a4ec8eb6..f4eecfc85a 100644 --- a/runtime/instruction_set.h +++ b/runtime/instruction_set.h @@ -35,6 +35,20 @@ enum InstructionSet { }; std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs); +#if defined(__arm__) +static constexpr InstructionSet kRuntimeISA = kArm; +#elif defined(__aarch64__) +static constexpr InstructionSet kRuntimeISA = kArm64; +#elif defined(__mips__) +static constexpr InstructionSet kRuntimeISA = kMips; +#elif defined(__i386__) +static constexpr InstructionSet kRuntimeISA = kX86; +#elif defined(__x86_64__) +static constexpr InstructionSet kRuntimeISA = kX86_64; +#else +static constexpr InstructionSet kRuntimeISA = kNone; +#endif + enum InstructionFeatures { kHwDiv = 1 // Supports hardware divide. }; diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 89d9241f52..025e62a5cb 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -442,7 +442,14 @@ inline void Class::SetName(String* name) { } inline void Class::CheckObjectAlloc() { - DCHECK(!IsArrayClass()) << PrettyClass(this); + DCHECK(!IsArrayClass()) + << PrettyClass(this) + << "A array shouldn't be allocated through this " + << "as it requires a pre-fence visitor that sets the class size."; + DCHECK(!IsClassClass()) + << PrettyClass(this) + << "A class object shouldn't be allocated through this " + << "as it requires a pre-fence visitor that sets the class size."; DCHECK(IsInstantiable()) << PrettyClass(this); // TODO: decide whether we want this check. It currently fails during bootstrap. 
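The instruction_set.h hunk above pins kRuntimeISA at compile time from the target's predefined macros, so checks like the ELF one become a plain constant comparison. The same selection in a standalone file, with an illustrative compile-time use added for the sketch:

#include <iostream>

enum InstructionSet { kNone, kArm, kArm64, kX86, kX86_64, kMips };

// Mirrors the patch: derive the ISA from the compiler's target macros.
#if defined(__arm__)
static constexpr InstructionSet kRuntimeISA = kArm;
#elif defined(__aarch64__)
static constexpr InstructionSet kRuntimeISA = kArm64;
#elif defined(__mips__)
static constexpr InstructionSet kRuntimeISA = kMips;
#elif defined(__i386__)
static constexpr InstructionSet kRuntimeISA = kX86;
#elif defined(__x86_64__)
static constexpr InstructionSet kRuntimeISA = kX86_64;
#else
static constexpr InstructionSet kRuntimeISA = kNone;
#endif

// Illustrative compile-time check made possible by the constant.
static_assert(kRuntimeISA != kArm64 || sizeof(void*) == 8,
              "arm64 implies 64-bit pointers");

int main() {
  std::cout << "runtime ISA enum value: " << kRuntimeISA << std::endl;
  return 0;
}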
// DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this); @@ -454,7 +461,7 @@ inline Object* Class::Alloc(Thread* self, gc::AllocatorType allocator_type) { CheckObjectAlloc(); gc::Heap* heap = Runtime::Current()->GetHeap(); return heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, this, this->object_size_, - allocator_type); + allocator_type, VoidFunctor()); } inline Object* Class::AllocObject(Thread* self) { diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index b6c140d6d8..a6db387a08 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -26,6 +26,7 @@ #include "class.h" #include "lock_word-inl.h" #include "monitor.h" +#include "read_barrier-inl.h" #include "runtime.h" #include "reference.h" #include "throwable.h" @@ -96,7 +97,7 @@ inline void Object::Wait(Thread* self, int64_t ms, int32_t ns) { inline Object* Object::GetReadBarrierPointer() { #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER DCHECK(kUseBakerOrBrooksReadBarrier); - return GetFieldObject<Object, kVerifyNone>(OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), false); + return GetFieldObject<Object, kVerifyNone, false>(OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), false); #else LOG(FATAL) << "Unreachable"; return nullptr; @@ -116,21 +117,19 @@ inline void Object::SetReadBarrierPointer(Object* rb_pointer) { } inline void Object::AssertReadBarrierPointer() const { -#if defined(USE_BAKER_READ_BARRIER) - DCHECK(kUseBakerReadBarrier); - Object* obj = const_cast<Object*>(this); - DCHECK(obj->GetReadBarrierPointer() == nullptr) - << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj) - << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer()); -#elif defined(USE_BROOKS_READ_BARRIER) - DCHECK(kUseBrooksReadBarrier); - Object* obj = const_cast<Object*>(this); - DCHECK_EQ(obj, obj->GetReadBarrierPointer()) - << "Bad Brooks pointer: obj=" << reinterpret_cast<void*>(obj) - << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer()); -#else - LOG(FATAL) << "Unreachable"; -#endif + if (kUseBakerReadBarrier) { + Object* obj = const_cast<Object*>(this); + DCHECK(obj->GetReadBarrierPointer() == nullptr) + << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj) + << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer()); + } else if (kUseBrooksReadBarrier) { + Object* obj = const_cast<Object*>(this); + DCHECK_EQ(obj, obj->GetReadBarrierPointer()) + << "Bad Brooks pointer: obj=" << reinterpret_cast<void*>(obj) + << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer()); + } else { + LOG(FATAL) << "Unreachable"; + } } template<VerifyObjectFlags kVerifyFlags> @@ -470,19 +469,17 @@ inline bool Object::CasField64(MemberOffset field_offset, int64_t old_value, int return QuasiAtomic::Cas64(old_value, new_value, addr); } -template<class T, VerifyObjectFlags kVerifyFlags> +template<class T, VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier> inline T* Object::GetFieldObject(MemberOffset field_offset, bool is_volatile) { if (kVerifyFlags & kVerifyThis) { VerifyObject(this); } byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr); - HeapReference<T> objref = *objref_addr; - + T* result = ReadBarrier::Barrier<T, kDoReadBarrier>(this, field_offset, objref_addr); if (UNLIKELY(is_volatile)) { QuasiAtomic::MembarLoadLoad(); // Ensure loads don't re-order. 
} - T* result = objref.AsMirrorPtr(); if (kVerifyFlags & kVerifyReads) { VerifyObject(result); } diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc index f1485e529e..d9155f5302 100644 --- a/runtime/mirror/object.cc +++ b/runtime/mirror/object.cc @@ -66,6 +66,26 @@ static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* sr return dest; } +// An allocation pre-fence visitor that copies the object. +class CopyObjectVisitor { + public: + explicit CopyObjectVisitor(Thread* self, SirtRef<Object>* orig, size_t num_bytes) + : self_(self), orig_(orig), num_bytes_(num_bytes) { + } + + void operator()(Object* obj, size_t usable_size) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + UNUSED(usable_size); + CopyObject(self_, obj, orig_->get(), num_bytes_); + } + + private: + Thread* const self_; + SirtRef<Object>* const orig_; + const size_t num_bytes_; + DISALLOW_COPY_AND_ASSIGN(CopyObjectVisitor); +}; + Object* Object::Clone(Thread* self) { CHECK(!IsClass()) << "Can't clone classes."; // Object::SizeOf gets the right size even if we're an array. Using c->AllocObject() here would @@ -74,13 +94,11 @@ Object* Object::Clone(Thread* self) { size_t num_bytes = SizeOf(); SirtRef<Object> this_object(self, this); Object* copy; + CopyObjectVisitor visitor(self, &this_object, num_bytes); if (heap->IsMovableObject(this)) { - copy = heap->AllocObject<true>(self, GetClass(), num_bytes); + copy = heap->AllocObject<true>(self, GetClass(), num_bytes, visitor); } else { - copy = heap->AllocNonMovableObject<true>(self, GetClass(), num_bytes); - } - if (LIKELY(copy != nullptr)) { - return CopyObject(self, copy, this_object.get(), num_bytes); + copy = heap->AllocNonMovableObject<true>(self, GetClass(), num_bytes, visitor); } return copy; } diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index 1ac23ce6c3..f652202999 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -185,7 +185,7 @@ class MANAGED LOCKABLE Object { bool IsPhantomReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Accessor for Java type fields. - template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true> T* GetFieldObject(MemberOffset field_offset, bool is_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template<bool kTransactionActive, bool kCheckTransaction = true, diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h index 89ee34e813..9198c90d5a 100644 --- a/runtime/object_callbacks.h +++ b/runtime/object_callbacks.h @@ -56,7 +56,7 @@ typedef mirror::Object* (MarkObjectCallback)(mirror::Object* obj, void* arg) __attribute__((warn_unused_result)); // A callback for verifying roots. 
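Object::Clone above now copies via a pre-fence visitor instead of copying after the allocation returns, which stays correct even if the allocation triggers a moving GC. A standalone sketch of the visitor shape follows; the toy handle and allocator are assumptions standing in for SirtRef and the heap.

#include <cstdio>
#include <cstdlib>
#include <cstring>

struct Object { char bytes[32]; };

// Toy handle standing in for SirtRef<Object> in the patch.
struct Handle {
  explicit Handle(Object* obj) : obj_(obj) {}
  Object* get() const { return obj_; }
  Object* obj_;
};

// Pre-fence visitor that copies the original object into the new storage.
class CopyObjectVisitor {
 public:
  CopyObjectVisitor(Handle* orig, size_t num_bytes) : orig_(orig), num_bytes_(num_bytes) {}
  void operator()(Object* obj, size_t /*usable_size*/) const {
    std::memcpy(obj, orig_->get(), num_bytes_);
  }
 private:
  Handle* const orig_;
  const size_t num_bytes_;
};

// Simplified allocator that runs the visitor before returning the object.
template <typename Visitor>
Object* AllocWithVisitor(size_t num_bytes, const Visitor& visitor) {
  Object* obj = static_cast<Object*>(std::malloc(num_bytes));
  if (obj != nullptr) visitor(obj, num_bytes);
  return obj;
}

int main() {
  Object original;
  std::snprintf(original.bytes, sizeof(original.bytes), "hello clone");
  Handle handle(&original);
  CopyObjectVisitor visitor(&handle, sizeof(Object));
  Object* copy = AllocWithVisitor(sizeof(Object), visitor);
  if (copy != nullptr) std::printf("%s\n", copy->bytes);
  std::free(copy);
  return 0;
}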
typedef void (VerifyRootCallback)(const mirror::Object* root, void* arg, size_t vreg, - const StackVisitor* visitor); + const StackVisitor* visitor, RootType root_type); typedef void (MarkHeapReferenceCallback)(mirror::HeapReference<mirror::Object>* ref, void* arg); diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc index a9072d814c..8bd8dbab73 100644 --- a/runtime/quick/inline_method_analyser.cc +++ b/runtime/quick/inline_method_analyser.cc @@ -135,6 +135,12 @@ bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier, } } +bool InlineMethodAnalyser::IsSyntheticAccessor(MethodReference ref) { + const DexFile::MethodId& method_id = ref.dex_file->GetMethodId(ref.dex_method_index); + const char* method_name = ref.dex_file->GetMethodName(method_id); + return strncmp(method_name, "access$", strlen("access$")) == 0; +} + bool InlineMethodAnalyser::AnalyseReturnMethod(const DexFile::CodeItem* code_item, InlineMethod* result) { const Instruction* return_instruction = Instruction::At(code_item->insns_); @@ -218,13 +224,24 @@ bool InlineMethodAnalyser::AnalyseIGetMethod(verifier::MethodVerifier* verifier, uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_; DCHECK_GE(object_reg, arg_start); DCHECK_LT(object_reg, code_item->registers_size_); + uint32_t object_arg = object_reg - arg_start; + DCHECK_LT(opcode == Instruction::IGET_WIDE ? dst_reg + 1 : dst_reg, code_item->registers_size_); if (dst_reg != return_reg) { return false; // Not returning the value retrieved by IGET? } - if ((verifier->GetAccessFlags() & kAccStatic) != 0 || object_reg != arg_start) { - // TODO: Support inlining IGET on other register than "this". + if ((verifier->GetAccessFlags() & kAccStatic) != 0u || object_arg != 0u) { + // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE). + // Allow synthetic accessors. We don't care about losing their stack frame in NPE. + if (!IsSyntheticAccessor(verifier->GetMethodReference())) { + return false; + } + } + + // InlineIGetIPutData::object_arg is only 4 bits wide. + static constexpr uint16_t kMaxObjectArg = 15u; + if (object_arg > kMaxObjectArg) { return false; } @@ -236,10 +253,10 @@ bool InlineMethodAnalyser::AnalyseIGetMethod(verifier::MethodVerifier* verifier, result->opcode = kInlineOpIGet; result->flags = kInlineSpecial; data->op_variant = IGetVariant(opcode); - data->object_arg = object_reg - arg_start; // Allow IGET on any register, not just "this". - data->src_arg = 0; - data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0; - data->reserved = 0; + data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0u ? 1u : 0u; + data->object_arg = object_arg; // Allow IGET on any register, not just "this". + data->src_arg = 0u; + data->return_arg_plus1 = 0u; } return true; } @@ -253,26 +270,45 @@ bool InlineMethodAnalyser::AnalyseIPutMethod(verifier::MethodVerifier* verifier, const Instruction* return_instruction = instruction->Next(); Instruction::Code return_opcode = return_instruction->Opcode(); + uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_; + uint16_t return_arg_plus1 = 0u; if (return_opcode != Instruction::RETURN_VOID) { - // TODO: Support returning an argument. - // This is needed by builder classes and generated accessor setters. 
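The analyser change above whitelists synthetic accessors by name so IGET/IPUT on a register other than "this" can still be inlined for them; losing such a frame in an NPE stack trace is acceptable. The predicate is just a prefix test, sketched standalone below with made-up method names.

#include <cstdio>
#include <cstring>

// Mirrors InlineMethodAnalyser::IsSyntheticAccessor: javac names the
// accessors it generates for private members "access$NNN".
bool IsSyntheticAccessor(const char* method_name) {
  return std::strncmp(method_name, "access$", std::strlen("access$")) == 0;
}

int main() {
  std::printf("%d\n", IsSyntheticAccessor("access$012"));  // 1: generated accessor.
  std::printf("%d\n", IsSyntheticAccessor("getValue"));    // 0: ordinary method.
  return 0;
}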
- // builder.setX(value): iput value, this, fieldX; return-object this; - // object.access$nnn(value): iput value, this, fieldX; return value; - // Use InlineIGetIPutData::reserved to hold the information. - return false; + if (return_opcode != Instruction::RETURN && + return_opcode != Instruction::RETURN_OBJECT && + return_opcode != Instruction::RETURN_WIDE) { + return false; + } + // Returning an argument. + uint32_t return_reg = return_instruction->VRegA_11x(); + DCHECK_GE(return_reg, arg_start); + DCHECK_LT(return_opcode == Instruction::RETURN_WIDE ? return_reg + 1u : return_reg, + code_item->registers_size_); + return_arg_plus1 = return_reg - arg_start + 1u; } uint32_t src_reg = instruction->VRegA_22c(); uint32_t object_reg = instruction->VRegB_22c(); uint32_t field_idx = instruction->VRegC_22c(); - uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_; DCHECK_GE(object_reg, arg_start); DCHECK_LT(object_reg, code_item->registers_size_); DCHECK_GE(src_reg, arg_start); DCHECK_LT(opcode == Instruction::IPUT_WIDE ? src_reg + 1 : src_reg, code_item->registers_size_); + uint32_t object_arg = object_reg - arg_start; + uint32_t src_arg = src_reg - arg_start; + + if ((verifier->GetAccessFlags() & kAccStatic) != 0u || object_arg != 0u) { + // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE). + // Allow synthetic accessors. We don't care about losing their stack frame in NPE. + if (!IsSyntheticAccessor(verifier->GetMethodReference())) { + return false; + } + } - if ((verifier->GetAccessFlags() & kAccStatic) != 0 || object_reg != arg_start) { - // TODO: Support inlining IPUT on other register than "this". + // InlineIGetIPutData::object_arg/src_arg/return_arg_plus1 are each only 4 bits wide. + static constexpr uint16_t kMaxObjectArg = 15u; + static constexpr uint16_t kMaxSrcArg = 15u; + static constexpr uint16_t kMaxReturnArgPlus1 = 15u; + if (object_arg > kMaxObjectArg || src_arg > kMaxSrcArg || return_arg_plus1 > kMaxReturnArgPlus1) { return false; } @@ -284,10 +320,10 @@ bool InlineMethodAnalyser::AnalyseIPutMethod(verifier::MethodVerifier* verifier, result->opcode = kInlineOpIPut; result->flags = kInlineSpecial; data->op_variant = IPutVariant(opcode); - data->object_arg = object_reg - arg_start; // Allow IPUT on any register, not just "this". - data->src_arg = src_reg - arg_start; - data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0; - data->reserved = 0; + data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0u ? 1u : 0u; + data->object_arg = object_arg; // Allow IPUT on any register, not just "this". + data->src_arg = src_arg; + data->return_arg_plus1 = return_arg_plus1; } return true; } diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h index 8e1a4083cd..ddee89b7bf 100644 --- a/runtime/quick/inline_method_analyser.h +++ b/runtime/quick/inline_method_analyser.h @@ -21,6 +21,7 @@ #include "base/mutex.h" #include "dex_file.h" #include "dex_instruction.h" +#include "method_reference.h" /* * NOTE: This code is part of the quick compiler. It lives in the runtime @@ -98,10 +99,10 @@ struct InlineIGetIPutData { // opcode-Instruction::IPUT for IPUTs. This is because the runtime // doesn't know the OpSize enumeration. 
uint16_t op_variant : 3; + uint16_t method_is_static : 1; uint16_t object_arg : 4; uint16_t src_arg : 4; // iput only - uint16_t method_is_static : 1; - uint16_t reserved : 4; + uint16_t return_arg_plus1 : 4; // iput only, method argument to return + 1, 0 = return void. uint16_t field_idx; uint32_t is_volatile : 1; uint32_t field_offset : 31; @@ -156,6 +157,9 @@ class InlineMethodAnalyser { return opcode - Instruction::IPUT; } + // Determines whether the method is a synthetic accessor (method name starts with "access$"). + static bool IsSyntheticAccessor(MethodReference ref); + private: static bool AnalyseReturnMethod(const DexFile::CodeItem* code_item, InlineMethod* result); static bool AnalyseConstMethod(const DexFile::CodeItem* code_item, InlineMethod* result); diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h new file mode 100644 index 0000000000..ea2f8307b8 --- /dev/null +++ b/runtime/read_barrier-inl.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_READ_BARRIER_INL_H_ +#define ART_RUNTIME_READ_BARRIER_INL_H_ + +#include "read_barrier.h" + +#include "mirror/object_reference.h" + +namespace art { + +template <typename MirrorType, bool kDoReadBarrier> +inline MirrorType* ReadBarrier::Barrier( + mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) { + // Unused for now. + UNUSED(obj); + UNUSED(offset); + UNUSED(ref_addr); + if (kDoReadBarrier && kUseBakerReadBarrier) { + // To be implemented. + return ref_addr->AsMirrorPtr(); + } else if (kDoReadBarrier && kUseBrooksReadBarrier) { + // To be implemented. + return ref_addr->AsMirrorPtr(); + } else { + // No read barrier. + return ref_addr->AsMirrorPtr(); + } +} + +} // namespace art + +#endif // ART_RUNTIME_READ_BARRIER_INL_H_ diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h index ba0d830428..6f59004471 100644 --- a/runtime/read_barrier.h +++ b/runtime/read_barrier.h @@ -17,21 +17,28 @@ #ifndef ART_RUNTIME_READ_BARRIER_H_ #define ART_RUNTIME_READ_BARRIER_H_ -// This is in a separate file (from globals.h) because asm_support.h -// (a C header, not C++) can't include globals.h. +#include "base/mutex.h" +#include "base/macros.h" +#include "offsets.h" +#include "read_barrier_c.h" -// Uncomment one of the following two and the two fields in -// Object.java (libcore) to enable baker or brooks pointers. +// This is a C++ (not C) header file, separate from read_barrier_c.h +// which needs to be a C header file for asm_support.h. 
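The new read_barrier-inl.h above adds a single templated hook that reference loads funnel through; the Baker and Brooks branches are placeholders for now, selected by compile-time flags. That dispatch shape, reduced to a standalone sketch with a toy reference wrapper and hard-coded flags (not ART's configuration):

#include <cstdio>

struct Object { int id; };

// Toy stand-in for mirror::HeapReference<MirrorType>.
template <typename T>
struct HeapReference {
  T* AsMirrorPtr() const { return ptr; }
  T* ptr;
};

// Compile-time configuration, analogous to kUseBakerReadBarrier / kUseBrooksReadBarrier.
static constexpr bool kUseBakerReadBarrier = false;
static constexpr bool kUseBrooksReadBarrier = false;

struct ReadBarrier {
  template <typename MirrorType, bool kDoReadBarrier = true>
  static MirrorType* Barrier(HeapReference<MirrorType>* ref_addr) {
    if (kDoReadBarrier && kUseBakerReadBarrier) {
      return ref_addr->AsMirrorPtr();  // Placeholder: a Baker-style check would go here.
    } else if (kDoReadBarrier && kUseBrooksReadBarrier) {
      return ref_addr->AsMirrorPtr();  // Placeholder: Brooks forwarding would go here.
    } else {
      return ref_addr->AsMirrorPtr();  // No read barrier configured.
    }
  }
};

int main() {
  Object obj{7};
  HeapReference<Object> ref{&obj};
  Object* loaded = ReadBarrier::Barrier<Object, /*kDoReadBarrier=*/true>(&ref);
  std::printf("%d\n", loaded->id);
  return 0;
}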
-// #define USE_BAKER_READ_BARRIER -// #define USE_BROOKS_READ_BARRIER +namespace art { +namespace mirror { + class Object; + template<typename MirrorType> class HeapReference; +} // namespace mirror -#if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER) -#define USE_BAKER_OR_BROOKS_READ_BARRIER -#endif +class ReadBarrier { + public: + template <typename MirrorType, bool kDoReadBarrier = true> + ALWAYS_INLINE static MirrorType* Barrier( + mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); +}; -#if defined(USE_BAKER_READ_BARRIER) && defined(USE_BROOKS_READ_BARRIER) -#error "Only one of Baker or Brooks can be enabled at a time." -#endif +} // namespace art #endif // ART_RUNTIME_READ_BARRIER_H_ diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h new file mode 100644 index 0000000000..f4af61f517 --- /dev/null +++ b/runtime/read_barrier_c.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_READ_BARRIER_C_H_ +#define ART_RUNTIME_READ_BARRIER_C_H_ + +// This is a C (not C++) header file and is in a separate file (from +// globals.h) because asm_support.h is a C header file and can't +// include globals.h. + +// Uncomment one of the following two and the two fields in +// Object.java (libcore) to enable baker or brooks pointers. + +// #define USE_BAKER_READ_BARRIER +// #define USE_BROOKS_READ_BARRIER + +#if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER) +#define USE_BAKER_OR_BROOKS_READ_BARRIER +#endif + +#if defined(USE_BAKER_READ_BARRIER) && defined(USE_BROOKS_READ_BARRIER) +#error "Only one of Baker or Brooks can be enabled at a time." 
+#endif + +#endif // ART_RUNTIME_READ_BARRIER_C_H_ diff --git a/runtime/sirt_ref-inl.h b/runtime/sirt_ref-inl.h index 7f2d847fa8..7de624aad8 100644 --- a/runtime/sirt_ref-inl.h +++ b/runtime/sirt_ref-inl.h @@ -23,8 +23,11 @@ namespace art { -template<class T> inline SirtRef<T>::SirtRef(Thread* self, T* object) : self_(self), sirt_(object) { - VerifyObject(object); +template<class T> inline SirtRef<T>::SirtRef(Thread* self, T* object, bool should_verify) + : self_(self), sirt_(object) { + if (should_verify) { + VerifyObject(object); + } self_->PushSirt(&sirt_); } @@ -33,8 +36,10 @@ template<class T> inline SirtRef<T>::~SirtRef() { DCHECK_EQ(top_sirt, &sirt_); } -template<class T> inline T* SirtRef<T>::reset(T* object) { - VerifyObject(object); +template<class T> inline T* SirtRef<T>::reset(T* object, bool should_verify) { + if (should_verify) { + VerifyObject(object); + } T* old_ref = get(); sirt_.SetReference(0, object); return old_ref; diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h index 2226e17f56..cf23891ece 100644 --- a/runtime/sirt_ref.h +++ b/runtime/sirt_ref.h @@ -28,7 +28,7 @@ namespace art { template<class T> class SirtRef { public: - SirtRef(Thread* self, T* object); + SirtRef(Thread* self, T* object, bool should_verify = true); ~SirtRef(); T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -42,7 +42,8 @@ class SirtRef { } // Returns the old reference. - T* reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + T* reset(T* object = nullptr, bool should_verify = true) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: Thread* const self_; @@ -51,6 +52,17 @@ class SirtRef { DISALLOW_COPY_AND_ASSIGN(SirtRef); }; +// A version of SirtRef which disables the object verification. +template<class T> +class SirtRefNoVerify : public SirtRef<T> { + public: + SirtRefNoVerify(Thread* self, T* object) : SirtRef<T>(self, object, false) {} + // Returns the old reference. 
+ T* reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return SirtRef<T>::reset(object, false); + } +}; + } // namespace art #endif // ART_RUNTIME_SIRT_REF_H_ diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 7de9433b24..8dad41990f 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -823,9 +823,9 @@ class VerifyRootWrapperArg { }; static void VerifyRootWrapperCallback(mirror::Object** root, void* arg, uint32_t /*thread_id*/, - RootType /*root_type*/) { + RootType root_type) { VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg); - wrapperArg->callback_(*root, wrapperArg->arg_, 0, NULL); + wrapperArg->callback_(*root, wrapperArg->arg_, 0, NULL, root_type); } void ThreadList::VerifyRoots(VerifyRootCallback* callback, void* arg) const { diff --git a/test/401-optimizing-compiler/expected.txt b/test/401-optimizing-compiler/expected.txt index 268da55dca..a65e544efa 100644 --- a/test/401-optimizing-compiler/expected.txt +++ b/test/401-optimizing-compiler/expected.txt @@ -4,3 +4,6 @@ In static method with 5 args 1 2 3 4 5 In static method with 7 args 1 2 3 4 5 6 7 Forced GC java.lang.Error: Error +Forced GC +In static method with object arg class java.lang.Object +Forced GC diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java index 4031ff1fb8..aa08137caa 100644 --- a/test/401-optimizing-compiler/src/Main.java +++ b/test/401-optimizing-compiler/src/Main.java @@ -26,6 +26,8 @@ public class Main { error = e; } System.out.println(error); + + $opt$TestInvokeNew(); } public static void $opt$TestInvokeStatic() { @@ -37,6 +39,13 @@ public class Main { throwStaticMethod(); } + public static void $opt$TestInvokeNew() { + Object o = new Object(); + forceGCStaticMethod(); + printStaticMethodWithObjectArg(o); + forceGCStaticMethod(); + } + public static void printStaticMethod() { System.out.println("In static method"); } @@ -55,6 +64,10 @@ public class Main { + a + " " + b + " " + c + " " + d + " " + e + " " + f + " " + g); } + public static void printStaticMethodWithObjectArg(Object a) { + System.out.println("In static method with object arg " + a.getClass()); + } + public static void forceGCStaticMethod() { Runtime.getRuntime().gc(); Runtime.getRuntime().gc(); |
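Among the runtime changes above, SirtRefNoVerify exists for objects that are not yet on the allocation stack, where VerifyObject would trip. A cut-down standalone model of that verify-on-construction switch follows; the toy VerifyObject and the "published" flag are assumptions made only for the sketch.

#include <cassert>
#include <cstdio>

struct Object { bool published; };

// Toy check standing in for VerifyObject(): only published objects pass.
void VerifyObject(Object* obj) { assert(obj == nullptr || obj->published); }

template <class T>
class SirtRef {
 public:
  explicit SirtRef(T* object, bool should_verify = true) : obj_(object) {
    if (should_verify) VerifyObject(object);
  }
  T* get() const { return obj_; }
 private:
  T* obj_;
};

// Same handle, but skips verification, as in the patch's SirtRefNoVerify.
template <class T>
class SirtRefNoVerify : public SirtRef<T> {
 public:
  explicit SirtRefNoVerify(T* object) : SirtRef<T>(object, /*should_verify=*/false) {}
};

int main() {
  Object fresh{false};                  // Freshly allocated, not yet visible to the GC.
  SirtRefNoVerify<Object> ref(&fresh);  // OK: verification skipped.
  std::printf("%p\n", static_cast<void*>(ref.get()));
  return 0;
}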