diff options
29 files changed, 384 insertions, 235 deletions
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 70dce7f11e..a9d5893d86 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -67,7 +67,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { void MarkPreservedSingle(int v_reg, RegStorage reg); void MarkPreservedDouble(int v_reg, RegStorage reg); void CompilerInitializeRegAlloc(); - RegStorage AllocPreservedDouble(int s_reg); // Required for target - miscellaneous. void AssembleLIR(); @@ -196,6 +195,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool InexpensiveConstantFloat(int32_t value); bool InexpensiveConstantLong(int64_t value); bool InexpensiveConstantDouble(int64_t value); + RegStorage AllocPreservedDouble(int s_reg); + RegStorage AllocPreservedSingle(int s_reg); private: void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index e34d944ab2..6f0ac1a20e 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -19,6 +19,7 @@ #include "arm_lir.h" #include "codegen_arm.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array.h" diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index e1e2d5b749..ef94bbc3cc 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -771,7 +771,7 @@ RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) { int p_map_idx = SRegToPMap(s_reg); if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) { // Upper reg is already allocated. Can we fit? - int high_reg = promotion_map_[p_map_idx+1].FpReg; + int high_reg = promotion_map_[p_map_idx+1].fp_reg; if ((high_reg & 1) == 0) { // High reg is even - fail. return res; // Invalid. 
@@ -805,13 +805,32 @@ RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) { if (res.Valid()) { RegisterInfo* info = GetRegInfo(res); promotion_map_[p_map_idx].fp_location = kLocPhysReg; - promotion_map_[p_map_idx].FpReg = + promotion_map_[p_map_idx].fp_reg = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg().GetReg(); promotion_map_[p_map_idx+1].fp_location = kLocPhysReg; - promotion_map_[p_map_idx+1].FpReg = + promotion_map_[p_map_idx+1].fp_reg = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg().GetReg(); } return res; } +// Reserve a callee-save sp single register. +RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { + RegStorage res; + GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_); + for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + if (!info->IsTemp() && !info->InUse()) { + res = info->GetReg(); + int p_map_idx = SRegToPMap(s_reg); + int v_reg = mir_graph_->SRegToVReg(s_reg); + GetRegInfo(res)->MarkInUse(); + MarkPreservedSingle(v_reg, res); + promotion_map_[p_map_idx].fp_location = kLocPhysReg; + promotion_map_[p_map_idx].fp_reg = res.GetReg(); + break; + } + } + return res; +} + } // namespace art diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index bc8f95b1ca..2d5e291442 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -17,6 +17,7 @@ #include "arm_lir.h" #include "codegen_arm.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" namespace art { diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 3e0b3cf314..56dcbe59e9 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -445,17 +445,59 @@ void Arm64Mir2Lir::GenExitSequence() { NewLIR0(kPseudoMethodExit); - /* Need to restore any FP callee saves? 
*/ - if (fp_spill_mask_) { - int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_); - UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_); - } - if (core_spill_mask_) { - int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_; - UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_); + // Restore saves and drop stack frame. + // 2 versions: + // + // 1. (Original): Try to address directly, then drop the whole frame. + // Limitation: ldp is a 7b signed immediate. There should have been a DCHECK! + // + // 2. (New): Drop the non-save-part. Then do similar to original, which is now guaranteed to be + // in range. Then drop the rest. + // + // TODO: In methods with few spills but huge frame, it would be better to do non-immediate loads + // in variant 1. + + if (frame_size_ <= 504) { + // "Magic" constant, 63 (max signed 7b) * 8. Do variant 1. + // Could be tighter, as the last load is below frame_size_ offset. + if (fp_spill_mask_) { + int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_); + UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_); + } + if (core_spill_mask_) { + int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_; + UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_); + } + + OpRegImm64(kOpAdd, rs_sp, frame_size_); + } else { + // Second variant. Drop the frame part. + int drop = 0; + // TODO: Always use the first formula, as num_fp_spills would be zero? + if (fp_spill_mask_) { + drop = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_); + } else { + drop = frame_size_ - kArm64PointerSize * num_core_spills_; + } + + // Drop needs to be 16B aligned, so that SP keeps aligned. 
+ drop = RoundDown(drop, 16); + + OpRegImm64(kOpAdd, rs_sp, drop); + + if (fp_spill_mask_) { + int offset = frame_size_ - drop - kArm64PointerSize * (num_fp_spills_ + num_core_spills_); + UnSpillFPRegs(rs_sp, offset, fp_spill_mask_); + } + if (core_spill_mask_) { + int offset = frame_size_ - drop - kArm64PointerSize * num_core_spills_; + UnSpillCoreRegs(rs_sp, offset, core_spill_mask_); + } + + OpRegImm64(kOpAdd, rs_sp, frame_size_ - drop); } - OpRegImm64(kOpAdd, rs_sp, frame_size_); + // Finally return. NewLIR0(kA64Ret); } diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index f71713fc96..7db6ab69f2 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -123,8 +123,6 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void ClobberCallerSave(); void FreeCallTemps(); void LockCallTemps(); - void MarkPreservedSingle(int v_reg, RegStorage reg); - void MarkPreservedDouble(int v_reg, RegStorage reg); void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. 
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 18a4e8f2a5..51c8723d8c 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -19,6 +19,7 @@ #include "arm64_lir.h" #include "codegen_arm64.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array.h" @@ -1054,6 +1055,7 @@ void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mas if (UNLIKELY(reg2 < 0)) { NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); } else { + DCHECK_LE(offset, 63); NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index dcb0050a80..6985de6574 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -22,6 +22,7 @@ #include "dex/compiler_internals.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" namespace art { @@ -648,29 +649,6 @@ void Arm64Mir2Lir::AdjustSpillMask() { num_core_spills_++; } -/* - * Mark a callee-save fp register as promoted. 
- */ -void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) { - DCHECK(reg.IsFloat()); - int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE; - // Ensure fp_vmap_table is large enough - int table_size = fp_vmap_table_.size(); - for (int i = table_size; i < (adjusted_reg_num + 1); i++) { - fp_vmap_table_.push_back(INVALID_VREG); - } - // Add the current mapping - fp_vmap_table_[adjusted_reg_num] = v_reg; - // Size of fp_vmap_table is high-water mark, use to set mask - num_fp_spills_ = fp_vmap_table_.size(); - fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE; -} - -void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) { - DCHECK(reg.IsDouble()); - MarkPreservedSingle(v_reg, reg); -} - /* Clobber all regs that might be used by an external C call */ void Arm64Mir2Lir::ClobberCallerSave() { Clobber(rs_x0); @@ -904,7 +882,7 @@ static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* nu int n = *num_gpr_used; if (n < 8) { *num_gpr_used = n + 1; - if (loc->wide) { + if (loc->wide || loc->ref) { *op_size = k64; return RegStorage::Solo64(n); } else { @@ -965,35 +943,64 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; for (int i = 0; i < cu_->num_ins; i++) { - PromotionMap* v_map = &promotion_map_[start_vreg + i]; RegLocation* t_loc = &ArgLocs[i]; OpSize op_size; RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size); if (reg.Valid()) { - if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); - } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); + // If arriving in register. + + // We have already updated the arg location with promoted info + // so we can be based on it. 
+ if (t_loc->location == kLocPhysReg) { + // Just copy it. + OpRegCopy(t_loc->reg, reg); } else { - StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile); - if (reg.Is64Bit()) { - if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) { - LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots"; - } - i += 1; + // Needs flush. + if (t_loc->ref) { + StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile); + } else { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32, + kNotVolatile); } } } else { - // If arriving in frame & promoted - if (v_map->core_location == kLocPhysReg) { - LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), - RegStorage::Solo32(v_map->core_reg)); - } - if (v_map->fp_location == kLocPhysReg) { - LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg)); + // If arriving in frame & promoted. + if (t_loc->location == kLocPhysReg) { + if (t_loc->ref) { + LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile); + } else { + LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, + t_loc->wide ? k64 : k32, kNotVolatile); + } } } + if (t_loc->wide) { + // Increment i to skip the next one. 
+ i++; + } + // if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { + // OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); + // } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { + // OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg); + // } else { + // StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile); + // if (reg.Is64Bit()) { + // if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) { + // LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots"; + // } + // i += 1; + // } + // } + // } else { + // // If arriving in frame & promoted + // if (v_map->core_location == kLocPhysReg) { + // LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), + // RegStorage::Solo32(v_map->core_reg)); + // } + // if (v_map->fp_location == kLocPhysReg) { + // LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg)); + // } } } @@ -1067,7 +1074,11 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, loc = UpdateLoc(loc); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile); + if (loc.ref) { + StoreRefDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile); + } } next_arg++; } diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index ca78e5be72..aaee91b817 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -17,6 +17,7 @@ #include "arm64_lir.h" #include "codegen_arm64.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" namespace art { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index e571b3a407..5ba0d3f5e4 100644 --- 
a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -272,7 +272,7 @@ void Mir2Lir::DumpPromotionMap() { PromotionMap v_reg_map = promotion_map_[i]; std::string buf; if (v_reg_map.fp_location == kLocPhysReg) { - StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.FpReg)); + StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.fp_reg)); } std::string buf3; @@ -1184,6 +1184,7 @@ void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType t // resolve these invokes to the same method, so we don't care which one we record here. data_target->operands[2] = type; } + // TODO: This is actually a pointer, not a reference. LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target); AppendLIR(load_pc_rel); DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target); @@ -1211,7 +1212,7 @@ void Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_re if (data_target == nullptr) { data_target = AddWordData(&class_literal_list_, type_idx); } - LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target); + LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target); AppendLIR(load_pc_rel); } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 2c59055243..fe905623b2 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1122,6 +1122,9 @@ void Mir2Lir::GenThrow(RegLocation rl_src) { } } +#define IsSameReg(r1, r2) \ + (GetRegInfo(r1)->Master()->GetReg().GetReg() == GetRegInfo(r2)->Master()->GetReg().GetReg()) + // For final classes there are no sub-classes to check and so we can answer the instance-of // question with simple comparisons. 
void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, @@ -1132,8 +1135,9 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re RegLocation object = LoadValue(rl_src, kRefReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; - if (result_reg == object.reg) { + if (IsSameReg(result_reg, object.reg)) { result_reg = AllocTypedTemp(false, kCoreReg); + DCHECK(!IsSameReg(result_reg, object.reg)); } LoadConstant(result_reg, 0); // assume false LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index bf51d28be3..c75e681683 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -257,11 +257,11 @@ template <size_t pointer_size> void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset<pointer_size> helper_offset, RegStorage arg0, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg()); - if (TargetReg(kArg0) != arg0) { - OpRegCopy(TargetReg(kArg0), arg0); + DCHECK(!IsSameReg(TargetReg(kArg1), arg0)); + if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) { + OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0); } - LoadCurrMethodDirect(TargetReg(kArg1)); + LoadCurrMethodDirect(TargetRefReg(kArg1)); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); } @@ -272,11 +272,11 @@ void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(ThreadOffset<pointer_size> h RegStorage arg0, RegLocation arg2, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg()); - if (TargetReg(kArg0) != arg0) { - OpRegCopy(TargetReg(kArg0), arg0); + DCHECK(!IsSameReg(TargetReg(kArg1), arg0)); + if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) { + OpRegCopy(TargetReg(kArg0, 
arg0.Is64Bit()), arg0); } - LoadCurrMethodDirect(TargetReg(kArg1)); + LoadCurrMethodDirect(TargetRefReg(kArg1)); LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); @@ -394,13 +394,6 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocation, RegLocation arg0, RegLocation arg1, bool safepoint_pc) -// TODO: This is a hack! Reshape the two macros into functions and move them to a better place. -#define IsSameReg(r1, r2) \ - (GetRegInfo(r1)->Master()->GetReg().GetReg() == GetRegInfo(r2)->Master()->GetReg().GetReg()) -#define TargetArgReg(arg, is_wide) \ - (GetRegInfo(TargetReg(arg))->FindMatchingView( \ - (is_wide) ? RegisterInfo::k64SoloStorageMask : RegisterInfo::k32SoloStorageMask)->GetReg()) - void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) { if (IsSameReg(arg1, TargetReg(kArg0))) { if (IsSameReg(arg0, TargetReg(kArg1))) { @@ -562,7 +555,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); need_flush = false; } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); + OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg); need_flush = false; } else { need_flush = true; @@ -584,8 +577,8 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * halves of the double are promoted. Make sure they are in a usable form. */ int lowreg_index = start_vreg + i + (t_loc->high_word ? 
-1 : 0); - int low_reg = promotion_map_[lowreg_index].FpReg; - int high_reg = promotion_map_[lowreg_index + 1].FpReg; + int low_reg = promotion_map_[lowreg_index].fp_reg; + int high_reg = promotion_map_[lowreg_index + 1].fp_reg; if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) { need_flush = true; } @@ -600,7 +593,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg)); } if (v_map->fp_location == kLocPhysReg) { - Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg)); + Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg)); } } } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index 61a047db59..0b1f7b6959 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -65,8 +65,6 @@ class MipsMir2Lir FINAL : public Mir2Lir { void ClobberCallerSave(); void FreeCallTemps(); void LockCallTemps(); - void MarkPreservedSingle(int v_reg, RegStorage reg); - void MarkPreservedDouble(int v_reg, RegStorage reg); void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. 
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index cd29e789e9..c3a4c1714c 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -18,6 +18,7 @@ #include "codegen_mips.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mips_lir.h" #include "mirror/array.h" diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 1d02cf7c0e..a5b7824cf6 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -340,20 +340,6 @@ void MipsMir2Lir::AdjustSpillMask() { num_core_spills_++; } -/* - * Mark a callee-save fp register as promoted. Note that - * vpush/vpop uses contiguous register lists so we must - * include any holes in the mask. Associate holes with - * Dalvik register INVALID_VREG (0xFFFFU). - */ -void MipsMir2Lir::MarkPreservedSingle(int s_reg, RegStorage reg) { - LOG(FATAL) << "No support yet for promoted FP regs"; -} - -void MipsMir2Lir::MarkPreservedDouble(int s_reg, RegStorage reg) { - LOG(FATAL) << "No support yet for promoted FP regs"; -} - /* Clobber all regs that might be used by an external C call */ void MipsMir2Lir::ClobberCallerSave() { Clobber(rs_rZERO); diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index d28abbf5a4..129a696625 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -16,6 +16,7 @@ #include "codegen_mips.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "mips_lir.h" namespace art { diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 9a62255f5d..9ce5bb71a8 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -31,7 +31,7 @@ inline void Mir2Lir::ClobberBody(RegisterInfo* p) { p->MarkDead(); if 
(p->IsWide()) { p->SetIsWide(false); - if (p->GetReg() != p->Partner()) { + if (p->GetReg().NotExactlyEquals(p->Partner())) { // Register pair - deal with the other half. p = GetRegInfo(p->Partner()); p->SetIsWide(false); diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 984e8ea5f8..237288e7b1 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -61,7 +61,7 @@ void Mir2Lir::LockArg(int in_position, bool wide) { if (reg_arg_low.Valid()) { LockTemp(reg_arg_low); } - if (reg_arg_high.Valid() && reg_arg_low != reg_arg_high) { + if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) { LockTemp(reg_arg_high); } } @@ -249,7 +249,7 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { LoadBaseDisp(reg_obj, data.field_offset, r_result, size, data.is_volatile ? kVolatile : kNotVolatile); } - if (r_result != rl_dest.reg) { + if (r_result.NotExactlyEquals(rl_dest.reg)) { if (wide) { OpRegCopyWide(rl_dest.reg, r_result); } else { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 0c00df39f8..8ebd64af7a 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -195,8 +195,8 @@ Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, high_reg = (both_regs >> 8) & 0xff; \ } while (false) -// Mask to denote sreg as the start of a double. Must not interfere with low 16 bits. -#define STARTING_DOUBLE_SREG 0x10000 +// Mask to denote sreg as the start of a 64-bit item. Must not interfere with low 16 bits. 
+#define STARTING_WIDE_SREG 0x10000 // TODO: replace these macros #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath)) @@ -487,7 +487,7 @@ class Mir2Lir : public Backend { RegLocationType core_location:3; uint8_t core_reg; RegLocationType fp_location:3; - uint8_t FpReg; + uint8_t fp_reg; bool first_in_pair; }; @@ -740,9 +740,9 @@ class Mir2Lir : public Backend { int SRegToPMap(int s_reg); void RecordCorePromotion(RegStorage reg, int s_reg); RegStorage AllocPreservedCoreReg(int s_reg); - void RecordSinglePromotion(RegStorage reg, int s_reg); - void RecordDoublePromotion(RegStorage reg, int s_reg); - RegStorage AllocPreservedSingle(int s_reg); + void RecordFpPromotion(RegStorage reg, int s_reg); + RegStorage AllocPreservedFpReg(int s_reg); + virtual RegStorage AllocPreservedSingle(int s_reg); virtual RegStorage AllocPreservedDouble(int s_reg); RegStorage AllocTempBody(GrowableArray<RegisterInfo*> &regs, int* next_temp, bool required); virtual RegStorage AllocFreeTemp(); @@ -1175,6 +1175,13 @@ class Mir2Lir : public Backend { // Required for target - register utilities. + bool IsSameReg(RegStorage reg1, RegStorage reg2) { + RegisterInfo* info1 = GetRegInfo(reg1); + RegisterInfo* info2 = GetRegInfo(reg2); + return (info1->Master() == info2->Master() && + (info1->StorageMask() & info2->StorageMask()) != 0); + } + /** * @brief Portable way of getting special registers from the backend. * @param reg Enumeration describing the purpose of the register. @@ -1224,8 +1231,6 @@ class Mir2Lir : public Backend { virtual void ClobberCallerSave() = 0; virtual void FreeCallTemps() = 0; virtual void LockCallTemps() = 0; - virtual void MarkPreservedSingle(int v_reg, RegStorage reg) = 0; - virtual void MarkPreservedDouble(int v_reg, RegStorage reg) = 0; virtual void CompilerInitializeRegAlloc() = 0; // Required for target - miscellaneous. 
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 38370ad889..6bedae868c 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -178,7 +178,7 @@ void Mir2Lir::Clobber(RegStorage reg) { } else { RegisterInfo* info = GetRegInfo(reg); if (info->IsTemp() && !info->IsDead()) { - if (info->GetReg() != info->Partner()) { + if (info->GetReg().NotExactlyEquals(info->Partner())) { ClobberBody(GetRegInfo(info->Partner())); } ClobberBody(info); @@ -225,7 +225,7 @@ void Mir2Lir::ClobberSReg(int s_reg) { GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { if (info->SReg() == s_reg) { - if (info->GetReg() != info->Partner()) { + if (info->GetReg().NotExactlyEquals(info->Partner())) { // Dealing with a pair - clobber the other half. DCHECK(!info->IsAliased()); ClobberBody(GetRegInfo(info->Partner())); @@ -284,8 +284,13 @@ void Mir2Lir::RecordCorePromotion(RegStorage reg, int s_reg) { /* Reserve a callee-save register. Return InvalidReg if none available */ RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) { - // TODO: 64-bit and refreg update RegStorage res; + /* + * Note: it really doesn't matter much whether we allocate from the core or core64 + * pool for 64-bit targets - but for some targets it does matter whether allocations + * happens from the single or double pool. This entire section of code could stand + * a good refactoring. 
+ */ GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_); for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { if (!info->IsTemp() && !info->InUse()) { @@ -297,49 +302,50 @@ RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) { return res; } -void Mir2Lir::RecordSinglePromotion(RegStorage reg, int s_reg) { +void Mir2Lir::RecordFpPromotion(RegStorage reg, int s_reg) { + DCHECK_NE(cu_->instruction_set, kThumb2); int p_map_idx = SRegToPMap(s_reg); int v_reg = mir_graph_->SRegToVReg(s_reg); + int reg_num = reg.GetRegNum(); GetRegInfo(reg)->MarkInUse(); - MarkPreservedSingle(v_reg, reg); + fp_spill_mask_ |= (1 << reg_num); + // Include reg for later sort + fp_vmap_table_.push_back(reg_num << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1))); + num_fp_spills_++; promotion_map_[p_map_idx].fp_location = kLocPhysReg; - promotion_map_[p_map_idx].FpReg = reg.GetReg(); + promotion_map_[p_map_idx].fp_reg = reg.GetReg(); } -// Reserve a callee-save sp single register. -RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) { +// Reserve a callee-save floating point. +RegStorage Mir2Lir::AllocPreservedFpReg(int s_reg) { + /* + * For targets other than Thumb2, it doesn't matter whether we allocate from + * the sp_regs_ or dp_regs_ pool. Some refactoring is in order here. 
+ */ + DCHECK_NE(cu_->instruction_set, kThumb2); RegStorage res; GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_); for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { if (!info->IsTemp() && !info->InUse()) { res = info->GetReg(); - RecordSinglePromotion(res, s_reg); + RecordFpPromotion(res, s_reg); break; } } return res; } -void Mir2Lir::RecordDoublePromotion(RegStorage reg, int s_reg) { - int p_map_idx = SRegToPMap(s_reg); - int v_reg = mir_graph_->SRegToVReg(s_reg); - GetRegInfo(reg)->MarkInUse(); - MarkPreservedDouble(v_reg, reg); - promotion_map_[p_map_idx].fp_location = kLocPhysReg; - promotion_map_[p_map_idx].FpReg = reg.GetReg(); +// TODO: this is Thumb2 only. Remove when DoPromotion refactored. +RegStorage Mir2Lir::AllocPreservedDouble(int s_reg) { + RegStorage res; + UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedDouble"; + return res; } -// Reserve a callee-save dp solo register. -RegStorage Mir2Lir::AllocPreservedDouble(int s_reg) { +// TODO: this is Thumb2 only. Remove when DoPromotion refactored. 
+RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) { RegStorage res; - GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_); - for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { - if (!info->IsTemp() && !info->InUse()) { - res = info->GetReg(); - RecordDoublePromotion(res, s_reg); - break; - } - } + UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedSingle"; return res; } @@ -736,7 +742,8 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { RegisterInfo* info1 = GetRegInfo(reg.GetLow()); RegisterInfo* info2 = GetRegInfo(reg.GetHigh()); DCHECK(info1 && info2 && info1->IsWide() && info2->IsWide() && - (info1->Partner() == info2->GetReg()) && (info2->Partner() == info1->GetReg())); + (info1->Partner().ExactlyEquals(info2->GetReg())) && + (info2->Partner().ExactlyEquals(info1->GetReg()))); if ((info1->IsLive() && info1->IsDirty()) || (info2->IsLive() && info2->IsDirty())) { if (!(info1->IsTemp() && info2->IsTemp())) { /* Should not happen. If it does, there's a problem in eval_loc */ @@ -872,10 +879,10 @@ void Mir2Lir::MarkWide(RegStorage reg) { RegisterInfo* info_lo = GetRegInfo(reg.GetLow()); RegisterInfo* info_hi = GetRegInfo(reg.GetHigh()); // Unpair any old partners. 
- if (info_lo->IsWide() && info_lo->Partner() != info_hi->GetReg()) { + if (info_lo->IsWide() && info_lo->Partner().NotExactlyEquals(info_hi->GetReg())) { GetRegInfo(info_lo->Partner())->SetIsWide(false); } - if (info_hi->IsWide() && info_hi->Partner() != info_lo->GetReg()) { + if (info_hi->IsWide() && info_hi->Partner().NotExactlyEquals(info_lo->GetReg())) { GetRegInfo(info_hi->Partner())->SetIsWide(false); } info_lo->SetIsWide(true); @@ -1039,12 +1046,12 @@ RegLocation Mir2Lir::UpdateLocWide(RegLocation loc) { RegisterInfo* info_hi = GetRegInfo(reg.GetHigh()); match &= info_lo->IsWide(); match &= info_hi->IsWide(); - match &= (info_lo->Partner() == info_hi->GetReg()); - match &= (info_hi->Partner() == info_lo->GetReg()); + match &= (info_lo->Partner().ExactlyEquals(info_hi->GetReg())); + match &= (info_hi->Partner().ExactlyEquals(info_lo->GetReg())); } else { RegisterInfo* info = GetRegInfo(reg); match &= info->IsWide(); - match &= (info->GetReg() == info->Partner()); + match &= (info->GetReg().ExactlyEquals(info->Partner())); } if (match) { loc.location = kLocPhysReg; @@ -1147,16 +1154,23 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num RegLocation loc = mir_graph_->reg_location_[i]; RefCounts* counts = loc.fp ? fp_counts : core_counts; int p_map_idx = SRegToPMap(loc.s_reg_low); + int use_count = mir_graph_->GetUseCount(i); if (loc.fp) { if (loc.wide) { // Treat doubles as a unit, using upper half of fp_counts array. - counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i); + counts[p_map_idx + num_regs].count += use_count; i++; } else { - counts[p_map_idx].count += mir_graph_->GetUseCount(i); + counts[p_map_idx].count += use_count; } } else if (!IsInexpensiveConstant(loc)) { - counts[p_map_idx].count += mir_graph_->GetUseCount(i); + if (loc.wide && cu_->target64) { + // Treat long as a unit, using upper half of core_counts array. 
+ counts[p_map_idx + num_regs].count += use_count; + i++; + } else { + counts[p_map_idx].count += use_count; + } } } } @@ -1176,10 +1190,10 @@ static int SortCounts(const void *val1, const void *val2) { void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) { LOG(INFO) << msg; for (int i = 0; i < size; i++) { - if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) { - LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count; + if ((arr[i].s_reg & STARTING_WIDE_SREG) != 0) { + LOG(INFO) << "s_reg[64_" << (arr[i].s_reg & ~STARTING_WIDE_SREG) << "]: " << arr[i].count; } else { - LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count; + LOG(INFO) << "s_reg[32_" << arr[i].s_reg << "]: " << arr[i].count; } } } @@ -1210,69 +1224,83 @@ void Mir2Lir::DoPromotion() { * TUNING: replace with linear scan once we have the ability * to describe register live ranges for GC. */ + size_t core_reg_count_size = cu_->target64 ? num_regs * 2 : num_regs; + size_t fp_reg_count_size = num_regs * 2; RefCounts *core_regs = - static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs, + static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size, kArenaAllocRegAlloc)); - RefCounts *FpRegs = - static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2, + RefCounts *fp_regs = + static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * fp_reg_count_size, kArenaAllocRegAlloc)); // Set ssa names for original Dalvik registers for (int i = 0; i < dalvik_regs; i++) { - core_regs[i].s_reg = FpRegs[i].s_reg = i; + core_regs[i].s_reg = fp_regs[i].s_reg = i; } // Set ssa names for compiler temporaries for (unsigned int ct_idx = 0; ct_idx < mir_graph_->GetNumUsedCompilerTemps(); ct_idx++) { CompilerTemp* ct = mir_graph_->GetCompilerTemp(ct_idx); core_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low; - FpRegs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low; - FpRegs[num_regs + dalvik_regs + ct_idx].s_reg = 
ct->s_reg_low; + fp_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low; } - // Duplicate in upper half to represent possible fp double starting sregs. - for (int i = 0; i < num_regs; i++) { - FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG; + // Duplicate in upper half to represent possible wide starting sregs. + for (size_t i = num_regs; i < fp_reg_count_size; i++) { + fp_regs[i].s_reg = fp_regs[i - num_regs].s_reg | STARTING_WIDE_SREG; + } + for (size_t i = num_regs; i < core_reg_count_size; i++) { + core_regs[i].s_reg = core_regs[i - num_regs].s_reg | STARTING_WIDE_SREG; } // Sum use counts of SSA regs by original Dalvik vreg. - CountRefs(core_regs, FpRegs, num_regs); + CountRefs(core_regs, fp_regs, num_regs); // Sort the count arrays - qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts); - qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts); + qsort(core_regs, core_reg_count_size, sizeof(RefCounts), SortCounts); + qsort(fp_regs, fp_reg_count_size, sizeof(RefCounts), SortCounts); if (cu_->verbose) { - DumpCounts(core_regs, num_regs, "Core regs after sort"); - DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort"); + DumpCounts(core_regs, core_reg_count_size, "Core regs after sort"); + DumpCounts(fp_regs, fp_reg_count_size, "Fp regs after sort"); } if (!(cu_->disable_opt & (1 << kPromoteRegs))) { - // Promote FpRegs - for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) { - int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG); - if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) { - if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) && - (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) { - int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG; - // Ignore result - if can't alloc double may still be able to alloc singles. 
- AllocPreservedDouble(low_sreg); + // Promote fp regs + for (size_t i = 0; (i < fp_reg_count_size) && (fp_regs[i].count >= promotion_threshold); i++) { + int low_sreg = fp_regs[i].s_reg & ~STARTING_WIDE_SREG; + size_t p_map_idx = SRegToPMap(low_sreg); + RegStorage reg = RegStorage::InvalidReg(); + if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { + // TODO: break out the Thumb2-specific code. + if (cu_->instruction_set == kThumb2) { + bool wide = fp_regs[i].s_reg & STARTING_WIDE_SREG; + if (wide) { + if (promotion_map_[p_map_idx + 1].fp_location == kLocPhysReg) { + // Ignore result - if can't alloc double may still be able to alloc singles. + AllocPreservedDouble(low_sreg); + } + // Continue regardless of success - might still be able to grab a single. + continue; + } else { + reg = AllocPreservedSingle(low_sreg); + } + } else { + reg = AllocPreservedFpReg(low_sreg); } - } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { - RegStorage reg = AllocPreservedSingle(FpRegs[i].s_reg); if (!reg.Valid()) { - break; // No more left. 
+ break; // No more left } } } // Promote core regs - for (int i = 0; (i < num_regs) && - (core_regs[i].count >= promotion_threshold); i++) { - int p_map_idx = SRegToPMap(core_regs[i].s_reg); - if (promotion_map_[p_map_idx].core_location != - kLocPhysReg) { - RegStorage reg = AllocPreservedCoreReg(core_regs[i].s_reg); + for (size_t i = 0; (i < core_reg_count_size) && + (core_regs[i].count >= promotion_threshold); i++) { + int low_sreg = core_regs[i].s_reg & ~STARTING_WIDE_SREG; + size_t p_map_idx = SRegToPMap(low_sreg); + if (promotion_map_[p_map_idx].core_location != kLocPhysReg) { + RegStorage reg = AllocPreservedCoreReg(low_sreg); if (!reg.Valid()) { break; // No more left } @@ -1284,52 +1312,36 @@ void Mir2Lir::DoPromotion() { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { RegLocation *curr = &mir_graph_->reg_location_[i]; int p_map_idx = SRegToPMap(curr->s_reg_low); - if (!curr->wide) { - if (curr->fp) { - if (promotion_map_[p_map_idx].fp_location == kLocPhysReg) { - curr->location = kLocPhysReg; - curr->reg = RegStorage::Solo32(promotion_map_[p_map_idx].FpReg); - curr->home = true; - } - } else { - if (promotion_map_[p_map_idx].core_location == kLocPhysReg) { - curr->location = kLocPhysReg; - curr->reg = RegStorage::Solo32(promotion_map_[p_map_idx].core_reg); - curr->home = true; - } - } - } else { - if (curr->high_word) { - continue; - } - if (curr->fp) { - if ((promotion_map_[p_map_idx].fp_location == kLocPhysReg) && - (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg)) { - int low_reg = promotion_map_[p_map_idx].FpReg; - int high_reg = promotion_map_[p_map_idx+1].FpReg; - // Doubles require pair of singles starting at even reg + int reg_num = curr->fp ? 
promotion_map_[p_map_idx].fp_reg : promotion_map_[p_map_idx].core_reg; + bool wide = curr->wide || (cu_->target64 && curr->ref && cu_->instruction_set != kX86_64); + RegStorage reg = RegStorage::InvalidReg(); + if (curr->fp && promotion_map_[p_map_idx].fp_location == kLocPhysReg) { + if (wide && cu_->instruction_set == kThumb2) { + if (promotion_map_[p_map_idx + 1].fp_location == kLocPhysReg) { + int high_reg = promotion_map_[p_map_idx+1].fp_reg; // TODO: move target-specific restrictions out of here. - if (((low_reg & 0x1) == 0) && ((low_reg + 1) == high_reg)) { - curr->location = kLocPhysReg; - if (cu_->instruction_set == kThumb2) { - curr->reg = RegStorage::FloatSolo64(RegStorage::RegNum(low_reg) >> 1); - } else { - curr->reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg); - } - curr->home = true; + if (((reg_num & 0x1) == 0) && ((reg_num + 1) == high_reg)) { + reg = RegStorage::FloatSolo64(RegStorage::RegNum(reg_num) >> 1); } } } else { - if ((promotion_map_[p_map_idx].core_location == kLocPhysReg) - && (promotion_map_[p_map_idx+1].core_location == - kLocPhysReg)) { - curr->location = kLocPhysReg; - curr->reg = RegStorage(RegStorage::k64BitPair, promotion_map_[p_map_idx].core_reg, - promotion_map_[p_map_idx+1].core_reg); - curr->home = true; + reg = wide ? RegStorage::FloatSolo64(reg_num) : RegStorage::FloatSolo32(reg_num); + } + } else if (!curr->fp && promotion_map_[p_map_idx].core_location == kLocPhysReg) { + if (wide && !cu_->target64) { + if (promotion_map_[p_map_idx + 1].core_location == kLocPhysReg) { + int high_reg = promotion_map_[p_map_idx+1].core_reg; + reg = RegStorage(RegStorage::k64BitPair, reg_num, high_reg); } + } else { + reg = wide ? 
RegStorage::Solo64(reg_num) : RegStorage::Solo32(reg_num); } } + if (reg.Valid()) { + curr->reg = reg; + curr->location = kLocPhysReg; + curr->home = true; + } } if (cu_->verbose) { DumpPromotionMap(); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 21d7419d66..afb618494d 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -100,8 +100,6 @@ class X86Mir2Lir : public Mir2Lir { void ClobberCallerSave(); void FreeCallTemps(); void LockCallTemps(); - void MarkPreservedSingle(int v_reg, RegStorage reg); - void MarkPreservedDouble(int v_reg, RegStorage reg); void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 1aeacedb77..7454475555 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -16,6 +16,7 @@ #include "codegen_x86.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "x86_lir.h" namespace art { diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index bd007e7e61..14a18e5954 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -18,6 +18,7 @@ #include "codegen_x86.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "mirror/array.h" #include "x86_lir.h" diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 64b4af86a2..72b2cea7fc 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -20,6 +20,7 @@ #include "codegen_x86.h" #include "dex/compiler_internals.h" #include "dex/quick/mir_to_lir-inl.h" +#include "dex/reg_storage_eq.h" #include "mirror/array.h" #include "mirror/string.h" #include "x86_lir.h" @@ -409,20 +410,6 @@ void X86Mir2Lir::AdjustSpillMask() { num_core_spills_++; } -/* - * Mark a callee-save fp register as promoted. 
Note that - * vpush/vpop uses contiguous register lists so we must - * include any holes in the mask. Associate holes with - * Dalvik register INVALID_VREG (0xFFFFU). - */ -void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) { - UNIMPLEMENTED(FATAL) << "MarkPreservedSingle"; -} - -void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) { - UNIMPLEMENTED(FATAL) << "MarkPreservedDouble"; -} - RegStorage X86Mir2Lir::AllocateByteRegister() { RegStorage reg = AllocTypedTemp(false, kCoreReg); if (!cu_->target64) { @@ -2206,4 +2193,3 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, } } // namespace art - diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 392eecfd25..d835b2215f 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -20,6 +20,7 @@ #include "x86_lir.h" #include "dex/quick/dex_file_method_inliner.h" #include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/reg_storage_eq.h" namespace art { diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 3b891f2f20..8ed3adc1f1 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -122,11 +122,18 @@ class RegStorage { constexpr explicit RegStorage(uint16_t val) : reg_(val) {} RegStorage() : reg_(kInvalid) {} - bool operator==(const RegStorage rhs) const { + // We do not provide a general operator overload for equality of reg storage, as this is + // dangerous in the case of architectures with multiple views, and the naming ExactlyEquals + // expresses the exact match performed here. It is more likely that a comparison between the views + // is intended in most cases. Such code can be found in, for example, Mir2Lir::IsSameReg. + // + // If you know what you are doing, include reg_storage_eq.h, which defines == and != for brevity. 
+ + bool ExactlyEquals(const RegStorage& rhs) const { return (reg_ == rhs.GetRawBits()); } - bool operator!=(const RegStorage rhs) const { + bool NotExactlyEquals(const RegStorage& rhs) const { return (reg_ != rhs.GetRawBits()); } diff --git a/compiler/dex/reg_storage_eq.h b/compiler/dex/reg_storage_eq.h new file mode 100644 index 0000000000..b688dac7ca --- /dev/null +++ b/compiler/dex/reg_storage_eq.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_REG_STORAGE_EQ_H_ +#define ART_COMPILER_DEX_REG_STORAGE_EQ_H_ + +#include "reg_storage.h" + +namespace art { + +// Define == and != operators for RegStorage. These are based on exact equality of the reg storage, +// that is, 32b and 64b views of the same physical register won't match. This is often not the +// intended behavior, so be careful when including this header. 
+ +inline bool operator==(const RegStorage& lhs, const RegStorage& rhs) { + return lhs.ExactlyEquals(rhs); +} + +inline bool operator!=(const RegStorage& lhs, const RegStorage& rhs) { + return lhs.NotExactlyEquals(rhs); +} + +} // namespace art + +#endif // ART_COMPILER_DEX_REG_STORAGE_EQ_H_ + diff --git a/runtime/stack.cc b/runtime/stack.cc index 132ac3e795..d5405fbdf0 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -159,11 +159,22 @@ bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind); uintptr_t ptr_val; bool success = false; + bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64); if (is_float) { success = GetFPR(reg, &ptr_val); } else { success = GetGPR(reg, &ptr_val); } + if (success && target64) { + bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg); + bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg); + int64_t value_long = static_cast<int64_t>(ptr_val); + if (wide_lo) { + ptr_val = static_cast<uintptr_t>(value_long & 0xFFFFFFFF); + } else if (wide_hi) { + ptr_val = static_cast<uintptr_t>(value_long >> 32); + } + } *val = ptr_val; return success; } else { @@ -194,6 +205,28 @@ bool StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_val bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg); uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask(); const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind); + bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64); + // Deal with 32 or 64-bit wide registers in a way that builds on all targets. + if (target64) { + bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg); + bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg); + if (wide_lo || wide_hi) { + uintptr_t old_reg_val; + bool success = is_float ? 
GetFPR(reg, &old_reg_val) : GetGPR(reg, &old_reg_val); + if (!success) { + return false; + } + uint64_t new_vreg_portion = static_cast<uint64_t>(new_value); + uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val); + uint64_t mask = 0xffffffff; + if (wide_lo) { + mask = mask << 32; + } else { + new_vreg_portion = new_vreg_portion << 32; + } + new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion); + } + } if (is_float) { return SetFPR(reg, new_value); } else { diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h index 98217535ee..df5cd80e97 100644 --- a/runtime/vmap_table.h +++ b/runtime/vmap_table.h @@ -64,6 +64,12 @@ class VmapTable { const uint8_t* table = table_; uint16_t adjusted_vreg = vreg + kEntryAdjustment; size_t end = DecodeUnsignedLeb128(&table); + bool high_reg = (kind == kLongHiVReg) || (kind == kDoubleHiVReg); + bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64); + if (target64 && high_reg) { + // Wide promoted registers are associated with the sreg of the low portion. + adjusted_vreg--; + } for (size_t i = 0; i < end; ++i) { // Stop if we find what we are looking for. uint16_t adjusted_entry = DecodeUnsignedLeb128(&table); |