diff options
-rw-r--r-- | compiler/dex/compiler_enums.h | 11 | ||||
-rw-r--r-- | compiler/dex/mir_graph.h | 5 | ||||
-rw-r--r-- | compiler/dex/quick/arm/arm_lir.h | 4 | ||||
-rw-r--r-- | compiler/dex/quick/gen_common.cc | 7 | ||||
-rw-r--r-- | compiler/dex/quick/gen_loadstore.cc | 14 | ||||
-rw-r--r-- | compiler/dex/quick/mips/mips_lir.h | 8 | ||||
-rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 9 | ||||
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 12 | ||||
-rw-r--r-- | compiler/dex/quick/ralloc_util.cc | 11 | ||||
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 8 | ||||
-rw-r--r-- | compiler/dex/quick/x86/int_x86.cc | 14 | ||||
-rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 252 | ||||
-rw-r--r-- | compiler/dex/quick/x86/utility_x86.cc | 28 | ||||
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 10 | ||||
-rw-r--r-- | compiler/dex/vreg_analysis.cc | 2 |
15 files changed, 346 insertions, 49 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 4650f25a90..18122b3dfd 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -56,6 +56,17 @@ enum RegLocationType { kLocInvalid }; +/** + * Support for vector registers. Initially used for x86 floats. This will be used + * to replace the assumption that a double takes up 2 single FP registers + */ +enum VectorLengthType { + kVectorNotUsed = 0, // This value is NOT in a vector register. + kVectorLength4, // The value occupies 4 bytes in a vector register. + kVectorLength8, // The value occupies 8 bytes in a vector register. + kVectorLength16 // The value occupies 16 bytes in a vector register (unused now). +}; + enum BBType { kNullBlock, kEntryBlock, diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 010de20ab4..d080e392cd 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -325,11 +325,14 @@ struct RegLocation { unsigned ref:1; // Something GC cares about. unsigned high_word:1; // High word of pair? unsigned home:1; // Does this represent the home location? + VectorLengthType vec_len:3; // Is this value in a vector register, and how big is it? uint8_t low_reg; // First physical register. uint8_t high_reg; // 2nd physical register (if wide). int16_t s_reg_low; // SSA name for low Dalvik word. int16_t orig_sreg; // TODO: remove after Bitcode gen complete // and consolidate usage w/ s_reg_low. + + bool IsVectorScalar() const { return vec_len == kVectorLength4 || vec_len == kVectorLength8;} }; /* @@ -354,7 +357,7 @@ struct CallInfo { }; -const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, +const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG, INVALID_SREG}; class MIRGraph { diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index b06ebcf012..37b4ec6dc7 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -118,9 +118,9 @@ namespace art { #define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET-1) // RegisterLocation templates return values (r0, or r0/r1). -#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \ +#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, INVALID_REG, \ INVALID_SREG, INVALID_SREG} -#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, \ +#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, \ INVALID_SREG, INVALID_SREG} #define ARM_LOC_C_RETURN_FLOAT ARM_LOC_C_RETURN #define ARM_LOC_C_RETURN_DOUBLE ARM_LOC_C_RETURN_WIDE diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 6b4cbd4286..3bd0298a22 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1840,4 +1840,11 @@ void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true); } +/* Generic code for generating a wide constant into a VR. */ +void Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) { + RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true); + LoadConstantWide(rl_result.low_reg, rl_result.high_reg, value); + StoreValueWide(rl_dest, rl_result); +} + } // namespace art diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 353910606e..8f2f6adba8 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -265,9 +265,17 @@ void Mir2Lir::StoreValueWide(RegLocation rl_dest, RegLocation rl_src) { // Dest is now live and dirty (until/if we flush it to home location) MarkLive(rl_dest.low_reg, rl_dest.s_reg_low); - MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low)); - MarkDirty(rl_dest); - MarkPair(rl_dest.low_reg, rl_dest.high_reg); + + // Does this wide value live in two registers (or one vector one)? + if (rl_dest.low_reg != rl_dest.high_reg) { + MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low)); + MarkDirty(rl_dest); + MarkPair(rl_dest.low_reg, rl_dest.high_reg); + } else { + // This must be an x86 vector register value, + DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86)); + MarkDirty(rl_dest); + } ResetDefLocWide(rl_dest); diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h index 278fcefcbf..00eef96cb5 100644 --- a/compiler/dex/quick/mips/mips_lir.h +++ b/compiler/dex/quick/mips/mips_lir.h @@ -142,13 +142,13 @@ namespace art { #define rMIPS_PC INVALID_REG // RegisterLocation templates return values (r_V0, or r_V0/r_V1). -#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_V0, INVALID_REG, \ +#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_V0, INVALID_REG, \ INVALID_SREG, INVALID_SREG} -#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \ +#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_FRESULT0, \ INVALID_REG, INVALID_SREG, INVALID_SREG} -#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_RESULT0, \ +#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_RESULT0, \ r_RESULT1, INVALID_SREG, INVALID_SREG} -#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \ +#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_FRESULT0, \ r_FRESULT1, INVALID_SREG, INVALID_SREG} enum MipsResourceEncodingPos { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index c5bbae1923..6281eff873 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -157,16 +157,11 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::CONST_WIDE_16: case Instruction::CONST_WIDE_32: - rl_result = EvalLoc(rl_dest, kAnyReg, true); - LoadConstantWide(rl_result.low_reg, rl_result.high_reg, - static_cast<int64_t>(static_cast<int32_t>(vB))); - StoreValueWide(rl_dest, rl_result); + GenConstWide(rl_dest, static_cast<int64_t>(static_cast<int32_t>(vB))); break; case Instruction::CONST_WIDE: - rl_result = EvalLoc(rl_dest, kAnyReg, true); - LoadConstantWide(rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide); - StoreValueWide(rl_dest, rl_result); + GenConstWide(rl_dest, mir->dalvikInsn.vB_wide); break; case Instruction::CONST_WIDE_HIGH16: diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 3f7ec1e5f0..c157327109 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -385,7 +385,7 @@ class Mir2Lir : public Backend { int AllocPreservedSingle(int s_reg); int AllocPreservedDouble(int s_reg); int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required); - int AllocTempDouble(); + virtual int AllocTempDouble(); int AllocFreeTemp(); int AllocTemp(); int AllocTempFloat(); @@ -403,7 +403,7 @@ class Mir2Lir : public Backend { void MarkDefWide(RegLocation rl, LIR *start, LIR *finish); RegLocation WideToNarrow(RegLocation rl); void ResetDefLoc(RegLocation rl); - void ResetDefLocWide(RegLocation rl); + virtual void ResetDefLocWide(RegLocation rl); void ResetDefTracking(); void ClobberAllRegs(); void FlushAllRegsBody(RegisterInfo* info, int num_regs); @@ -419,7 +419,7 @@ class Mir2Lir : public Backend { void CopyRegInfo(int new_reg, int old_reg); bool CheckCorePoolSanity(); RegLocation UpdateLoc(RegLocation loc); - RegLocation UpdateLocWide(RegLocation loc); + virtual RegLocation UpdateLocWide(RegLocation loc); RegLocation UpdateRawLoc(RegLocation loc); /** @@ -430,7 +430,7 @@ class Mir2Lir : public Backend { * @param update Whether the liveness information should be updated. * @return Returns the properly typed temporary in physical register pairs. */ - RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update); + virtual RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update); /** * @brief Used to load register location into a typed temporary. @@ -439,7 +439,7 @@ class Mir2Lir : public Backend { * @param update Whether the liveness information should be updated. * @return Returns the properly typed temporary in physical register. */ - RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); + virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); @@ -507,6 +507,8 @@ class Mir2Lir : public Backend { RegLocation rl_src); void GenSuspendTest(int opt_flags); void GenSuspendTestAndBranch(int opt_flags, LIR* target); + // This will be overridden by x86 implementation. + virtual void GenConstWide(RegLocation rl_dest, int64_t value); // Shared by all targets - implemented in gen_invoke.cc. int CallHelperSetup(ThreadOffset helper_offset); diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index cef013e9d8..32c22f2c04 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -602,6 +602,7 @@ void Mir2Lir::UnmarkTemp(int reg) { } void Mir2Lir::MarkPair(int low_reg, int high_reg) { + DCHECK_NE(low_reg, high_reg); RegisterInfo* info_lo = GetRegInfo(low_reg); RegisterInfo* info_hi = GetRegInfo(high_reg); info_lo->pair = info_hi->pair = true; @@ -807,7 +808,10 @@ RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) { if (update) { loc.location = kLocPhysReg; MarkLive(loc.low_reg, loc.s_reg_low); - MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low)); + // Does this wide value live in two registers or one vector register? + if (loc.low_reg != loc.high_reg) { + MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low)); + } } DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); return loc; @@ -1059,7 +1063,10 @@ RegLocation Mir2Lir::GetReturnWide(bool is_double) { Clobber(res.high_reg); LockTemp(res.low_reg); LockTemp(res.high_reg); - MarkPair(res.low_reg, res.high_reg); + // Does this wide value live in two registers or one vector register? + if (res.low_reg != res.high_reg) { + MarkPair(res.low_reg, res.high_reg); + } return res; } diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index e6621f3bcb..816f2d0c5c 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -173,6 +173,12 @@ class X86Mir2Lir : public Mir2Lir { bool InexpensiveConstantLong(int64_t value); bool InexpensiveConstantDouble(int64_t value); + RegLocation UpdateLocWide(RegLocation loc); + RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update); + RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); + int AllocTempDouble(); + void ResetDefLocWide(RegLocation rl); + private: void EmitPrefix(const X86EncodingMap* entry); void EmitOpcode(const X86EncodingMap* entry); @@ -222,6 +228,8 @@ class X86Mir2Lir : public Mir2Lir { void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); + void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg); + void GenConstWide(RegLocation rl_dest, int64_t value); }; } // namespace art diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 11ccd4b35b..01479a9021 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -155,9 +155,11 @@ void X86Mir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, // TODO: Prevent this from happening in the code. The result is often // unused or could have been loaded more easily from memory. NewLIR2(kX86MovdxrRR, dest_lo, src_lo); + dest_hi = AllocTempDouble(); NewLIR2(kX86MovdxrRR, dest_hi, src_hi); NewLIR2(kX86PsllqRI, dest_hi, 32); NewLIR2(kX86OrpsRR, dest_lo, dest_hi); + FreeTemp(dest_hi); } } else { if (src_fp) { @@ -525,7 +527,7 @@ void X86Mir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1, // Compute (r1:r0) = (r1:r0) + (r2:r3) OpRegReg(kOpAdd, r0, r2); // r0 = r0 + r2 OpRegReg(kOpAdc, r1, r3); // r1 = r1 + r3 + CF - RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); } @@ -541,7 +543,7 @@ void X86Mir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1, // Compute (r1:r0) = (r1:r0) + (r2:r3) OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF - RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); } @@ -557,7 +559,7 @@ void X86Mir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1, // Compute (r1:r0) = (r1:r0) & (r2:r3) OpRegReg(kOpAnd, r0, r2); // r0 = r0 & r2 OpRegReg(kOpAnd, r1, r3); // r1 = r1 & r3 - RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); } @@ -573,7 +575,7 @@ void X86Mir2Lir::GenOrLong(RegLocation rl_dest, // Compute (r1:r0) = (r1:r0) | (r2:r3) OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2 OpRegReg(kOpOr, r1, r3); // r1 = r1 | r3 - RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); } @@ -589,7 +591,7 @@ void X86Mir2Lir::GenXorLong(RegLocation rl_dest, // Compute (r1:r0) = (r1:r0) ^ (r2:r3) OpRegReg(kOpXor, r0, r2); // r0 = r0 ^ r2 OpRegReg(kOpXor, r1, r3); // r1 = r1 ^ r3 - RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); } @@ -602,7 +604,7 @@ void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { OpRegReg(kOpNeg, r0, r0); // r0 = -r0 OpRegImm(kOpAdc, r1, 0); // r1 = r1 + CF OpRegReg(kOpNeg, r1, r1); // r1 = -r1 - RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index b281063a4a..5c993c5ac5 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -416,7 +416,7 @@ int X86Mir2Lir::AllocTypedTempPair(bool fp_hint, if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { low_reg = AllocTempDouble(); - high_reg = low_reg + 1; + high_reg = low_reg; // only one allocated! res = (low_reg & 0xff) | ((high_reg & 0xff) << 8); return res; } @@ -546,4 +546,254 @@ const char* X86Mir2Lir::GetTargetInstFmt(int opcode) { return X86Mir2Lir::EncodingMap[opcode].fmt; } +/* + * Return an updated location record with current in-register status. + * If the value lives in live temps, reflect that fact. No code + * is generated. If the live value is part of an older pair, + * clobber both low and high. + */ +// TODO: Reunify with common code after 'pair mess' has been fixed +RegLocation X86Mir2Lir::UpdateLocWide(RegLocation loc) { + DCHECK(loc.wide); + DCHECK(CheckCorePoolSanity()); + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); + // Are the dalvik regs already live in physical registers? + RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg); + + // Handle FP registers specially on x86. + if (info_lo && IsFpReg(info_lo->reg)) { + bool match = true; + + // We can't match a FP register with a pair of Core registers. + match = match && (info_lo->pair == 0); + + if (match) { + // We can reuse;update the register usage info. + loc.low_reg = info_lo->reg; + loc.high_reg = info_lo->reg; // Play nice with existing code. + loc.location = kLocPhysReg; + loc.vec_len = kVectorLength8; + DCHECK(IsFpReg(loc.low_reg)); + return loc; + } + // We can't easily reuse; clobber and free any overlaps. + if (info_lo) { + Clobber(info_lo->reg); + FreeTemp(info_lo->reg); + if (info_lo->pair) + Clobber(info_lo->partner); + } + } else { + RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg); + bool match = true; + match = match && (info_lo != NULL); + match = match && (info_hi != NULL); + // Are they both core or both FP? + match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg)); + // If a pair of floating point singles, are they properly aligned? + if (match && IsFpReg(info_lo->reg)) { + match &= ((info_lo->reg & 0x1) == 0); + match &= ((info_hi->reg - info_lo->reg) == 1); + } + // If previously used as a pair, it is the same pair? + if (match && (info_lo->pair || info_hi->pair)) { + match = (info_lo->pair == info_hi->pair); + match &= ((info_lo->reg == info_hi->partner) && + (info_hi->reg == info_lo->partner)); + } + if (match) { + // Can reuse - update the register usage info + loc.low_reg = info_lo->reg; + loc.high_reg = info_hi->reg; + loc.location = kLocPhysReg; + MarkPair(loc.low_reg, loc.high_reg); + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + return loc; + } + // Can't easily reuse - clobber and free any overlaps + if (info_lo) { + Clobber(info_lo->reg); + FreeTemp(info_lo->reg); + if (info_lo->pair) + Clobber(info_lo->partner); + } + if (info_hi) { + Clobber(info_hi->reg); + FreeTemp(info_hi->reg); + if (info_hi->pair) + Clobber(info_hi->partner); + } + } + } + return loc; +} + +// TODO: Reunify with common code after 'pair mess' has been fixed +RegLocation X86Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) { + DCHECK(loc.wide); + int32_t new_regs; + int32_t low_reg; + int32_t high_reg; + + loc = UpdateLocWide(loc); + + /* If it is already in a register, we can assume proper form. Is it the right reg class? */ + if (loc.location == kLocPhysReg) { + DCHECK_EQ(IsFpReg(loc.low_reg), loc.IsVectorScalar()); + if (!RegClassMatches(reg_class, loc.low_reg)) { + /* It is the wrong register class. Reallocate and copy. */ + if (!IsFpReg(loc.low_reg)) { + // We want this in a FP reg, and it is in core registers. + DCHECK(reg_class != kCoreReg); + // Allocate this into any FP reg, and mark it with the right size. + low_reg = AllocTypedTemp(true, reg_class); + OpVectorRegCopyWide(low_reg, loc.low_reg, loc.high_reg); + CopyRegInfo(low_reg, loc.low_reg); + Clobber(loc.low_reg); + Clobber(loc.high_reg); + loc.low_reg = low_reg; + loc.high_reg = low_reg; // Play nice with existing code. + loc.vec_len = kVectorLength8; + } else { + // The value is in a FP register, and we want it in a pair of core registers. + DCHECK_EQ(reg_class, kCoreReg); + DCHECK_EQ(loc.low_reg, loc.high_reg); + new_regs = AllocTypedTempPair(false, kCoreReg); // Force to core registers. + low_reg = new_regs & 0xff; + high_reg = (new_regs >> 8) & 0xff; + DCHECK_NE(low_reg, high_reg); + OpRegCopyWide(low_reg, high_reg, loc.low_reg, loc.high_reg); + CopyRegInfo(low_reg, loc.low_reg); + CopyRegInfo(high_reg, loc.high_reg); + Clobber(loc.low_reg); + Clobber(loc.high_reg); + loc.low_reg = low_reg; + loc.high_reg = high_reg; + MarkPair(loc.low_reg, loc.high_reg); + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + } + } + return loc; + } + + DCHECK_NE(loc.s_reg_low, INVALID_SREG); + if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) { + // Need a wide vector register. + low_reg = AllocTypedTemp(true, reg_class); + loc.low_reg = low_reg; + loc.high_reg = low_reg; // Play nice with existing code. + loc.vec_len = kVectorLength8; + if (update) { + loc.location = kLocPhysReg; + MarkLive(loc.low_reg, loc.s_reg_low); + } + DCHECK(IsFpReg(loc.low_reg)); + } else { + DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG); + + new_regs = AllocTypedTempPair(loc.fp, reg_class); + loc.low_reg = new_regs & 0xff; + loc.high_reg = (new_regs >> 8) & 0xff; + + MarkPair(loc.low_reg, loc.high_reg); + if (update) { + loc.location = kLocPhysReg; + MarkLive(loc.low_reg, loc.s_reg_low); + MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low)); + } + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + } + return loc; +} + +// TODO: Reunify with common code after 'pair mess' has been fixed +RegLocation X86Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { + int new_reg; + + if (loc.wide) + return EvalLocWide(loc, reg_class, update); + + loc = UpdateLoc(loc); + + if (loc.location == kLocPhysReg) { + if (!RegClassMatches(reg_class, loc.low_reg)) { + /* Wrong register class. Realloc, copy and transfer ownership. */ + new_reg = AllocTypedTemp(loc.fp, reg_class); + OpRegCopy(new_reg, loc.low_reg); + CopyRegInfo(new_reg, loc.low_reg); + Clobber(loc.low_reg); + loc.low_reg = new_reg; + if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) + loc.vec_len = kVectorLength4; + } + return loc; + } + + DCHECK_NE(loc.s_reg_low, INVALID_SREG); + + new_reg = AllocTypedTemp(loc.fp, reg_class); + loc.low_reg = new_reg; + if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) + loc.vec_len = kVectorLength4; + + if (update) { + loc.location = kLocPhysReg; + MarkLive(loc.low_reg, loc.s_reg_low); + } + return loc; +} + +int X86Mir2Lir::AllocTempDouble() { + // We really don't need a pair of registers. + return AllocTempFloat(); +} + +// TODO: Reunify with common code after 'pair mess' has been fixed +void X86Mir2Lir::ResetDefLocWide(RegLocation rl) { + DCHECK(rl.wide); + RegisterInfo* p_low = IsTemp(rl.low_reg); + if (IsFpReg(rl.low_reg)) { + // We are using only the low register. + if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) { + NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low); + } + ResetDef(rl.low_reg); + } else { + RegisterInfo* p_high = IsTemp(rl.high_reg); + if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) { + DCHECK(p_low->pair); + NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low); + } + if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) { + DCHECK(p_high->pair); + } + ResetDef(rl.low_reg); + ResetDef(rl.high_reg); + } +} + +void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) { + // Can we do this directly to memory? + rl_dest = UpdateLocWide(rl_dest); + if ((rl_dest.location == kLocDalvikFrame) || + (rl_dest.location == kLocCompilerTemp)) { + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); + int rBase = TargetReg(kSp); + int displacement = SRegOffset(rl_dest.s_reg_low); + + LIR * store = NewLIR3(kX86Mov32MI, rBase, displacement + LOWORD_OFFSET, val_lo); + AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2, + false /* is_load */, true /* is64bit */); + store = NewLIR3(kX86Mov32MI, rBase, displacement + HIWORD_OFFSET, val_hi); + AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2, + false /* is_load */, true /* is64bit */); + return; + } + + // Just use the standard code to do the generation. + Mir2Lir::GenConstWide(rl_dest, value); +} } // namespace art diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index f683affaf9..91c39fa682 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -334,6 +334,7 @@ LIR* X86Mir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) { LIR *res; if (X86_FPREG(r_dest_lo)) { DCHECK(X86_FPREG(r_dest_hi)); // ignore r_dest_hi + DCHECK_EQ(r_dest_lo, r_dest_hi); if (value == 0) { return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo); } else { @@ -343,9 +344,11 @@ LIR* X86Mir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) { res = LoadConstantNoClobber(r_dest_lo, val_lo); } if (val_hi != 0) { + r_dest_hi = AllocTempDouble(); LoadConstantNoClobber(r_dest_hi, val_hi); NewLIR2(kX86PsllqRI, r_dest_hi, 32); NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi); + FreeTemp(r_dest_hi); } } } else { @@ -370,12 +373,6 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(int rBase, int r_index, int scale, is64bit = true; if (X86_FPREG(r_dest)) { opcode = is_array ? kX86MovsdRA : kX86MovsdRM; - if (X86_SINGLEREG(r_dest)) { - DCHECK(X86_FPREG(r_dest_hi)); - DCHECK_EQ(r_dest, (r_dest_hi - 1)); - r_dest = S2d(r_dest, r_dest_hi); - } - r_dest_hi = r_dest + 1; } else { pair = true; opcode = is_array ? kX86Mov32RA : kX86Mov32RM; @@ -488,12 +485,6 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(int rBase, int r_index, int scale, is64bit = true; if (X86_FPREG(r_src)) { opcode = is_array ? kX86MovsdAR : kX86MovsdMR; - if (X86_SINGLEREG(r_src)) { - DCHECK(X86_FPREG(r_src_hi)); - DCHECK_EQ(r_src, (r_src_hi - 1)); - r_src = S2d(r_src, r_src_hi); - } - r_src_hi = r_src + 1; } else { pair = true; opcode = is_array ? kX86Mov32AR : kX86Mov32MR; @@ -573,4 +564,17 @@ LIR* X86Mir2Lir::StoreBaseDispWide(int rBase, int displacement, r_src_lo, r_src_hi, kLong, INVALID_SREG); } +/* + * Copy a long value in Core registers to an XMM register + * + */ +void X86Mir2Lir::OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg) { + NewLIR2(kX86MovdxrRR, fp_reg, low_reg); + int tmp_reg = AllocTempDouble(); + NewLIR2(kX86MovdxrRR, tmp_reg, high_reg); + NewLIR2(kX86PsllqRI, tmp_reg, 32); + NewLIR2(kX86OrpsRR, fp_reg, tmp_reg); + FreeTemp(tmp_reg); +} + } // namespace art diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index f38a16dc15..1488f5d557 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -128,11 +128,11 @@ namespace art { #define X86_FP_REG_MASK 0xF // RegisterLocation templates return values (rAX, rAX/rDX or XMM0). -// location, wide, defined, const, fp, core, ref, high_word, home, low_reg, high_reg, s_reg_low -#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG} -#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rAX, rDX, INVALID_SREG, INVALID_SREG} -#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG} -#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, fr0, fr1, INVALID_SREG, INVALID_SREG} +// location, wide, defined, const, fp, core, ref, high_word, home, vec_len, low_reg, high_reg, s_reg_low +#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG} +#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rAX, rDX, INVALID_SREG, INVALID_SREG} +#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, kVectorLength4, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG} +#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, kVectorLength8, fr0, fr0, INVALID_SREG, INVALID_SREG} enum X86ResourceEncodingPos { kX86GPReg0 = 0, diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index bef966c8ff..f211e3f09e 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -405,7 +405,7 @@ void MIRGraph::DumpRegLocTable(RegLocation* table, int count) { } static const RegLocation fresh_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, - INVALID_REG, INVALID_REG, INVALID_SREG, + kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG, INVALID_SREG}; void MIRGraph::InitRegLocations() { |