| author | Yixin Shou <yixin.shou@intel.com> | 2014-07-28 14:17:09 -0400 |
|---|---|---|
| committer | Ian Rogers <irogers@google.com> | 2014-08-12 21:51:55 -0700 |
| commit | 8c914c02415d7673f75166e1f1efdcdc7fcadc65 (patch) | |
| tree | 92fa015b9e573b7d18eebf42446c2df843bed35e /compiler | |
| parent | 33e70ef9bcf916dd637c03eaad41c57c09bc4261 (diff) | |
| download | art-8c914c02415d7673f75166e1f1efdcdc7fcadc65.tar.gz art-8c914c02415d7673f75166e1f1efdcdc7fcadc65.tar.bz2 art-8c914c02415d7673f75166e1f1efdcdc7fcadc65.zip | |
Implement GenInlinedReverseBits
Added support for the x86 inlined version of the reverse method for int and long.
Change-Id: I7dbdc13b4afedd56557e9eff038a31517cdb1843
Signed-off-by: Yixin Shou <yixin.shou@intel.com>
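
The x86-64 path of the intrinsic follows the bswap-plus-three-masked-swaps scheme spelled out in the algorithm comment inside the diff below, saving two swap rounds compared with the generic five-round libcore implementation that comment refers to. A minimal standalone C++ sketch of that scheme (illustration only, not ART code; `__builtin_bswap64` is a GCC/Clang builtin standing in for the emitted `bswap` instruction):

```cpp
#include <cstdint>
#include <cstdio>

// Byte-reverse first, then swap adjacent bits, bit pairs, and nibbles.
// This mirrors the algorithm comment in GenInlinedReverseBits for the x86-64 path.
static uint64_t ReverseBits64(uint64_t x) {
  x = __builtin_bswap64(x);  // reverse byte order (the bswap instruction)
  x = ((x & 0x5555555555555555ULL) << 1) | ((x >> 1) & 0x5555555555555555ULL);
  x = ((x & 0x3333333333333333ULL) << 2) | ((x >> 2) & 0x3333333333333333ULL);
  x = ((x & 0x0F0F0F0F0F0F0F0FULL) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
  return x;
}

int main() {
  // Reversing the lowest bit of a long sets only the highest bit.
  printf("%016llx\n", static_cast<unsigned long long>(ReverseBits64(1)));  // 8000000000000000
  return 0;
}
```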
Diffstat (limited to 'compiler')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 4 |
| -rwxr-xr-x | compiler/dex/quick/x86/int_x86.cc | 77 |

2 files changed, 81 insertions, 0 deletions
```diff
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 24a3fe3656..1f5b3500a8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -159,6 +159,7 @@ class X86Mir2Lir : public Mir2Lir {
   bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) OVERRIDE;
   bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) OVERRIDE;
   bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) OVERRIDE;
+  bool GenInlinedReverseBits(CallInfo* info, OpSize size) OVERRIDE;
   bool GenInlinedSqrt(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
@@ -957,6 +958,9 @@ class X86Mir2Lir : public Mir2Lir {
  private:
   // The number of vector registers [0..N] reserved by a call to ReserveVectorRegisters
   int num_reserved_vector_regs_;
+
+  void SwapBits(RegStorage result_reg, int shift, int32_t value);
+  void SwapBits64(RegStorage result_reg, int shift, int64_t value);
 };

 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index fdc46e2318..afa2ae21fe 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1061,6 +1061,83 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   return true;
 }

+void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
+  RegStorage r_temp = AllocTemp();
+  OpRegCopy(r_temp, result_reg);
+  OpRegImm(kOpLsr, result_reg, shift);
+  OpRegImm(kOpAnd, r_temp, value);
+  OpRegImm(kOpAnd, result_reg, value);
+  OpRegImm(kOpLsl, r_temp, shift);
+  OpRegReg(kOpOr, result_reg, r_temp);
+  FreeTemp(r_temp);
+}
+
+void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
+  RegStorage r_temp = AllocTempWide();
+  OpRegCopy(r_temp, result_reg);
+  OpRegImm(kOpLsr, result_reg, shift);
+  RegStorage r_value = AllocTempWide();
+  LoadConstantWide(r_value, value);
+  OpRegReg(kOpAnd, r_temp, r_value);
+  OpRegReg(kOpAnd, result_reg, r_value);
+  OpRegImm(kOpLsl, r_temp, shift);
+  OpRegReg(kOpOr, result_reg, r_temp);
+  FreeTemp(r_temp);
+  FreeTemp(r_value);
+}
+
+bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
+  RegLocation rl_src_i = info->args[0];
+  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
+                                   : LoadValue(rl_src_i, kCoreReg);
+  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (size == k64) {
+    if (cu_->instruction_set == kX86_64) {
+      /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
+         swapping bits to reverse bits in a long number x. Using bswap to save instructions
+         compared to generic luni implementation which has 5 rounds of swapping bits.
+         x = bswap x
+         x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+         x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+         x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+      */
+      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
+      SwapBits64(rl_result.reg, 1, 0x5555555555555555);
+      SwapBits64(rl_result.reg, 2, 0x3333333333333333);
+      SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
+      StoreValueWide(rl_dest, rl_result);
+      return true;
+    }
+    RegStorage r_i_low = rl_i.reg.GetLow();
+    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
+      // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the
+      // second REV.
+      r_i_low = AllocTemp();
+      OpRegCopy(r_i_low, rl_i.reg);
+    }
+    OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
+    OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
+    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
+      FreeTemp(r_i_low);
+    }
+    SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
+    SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
+    SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
+    SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
+    SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
+    SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
+    SwapBits(rl_result.reg, 1, 0x55555555);
+    SwapBits(rl_result.reg, 2, 0x33333333);
+    SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
+}
+
 LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   CHECK(base_of_code_ != nullptr);
```
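
For reference, each `SwapBits(result_reg, shift, value)` round above computes `((x & value) << shift) | ((x >> shift) & value)` on the register contents, and the 32-bit x86 path (which has no 64-bit GPRs) byte-swaps each half of the long into the opposite half of the result before running three such rounds on each half. A standalone sketch of that behavior, with illustrative names (`SwapBitsStep`, `ReverseBits64On32Bit`) that are not part of the patch and `__builtin_bswap32` standing in for the emitted REV/bswap:

```cpp
#include <cassert>
#include <cstdint>

// One SwapBits round on a plain value (the real helper emits x86 instructions
// through the Mir2Lir interface instead of operating on host integers).
static uint32_t SwapBitsStep(uint32_t x, int shift, uint32_t mask) {
  return ((x & mask) << shift) | ((x >> shift) & mask);
}

// Sketch of the 32-bit x86 long-reverse path: kOpRev moves each byte-reversed
// half into the opposite half of the result, then each half gets the three
// masked swap rounds.
static uint64_t ReverseBits64On32Bit(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  uint32_t result_lo = __builtin_bswap32(hi);  // REV of the input high half
  uint32_t result_hi = __builtin_bswap32(lo);  // REV of the input low half
  result_lo = SwapBitsStep(result_lo, 1, 0x55555555);
  result_lo = SwapBitsStep(result_lo, 2, 0x33333333);
  result_lo = SwapBitsStep(result_lo, 4, 0x0f0f0f0f);
  result_hi = SwapBitsStep(result_hi, 1, 0x55555555);
  result_hi = SwapBitsStep(result_hi, 2, 0x33333333);
  result_hi = SwapBitsStep(result_hi, 4, 0x0f0f0f0f);
  return (static_cast<uint64_t>(result_hi) << 32) | result_lo;
}

int main() {
  assert(ReverseBits64On32Bit(0x1ULL) == 0x8000000000000000ULL);
  assert(ReverseBits64On32Bit(0x8000000000000000ULL) == 0x1ULL);
  return 0;
}
```

The temp-register handling around the first REV in the patch only matters when the result's low register aliases the input's low register; the sketch side-steps that by working on values rather than registers.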