summaryrefslogtreecommitdiffstats
path: root/compiler
diff options
context:
space:
mode:
authorYixin Shou <yixin.shou@intel.com>2014-07-28 14:17:09 -0400
committerIan Rogers <irogers@google.com>2014-08-12 21:51:55 -0700
commit8c914c02415d7673f75166e1f1efdcdc7fcadc65 (patch)
tree92fa015b9e573b7d18eebf42446c2df843bed35e /compiler
parent33e70ef9bcf916dd637c03eaad41c57c09bc4261 (diff)
downloadart-8c914c02415d7673f75166e1f1efdcdc7fcadc65.tar.gz
art-8c914c02415d7673f75166e1f1efdcdc7fcadc65.tar.bz2
art-8c914c02415d7673f75166e1f1efdcdc7fcadc65.zip
Implement GenInlinedReverseBits
Added support for x86 inlined version of reverse method of int and long

Change-Id: I7dbdc13b4afedd56557e9eff038a31517cdb1843
Signed-off-by: Yixin Shou <yixin.shou@intel.com>
Diffstat (limited to 'compiler')
-rw-r--r--compiler/dex/quick/x86/codegen_x86.h4
-rwxr-xr-xcompiler/dex/quick/x86/int_x86.cc77
2 files changed, 81 insertions, 0 deletions
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 24a3fe3656..1f5b3500a8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -159,6 +159,7 @@ class X86Mir2Lir : public Mir2Lir {
bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) OVERRIDE;
bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) OVERRIDE;
bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) OVERRIDE;
+ bool GenInlinedReverseBits(CallInfo* info, OpSize size) OVERRIDE;
bool GenInlinedSqrt(CallInfo* info) OVERRIDE;
bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
@@ -957,6 +958,9 @@ class X86Mir2Lir : public Mir2Lir {
private:
// The number of vector registers [0..N] reserved by a call to ReserveVectorRegisters
int num_reserved_vector_regs_;
+
+ void SwapBits(RegStorage result_reg, int shift, int32_t value);
+ void SwapBits64(RegStorage result_reg, int shift, int64_t value);
};
} // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index fdc46e2318..afa2ae21fe 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1061,6 +1061,83 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
return true;
}
+// One round of the classic bit-reversal swap on a 32-bit register:
+//   result = ((result >> shift) & value) | ((result & value) << shift)
+// where `value` is the repeating bit mask for this round (e.g. 0x55555555
+// for shift 1). Updates result_reg in place using one scratch temp.
+void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
+ RegStorage r_temp = AllocTemp();
+ OpRegCopy(r_temp, result_reg);       // r_temp = result
+ OpRegImm(kOpLsr, result_reg, shift); // result >>= shift (logical)
+ OpRegImm(kOpAnd, r_temp, value);     // r_temp &= mask
+ OpRegImm(kOpAnd, result_reg, value); // result &= mask
+ OpRegImm(kOpLsl, r_temp, shift);     // r_temp <<= shift
+ OpRegReg(kOpOr, result_reg, r_temp); // result |= r_temp
+ FreeTemp(r_temp);
+}
+
+// 64-bit variant of SwapBits for the x86-64 path:
+//   result = ((result >> shift) & value) | ((result & value) << shift)
+// Unlike the 32-bit version, the 64-bit mask is first materialized in a
+// wide temp register (LoadConstantWide) and applied with register-register
+// ANDs rather than being used as an immediate.
+void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
+ RegStorage r_temp = AllocTempWide();
+ OpRegCopy(r_temp, result_reg);       // r_temp = result
+ OpRegImm(kOpLsr, result_reg, shift); // result >>= shift (logical)
+ RegStorage r_value = AllocTempWide();
+ LoadConstantWide(r_value, value);    // r_value = 64-bit mask
+ OpRegReg(kOpAnd, r_temp, r_value);   // r_temp &= mask
+ OpRegReg(kOpAnd, result_reg, r_value); // result &= mask
+ OpRegImm(kOpLsl, r_temp, shift);     // r_temp <<= shift
+ OpRegReg(kOpOr, result_reg, r_temp); // result |= r_temp
+ FreeTemp(r_temp);
+ FreeTemp(r_value);
+}
+
+// Inline codegen for Integer.reverse / Long.reverse (size selects k32 vs k64).
+// args[0] is the value to bit-reverse; returns true to signal the intrinsic
+// was inlined. Strategy: reverse byte order with kOpRev (bswap), then swap
+// bits within bytes using three SwapBits rounds (shifts 1, 2, 4).
+bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
+ RegLocation rl_src_i = info->args[0];
+ RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
+ : LoadValue(rl_src_i, kCoreReg);
+ RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ if (size == k64) {
+ if (cu_->instruction_set == kX86_64) {
+ /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
+ swapping bits to reverse bits in a long number x. Using bswap to save instructions
+ compared to generic luni implementation which has 5 rounds of swapping bits.
+ x = bswap x
+ x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+ x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+ x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+ */
+ OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
+ SwapBits64(rl_result.reg, 1, 0x5555555555555555);
+ SwapBits64(rl_result.reg, 2, 0x3333333333333333);
+ SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
+ StoreValueWide(rl_dest, rl_result);
+ return true;
+ }
+ // 32-bit x86: a long lives in a register pair. Byte-reverse each half into
+ // the OPPOSITE half of the result (hi -> result.lo, lo -> result.hi), then
+ // swap bits within each 32-bit half independently.
+ RegStorage r_i_low = rl_i.reg.GetLow();
+ if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
+ // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second
+ // REV.
+ r_i_low = AllocTemp();
+ OpRegCopy(r_i_low, rl_i.reg);
+ }
+ OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
+ OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
+ if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
+ FreeTemp(r_i_low);  // Temp was only allocated in the overlap case above.
+ }
+ SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
+ SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
+ SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
+ SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
+ SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
+ SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
+ StoreValueWide(rl_dest, rl_result);
+ } else {
+ // 32-bit int: single bswap plus three swap rounds.
+ OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
+ SwapBits(rl_result.reg, 1, 0x55555555);
+ SwapBits(rl_result.reg, 2, 0x33333333);
+ SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
+ StoreValue(rl_dest, rl_result);
+ }
+ return true;
+}
+
LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
CHECK(base_of_code_ != nullptr);