diff options
-rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 41 | ||||
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 130 | ||||
-rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 363 | ||||
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 74 | ||||
-rw-r--r-- | compiler/dex/reg_storage.h | 5 | ||||
-rw-r--r-- | disassembler/disassembler_x86.cc | 197 |
6 files changed, 775 insertions, 35 deletions
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 92001065d9..91a66d38e0 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -279,6 +279,11 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } +#define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0), { kX86MovsdMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" }, { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" }, @@ -310,10 +315,42 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E, REG_DEF0_USE0), EXT_0F_ENCODING_MAP(Divss, 0xF3, 0x5E, REG_DEF0_USE0), EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0_USE0), - + EXT_0F_ENCODING_MAP(Sqrtsd, 0xF2, 0x51, REG_DEF0_USE0), + EXT_0F_ENCODING2_MAP(Pmulld, 0x66, 0x38, 0x40, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Pmullw, 0x66, 0xD5, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Mulps, 0x00, 0x59, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Mulpd, 0x66, 0x59, REG_DEF0_USE0), + 
EXT_0F_ENCODING_MAP(Paddb, 0x66, 0xFC, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Paddw, 0x66, 0xFD, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Paddd, 0x66, 0xFE, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Addps, 0x00, 0x58, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Addpd, 0xF2, 0x58, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Psubb, 0x66, 0xF8, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Psubw, 0x66, 0xF9, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Psubd, 0x66, 0xFA, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Subps, 0x00, 0x5C, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Subpd, 0x66, 0x5C, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Pand, 0x66, 0xDB, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Por, 0x66, 0xEB, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Pxor, 0x66, 0xEF, REG_DEF0_USE0), + EXT_0F_ENCODING2_MAP(Phaddw, 0x66, 0x38, 0x01, REG_DEF0_USE0), + EXT_0F_ENCODING2_MAP(Phaddd, 0x66, 0x38, 0x02, REG_DEF0_USE0), + + { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1 }, "PextbRRI", "!0r,!1r,!2d" }, + { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1 }, "PextwRRI", "!0r,!1r,!2d" }, + { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1 }, "PextdRRI", "!0r,!1r,!2d" }, + + { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuflwRRI", "!0r,!1r,!2d" }, + { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuffRRI", "!0r,!1r,!2d" }, + + { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1 }, "PsrawRI", "!0r,!1d" }, + { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1 }, "PsradRI", "!0r,!1d" }, + { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1 }, "PsrlwRI", "!0r,!1d" }, + { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1 
}, "PsrldRI", "!0r,!1d" }, { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" }, + { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1 }, "PsllwRI", "!0r,!1d" }, + { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1 }, "PslldRI", "!0r,!1d" }, { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" }, - { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" }, { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" }, { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" }, diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 72cdbbd840..1807d5c13e 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -429,6 +429,136 @@ class X86Mir2Lir : public Mir2Lir { void GenConst128(BasicBlock* bb, MIR* mir); /* + * @brief MIR to move a vectorized register to another. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination + * @note vC: source + */ + void GenMoveVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. 
+ * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenMultiplyVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector. 
+ * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenAddVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenSubtractVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: immediate + */ + void GenShiftLeftVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: immediate + */ + void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from.. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: immediate + */ + void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector. 
+ * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenAndVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenOrVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenXorVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Reduce a 128-bit packed element into a single VR by taking lower bits + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @details Instruction does a horizontal addition of the packed elements and then adds it to VR. + * @note vA: TypeSize + * @note vB: destination and source VR (not vector register) + * @note vC: source (vector register) + */ + void GenAddReduceVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Extract a packed element into a single VR. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination VR (not vector register) + * @note vC: source (vector register) + * @note arg[0]: The index to use for extraction from vector register (which packed element). + */ + void GenReduceVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Create a vector value, with all TypeSize values equal to vC + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. 
+ * @note vA: TypeSize. + * @note vB: destination vector register. + * @note vC: source VR (not vector register). + */ + void GenSetVector(BasicBlock *bb, MIR *mir); + + /* * @brief Generate code for a vector opcode. * @param bb The basic block in which the MIR is from. * @param mir The MIR whose opcode is a non-standard opcode. diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index e7a629aa0b..889ea8b377 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -81,6 +81,16 @@ static const RegStorage dp_temps_arr_64[] = { #endif }; +static const RegStorage xp_temps_arr_32[] = { + rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, +}; +static const RegStorage xp_temps_arr_64[] = { + rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, +#ifdef TARGET_REX_SUPPORT + rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 +#endif +}; + static const std::vector<RegStorage> empty_pool; static const std::vector<RegStorage> core_regs_32(core_regs_arr_32, core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0])); @@ -111,6 +121,11 @@ static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32, static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64, dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0])); +static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32, + xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0])); +static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64, + xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0])); + RegStorage rs_rX86_SP; X86NativeRegisterPool rX86_ARG0; @@ -209,7 +224,7 @@ uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) { /* Double registers in x86 are just a single FP register */ seed = 1; /* FP register starts at bit position 16 */ - shift = reg.IsFloat() ? 
kX86FPReg0 : 0; + shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0; /* Expand the double register id into single offset */ shift += reg_id; return (seed << shift); @@ -542,17 +557,31 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Target-specific adjustments. + // Add in XMM registers. + const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32; + for (RegStorage reg : *xp_temps) { + RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg)); + reginfo_map_.Put(reg.GetReg(), info); + info->SetIsTemp(true); + } + // Alias single precision xmm to double xmms. // TODO: as needed, add larger vector sizes - alias all to the largest. GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_); for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { int sp_reg_num = info->GetReg().GetRegNum(); + RegStorage xp_reg = RegStorage::Solo128(sp_reg_num); + RegisterInfo* xp_reg_info = GetRegInfo(xp_reg); + // 128-bit xmm vector register's master storage should refer to itself. + DCHECK_EQ(xp_reg_info, xp_reg_info->Master()); + + // Redirect 32-bit vector's master storage to 128-bit vector. + info->SetMaster(xp_reg_info); + RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num); RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); - // 64-bit xmm vector register's master storage should refer to itself. - DCHECK_EQ(dp_reg_info, dp_reg_info->Master()); - // Redirect 32-bit vector's master storage to 64-bit vector. - info->SetMaster(dp_reg_info); + // Redirect 64-bit vector's master storage to 128-bit vector. + dp_reg_info->SetMaster(xp_reg_info); } // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods. 
@@ -1240,6 +1269,45 @@ void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { case kMirOpConstVector: GenConst128(bb, mir); break; + case kMirOpMoveVector: + GenMoveVector(bb, mir); + break; + case kMirOpPackedMultiply: + GenMultiplyVector(bb, mir); + break; + case kMirOpPackedAddition: + GenAddVector(bb, mir); + break; + case kMirOpPackedSubtract: + GenSubtractVector(bb, mir); + break; + case kMirOpPackedShiftLeft: + GenShiftLeftVector(bb, mir); + break; + case kMirOpPackedSignedShiftRight: + GenSignedShiftRightVector(bb, mir); + break; + case kMirOpPackedUnsignedShiftRight: + GenUnsignedShiftRightVector(bb, mir); + break; + case kMirOpPackedAnd: + GenAndVector(bb, mir); + break; + case kMirOpPackedOr: + GenOrVector(bb, mir); + break; + case kMirOpPackedXor: + GenXorVector(bb, mir); + break; + case kMirOpPackedAddReduce: + GenAddReduceVector(bb, mir); + break; + case kMirOpPackedReduce: + GenReduceVector(bb, mir); + break; + case kMirOpPackedSet: + GenSetVector(bb, mir); + break; default: break; } @@ -1249,9 +1317,9 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { int type_size = mir->dalvikInsn.vA; // We support 128 bit vectors. DCHECK_EQ(type_size & 0xFFFF, 128); - int reg = mir->dalvikInsn.vB; - DCHECK_LT(reg, 8); + RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB); uint32_t *args = mir->dalvikInsn.arg; + int reg = rs_dest.GetReg(); // Check for all 0 case. if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) { NewLIR2(kX86XorpsRR, reg, reg); @@ -1277,6 +1345,287 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { SetMemRefType(load, true, kLiteral); } +void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) { + // We only support 128 bit registers. 
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC); + NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg()); +} + +void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC); + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PmulldRR; + break; + case kSignedHalf: + opcode = kX86PmullwRR; + break; + case kSingle: + opcode = kX86MulpsRR; + break; + case kDouble: + opcode = kX86MulpdRR; + break; + default: + LOG(FATAL) << "Unsupported vector multiply " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg()); +} + +void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC); + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PadddRR; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PaddwRR; + break; + case kUnsignedByte: + case kSignedByte: + opcode = kX86PaddbRR; + break; + case kSingle: + opcode = kX86AddpsRR; + break; + case kDouble: + opcode = kX86AddpdRR; + break; + default: + LOG(FATAL) << "Unsupported vector addition " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg()); +} + +void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src2 = 
RegStorage::Solo128(mir->dalvikInsn.vC); + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PsubdRR; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PsubwRR; + break; + case kUnsignedByte: + case kSignedByte: + opcode = kX86PsubbRR; + break; + case kSingle: + opcode = kX86SubpsRR; + break; + case kDouble: + opcode = kX86SubpdRR; + break; + default: + LOG(FATAL) << "Unsupported vector subtraction " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg()); +} + +void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + int imm = mir->dalvikInsn.vC; + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PslldRI; + break; + case k64: + opcode = kX86PsllqRI; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PsllwRI; + break; + default: + LOG(FATAL) << "Unsupported vector shift left " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), imm); +} + +void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + int imm = mir->dalvikInsn.vC; + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PsradRI; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PsrawRI; + break; + default: + LOG(FATAL) << "Unsupported vector signed shift right " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), imm); +} + +void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + int imm = 
mir->dalvikInsn.vC; + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PsrldRI; + break; + case k64: + opcode = kX86PsrlqRI; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PsrlwRI; + break; + default: + LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), imm); +} + +void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) { + // We only support 128 bit registers. + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC); + NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg()); +} + +void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) { + // We only support 128 bit registers. + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC); + NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg()); +} + +void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) { + // We only support 128 bit registers. 
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC); + NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg()); +} + +void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB); + int imm = mir->dalvikInsn.vC; + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PhadddRR; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PhaddwRR; + break; + default: + LOG(FATAL) << "Unsupported vector add reduce " << opsize; + break; + } + NewLIR2(opcode, rs_dest_src1.GetReg(), imm); +} + +void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB); + int index = mir->dalvikInsn.arg[0]; + int opcode = 0; + switch (opsize) { + case k32: + opcode = kX86PextrdRRI; + break; + case kSignedHalf: + case kUnsignedHalf: + opcode = kX86PextrwRRI; + break; + case kUnsignedByte: + case kSignedByte: + opcode = kX86PextrbRRI; + break; + default: + LOG(FATAL) << "Unsupported vector reduce " << opsize; + break; + } + // We need to extract to a GPR. + RegStorage temp = AllocTemp(); + NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index); + + // Assume that the destination VR is in the def for the mir. 
+ RegLocation rl_dest = mir_graph_->GetDest(mir); + RegLocation rl_temp = + {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG}; + StoreValue(rl_dest, rl_temp); +} + +void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) { + DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U); + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16); + RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB); + int op_low = 0, op_high = 0; + switch (opsize) { + case k32: + op_low = kX86PshufdRRI; + break; + case kSignedHalf: + case kUnsignedHalf: + // Handles low quadword. + op_low = kX86PshuflwRRI; + // Handles upper quadword. + op_high = kX86PshufdRRI; + break; + default: + LOG(FATAL) << "Unsupported vector set " << opsize; + break; + } + + // Load the value from the VR into a GPR. + RegLocation rl_src = mir_graph_->GetSrc(mir, 0); + rl_src = LoadValue(rl_src, kCoreReg); + + // Load the value into the XMM register. + NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg()); + + // Now shuffle the value across the destination. + NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0); + + // And then repeat as needed. + if (op_high != 0) { + NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0); + } +} + + LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) { int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg); for (LIR *p = const_vectors_; p != nullptr; p = p->next) { diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index adfed0c4f3..430bc7d48f 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -151,7 +151,7 @@ enum X86NativeRegisterPool { rRET = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16, #endif - // xmm registers, single precision view + // xmm registers, single precision view. 
fr0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0, fr1 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1, fr2 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2, @@ -161,7 +161,7 @@ enum X86NativeRegisterPool { fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6, fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7, - // xmm registers, double precision alises + // xmm registers, double precision aliases. dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, dr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1, dr2 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2, @@ -171,15 +171,15 @@ enum X86NativeRegisterPool { dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, - // xmm registers, quad precision alises - qr0 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 0, - qr1 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 1, - qr2 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 2, - qr3 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 3, - qr4 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 4, - qr5 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 5, - qr6 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 6, - qr7 = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 7, + // xmm registers aliases. + xr0 = RegStorage::k128BitSolo | 0, + xr1 = RegStorage::k128BitSolo | 1, + xr2 = RegStorage::k128BitSolo | 2, + xr3 = RegStorage::k128BitSolo | 3, + xr4 = RegStorage::k128BitSolo | 4, + xr5 = RegStorage::k128BitSolo | 5, + xr6 = RegStorage::k128BitSolo | 6, + xr7 = RegStorage::k128BitSolo | 7, // TODO: as needed, add 256, 512 and 1024-bit xmm views. 
}; @@ -221,14 +221,14 @@ constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); -constexpr RegStorage rs_qr0(RegStorage::kValid | qr0); -constexpr RegStorage rs_qr1(RegStorage::kValid | qr1); -constexpr RegStorage rs_qr2(RegStorage::kValid | qr2); -constexpr RegStorage rs_qr3(RegStorage::kValid | qr3); -constexpr RegStorage rs_qr4(RegStorage::kValid | qr4); -constexpr RegStorage rs_qr5(RegStorage::kValid | qr5); -constexpr RegStorage rs_qr6(RegStorage::kValid | qr6); -constexpr RegStorage rs_qr7(RegStorage::kValid | qr7); +constexpr RegStorage rs_xr0(RegStorage::kValid | xr0); +constexpr RegStorage rs_xr1(RegStorage::kValid | xr1); +constexpr RegStorage rs_xr2(RegStorage::kValid | xr2); +constexpr RegStorage rs_xr3(RegStorage::kValid | xr3); +constexpr RegStorage rs_xr4(RegStorage::kValid | xr4); +constexpr RegStorage rs_xr5(RegStorage::kValid | xr5); +constexpr RegStorage rs_xr6(RegStorage::kValid | xr6); +constexpr RegStorage rs_xr7(RegStorage::kValid | xr7); extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; @@ -418,9 +418,39 @@ enum X86OpCode { Binary0fOpCode(kX86Divsd), // double divide Binary0fOpCode(kX86Divss), // float divide Binary0fOpCode(kX86Punpckldq), // Interleave low-order double words - kX86PsrlqRI, // right shift of floating point registers - kX86PsllqRI, // left shift of floating point registers - kX86SqrtsdRR, // sqrt of floating point register + Binary0fOpCode(kX86Sqrtsd), // square root + Binary0fOpCode(kX86Pmulld), // parallel integer multiply 32 bits x 4 + Binary0fOpCode(kX86Pmullw), // parallel integer multiply 16 bits x 8 + Binary0fOpCode(kX86Mulps), // parallel FP multiply 32 bits x 4 + Binary0fOpCode(kX86Mulpd), // parallel FP multiply 64 bits x 2 + Binary0fOpCode(kX86Paddb), // parallel integer addition 8 bits x 16 + Binary0fOpCode(kX86Paddw), // parallel integer addition 16 bits x 8 + 
Binary0fOpCode(kX86Paddd), // parallel integer addition 32 bits x 4 + Binary0fOpCode(kX86Addps), // parallel FP addition 32 bits x 4 + Binary0fOpCode(kX86Addpd), // parallel FP addition 64 bits x 2 + Binary0fOpCode(kX86Psubb), // parallel integer subtraction 8 bits x 16 + Binary0fOpCode(kX86Psubw), // parallel integer subtraction 16 bits x 8 + Binary0fOpCode(kX86Psubd), // parallel integer subtraction 32 bits x 4 + Binary0fOpCode(kX86Subps), // parallel FP subtraction 32 bits x 4 + Binary0fOpCode(kX86Subpd), // parallel FP subtraction 64 bits x 2 + Binary0fOpCode(kX86Pand), // parallel AND 128 bits x 1 + Binary0fOpCode(kX86Por), // parallel OR 128 bits x 1 + Binary0fOpCode(kX86Pxor), // parallel XOR 128 bits x 1 + Binary0fOpCode(kX86Phaddw), // parallel horizontal addition 16 bits x 8 + Binary0fOpCode(kX86Phaddd), // parallel horizontal addition 32 bits x 4 + kX86PextrbRRI, // Extract 8 bits from XMM into GPR + kX86PextrwRRI, // Extract 16 bits from XMM into GPR + kX86PextrdRRI, // Extract 32 bits from XMM into GPR + kX86PshuflwRRI, // Shuffle 16 bits in lower 64 bits of XMM. + kX86PshufdRRI, // Shuffle 32 bits in XMM. 
+ kX86PsrawRI, // signed right shift of floating point registers 16 bits x 8 + kX86PsradRI, // signed right shift of floating point registers 32 bits x 4 + kX86PsrlwRI, // logical right shift of floating point registers 16 bits x 8 + kX86PsrldRI, // logical right shift of floating point registers 32 bits x 4 + kX86PsrlqRI, // logical right shift of floating point registers 64 bits x 2 + kX86PsllwRI, // left shift of floating point registers 16 bits x 8 + kX86PslldRI, // left shift of floating point registers 32 bits x 4 + kX86PsllqRI, // left shift of floating point registers 64 bits x 2 kX86Fild32M, // push 32-bit integer on x87 stack kX86Fild64M, // push 64-bit integer on x87 stack kX86Fstp32M, // pop top x87 fp stack and do 32-bit store diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index df21343884..2f7e701219 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -280,6 +280,11 @@ class RegStorage { return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); } + // Create a 128-bit solo. + static RegStorage Solo128(int reg_num) { + return RegStorage(k128BitSolo, reg_num & kRegTypeMask); + } + // Create a 64-bit solo. 
static RegStorage Solo64(int reg_num) { return RegStorage(k64BitSolo, reg_num & kRegTypeMask); diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 5cc6acf0bf..cba4ebf4e8 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -363,10 +363,49 @@ DISASSEMBLER_ENTRY(cmp, src_reg_file = dst_reg_file = SSE; break; case 0x38: // 3 byte extended opcode - opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); + instr++; + if (prefix[2] == 0x66) { + switch (*instr) { + case 0x40: + opcode << "pmulld"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + default: + opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); + } + } else { + opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); + } break; case 0x3A: // 3 byte extended opcode - opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); + instr++; + if (prefix[2] == 0x66) { + switch (*instr) { + case 0x14: + opcode << "pextrb"; + prefix[2] = 0; + has_modrm = true; + store = true; + dst_reg_file = SSE; + immediate_bytes = 1; + break; + case 0x16: + opcode << "pextrd"; + prefix[2] = 0; + has_modrm = true; + store = true; + dst_reg_file = SSE; + immediate_bytes = 1; + break; + default: + opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); + } + } else { + opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); + } break; case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: @@ -467,11 +506,11 @@ DISASSEMBLER_ENTRY(cmp, break; case 0x6F: if (prefix[2] == 0x66) { - dst_reg_file = SSE; + src_reg_file = dst_reg_file = SSE; opcode << "movdqa"; prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode } else if (prefix[0] == 0xF3) { - dst_reg_file = SSE; + src_reg_file = dst_reg_file = SSE; opcode << "movdqu"; prefix[0] = 0; 
// clear prefix now it's served its purpose as part of the opcode } else { @@ -481,6 +520,25 @@ DISASSEMBLER_ENTRY(cmp, load = true; has_modrm = true; break; + case 0x70: + if (prefix[2] == 0x66) { + opcode << "pshufd"; + prefix[2] = 0; + has_modrm = true; + store = true; + src_reg_file = dst_reg_file = SSE; + immediate_bytes = 1; + } else if (prefix[0] == 0xF2) { + opcode << "pshuflw"; + prefix[0] = 0; + has_modrm = true; + store = true; + src_reg_file = dst_reg_file = SSE; + immediate_bytes = 1; + } else { + opcode << StringPrintf("unknown opcode '0F %02X'", *instr); + } + break; case 0x71: if (prefix[2] == 0x66) { dst_reg_file = SSE; @@ -603,6 +661,18 @@ DISASSEMBLER_ENTRY(cmp, case 0xB7: opcode << "movzxw"; has_modrm = true; load = true; break; case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; break; case 0xBF: opcode << "movsxw"; has_modrm = true; load = true; break; + case 0xC5: + if (prefix[2] == 0x66) { + opcode << "pextrw"; + prefix[2] = 0; + has_modrm = true; + store = true; + src_reg_file = dst_reg_file = SSE; + immediate_bytes = 1; + } else { + opcode << StringPrintf("unknown opcode '0F %02X'", *instr); + } + break; case 0xC7: static const char* x0FxC7_opcodes[] = { "unknown-0f-c7", "cmpxchg8b", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7" }; modrm_opcodes = x0FxC7_opcodes; @@ -614,6 +684,125 @@ DISASSEMBLER_ENTRY(cmp, opcode << "bswap"; reg_in_opcode = true; break; + case 0xDB: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "pand"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xD5: + if (prefix[2] == 0x66) { + opcode << "pmullw"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + } else { + opcode << StringPrintf("unknown opcode '0F %02X'", *instr); + } + break; 
+ case 0xEB: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "por"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xEF: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "pxor"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xF8: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "psubb"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xF9: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "psubw"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xFA: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "psubd"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xFC: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "paddb"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xFD: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "paddw"; + 
prefix[2] = 0; + has_modrm = true; + load = true; + break; + case 0xFE: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + opcode << "paddd"; + prefix[2] = 0; + has_modrm = true; + load = true; + break; default: opcode << StringPrintf("unknown opcode '0F %02X'", *instr); break; |